mirror of
https://github.com/Y2Z/monolith
synced 2024-11-10 19:10:29 +00:00
Merge branch 'master' into author-robatipoor
This commit is contained in:
commit
d8d6437a15
@ -1,19 +1,18 @@
|
|||||||
[package]
|
[package]
|
||||||
name = "monolith"
|
name = "monolith"
|
||||||
version = "2.0.5"
|
version = "2.0.9"
|
||||||
authors = [
|
authors = [
|
||||||
"Sunshine <sunshine@uberspace.net>",
|
"Sunshine <sunshine@uberspace.net>",
|
||||||
"Mahdi Robatipoor <mahdi.robatipoor@gmil.com>",
|
"Mahdi Robatipoor <mahdi.robatipoor@gmil.com>",
|
||||||
]
|
]
|
||||||
description = "CLI tool to save webpages as a single HTML file"
|
description = "CLI tool for saving web pages as a single HTML file"
|
||||||
|
|
||||||
[dependencies]
|
[dependencies]
|
||||||
base64 = "0.10.1"
|
base64 = "0.10.1"
|
||||||
clap = "2.33.0"
|
clap = "2.33.0"
|
||||||
html5ever = "0.24.0"
|
html5ever = "0.24.0"
|
||||||
indicatif = "0.11.0"
|
indicatif = "0.11.0"
|
||||||
mime-sniffer = "0.1.2"
|
lazy_static = "1.3.0"
|
||||||
regex = "1.2.1"
|
regex = "1.2.1"
|
||||||
reqwest = "0.9.20"
|
reqwest = "0.9.20"
|
||||||
url = "2.1.0"
|
url = "2.1.0"
|
||||||
lazy_static = "1.3.0"
|
|
||||||
|
34
snap/snapcraft.yaml
Normal file
34
snap/snapcraft.yaml
Normal file
@ -0,0 +1,34 @@
|
|||||||
|
name: monolith
|
||||||
|
base: core18
|
||||||
|
version: git
|
||||||
|
summary: Monolith - Save HTML pages with ease
|
||||||
|
description: |
|
||||||
|
A data hoarder's dream come true: bundle any web page into a single
|
||||||
|
HTML file. You can finally replace that gazillion of open tabs with
|
||||||
|
a gazillion of .html files stored somewhere on your precious little
|
||||||
|
drive.
|
||||||
|
Unlike conventional "Save page as…", monolith not only saves the
|
||||||
|
target document, it embeds CSS, image, and JavaScript assets all
|
||||||
|
at once, producing a single HTML5 document that is a joy to store
|
||||||
|
and share.
|
||||||
|
If compared to saving websites with wget -mpk, monolith embeds
|
||||||
|
all assets as data URLs and therefore displays the saved page
|
||||||
|
exactly the same, being completely separated from the Internet.
|
||||||
|
|
||||||
|
confinement: strict
|
||||||
|
|
||||||
|
parts:
|
||||||
|
monolith:
|
||||||
|
plugin: rust
|
||||||
|
source: .
|
||||||
|
build-packages:
|
||||||
|
- libssl-dev
|
||||||
|
- pkg-config
|
||||||
|
|
||||||
|
apps:
|
||||||
|
monolith:
|
||||||
|
command: monolith
|
||||||
|
plugs:
|
||||||
|
- home
|
||||||
|
- network
|
||||||
|
- removable-media
|
74
src/html.rs
74
src/html.rs
@ -1,6 +1,7 @@
|
|||||||
use http::{is_valid_url, resolve_url, retrieve_asset};
|
use http::{is_valid_url, resolve_url, retrieve_asset};
|
||||||
use std::default::Default;
|
use std::default::Default;
|
||||||
use std::io;
|
use std::io;
|
||||||
|
use utils::data_to_dataurl;
|
||||||
|
|
||||||
use html5ever::parse_document;
|
use html5ever::parse_document;
|
||||||
use html5ever::rcdom::{Handle, NodeData, RcDom};
|
use html5ever::rcdom::{Handle, NodeData, RcDom};
|
||||||
@ -14,10 +15,12 @@ enum NodeMatch {
|
|||||||
Anchor,
|
Anchor,
|
||||||
Script,
|
Script,
|
||||||
Form,
|
Form,
|
||||||
|
IFrame,
|
||||||
Other,
|
Other,
|
||||||
}
|
}
|
||||||
|
|
||||||
const PNG_PIXEL: &str = "";
|
const TRANSPARENT_PIXEL: &str = "data:image/png;base64,\
|
||||||
|
iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR42mNkYAAAAAYAAjCB0C8AAAAASUVORK5CYII=";
|
||||||
|
|
||||||
const JS_DOM_EVENT_ATTRS: [&str; 21] = [
|
const JS_DOM_EVENT_ATTRS: [&str; 21] = [
|
||||||
// Input
|
// Input
|
||||||
@ -74,7 +77,8 @@ pub fn walk_and_embed_assets(
|
|||||||
NodeData::Comment { .. } => {
|
NodeData::Comment { .. } => {
|
||||||
// Note: in case of opt_no_js being set to true, there's no need to worry about
|
// Note: in case of opt_no_js being set to true, there's no need to worry about
|
||||||
// getting rid of comments that may contain scripts, e.g. <!--[if IE]><script>...
|
// getting rid of comments that may contain scripts, e.g. <!--[if IE]><script>...
|
||||||
// since that's not part of W3C standard and gets ignored by browsers other than IE [5, 9]
|
// since that's not part of W3C standard and therefore gets ignored
|
||||||
|
// by browsers other than IE [5, 9]
|
||||||
}
|
}
|
||||||
|
|
||||||
NodeData::Element {
|
NodeData::Element {
|
||||||
@ -85,26 +89,26 @@ pub fn walk_and_embed_assets(
|
|||||||
let attrs_mut = &mut attrs.borrow_mut();
|
let attrs_mut = &mut attrs.borrow_mut();
|
||||||
let mut found = NodeMatch::Other;
|
let mut found = NodeMatch::Other;
|
||||||
|
|
||||||
if &name.local == "link" {
|
match name.local.as_ref() {
|
||||||
for attr in attrs_mut.iter_mut() {
|
"link" => {
|
||||||
if &attr.name.local == "rel" {
|
for attr in attrs_mut.iter_mut() {
|
||||||
if is_icon(&attr.value.to_string()) {
|
if &attr.name.local == "rel" {
|
||||||
found = NodeMatch::Icon;
|
if is_icon(&attr.value.to_string()) {
|
||||||
break;
|
found = NodeMatch::Icon;
|
||||||
} else if attr.value.to_string() == "stylesheet" {
|
break;
|
||||||
found = NodeMatch::StyleSheet;
|
} else if attr.value.to_string() == "stylesheet" {
|
||||||
break;
|
found = NodeMatch::StyleSheet;
|
||||||
|
break;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} else if &name.local == "img" {
|
"img" => { found = NodeMatch::Image; }
|
||||||
found = NodeMatch::Image;
|
"a" => { found = NodeMatch::Anchor; }
|
||||||
} else if &name.local == "a" {
|
"script" => { found = NodeMatch::Script; }
|
||||||
found = NodeMatch::Anchor;
|
"form" => { found = NodeMatch::Form; }
|
||||||
} else if &name.local == "script" {
|
"iframe" => { found = NodeMatch::IFrame; }
|
||||||
found = NodeMatch::Script;
|
_ => {}
|
||||||
} else if &name.local == "form" {
|
|
||||||
found = NodeMatch::Form;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
match found {
|
match found {
|
||||||
@ -128,7 +132,7 @@ pub fn walk_and_embed_assets(
|
|||||||
if &attr.name.local == "src" {
|
if &attr.name.local == "src" {
|
||||||
if opt_no_images {
|
if opt_no_images {
|
||||||
attr.value.clear();
|
attr.value.clear();
|
||||||
attr.value.push_slice(PNG_PIXEL);
|
attr.value.push_slice(TRANSPARENT_PIXEL);
|
||||||
} else {
|
} else {
|
||||||
let src_full_url = resolve_url(&url, &attr.value.to_string());
|
let src_full_url = resolve_url(&url, &attr.value.to_string());
|
||||||
let img_datauri = retrieve_asset(
|
let img_datauri = retrieve_asset(
|
||||||
@ -146,8 +150,8 @@ pub fn walk_and_embed_assets(
|
|||||||
NodeMatch::Anchor => {
|
NodeMatch::Anchor => {
|
||||||
for attr in attrs_mut.iter_mut() {
|
for attr in attrs_mut.iter_mut() {
|
||||||
if &attr.name.local == "href" {
|
if &attr.name.local == "href" {
|
||||||
// Do not touch hrefs which begin with a hash sign
|
// Don't touch email links or hrefs which begin with a hash sign
|
||||||
if attr.value.to_string().chars().nth(0) == Some('#') {
|
if attr.value.starts_with('#') || attr.value.starts_with("mailto:") {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -211,6 +215,32 @@ pub fn walk_and_embed_assets(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
NodeMatch::IFrame => {
|
||||||
|
for attr in attrs_mut.iter_mut() {
|
||||||
|
if &attr.name.local == "src" {
|
||||||
|
let src_full_url = resolve_url(&url, &attr.value.to_string()).unwrap();
|
||||||
|
let iframe_data = retrieve_asset(
|
||||||
|
&src_full_url,
|
||||||
|
false,
|
||||||
|
"text/html",
|
||||||
|
opt_user_agent,
|
||||||
|
);
|
||||||
|
let dom = html_to_dom(&iframe_data.unwrap());
|
||||||
|
walk_and_embed_assets(
|
||||||
|
&src_full_url,
|
||||||
|
&dom.document,
|
||||||
|
opt_no_js,
|
||||||
|
opt_no_images,
|
||||||
|
opt_user_agent,
|
||||||
|
);
|
||||||
|
let mut buf: Vec<u8> = Vec::new();
|
||||||
|
serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();
|
||||||
|
let iframe_datauri = data_to_dataurl("text/html", &buf);
|
||||||
|
attr.value.clear();
|
||||||
|
attr.value.push_slice(iframe_datauri.as_str());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
NodeMatch::Other => {}
|
NodeMatch::Other => {}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
93
src/http.rs
93
src/http.rs
@ -1,6 +1,6 @@
|
|||||||
use regex::Regex;
|
use regex::Regex;
|
||||||
use reqwest::header::{CONTENT_TYPE, USER_AGENT};
|
use reqwest::header::{CONTENT_TYPE, USER_AGENT};
|
||||||
use reqwest::Client;
|
use reqwest::{Client, RedirectPolicy};
|
||||||
use std::time::Duration;
|
use std::time::Duration;
|
||||||
use url::{ParseError, Url};
|
use url::{ParseError, Url};
|
||||||
use utils::data_to_dataurl;
|
use utils::data_to_dataurl;
|
||||||
@ -9,6 +9,13 @@ lazy_static! {
|
|||||||
static ref REGEX_URL: Regex = Regex::new(r"^https?://").unwrap();
|
static ref REGEX_URL: Regex = Regex::new(r"^https?://").unwrap();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn is_data_url(url: &str) -> Result<bool, String> {
|
||||||
|
match Url::parse(url) {
|
||||||
|
Ok(parsed_url) => Ok(parsed_url.scheme() == "data"),
|
||||||
|
Err(err) => Err(format!("{}", err)),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
pub fn is_valid_url(path: &str) -> bool {
|
pub fn is_valid_url(path: &str) -> bool {
|
||||||
REGEX_URL.is_match(path)
|
REGEX_URL.is_match(path)
|
||||||
}
|
}
|
||||||
@ -18,63 +25,23 @@ pub fn resolve_url(from: &str, to: &str) -> Result<String, ParseError> {
|
|||||||
// (anything, http://site.com/css/main.css)
|
// (anything, http://site.com/css/main.css)
|
||||||
to.to_string()
|
to.to_string()
|
||||||
} else {
|
} else {
|
||||||
let mut re = String::new();
|
Url::parse(from)?.join(to)?.to_string()
|
||||||
if is_valid_url(from) {
|
|
||||||
// It's a remote resource (HTTP)
|
|
||||||
if to.chars().nth(0) == Some('/') {
|
|
||||||
// (http://site.com/article/1, /...?)
|
|
||||||
let from_url = Url::parse(from)?;
|
|
||||||
|
|
||||||
if to.chars().nth(1) == Some('/') {
|
|
||||||
// (http://site.com/article/1, //images/1.png)
|
|
||||||
re.push_str(from_url.scheme());
|
|
||||||
re.push_str(":");
|
|
||||||
re.push_str(to);
|
|
||||||
} else {
|
|
||||||
// (http://site.com/article/1, /css/main.css)
|
|
||||||
re.push_str(from_url.scheme());
|
|
||||||
re.push_str("://");
|
|
||||||
re.push_str(from_url.host_str().unwrap());
|
|
||||||
re.push_str(to);
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
// (http://site.com, css/main.css)
|
|
||||||
// TODO improve to ensure no // or /// ever happen
|
|
||||||
re.push_str(from);
|
|
||||||
re.push_str("/");
|
|
||||||
re.push_str(to);
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
// It's a local resource (fs)
|
|
||||||
// TODO improve to ensure no // or /// ever happen
|
|
||||||
// TODO for fs use basepath instead of $from
|
|
||||||
re.push_str(from);
|
|
||||||
re.push_str("/");
|
|
||||||
re.push_str(to);
|
|
||||||
}
|
|
||||||
re
|
|
||||||
};
|
};
|
||||||
|
|
||||||
Ok(result)
|
Ok(result)
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn url_is_data(url: &str) -> Result<bool, String> {
|
|
||||||
match Url::parse(url) {
|
|
||||||
Ok(parsed_url) => Ok(parsed_url.scheme() == "data"),
|
|
||||||
Err(err) => Err(format!("{}", err)),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn retrieve_asset(
|
pub fn retrieve_asset(
|
||||||
url: &str,
|
url: &str,
|
||||||
as_dataurl: bool,
|
as_dataurl: bool,
|
||||||
as_mime: &str,
|
as_mime: &str,
|
||||||
opt_user_agent: &str,
|
opt_user_agent: &str,
|
||||||
) -> Result<String, reqwest::Error> {
|
) -> Result<String, reqwest::Error> {
|
||||||
if url_is_data(&url).unwrap() {
|
if is_data_url(&url).unwrap() {
|
||||||
Ok(url.to_string())
|
Ok(url.to_string())
|
||||||
} else {
|
} else {
|
||||||
let client = Client::builder()
|
let client = Client::builder()
|
||||||
|
.redirect(RedirectPolicy::limited(3))
|
||||||
.timeout(Duration::from_secs(10))
|
.timeout(Duration::from_secs(10))
|
||||||
.build()
|
.build()
|
||||||
.unwrap();
|
.unwrap();
|
||||||
@ -125,13 +92,19 @@ mod tests {
|
|||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_resolve_url() -> Result<(), ParseError> {
|
fn test_resolve_url() -> Result<(), ParseError> {
|
||||||
let resolved_url = resolve_url("https://www.kernel.org", "../category/signatures.html")?;
|
let resolved_url = resolve_url(
|
||||||
|
"https://www.kernel.org",
|
||||||
|
"../category/signatures.html",
|
||||||
|
)?;
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
resolved_url.as_str(),
|
resolved_url.as_str(),
|
||||||
"https://www.kernel.org/../category/signatures.html"
|
"https://www.kernel.org/category/signatures.html"
|
||||||
);
|
);
|
||||||
|
|
||||||
let resolved_url = resolve_url("https://www.kernel.org", "category/signatures.html")?;
|
let resolved_url = resolve_url(
|
||||||
|
"https://www.kernel.org",
|
||||||
|
"category/signatures.html",
|
||||||
|
)?;
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
resolved_url.as_str(),
|
resolved_url.as_str(),
|
||||||
"https://www.kernel.org/category/signatures.html"
|
"https://www.kernel.org/category/signatures.html"
|
||||||
@ -155,6 +128,15 @@ mod tests {
|
|||||||
"https://www.kernel.org/theme/images/logos/tux.png"
|
"https://www.kernel.org/theme/images/logos/tux.png"
|
||||||
);
|
);
|
||||||
|
|
||||||
|
let resolved_url = resolve_url(
|
||||||
|
"https://www.kernel.org",
|
||||||
|
"//another-host.org/theme/images/logos/tux.png",
|
||||||
|
)?;
|
||||||
|
assert_eq!(
|
||||||
|
resolved_url.as_str(),
|
||||||
|
"https://another-host.org/theme/images/logos/tux.png"
|
||||||
|
);
|
||||||
|
|
||||||
let resolved_url = resolve_url(
|
let resolved_url = resolve_url(
|
||||||
"https://www.kernel.org/category/signatures.html",
|
"https://www.kernel.org/category/signatures.html",
|
||||||
"/theme/images/logos/tux.png",
|
"/theme/images/logos/tux.png",
|
||||||
@ -164,16 +146,25 @@ mod tests {
|
|||||||
"https://www.kernel.org/theme/images/logos/tux.png"
|
"https://www.kernel.org/theme/images/logos/tux.png"
|
||||||
);
|
);
|
||||||
|
|
||||||
|
let resolved_url = resolve_url(
|
||||||
|
"https://www.w3schools.com/html/html_iframe.asp",
|
||||||
|
"default.asp",
|
||||||
|
)?;
|
||||||
|
assert_eq!(
|
||||||
|
resolved_url.as_str(),
|
||||||
|
"https://www.w3schools.com/html/default.asp"
|
||||||
|
);
|
||||||
|
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_url_is_data() {
|
fn test_is_data_url() {
|
||||||
assert!(
|
assert!(
|
||||||
url_is_data("data:text/html;base64,V2VsY29tZSBUbyBUaGUgUGFydHksIDxiPlBhbDwvYj4h")
|
is_data_url("data:text/html;base64,V2VsY29tZSBUbyBUaGUgUGFydHksIDxiPlBhbDwvYj4h")
|
||||||
.unwrap_or(false)
|
.unwrap_or(false)
|
||||||
);
|
);
|
||||||
assert!(!url_is_data("https://kernel.org").unwrap_or(false));
|
assert!(!is_data_url("https://kernel.org").unwrap_or(false));
|
||||||
assert!(!url_is_data("//kernel.org").unwrap_or(false));
|
assert!(!is_data_url("//kernel.org").unwrap_or(false));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
19
src/main.rs
19
src/main.rs
@ -6,7 +6,8 @@ use clap::{App, Arg};
|
|||||||
use monolith::html::{html_to_dom, print_dom, walk_and_embed_assets};
|
use monolith::html::{html_to_dom, print_dom, walk_and_embed_assets};
|
||||||
use monolith::http::{is_valid_url, retrieve_asset};
|
use monolith::http::{is_valid_url, retrieve_asset};
|
||||||
|
|
||||||
static DEFAULT_USER_AGENT: &str = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.14; rv:66.0) Gecko/20100101 Firefox/66.0";
|
static DEFAULT_USER_AGENT: &str =
|
||||||
|
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10.14; rv:66.0) Gecko/20100101 Firefox/66.0";
|
||||||
|
|
||||||
fn main() {
|
fn main() {
|
||||||
let command = App::new("monolith")
|
let command = App::new("monolith")
|
||||||
@ -22,20 +23,26 @@ fn main() {
|
|||||||
)
|
)
|
||||||
.args_from_usage("-j, --no-js 'Excludes JavaScript'")
|
.args_from_usage("-j, --no-js 'Excludes JavaScript'")
|
||||||
.args_from_usage("-i, --no-images 'Removes images'")
|
.args_from_usage("-i, --no-images 'Removes images'")
|
||||||
.args_from_usage("-u, --user-agent=<Iceweasel> 'Custom User-Agent string'")
|
.args_from_usage("-u, --user-agent=[Iceweasel] 'Custom User-Agent string'")
|
||||||
.get_matches();
|
.get_matches();
|
||||||
|
|
||||||
// Process the command
|
// Process the command
|
||||||
let arg_target = command.value_of("url").unwrap();
|
let arg_target = command.value_of("url").unwrap();
|
||||||
let opt_no_js = command.is_present("no-js");
|
let opt_no_js = command.is_present("no-js");
|
||||||
let opt_no_img = command.is_present("no-images");
|
let opt_no_images = command.is_present("no-images");
|
||||||
let opt_user_agent = command.value_of("user-agent").unwrap_or(DEFAULT_USER_AGENT);
|
let opt_user_agent = command.value_of("user-agent").unwrap_or(DEFAULT_USER_AGENT);
|
||||||
|
|
||||||
if is_valid_url(arg_target) {
|
if is_valid_url(arg_target) {
|
||||||
let data = retrieve_asset(&arg_target, false, "", opt_user_agent);
|
let data = retrieve_asset(&arg_target, false, "", opt_user_agent).unwrap();
|
||||||
let dom = html_to_dom(&data.unwrap());
|
let dom = html_to_dom(&data);
|
||||||
|
|
||||||
walk_and_embed_assets(&arg_target, &dom.document, opt_no_js, opt_no_img, opt_user_agent);
|
walk_and_embed_assets(
|
||||||
|
&arg_target,
|
||||||
|
&dom.document,
|
||||||
|
opt_no_js,
|
||||||
|
opt_no_images,
|
||||||
|
opt_user_agent,
|
||||||
|
);
|
||||||
|
|
||||||
print_dom(&dom.document);
|
print_dom(&dom.document);
|
||||||
println!(); // Ensure newline at end of output
|
println!(); // Ensure newline at end of output
|
||||||
|
64
src/utils.rs
64
src/utils.rs
@ -1,8 +1,31 @@
|
|||||||
extern crate base64;
|
extern crate base64;
|
||||||
extern crate mime_sniffer;
|
|
||||||
|
|
||||||
use self::base64::encode;
|
use self::base64::encode;
|
||||||
use self::mime_sniffer::MimeTypeSniffer;
|
|
||||||
|
static MAGIC: [[&[u8]; 2]; 19] = [
|
||||||
|
// Image
|
||||||
|
[b"GIF87a", b"image/gif"],
|
||||||
|
[b"GIF89a", b"image/gif"],
|
||||||
|
[b"\xFF\xD8\xFF", b"image/jpeg"],
|
||||||
|
[b"\x89PNG\x0D\x0A\x1A\x0A", b"image/png"],
|
||||||
|
[b"<?xml ", b"image/svg+xml"],
|
||||||
|
[b"<svg ", b"image/svg+xml"],
|
||||||
|
[b"RIFF....WEBPVP8 ", b"image/webp"],
|
||||||
|
[b"\x00\x00\x01\x00", b"image/x-icon"],
|
||||||
|
// Audio
|
||||||
|
[b"ID3", b"audio/mpeg"],
|
||||||
|
[b"\xFF\x0E", b"audio/mpeg"],
|
||||||
|
[b"\xFF\x0F", b"audio/mpeg"],
|
||||||
|
[b"OggS", b"audio/ogg"],
|
||||||
|
[b"RIFF....WAVEfmt ", b"audio/wav"],
|
||||||
|
[b"fLaC", b"audio/x-flac"],
|
||||||
|
// Video
|
||||||
|
[b"RIFF....AVI LIST", b"video/avi"],
|
||||||
|
[b"....ftyp", b"video/mp4"],
|
||||||
|
[b"\x00\x00\x01\x0B", b"video/mpeg"],
|
||||||
|
[b"....moov", b"video/quicktime"],
|
||||||
|
[b"\x1A\x45\xDF\xA3", b"video/webm"],
|
||||||
|
];
|
||||||
|
|
||||||
pub fn data_to_dataurl(mime: &str, data: &[u8]) -> String {
|
pub fn data_to_dataurl(mime: &str, data: &[u8]) -> String {
|
||||||
let mimetype = if mime == "" {
|
let mimetype = if mime == "" {
|
||||||
@ -14,7 +37,16 @@ pub fn data_to_dataurl(mime: &str, data: &[u8]) -> String {
|
|||||||
}
|
}
|
||||||
|
|
||||||
fn detect_mimetype(data: &[u8]) -> String {
|
fn detect_mimetype(data: &[u8]) -> String {
|
||||||
data.sniff_mime_type().unwrap_or("").to_string()
|
let mut re = String::new();
|
||||||
|
|
||||||
|
for item in MAGIC.iter() {
|
||||||
|
if data.starts_with(item[0]) {
|
||||||
|
re = String::from_utf8(item[1].to_vec()).unwrap();
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
re
|
||||||
}
|
}
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
@ -31,4 +63,30 @@ mod tests {
|
|||||||
"data:application/javascript;base64,dmFyIHdvcmQgPSAnaGVsbG8nOwphbGVydCh3b3JkKTsK"
|
"data:application/javascript;base64,dmFyIHdvcmQgPSAnaGVsbG8nOwphbGVydCh3b3JkKTsK"
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_detect_mimetype() {
|
||||||
|
// Image
|
||||||
|
assert_eq!(detect_mimetype(b"GIF87a"), "image/gif");
|
||||||
|
assert_eq!(detect_mimetype(b"GIF89a"), "image/gif");
|
||||||
|
assert_eq!(detect_mimetype(b"\xFF\xD8\xFF"), "image/jpeg");
|
||||||
|
assert_eq!(detect_mimetype(b"\x89PNG\x0D\x0A\x1A\x0A"), "image/png");
|
||||||
|
assert_eq!(detect_mimetype(b"<?xml "), "image/svg+xml");
|
||||||
|
assert_eq!(detect_mimetype(b"<svg "), "image/svg+xml");
|
||||||
|
assert_eq!(detect_mimetype(b"RIFF....WEBPVP8 "), "image/webp");
|
||||||
|
assert_eq!(detect_mimetype(b"\x00\x00\x01\x00"), "image/x-icon");
|
||||||
|
// Audio
|
||||||
|
assert_eq!(detect_mimetype(b"ID3"), "audio/mpeg");
|
||||||
|
assert_eq!(detect_mimetype(b"\xFF\x0E"), "audio/mpeg");
|
||||||
|
assert_eq!(detect_mimetype(b"\xFF\x0F"), "audio/mpeg");
|
||||||
|
assert_eq!(detect_mimetype(b"OggS"), "audio/ogg");
|
||||||
|
assert_eq!(detect_mimetype(b"RIFF....WAVEfmt "), "audio/wav");
|
||||||
|
assert_eq!(detect_mimetype(b"fLaC"), "audio/x-flac");
|
||||||
|
// Video
|
||||||
|
assert_eq!(detect_mimetype(b"RIFF....AVI LIST"), "video/avi");
|
||||||
|
assert_eq!(detect_mimetype(b"....ftyp"), "video/mp4");
|
||||||
|
assert_eq!(detect_mimetype(b"\x00\x00\x01\x0B"), "video/mpeg");
|
||||||
|
assert_eq!(detect_mimetype(b"....moov"), "video/quicktime");
|
||||||
|
assert_eq!(detect_mimetype(b"\x1A\x45\xDF\xA3"), "video/webm");
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user