mirror of
https://github.com/Y2Z/monolith
synced 2024-11-15 06:12:52 +00:00
Merge branch 'master' into change-meta-charset-to-utf-8
This commit is contained in:
commit
4921a70dda
23
Cargo.lock
generated
23
Cargo.lock
generated
@ -27,9 +27,9 @@ version = "1.0.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"doc-comment 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"predicates 1.0.6 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"predicates-core 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"predicates-tree 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"predicates 1.0.7 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"predicates-core 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"predicates-tree 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"wait-timeout 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
@ -608,6 +608,7 @@ name = "monolith"
|
||||
version = "2.4.0"
|
||||
dependencies = [
|
||||
"assert_cmd 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"atty 0.2.14 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"base64 0.13.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"chrono 0.4.19 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"clap 2.33.3 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
@ -858,24 +859,24 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
|
||||
[[package]]
|
||||
name = "predicates"
|
||||
version = "1.0.6"
|
||||
version = "1.0.7"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"difference 2.0.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"predicates-core 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"predicates-core 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "predicates-core"
|
||||
version = "1.0.1"
|
||||
version = "1.0.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
|
||||
[[package]]
|
||||
name = "predicates-tree"
|
||||
version = "1.0.1"
|
||||
version = "1.0.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"predicates-core 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"predicates-core 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"treeline 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
@ -1755,9 +1756,9 @@ dependencies = [
|
||||
"checksum pkg-config 0.3.19 (registry+https://github.com/rust-lang/crates.io-index)" = "3831453b3449ceb48b6d9c7ad7c96d5ea673e9b470a1dc578c2ce6521230884c"
|
||||
"checksum ppv-lite86 0.2.10 (registry+https://github.com/rust-lang/crates.io-index)" = "ac74c624d6b2d21f425f752262f42188365d7b8ff1aff74c82e45136510a4857"
|
||||
"checksum precomputed-hash 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "925383efa346730478fb4838dbe9137d2a47675ad789c546d150a6e1dd4ab31c"
|
||||
"checksum predicates 1.0.6 (registry+https://github.com/rust-lang/crates.io-index)" = "73dd9b7b200044694dfede9edf907c1ca19630908443e9447e624993700c6932"
|
||||
"checksum predicates-core 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)" = "fb3dbeaaf793584e29c58c7e3a82bbb3c7c06b63cea68d13b0e3cddc124104dc"
|
||||
"checksum predicates-tree 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)" = "aee95d988ee893cb35c06b148c80ed2cd52c8eea927f50ba7a0be1a786aeab73"
|
||||
"checksum predicates 1.0.7 (registry+https://github.com/rust-lang/crates.io-index)" = "eeb433456c1a57cc93554dea3ce40b4c19c4057e41c55d4a0f3d84ea71c325aa"
|
||||
"checksum predicates-core 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)" = "57e35a3326b75e49aa85f5dc6ec15b41108cf5aee58eabb1f274dd18b73c2451"
|
||||
"checksum predicates-tree 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)" = "15f553275e5721409451eb85e15fd9a860a6e5ab4496eb215987502b5f5391f2"
|
||||
"checksum proc-macro-hack 0.5.19 (registry+https://github.com/rust-lang/crates.io-index)" = "dbf0c48bc1d91375ae5c3cd81e3722dff1abcf81a30960240640d223f59fe0e5"
|
||||
"checksum proc-macro2 1.0.24 (registry+https://github.com/rust-lang/crates.io-index)" = "1e0704ee1a7e00d7bb417d0770ea303c1bccbabf0ef1667dae92b5967f5f8a71"
|
||||
"checksum quote 1.0.8 (registry+https://github.com/rust-lang/crates.io-index)" = "991431c3519a3f36861882da93630ce66b52918dcf1b8e2fd66b397fc96f28df"
|
||||
|
@ -22,6 +22,7 @@ include = [
|
||||
license = "Unlicense"
|
||||
|
||||
[dependencies]
|
||||
atty = "0.2" # Used for highlighting network errors
|
||||
base64 = "0.13.0"
|
||||
chrono = "0.4.19" # Used for formatting creation timestamp
|
||||
clap = "2.33.3"
|
||||
|
@ -61,7 +61,7 @@ or
|
||||
|
||||
## Options
|
||||
- `-a`: Exclude audio sources
|
||||
- `-b`: Use custom base URL
|
||||
- `-b`: Use custom `base URL`
|
||||
- `-c`: Exclude CSS
|
||||
- `-e`: Ignore network errors
|
||||
- `-f`: Omit frames
|
||||
@ -71,10 +71,10 @@ or
|
||||
- `-j`: Exclude JavaScript
|
||||
- `-k`: Accept invalid X.509 (TLS) certificates
|
||||
- `-M`: Don't add timestamp and URL information
|
||||
- `-o`: Write output to file
|
||||
- `-o`: Write output to `file`
|
||||
- `-s`: Be quiet
|
||||
- `-t`: Adjust network request timeout
|
||||
- `-u`: Provide custom User-Agent
|
||||
- `-t`: Adjust `network request timeout`
|
||||
- `-u`: Provide `custom User-Agent`
|
||||
- `-v`: Exclude videos
|
||||
|
||||
---------------------------------------------------
|
||||
|
92
src/html.rs
92
src/html.rs
@ -1063,45 +1063,19 @@ pub fn walk_and_embed_assets(
|
||||
|
||||
if let Some(source_attr_srcset_value) = get_node_attr(node, "srcset") {
|
||||
if parent_node_name == "picture" {
|
||||
if options.no_images {
|
||||
set_node_attr(node, "srcset", Some(str!(empty_image!())));
|
||||
} else {
|
||||
let srcset_full_url =
|
||||
resolve_url(&url, source_attr_srcset_value).unwrap_or_default();
|
||||
let srcset_url_fragment = get_url_fragment(srcset_full_url.clone());
|
||||
match retrieve_asset(
|
||||
cache,
|
||||
client,
|
||||
&url,
|
||||
&srcset_full_url,
|
||||
options,
|
||||
depth + 1,
|
||||
) {
|
||||
Ok((srcset_data, srcset_final_url, srcset_media_type)) => {
|
||||
let srcset_data_url = data_to_data_url(
|
||||
&srcset_media_type,
|
||||
&srcset_data,
|
||||
&srcset_final_url,
|
||||
);
|
||||
let assembled_url: String = url_with_fragment(
|
||||
srcset_data_url.as_str(),
|
||||
srcset_url_fragment.as_str(),
|
||||
);
|
||||
set_node_attr(node, "srcset", Some(assembled_url));
|
||||
}
|
||||
Err(_) => {
|
||||
if is_http_url(srcset_full_url.clone()) {
|
||||
// Keep remote reference if unable to retrieve the asset
|
||||
let assembled_url: String = url_with_fragment(
|
||||
srcset_full_url.as_str(),
|
||||
srcset_url_fragment.as_str(),
|
||||
);
|
||||
set_node_attr(node, "srcset", Some(assembled_url));
|
||||
} else {
|
||||
// Exclude non-remote URLs
|
||||
set_node_attr(node, "srcset", None);
|
||||
}
|
||||
}
|
||||
if !source_attr_srcset_value.is_empty() {
|
||||
if options.no_images {
|
||||
set_node_attr(node, "srcset", Some(str!(empty_image!())));
|
||||
} else {
|
||||
let resolved_srcset: String = embed_srcset(
|
||||
cache,
|
||||
client,
|
||||
&url,
|
||||
&source_attr_srcset_value,
|
||||
options,
|
||||
depth,
|
||||
);
|
||||
set_node_attr(node, "srcset", Some(resolved_srcset));
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -1199,8 +1173,8 @@ pub fn walk_and_embed_assets(
|
||||
// Empty inner content of STYLE tags
|
||||
node.children.borrow_mut().clear();
|
||||
} else {
|
||||
for node in node.children.borrow_mut().iter_mut() {
|
||||
if let NodeData::Text { ref contents } = node.data {
|
||||
for child_node in node.children.borrow_mut().iter_mut() {
|
||||
if let NodeData::Text { ref contents } = child_node.data {
|
||||
let mut tendril = contents.borrow_mut();
|
||||
let replacement = embed_css(
|
||||
cache,
|
||||
@ -1436,6 +1410,42 @@ pub fn walk_and_embed_assets(
|
||||
}
|
||||
}
|
||||
}
|
||||
"noscript" => {
|
||||
for child_node in node.children.borrow_mut().iter_mut() {
|
||||
match child_node.data {
|
||||
NodeData::Text { ref contents } => {
|
||||
// Get contents of the NOSCRIPT node
|
||||
let mut noscript_contents = contents.borrow_mut();
|
||||
// Parse contents of the NOSCRIPT node
|
||||
let noscript_contents_dom: RcDom = html_to_dom(&noscript_contents);
|
||||
// Embed assets within the NOSCRIPT node
|
||||
walk_and_embed_assets(
|
||||
cache,
|
||||
client,
|
||||
&url,
|
||||
&noscript_contents_dom.document,
|
||||
&options,
|
||||
depth,
|
||||
);
|
||||
// Get rid of original contents
|
||||
noscript_contents.clear();
|
||||
// Insert HTML containing embedded assets into the NOSCRIPT node
|
||||
if let Some(html) =
|
||||
get_child_node_by_name(&noscript_contents_dom.document, "html")
|
||||
{
|
||||
if let Some(body) = get_child_node_by_name(&html, "body") {
|
||||
let mut buf: Vec<u8> = Vec::new();
|
||||
serialize(&mut buf, &body, SerializeOpts::default())
|
||||
.expect("Unable to serialize DOM into buffer");
|
||||
let result = String::from_utf8(buf).unwrap();
|
||||
noscript_contents.push_slice(&result);
|
||||
}
|
||||
}
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
|
||||
|
12
src/opts.rs
12
src/opts.rs
@ -1,4 +1,5 @@
|
||||
use clap::{App, Arg};
|
||||
use std::env;
|
||||
|
||||
#[derive(Default)]
|
||||
pub struct Options {
|
||||
@ -19,6 +20,7 @@ pub struct Options {
|
||||
pub user_agent: Option<String>,
|
||||
pub no_video: bool,
|
||||
pub target: String,
|
||||
pub no_color: bool,
|
||||
}
|
||||
|
||||
const ASCII: &'static str = " \
|
||||
@ -33,6 +35,8 @@ const ASCII: &'static str = " \
|
||||
const DEFAULT_NETWORK_TIMEOUT: u64 = 120;
|
||||
const DEFAULT_USER_AGENT: &'static str =
|
||||
"Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:73.0) Gecko/20100101 Firefox/73.0";
|
||||
const ENV_VAR_NO_COLOR: &str = "NO_COLOR";
|
||||
const ENV_VAR_TERM: &str = "TERM";
|
||||
|
||||
impl Options {
|
||||
pub fn from_args() -> Options {
|
||||
@ -98,6 +102,14 @@ impl Options {
|
||||
}
|
||||
options.no_video = app.is_present("no-video");
|
||||
|
||||
options.no_color =
|
||||
env::var_os(ENV_VAR_NO_COLOR).is_some() || atty::isnt(atty::Stream::Stderr);
|
||||
if let Some(term) = env::var_os(ENV_VAR_TERM) {
|
||||
if term == "dumb" {
|
||||
options.no_color = true;
|
||||
}
|
||||
}
|
||||
|
||||
options
|
||||
}
|
||||
}
|
||||
|
@ -14,10 +14,10 @@ mod passing {
|
||||
use crate::opts::Options;
|
||||
|
||||
#[test]
|
||||
fn replace_with_empty_images() {
|
||||
fn small_medium_large() {
|
||||
let cache = &mut HashMap::new();
|
||||
let client = Client::new();
|
||||
let srcset_value = "small.png 1x, large.png 2x";
|
||||
let srcset_value = "small.png 1x, medium.png 1.5x, large.png 2x";
|
||||
let mut options = Options::default();
|
||||
options.no_images = true;
|
||||
options.silent = true;
|
||||
@ -25,7 +25,28 @@ mod passing {
|
||||
|
||||
assert_eq!(
|
||||
embedded_css,
|
||||
format!("{} 1x, {} 2x", empty_image!(), empty_image!()),
|
||||
format!(
|
||||
"{} 1x, {} 1.5x, {} 2x",
|
||||
empty_image!(),
|
||||
empty_image!(),
|
||||
empty_image!(),
|
||||
),
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn small_medium_only_medium_has_scale() {
|
||||
let cache = &mut HashMap::new();
|
||||
let client = Client::new();
|
||||
let srcset_value = "small.png, medium.png 1.5x";
|
||||
let mut options = Options::default();
|
||||
options.no_images = true;
|
||||
options.silent = true;
|
||||
let embedded_css = html::embed_srcset(cache, &client, "", &srcset_value, &options, 0);
|
||||
|
||||
assert_eq!(
|
||||
embedded_css,
|
||||
format!("{}, {} 1.5x", empty_image!(), empty_image!()),
|
||||
);
|
||||
}
|
||||
|
||||
|
@ -326,4 +326,45 @@ mod passing {
|
||||
</html>"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn processes_noscript_tags() {
|
||||
let html = "<html>\
|
||||
<body>\
|
||||
<noscript>\
|
||||
<img src=\"image.png\" />\
|
||||
</noscript>\
|
||||
</body>\
|
||||
</html>";
|
||||
let dom = html::html_to_dom(&html);
|
||||
let url = "http://localhost";
|
||||
let cache = &mut HashMap::new();
|
||||
|
||||
let mut options = Options::default();
|
||||
options.no_images = true;
|
||||
options.silent = true;
|
||||
|
||||
let client = Client::new();
|
||||
|
||||
html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options, 0);
|
||||
|
||||
let mut buf: Vec<u8> = Vec::new();
|
||||
serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();
|
||||
|
||||
assert_eq!(
|
||||
buf.iter().map(|&c| c as char).collect::<String>(),
|
||||
format!(
|
||||
"<html>\
|
||||
<head>\
|
||||
</head>\
|
||||
<body>\
|
||||
<noscript>\
|
||||
<img src=\"{}\">\
|
||||
</noscript>\
|
||||
</body>\
|
||||
</html>",
|
||||
empty_image!(),
|
||||
)
|
||||
);
|
||||
}
|
||||
}
|
||||
|
16
src/utils.rs
16
src/utils.rs
@ -7,6 +7,8 @@ use std::path::Path;
|
||||
use crate::opts::Options;
|
||||
use crate::url::{clean_url, file_url_to_fs_path, is_data_url, is_file_url, parse_data_url};
|
||||
|
||||
const ANSI_COLOR_RED: &'static str = "\x1b[31m";
|
||||
const ANSI_COLOR_RESET: &'static str = "\x1b[0m";
|
||||
const INDENT: &'static str = " ";
|
||||
|
||||
const MAGIC: [[&[u8]; 2]; 18] = [
|
||||
@ -32,7 +34,6 @@ const MAGIC: [[&[u8]; 2]; 18] = [
|
||||
[b"....moov", b"video/quicktime"],
|
||||
[b"\x1A\x45\xDF\xA3", b"video/webm"],
|
||||
];
|
||||
|
||||
const PLAINTEXT_MEDIA_TYPES: &[&str] = &[
|
||||
"application/javascript",
|
||||
"image/svg+xml",
|
||||
@ -128,7 +129,18 @@ pub fn retrieve_asset(
|
||||
Ok(mut response) => {
|
||||
if !options.ignore_errors && response.status() != 200 {
|
||||
if !options.silent {
|
||||
eprintln!("Unable to retrieve {} (error: {})", &url, response.status());
|
||||
eprintln!(
|
||||
"{}{}{} ({}){}",
|
||||
indent(depth).as_str(),
|
||||
if options.no_color { "" } else { ANSI_COLOR_RED },
|
||||
&url,
|
||||
response.status(),
|
||||
if options.no_color {
|
||||
""
|
||||
} else {
|
||||
ANSI_COLOR_RESET
|
||||
},
|
||||
);
|
||||
}
|
||||
// Provoke error
|
||||
return Err(client.get("").send().unwrap_err());
|
||||
|
Loading…
Reference in New Issue
Block a user