2
0
mirror of https://github.com/Y2Z/monolith synced 2024-11-10 19:10:29 +00:00

Merge pull request #125 from snshn/frames

Treat frames the same way as iframes
This commit is contained in:
Sunshine 2020-02-24 21:35:29 -05:00 committed by GitHub
commit 1701425003
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 81 additions and 10 deletions

View File

@ -35,7 +35,7 @@ If compared to saving websites with `wget -mpk`, this tool embeds all assets as
## Options ## Options
- `-c`: Ignore styles - `-c`: Ignore styles
- `-f`: Exclude iframes - `-f`: Exclude frames and iframes
- `-i`: Remove images - `-i`: Remove images
- `-I`: Isolate the document - `-I`: Isolate the document
- `-j`: Exclude JavaScript - `-j`: Exclude JavaScript

View File

@ -34,7 +34,7 @@ impl AppArgs {
) )
// .args_from_usage("-a, --include-audio 'Removes audio sources'") // .args_from_usage("-a, --include-audio 'Removes audio sources'")
.args_from_usage("-c, --no-css 'Removes CSS'") .args_from_usage("-c, --no-css 'Removes CSS'")
.args_from_usage("-f, --no-frames 'Removes iframes'") .args_from_usage("-f, --no-frames 'Removes frames and iframes'")
.args_from_usage("-i, --no-images 'Removes images'") .args_from_usage("-i, --no-images 'Removes images'")
.args_from_usage("-I, --isolate 'Cuts off document from the Internet'") .args_from_usage("-I, --isolate 'Cuts off document from the Internet'")
.args_from_usage("-j, --no-js 'Removes JavaScript'") .args_from_usage("-j, --no-js 'Removes JavaScript'")

View File

@ -386,7 +386,7 @@ pub fn walk_and_embed_assets(
} }
} }
} }
"iframe" => { "frame" | "iframe" => {
for attr in attrs_mut.iter_mut() { for attr in attrs_mut.iter_mut() {
if &attr.name.local == "src" { if &attr.name.local == "src" {
if opt_no_frames { if opt_no_frames {
@ -395,15 +395,15 @@ pub fn walk_and_embed_assets(
continue; continue;
} }
let iframe_src = attr.value.trim(); let frame_src = attr.value.trim();
// Ignore iframes with empty source (they cause infinite loops) // Ignore (i)frames with empty source — they cause infinite loops
if iframe_src.is_empty() { if frame_src.is_empty() {
continue; continue;
} }
let src_full_url = resolve_url(&url, iframe_src).unwrap_or_default(); let src_full_url = resolve_url(&url, frame_src).unwrap_or_default();
let (iframe_data, iframe_final_url) = retrieve_asset( let (frame_data, frame_final_url) = retrieve_asset(
cache, cache,
client, client,
&src_full_url, &src_full_url,
@ -412,11 +412,11 @@ pub fn walk_and_embed_assets(
opt_silent, opt_silent,
) )
.unwrap_or((str!(), src_full_url)); .unwrap_or((str!(), src_full_url));
let dom = html_to_dom(&iframe_data); let dom = html_to_dom(&frame_data);
walk_and_embed_assets( walk_and_embed_assets(
cache, cache,
client, client,
&iframe_final_url, &frame_final_url,
&dom.document, &dom.document,
opt_no_css, opt_no_css,
opt_no_js, opt_no_js,

View File

@ -132,6 +132,42 @@ fn test_walk_and_embed_assets_ensure_no_recursive_iframe() {
); );
} }
/// A `<frame>` with an empty `src` must pass through `walk_and_embed_assets`
/// unchanged: the serialized document is expected to still contain
/// `<frame src="">` once the walk has finished.
#[test]
fn test_walk_and_embed_assets_ensure_no_recursive_frame() {
    // Fixture: a frameset holding a single frame with an empty source.
    let html = "<frameset><frame src=\"\"></frameset>";
    let url = "http://localhost";
    let dom = html_to_dom(&html);

    // Collaborators handed to the walker.
    let mut cache = HashMap::new();
    let client = Client::new();

    // Nothing is excluded in this scenario; only progress output is silenced.
    let (opt_no_css, opt_no_js, opt_no_images, opt_no_frames) = (false, false, false, false);
    let opt_silent = true;

    walk_and_embed_assets(
        &mut cache,
        &client,
        &url,
        &dom.document,
        opt_no_css,
        opt_no_js,
        opt_no_images,
        opt_silent,
        opt_no_frames,
    );

    // Serialize the (possibly mutated) DOM and compare it to the expected markup.
    let mut buf: Vec<u8> = Vec::new();
    serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();

    // Each output byte is mapped to the char with the same code point.
    let rendered: String = buf.iter().copied().map(char::from).collect();
    assert_eq!(
        rendered,
        "<html><head></head><frameset><frame src=\"\"></frameset></html>"
    );
}
#[test] #[test]
fn test_walk_and_embed_assets_no_css() { fn test_walk_and_embed_assets_no_css() {
let html = "<link rel=\"stylesheet\" href=\"main.css\">\ let html = "<link rel=\"stylesheet\" href=\"main.css\">\
@ -227,6 +263,41 @@ fn test_walk_and_embed_assets_no_images() {
// Checks the `opt_no_frames` switch for `<frame>` elements: a frameset whose
// frame points at an external URL is walked with frames excluded, and the
// serialized result is expected to carry an empty `src` attribute instead.
#[test] #[test]
fn test_walk_and_embed_assets_no_frames() { fn test_walk_and_embed_assets_no_frames() {
// Fixture: one frame referencing an external page.
let html = "<frameset><frame src=\"http://trackbook.com\"></frameset>";
let dom = html_to_dom(&html);
let url = "http://localhost";
let cache = &mut HashMap::new();
// Only frame exclusion is switched on; CSS, JS and images stay enabled.
let opt_no_css: bool = false;
let opt_no_frames: bool = true;
let opt_no_js: bool = false;
let opt_no_images: bool = false;
// Silence progress output during the test run.
let opt_silent = true;
let client = Client::new();
// Walk the parsed document; the DOM is serialized afterwards to inspect the result.
walk_and_embed_assets(
cache,
&client,
&url,
&dom.document,
opt_no_css,
opt_no_js,
opt_no_images,
opt_silent,
opt_no_frames,
);
let mut buf: Vec<u8> = Vec::new();
serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap();
// Bytes are converted one-to-one into chars before comparing with the
// expected markup, in which the frame's `src` has been emptied.
assert_eq!(
buf.iter().map(|&c| c as char).collect::<String>(),
"<html><head></head><frameset><frame src=\"\"></frameset></html>"
);
}
#[test]
fn test_walk_and_embed_assets_no_iframes() {
let html = "<iframe src=\"http://trackbook.com\"></iframe>"; let html = "<iframe src=\"http://trackbook.com\"></iframe>";
let dom = html_to_dom(&html); let dom = html_to_dom(&html);
let url = "http://localhost"; let url = "http://localhost";