From 27c9fb4cd39a562e1a33db07f917570cbae10a44 Mon Sep 17 00:00:00 2001 From: Emi Simpson Date: Wed, 8 Jan 2020 18:51:18 -0500 Subject: [PATCH 1/4] Added comment indicating the context under which the page was downloaded --- Cargo.lock | 1 + Cargo.toml | 4 ++++ src/main.rs | 14 +++++++++++++- 3 files changed, 18 insertions(+), 1 deletion(-) diff --git a/Cargo.lock b/Cargo.lock index 1ff9ea9..e0e1bd3 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -531,6 +531,7 @@ dependencies = [ "lazy_static 1.4.0 (registry+https://github.com/rust-lang/crates.io-index)", "regex 1.3.1 (registry+https://github.com/rust-lang/crates.io-index)", "reqwest 0.10.0 (registry+https://github.com/rust-lang/crates.io-index)", + "time 0.1.42 (registry+https://github.com/rust-lang/crates.io-index)", "url 2.1.0 (registry+https://github.com/rust-lang/crates.io-index)", ] diff --git a/Cargo.toml b/Cargo.toml index e7089c1..e30c807 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -19,6 +19,10 @@ lazy_static = "1.4.0" regex = "1.3.1" url = "2.1.0" +# Used to render comments indicating the time the page was saved +# also required by reqwest as of v0.10.0 +time = "0.1.42" + [dependencies.reqwest] version = "0.10.*" default-features = false diff --git a/src/main.rs b/src/main.rs index 54c267c..4141390 100644 --- a/src/main.rs +++ b/src/main.rs @@ -71,6 +71,7 @@ fn main() { app_args.silent, ) .unwrap(); + let downloaded_time = time::now(); let dom = html_to_dom(&data); walk_and_embed_assets( @@ -85,7 +86,7 @@ fn main() { app_args.no_frames, ); - let html: String = stringify_document( + let mut html: String = stringify_document( &dom.document, app_args.no_css, app_args.no_frames, @@ -94,6 +95,17 @@ fn main() { app_args.isolate, ); + html.insert_str( + 0, + &format!( + "\n", + &final_url, + downloaded_time.to_local().rfc822(), + env!("CARGO_PKG_NAME"), + env!("CARGO_PKG_VERSION"), + ), + ); + if app_args.output == str!() { println!("{}", html); } else { From 9be3982dc62b9e70b2022fe032009a3622884499 Mon Sep 17 00:00:00 2001 From: Emi Simpson Date: Wed, 8 Jan 2020 19:00:53 -0500 Subject: [PATCH 2/4] Added --no-context flag to disable adding context comment --- src/args.rs | 3 +++ src/main.rs | 22 ++++++++++++---------- 2 files changed, 15 insertions(+), 10 deletions(-) diff --git a/src/args.rs b/src/args.rs index 9448d5f..a7aab9c 100644 --- a/src/args.rs +++ b/src/args.rs @@ -12,6 +12,7 @@ pub struct AppArgs { pub output: String, pub silent: bool, pub user_agent: String, + pub no_context: bool, } const DEFAULT_USER_AGENT: &str = @@ -31,6 +32,7 @@ impl AppArgs { .help("URL to download"), ) // .args_from_usage("-a, --include-audio 'Embed audio sources'") + .args_from_usage("-C, --no-context 'Exclude time and original URL in output'") .args_from_usage("-c, --no-css 'Ignore styles'") .args_from_usage("-f, --no-frames 'Exclude iframes'") .args_from_usage("-i, --no-images 'Remove images'") @@ -48,6 +50,7 @@ impl AppArgs { .value_of("url") .expect("please set target url") .to_string(); + app_args.no_context = app.is_present("no-context"); app_args.no_css = app.is_present("no-css"); app_args.no_frames = app.is_present("no-frames"); app_args.no_images = app.is_present("no-images"); diff --git a/src/main.rs b/src/main.rs index 4141390..27cf030 100644 --- a/src/main.rs +++ b/src/main.rs @@ -95,16 +95,18 @@ fn main() { app_args.isolate, ); - html.insert_str( - 0, - &format!( - "\n", - &final_url, - downloaded_time.to_local().rfc822(), - env!("CARGO_PKG_NAME"), - env!("CARGO_PKG_VERSION"), - ), - ); + if !app_args.no_context { + html.insert_str( + 0, + &format!( + "\n", + &final_url, + downloaded_time.rfc822(), + env!("CARGO_PKG_NAME"), + env!("CARGO_PKG_VERSION"), + ), + ); + } if app_args.output == str!() { println!("{}", html); From 651fa716b48e212d6ad5052ba6b3e06174ea4878 Mon Sep 17 00:00:00 2001 From: Emi Simpson Date: Fri, 10 Jan 2020 14:18:15 -0500 Subject: [PATCH 3/4] Clean user, pass, and fragment from URL before writing --- src/main.rs | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/src/main.rs b/src/main.rs index 27cf030..85f2bf7 100644 --- a/src/main.rs +++ b/src/main.rs @@ -10,6 +10,7 @@ use monolith::http::retrieve_asset; use monolith::utils::is_valid_url; use reqwest::blocking::Client; use reqwest::header::{HeaderMap, HeaderValue, USER_AGENT}; +use reqwest::Url; use std::collections::HashMap; use std::fs::{remove_file, File}; use std::io::{Error, Write}; @@ -96,11 +97,17 @@ fn main() { ); if !app_args.no_context { + // Safe to unwrap: We just put this through an HTTP request + let mut clean_url = Url::parse(&final_url).unwrap(); + clean_url.set_fragment(None); + // Safe to unwrap: must have a protocol and thus base 'cause we just used it. + clean_url.set_password(None).unwrap(); + clean_url.set_username("").unwrap(); html.insert_str( 0, &format!( "\n", - &final_url, + &clean_url, downloaded_time.rfc822(), env!("CARGO_PKG_NAME"), env!("CARGO_PKG_VERSION"), From 05985583f022a1865937434859e6193fa51425a4 Mon Sep 17 00:00:00 2001 From: Emi Simpson Date: Fri, 10 Jan 2020 14:30:35 -0500 Subject: [PATCH 4/4] Switch timestamps from rfc822 local time to iso8601 UTC --- src/main.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/main.rs b/src/main.rs index 85f2bf7..c87ad01 100644 --- a/src/main.rs +++ b/src/main.rs @@ -72,7 +72,7 @@ fn main() { app_args.silent, ) .unwrap(); - let downloaded_time = time::now(); + let downloaded_time = time::now_utc(); let dom = html_to_dom(&data); walk_and_embed_assets( @@ -106,9 +106,9 @@ fn main() { html.insert_str( 0, &format!( - "\n", + "\n", &clean_url, - downloaded_time.rfc822(), + downloaded_time.rfc3339(), env!("CARGO_PKG_NAME"), env!("CARGO_PKG_VERSION"), ),