diff --git a/src/html.rs b/src/html.rs index ea1b620..61ce6dc 100644 --- a/src/html.rs +++ b/src/html.rs @@ -7,6 +7,7 @@ use html5ever::tree_builder::{Attribute, TreeSink}; use html5ever::{local_name, namespace_url, ns}; use http::retrieve_asset; use js::attr_is_event_handler; +use reqwest::Client; use std::collections::HashMap; use std::default::Default; use utils::{data_to_dataurl, is_valid_url, resolve_css_imports, resolve_url, url_has_protocol}; @@ -45,14 +46,13 @@ pub fn is_icon(attr_value: &str) -> bool { pub fn walk_and_embed_assets( cache: &mut HashMap, + client: &Client, url: &str, node: &Handle, opt_no_css: bool, opt_no_js: bool, opt_no_images: bool, - opt_user_agent: &str, opt_silent: bool, - opt_insecure: bool, opt_no_frames: bool, ) { match node.data { @@ -61,14 +61,13 @@ pub fn walk_and_embed_assets( for child in node.children.borrow().iter() { walk_and_embed_assets( cache, + client, &url, child, opt_no_css, opt_no_js, opt_no_images, - opt_user_agent, opt_silent, - opt_insecure, opt_no_frames, ); } @@ -107,12 +106,11 @@ pub fn walk_and_embed_assets( .unwrap_or(EMPTY_STRING.clone()); let (favicon_dataurl, _) = retrieve_asset( cache, + client, &href_full_url, true, "", - opt_user_agent, opt_silent, - opt_insecure, ) .unwrap_or((EMPTY_STRING.clone(), EMPTY_STRING.clone())); attr.value.clear(); @@ -131,23 +129,21 @@ pub fn walk_and_embed_assets( .unwrap_or(EMPTY_STRING.clone()); let replacement_text = match retrieve_asset( cache, + client, &href_full_url, false, "text/css", - opt_user_agent, opt_silent, - opt_insecure, ) { // On successful retrieval, traverse CSS Ok((css_data, _)) => resolve_css_imports( cache, + client, &css_data, true, &href_full_url, opt_no_images, - opt_user_agent, opt_silent, - opt_insecure, ), // If a network error occured, warn @@ -194,12 +190,11 @@ pub fn walk_and_embed_assets( resolve_url(&url, &value).unwrap_or(EMPTY_STRING.clone()); let (img_dataurl, _) = retrieve_asset( cache, + client, &src_full_url, true, "", - opt_user_agent, opt_silent, - opt_insecure, ) .unwrap_or((EMPTY_STRING.clone(), EMPTY_STRING.clone())); attr.value.clear(); @@ -228,12 +223,11 @@ pub fn walk_and_embed_assets( .unwrap_or(EMPTY_STRING.clone()); let (source_dataurl, _) = retrieve_asset( cache, + client, &srcset_full_url, true, "", - opt_user_agent, opt_silent, - opt_insecure, ) .unwrap_or((EMPTY_STRING.clone(), EMPTY_STRING.clone())); attr.value.clear(); @@ -275,12 +269,11 @@ pub fn walk_and_embed_assets( .unwrap_or(EMPTY_STRING.clone()); let (js_dataurl, _) = retrieve_asset( cache, + client, &src_full_url, true, "application/javascript", - opt_user_agent, opt_silent, - opt_insecure, ) .unwrap_or((EMPTY_STRING.clone(), EMPTY_STRING.clone())); attr.value.clear(); @@ -299,13 +292,12 @@ pub fn walk_and_embed_assets( let mut tendril = contents.borrow_mut(); let replacement = resolve_css_imports( cache, + client, tendril.as_ref(), false, &url, opt_no_images, - opt_user_agent, opt_silent, - opt_insecure, ); tendril.clear(); tendril.push_slice(&replacement); @@ -347,25 +339,23 @@ pub fn walk_and_embed_assets( resolve_url(&url, &iframe_src).unwrap_or(EMPTY_STRING.clone()); let (iframe_data, iframe_final_url) = retrieve_asset( cache, + client, &src_full_url, false, "text/html", - opt_user_agent, opt_silent, - opt_insecure, ) .unwrap_or((EMPTY_STRING.clone(), src_full_url)); let dom = html_to_dom(&iframe_data); walk_and_embed_assets( cache, + client, &iframe_final_url, &dom.document, opt_no_css, opt_no_js, opt_no_images, - opt_user_agent, opt_silent, - opt_insecure, opt_no_frames, ); let mut buf: Vec = Vec::new(); @@ -393,12 +383,11 @@ pub fn walk_and_embed_assets( .unwrap_or(EMPTY_STRING.clone()); let (poster_dataurl, _) = retrieve_asset( cache, + client, &poster_full_url, true, "", - opt_user_agent, opt_silent, - opt_insecure, ) .unwrap_or((poster_full_url, EMPTY_STRING.clone())); attr.value.clear(); @@ -431,13 +420,12 @@ pub fn walk_and_embed_assets( { let replacement = resolve_css_imports( cache, + client, attribute.value.as_ref(), false, &url, opt_no_images, - opt_user_agent, opt_silent, - opt_insecure, ); attribute.value.clear(); attribute.value.push_slice(&replacement); @@ -462,14 +450,13 @@ pub fn walk_and_embed_assets( for child in node.children.borrow().iter() { walk_and_embed_assets( cache, + client, &url, child, opt_no_css, opt_no_js, opt_no_images, - opt_user_agent, opt_silent, - opt_insecure, opt_no_frames, ); } diff --git a/src/http.rs b/src/http.rs index ff25691..813840d 100644 --- a/src/http.rs +++ b/src/http.rs @@ -1,17 +1,15 @@ -use reqwest::header::{CONTENT_TYPE, USER_AGENT}; +use reqwest::header::CONTENT_TYPE; use reqwest::Client; use std::collections::HashMap; -use std::time::Duration; use utils::{data_to_dataurl, is_data_url}; pub fn retrieve_asset( cache: &mut HashMap, + client: &Client, url: &str, as_dataurl: bool, mime: &str, - opt_user_agent: &str, opt_silent: bool, - opt_insecure: bool, ) -> Result<(String, String), reqwest::Error> { if is_data_url(&url).unwrap() { Ok((url.to_string(), url.to_string())) @@ -25,11 +23,7 @@ pub fn retrieve_asset( Ok((data.to_string(), url.to_string())) } else { // url not in cache, we request it - let client = Client::builder() - .timeout(Duration::from_secs(10)) - .danger_accept_invalid_certs(opt_insecure) - .build()?; - let mut response = client.get(url).header(USER_AGENT, opt_user_agent).send()?; + let mut response = client.get(url).send()?; if !opt_silent { if url == response.url().as_str() { diff --git a/src/main.rs b/src/main.rs index 50fab9e..b7298e0 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,6 +1,7 @@ #[macro_use] extern crate clap; extern crate monolith; +extern crate reqwest; mod args; @@ -8,34 +9,50 @@ use args::AppArgs; use monolith::html::{html_to_dom, stringify_document, walk_and_embed_assets}; use monolith::http::retrieve_asset; use monolith::utils::is_valid_url; +use reqwest::header::{HeaderMap, HeaderValue, USER_AGENT}; use std::collections::HashMap; +use std::time::Duration; fn main() { let app_args = AppArgs::get(); let cache = &mut HashMap::new(); if is_valid_url(app_args.url_target.as_str()) { + // Initialize client + let mut header_map = HeaderMap::new(); + match HeaderValue::from_str(&app_args.user_agent) { + Ok(header) => header_map.insert(USER_AGENT, header), + Err(err) => { + eprintln!("Invalid user agent! {}", err); + return; + } + }; + let client = reqwest::Client::builder() + .timeout(Duration::from_secs(10)) + .danger_accept_invalid_certs(app_args.insecure) + .default_headers(header_map) + .build() + .expect("Failed to initialize HTTP client"); + let (data, final_url) = retrieve_asset( cache, + &client, app_args.url_target.as_str(), false, "", - app_args.user_agent.as_str(), app_args.silent, - app_args.insecure, ) .unwrap(); let dom = html_to_dom(&data); walk_and_embed_assets( cache, + &client, &final_url, &dom.document, app_args.no_css, app_args.no_js, app_args.no_images, - app_args.user_agent.as_str(), app_args.silent, - app_args.insecure, app_args.no_frames, ); diff --git a/src/tests/html.rs b/src/tests/html.rs index b7a571b..a91e34e 100644 --- a/src/tests/html.rs +++ b/src/tests/html.rs @@ -70,18 +70,18 @@ fn test_walk_and_embed_assets() { let opt_no_js: bool = false; let opt_no_images: bool = false; let opt_silent = true; - let opt_insecure = false; + + let client = reqwest::Client::new(); walk_and_embed_assets( cache, + &client, &url, &dom.document, opt_no_css, opt_no_js, opt_no_images, - "", opt_silent, - opt_insecure, opt_no_frames, ); @@ -106,18 +106,18 @@ fn test_walk_and_embed_assets_ensure_no_recursive_iframe() { let opt_no_js: bool = false; let opt_no_images: bool = false; let opt_silent = true; - let opt_insecure = false; + + let client = reqwest::Client::new(); walk_and_embed_assets( cache, + &client, &url, &dom.document, opt_no_css, opt_no_js, opt_no_images, - "", opt_silent, - opt_insecure, opt_no_frames, ); @@ -144,18 +144,17 @@ fn test_walk_and_embed_assets_no_css() { let opt_no_js: bool = false; let opt_no_images: bool = false; let opt_silent = true; - let opt_insecure = false; + let client = reqwest::Client::new(); walk_and_embed_assets( cache, + &client, &url, &dom.document, opt_no_css, opt_no_js, opt_no_images, - "", opt_silent, - opt_insecure, opt_no_frames, ); @@ -189,18 +188,18 @@ fn test_walk_and_embed_assets_no_images() { let opt_no_js: bool = false; let opt_no_images: bool = true; let opt_silent = true; - let opt_insecure = false; + + let client = reqwest::Client::new(); walk_and_embed_assets( cache, + &client, &url, &dom.document, opt_no_css, opt_no_js, opt_no_images, - "", opt_silent, - opt_insecure, opt_no_frames, ); @@ -236,18 +235,17 @@ fn test_walk_and_embed_assets_no_frames() { let opt_no_js: bool = false; let opt_no_images: bool = false; let opt_silent = true; - let opt_insecure = false; + let client = reqwest::Client::new(); walk_and_embed_assets( cache, + &client, &url, &dom.document, opt_no_css, opt_no_js, opt_no_images, - "", opt_silent, - opt_insecure, opt_no_frames, ); @@ -275,18 +273,18 @@ fn test_walk_and_embed_assets_no_js() { let opt_no_js: bool = true; let opt_no_images: bool = false; let opt_silent = true; - let opt_insecure = false; + + let client = reqwest::Client::new(); walk_and_embed_assets( cache, + &client, &url, &dom.document, opt_no_css, opt_no_js, opt_no_images, - "", opt_silent, - opt_insecure, opt_no_frames, ); diff --git a/src/tests/http.rs b/src/tests/http.rs index b603c2b..f1613f0 100644 --- a/src/tests/http.rs +++ b/src/tests/http.rs @@ -3,26 +3,18 @@ use std::collections::HashMap; #[test] fn test_retrieve_asset() { let cache = &mut HashMap::new(); - let (data, final_url) = retrieve_asset( - cache, - "data:text/html;base64,...", - true, - "", - "", - true, - false, - ) - .unwrap(); + let client = reqwest::Client::new(); + let (data, final_url) = + retrieve_asset(cache, &client, "data:text/html;base64,...", true, "", false).unwrap(); assert_eq!(&data, "data:text/html;base64,..."); assert_eq!(&final_url, "data:text/html;base64,..."); let (data, final_url) = retrieve_asset( cache, + &client, "data:text/html;base64,...", true, "image/png", - "", - true, false, ) .unwrap(); diff --git a/src/utils.rs b/src/utils.rs index 6637a98..81a16e9 100644 --- a/src/utils.rs +++ b/src/utils.rs @@ -3,6 +3,7 @@ extern crate base64; use self::base64::encode; use http::retrieve_asset; use regex::Regex; +use reqwest::Client; use std::collections::HashMap; use url::{ParseError, Url}; @@ -112,13 +113,12 @@ pub fn resolve_url, U: AsRef>(from: T, to: U) -> Result, + client: &Client, css_string: &str, as_dataurl: bool, href: &str, opt_no_images: bool, - opt_user_agent: &str, opt_silent: bool, - opt_insecure: bool, ) -> String { let mut resolved_css = String::from(css_string); @@ -141,35 +141,32 @@ pub fn resolve_css_imports( // The link is an @import link retrieve_asset( cache, + client, &embedded_url, false, // Formating as data URL will be done later "text/css", // Expect CSS - opt_user_agent, opt_silent, - opt_insecure, ) .map(|(content, _)| { resolve_css_imports( cache, + client, &content, true, // Finally, convert to a dataurl &embedded_url, opt_no_images, - opt_user_agent, opt_silent, - opt_insecure, ) }) } else if (is_image && !opt_no_images) || is_font { // The link is some other, non-@import link retrieve_asset( cache, + client, &embedded_url, true, // Format as data URL "", // Unknown MIME type - opt_user_agent, opt_silent, - opt_insecure, ) .map(|(a, _)| a) } else {