diff --git a/src/html.rs b/src/html.rs
index 3c7057f..610fa54 100644
--- a/src/html.rs
+++ b/src/html.rs
@@ -7,6 +7,7 @@ use html5ever::tree_builder::{Attribute, TreeSink};
use html5ever::{local_name, namespace_url, ns};
use http::retrieve_asset;
use js::attr_is_event_handler;
+use reqwest::Client;
use std::collections::HashMap;
use std::default::Default;
use utils::{data_to_dataurl, is_valid_url, resolve_url, url_has_protocol};
@@ -45,14 +46,13 @@ pub fn is_icon(attr_value: &str) -> bool {
pub fn walk_and_embed_assets(
cache: &mut HashMap,
+ client: &Client,
url: &str,
node: &Handle,
opt_no_css: bool,
opt_no_js: bool,
opt_no_images: bool,
- opt_user_agent: &str,
opt_silent: bool,
- opt_insecure: bool,
opt_no_frames: bool,
) {
match node.data {
@@ -61,14 +61,13 @@ pub fn walk_and_embed_assets(
for child in node.children.borrow().iter() {
walk_and_embed_assets(
cache,
+ client,
&url,
child,
opt_no_css,
opt_no_js,
opt_no_images,
- opt_user_agent,
opt_silent,
- opt_insecure,
opt_no_frames,
);
}
@@ -107,12 +106,11 @@ pub fn walk_and_embed_assets(
.unwrap_or(EMPTY_STRING.clone());
let (favicon_dataurl, _) = retrieve_asset(
cache,
+ client,
&href_full_url,
true,
"",
- opt_user_agent,
opt_silent,
- opt_insecure,
)
.unwrap_or((EMPTY_STRING.clone(), EMPTY_STRING.clone()));
attr.value.clear();
@@ -131,12 +129,11 @@ pub fn walk_and_embed_assets(
.unwrap_or(EMPTY_STRING.clone());
let (css_dataurl, _) = retrieve_asset(
cache,
+ client,
&href_full_url,
true,
"text/css",
- opt_user_agent,
opt_silent,
- opt_insecure,
)
.unwrap_or((EMPTY_STRING.clone(), EMPTY_STRING.clone()));
attr.value.clear();
@@ -174,12 +171,11 @@ pub fn walk_and_embed_assets(
resolve_url(&url, &value).unwrap_or(EMPTY_STRING.clone());
let (img_dataurl, _) = retrieve_asset(
cache,
+ client,
&src_full_url,
true,
"",
- opt_user_agent,
opt_silent,
- opt_insecure,
)
.unwrap_or((EMPTY_STRING.clone(), EMPTY_STRING.clone()));
attr.value.clear();
@@ -208,12 +204,11 @@ pub fn walk_and_embed_assets(
.unwrap_or(EMPTY_STRING.clone());
let (source_dataurl, _) = retrieve_asset(
cache,
+ client,
&srcset_full_url,
true,
"",
- opt_user_agent,
opt_silent,
- opt_insecure,
)
.unwrap_or((EMPTY_STRING.clone(), EMPTY_STRING.clone()));
attr.value.clear();
@@ -255,12 +250,11 @@ pub fn walk_and_embed_assets(
.unwrap_or(EMPTY_STRING.clone());
let (js_dataurl, _) = retrieve_asset(
cache,
+ client,
&src_full_url,
true,
"application/javascript",
- opt_user_agent,
opt_silent,
- opt_insecure,
)
.unwrap_or((EMPTY_STRING.clone(), EMPTY_STRING.clone()));
attr.value.clear();
@@ -309,25 +303,23 @@ pub fn walk_and_embed_assets(
resolve_url(&url, &iframe_src).unwrap_or(EMPTY_STRING.clone());
let (iframe_data, iframe_final_url) = retrieve_asset(
cache,
+ client,
&src_full_url,
false,
"text/html",
- opt_user_agent,
opt_silent,
- opt_insecure,
)
.unwrap_or((EMPTY_STRING.clone(), src_full_url));
let dom = html_to_dom(&iframe_data);
walk_and_embed_assets(
cache,
+ client,
&iframe_final_url,
&dom.document,
opt_no_css,
opt_no_js,
opt_no_images,
- opt_user_agent,
opt_silent,
- opt_insecure,
opt_no_frames,
);
let mut buf: Vec = Vec::new();
@@ -355,12 +347,11 @@ pub fn walk_and_embed_assets(
.unwrap_or(EMPTY_STRING.clone());
let (poster_dataurl, _) = retrieve_asset(
cache,
+ client,
&poster_full_url,
true,
"",
- opt_user_agent,
opt_silent,
- opt_insecure,
)
.unwrap_or((poster_full_url, EMPTY_STRING.clone()));
attr.value.clear();
@@ -404,14 +395,13 @@ pub fn walk_and_embed_assets(
for child in node.children.borrow().iter() {
walk_and_embed_assets(
cache,
+ client,
&url,
child,
opt_no_css,
opt_no_js,
opt_no_images,
- opt_user_agent,
opt_silent,
- opt_insecure,
opt_no_frames,
);
}
diff --git a/src/http.rs b/src/http.rs
index 97e77a7..a79a841 100644
--- a/src/http.rs
+++ b/src/http.rs
@@ -1,17 +1,15 @@
-use reqwest::header::{CONTENT_TYPE, USER_AGENT};
+use reqwest::header::CONTENT_TYPE;
use reqwest::Client;
use std::collections::HashMap;
-use std::time::Duration;
use utils::{data_to_dataurl, is_data_url};
pub fn retrieve_asset(
cache: &mut HashMap,
+ client: &Client,
url: &str,
as_dataurl: bool,
mime: &str,
- opt_user_agent: &str,
opt_silent: bool,
- opt_insecure: bool,
) -> Result<(String, String), reqwest::Error> {
if is_data_url(&url).unwrap() {
Ok((url.to_string(), url.to_string()))
@@ -25,11 +23,7 @@ pub fn retrieve_asset(
Ok((data.to_string(), url.to_string()))
} else {
// url not in cache, we request it
- let client = Client::builder()
- .timeout(Duration::from_secs(10))
- .danger_accept_invalid_certs(opt_insecure)
- .build()?;
- let mut response = client.get(url).header(USER_AGENT, opt_user_agent).send()?;
+ let mut response = client.get(url).send()?;
if !opt_silent {
if url == response.url().as_str() {
diff --git a/src/main.rs b/src/main.rs
index 50fab9e..b7298e0 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -1,6 +1,7 @@
#[macro_use]
extern crate clap;
extern crate monolith;
+extern crate reqwest;
mod args;
@@ -8,34 +9,50 @@ use args::AppArgs;
use monolith::html::{html_to_dom, stringify_document, walk_and_embed_assets};
use monolith::http::retrieve_asset;
use monolith::utils::is_valid_url;
+use reqwest::header::{HeaderMap, HeaderValue, USER_AGENT};
use std::collections::HashMap;
+use std::time::Duration;
fn main() {
let app_args = AppArgs::get();
let cache = &mut HashMap::new();
if is_valid_url(app_args.url_target.as_str()) {
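+ // Build one shared HTTP client (10 s timeout, TLS certificate policy, default User-Agent header) reused for every request below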
+ let mut header_map = HeaderMap::new();
+ match HeaderValue::from_str(&app_args.user_agent) {
+ Ok(header) => header_map.insert(USER_AGENT, header),
+ Err(err) => {
+ eprintln!("Invalid user agent! {}", err);
+ return;
+ }
+ };
+ let client = reqwest::Client::builder()
+ .timeout(Duration::from_secs(10))
+ .danger_accept_invalid_certs(app_args.insecure)
+ .default_headers(header_map)
+ .build()
+ .expect("Failed to initialize HTTP client");
+
let (data, final_url) = retrieve_asset(
cache,
+ &client,
app_args.url_target.as_str(),
false,
"",
- app_args.user_agent.as_str(),
app_args.silent,
- app_args.insecure,
)
.unwrap();
let dom = html_to_dom(&data);
walk_and_embed_assets(
cache,
+ &client,
&final_url,
&dom.document,
app_args.no_css,
app_args.no_js,
app_args.no_images,
- app_args.user_agent.as_str(),
app_args.silent,
- app_args.insecure,
app_args.no_frames,
);