|
|
@ -86,7 +86,6 @@ fn main() {
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
let target_url: Url;
|
|
|
|
let target_url: Url;
|
|
|
|
let mut base_url: Url;
|
|
|
|
|
|
|
|
let mut use_stdin: bool = false;
|
|
|
|
let mut use_stdin: bool = false;
|
|
|
|
|
|
|
|
|
|
|
|
// Determine exact target URL
|
|
|
|
// Determine exact target URL
|
|
|
@ -156,20 +155,19 @@ fn main() {
|
|
|
|
HeaderValue::from_str(&user_agent).expect("Invalid User-Agent header specified"),
|
|
|
|
HeaderValue::from_str(&user_agent).expect("Invalid User-Agent header specified"),
|
|
|
|
);
|
|
|
|
);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
let timeout: u64 = if options.timeout > 0 {
|
|
|
|
let client = if options.timeout > 0 {
|
|
|
|
options.timeout
|
|
|
|
Client::builder().timeout(Duration::from_secs(options.timeout))
|
|
|
|
} else {
|
|
|
|
} else {
|
|
|
|
std::u64::MAX / 4 // This is pretty close to infinity
|
|
|
|
// No timeout is default
|
|
|
|
};
|
|
|
|
Client::builder()
|
|
|
|
let client = Client::builder()
|
|
|
|
}
|
|
|
|
.timeout(Duration::from_secs(timeout))
|
|
|
|
|
|
|
|
.danger_accept_invalid_certs(options.insecure)
|
|
|
|
.danger_accept_invalid_certs(options.insecure)
|
|
|
|
.default_headers(header_map)
|
|
|
|
.default_headers(header_map)
|
|
|
|
.build()
|
|
|
|
.build()
|
|
|
|
.expect("Failed to initialize HTTP client");
|
|
|
|
.expect("Failed to initialize HTTP client");
|
|
|
|
|
|
|
|
|
|
|
|
// At this stage we assume that the base URL is the same as the target URL
|
|
|
|
// At first we assume that base URL is the same as target URL
|
|
|
|
base_url = target_url.clone();
|
|
|
|
let mut base_url: Url = target_url.clone();
|
|
|
|
|
|
|
|
|
|
|
|
let data: Vec<u8>;
|
|
|
|
let data: Vec<u8>;
|
|
|
|
let mut document_encoding: String = str!();
|
|
|
|
let mut document_encoding: String = str!();
|
|
|
@ -214,16 +212,16 @@ fn main() {
|
|
|
|
dom = html_to_dom(&data, document_encoding.clone());
|
|
|
|
dom = html_to_dom(&data, document_encoding.clone());
|
|
|
|
|
|
|
|
|
|
|
|
// TODO: investigate if charset from filesystem/data URL/HTTP headers
|
|
|
|
// TODO: investigate if charset from filesystem/data URL/HTTP headers
|
|
|
|
// has power over what's specified in HTML
|
|
|
|
// has say over what's specified in HTML
|
|
|
|
|
|
|
|
|
|
|
|
// Attempt to determine document's charset
|
|
|
|
// Attempt to determine document's charset
|
|
|
|
if let Some(charset) = get_charset(&dom.document) {
|
|
|
|
if let Some(html_charset) = get_charset(&dom.document) {
|
|
|
|
if !charset.is_empty() {
|
|
|
|
if !html_charset.is_empty() {
|
|
|
|
// Check if the charset specified inside HTML is valid
|
|
|
|
// Check if the charset specified inside HTML is valid
|
|
|
|
if let Some(encoding) = Encoding::for_label(charset.as_bytes()) {
|
|
|
|
if let Some(encoding) = Encoding::for_label_no_replacement(html_charset.as_bytes()) {
|
|
|
|
// No point in parsing HTML again with the same encoding as before
|
|
|
|
// No point in parsing HTML again with the same encoding as before
|
|
|
|
if encoding.name() != "UTF-8" {
|
|
|
|
if encoding.name() != "UTF-8" {
|
|
|
|
document_encoding = charset;
|
|
|
|
document_encoding = html_charset;
|
|
|
|
dom = html_to_dom(&data, document_encoding.clone());
|
|
|
|
dom = html_to_dom(&data, document_encoding.clone());
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
@ -233,8 +231,8 @@ fn main() {
|
|
|
|
// Use custom base URL if specified, read and use what's in the DOM otherwise
|
|
|
|
// Use custom base URL if specified, read and use what's in the DOM otherwise
|
|
|
|
let custom_base_url: String = options.base_url.clone().unwrap_or(str!());
|
|
|
|
let custom_base_url: String = options.base_url.clone().unwrap_or(str!());
|
|
|
|
if custom_base_url.is_empty() {
|
|
|
|
if custom_base_url.is_empty() {
|
|
|
|
// No custom base URL is specified,
|
|
|
|
// No custom base URL is specified
|
|
|
|
// try to see if the document has BASE tag
|
|
|
|
// Try to see if document has BASE element
|
|
|
|
if let Some(existing_base_url) = get_base_url(&dom.document) {
|
|
|
|
if let Some(existing_base_url) = get_base_url(&dom.document) {
|
|
|
|
base_url = resolve_url(&target_url, &existing_base_url);
|
|
|
|
base_url = resolve_url(&target_url, &existing_base_url);
|
|
|
|
}
|
|
|
|
}
|
|
|
@ -253,8 +251,7 @@ fn main() {
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
Err(_) => {
|
|
|
|
Err(_) => {
|
|
|
|
// Failed to parse given base URL,
|
|
|
|
// Failed to parse given base URL, perhaps it's a filesystem path?
|
|
|
|
// perhaps it's a filesystem path?
|
|
|
|
|
|
|
|
if target_url.scheme() == "file" {
|
|
|
|
if target_url.scheme() == "file" {
|
|
|
|
// Relative paths could work for documents saved from filesystem
|
|
|
|
// Relative paths could work for documents saved from filesystem
|
|
|
|
let path: &Path = Path::new(&custom_base_url);
|
|
|
|
let path: &Path = Path::new(&custom_base_url);
|
|
|
@ -302,9 +299,9 @@ fn main() {
|
|
|
|
&options,
|
|
|
|
&options,
|
|
|
|
0,
|
|
|
|
0,
|
|
|
|
) {
|
|
|
|
) {
|
|
|
|
Ok((data, final_url, media_type, _charset)) => {
|
|
|
|
Ok((data, final_url, media_type, charset)) => {
|
|
|
|
// TODO: use charset
|
|
|
|
let favicon_data_url: Url =
|
|
|
|
let favicon_data_url: Url = create_data_url(&media_type, &data, &final_url);
|
|
|
|
create_data_url(&media_type, &charset, &data, &final_url);
|
|
|
|
dom = add_favicon(&dom.document, favicon_data_url.to_string());
|
|
|
|
dom = add_favicon(&dom.document, favicon_data_url.to_string());
|
|
|
|
}
|
|
|
|
}
|
|
|
|
Err(_) => {
|
|
|
|
Err(_) => {
|
|
|
@ -322,7 +319,7 @@ fn main() {
|
|
|
|
// Serialize DOM tree
|
|
|
|
// Serialize DOM tree
|
|
|
|
let mut result: Vec<u8> = serialize_document(dom, document_encoding, &options);
|
|
|
|
let mut result: Vec<u8> = serialize_document(dom, document_encoding, &options);
|
|
|
|
|
|
|
|
|
|
|
|
// Add metadata tag
|
|
|
|
// Prepend metadata comment tag
|
|
|
|
if !options.no_metadata {
|
|
|
|
if !options.no_metadata {
|
|
|
|
let mut metadata_comment: String = create_metadata_tag(&target_url);
|
|
|
|
let mut metadata_comment: String = create_metadata_tag(&target_url);
|
|
|
|
metadata_comment += "\n";
|
|
|
|
metadata_comment += "\n";
|
|
|
|