From feb37f5812f981a625aa33cd11a8ee0e4bfefe73 Mon Sep 17 00:00:00 2001 From: Emi Simpson Date: Fri, 6 Dec 2019 19:27:41 -0500 Subject: [PATCH 1/2] Added support for lazy loaded images Note: This patch works by resolving any data-src attribute on an image in the same way as a normal src attribute is resolved. It is assumed that most lazy-load libraries use the data-src attribute, and that if it is set, it holds a URL that is in use. --- src/html.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/html.rs b/src/html.rs index 3c7057f..3756e46 100644 --- a/src/html.rs +++ b/src/html.rs @@ -158,7 +158,7 @@ pub fn walk_and_embed_assets( } "img" => { for attr in attrs_mut.iter_mut() { - if &attr.name.local == "src" { + if &attr.name.local == "src" || &attr.name.local == "data-src" { let value = attr.value.to_string(); // Ignore images with empty source From 292221ea28f51e7f1bec99f70f48a6aa62425fb0 Mon Sep 17 00:00:00 2001 From: Emi Simpson Date: Mon, 9 Dec 2019 19:40:29 -0500 Subject: [PATCH 2/2] Lazyloaded images are now loaded at compilation, with placeholders omitted --- src/html.rs | 73 ++++++++++++++++++++++++++++++++--------------------- 1 file changed, 44 insertions(+), 29 deletions(-) diff --git a/src/html.rs b/src/html.rs index 3756e46..48b7da0 100644 --- a/src/html.rs +++ b/src/html.rs @@ -2,7 +2,7 @@ use html5ever::interface::QualName; use html5ever::parse_document; use html5ever::rcdom::{Handle, NodeData, RcDom}; use html5ever::serialize::{serialize, SerializeOpts}; -use html5ever::tendril::{format_tendril, TendrilSink}; +use html5ever::tendril::{format_tendril, Tendril, TendrilSink}; use html5ever::tree_builder::{Attribute, TreeSink}; use html5ever::{local_name, namespace_url, ns}; use http::retrieve_asset; @@ -157,36 +157,51 @@ pub fn walk_and_embed_assets( } } "img" => { - for attr in attrs_mut.iter_mut() { - if &attr.name.local == "src" || &attr.name.local == "data-src" { - let value = attr.value.to_string(); - - // Ignore images with 
empty source - if value == EMPTY_STRING.clone() { - continue; - } - - if opt_no_images { - attr.value.clear(); - attr.value.push_slice(TRANSPARENT_PIXEL); - } else { - let src_full_url: String = - resolve_url(&url, &value).unwrap_or(EMPTY_STRING.clone()); - let (img_dataurl, _) = retrieve_asset( - cache, - &src_full_url, - true, - "", - opt_user_agent, - opt_silent, - opt_insecure, - ) - .unwrap_or((EMPTY_STRING.clone(), EMPTY_STRING.clone())); - attr.value.clear(); - attr.value.push_slice(img_dataurl.as_str()); - } + // Find source tags + let mut found_src: Option = None; + let mut found_datasrc: Option = None; + let mut i = 0; + while i < attrs_mut.len() { + let name = attrs_mut[i].name.local.as_ref(); + if name.eq_ignore_ascii_case("src") { + found_src = Some(attrs_mut.remove(i)); + } else if name.eq_ignore_ascii_case("data-src") { + found_datasrc = Some(attrs_mut.remove(i)); + } else { + i += 1; } } + + // If images are disabled, clear both sources + if opt_no_images { + attrs_mut.push(Attribute { + name: QualName::new(None, ns!(), local_name!("src")), + value: Tendril::from_slice(TRANSPARENT_PIXEL), + }); + } else if let Some((dataurl, _)) = (&found_datasrc) + .into_iter() + .chain(&found_src) // Give dataurl priority + .map(|attr| &attr.value) + .filter(|src| !src.is_empty()) // Ignore empty srcs + .next() + .and_then(|src| resolve_url(&url, src).ok()) //Make absolute + .and_then(|abs_src| // Download and convert to dataurl + retrieve_asset( + cache, + &abs_src, + true, + "", + opt_user_agent, + opt_silent, + opt_insecure, + ).ok()) + { + // Add the new dataurl src attribute + attrs_mut.push(Attribute { + name: QualName::new(None, ns!(), local_name!("src")), + value: Tendril::from_slice(dataurl.as_ref()), + }); + } } "source" => { for attr in attrs_mut.iter_mut() {