diff --git a/src/css.rs b/src/css.rs index 67317ab..0b7a9a5 100644 --- a/src/css.rs +++ b/src/css.rs @@ -2,7 +2,7 @@ use cssparser::{ParseError, Parser, ParserInput, SourcePosition, Token}; use reqwest::blocking::Client; use std::collections::HashMap; -use crate::utils::{data_to_data_url, decode_url, get_url_fragment, resolve_url, retrieve_asset}; +use crate::utils::{data_to_data_url, get_url_fragment, resolve_url, retrieve_asset}; const CSS_PROPS_WITH_IMAGE_URLS: &[&str] = &[ "background", @@ -142,12 +142,11 @@ pub fn process_css<'a>( let full_url = resolve_url(&parent_url, value).unwrap_or_default(); let url_fragment = get_url_fragment(full_url.clone()); - let full_url_decoded = decode_url(full_url); let (css, final_url) = retrieve_asset( cache, client, &parent_url, - &full_url_decoded, + &full_url, false, "", opt_silent, @@ -261,12 +260,11 @@ pub fn process_css<'a>( if is_import { let full_url = resolve_url(&parent_url, value).unwrap_or_default(); let url_fragment = get_url_fragment(full_url.clone()); - let full_url_decoded = decode_url(full_url); let (css, final_url) = retrieve_asset( cache, client, &parent_url, - &full_url_decoded, + &full_url, false, "", opt_silent, diff --git a/src/tests/css/embed_css.rs b/src/tests/css/embed_css.rs index 28261c0..07c322b 100644 --- a/src/tests/css/embed_css.rs +++ b/src/tests/css/embed_css.rs @@ -163,9 +163,9 @@ fn passing_import_string() { "\ @charset 'UTF-8';\n\ \n\ -@import 'data:text/css;base64,ZGF0YTp0ZXh0L2NzcyxodG1se2JhY2tncm91bmQtY29sb3I6IzAwMH0=';\n\ +@import 'data:text/css;base64,aHRtbHtiYWNrZ3JvdW5kLWNvbG9yOiMwMDB9';\n\ \n\ -@import url('data:text/css;base64,ZGF0YTp0ZXh0L2NzcyxodG1se2NvbG9yOiNmZmZ9')\n\ +@import url('data:text/css;base64,aHRtbHtjb2xvcjojZmZmfQ==')\n\ " ); } diff --git a/src/tests/utils/data_url_to_text.rs b/src/tests/utils/data_url_to_text.rs index b7d50b2..32e3da2 100644 --- a/src/tests/utils/data_url_to_text.rs +++ b/src/tests/utils/data_url_to_text.rs @@ -45,6 +45,14 @@ fn passing_parse_text_html_charset_utf_8_between_two_whitespaces() { ); } +#[test] +fn passing_parse_text_css_url_encoded() { + assert_eq!( + utils::data_url_to_text("data:text/css,div{background-color:%23000}"), + "div{background-color:#000}" + ); +} + // ███████╗ █████╗ ██╗██╗ ██╗███╗ ██╗ ██████╗ // ██╔════╝██╔══██╗██║██║ ██║████╗ ██║██╔════╝ // █████╗ ███████║██║██║ ██║██╔██╗ ██║██║ ███╗ diff --git a/src/tests/utils/decode_url.rs b/src/tests/utils/decode_url.rs index 6639e83..a26ff17 100644 --- a/src/tests/utils/decode_url.rs +++ b/src/tests/utils/decode_url.rs @@ -24,3 +24,13 @@ fn passing_decode_file_url() { "file:///tmp/space here/test#1.html" ); } + +#[test] +fn passing_plus_sign() { + assert_eq!( + utils::decode_url(str!( + "fonts.somewhere.com/css?family=Open+Sans:300,400,400italic,600,600italic" + )), + "fonts.somewhere.com/css?family=Open+Sans:300,400,400italic,600,600italic" + ); +} diff --git a/src/tests/utils/file_url_to_fs_path.rs b/src/tests/utils/file_url_to_fs_path.rs index b8efa29..2d5d199 100644 --- a/src/tests/utils/file_url_to_fs_path.rs +++ b/src/tests/utils/file_url_to_fs_path.rs @@ -21,3 +21,18 @@ fn passing_remove_protocl_and_fragment() { ); } } + +#[test] +fn passing_decodes_urls() { + if cfg!(windows) { + assert_eq!( + utils::file_url_to_fs_path("file:///C:/Documents%20and%20Settings/some-file.html"), + "C:\\Documents and Settings\\some-file.html" + ); + } else { + assert_eq!( + utils::file_url_to_fs_path("file:///home/user/My%20Documents"), + "/home/user/My Documents" + ); + } +} diff --git a/src/utils.rs b/src/utils.rs index bb75ad5..885f1c3 100644 --- a/src/utils.rs +++ b/src/utils.rs @@ -30,6 +30,14 @@ const MAGIC: [[&[u8]; 2]; 18] = [ [b"\x1A\x45\xDF\xA3", b"video/webm"], ]; +const PLAINTEXT_MEDIA_TYPES: &[&str] = &[ + "image/svg+xml", + "text/css", + "text/html", + "text/javascript", + "text/plain", +]; + pub fn data_to_data_url(media_type: &str, data: &[u8], url: &str, fragment: &str) -> String { let media_type: String = if media_type.is_empty() { detect_media_type(data, &url) @@ -88,6 +96,10 @@ pub fn is_http_url>(url: T) -> bool { .unwrap_or(false) } +pub fn is_plaintext_media_type(media_type: &str) -> bool { + PLAINTEXT_MEDIA_TYPES.contains(&media_type.to_lowercase().as_str()) +} + pub fn resolve_url, U: AsRef>(from: T, to: U) -> Result { let result = if is_http_url(to.as_ref()) { to.as_ref().to_string() @@ -139,10 +151,11 @@ pub fn data_url_to_text>(url: T) -> String { let mut media_type: &str = ""; let mut encoding: &str = ""; + // Detect media type and encoding let mut i: i8 = 0; for item in &meta_data_items { if i == 0 { - if item.eq_ignore_ascii_case("text/html") { + if is_plaintext_media_type(item) { media_type = item; continue; } @@ -155,7 +168,7 @@ pub fn data_url_to_text>(url: T) -> String { i = i + 1; } - if media_type.eq_ignore_ascii_case("text/html") { + if is_plaintext_media_type(media_type) { if encoding.eq_ignore_ascii_case("base64") { String::from_utf8(base64::decode(&data).unwrap_or(vec![])).unwrap_or(str!()) } else { @@ -167,6 +180,8 @@ pub fn data_url_to_text>(url: T) -> String { } pub fn decode_url(input: String) -> String { + let input: String = input.replace("+", "%2B"); + form_urlencoded::parse(input.as_bytes()) .map(|(key, val)| { [ @@ -200,7 +215,8 @@ pub fn file_url_to_fs_path(url: &str) -> String { fs_file_path = fs_file_path.replace("/", "\\"); } - fs_file_path + // File paths should not be %-encoded + decode_url(fs_file_path) } pub fn retrieve_asset( @@ -219,7 +235,11 @@ pub fn retrieve_asset( let cache_key = clean_url(&url); if is_data_url(&url) { - Ok((url.to_string(), url.to_string())) + if as_data_url { + Ok((url.to_string(), url.to_string())) + } else { + Ok((data_url_to_text(url), url.to_string())) + } } else if is_file_url(&url) { // Check if parent_url is also file:/// // (if not, then we don't embed the asset)