improve data URL media type detection

pull/159/head
Sunshine 4 years ago
parent d2615f51dc
commit 594ad55bd8
No known key found for this signature in database
GPG Key ID: B80CA68703CD8AB1

@ -122,9 +122,9 @@ fn main() {
base_url = final_url; base_url = final_url;
dom = html_to_dom(&data); dom = html_to_dom(&data);
} else if is_data_url(target_url) { } else if is_data_url(target_url) {
let text: String = data_url_to_text(target_url); let (media_type, text): (String, String) = data_url_to_text(target_url);
if text.len() == 0 { if !media_type.eq_ignore_ascii_case("text/html") {
eprintln!("Unsupported data URL input"); eprintln!("Unsupported data URL media type");
process::exit(1); process::exit(1);
} }
base_url = str!(target_url); base_url = str!(target_url);

@ -62,7 +62,7 @@ fn passing_bad_input_data_url() -> Result<(), Box<dyn std::error::Error>> {
// STDERR should contain error description // STDERR should contain error description
assert_eq!( assert_eq!(
std::str::from_utf8(&out.stderr).unwrap(), std::str::from_utf8(&out.stderr).unwrap(),
"Unsupported data URL input\n" "Unsupported data URL media type\n"
); );
// The exit code should be 1 // The exit code should be 1

@ -9,48 +9,74 @@ use crate::utils;
#[test] #[test]
fn passing_parse_text_html_base64() { fn passing_parse_text_html_base64() {
let (media_type, text) = utils::data_url_to_text("data:text/html;base64,V29yayBleHBhbmRzIHNvIGFzIHRvIGZpbGwgdGhlIHRpbWUgYXZhaWxhYmxlIGZvciBpdHMgY29tcGxldGlvbg==");
assert_eq!(media_type, "text/html");
assert_eq!( assert_eq!(
utils::data_url_to_text("data:text/html;base64,V29yayBleHBhbmRzIHNvIGFzIHRvIGZpbGwgdGhlIHRpbWUgYXZhaWxhYmxlIGZvciBpdHMgY29tcGxldGlvbg=="), text,
"Work expands so as to fill the time available for its completion" "Work expands so as to fill the time available for its completion"
); );
} }
#[test] #[test]
fn passing_parse_text_html_utf8() { fn passing_parse_text_html_utf8() {
let (media_type, text) = utils::data_url_to_text(
"data:text/html;utf8,Work expands so as to fill the time available for its completion",
);
assert_eq!(media_type, "text/html");
assert_eq!( assert_eq!(
utils::data_url_to_text( text,
"data:text/html;utf8,Work expands so as to fill the time available for its completion"
),
"Work expands so as to fill the time available for its completion" "Work expands so as to fill the time available for its completion"
); );
} }
#[test] #[test]
fn passing_parse_text_html_plaintext() { fn passing_parse_text_html_plaintext() {
let (media_type, text) = utils::data_url_to_text(
"data:text/html,Work expands so as to fill the time available for its completion",
);
assert_eq!(media_type, "text/html");
assert_eq!( assert_eq!(
utils::data_url_to_text( text,
"data:text/html,Work expands so as to fill the time available for its completion"
),
"Work expands so as to fill the time available for its completion" "Work expands so as to fill the time available for its completion"
); );
} }
#[test] #[test]
fn passing_parse_text_html_charset_utf_8_between_two_whitespaces() { fn passing_parse_text_html_charset_utf_8_between_two_whitespaces() {
let (media_type, text) = utils::data_url_to_text(" data:text/html;charset=utf-8,Work expands so as to fill the time available for its completion ");
assert_eq!(media_type, "text/html");
assert_eq!( assert_eq!(
utils::data_url_to_text( text,
" data:text/html;charset=utf-8,Work expands so as to fill the time available for its completion "
),
"Work expands so as to fill the time available for its completion" "Work expands so as to fill the time available for its completion"
); );
} }
#[test] #[test]
fn passing_parse_text_css_url_encoded() { fn passing_parse_text_css_url_encoded() {
assert_eq!( let (media_type, text) = utils::data_url_to_text("data:text/css,div{background-color:%23000}");
utils::data_url_to_text("data:text/css,div{background-color:%23000}"),
"div{background-color:#000}" assert_eq!(media_type, "text/css");
); assert_eq!(text, "div{background-color:#000}");
}
#[test]
fn passing_parse_no_media_type_base64() {
let (media_type, text) = utils::data_url_to_text("data:;base64,dGVzdA==");
assert_eq!(media_type, "");
assert_eq!(text, "test");
}
#[test]
fn passing_parse_no_media_type_no_encoding() {
let (media_type, text) = utils::data_url_to_text("data:;,test%20test");
assert_eq!(media_type, "");
assert_eq!(text, "test test");
} }
// ███████╗ █████╗ ██╗██╗ ██╗███╗ ██╗ ██████╗ // ███████╗ █████╗ ██╗██╗ ██╗███╗ ██╗ ██████╗
@ -62,5 +88,8 @@ fn passing_parse_text_css_url_encoded() {
#[test] #[test]
fn failing_just_word_data() { fn failing_just_word_data() {
assert_eq!(utils::data_url_to_text("data"), ""); let (media_type, text) = utils::data_url_to_text("data");
assert_eq!(media_type, "");
assert_eq!(text, "");
} }

@ -133,50 +133,47 @@ pub fn clean_url<T: AsRef<str>>(url: T) -> String {
result.to_string() result.to_string()
} }
pub fn data_url_to_text<T: AsRef<str>>(url: T) -> String { pub fn data_url_to_text<T: AsRef<str>>(url: T) -> (String, String) {
let parsed_url = Url::parse(url.as_ref()).unwrap_or(Url::parse("http://[::1]").unwrap()); let parsed_url = Url::parse(url.as_ref()).unwrap_or(Url::parse("data:,").unwrap());
let path: String = parsed_url.path().to_string(); let path: String = parsed_url.path().to_string();
let comma_loc: usize = path.find(',').unwrap_or(path.len()); let comma_loc: usize = path.find(',').unwrap_or(path.len());
if comma_loc == path.len() {
return str!();
}
let meta_data: String = path.chars().take(comma_loc).collect(); let meta_data: String = path.chars().take(comma_loc).collect();
let raw_data: String = path.chars().skip(comma_loc + 1).collect(); let raw_data: String = path.chars().skip(comma_loc + 1).collect();
let data: String = decode_url(raw_data); let data: String = decode_url(raw_data);
let meta_data_items: Vec<&str> = meta_data.split(';').collect(); let meta_data_items: Vec<&str> = meta_data.split(';').collect();
let mut media_type: &str = "";
let mut encoding: &str = ""; let mut encoding: &str = "";
// Detect media type and encoding let mut media_type: String = str!();
let mut text: String = str!();
let mut i: i8 = 0; let mut i: i8 = 0;
for item in &meta_data_items { for item in &meta_data_items {
if i == 0 { if i == 0 {
if is_plaintext_media_type(item) { media_type = str!(item);
media_type = item; } else {
continue; if item.eq_ignore_ascii_case("base64")
|| item.eq_ignore_ascii_case("utf8")
|| item.eq_ignore_ascii_case("charset=UTF-8")
{
encoding = item;
} }
} }
if item.eq_ignore_ascii_case("base64") || item.eq_ignore_ascii_case("utf8") {
encoding = item;
}
i = i + 1; i = i + 1;
} }
if is_plaintext_media_type(media_type) { if is_plaintext_media_type(&media_type) || media_type.is_empty() {
if encoding.eq_ignore_ascii_case("base64") { if encoding.eq_ignore_ascii_case("base64") {
String::from_utf8(base64::decode(&data).unwrap_or(vec![])).unwrap_or(str!()) text = String::from_utf8(base64::decode(&data).unwrap_or(vec![])).unwrap_or(str!())
} else { } else {
data text = data
} }
} else {
str!()
} }
(media_type, text)
} }
pub fn decode_url(input: String) -> String { pub fn decode_url(input: String) -> String {
@ -238,7 +235,9 @@ pub fn retrieve_asset(
if as_data_url { if as_data_url {
Ok((url.to_string(), url.to_string())) Ok((url.to_string(), url.to_string()))
} else { } else {
Ok((data_url_to_text(url), url.to_string())) let (_media_type, text) = data_url_to_text(url);
Ok((text, url.to_string()))
} }
} else if is_file_url(&url) { } else if is_file_url(&url) {
// Check if parent_url is also file:/// // Check if parent_url is also file:///

Loading…
Cancel
Save