2020-03-08 19:31:42 +00:00
|
|
|
use base64;
|
2020-01-07 04:22:28 +00:00
|
|
|
use reqwest::blocking::Client;
|
2020-03-08 19:31:42 +00:00
|
|
|
use reqwest::header::CONTENT_TYPE;
|
2019-12-06 01:20:09 +00:00
|
|
|
use std::collections::HashMap;
|
2020-03-08 19:31:42 +00:00
|
|
|
use std::fs;
|
|
|
|
use std::path::Path;
|
2020-02-14 04:46:08 +00:00
|
|
|
use url::{form_urlencoded, ParseError, Url};
|
2019-09-29 21:15:49 +00:00
|
|
|
|
2020-03-24 12:22:14 +00:00
|
|
|
/// Lookup table of leading-byte file signatures and the media type reported
/// for each of them.
///
/// Every row is `[signature, media_type]`; both halves are byte strings.
/// Rows are grouped by kind (image, audio, video) and are scanned in order.
///
/// NOTE(review): the `....` runs inside some signatures look like stand-ins
/// for four file-specific bytes (e.g. a RIFF chunk size) — confirm that the
/// code consuming this table interprets them that way.
const MAGIC: [[&[u8]; 2]; 18] = [
    // Image
    [b"GIF87a", b"image/gif"],
    [b"GIF89a", b"image/gif"],
    [b"\xFF\xD8\xFF", b"image/jpeg"],
    [b"\x89PNG\x0D\x0A\x1A\x0A", b"image/png"],
    [b"<svg ", b"image/svg+xml"],
    [b"RIFF....WEBPVP8 ", b"image/webp"],
    [b"\x00\x00\x01\x00", b"image/x-icon"],
    // Audio
    [b"ID3", b"audio/mpeg"],
    [b"\xFF\x0E", b"audio/mpeg"],
    [b"\xFF\x0F", b"audio/mpeg"],
    [b"OggS", b"audio/ogg"],
    [b"RIFF....WAVEfmt ", b"audio/wav"],
    [b"fLaC", b"audio/x-flac"],
    // Video
    [b"RIFF....AVI LIST", b"video/avi"],
    [b"....ftyp", b"video/mp4"],
    [b"\x00\x00\x01\x0B", b"video/mpeg"],
    [b"....moov", b"video/quicktime"],
    [b"\x1A\x45\xDF\xA3", b"video/webm"],
];
|
2019-08-23 03:17:15 +00:00
|
|
|
|
2020-04-10 00:27:07 +00:00
|
|
|
/// Media types whose content is handled as text rather than as binary data.
///
/// Entries are stored in lowercase.
const PLAINTEXT_MEDIA_TYPES: &[&str] = &[
    "image/svg+xml",
    "text/css",
    "text/html",
    "text/javascript",
    "text/plain",
];
|
|
|
|
|
2020-03-29 07:54:20 +00:00
|
|
|
pub fn data_to_data_url(media_type: &str, data: &[u8], url: &str, fragment: &str) -> String {
|
|
|
|
let media_type: String = if media_type.is_empty() {
|
2020-03-24 12:22:14 +00:00
|
|
|
detect_media_type(data, &url)
|
2019-08-23 03:17:15 +00:00
|
|
|
} else {
|
2020-03-24 12:22:14 +00:00
|
|
|
media_type.to_string()
|
2019-08-23 18:24:45 +00:00
|
|
|
};
|
2020-03-29 07:54:20 +00:00
|
|
|
let hash: String = if fragment != "" {
|
|
|
|
format!("#{}", fragment)
|
|
|
|
} else {
|
|
|
|
str!()
|
|
|
|
};
|
|
|
|
|
|
|
|
format!(
|
|
|
|
"data:{};base64,{}{}",
|
|
|
|
media_type,
|
|
|
|
base64::encode(data),
|
|
|
|
hash
|
|
|
|
)
|
2019-08-23 03:17:15 +00:00
|
|
|
}
|
|
|
|
|
2020-03-24 12:22:14 +00:00
|
|
|
pub fn detect_media_type(data: &[u8], url: &str) -> String {
|
2019-09-22 00:06:00 +00:00
|
|
|
for item in MAGIC.iter() {
|
2019-08-23 22:48:08 +00:00
|
|
|
if data.starts_with(item[0]) {
|
2019-10-10 13:23:00 +00:00
|
|
|
return String::from_utf8(item[1].to_vec()).unwrap();
|
2019-08-23 22:48:08 +00:00
|
|
|
}
|
|
|
|
}
|
2020-03-24 12:22:14 +00:00
|
|
|
|
|
|
|
if url.to_lowercase().ends_with(".svg") {
|
|
|
|
return str!("image/svg+xml");
|
|
|
|
}
|
|
|
|
|
2020-02-13 05:56:30 +00:00
|
|
|
str!()
|
2019-08-23 03:17:15 +00:00
|
|
|
}
|
|
|
|
|
2019-10-10 13:23:00 +00:00
|
|
|
pub fn url_has_protocol<T: AsRef<str>>(url: T) -> bool {
|
2020-02-13 05:56:30 +00:00
|
|
|
Url::parse(url.as_ref())
|
|
|
|
.and_then(|u| Ok(u.scheme().len() > 0))
|
|
|
|
.unwrap_or(false)
|
2019-09-29 21:15:49 +00:00
|
|
|
}
|
2019-08-23 03:17:15 +00:00
|
|
|
|
2020-02-13 05:56:30 +00:00
|
|
|
pub fn is_data_url<T: AsRef<str>>(url: T) -> bool {
|
|
|
|
Url::parse(url.as_ref())
|
|
|
|
.and_then(|u| Ok(u.scheme() == "data"))
|
|
|
|
.unwrap_or(false)
|
2019-09-29 21:15:49 +00:00
|
|
|
}
|
2019-08-23 22:48:08 +00:00
|
|
|
|
2020-03-08 19:31:42 +00:00
|
|
|
pub fn is_file_url<T: AsRef<str>>(url: T) -> bool {
|
|
|
|
Url::parse(url.as_ref())
|
|
|
|
.and_then(|u| Ok(u.scheme() == "file"))
|
|
|
|
.unwrap_or(false)
|
|
|
|
}
|
|
|
|
|
2020-02-13 05:56:30 +00:00
|
|
|
pub fn is_http_url<T: AsRef<str>>(url: T) -> bool {
|
|
|
|
Url::parse(url.as_ref())
|
|
|
|
.and_then(|u| Ok(u.scheme() == "http" || u.scheme() == "https"))
|
|
|
|
.unwrap_or(false)
|
2019-09-29 21:15:49 +00:00
|
|
|
}
|
|
|
|
|
2020-04-10 00:27:07 +00:00
|
|
|
pub fn is_plaintext_media_type(media_type: &str) -> bool {
|
|
|
|
PLAINTEXT_MEDIA_TYPES.contains(&media_type.to_lowercase().as_str())
|
|
|
|
}
|
|
|
|
|
2019-10-10 13:23:00 +00:00
|
|
|
pub fn resolve_url<T: AsRef<str>, U: AsRef<str>>(from: T, to: U) -> Result<String, ParseError> {
|
2020-02-12 06:59:21 +00:00
|
|
|
let result = if is_http_url(to.as_ref()) {
|
2019-10-10 13:23:00 +00:00
|
|
|
to.as_ref().to_string()
|
2019-09-29 21:15:49 +00:00
|
|
|
} else {
|
2019-10-10 13:23:00 +00:00
|
|
|
Url::parse(from.as_ref())?
|
|
|
|
.join(to.as_ref())?
|
|
|
|
.as_ref()
|
|
|
|
.to_string()
|
2019-09-29 21:15:49 +00:00
|
|
|
};
|
|
|
|
Ok(result)
|
2019-08-23 03:17:15 +00:00
|
|
|
}
|
2019-10-12 09:32:59 +00:00
|
|
|
|
2020-03-29 07:54:20 +00:00
|
|
|
pub fn get_url_fragment<T: AsRef<str>>(url: T) -> String {
|
|
|
|
if Url::parse(url.as_ref()).unwrap().fragment() == None {
|
|
|
|
str!()
|
2019-12-06 01:05:52 +00:00
|
|
|
} else {
|
2020-03-29 07:54:20 +00:00
|
|
|
str!(Url::parse(url.as_ref()).unwrap().fragment().unwrap())
|
2019-12-06 01:05:52 +00:00
|
|
|
}
|
2019-10-12 11:05:07 +00:00
|
|
|
}
|
2019-12-12 02:13:11 +00:00
|
|
|
|
2020-04-11 00:43:29 +00:00
|
|
|
pub fn clean_url<T: AsRef<str>>(input: T) -> String {
|
|
|
|
let mut url = Url::parse(input.as_ref()).unwrap();
|
2020-03-24 12:22:14 +00:00
|
|
|
|
2019-12-12 02:13:11 +00:00
|
|
|
// Clear fragment
|
2020-04-11 00:43:29 +00:00
|
|
|
url.set_fragment(None);
|
2020-03-24 12:22:14 +00:00
|
|
|
|
2019-12-12 02:13:11 +00:00
|
|
|
// Get rid of stray question mark
|
2020-04-11 00:43:29 +00:00
|
|
|
if url.query() == Some("") {
|
|
|
|
url.set_query(None);
|
2019-12-12 02:13:11 +00:00
|
|
|
}
|
2020-04-11 00:43:29 +00:00
|
|
|
|
|
|
|
// Remove empty trailing ampersand(s)
|
|
|
|
let mut result: String = url.to_string();
|
|
|
|
while result.ends_with("&") {
|
|
|
|
result.pop();
|
|
|
|
}
|
|
|
|
|
|
|
|
result
|
2019-12-12 02:13:11 +00:00
|
|
|
}
|
2020-02-13 05:56:30 +00:00
|
|
|
|
2020-04-10 09:06:07 +00:00
|
|
|
pub fn data_url_to_text<T: AsRef<str>>(url: T) -> (String, String) {
|
|
|
|
let parsed_url = Url::parse(url.as_ref()).unwrap_or(Url::parse("data:,").unwrap());
|
2020-02-14 04:46:08 +00:00
|
|
|
let path: String = parsed_url.path().to_string();
|
|
|
|
let comma_loc: usize = path.find(',').unwrap_or(path.len());
|
|
|
|
|
|
|
|
let meta_data: String = path.chars().take(comma_loc).collect();
|
|
|
|
let raw_data: String = path.chars().skip(comma_loc + 1).collect();
|
|
|
|
|
2020-03-08 19:31:42 +00:00
|
|
|
let data: String = decode_url(raw_data);
|
2020-02-14 04:46:08 +00:00
|
|
|
|
|
|
|
let meta_data_items: Vec<&str> = meta_data.split(';').collect();
|
|
|
|
let mut encoding: &str = "";
|
|
|
|
|
2020-04-10 09:06:07 +00:00
|
|
|
let mut media_type: String = str!();
|
|
|
|
let mut text: String = str!();
|
|
|
|
|
2020-02-14 04:46:08 +00:00
|
|
|
let mut i: i8 = 0;
|
|
|
|
for item in &meta_data_items {
|
|
|
|
if i == 0 {
|
2020-04-10 09:06:07 +00:00
|
|
|
media_type = str!(item);
|
|
|
|
} else {
|
|
|
|
if item.eq_ignore_ascii_case("base64")
|
|
|
|
|| item.eq_ignore_ascii_case("utf8")
|
|
|
|
|| item.eq_ignore_ascii_case("charset=UTF-8")
|
|
|
|
{
|
|
|
|
encoding = item;
|
2020-02-13 05:56:30 +00:00
|
|
|
}
|
2020-02-14 04:46:08 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
i = i + 1;
|
|
|
|
}
|
|
|
|
|
2020-04-10 09:06:07 +00:00
|
|
|
if is_plaintext_media_type(&media_type) || media_type.is_empty() {
|
2020-02-14 04:46:08 +00:00
|
|
|
if encoding.eq_ignore_ascii_case("base64") {
|
2020-04-10 09:06:07 +00:00
|
|
|
text = String::from_utf8(base64::decode(&data).unwrap_or(vec![])).unwrap_or(str!())
|
2020-02-13 05:56:30 +00:00
|
|
|
} else {
|
2020-04-10 09:06:07 +00:00
|
|
|
text = data
|
2020-02-13 05:56:30 +00:00
|
|
|
}
|
|
|
|
}
|
2020-04-10 09:06:07 +00:00
|
|
|
|
|
|
|
(media_type, text)
|
2020-02-13 05:56:30 +00:00
|
|
|
}
|
2020-03-08 19:31:42 +00:00
|
|
|
|
|
|
|
/// Percent-decodes `input`.
///
/// Every `%XX` escape (two hexadecimal digits, either case) is replaced with
/// the byte it stands for; the resulting bytes are interpreted as UTF-8, with
/// invalid sequences replaced by U+FFFD. All other characters are kept
/// verbatim: `+` stays a plus sign, and `&` and `=` are preserved, so query
/// strings and file names containing them survive decoding. A `%` that is not
/// followed by two hexadecimal digits is left untouched.
pub fn decode_url(input: String) -> String {
    /// Numeric value of one ASCII hexadecimal digit.
    fn hex_value(digit: u8) -> Option<u8> {
        match digit {
            b'0'..=b'9' => Some(digit - b'0'),
            b'a'..=b'f' => Some(digit - b'a' + 10),
            b'A'..=b'F' => Some(digit - b'A' + 10),
            _ => None,
        }
    }

    /// Decodes the `%XX` escape at the very start of `rest`, if there is one.
    fn escaped_byte(rest: &[u8]) -> Option<u8> {
        if rest.first() != Some(&b'%') {
            return None;
        }

        let high = hex_value(*rest.get(1)?)?;
        let low = hex_value(*rest.get(2)?)?;

        Some(high * 16 + low)
    }

    let bytes: &[u8] = input.as_bytes();
    let mut decoded: Vec<u8> = Vec::with_capacity(bytes.len());
    let mut pos: usize = 0;

    while pos < bytes.len() {
        match escaped_byte(&bytes[pos..]) {
            Some(value) => {
                decoded.push(value);
                pos += 3;
            }
            None => {
                decoded.push(bytes[pos]);
                pos += 1;
            }
        }
    }

    String::from_utf8_lossy(&decoded).into_owned()
}
|
|
|
|
|
2020-03-29 07:54:20 +00:00
|
|
|
pub fn file_url_to_fs_path(url: &str) -> String {
|
|
|
|
if !is_file_url(url) {
|
|
|
|
return str!();
|
|
|
|
}
|
|
|
|
|
|
|
|
let cutoff_l = if cfg!(windows) { 8 } else { 7 };
|
|
|
|
let mut fs_file_path: String = decode_url(url.to_string()[cutoff_l..].to_string());
|
|
|
|
let url_fragment = get_url_fragment(url);
|
|
|
|
if url_fragment != "" {
|
|
|
|
let max_len = fs_file_path.len() - 1 - url_fragment.len();
|
|
|
|
fs_file_path = fs_file_path[0..max_len].to_string();
|
|
|
|
}
|
|
|
|
|
|
|
|
if cfg!(windows) {
|
|
|
|
fs_file_path = fs_file_path.replace("/", "\\");
|
|
|
|
}
|
|
|
|
|
2020-04-10 00:27:07 +00:00
|
|
|
// File paths should not be %-encoded
|
|
|
|
decode_url(fs_file_path)
|
2020-03-29 07:54:20 +00:00
|
|
|
}
|
|
|
|
|
2020-03-08 19:31:42 +00:00
|
|
|
pub fn retrieve_asset(
|
2020-04-11 00:43:29 +00:00
|
|
|
cache: &mut HashMap<String, Vec<u8>>,
|
2020-03-08 19:31:42 +00:00
|
|
|
client: &Client,
|
|
|
|
parent_url: &str,
|
|
|
|
url: &str,
|
|
|
|
as_data_url: bool,
|
2020-03-24 12:22:14 +00:00
|
|
|
media_type: &str,
|
2020-03-08 19:31:42 +00:00
|
|
|
opt_silent: bool,
|
|
|
|
) -> Result<(String, String), reqwest::Error> {
|
|
|
|
if url.len() == 0 {
|
|
|
|
return Ok((str!(), str!()));
|
|
|
|
}
|
|
|
|
|
|
|
|
if is_data_url(&url) {
|
2020-04-10 00:27:07 +00:00
|
|
|
if as_data_url {
|
|
|
|
Ok((url.to_string(), url.to_string()))
|
|
|
|
} else {
|
2020-04-10 09:06:07 +00:00
|
|
|
let (_media_type, text) = data_url_to_text(url);
|
|
|
|
|
|
|
|
Ok((text, url.to_string()))
|
2020-04-10 00:27:07 +00:00
|
|
|
}
|
2020-03-08 19:31:42 +00:00
|
|
|
} else if is_file_url(&url) {
|
|
|
|
// Check if parent_url is also file:///
|
2020-03-29 07:54:20 +00:00
|
|
|
// (if not, then we don't embed the asset)
|
2020-03-08 19:31:42 +00:00
|
|
|
if !is_file_url(&parent_url) {
|
|
|
|
return Ok((str!(), str!()));
|
|
|
|
}
|
|
|
|
|
2020-03-29 07:54:20 +00:00
|
|
|
let fs_file_path: String = file_url_to_fs_path(url);
|
2020-03-08 19:31:42 +00:00
|
|
|
let path = Path::new(&fs_file_path);
|
2020-03-29 07:54:20 +00:00
|
|
|
let url_fragment = get_url_fragment(url);
|
2020-03-08 19:31:42 +00:00
|
|
|
if path.exists() {
|
|
|
|
if !opt_silent {
|
|
|
|
eprintln!("{}", &url);
|
|
|
|
}
|
|
|
|
|
|
|
|
if as_data_url {
|
2020-03-24 12:22:14 +00:00
|
|
|
let data_url: String = data_to_data_url(
|
|
|
|
&media_type,
|
|
|
|
&fs::read(&fs_file_path).unwrap(),
|
|
|
|
&fs_file_path,
|
2020-03-29 07:54:20 +00:00
|
|
|
&url_fragment,
|
2020-03-24 12:22:14 +00:00
|
|
|
);
|
2020-03-08 19:31:42 +00:00
|
|
|
Ok((data_url, url.to_string()))
|
|
|
|
} else {
|
|
|
|
let data: String = fs::read_to_string(&fs_file_path).expect(url);
|
|
|
|
Ok((data, url.to_string()))
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
Ok((str!(), url.to_string()))
|
|
|
|
}
|
|
|
|
} else {
|
2020-04-11 00:43:29 +00:00
|
|
|
let cache_key: String = clean_url(&url);
|
|
|
|
|
2020-03-08 19:31:42 +00:00
|
|
|
if cache.contains_key(&cache_key) {
|
2020-04-11 00:43:29 +00:00
|
|
|
// URL is in cache, we retrieve it
|
|
|
|
let data = cache.get(&cache_key).unwrap();
|
|
|
|
|
2020-03-08 19:31:42 +00:00
|
|
|
if !opt_silent {
|
|
|
|
eprintln!("{} (from cache)", &url);
|
|
|
|
}
|
2020-04-11 00:43:29 +00:00
|
|
|
|
|
|
|
if as_data_url {
|
|
|
|
let url_fragment = get_url_fragment(url);
|
|
|
|
Ok((
|
|
|
|
data_to_data_url(media_type, data, url, &url_fragment),
|
|
|
|
url.to_string(),
|
|
|
|
))
|
|
|
|
} else {
|
|
|
|
Ok((String::from_utf8_lossy(data).to_string(), url.to_string()))
|
|
|
|
}
|
2020-03-08 19:31:42 +00:00
|
|
|
} else {
|
|
|
|
// URL not in cache, we request it
|
|
|
|
let mut response = client.get(url).send()?;
|
|
|
|
let res_url = response.url().to_string();
|
|
|
|
|
|
|
|
if !opt_silent {
|
|
|
|
if url == res_url {
|
|
|
|
eprintln!("{}", &url);
|
|
|
|
} else {
|
|
|
|
eprintln!("{} -> {}", &url, &res_url);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2020-04-11 00:43:29 +00:00
|
|
|
let new_cache_key: String = clean_url(&res_url);
|
2020-03-08 19:31:42 +00:00
|
|
|
|
|
|
|
if as_data_url {
|
|
|
|
// Convert response into a byte array
|
|
|
|
let mut data: Vec<u8> = vec![];
|
|
|
|
response.copy_to(&mut data)?;
|
|
|
|
|
2020-03-24 12:22:14 +00:00
|
|
|
// Attempt to obtain media type by reading the Content-Type header
|
|
|
|
let media_type = if media_type == "" {
|
2020-03-08 19:31:42 +00:00
|
|
|
response
|
|
|
|
.headers()
|
|
|
|
.get(CONTENT_TYPE)
|
|
|
|
.and_then(|header| header.to_str().ok())
|
2020-03-24 12:22:14 +00:00
|
|
|
.unwrap_or(&media_type)
|
2020-03-08 19:31:42 +00:00
|
|
|
} else {
|
2020-03-24 12:22:14 +00:00
|
|
|
media_type
|
2020-03-08 19:31:42 +00:00
|
|
|
};
|
2020-03-29 07:54:20 +00:00
|
|
|
let url_fragment = get_url_fragment(url);
|
|
|
|
let data_url = data_to_data_url(&media_type, &data, url, &url_fragment);
|
2020-04-11 00:43:29 +00:00
|
|
|
|
2020-03-08 19:31:42 +00:00
|
|
|
// Add to cache
|
2020-04-11 00:43:29 +00:00
|
|
|
cache.insert(new_cache_key, data);
|
|
|
|
|
2020-03-08 19:31:42 +00:00
|
|
|
Ok((data_url, res_url))
|
|
|
|
} else {
|
|
|
|
let content = response.text().unwrap();
|
2020-04-11 00:43:29 +00:00
|
|
|
|
2020-03-08 19:31:42 +00:00
|
|
|
// Add to cache
|
2020-04-11 00:43:29 +00:00
|
|
|
cache.insert(new_cache_key, content.as_bytes().to_vec());
|
|
|
|
|
2020-03-08 19:31:42 +00:00
|
|
|
Ok((content, res_url))
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|