|
|
@ -1,12 +1,16 @@
|
|
|
|
|
|
|
|
use html5ever::parse_document;
|
|
|
|
|
|
|
|
use html5ever::rcdom::{Handle, NodeData, RcDom};
|
|
|
|
|
|
|
|
use html5ever::serialize::{serialize, SerializeOpts};
|
|
|
|
|
|
|
|
use html5ever::tendril::TendrilSink;
|
|
|
|
use http::{is_valid_url, resolve_url, retrieve_asset};
|
|
|
|
use http::{is_valid_url, resolve_url, retrieve_asset};
|
|
|
|
|
|
|
|
use regex::Regex;
|
|
|
|
use std::default::Default;
|
|
|
|
use std::default::Default;
|
|
|
|
use std::io;
|
|
|
|
use std::io;
|
|
|
|
use utils::data_to_dataurl;
|
|
|
|
use utils::data_to_dataurl;
|
|
|
|
|
|
|
|
|
|
|
|
use html5ever::parse_document;
|
|
|
|
lazy_static! {
|
|
|
|
use html5ever::rcdom::{Handle, NodeData, RcDom};
|
|
|
|
static ref HAS_PROTOCOL: Regex = Regex::new(r"^[a-z0-9]+:").unwrap();
|
|
|
|
use html5ever::serialize::{serialize, SerializeOpts};
|
|
|
|
}
|
|
|
|
use html5ever::tendril::TendrilSink;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
enum NodeMatch {
|
|
|
|
enum NodeMatch {
|
|
|
|
Icon,
|
|
|
|
Icon,
|
|
|
@ -200,7 +204,7 @@ pub fn walk_and_embed_assets(
|
|
|
|
for attr in attrs_mut.iter_mut() {
|
|
|
|
for attr in attrs_mut.iter_mut() {
|
|
|
|
if &attr.name.local == "href" {
|
|
|
|
if &attr.name.local == "href" {
|
|
|
|
// Don't touch email links or hrefs which begin with a hash sign
|
|
|
|
// Don't touch hrefs which begin with a hash sign or which already specify a protocol (mailto:, tel:, javascript:, http:, ...)
|
|
|
|
if attr.value.starts_with('#') || attr.value.starts_with("mailto:") {
|
|
|
|
if attr.value.starts_with('#') || has_protocol(&attr.value) {
|
|
|
|
continue;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
@ -318,6 +322,10 @@ pub fn walk_and_embed_assets(
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/// Reports whether `url` starts with a protocol prefix such as `mailto:`,
/// `tel:`, `javascript:` or `https:`.
///
/// A prefix counts as a protocol when it consists of one or more ASCII letters
/// or digits immediately followed by a colon. Letter case is ignored, so
/// `MAILTO:` is recognised just like `mailto:`.
///
/// Returns `false` for the empty string, for protocol-relative URLs (`//host`),
/// for paths, and for bare host names.
fn has_protocol(url: &str) -> bool {
    // Only the leading run of ASCII alphanumerics is inspected. This avoids
    // allocating a lowercased copy of the whole value, and it keeps non-ASCII
    // characters that merely lowercase to ASCII (e.g. the Kelvin sign U+212A)
    // from being mistaken for part of a protocol name.
    let scheme_len = url.bytes().take_while(u8::is_ascii_alphanumeric).count();

    // A protocol needs at least one character and must end in a colon.
    scheme_len > 0 && url.as_bytes().get(scheme_len) == Some(&b':')
}
|
|
|
|
|
|
|
|
|
|
|
|
pub fn html_to_dom(data: &str) -> html5ever::rcdom::RcDom {
|
|
|
|
pub fn html_to_dom(data: &str) -> html5ever::rcdom::RcDom {
|
|
|
|
parse_document(RcDom::default(), Default::default())
|
|
|
|
parse_document(RcDom::default(), Default::default())
|
|
|
|
.from_utf8()
|
|
|
|
.from_utf8()
|
|
|
@ -345,4 +353,19 @@ mod tests {
|
|
|
|
assert_eq!(is_icon("icon"), true);
|
|
|
|
assert_eq!(is_icon("icon"), true);
|
|
|
|
assert_eq!(is_icon("stylesheet"), false);
|
|
|
|
assert_eq!(is_icon("stylesheet"), false);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#[test]
fn test_has_protocol() {
    // Values that begin with a protocol prefix, including an upper-case one.
    assert!(has_protocol("mailto:somebody@somewhere.com?subject=hello"));
    assert!(has_protocol("MAILTO:somebody@somewhere.com?subject=hello"));
    assert!(has_protocol("tel:5551234567"));
    assert!(has_protocol("ftp:user:password@some-ftp-server.com"));
    assert!(has_protocol("javascript:void(0)"));
    assert!(has_protocol("http://news.ycombinator.com"));
    assert!(has_protocol("https://github.com"));

    // Protocol-relative URLs, bare host names, paths and the empty string
    // carry no protocol prefix.
    assert!(!has_protocol("//some-hostname.com/some-file.html"));
    assert!(!has_protocol("some-hostname.com/some-file.html"));
    assert!(!has_protocol("/some-file.html"));
    assert!(!has_protocol(""));
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|