Look for links/images in HTML content in the Markdown

pull/792/head
Tom Parker-Shemilt 4 years ago
parent a37b1cfee3
commit 6cb9433de0

@ -16,4 +16,5 @@ lazy_static = "1"
env_logger = "0.7"
async-std = "1"
log = "0.4"
regex = "1"
regex = "1"
scraper = "0.11"

@ -11,7 +11,7 @@ use log::{warn, debug};
use std::io::Write;
use reqwest::{Client, redirect::Policy, StatusCode, header};
use regex::Regex;
use scraper::{Html, Selector};
use failure::{Fail, Error, format_err};
#[derive(Debug, Fail)]
@ -155,22 +155,43 @@ async fn main() -> Result<(), Error> {
let mut url_checks = vec![];
let mut do_check = |url: String| {
if !url.starts_with("http") {
return;
}
if results.working.contains(&url) {
return;
}
let check = get_url(url).boxed();
url_checks.push(check);
};
for (event, _range) in parser.into_offset_iter() {
if let Event::Start(tag) = event {
match tag {
Tag::Link(_link_type, url, _title) | Tag::Image(_link_type, url, _title) => {
if !url.starts_with("http") {
continue;
match event {
Event::Start(tag) => {
match tag {
Tag::Link(_link_type, url, _title) | Tag::Image(_link_type, url, _title) => {
do_check(url.to_string());
}
let url_string = url.to_string();
if results.working.contains(&url_string) {
continue;
_ => {}
}
}
Event::Html(content) => {
let fragment = Html::parse_fragment(&content);
for element in fragment.select(&Selector::parse("img").unwrap()) {
let img_src = element.value().attr("src");
if let Some(src) = img_src {
do_check(src.to_string());
}
}
for element in fragment.select(&Selector::parse("a").unwrap()) {
let a_href = element.value().attr("href");
if let Some(href) = a_href {
do_check(href.to_string());
}
let check = get_url(url_string).boxed();
url_checks.push(check);
}
_ => {}
}
_ => {}
}
}

Loading…
Cancel
Save