|
|
@ -1,5 +1,5 @@
|
|
|
|
use pulldown_cmark::{Parser, Event, Tag};
|
|
|
|
use pulldown_cmark::{Parser, Event, Tag};
|
|
|
|
use std::fs;
|
|
|
|
use std::{cmp::Ordering, fs};
|
|
|
|
use futures::future::{select_all, BoxFuture, FutureExt};
|
|
|
|
use futures::future::{select_all, BoxFuture, FutureExt};
|
|
|
|
use std::collections::{BTreeSet, BTreeMap};
|
|
|
|
use std::collections::{BTreeSet, BTreeMap};
|
|
|
|
use serde::{Serialize, Deserialize};
|
|
|
|
use serde::{Serialize, Deserialize};
|
|
|
@ -26,6 +26,9 @@ enum CheckerError {
|
|
|
|
location: Option<String>,
|
|
|
|
location: Option<String>,
|
|
|
|
},
|
|
|
|
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#[fail(display = "too many requests")]
|
|
|
|
|
|
|
|
TooManyRequests,
|
|
|
|
|
|
|
|
|
|
|
|
#[fail(display = "reqwest error: {}", error)]
|
|
|
|
#[fail(display = "reqwest error: {}", error)]
|
|
|
|
ReqwestError {
|
|
|
|
ReqwestError {
|
|
|
|
error: String,
|
|
|
|
error: String,
|
|
|
@ -191,6 +194,12 @@ fn get_url_core(url: String) -> BoxFuture<'static, (String, Result<(), CheckerEr
|
|
|
|
return (url, res);
|
|
|
|
return (url, res);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
if status == StatusCode::TOO_MANY_REQUESTS {
|
|
|
|
|
|
|
|
// We get a lot of these, and we should not retry as they'll just fail again
|
|
|
|
|
|
|
|
warn!("Error while getting {}: {}", url, status);
|
|
|
|
|
|
|
|
return (url, Err(CheckerError::TooManyRequests));
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
warn!("Error while getting {}, retrying: {}", url, status);
|
|
|
|
warn!("Error while getting {}, retrying: {}", url, status);
|
|
|
|
if status.is_redirection() {
|
|
|
|
if status.is_redirection() {
|
|
|
|
res = Err(CheckerError::HttpError {status: status.as_u16(), location: ok.headers().get(header::LOCATION).and_then(|h| h.to_str().ok()).map(|x| x.to_string())});
|
|
|
|
res = Err(CheckerError::HttpError {status: status.as_u16(), location: ok.headers().get(header::LOCATION).and_then(|h| h.to_str().ok()).map(|x| x.to_string())});
|
|
|
@ -284,12 +293,14 @@ async fn main() -> Result<(), Error> {
|
|
|
|
url_checks.push(check);
|
|
|
|
url_checks.push(check);
|
|
|
|
};
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
let mut to_check: Vec<String> = vec![];
|
|
|
|
|
|
|
|
|
|
|
|
for (event, _range) in parser.into_offset_iter() {
|
|
|
|
for (event, _range) in parser.into_offset_iter() {
|
|
|
|
match event {
|
|
|
|
match event {
|
|
|
|
Event::Start(tag) => {
|
|
|
|
Event::Start(tag) => {
|
|
|
|
match tag {
|
|
|
|
match tag {
|
|
|
|
Tag::Link(_link_type, url, _title) | Tag::Image(_link_type, url, _title) => {
|
|
|
|
Tag::Link(_link_type, url, _title) | Tag::Image(_link_type, url, _title) => {
|
|
|
|
do_check(url.to_string());
|
|
|
|
to_check.push(url.to_string());
|
|
|
|
}
|
|
|
|
}
|
|
|
|
_ => {}
|
|
|
|
_ => {}
|
|
|
|
}
|
|
|
|
}
|
|
|
@ -301,6 +312,38 @@ async fn main() -> Result<(), Error> {
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
to_check.sort_by(|a,b| {
|
|
|
|
|
|
|
|
let get_time = |k| {
|
|
|
|
|
|
|
|
let res = results.get(k);
|
|
|
|
|
|
|
|
if let Some(link) = res {
|
|
|
|
|
|
|
|
if let Some(last_working) = link.last_working {
|
|
|
|
|
|
|
|
Some(last_working)
|
|
|
|
|
|
|
|
} else {
|
|
|
|
|
|
|
|
None
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
} else {
|
|
|
|
|
|
|
|
None
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
};
|
|
|
|
|
|
|
|
let res_a = get_time(a);
|
|
|
|
|
|
|
|
let res_b = get_time(b);
|
|
|
|
|
|
|
|
if res_a.is_none() {
|
|
|
|
|
|
|
|
if res_b.is_none() {
|
|
|
|
|
|
|
|
return a.cmp(b);
|
|
|
|
|
|
|
|
} else {
|
|
|
|
|
|
|
|
Ordering::Less
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
} else if res_b.is_none() {
|
|
|
|
|
|
|
|
Ordering::Greater
|
|
|
|
|
|
|
|
} else {
|
|
|
|
|
|
|
|
res_a.unwrap().cmp(&res_b.unwrap())
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
});
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
for url in to_check {
|
|
|
|
|
|
|
|
do_check(url)
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
let results_keys = results.keys().cloned().collect::<BTreeSet<String>>();
|
|
|
|
let results_keys = results.keys().cloned().collect::<BTreeSet<String>>();
|
|
|
|
let old_links = results_keys.difference(&used);
|
|
|
|
let old_links = results_keys.difference(&used);
|
|
|
|
for link in old_links {
|
|
|
|
for link in old_links {
|
|
|
@ -365,6 +408,13 @@ async fn main() -> Result<(), Error> {
|
|
|
|
failed +=1;
|
|
|
|
failed +=1;
|
|
|
|
continue;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
CheckerError::TooManyRequests => {
|
|
|
|
|
|
|
|
// HTTP 429 (too many requests): not counted as a failure if the link has worked before
|
|
|
|
|
|
|
|
if link.last_working.is_some() {
|
|
|
|
|
|
|
|
info!("Ignoring 429 failure on {} as we've seen success before", url);
|
|
|
|
|
|
|
|
continue;
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
}
|
|
|
|
_ => {}
|
|
|
|
_ => {}
|
|
|
|
};
|
|
|
|
};
|
|
|
|
if let Some(last_working) = link.last_working {
|
|
|
|
if let Some(last_working) = link.last_working {
|
|
|
|