Check the oldest/unchecked ones first

This commit is contained in:
Tom Parker-Shemilt 2021-06-29 20:48:00 +01:00
parent 4ff49d1345
commit e3cd6c6a10

View File

@ -1,5 +1,5 @@
use pulldown_cmark::{Parser, Event, Tag}; use pulldown_cmark::{Parser, Event, Tag};
use std::fs; use std::{cmp::Ordering, fs};
use futures::future::{select_all, BoxFuture, FutureExt}; use futures::future::{select_all, BoxFuture, FutureExt};
use std::collections::{BTreeSet, BTreeMap}; use std::collections::{BTreeSet, BTreeMap};
use serde::{Serialize, Deserialize}; use serde::{Serialize, Deserialize};
@ -10,7 +10,7 @@ use std::io::Write;
use reqwest::{Client, redirect::Policy, StatusCode, header, Url}; use reqwest::{Client, redirect::Policy, StatusCode, header, Url};
use regex::Regex; use regex::Regex;
use failure::{Fail, Error, format_err}; use failure::{Fail, Error, format_err};
use chrono::{Local, DateTime, Duration}; use chrono::{DateTime, Duration, Local};
use std::env; use std::env;
use tokio::sync::Semaphore; use tokio::sync::Semaphore;
use tokio::sync::SemaphorePermit; use tokio::sync::SemaphorePermit;
@ -121,7 +121,7 @@ fn get_url_core(url: String) -> BoxFuture<'static, (String, Result<(), CheckerEr
async move { async move {
let mut res = Err(CheckerError::NotTried); let mut res = Err(CheckerError::NotTried);
for _ in 0..5u8 { for _ in 0..5u8 {
debug!("Running {}", url); info!("Running {}", url);
lazy_static! { lazy_static! {
static ref GITHUB_REPO_REGEX: Regex = Regex::new(r"^https://github.com/(?P<org>[^/]+)/(?P<repo>[^/]+)$").unwrap(); static ref GITHUB_REPO_REGEX: Regex = Regex::new(r"^https://github.com/(?P<org>[^/]+)/(?P<repo>[^/]+)$").unwrap();
static ref GITHUB_API_REGEX: Regex = Regex::new(r"https://api.github.com/").unwrap(); static ref GITHUB_API_REGEX: Regex = Regex::new(r"https://api.github.com/").unwrap();
@ -293,12 +293,14 @@ async fn main() -> Result<(), Error> {
url_checks.push(check); url_checks.push(check);
}; };
let mut to_check: Vec<String> = vec![];
for (event, _range) in parser.into_offset_iter() { for (event, _range) in parser.into_offset_iter() {
match event { match event {
Event::Start(tag) => { Event::Start(tag) => {
match tag { match tag {
Tag::Link(_link_type, url, _title) | Tag::Image(_link_type, url, _title) => { Tag::Link(_link_type, url, _title) | Tag::Image(_link_type, url, _title) => {
do_check(url.to_string()); to_check.push(url.to_string());
} }
_ => {} _ => {}
} }
@ -310,6 +312,38 @@ async fn main() -> Result<(), Error> {
} }
} }
// Order the queued URLs before dispatching them: entries with a recorded
// `last_working` time come first, earliest timestamp first. Entries without one
// (either absent from `results` or with `last_working == None`) are placed after
// every timestamped entry and are ordered by URL so the result is deterministic.
// NOTE(review): the commit title says unchecked links should go first, but links
// without a timestamp sort last here — confirm which order is intended.
to_check.sort_by(|a, b| {
    // Looks up the stored timestamp for a URL; `None` covers both "unknown URL"
    // and "known URL without a recorded time".
    let last_working = |url: &String| results.get(url).and_then(|link| link.last_working);
    match (last_working(a), last_working(b)) {
        (Some(time_a), Some(time_b)) => time_a.cmp(&time_b),
        (Some(_), None) => Ordering::Less,
        (None, Some(_)) => Ordering::Greater,
        (None, None) => a.cmp(b),
    }
});
// Hand each URL to `do_check`, consuming `to_check` in its sorted order.
for url in to_check {
do_check(url)
}
let results_keys = results.keys().cloned().collect::<BTreeSet<String>>(); let results_keys = results.keys().cloned().collect::<BTreeSet<String>>();
let old_links = results_keys.difference(&used); let old_links = results_keys.difference(&used);
for link in old_links { for link in old_links {