From e3cd6c6a107e80bffec8f634af44be59a7a0b13d Mon Sep 17 00:00:00 2001 From: Tom Parker-Shemilt Date: Tue, 29 Jun 2021 20:48:00 +0100 Subject: [PATCH 1/2] Check the oldest/unchecked ones first --- src/main.rs | 42 ++++++++++++++++++++++++++++++++++++++---- 1 file changed, 38 insertions(+), 4 deletions(-) diff --git a/src/main.rs b/src/main.rs index 8861ca3..aa92607 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,5 +1,5 @@ use pulldown_cmark::{Parser, Event, Tag}; -use std::fs; +use std::{cmp::Ordering, fs}; use futures::future::{select_all, BoxFuture, FutureExt}; use std::collections::{BTreeSet, BTreeMap}; use serde::{Serialize, Deserialize}; @@ -10,7 +10,7 @@ use std::io::Write; use reqwest::{Client, redirect::Policy, StatusCode, header, Url}; use regex::Regex; use failure::{Fail, Error, format_err}; -use chrono::{Local, DateTime, Duration}; +use chrono::{DateTime, Duration, Local}; use std::env; use tokio::sync::Semaphore; use tokio::sync::SemaphorePermit; @@ -121,7 +121,7 @@ fn get_url_core(url: String) -> BoxFuture<'static, (String, Result<(), CheckerEr async move { let mut res = Err(CheckerError::NotTried); for _ in 0..5u8 { - debug!("Running {}", url); + info!("Running {}", url); lazy_static! { static ref GITHUB_REPO_REGEX: Regex = Regex::new(r"^https://github.com/(?P[^/]+)/(?P[^/]+)$").unwrap(); static ref GITHUB_API_REGEX: Regex = Regex::new(r"https://api.github.com/").unwrap(); @@ -293,12 +293,14 @@ async fn main() -> Result<(), Error> { url_checks.push(check); }; + let mut to_check: Vec = vec![]; + for (event, _range) in parser.into_offset_iter() { match event { Event::Start(tag) => { match tag { Tag::Link(_link_type, url, _title) | Tag::Image(_link_type, url, _title) => { - do_check(url.to_string()); + to_check.push(url.to_string()); } _ => {} } @@ -310,6 +312,38 @@ async fn main() -> Result<(), Error> { } } + to_check.sort_by(|a,b| { + let get_time = |k| { + let res = results.get(k); + if let Some(link) = res { + if let Some(last_working) = link.last_working { + Some(last_working) + } else { + None + } + } else { + None + } + }; + let res_a = get_time(a); + let res_b = get_time(b); + if res_a.is_none() { + if res_b.is_none() { + return a.cmp(b); + } else { + Ordering::Greater + } + } else if res_b.is_none() { + Ordering::Less + } else { + res_a.unwrap().cmp(&res_b.unwrap()) + } + }); + + for url in to_check { + do_check(url) + } + let results_keys = results.keys().cloned().collect::>(); let old_links = results_keys.difference(&used); for link in old_links { From d3260a60d609203ffbae643a7931c17a5a1126e3 Mon Sep 17 00:00:00 2001 From: Tom Parker-Shemilt Date: Tue, 29 Jun 2021 20:49:32 +0100 Subject: [PATCH 2/2] Reset some unneeded changes --- src/main.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/main.rs b/src/main.rs index aa92607..984673e 100644 --- a/src/main.rs +++ b/src/main.rs @@ -10,7 +10,7 @@ use std::io::Write; use reqwest::{Client, redirect::Policy, StatusCode, header, Url}; use regex::Regex; use failure::{Fail, Error, format_err}; -use chrono::{DateTime, Duration, Local}; +use chrono::{Local, DateTime, Duration}; use std::env; use tokio::sync::Semaphore; use tokio::sync::SemaphorePermit; @@ -121,7 +121,7 @@ fn get_url_core(url: String) -> BoxFuture<'static, (String, Result<(), CheckerEr async move { let mut res = Err(CheckerError::NotTried); for _ in 0..5u8 { - info!("Running {}", url); + debug!("Running {}", url); lazy_static! { static ref GITHUB_REPO_REGEX: Regex = Regex::new(r"^https://github.com/(?P[^/]+)/(?P[^/]+)$").unwrap(); static ref GITHUB_API_REGEX: Regex = Regex::new(r"https://api.github.com/").unwrap();