Improve GitHub regex match

pull/1180/head
Tom Parker-Shemilt 3 years ago
parent f13e5fe855
commit fa9b5f30e8

@ -161,7 +161,7 @@ fn get_url(url: String) -> BoxFuture<'static, (String, Result<(), CheckerError>)
lazy_static! {
static ref GITHUB_REPO_REGEX: Regex =
Regex::new(r"^https://github.com/(?P<org>[^/]+)/(?P<repo>[^/]+)/?$").unwrap();
Regex::new(r"^https://github.com/(?P<org>[^/]+)/(?P<repo>[^/]+)(.*)").unwrap();
static ref GITHUB_API_REGEX: Regex = Regex::new(r"https://api.github.com/").unwrap();
static ref CRATE_REGEX: Regex =
Regex::new(r"https://crates.io/crates/(?P<crate>[^/]+)/?$").unwrap();
@ -392,13 +392,6 @@ async fn main() -> Result<(), Error> {
cargo_downloads: BTreeMap::new(),
});
// Overrides for popularity count, reasons at the top of the file
for url in POPULARITY_OVERRIDES.iter() {
popularity_data
.github_stars
.insert(url.clone(), MINIMUM_GITHUB_STARS);
}
let mut url_checks = vec![];
let min_between_checks: Duration = Duration::days(3);
@ -450,25 +443,35 @@ async fn main() -> Result<(), Error> {
Tag::Link(_link_type, url, _title) | Tag::Image(_link_type, url, _title) => {
if !url.starts_with("#") {
let new_url = url.to_string();
let existing = popularity_data.github_stars.get(&new_url);
if let Some(stars) = existing {
// Use existing star data, but re-retrieve url to check aliveness
// Some will have overrides, so don't check the regex yet
github_stars = Some(*stars)
} else if GITHUB_REPO_REGEX.is_match(&url) && existing.is_none() {
github_stars = get_stars(&url).await;
if let Some(raw_stars) = github_stars {
popularity_data.github_stars.insert(new_url, raw_stars);
if raw_stars >= required_stars {
fs::write(
"results/popularity.yaml",
serde_yaml::to_string(&popularity_data)?,
)?;
if POPULARITY_OVERRIDES.contains(&new_url) {
github_stars = Some(MINIMUM_GITHUB_STARS);
} else if GITHUB_REPO_REGEX.is_match(&url) {
let github_url = GITHUB_REPO_REGEX
.replace_all(&url, "https://github.com/$org/$repo")
.to_string();
let existing = popularity_data.github_stars.get(&github_url);
if let Some(stars) = existing {
// Use existing star data, but re-retrieve url to check aliveness
// Some will have overrides, so don't check the regex yet
github_stars = Some(*stars)
} else {
github_stars = get_stars(&github_url).await;
if let Some(raw_stars) = github_stars {
popularity_data
.github_stars
.insert(github_url.to_string(), raw_stars);
if raw_stars >= required_stars {
fs::write(
"results/popularity.yaml",
serde_yaml::to_string(&popularity_data)?,
)?;
}
link_count += 1;
continue;
}
}
link_count += 1;
continue;
} else if CRATE_REGEX.is_match(&url) {
}
if CRATE_REGEX.is_match(&url) {
let existing = popularity_data.cargo_downloads.get(&new_url);
if let Some(downloads) = existing {
cargo_downloads = Some(*downloads);

Loading…
Cancel
Save