Merge branch 'master' into replaced-ytop

pull/1125/head
Tom Parker-Shemilt 3 years ago committed by GitHub
commit f846955f74
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -0,0 +1,22 @@
root = true
[*]
indent_style = space
indent_size = 4
charset = utf-8
trim_trailing_whitespace = true
insert_final_newline = true
[Cargo.lock]
# Just ignore that
indent_size = unset
[LICENSE.txt]
# Ignore that too
indent_size = unset
[README.md]
indent_size = 2
[.github/workflows/*.yml]
indent_size = 2

5
.gitattributes vendored

@ -0,0 +1,5 @@
/.github export-ignore
/.gitignore export-ignore
/CONTRIBUTING.md export-ignore
/.markdownlint.json export-ignore
/.editorconfig export-ignore

@ -1,7 +1,7 @@
name: Automatic Approve
on:
schedule:
- cron: "0 0 * * *"
schedule:
- cron: 0 0 * * *
workflow_dispatch:
jobs:
automatic-approve:
@ -12,5 +12,5 @@ jobs:
uses: mheap/automatic-approve-action@v1.1.0
with:
token: ${{ secrets.GITHUB_TOKEN }}
workflows: "rust.yml"
dangerous_files: "src/main.rs,Cargo.toml,Cargo.lock"
workflows: "rust.yml,lint.yml"
dangerous_files: src/main.rs,Cargo.toml,Cargo.lock

@ -0,0 +1,30 @@
name: Lint
on:
push:
branches:
- master
pull_request:
branches:
- master
jobs:
lint-md:
runs-on: ubuntu-latest
name: Lint Markdown content
steps:
- name: Checkout the repository
uses: actions/checkout@v2
- name: Markdown lint for README
uses: docker://avtodev/markdown-lint:v1
with:
config: ./.markdownlint.json
args: ./README.md
lint-editorconfig:
runs-on: ubuntu-latest
name: Lint for editorconfig violations
steps:
- name: Checkout the repository
uses: actions/checkout@v2
- name: Check for editorconfig violations
uses: editorconfig-checker/action-editorconfig-checker@v1

@ -1,43 +1,41 @@
name: Rust
on:
push:
branches: [ master ]
branches:
- master
pull_request:
branches: [ master ]
branches:
- master
schedule:
- cron: '0 0 * * *'
- cron: 0 0 * * *
env:
CARGO_TERM_COLOR: always
jobs:
build:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
- uses: actions-rs/toolchain@v1
with:
profile: minimal
toolchain: stable
override: true
- uses: Swatinem/rust-cache@v1
- name: Get random cache id
run: echo "CACHE_ID=$((RANDOM))" >> $GITHUB_ENV
shell: bash
- uses: pat-s/always-upload-cache@v2.1.5
with:
path: results/results.yaml
key: results-${{ hashFiles('Cargo.lock') }}-${{ hashFiles('README.md') }}-${{ env.CACHE_ID }}
restore-keys: |
results-${{ hashFiles('Cargo.lock') }}-${{ hashFiles('README.md') }}-
results-${{ hashFiles('Cargo.lock') }}-
results-
- name: Build
run: cargo build
- name: Run
run: cargo run
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
RUST_LOG: warn
- uses: actions/checkout@v2
- uses: actions-rs/toolchain@v1
with:
profile: minimal
toolchain: stable
override: true
- uses: Swatinem/rust-cache@v1
- name: Get random cache id
run: echo "CACHE_ID=$((RANDOM))" >> $GITHUB_ENV
shell: bash
- uses: pat-s/always-upload-cache@v2.1.5
with:
path: results/*.yaml
key: results-${{ hashFiles('Cargo.lock') }}-${{ hashFiles('README.md') }}-${{ env.CACHE_ID }}
restore-keys: |
results-${{ hashFiles('Cargo.lock') }}-${{ hashFiles('README.md') }}-
results-${{ hashFiles('Cargo.lock') }}-
results-
- name: Build
run: cargo build
- name: Run
run: cargo run
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
RUST_LOG: warn

3
.gitignore vendored

@ -1,2 +1,3 @@
.idea/
target/
.vscode/
target/

@ -0,0 +1,10 @@
{
"default": true,
"MD004": false,
"MD007": false,
"MD013": {
"line_length": 1000
},
"MD024": false,
"MD032": false
}

@ -11,13 +11,13 @@ The easiest way is to go to https://github.com/rust-unofficial/awesome-rust/blob
If you want to add an entry to the `README.md` please consider this:
- is the entry valuable to people trying to get things done in Rust?
* In order to make this objective, the entry needs to either have at least 50 stars on GitHub, 2000 downloads on crates.io, or an equivalent level of other popularity metrics (which should be specified in the PR). The maintainers of this repo are not responsible for making your project popular, only for making more people aware of those projects. We don't want to have to pick and choose favourites, and so are using metrics like this to make our lives easier as maintainers.
- if you want to add something, please use the template `[ACCOUNT/REPO](https://github.com/ACCOUNT/REPO) [[CRATE](https://crates.io/crates/CRATE)] — DESCRIPTION`
* if you've not published your crate to `crates.io` remove the `[[CRATE](...)]` part.
* if you have a CI build, please add the build badge. Put the image after the description, separated by a space. Please make sure to add the branch information to the image:
* example for Travis: ` [<img src="https://api.travis-ci.org/XXX/CRATE.svg?branch=master">](https://travis-ci.org/XXX/CRATE)`
* for Github actions please see https://docs.github.com/en/actions/managing-workflow-runs/adding-a-workflow-status-badge
- please pay attention to the alphabetical ordering.
* example for Travis: `[![build badge](https://api.travis-ci.com/XXX/CRATE.svg?branch=master)](https://app.travis-ci.org/github/XXX/CRATE)`
* for Github actions please see [adding-a-workflow-status-badge](https://docs.github.com/en/actions/managing-workflow-runs/adding-a-workflow-status-badge)
- please pay attention to the alphabetical ordering
## Removing projects

21
Cargo.lock generated

@ -24,6 +24,15 @@ dependencies = [
"memchr",
]
[[package]]
name = "ansi_term"
version = "0.12.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d52a9bb7ec0cf484c551830a7ce27bd20d67eac647e1befb56b0be4ee39a55d2"
dependencies = [
"winapi",
]
[[package]]
name = "atty"
version = "0.2.14"
@ -53,6 +62,7 @@ version = "0.1.0"
dependencies = [
"chrono",
"chrono-humanize",
"diffy",
"env_logger",
"failure",
"futures",
@ -64,6 +74,7 @@ dependencies = [
"reqwest",
"scraper",
"serde",
"serde_json",
"serde_yaml",
"tokio",
]
@ -189,6 +200,15 @@ dependencies = [
"syn",
]
[[package]]
name = "diffy"
version = "0.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0c1ff48e3f358d3158f88b2c95071f28d136be31d89e5fa843095032a70bff56"
dependencies = [
"ansi_term",
]
[[package]]
name = "dtoa"
version = "0.4.8"
@ -1025,6 +1045,7 @@ dependencies = [
"pin-project-lite",
"rustls",
"serde",
"serde_json",
"serde_urlencoded",
"tokio",
"tokio-rustls",

@ -10,7 +10,7 @@ default-run = "awesome-rust"
[dependencies]
pulldown-cmark = "0.8"
futures = "0.3"
reqwest = { version="0.11", default_features=false, features=["rustls-tls"] }
reqwest = { version="0.11", default_features=false, features=["rustls-tls", "json"] }
tokio = {version = "1", features = ["macros", "rt", "rt-multi-thread", "time"] }
serde = { version = "1.0", features = ["derive"] }
serde_yaml = "0.8"
@ -22,4 +22,6 @@ log = "0.4"
regex = "1"
scraper = "0.11"
chrono = { version = "0.4", features = ["serde"] }
chrono-humanize = "0.2"
chrono-humanize = "0.2"
diffy = "0.2"
serde_json = "*"

File diff suppressed because it is too large Load Diff

@ -1,2 +1,2 @@
# Note that this is in a directory so we can cache it with Travis
results.yaml
*.yaml

@ -1,4 +1,5 @@
use pulldown_cmark::{Parser, Event, Tag};
use std::u8;
use std::{cmp::Ordering, fs};
use futures::future::{select_all, BoxFuture, FutureExt};
use std::collections::{BTreeSet, BTreeMap};
@ -14,6 +15,54 @@ use chrono::{Local, DateTime, Duration};
use std::env;
use tokio::sync::Semaphore;
use tokio::sync::SemaphorePermit;
use diffy::create_patch;
/// Default number of GitHub stars a list entry needs to pass the popularity check.
/// Sections can lower this via `override_stars`; URLs in `POPULARITY_OVERRIDES`
/// are recorded with exactly this value so they pass the default threshold.
const MINIMUM_GITHUB_STARS: u32 = 50;
/// Number of crates.io downloads that lets an entry pass the popularity check
/// even when its GitHub star count is below the required threshold.
const MINIMUM_CARGO_DOWNLOADS: u32 = 2000;
// Per-section override of the required star count.
//
// `level` is the markdown heading level and `text` is the text seen under it.
// Returns `Some(threshold)` when the section gets a relaxed requirement and
// `None` when the default (MINIMUM_GITHUB_STARS) should apply.
//
// The defaults should be used almost everywhere. The exceptions are areas with
// few well-starred projects, where a handful sit just below the threshold and
// lowering it lets a few more of them in.
fn override_stars(level: u32, text: &str) -> Option<u32> {
    match level {
        // Zero, because many resources are non-github/non-cargo links and
        // overriding each one individually would be annoying. These entries
        // should be evaluated with more primitive means.
        2 if text.contains("Resources") => Some(0),
        3 if text.contains("Games") || text.contains("Emulators") => Some(40),
        // Everything else uses the defaults.
        _ => None,
    }
}
lazy_static! {
    // URLs that are considered popular enough on their own, for cases where the
    // stars/downloads cannot be counted automatically. Every entry needs a good
    // reason, given in the comment next to it.
    static ref POPULARITY_OVERRIDES: Vec<String> = [
        // Many repos of Rust code, collectively > 50 stars
        "https://github.com/maidsafe",
        // Uses its own VCS at https://nest.pijul.com/pijul/pijul with 190 stars at last check
        "https://pijul.org",
        // No direct gitlab support, but >1000 stars there
        "https://gitlab.com/veloren/veloren",
        // 394 stars
        "https://gitlab.redox-os.org/redox-os/redox",
        // https://github.com/jmacdonald/amp has 2.9k stars
        "https://amp.rs",
        // > 350k downloads
        "https://marketplace.visualstudio.com/items?itemName=vadimcn.vscode-lldb",
        // https://github.com/gitpod-io/gitpod has 4.7k stars
        "https://gitpod.io",
        // https://gitlab.gnome.org/GNOME/gnome-builder has 133 stars
        "https://wiki.gnome.org/Apps/Builder",
        // > 260k downloads
        "https://marketplace.visualstudio.com/items?itemName=matklad.rust-analyzer",
        // > 1M downloads
        "https://marketplace.visualstudio.com/items?itemName=rust-lang.rust",
        // https://github.com/rust-lang/docs.rs has >600 stars
        "https://docs.rs",
        // https://github.com/rust-bio/rust-bio on its own has >900 stars
        "https://github.com/rust-bio",
        // Lots of repos with good star counts
        "https://github.com/contain-rs",
        // Lots of repos with good star counts
        "https://github.com/georust",
        // https://github.com/sebcrozet/kiss3d has >900 stars
        "http://kiss3d.org",
        // Various high-stars repositories
        "https://github.com/rust-qt",
        // Can't tell the count directly, but various mirrors of it
        // (e.g. https://github.com/dgreid/crosvm) have enough stars to show sufficient interest
        "https://chromium.googlesource.com/chromiumos/platform/crosvm/",
        // https://github.com/seed-rs/seed has 2.1k stars
        "https://seed-rs.org/",
        // This one gets a free pass :)
        "https://crates.io",
        // First private cargo registry
        // (https://cloudsmith.com/blog/worlds-first-private-cargo-registry-w-cloudsmith-rust/)
        // and not much in the way of other options yet. See also
        // https://github.com/rust-unofficial/awesome-rust/pull/1141#discussion_r688711555
        "https://cloudsmith.com/cargo-registry/",
    ]
    .iter()
    .map(|url| url.to_string())
    .collect();
}
#[derive(Debug, Fail, Serialize, Deserialize)]
enum CheckerError {
@ -27,7 +76,7 @@ enum CheckerError {
},
#[fail(display = "too many requests")]
TooManyRequests,
TooManyRequests,
#[fail(display = "reqwest error: {}", error)]
ReqwestError {
@ -111,15 +160,83 @@ fn get_url(url: String) -> BoxFuture<'static, (String, Result<(), CheckerError>)
}.boxed()
}
lazy_static! {
    // Matches exactly a GitHub repository page (optionally with a trailing slash),
    // capturing the `org` and `repo` path segments. The dot is escaped so only the
    // literal host `github.com` matches.
    static ref GITHUB_REPO_REGEX: Regex = Regex::new(r"^https://github\.com/(?P<org>[^/]+)/(?P<repo>[^/]+)/?$").unwrap();
    // Matches the GitHub API host prefix anywhere in a URL.
    // NOTE(review): left unanchored and unescaped because its users are outside this view.
    static ref GITHUB_API_REGEX: Regex = Regex::new(r"https://api.github.com/").unwrap();
    // Matches exactly a crates.io crate page (optionally with a trailing slash),
    // capturing the crate name. Anchored at the start so that a longer URL which
    // merely embeds a crates.io link (e.g. in a query string) is not treated as a
    // crate page and partially rewritten by `replace_all`.
    static ref CRATE_REGEX: Regex = Regex::new(r"^https://crates\.io/crates/(?P<crate>[^/]+)/?$").unwrap();
}
/// The part of the JSON body that `get_stars` deserializes from the GitHub
/// repository API response; only the stargazer count is read.
#[derive(Deserialize, Debug)]
struct GithubStars {
// Number of stars reported for the repository.
stargazers_count: u32
}
async fn get_stars(github_url: &str) -> Option<u32> {
warn!("Downloading Github stars for {}", github_url);
let rewritten = GITHUB_REPO_REGEX.replace_all(&github_url, "https://api.github.com/repos/$org/$repo").to_string();
let mut req = CLIENT
.get(&rewritten);
if let Ok(username) = env::var("GITHUB_USERNAME") {
if let Ok(password) = env::var("GITHUB_TOKEN") {
// needs a token with at least public_repo scope
req = req.basic_auth(username, Some(password));
}
}
let resp = req.send().await;
match resp {
Err(err) => {
warn!("Error while getting {}: {}", github_url, err);
return None;
}
Ok(ok) => {
let raw = ok.text().await.unwrap();
let data = match serde_json::from_str::<GithubStars>(&raw) {
Ok(val) => val,
Err(_) => {
panic!("{:?}", raw);
}
};
return Some(data.stargazers_count);
}
}
}
/// Contents of the `crate` object in the response that `get_downloads`
/// deserializes; only the download count is read.
#[derive(Deserialize, Debug)]
struct CrateInfo {
// Download count reported for the crate.
downloads: u64
}
/// Top-level shape of the response that `get_downloads` deserializes.
#[derive(Deserialize, Debug)]
struct Crate {
// `crate` is a Rust keyword, so the JSON key is mapped onto `info`.
#[serde(rename = "crate")]
info: CrateInfo
}
/// Looks up the download count of a crate on crates.io.
///
/// Despite its name, `github_url` is the crates.io crate page URL; it is
/// rewritten to the matching `https://crates.io/api/v1/crates/<crate>`
/// endpoint via `CRATE_REGEX`.
///
/// Returns `Some(downloads)` on success. Returns `None` (after logging a
/// warning) when the request fails or the response is not the expected JSON —
/// instead of aborting the whole run with a panic.
async fn get_downloads(github_url: &str) -> Option<u64> {
    warn!("Downloading Crates downloads for {}", github_url);
    let rewritten = CRATE_REGEX
        .replace_all(github_url, "https://crates.io/api/v1/crates/$crate")
        .to_string();

    let resp = match CLIENT.get(&rewritten).send().await {
        Ok(resp) => resp,
        Err(err) => {
            warn!("Error while getting {}: {}", github_url, err);
            return None;
        }
    };
    match resp.json::<Crate>().await {
        Ok(data) => Some(data.info.downloads),
        Err(err) => {
            warn!("Unexpected crates.io response for {}: {}", github_url, err);
            None
        }
    }
}
fn get_url_core(url: String) -> BoxFuture<'static, (String, Result<(), CheckerError>)> {
async move {
let mut res = Err(CheckerError::NotTried);
for _ in 0..5u8 {
debug!("Running {}", url);
lazy_static! {
static ref GITHUB_REPO_REGEX: Regex = Regex::new(r"^https://github.com/(?P<org>[^/]+)/(?P<repo>[^/]+)$").unwrap();
static ref GITHUB_API_REGEX: Regex = Regex::new(r"https://api.github.com/").unwrap();
}
if env::var("GITHUB_USERNAME").is_ok() && env::var("GITHUB_TOKEN").is_ok() && GITHUB_REPO_REGEX.is_match(&url) {
let rewritten = GITHUB_REPO_REGEX.replace_all(&url, "https://api.github.com/repos/$org/$repo");
info!("Replacing {} with {} to workaround rate limits on Github", url, rewritten);
@ -246,6 +363,12 @@ struct Link {
type Results = BTreeMap<String, Link>;
/// Cached popularity figures, read from and written back to
/// `results/popularity.yaml` by `main`. Both maps are keyed by the link URL.
#[derive(Debug, Serialize, Deserialize)]
struct PopularityData {
// Star count per URL; also holds the entries seeded from POPULARITY_OVERRIDES.
pub github_stars: BTreeMap<String, u32>,
// Download count per crates.io URL, stored as u32.
pub cargo_downloads: BTreeMap<String, u32>
}
#[tokio::main]
async fn main() -> Result<(), Error> {
env_logger::init();
@ -258,6 +381,16 @@ async fn main() -> Result<(), Error> {
.and_then(|x| serde_yaml::from_str(&x).map_err(|e| format_err!("{}", e)))
.unwrap_or(Results::new());
let mut popularity_data: PopularityData = fs::read_to_string("results/popularity.yaml")
.map_err(|e| format_err!("{}", e))
.and_then(|x| serde_yaml::from_str(&x).map_err(|e| format_err!("{}", e)))
.unwrap_or(PopularityData { github_stars: BTreeMap::new(), cargo_downloads: BTreeMap::new()});
// Overrides for popularity count, reasons at the top of the file
for url in POPULARITY_OVERRIDES.iter() {
popularity_data.github_stars.insert(url.clone(), MINIMUM_GITHUB_STARS);
}
let mut url_checks = vec![];
let min_between_checks: Duration = Duration::days(3);
@ -266,6 +399,9 @@ async fn main() -> Result<(), Error> {
if !url.starts_with("http") {
return;
}
if used.contains(&url) {
return;
}
used.insert(url.clone());
if let Some(link) = results.get(&url) {
if let Working::Yes = link.working {
@ -281,12 +417,150 @@ async fn main() -> Result<(), Error> {
let mut to_check: Vec<String> = vec![];
for (event, _range) in parser.into_offset_iter() {
#[derive(Debug)]
struct ListInfo {
location: usize,
data: Vec<String>
}
let mut list_items: Vec<ListInfo> = Vec::new();
let mut in_list_item = false;
let mut list_item: String = String::new();
let mut link_count: u8 = 0;
let mut github_stars: Option<u32> = None;
let mut cargo_downloads: Option<u32> = None;
let mut required_stars: u32 = MINIMUM_GITHUB_STARS;
let mut last_level: u32 = 0;
let mut star_override_level: Option<u32> = None;
for (event, range) in parser.into_offset_iter() {
match event {
Event::Start(tag) => {
match tag {
Tag::Link(_link_type, url, _title) | Tag::Image(_link_type, url, _title) => {
to_check.push(url.to_string());
if !url.starts_with("#") {
let new_url = url.to_string();
let existing = popularity_data.github_stars.get(&new_url);
if let Some(stars) = existing {
// Use existing star data, but re-retrieve url to check aliveness
// Some will have overrides, so don't check the regex yet
github_stars = Some(*stars)
}
else if GITHUB_REPO_REGEX.is_match(&url) && existing.is_none() {
github_stars = get_stars(&url).await;
if let Some(raw_stars) = github_stars {
popularity_data.github_stars.insert(new_url, raw_stars);
if raw_stars >= required_stars {
fs::write("results/popularity.yaml", serde_yaml::to_string(&popularity_data)?)?;
}
}
link_count += 1;
continue;
}
else if CRATE_REGEX.is_match(&url) {
let existing = popularity_data.cargo_downloads.get(&new_url);
if let Some(downloads) = existing {
cargo_downloads = Some(*downloads);
} else {
let raw_downloads = get_downloads(&url).await;
if let Some(positive_downloads) = raw_downloads {
cargo_downloads = Some(positive_downloads.clamp(0, u32::MAX as u64) as u32);
popularity_data.cargo_downloads.insert(new_url, cargo_downloads.unwrap());
if cargo_downloads.unwrap_or(0) >= MINIMUM_CARGO_DOWNLOADS {
fs::write("results/popularity.yaml", serde_yaml::to_string(&popularity_data)?)?;
}
}
link_count += 1;
continue;
}
}
to_check.push(url.to_string());
link_count += 1;
}
}
Tag::List(_) => {
if in_list_item && list_item.len() > 0 {
list_items.last_mut().unwrap().data.push(list_item.clone());
in_list_item = false;
}
list_items.push(ListInfo {location: range.start, data: Vec::new()});
}
Tag::Item => {
if in_list_item && list_item.len() > 0 {
list_items.last_mut().unwrap().data.push(list_item.clone());
}
in_list_item = true;
list_item = String::new();
link_count = 0;
github_stars = None;
cargo_downloads = None;
}
Tag::Heading(level) => {
last_level = level;
if let Some(override_level) = star_override_level {
if level == override_level {
star_override_level = None;
required_stars = MINIMUM_GITHUB_STARS;
}
}
}
Tag::Paragraph => {}
_ => {
if in_list_item {
in_list_item = false;
}
}
}
}
Event::Text(text) => {
let possible_override = override_stars(last_level, &text);
if let Some(override_value) = possible_override {
star_override_level = Some(last_level);
required_stars = override_value;
}
if in_list_item {
list_item.push_str(&text);
}
}
Event::End(tag) => {
match tag {
Tag::Item => {
if list_item.len() > 0 {
if link_count > 0 {
if github_stars.unwrap_or(0) < required_stars && cargo_downloads.unwrap_or(0) < MINIMUM_CARGO_DOWNLOADS {
if github_stars.is_none() {
warn!("No valid github link");
}
if cargo_downloads.is_none() {
warn!("No valid crates link");
}
return Err(format_err!("Not high enough metrics ({:?} stars < {}, and {:?} cargo downloads < {}): {}", github_stars, required_stars, cargo_downloads, MINIMUM_CARGO_DOWNLOADS, list_item));
}
}
list_items.last_mut().unwrap().data.push(list_item.clone());
list_item = String::new();
}
in_list_item = false
}
Tag::List(_) => {
let list_info = list_items.pop().unwrap();
if list_info.data.iter().find(|s| *s == "License").is_some() && list_info.data.iter().find(|s| *s == "Resources").is_some() {
// Ignore wrong ordering in top-level list
continue
}
let mut sorted_recent_list = list_info.data.to_vec();
sorted_recent_list.sort_by(|a, b| a.to_lowercase().cmp(&b.to_lowercase()));
let joined_recent = list_info.data.join("\n");
let joined_sorted = sorted_recent_list.join("\n");
let patch = create_patch(&joined_recent, &joined_sorted);
if patch.hunks().len() > 0 {
println!("{}", patch);
return Err(format_err!("Sorting error"));
}
}
_ => {}
}
@ -297,6 +571,7 @@ async fn main() -> Result<(), Error> {
_ => {}
}
}
fs::write("results/popularity.yaml", serde_yaml::to_string(&popularity_data)?)?;
to_check.sort_by(|a,b| {
let get_time = |k| {
@ -424,4 +699,4 @@ async fn main() -> Result<(), Error> {
} else {
Err(format_err!("{} urls with errors", failed))
}
}
}

Loading…
Cancel
Save