From 19a1a077c5b236e16fe5346765fd824715b8cf52 Mon Sep 17 00:00:00 2001 From: flamingos-cant <45780476+flamingo-cant-draw@users.noreply.github.com> Date: Fri, 15 Mar 2024 11:03:29 +0000 Subject: [PATCH] Add a blocklist for URLs. (#4515) * Add a blocklist for URLs. * Fix SQL format * Make clippy happy. * Use regex for URL matching. * Escape regex chars in URLs. * Use post for modification. * Make URL block regex static and remove API routes. * Add date fields to table and use transaction. * Use Cache for blocklist. * Rename check_links + move list to parameters of process_markdown. * SQL format. * Format, again. * Remove println. * Add API test. * Set a shorter lifetime for regex in debug mode. * Add missing macro. * Update lemmy-js-client * Update api_test/pnpm-lock.yaml * Don't break other tests * Use different URL for test --------- Co-authored-by: Dessalines Co-authored-by: Nutomic --- api_tests/package.json | 2 +- api_tests/pnpm-lock.yaml | 8 +- api_tests/src/post.spec.ts | 23 ++ crates/api/src/local_user/save_settings.rs | 6 +- crates/api/src/site/leave_admin.rs | 3 + crates/api_common/Cargo.toml | 2 + crates/api_common/src/site.rs | 4 + crates/api_common/src/utils.rs | 63 +++- crates/api_crud/src/comment/create.rs | 4 +- crates/api_crud/src/comment/update.rs | 10 +- crates/api_crud/src/community/create.rs | 5 +- crates/api_crud/src/community/update.rs | 5 +- crates/api_crud/src/post/create.rs | 6 +- crates/api_crud/src/post/update.rs | 7 +- crates/api_crud/src/private_message/create.rs | 4 +- crates/api_crud/src/private_message/update.rs | 5 +- crates/api_crud/src/site/create.rs | 4 +- crates/api_crud/src/site/read.rs | 3 + crates/api_crud/src/site/update.rs | 12 +- crates/apub/src/objects/comment.rs | 5 +- crates/apub/src/objects/community.rs | 5 +- crates/apub/src/objects/instance.rs | 10 +- crates/apub/src/objects/person.rs | 4 +- crates/apub/src/objects/post.rs | 4 +- crates/apub/src/objects/private_message.rs | 5 +- 
.../src/impls/local_site_url_blocklist.rs | 49 ++++ crates/db_schema/src/impls/mod.rs | 1 + crates/db_schema/src/schema.rs | 10 + .../src/source/local_site_url_blocklist.rs | 28 ++ crates/db_schema/src/source/mod.rs | 1 + crates/utils/src/error.rs | 1 + crates/utils/src/utils/markdown/mod.rs | 271 +++++++++++------- crates/utils/src/utils/validation.rs | 67 ++++- .../2024-03-06-201637_url_blocklist/down.sql | 3 + .../2024-03-06-201637_url_blocklist/up.sql | 7 + 35 files changed, 514 insertions(+), 133 deletions(-) create mode 100644 crates/db_schema/src/impls/local_site_url_blocklist.rs create mode 100644 crates/db_schema/src/source/local_site_url_blocklist.rs create mode 100644 migrations/2024-03-06-201637_url_blocklist/down.sql create mode 100644 migrations/2024-03-06-201637_url_blocklist/up.sql diff --git a/api_tests/package.json b/api_tests/package.json index 9e7042f2d..f6c436312 100644 --- a/api_tests/package.json +++ b/api_tests/package.json @@ -27,7 +27,7 @@ "eslint": "^8.57.0", "eslint-plugin-prettier": "^5.0.1", "jest": "^29.5.0", - "lemmy-js-client": "0.19.4-alpha.6", + "lemmy-js-client": "0.19.4-alpha.8", "prettier": "^3.2.5", "ts-jest": "^29.1.0", "typescript": "^5.3.3" diff --git a/api_tests/pnpm-lock.yaml b/api_tests/pnpm-lock.yaml index 213111ab1..321fffc8b 100644 --- a/api_tests/pnpm-lock.yaml +++ b/api_tests/pnpm-lock.yaml @@ -30,8 +30,8 @@ devDependencies: specifier: ^29.5.0 version: 29.7.0(@types/node@20.11.22) lemmy-js-client: - specifier: 0.19.4-alpha.6 - version: 0.19.4-alpha.6 + specifier: 0.19.4-alpha.8 + version: 0.19.4-alpha.8 prettier: specifier: ^3.2.5 version: 3.2.5 @@ -2390,8 +2390,8 @@ packages: engines: {node: '>=6'} dev: true - /lemmy-js-client@0.19.4-alpha.6: - resolution: {integrity: sha512-x4htMlpoZ7hzrhrIk82aompVxbpu2ZDWtmWNGraM0+27nUCDf6gYxJH5nb5R/o39BQe5KSHq6zoBdliBwAY40w==} + /lemmy-js-client@0.19.4-alpha.8: + resolution: {integrity: 
sha512-8vjqUYVOhyUTcmG9FvPLjrWziVwNa2/Zi+kSflTrajJsK0V+5DclJ5dhdVMUQ4DEA70gb0OuNMDlipPG2FoS5A==} dependencies: cross-fetch: 4.0.0 form-data: 4.0.0 diff --git a/api_tests/src/post.spec.ts b/api_tests/src/post.spec.ts index 02080c4cc..59a0557bf 100644 --- a/api_tests/src/post.spec.ts +++ b/api_tests/src/post.spec.ts @@ -18,6 +18,7 @@ import { resolveBetaCommunity, createComment, deletePost, + delay, removePost, getPost, unfollowRemotes, @@ -710,3 +711,25 @@ test("Fetch post via redirect", async () => { expect(gammaPost.post?.post.ap_id).toBe(alphaPost.post_view.post.ap_id); await unfollowRemotes(alpha); }); + +test("Block post that contains banned URL", async () => { + let editSiteForm: EditSite = { + blocked_urls: ["https://evil.com/"], + }; + + await epsilon.editSite(editSiteForm); + + await delay(500); + + if (!betaCommunity) { + throw "Missing beta community"; + } + + expect( + createPost(epsilon, betaCommunity.community.id, "https://evil.com"), + ).rejects.toStrictEqual(Error("blocked_url")); + + // Later tests need this to be empty + editSiteForm.blocked_urls = []; + await epsilon.editSite(editSiteForm); +}); diff --git a/crates/api/src/local_user/save_settings.rs b/crates/api/src/local_user/save_settings.rs index d918bdc00..927496416 100644 --- a/crates/api/src/local_user/save_settings.rs +++ b/crates/api/src/local_user/save_settings.rs @@ -3,6 +3,7 @@ use lemmy_api_common::{ context::LemmyContext, person::SaveUserSettings, utils::{ + get_url_blocklist, local_site_to_slur_regex, process_markdown_opt, proxy_image_link_opt_api, @@ -35,7 +36,10 @@ pub async fn save_user_settings( let site_view = SiteView::read_local(&mut context.pool()).await?; let slur_regex = local_site_to_slur_regex(&site_view.local_site); - let bio = diesel_option_overwrite(process_markdown_opt(&data.bio, &slur_regex, &context).await?); + let url_blocklist = get_url_blocklist(&context).await?; + let bio = diesel_option_overwrite( + process_markdown_opt(&data.bio, &slur_regex, &url_blocklist, 
&context).await?, + ); let avatar = proxy_image_link_opt_api(&data.avatar, &context).await?; let banner = proxy_image_link_opt_api(&data.banner, &context).await?; diff --git a/crates/api/src/site/leave_admin.rs b/crates/api/src/site/leave_admin.rs index 77e28332b..0d149d07d 100644 --- a/crates/api/src/site/leave_admin.rs +++ b/crates/api/src/site/leave_admin.rs @@ -4,6 +4,7 @@ use lemmy_db_schema::{ source::{ actor_language::SiteLanguage, language::Language, + local_site_url_blocklist::LocalSiteUrlBlocklist, local_user::{LocalUser, LocalUserUpdateForm}, moderator::{ModAdd, ModAddForm}, tagline::Tagline, @@ -62,6 +63,7 @@ pub async fn leave_admin( let taglines = Tagline::get_all(&mut context.pool(), site_view.local_site.id).await?; let custom_emojis = CustomEmojiView::get_all(&mut context.pool(), site_view.local_site.id).await?; + let blocked_urls = LocalSiteUrlBlocklist::get_all(&mut context.pool()).await?; Ok(Json(GetSiteResponse { site_view, @@ -72,5 +74,6 @@ pub async fn leave_admin( discussion_languages, taglines, custom_emojis, + blocked_urls, })) } diff --git a/crates/api_common/Cargo.toml b/crates/api_common/Cargo.toml index 3acd7d4ca..47545446f 100644 --- a/crates/api_common/Cargo.toml +++ b/crates/api_common/Cargo.toml @@ -59,6 +59,8 @@ uuid = { workspace = true, optional = true } tokio = { workspace = true, optional = true } reqwest = { workspace = true, optional = true } ts-rs = { workspace = true, optional = true } +moka.workspace = true +anyhow.workspace = true once_cell = { workspace = true, optional = true } actix-web = { workspace = true, optional = true } enum-map = { workspace = true } diff --git a/crates/api_common/src/site.rs b/crates/api_common/src/site.rs index bed81c2e4..d87cbdaaf 100644 --- a/crates/api_common/src/site.rs +++ b/crates/api_common/src/site.rs @@ -6,6 +6,7 @@ use lemmy_db_schema::{ federation_queue_state::FederationQueueState, instance::Instance, language::Language, + local_site_url_blocklist::LocalSiteUrlBlocklist, 
tagline::Tagline, }, ListingType, @@ -268,6 +269,8 @@ pub struct EditSite { pub allowed_instances: Option>, /// A list of blocked instances. pub blocked_instances: Option>, + /// A list of blocked URLs + pub blocked_urls: Option>, /// A list of taglines shown at the top of the front page. pub taglines: Option>, pub registration_mode: Option, @@ -305,6 +308,7 @@ pub struct GetSiteResponse { pub taglines: Vec, /// A list of custom emojis your site supports. pub custom_emojis: Vec, + pub blocked_urls: Vec, } #[skip_serializing_none] diff --git a/crates/api_common/src/utils.rs b/crates/api_common/src/utils.rs index d51751854..b13d21f9f 100644 --- a/crates/api_common/src/utils.rs +++ b/crates/api_common/src/utils.rs @@ -17,6 +17,7 @@ use lemmy_db_schema::{ instance_block::InstanceBlock, local_site::LocalSite, local_site_rate_limit::LocalSiteRateLimit, + local_site_url_blocklist::LocalSiteUrlBlocklist, password_reset_request::PasswordResetRequest, person::{Person, PersonUpdateForm}, person_block::PersonBlock, @@ -38,18 +39,24 @@ use lemmy_utils::{ rate_limit::{ActionType, BucketConfig}, settings::structs::{PictrsImageMode, Settings}, utils::{ - markdown::markdown_rewrite_image_links, + markdown::{markdown_check_for_blocked_urls, markdown_rewrite_image_links}, slurs::{build_slur_regex, remove_slurs}, }, }; -use regex::Regex; +use moka::future::Cache; +use once_cell::sync::Lazy; +use regex::{escape, Regex, RegexSet}; use rosetta_i18n::{Language, LanguageId}; -use std::collections::HashSet; +use std::{collections::HashSet, time::Duration}; use tracing::warn; use url::{ParseError, Url}; use urlencoding::encode; pub static AUTH_COOKIE_NAME: &str = "jwt"; +#[cfg(debug_assertions)] +static URL_BLOCKLIST_RECHECK_DELAY: Duration = Duration::from_millis(500); +#[cfg(not(debug_assertions))] +static URL_BLOCKLIST_RECHECK_DELAY: Duration = Duration::from_secs(60); #[tracing::instrument(skip_all)] pub async fn is_mod_or_admin( @@ -516,6 +523,47 @@ pub fn 
local_site_opt_to_sensitive(local_site: &Option) -> bool { .unwrap_or(false) } +pub async fn get_url_blocklist(context: &LemmyContext) -> LemmyResult { + static URL_BLOCKLIST: Lazy> = Lazy::new(|| { + Cache::builder() + .max_capacity(1) + .time_to_live(URL_BLOCKLIST_RECHECK_DELAY) + .build() + }); + + Ok( + URL_BLOCKLIST + .try_get_with::<_, LemmyError>((), async { + let urls = LocalSiteUrlBlocklist::get_all(&mut context.pool()).await?; + + let regexes = urls.iter().map(|url| { + let url = &url.url; + let parsed = Url::parse(url).expect("Coundln't parse URL."); + if url.ends_with('/') { + format!( + "({}://)?{}{}?", + parsed.scheme(), + escape(parsed.domain().expect("No domain.")), + escape(parsed.path()) + ) + } else { + format!( + "({}://)?{}{}", + parsed.scheme(), + escape(parsed.domain().expect("No domain.")), + escape(parsed.path()) + ) + } + }); + + let set = RegexSet::new(regexes)?; + Ok(set) + }) + .await + .map_err(|e| anyhow::anyhow!("Failed to build URL blocklist due to `{}`", e))?, + ) +} + pub async fn send_application_approved_email( user: &LocalUserView, settings: &Settings, @@ -867,9 +915,13 @@ fn limit_expire_time(expires: DateTime) -> LemmyResult pub async fn process_markdown( text: &str, slur_regex: &Option, + url_blocklist: &RegexSet, context: &LemmyContext, ) -> LemmyResult { let text = remove_slurs(text, slur_regex); + + markdown_check_for_blocked_urls(&text, url_blocklist)?; + if context.settings().pictrs_config()?.image_mode() == PictrsImageMode::ProxyAllImages { let (text, links) = markdown_rewrite_image_links(text); RemoteImage::create(&mut context.pool(), links).await?; @@ -882,10 +934,13 @@ pub async fn process_markdown( pub async fn process_markdown_opt( text: &Option, slur_regex: &Option, + url_blocklist: &RegexSet, context: &LemmyContext, ) -> LemmyResult> { match text { - Some(t) => process_markdown(t, slur_regex, context).await.map(Some), + Some(t) => process_markdown(t, slur_regex, url_blocklist, context) + .await + .map(Some), 
None => Ok(None), } } diff --git a/crates/api_crud/src/comment/create.rs b/crates/api_crud/src/comment/create.rs index e5a869223..9269ec382 100644 --- a/crates/api_crud/src/comment/create.rs +++ b/crates/api_crud/src/comment/create.rs @@ -10,6 +10,7 @@ use lemmy_api_common::{ check_post_deleted_or_removed, generate_local_apub_endpoint, get_post, + get_url_blocklist, is_mod_or_admin, local_site_to_slur_regex, process_markdown, @@ -44,7 +45,8 @@ pub async fn create_comment( let local_site = LocalSite::read(&mut context.pool()).await?; let slur_regex = local_site_to_slur_regex(&local_site); - let content = process_markdown(&data.content, &slur_regex, &context).await?; + let url_blocklist = get_url_blocklist(&context).await?; + let content = process_markdown(&data.content, &slur_regex, &url_blocklist, &context).await?; is_valid_body_field(&Some(content.clone()), false)?; // Check for a community ban diff --git a/crates/api_crud/src/comment/update.rs b/crates/api_crud/src/comment/update.rs index 2d6bf79be..e814ebd6b 100644 --- a/crates/api_crud/src/comment/update.rs +++ b/crates/api_crud/src/comment/update.rs @@ -5,7 +5,12 @@ use lemmy_api_common::{ comment::{CommentResponse, EditComment}, context::LemmyContext, send_activity::{ActivityChannel, SendActivityData}, - utils::{check_community_user_action, local_site_to_slur_regex, process_markdown_opt}, + utils::{ + check_community_user_action, + get_url_blocklist, + local_site_to_slur_regex, + process_markdown_opt, + }, }; use lemmy_db_schema::{ source::{ @@ -54,7 +59,8 @@ pub async fn update_comment( .await?; let slur_regex = local_site_to_slur_regex(&local_site); - let content = process_markdown_opt(&data.content, &slur_regex, &context).await?; + let url_blocklist = get_url_blocklist(&context).await?; + let content = process_markdown_opt(&data.content, &slur_regex, &url_blocklist, &context).await?; is_valid_body_field(&content, false)?; let comment_id = data.comment_id; diff --git 
a/crates/api_crud/src/community/create.rs b/crates/api_crud/src/community/create.rs index ef2da9ef8..679655078 100644 --- a/crates/api_crud/src/community/create.rs +++ b/crates/api_crud/src/community/create.rs @@ -9,6 +9,7 @@ use lemmy_api_common::{ generate_inbox_url, generate_local_apub_endpoint, generate_shared_inbox_url, + get_url_blocklist, is_admin, local_site_to_slur_regex, process_markdown_opt, @@ -53,9 +54,11 @@ pub async fn create_community( } let slur_regex = local_site_to_slur_regex(&local_site); + let url_blocklist = get_url_blocklist(&context).await?; check_slurs(&data.name, &slur_regex)?; check_slurs(&data.title, &slur_regex)?; - let description = process_markdown_opt(&data.description, &slur_regex, &context).await?; + let description = + process_markdown_opt(&data.description, &slur_regex, &url_blocklist, &context).await?; let icon = proxy_image_link_api(&data.icon, &context).await?; let banner = proxy_image_link_api(&data.banner, &context).await?; diff --git a/crates/api_crud/src/community/update.rs b/crates/api_crud/src/community/update.rs index 14bb5c326..83ffded13 100644 --- a/crates/api_crud/src/community/update.rs +++ b/crates/api_crud/src/community/update.rs @@ -7,6 +7,7 @@ use lemmy_api_common::{ send_activity::{ActivityChannel, SendActivityData}, utils::{ check_community_mod_action, + get_url_blocklist, local_site_to_slur_regex, process_markdown_opt, proxy_image_link_opt_api, @@ -36,8 +37,10 @@ pub async fn update_community( let local_site = LocalSite::read(&mut context.pool()).await?; let slur_regex = local_site_to_slur_regex(&local_site); + let url_blocklist = get_url_blocklist(&context).await?; check_slurs_opt(&data.title, &slur_regex)?; - let description = process_markdown_opt(&data.description, &slur_regex, &context).await?; + let description = + process_markdown_opt(&data.description, &slur_regex, &url_blocklist, &context).await?; is_valid_body_field(&data.description, false)?; let description = diesel_option_overwrite(description); 
diff --git a/crates/api_crud/src/post/create.rs b/crates/api_crud/src/post/create.rs index 07d577f51..fabab6b09 100644 --- a/crates/api_crud/src/post/create.rs +++ b/crates/api_crud/src/post/create.rs @@ -9,6 +9,7 @@ use lemmy_api_common::{ utils::{ check_community_user_action, generate_local_apub_endpoint, + get_url_blocklist, honeypot_check, local_site_to_slur_regex, mark_post_as_read, @@ -38,6 +39,7 @@ use lemmy_utils::{ validation::{ check_url_scheme, clean_url_params, + is_url_blocked, is_valid_alt_text_field, is_valid_body_field, is_valid_post_title, @@ -60,8 +62,9 @@ pub async fn create_post( let slur_regex = local_site_to_slur_regex(&local_site); check_slurs(&data.name, &slur_regex)?; + let url_blocklist = get_url_blocklist(&context).await?; - let body = process_markdown_opt(&data.body, &slur_regex, &context).await?; + let body = process_markdown_opt(&data.body, &slur_regex, &url_blocklist, &context).await?; let data_url = data.url.as_ref(); let url = data_url.map(clean_url_params); // TODO no good way to handle a "clear" let custom_thumbnail = data.custom_thumbnail.as_ref().map(clean_url_params); @@ -69,6 +72,7 @@ pub async fn create_post( is_valid_post_title(&data.name)?; is_valid_body_field(&body, true)?; is_valid_alt_text_field(&data.alt_text)?; + is_url_blocked(&url, &url_blocklist)?; check_url_scheme(&url)?; check_url_scheme(&custom_thumbnail)?; diff --git a/crates/api_crud/src/post/update.rs b/crates/api_crud/src/post/update.rs index 6db65dffe..08c5425b9 100644 --- a/crates/api_crud/src/post/update.rs +++ b/crates/api_crud/src/post/update.rs @@ -8,6 +8,7 @@ use lemmy_api_common::{ send_activity::{ActivityChannel, SendActivityData}, utils::{ check_community_user_action, + get_url_blocklist, local_site_to_slur_regex, process_markdown_opt, proxy_image_link_opt_apub, @@ -30,6 +31,7 @@ use lemmy_utils::{ validation::{ check_url_scheme, clean_url_params, + is_url_blocked, is_valid_alt_text_field, is_valid_body_field, is_valid_post_title, @@ -51,9 +53,11 @@ 
pub async fn update_post( let url = data.url.as_ref().map(clean_url_params); let custom_thumbnail = data.custom_thumbnail.as_ref().map(clean_url_params); + let url_blocklist = get_url_blocklist(&context).await?; + let slur_regex = local_site_to_slur_regex(&local_site); check_slurs_opt(&data.name, &slur_regex)?; - let body = process_markdown_opt(&data.body, &slur_regex, &context).await?; + let body = process_markdown_opt(&data.body, &slur_regex, &url_blocklist, &context).await?; if let Some(name) = &data.name { is_valid_post_title(name)?; @@ -61,6 +65,7 @@ pub async fn update_post( is_valid_body_field(&body, true)?; is_valid_alt_text_field(&data.alt_text)?; + is_url_blocked(&url, &url_blocklist)?; check_url_scheme(&url)?; check_url_scheme(&custom_thumbnail)?; diff --git a/crates/api_crud/src/private_message/create.rs b/crates/api_crud/src/private_message/create.rs index c4832ec70..32d8b99e6 100644 --- a/crates/api_crud/src/private_message/create.rs +++ b/crates/api_crud/src/private_message/create.rs @@ -8,6 +8,7 @@ use lemmy_api_common::{ check_person_block, generate_local_apub_endpoint, get_interface_language, + get_url_blocklist, local_site_to_slur_regex, process_markdown, send_email_to_user, @@ -36,7 +37,8 @@ pub async fn create_private_message( let local_site = LocalSite::read(&mut context.pool()).await?; let slur_regex = local_site_to_slur_regex(&local_site); - let content = process_markdown(&data.content, &slur_regex, &context).await?; + let url_blocklist = get_url_blocklist(&context).await?; + let content = process_markdown(&data.content, &slur_regex, &url_blocklist, &context).await?; is_valid_body_field(&Some(content.clone()), false)?; check_person_block( diff --git a/crates/api_crud/src/private_message/update.rs b/crates/api_crud/src/private_message/update.rs index dfcf522a8..29063fd10 100644 --- a/crates/api_crud/src/private_message/update.rs +++ b/crates/api_crud/src/private_message/update.rs @@ -4,7 +4,7 @@ use lemmy_api_common::{ context::LemmyContext, 
private_message::{EditPrivateMessage, PrivateMessageResponse}, send_activity::{ActivityChannel, SendActivityData}, - utils::{local_site_to_slur_regex, process_markdown}, + utils::{get_url_blocklist, local_site_to_slur_regex, process_markdown}, }; use lemmy_db_schema::{ source::{ @@ -37,7 +37,8 @@ pub async fn update_private_message( // Doing the update let slur_regex = local_site_to_slur_regex(&local_site); - let content = process_markdown(&data.content, &slur_regex, &context).await?; + let url_blocklist = get_url_blocklist(&context).await?; + let content = process_markdown(&data.content, &slur_regex, &url_blocklist, &context).await?; is_valid_body_field(&Some(content.clone()), false)?; let private_message_id = data.private_message_id; diff --git a/crates/api_crud/src/site/create.rs b/crates/api_crud/src/site/create.rs index b5441bffe..76aae405e 100644 --- a/crates/api_crud/src/site/create.rs +++ b/crates/api_crud/src/site/create.rs @@ -6,6 +6,7 @@ use lemmy_api_common::{ site::{CreateSite, SiteResponse}, utils::{ generate_shared_inbox_url, + get_url_blocklist, is_admin, local_site_rate_limit_to_rate_limit_config, local_site_to_slur_regex, @@ -58,7 +59,8 @@ pub async fn create_site( let keypair = generate_actor_keypair()?; let slur_regex = local_site_to_slur_regex(&local_site); - let sidebar = process_markdown_opt(&data.sidebar, &slur_regex, &context).await?; + let url_blocklist = get_url_blocklist(&context).await?; + let sidebar = process_markdown_opt(&data.sidebar, &slur_regex, &url_blocklist, &context).await?; let icon = proxy_image_link_opt_api(&data.icon, &context).await?; let banner = proxy_image_link_opt_api(&data.banner, &context).await?; diff --git a/crates/api_crud/src/site/read.rs b/crates/api_crud/src/site/read.rs index b64503666..e99a222fa 100644 --- a/crates/api_crud/src/site/read.rs +++ b/crates/api_crud/src/site/read.rs @@ -6,6 +6,7 @@ use lemmy_api_common::{ use lemmy_db_schema::source::{ actor_language::{LocalUserLanguage, SiteLanguage}, 
language::Language, + local_site_url_blocklist::LocalSiteUrlBlocklist, tagline::Tagline, }; use lemmy_db_views::structs::{CustomEmojiView, LocalUserView, SiteView}; @@ -47,6 +48,7 @@ pub async fn get_site( let taglines = Tagline::get_all(&mut context.pool(), site_view.local_site.id).await?; let custom_emojis = CustomEmojiView::get_all(&mut context.pool(), site_view.local_site.id).await?; + let blocked_urls = LocalSiteUrlBlocklist::get_all(&mut context.pool()).await?; Ok(GetSiteResponse { site_view, admins, @@ -56,6 +58,7 @@ pub async fn get_site( discussion_languages, taglines, custom_emojis, + blocked_urls, }) }) .await diff --git a/crates/api_crud/src/site/update.rs b/crates/api_crud/src/site/update.rs index 17e81937e..809dbe498 100644 --- a/crates/api_crud/src/site/update.rs +++ b/crates/api_crud/src/site/update.rs @@ -4,6 +4,7 @@ use lemmy_api_common::{ context::LemmyContext, site::{EditSite, SiteResponse}, utils::{ + get_url_blocklist, is_admin, local_site_rate_limit_to_rate_limit_config, local_site_to_slur_regex, @@ -18,6 +19,7 @@ use lemmy_db_schema::{ federation_blocklist::FederationBlockList, local_site::{LocalSite, LocalSiteUpdateForm}, local_site_rate_limit::{LocalSiteRateLimit, LocalSiteRateLimitUpdateForm}, + local_site_url_blocklist::LocalSiteUrlBlocklist, local_user::LocalUser, site::{Site, SiteUpdateForm}, tagline::Tagline, @@ -34,6 +36,7 @@ use lemmy_utils::{ validation::{ build_and_check_regex, check_site_visibility_valid, + check_urls_are_valid, is_valid_body_field, site_description_length_check, site_name_length_check, @@ -61,7 +64,8 @@ pub async fn update_site( } let slur_regex = local_site_to_slur_regex(&local_site); - let sidebar = process_markdown_opt(&data.sidebar, &slur_regex, &context).await?; + let url_blocklist = get_url_blocklist(&context).await?; + let sidebar = process_markdown_opt(&data.sidebar, &slur_regex, &url_blocklist, &context).await?; let icon = proxy_image_link_opt_api(&data.icon, &context).await?; let banner = 
proxy_image_link_opt_api(&data.banner, &context).await?; @@ -137,6 +141,11 @@ pub async fn update_site( let blocked = data.blocked_instances.clone(); FederationBlockList::replace(&mut context.pool(), blocked).await?; + if let Some(url_blocklist) = data.blocked_urls.clone() { + let parsed_urls = check_urls_are_valid(&url_blocklist)?; + LocalSiteUrlBlocklist::replace(&mut context.pool(), parsed_urls).await?; + } + // TODO can't think of a better way to do this. // If the server suddenly requires email verification, or required applications, no old users // will be able to log in. It really only wants this to be a requirement for NEW signups. @@ -578,6 +587,7 @@ mod tests { captcha_difficulty: None, allowed_instances: None, blocked_instances: None, + blocked_urls: None, taglines: None, registration_mode: site_registration_mode, reports_email_admins: None, diff --git a/crates/apub/src/objects/comment.rs b/crates/apub/src/objects/comment.rs index 6d8d814bf..ba7cc914f 100644 --- a/crates/apub/src/objects/comment.rs +++ b/crates/apub/src/objects/comment.rs @@ -18,7 +18,7 @@ use activitypub_federation::{ use chrono::{DateTime, Utc}; use lemmy_api_common::{ context::LemmyContext, - utils::{is_mod_or_admin, local_site_opt_to_slur_regex, process_markdown}, + utils::{get_url_blocklist, is_mod_or_admin, local_site_opt_to_slur_regex, process_markdown}, }; use lemmy_db_schema::{ source::{ @@ -165,7 +165,8 @@ impl Object for ApubComment { let local_site = LocalSite::read(&mut context.pool()).await.ok(); let slur_regex = &local_site_opt_to_slur_regex(&local_site); - let content = process_markdown(&content, slur_regex, context).await?; + let url_blocklist = get_url_blocklist(context).await?; + let content = process_markdown(&content, slur_regex, &url_blocklist, context).await?; let language_id = LanguageTag::to_language_id_single(note.language, &mut context.pool()).await?; diff --git a/crates/apub/src/objects/community.rs b/crates/apub/src/objects/community.rs index 
e71f6d9b5..7630d80b2 100644 --- a/crates/apub/src/objects/community.rs +++ b/crates/apub/src/objects/community.rs @@ -21,6 +21,7 @@ use lemmy_api_common::{ generate_featured_url, generate_moderators_url, generate_outbox_url, + get_url_blocklist, local_site_opt_to_slur_regex, process_markdown_opt, proxy_image_link_opt_apub, @@ -141,8 +142,10 @@ impl Object for ApubCommunity { let local_site = LocalSite::read(&mut context.pool()).await.ok(); let slur_regex = &local_site_opt_to_slur_regex(&local_site); + let url_blocklist = get_url_blocklist(context).await?; let description = read_from_string_or_source_opt(&group.summary, &None, &group.source); - let description = process_markdown_opt(&description, slur_regex, context).await?; + let description = + process_markdown_opt(&description, slur_regex, &url_blocklist, context).await?; let icon = proxy_image_link_opt_apub(group.icon.map(|i| i.url), context).await?; let banner = proxy_image_link_opt_apub(group.image.map(|i| i.url), context).await?; diff --git a/crates/apub/src/objects/instance.rs b/crates/apub/src/objects/instance.rs index 8f4f163db..6894643d6 100644 --- a/crates/apub/src/objects/instance.rs +++ b/crates/apub/src/objects/instance.rs @@ -19,7 +19,12 @@ use activitypub_federation::{ use chrono::{DateTime, Utc}; use lemmy_api_common::{ context::LemmyContext, - utils::{local_site_opt_to_slur_regex, process_markdown_opt, proxy_image_link_opt_apub}, + utils::{ + get_url_blocklist, + local_site_opt_to_slur_regex, + process_markdown_opt, + proxy_image_link_opt_apub, + }, }; use lemmy_db_schema::{ newtypes::InstanceId, @@ -138,8 +143,9 @@ impl Object for ApubSite { let local_site = LocalSite::read(&mut context.pool()).await.ok(); let slur_regex = &local_site_opt_to_slur_regex(&local_site); + let url_blocklist = get_url_blocklist(context).await?; let sidebar = read_from_string_or_source_opt(&apub.content, &None, &apub.source); - let sidebar = process_markdown_opt(&sidebar, slur_regex, context).await?; + let sidebar = 
process_markdown_opt(&sidebar, slur_regex, &url_blocklist, context).await?; let icon = proxy_image_link_opt_apub(apub.icon.map(|i| i.url), context).await?; let banner = proxy_image_link_opt_apub(apub.image.map(|i| i.url), context).await?; diff --git a/crates/apub/src/objects/person.rs b/crates/apub/src/objects/person.rs index 7bfb68a04..d4456344f 100644 --- a/crates/apub/src/objects/person.rs +++ b/crates/apub/src/objects/person.rs @@ -22,6 +22,7 @@ use lemmy_api_common::{ context::LemmyContext, utils::{ generate_outbox_url, + get_url_blocklist, local_site_opt_to_slur_regex, process_markdown_opt, proxy_image_link_opt_apub, @@ -152,8 +153,9 @@ impl Object for ApubPerson { let local_site = LocalSite::read(&mut context.pool()).await.ok(); let slur_regex = &local_site_opt_to_slur_regex(&local_site); + let url_blocklist = get_url_blocklist(context).await?; let bio = read_from_string_or_source_opt(&person.summary, &None, &person.source); - let bio = process_markdown_opt(&bio, slur_regex, context).await?; + let bio = process_markdown_opt(&bio, slur_regex, &url_blocklist, context).await?; let avatar = proxy_image_link_opt_apub(person.icon.map(|i| i.url), context).await?; let banner = proxy_image_link_opt_apub(person.image.map(|i| i.url), context).await?; diff --git a/crates/apub/src/objects/post.rs b/crates/apub/src/objects/post.rs index ff42b9cbd..15184b622 100644 --- a/crates/apub/src/objects/post.rs +++ b/crates/apub/src/objects/post.rs @@ -26,6 +26,7 @@ use lemmy_api_common::{ context::LemmyContext, request::fetch_link_metadata_opt, utils::{ + get_url_blocklist, local_site_opt_to_sensitive, local_site_opt_to_slur_regex, process_markdown_opt, @@ -246,9 +247,10 @@ impl Object for ApubPost { let thumbnail_url = proxy_image_link_opt_apub(thumbnail_url, context).await?; let slur_regex = &local_site_opt_to_slur_regex(&local_site); + let url_blocklist = get_url_blocklist(context).await?; let body = read_from_string_or_source_opt(&page.content, &page.media_type, &page.source); 
- let body = process_markdown_opt(&body, slur_regex, context).await?; + let body = process_markdown_opt(&body, slur_regex, &url_blocklist, context).await?; let language_id = LanguageTag::to_language_id_single(page.language, &mut context.pool()).await?; diff --git a/crates/apub/src/objects/private_message.rs b/crates/apub/src/objects/private_message.rs index d5c00632f..647510802 100644 --- a/crates/apub/src/objects/private_message.rs +++ b/crates/apub/src/objects/private_message.rs @@ -14,7 +14,7 @@ use activitypub_federation::{ use chrono::{DateTime, Utc}; use lemmy_api_common::{ context::LemmyContext, - utils::{check_person_block, local_site_opt_to_slur_regex, process_markdown}, + utils::{check_person_block, get_url_blocklist, local_site_opt_to_slur_regex, process_markdown}, }; use lemmy_db_schema::{ source::{ @@ -127,8 +127,9 @@ impl Object for ApubPrivateMessage { let local_site = LocalSite::read(&mut context.pool()).await.ok(); let slur_regex = &local_site_opt_to_slur_regex(&local_site); + let url_blocklist = get_url_blocklist(context).await?; let content = read_from_string_or_source(¬e.content, &None, ¬e.source); - let content = process_markdown(&content, slur_regex, context).await?; + let content = process_markdown(&content, slur_regex, &url_blocklist, context).await?; let form = PrivateMessageInsertForm { creator_id: creator.id, diff --git a/crates/db_schema/src/impls/local_site_url_blocklist.rs b/crates/db_schema/src/impls/local_site_url_blocklist.rs new file mode 100644 index 000000000..73dedabce --- /dev/null +++ b/crates/db_schema/src/impls/local_site_url_blocklist.rs @@ -0,0 +1,49 @@ +use crate::{ + schema::local_site_url_blocklist, + source::local_site_url_blocklist::{LocalSiteUrlBlocklist, LocalSiteUrlBlocklistForm}, + utils::{get_conn, DbPool}, +}; +use diesel::{dsl::insert_into, result::Error}; +use diesel_async::{AsyncPgConnection, RunQueryDsl}; + +impl LocalSiteUrlBlocklist { + pub async fn replace(pool: &mut DbPool<'_>, url_blocklist: Vec) -> 
Result<(), Error> { + let conn = &mut get_conn(pool).await?; + + conn + .build_transaction() + .run(|conn| { + Box::pin(async move { + use crate::schema::local_site_url_blocklist::dsl::local_site_url_blocklist; + + Self::clear(conn).await?; + + let forms = url_blocklist + .into_iter() + .map(|url| LocalSiteUrlBlocklistForm { url, updated: None }) + .collect::>(); + + insert_into(local_site_url_blocklist) + .values(forms) + .execute(conn) + .await?; + + Ok(()) + }) as _ + }) + .await + } + + async fn clear(conn: &mut AsyncPgConnection) -> Result { + diesel::delete(local_site_url_blocklist::table) + .execute(conn) + .await + } + + pub async fn get_all(pool: &mut DbPool<'_>) -> Result, Error> { + let conn = &mut get_conn(pool).await?; + local_site_url_blocklist::table + .get_results::(conn) + .await + } +} diff --git a/crates/db_schema/src/impls/mod.rs b/crates/db_schema/src/impls/mod.rs index 711a6c4e6..3a4e71307 100644 --- a/crates/db_schema/src/impls/mod.rs +++ b/crates/db_schema/src/impls/mod.rs @@ -17,6 +17,7 @@ pub mod instance_block; pub mod language; pub mod local_site; pub mod local_site_rate_limit; +pub mod local_site_url_blocklist; pub mod local_user; pub mod local_user_vote_display_mode; pub mod login_token; diff --git a/crates/db_schema/src/schema.rs b/crates/db_schema/src/schema.rs index a61b2d24f..408ed0540 100644 --- a/crates/db_schema/src/schema.rs +++ b/crates/db_schema/src/schema.rs @@ -409,6 +409,15 @@ diesel::table! { } } +diesel::table! { + local_site_url_blocklist (id) { + id -> Int4, + url -> Text, + published -> Timestamptz, + updated -> Nullable, + } +} + diesel::table! 
{ use diesel::sql_types::*; use super::sql_types::SortTypeEnum; @@ -1052,6 +1061,7 @@ diesel::allow_tables_to_appear_in_same_query!( local_image, local_site, local_site_rate_limit, + local_site_url_blocklist, local_user, local_user_language, local_user_vote_display_mode, diff --git a/crates/db_schema/src/source/local_site_url_blocklist.rs b/crates/db_schema/src/source/local_site_url_blocklist.rs new file mode 100644 index 000000000..4ac0893ec --- /dev/null +++ b/crates/db_schema/src/source/local_site_url_blocklist.rs @@ -0,0 +1,28 @@ +#[cfg(feature = "full")] +use crate::schema::local_site_url_blocklist; +use chrono::{DateTime, Utc}; +use serde::{Deserialize, Serialize}; +use serde_with::skip_serializing_none; +#[cfg(feature = "full")] +use ts_rs::TS; + +#[skip_serializing_none] +#[derive(Clone, PartialEq, Eq, Debug, Serialize, Deserialize)] +#[cfg_attr(feature = "full", derive(Queryable, Selectable, Identifiable, TS))] +#[cfg_attr(feature = "full", diesel(table_name = local_site_url_blocklist))] +#[cfg_attr(feature = "full", diesel(check_for_backend(diesel::pg::Pg)))] +#[cfg_attr(feature = "full", ts(export))] +pub struct LocalSiteUrlBlocklist { + pub id: i32, + pub url: String, + pub published: DateTime<Utc>, + pub updated: Option<DateTime<Utc>>, +} + +#[derive(Default, Clone)] +#[cfg_attr(feature = "full", derive(Insertable, AsChangeset))] +#[cfg_attr(feature = "full", diesel(table_name = local_site_url_blocklist))] +pub struct LocalSiteUrlBlocklistForm { + pub url: String, + pub updated: Option<DateTime<Utc>>, +} diff --git a/crates/db_schema/src/source/mod.rs b/crates/db_schema/src/source/mod.rs index ab82a114c..3a6501717 100644 --- a/crates/db_schema/src/source/mod.rs +++ b/crates/db_schema/src/source/mod.rs @@ -22,6 +22,7 @@ pub mod instance_block; pub mod language; pub mod local_site; pub mod local_site_rate_limit; +pub mod local_site_url_blocklist; pub mod local_user; pub mod local_user_vote_display_mode; pub mod login_token; diff --git a/crates/utils/src/error.rs
b/crates/utils/src/error.rs index 9da018960..d25845894 100644 --- a/crates/utils/src/error.rs +++ b/crates/utils/src/error.rs @@ -135,6 +135,7 @@ pub enum LemmyErrorType { CouldntSetAllRegistrationsAccepted, CouldntSetAllEmailVerified, Banned, + BlockedUrl, CouldntGetComments, CouldntGetPosts, InvalidUrl, diff --git a/crates/utils/src/utils/markdown/mod.rs b/crates/utils/src/utils/markdown/mod.rs index bee2dcb94..c3def13a7 100644 --- a/crates/utils/src/utils/markdown/mod.rs +++ b/crates/utils/src/utils/markdown/mod.rs @@ -1,6 +1,7 @@ -use crate::settings::SETTINGS; +use crate::{error::LemmyResult, settings::SETTINGS, LemmyErrorType}; use markdown_it::{plugins::cmark::inline::image::Image, MarkdownIt}; use once_cell::sync::Lazy; +use regex::RegexSet; use url::Url; use urlencoding::encode; @@ -98,6 +99,13 @@ pub fn markdown_rewrite_image_links(mut src: String) -> (String, Vec) { (src, links) } +pub fn markdown_check_for_blocked_urls(text: &str, blocklist: &RegexSet) -> LemmyResult<()> { + if blocklist.is_match(text) { + Err(LemmyErrorType::BlockedUrl)? + } + Ok(()) +} + #[cfg(test)] mod tests { #![allow(clippy::unwrap_used)] @@ -109,65 +117,65 @@ mod tests { #[test] fn test_basic_markdown() { let tests: Vec<_> = vec![ - ( - "headings", - "# h1\n## h2\n### h3\n#### h4\n##### h5\n###### h6", - "

h1

\n

h2

\n

h3

\n

h4

\n
h5
\n
h6
\n" - ), - ( - "line breaks", - "First\rSecond", - "

First\nSecond

\n"), - ( - "emphasis", - "__bold__ **bold** *italic* ***bold+italic***", - "

bold bold italic bold+italic

\n" - ), - ( - "blockquotes", - "> #### Hello\n > \n > - Hola\n > - 안영 \n>> Goodbye\n", - "
\n

Hello

\n
    \n
  • Hola
  • \n
  • 안영
  • \n
\n
\n

Goodbye

\n
\n
\n" - ), - ( - "lists (ordered, unordered)", - "1. pen\n2. apple\n3. apple pen\n- pen\n- pineapple\n- pineapple pen", - "
    \n
  1. pen
  2. \n
  3. apple
  4. \n
  5. apple pen
  6. \n
\n
    \n
  • pen
  • \n
  • pineapple
  • \n
  • pineapple pen
  • \n
\n" - ), - ( - "code and code blocks", - "this is my amazing `code snippet` and my amazing ```code block```", - "

this is my amazing code snippet and my amazing code block

\n" - ), - // Links with added nofollow attribute - ( - "links", - "[Lemmy](https://join-lemmy.org/ \"Join Lemmy!\")", - "

Lemmy

\n" - ), - // Remote images with proxy - ( - "images", - "![My linked image](https://example.com/image.png \"image alt text\")", - "

\"My

\n" - ), - // Local images without proxy - ( - "images", - "![My linked image](https://lemmy-alpha/image.png \"image alt text\")", - "

\"My

\n" - ), - // Ensure spoiler plugin is added - ( - "basic spoiler", - "::: spoiler click to see more\nhow spicy!\n:::\n", - "
click to see more

how spicy!\n

\n" - ), - ( - "escape html special chars", - " hello &\"", - "

<script>alert(‘xss’);</script> hello &"

\n" - ) - ]; + ( + "headings", + "# h1\n## h2\n### h3\n#### h4\n##### h5\n###### h6", + "

h1

\n

h2

\n

h3

\n

h4

\n
h5
\n
h6
\n" + ), + ( + "line breaks", + "First\rSecond", + "

First\nSecond

\n"), + ( + "emphasis", + "__bold__ **bold** *italic* ***bold+italic***", + "

bold bold italic bold+italic

\n" + ), + ( + "blockquotes", + "> #### Hello\n > \n > - Hola\n > - 안영 \n>> Goodbye\n", + "
\n

Hello

\n
    \n
  • Hola
  • \n
  • 안영
  • \n
\n
\n

Goodbye

\n
\n
\n" + ), + ( + "lists (ordered, unordered)", + "1. pen\n2. apple\n3. apple pen\n- pen\n- pineapple\n- pineapple pen", + "
    \n
  1. pen
  2. \n
  3. apple
  4. \n
  5. apple pen
  6. \n
\n
    \n
  • pen
  • \n
  • pineapple
  • \n
  • pineapple pen
  • \n
\n" + ), + ( + "code and code blocks", + "this is my amazing `code snippet` and my amazing ```code block```", + "

this is my amazing code snippet and my amazing code block

\n" + ), + // Links with added nofollow attribute + ( + "links", + "[Lemmy](https://join-lemmy.org/ \"Join Lemmy!\")", + "

Lemmy

\n" + ), + // Remote images with proxy + ( + "images", + "![My linked image](https://example.com/image.png \"image alt text\")", + "

\"My

\n" + ), + // Local images without proxy + ( + "images", + "![My linked image](https://lemmy-alpha/image.png \"image alt text\")", + "

\"My

\n" + ), + // Ensure spoiler plugin is added + ( + "basic spoiler", + "::: spoiler click to see more\nhow spicy!\n:::\n", + "
click to see more

how spicy!\n

\n" + ), + ( + "escape html special chars", + " hello &\"", + "

<script>alert(‘xss’);</script> hello &"

\n" + ) + ]; tests.iter().for_each(|&(msg, input, expected)| { let result = markdown_to_html(input); @@ -184,46 +192,46 @@ mod tests { fn test_markdown_proxy_images() { let tests: Vec<_> = vec![ - ( - "remote image proxied", - "![link](http://example.com/image.jpg)", - "![link](https://lemmy-alpha/api/v3/image_proxy?url=http%3A%2F%2Fexample.com%2Fimage.jpg)", - ), - ( - "local image unproxied", - "![link](http://lemmy-alpha/image.jpg)", - "![link](http://lemmy-alpha/image.jpg)", - ), - ( - "multiple image links", - "![link](http://example.com/image1.jpg) ![link](http://example.com/image2.jpg)", - "![link](https://lemmy-alpha/api/v3/image_proxy?url=http%3A%2F%2Fexample.com%2Fimage1.jpg) ![link](https://lemmy-alpha/api/v3/image_proxy?url=http%3A%2F%2Fexample.com%2Fimage2.jpg)", - ), - ( - "empty link handled", - "![image]()", - "![image]()" - ), - ( - "empty label handled", - "![](http://example.com/image.jpg)", - "![](https://lemmy-alpha/api/v3/image_proxy?url=http%3A%2F%2Fexample.com%2Fimage.jpg)" - ), - ( - "invalid image link removed", - "![image](http-not-a-link)", - "![image]()" - ), - ( - "label with nested markdown handled", - "![a *b* c](http://example.com/image.jpg)", - "![a *b* c](https://lemmy-alpha/api/v3/image_proxy?url=http%3A%2F%2Fexample.com%2Fimage.jpg)" - ), - ( - "custom emoji support", - r#"![party-blob](https://www.hexbear.net/pictrs/image/83405746-0620-4728-9358-5f51b040ffee.gif "emoji party-blob")"#, - r#"![party-blob](https://lemmy-alpha/api/v3/image_proxy?url=https%3A%2F%2Fwww.hexbear.net%2Fpictrs%2Fimage%2F83405746-0620-4728-9358-5f51b040ffee.gif "emoji party-blob")"# - ) + ( + "remote image proxied", + "![link](http://example.com/image.jpg)", + "![link](https://lemmy-alpha/api/v3/image_proxy?url=http%3A%2F%2Fexample.com%2Fimage.jpg)", + ), + ( + "local image unproxied", + "![link](http://lemmy-alpha/image.jpg)", + "![link](http://lemmy-alpha/image.jpg)", + ), + ( + "multiple image links", + "![link](http://example.com/image1.jpg) 
![link](http://example.com/image2.jpg)", + "![link](https://lemmy-alpha/api/v3/image_proxy?url=http%3A%2F%2Fexample.com%2Fimage1.jpg) ![link](https://lemmy-alpha/api/v3/image_proxy?url=http%3A%2F%2Fexample.com%2Fimage2.jpg)", + ), + ( + "empty link handled", + "![image]()", + "![image]()" + ), + ( + "empty label handled", + "![](http://example.com/image.jpg)", + "![](https://lemmy-alpha/api/v3/image_proxy?url=http%3A%2F%2Fexample.com%2Fimage.jpg)" + ), + ( + "invalid image link removed", + "![image](http-not-a-link)", + "![image]()" + ), + ( + "label with nested markdown handled", + "![a *b* c](http://example.com/image.jpg)", + "![a *b* c](https://lemmy-alpha/api/v3/image_proxy?url=http%3A%2F%2Fexample.com%2Fimage.jpg)" + ), + ( + "custom emoji support", + r#"![party-blob](https://www.hexbear.net/pictrs/image/83405746-0620-4728-9358-5f51b040ffee.gif "emoji party-blob")"#, + r#"![party-blob](https://lemmy-alpha/api/v3/image_proxy?url=https%3A%2F%2Fwww.hexbear.net%2Fpictrs%2Fimage%2F83405746-0620-4728-9358-5f51b040ffee.gif "emoji party-blob")"# + ) ]; tests.iter().for_each(|&(msg, input, expected)| { @@ -237,6 +245,69 @@ mod tests { }); } + #[test] + fn test_url_blocking() { + let set = RegexSet::new(vec![r"(https://)?example\.com/?"]).unwrap(); + + assert!( + markdown_check_for_blocked_urls(&String::from("[](https://example.com)"), &set).is_err() + ); + + assert!(markdown_check_for_blocked_urls( + &String::from("Go to https://example.com to get free Robux"), + &set + ) + .is_err()); + + assert!( + markdown_check_for_blocked_urls(&String::from("[](https://example.blog)"), &set).is_ok() + ); + + assert!(markdown_check_for_blocked_urls(&String::from("example.com"), &set).is_err()); + + assert!(markdown_check_for_blocked_urls( + "Odio exercitationem culpa sed sunt + et. Sit et similique tempora deserunt doloremque. Cupiditate iusto + repellat et quis qui. Cum veritatis facere quasi repellendus sunt + eveniet nemo sint. Cumque sit unde est. 
https://example.com Alias + repellendus at quos.", + &set + ) + .is_err()); + + let set = RegexSet::new(vec![r"(https://)?example\.com/spam\.jpg"]).unwrap(); + assert!(markdown_check_for_blocked_urls( + &String::from("![](https://example.com/spam.jpg)"), + &set + ) + .is_err()); + + let set = RegexSet::new(vec![ + r"(https://)?quo\.example\.com/?", + r"(https://)?foo\.example\.com/?", + r"(https://)?bar\.example\.com/?", + ]) + .unwrap(); + + assert!( + markdown_check_for_blocked_urls(&String::from("https://baz.example.com"), &set).is_ok() + ); + + assert!( + markdown_check_for_blocked_urls(&String::from("https://bar.example.com"), &set).is_err() + ); + + let set = RegexSet::new(vec![r"(https://)?example\.com/banned_page"]).unwrap(); + + assert!( + markdown_check_for_blocked_urls(&String::from("https://example.com/page"), &set).is_ok() + ); + + let set = RegexSet::new(vec![r"(https://)?ex\.mple\.com/?"]).unwrap(); + + assert!(markdown_check_for_blocked_urls("example.com", &set).is_ok()); + } + #[test] fn test_sanitize_html() { let sanitized = sanitize_html(" hello &\"'"); diff --git a/crates/utils/src/utils/validation.rs b/crates/utils/src/utils/validation.rs index 23ef9744a..f9b2a87e5 100644 --- a/crates/utils/src/utils/validation.rs +++ b/crates/utils/src/utils/validation.rs @@ -1,8 +1,8 @@ use crate::error::{LemmyErrorExt, LemmyErrorType, LemmyResult}; use itertools::Itertools; use once_cell::sync::Lazy; -use regex::{Regex, RegexBuilder}; -use url::Url; +use regex::{Regex, RegexBuilder, RegexSet}; +use url::{ParseError, Url}; // From here: https://github.com/vector-im/element-android/blob/develop/matrix-sdk-android/src/main/java/org/matrix/android/sdk/api/MatrixPatterns.kt#L35 static VALID_MATRIX_ID_REGEX: Lazy<Regex> = Lazy::new(|| { @@ -299,6 +299,33 @@ pub fn check_url_scheme(url: &Option<Url>) -> LemmyResult<()> { } } +pub fn is_url_blocked(url: &Option<Url>, blocklist: &RegexSet) -> LemmyResult<()> { + if let Some(url) = url { + if blocklist.is_match(url.as_str()) { +
Err(LemmyErrorType::BlockedUrl)? + } + } + + Ok(()) +} + +pub fn check_urls_are_valid(urls: &Vec<String>) -> LemmyResult<Vec<String>> { + let mut parsed_urls = vec![]; + for url in urls { + let url = Url::parse(url).or_else(|e| { + if e == ParseError::RelativeUrlWithoutBase { + Url::parse(&format!("https://{url}")) + } else { + Err(e) + } + })?; + + parsed_urls.push(url.to_string()); + } + + Ok(parsed_urls) +} + #[cfg(test)] mod tests { #![allow(clippy::unwrap_used)] @@ -310,7 +337,9 @@ mod tests { build_and_check_regex, check_site_visibility_valid, check_url_scheme, + check_urls_are_valid, clean_url_params, + is_url_blocked, is_valid_actor_name, is_valid_bio_field, is_valid_display_name, @@ -550,4 +579,38 @@ mod tests { let magnet_link="magnet:?xt=urn:btih:4b390af3891e323778959d5abfff4b726510f14c&dn=Ravel%20Complete%20Piano%20Sheet%20Music%20-%20Public%20Domain&tr=udp%3A%2F%2Fopen.tracker.cl%3A1337%2Fannounce"; assert!(check_url_scheme(&Some(Url::parse(magnet_link).unwrap())).is_ok()); } + + #[test] + fn test_url_block() { + let set = regex::RegexSet::new(vec![ + r"(https://)?example\.org/page/to/article", + r"(https://)?example\.net/?", + r"(https://)?example\.com/?", + ]) + .unwrap(); + + assert!(is_url_blocked(&Some(Url::parse("https://example.blog").unwrap()), &set).is_ok()); + + assert!(is_url_blocked(&Some(Url::parse("https://example.org").unwrap()), &set).is_ok()); + + assert!(is_url_blocked(&None, &set).is_ok()); + + assert!(is_url_blocked(&Some(Url::parse("https://example.com").unwrap()), &set).is_err()); + } + + #[test] + fn test_url_parsed() { + assert_eq!( + vec![String::from("https://example.com/")], + check_urls_are_valid(&vec![String::from("example.com")]).unwrap() + ); + + assert!(check_urls_are_valid(&vec![ + String::from("example.com"), + String::from("https://example.blog") + ]) + .is_ok()); + + assert!(check_urls_are_valid(&vec![String::from("https://example .com"),]).is_err()); + } } diff --git a/migrations/2024-03-06-201637_url_blocklist/down.sql
b/migrations/2024-03-06-201637_url_blocklist/down.sql new file mode 100644 index 000000000..442f3c922 --- /dev/null +++ b/migrations/2024-03-06-201637_url_blocklist/down.sql @@ -0,0 +1,3 @@ +-- This file should undo anything in `up.sql` +DROP TABLE local_site_url_blocklist; + diff --git a/migrations/2024-03-06-201637_url_blocklist/up.sql b/migrations/2024-03-06-201637_url_blocklist/up.sql new file mode 100644 index 000000000..bb9b704b4 --- /dev/null +++ b/migrations/2024-03-06-201637_url_blocklist/up.sql @@ -0,0 +1,7 @@ +CREATE TABLE local_site_url_blocklist ( + id serial NOT NULL PRIMARY KEY, + url text NOT NULL UNIQUE, + published timestamp with time zone NOT NULL DEFAULT now(), + updated timestamp with time zone +); +