Add a blocklist for URLs. (#4515)

* Add a blocklist for URLs.

* Fix SQL format

* Make clippy happy.

* Use regex for URL matching.

* Escape regex chars in URLs.

* Use post for modification.

* Make URL block regex static and remove API routes.

* Add date fields to table and use transaction.

* Use Cache for blocklist.

* Rename check_links + move list to parameters of process_markdown.

* SQL format.

* Format, again.

* Remove println.

* Add API test.

* Set a shorter lifetime for regex in debug mode.

* Add missing macro.

* Update lemmy-js-client

* Update api_test/pnpm-lock.yaml

* Don't break other tests

* Use different URL for test

---------

Co-authored-by: Dessalines <dessalines@users.noreply.github.com>
Co-authored-by: Nutomic <me@nutomic.com>
apub-assets-delete-remove-data
flamingos-cant 2 months ago committed by GitHub
parent 0e7080337b
commit 19a1a077c5
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

@ -27,7 +27,7 @@
"eslint": "^8.57.0", "eslint": "^8.57.0",
"eslint-plugin-prettier": "^5.0.1", "eslint-plugin-prettier": "^5.0.1",
"jest": "^29.5.0", "jest": "^29.5.0",
"lemmy-js-client": "0.19.4-alpha.6", "lemmy-js-client": "0.19.4-alpha.8",
"prettier": "^3.2.5", "prettier": "^3.2.5",
"ts-jest": "^29.1.0", "ts-jest": "^29.1.0",
"typescript": "^5.3.3" "typescript": "^5.3.3"

@ -30,8 +30,8 @@ devDependencies:
specifier: ^29.5.0 specifier: ^29.5.0
version: 29.7.0(@types/node@20.11.22) version: 29.7.0(@types/node@20.11.22)
lemmy-js-client: lemmy-js-client:
specifier: 0.19.4-alpha.6 specifier: 0.19.4-alpha.8
version: 0.19.4-alpha.6 version: 0.19.4-alpha.8
prettier: prettier:
specifier: ^3.2.5 specifier: ^3.2.5
version: 3.2.5 version: 3.2.5
@ -2390,8 +2390,8 @@ packages:
engines: {node: '>=6'} engines: {node: '>=6'}
dev: true dev: true
/lemmy-js-client@0.19.4-alpha.6: /lemmy-js-client@0.19.4-alpha.8:
resolution: {integrity: sha512-x4htMlpoZ7hzrhrIk82aompVxbpu2ZDWtmWNGraM0+27nUCDf6gYxJH5nb5R/o39BQe5KSHq6zoBdliBwAY40w==} resolution: {integrity: sha512-8vjqUYVOhyUTcmG9FvPLjrWziVwNa2/Zi+kSflTrajJsK0V+5DclJ5dhdVMUQ4DEA70gb0OuNMDlipPG2FoS5A==}
dependencies: dependencies:
cross-fetch: 4.0.0 cross-fetch: 4.0.0
form-data: 4.0.0 form-data: 4.0.0

@ -18,6 +18,7 @@ import {
resolveBetaCommunity, resolveBetaCommunity,
createComment, createComment,
deletePost, deletePost,
delay,
removePost, removePost,
getPost, getPost,
unfollowRemotes, unfollowRemotes,
@ -710,3 +711,25 @@ test("Fetch post via redirect", async () => {
expect(gammaPost.post?.post.ap_id).toBe(alphaPost.post_view.post.ap_id); expect(gammaPost.post?.post.ap_id).toBe(alphaPost.post_view.post.ap_id);
await unfollowRemotes(alpha); await unfollowRemotes(alpha);
}); });
// Verifies that a post whose URL is on the site's URL blocklist is rejected
// with the `blocked_url` error.
test("Block post that contains banned URL", async () => {
  // Fail before touching site settings so a missing fixture cannot leave a
  // blocklist behind.
  if (!betaCommunity) {
    throw "Missing beta community";
  }

  let editSiteForm: EditSite = {
    blocked_urls: ["https://evil.com/"],
  };

  await epsilon.editSite(editSiteForm);

  // Give the server time to pick up the new blocklist before posting.
  await delay(500);

  try {
    // `.rejects` returns a promise; it must be awaited, otherwise the test can
    // finish (and the blocklist be cleared) before the assertion is evaluated.
    await expect(
      createPost(epsilon, betaCommunity.community.id, "https://evil.com"),
    ).rejects.toStrictEqual(Error("blocked_url"));
  } finally {
    // Later tests need this to be empty, even when the assertion above fails.
    editSiteForm.blocked_urls = [];
    await epsilon.editSite(editSiteForm);
  }
});

@ -3,6 +3,7 @@ use lemmy_api_common::{
context::LemmyContext, context::LemmyContext,
person::SaveUserSettings, person::SaveUserSettings,
utils::{ utils::{
get_url_blocklist,
local_site_to_slur_regex, local_site_to_slur_regex,
process_markdown_opt, process_markdown_opt,
proxy_image_link_opt_api, proxy_image_link_opt_api,
@ -35,7 +36,10 @@ pub async fn save_user_settings(
let site_view = SiteView::read_local(&mut context.pool()).await?; let site_view = SiteView::read_local(&mut context.pool()).await?;
let slur_regex = local_site_to_slur_regex(&site_view.local_site); let slur_regex = local_site_to_slur_regex(&site_view.local_site);
let bio = diesel_option_overwrite(process_markdown_opt(&data.bio, &slur_regex, &context).await?); let url_blocklist = get_url_blocklist(&context).await?;
let bio = diesel_option_overwrite(
process_markdown_opt(&data.bio, &slur_regex, &url_blocklist, &context).await?,
);
let avatar = proxy_image_link_opt_api(&data.avatar, &context).await?; let avatar = proxy_image_link_opt_api(&data.avatar, &context).await?;
let banner = proxy_image_link_opt_api(&data.banner, &context).await?; let banner = proxy_image_link_opt_api(&data.banner, &context).await?;

@ -4,6 +4,7 @@ use lemmy_db_schema::{
source::{ source::{
actor_language::SiteLanguage, actor_language::SiteLanguage,
language::Language, language::Language,
local_site_url_blocklist::LocalSiteUrlBlocklist,
local_user::{LocalUser, LocalUserUpdateForm}, local_user::{LocalUser, LocalUserUpdateForm},
moderator::{ModAdd, ModAddForm}, moderator::{ModAdd, ModAddForm},
tagline::Tagline, tagline::Tagline,
@ -62,6 +63,7 @@ pub async fn leave_admin(
let taglines = Tagline::get_all(&mut context.pool(), site_view.local_site.id).await?; let taglines = Tagline::get_all(&mut context.pool(), site_view.local_site.id).await?;
let custom_emojis = let custom_emojis =
CustomEmojiView::get_all(&mut context.pool(), site_view.local_site.id).await?; CustomEmojiView::get_all(&mut context.pool(), site_view.local_site.id).await?;
let blocked_urls = LocalSiteUrlBlocklist::get_all(&mut context.pool()).await?;
Ok(Json(GetSiteResponse { Ok(Json(GetSiteResponse {
site_view, site_view,
@ -72,5 +74,6 @@ pub async fn leave_admin(
discussion_languages, discussion_languages,
taglines, taglines,
custom_emojis, custom_emojis,
blocked_urls,
})) }))
} }

@ -59,6 +59,8 @@ uuid = { workspace = true, optional = true }
tokio = { workspace = true, optional = true } tokio = { workspace = true, optional = true }
reqwest = { workspace = true, optional = true } reqwest = { workspace = true, optional = true }
ts-rs = { workspace = true, optional = true } ts-rs = { workspace = true, optional = true }
moka.workspace = true
anyhow.workspace = true
once_cell = { workspace = true, optional = true } once_cell = { workspace = true, optional = true }
actix-web = { workspace = true, optional = true } actix-web = { workspace = true, optional = true }
enum-map = { workspace = true } enum-map = { workspace = true }

@ -6,6 +6,7 @@ use lemmy_db_schema::{
federation_queue_state::FederationQueueState, federation_queue_state::FederationQueueState,
instance::Instance, instance::Instance,
language::Language, language::Language,
local_site_url_blocklist::LocalSiteUrlBlocklist,
tagline::Tagline, tagline::Tagline,
}, },
ListingType, ListingType,
@ -268,6 +269,8 @@ pub struct EditSite {
pub allowed_instances: Option<Vec<String>>, pub allowed_instances: Option<Vec<String>>,
/// A list of blocked instances. /// A list of blocked instances.
pub blocked_instances: Option<Vec<String>>, pub blocked_instances: Option<Vec<String>>,
/// A list of blocked URLs
pub blocked_urls: Option<Vec<String>>,
/// A list of taglines shown at the top of the front page. /// A list of taglines shown at the top of the front page.
pub taglines: Option<Vec<String>>, pub taglines: Option<Vec<String>>,
pub registration_mode: Option<RegistrationMode>, pub registration_mode: Option<RegistrationMode>,
@ -305,6 +308,7 @@ pub struct GetSiteResponse {
pub taglines: Vec<Tagline>, pub taglines: Vec<Tagline>,
/// A list of custom emojis your site supports. /// A list of custom emojis your site supports.
pub custom_emojis: Vec<CustomEmojiView>, pub custom_emojis: Vec<CustomEmojiView>,
pub blocked_urls: Vec<LocalSiteUrlBlocklist>,
} }
#[skip_serializing_none] #[skip_serializing_none]

@ -17,6 +17,7 @@ use lemmy_db_schema::{
instance_block::InstanceBlock, instance_block::InstanceBlock,
local_site::LocalSite, local_site::LocalSite,
local_site_rate_limit::LocalSiteRateLimit, local_site_rate_limit::LocalSiteRateLimit,
local_site_url_blocklist::LocalSiteUrlBlocklist,
password_reset_request::PasswordResetRequest, password_reset_request::PasswordResetRequest,
person::{Person, PersonUpdateForm}, person::{Person, PersonUpdateForm},
person_block::PersonBlock, person_block::PersonBlock,
@ -38,18 +39,24 @@ use lemmy_utils::{
rate_limit::{ActionType, BucketConfig}, rate_limit::{ActionType, BucketConfig},
settings::structs::{PictrsImageMode, Settings}, settings::structs::{PictrsImageMode, Settings},
utils::{ utils::{
markdown::markdown_rewrite_image_links, markdown::{markdown_check_for_blocked_urls, markdown_rewrite_image_links},
slurs::{build_slur_regex, remove_slurs}, slurs::{build_slur_regex, remove_slurs},
}, },
}; };
use regex::Regex; use moka::future::Cache;
use once_cell::sync::Lazy;
use regex::{escape, Regex, RegexSet};
use rosetta_i18n::{Language, LanguageId}; use rosetta_i18n::{Language, LanguageId};
use std::collections::HashSet; use std::{collections::HashSet, time::Duration};
use tracing::warn; use tracing::warn;
use url::{ParseError, Url}; use url::{ParseError, Url};
use urlencoding::encode; use urlencoding::encode;
pub static AUTH_COOKIE_NAME: &str = "jwt"; pub static AUTH_COOKIE_NAME: &str = "jwt";
/// Time-to-live of the cached URL blocklist built by `get_url_blocklist`.
///
/// Debug builds use a 500 ms lifetime; other builds use 60 seconds.
#[cfg(debug_assertions)]
static URL_BLOCKLIST_RECHECK_DELAY: Duration = Duration::from_millis(500);
#[cfg(not(debug_assertions))]
static URL_BLOCKLIST_RECHECK_DELAY: Duration = Duration::from_secs(60);
#[tracing::instrument(skip_all)] #[tracing::instrument(skip_all)]
pub async fn is_mod_or_admin( pub async fn is_mod_or_admin(
@ -516,6 +523,47 @@ pub fn local_site_opt_to_sensitive(local_site: &Option<LocalSite>) -> bool {
.unwrap_or(false) .unwrap_or(false)
} }
/// Returns the compiled set of blocked-URL patterns for the local site.
///
/// The set is built from every row returned by `LocalSiteUrlBlocklist::get_all` and is kept in
/// a single-entry cache for `URL_BLOCKLIST_RECHECK_DELAY`, after which the next call rebuilds
/// it.
///
/// Each stored URL becomes one pattern of the form `(scheme://)?host/path`, with the scheme
/// prefix optional. When the stored URL ends in `/`, that trailing slash is optional in the
/// pattern as well.
///
/// # Errors
///
/// Returns an error if the blocklist cannot be loaded, if a stored URL cannot be parsed or has
/// no host, or if the resulting patterns do not compile into a `RegexSet`.
pub async fn get_url_blocklist(context: &LemmyContext) -> LemmyResult<RegexSet> {
  static URL_BLOCKLIST: Lazy<Cache<(), RegexSet>> = Lazy::new(|| {
    Cache::builder()
      .max_capacity(1)
      .time_to_live(URL_BLOCKLIST_RECHECK_DELAY)
      .build()
  });

  Ok(
    URL_BLOCKLIST
      .try_get_with::<_, LemmyError>((), async {
        let urls = LocalSiteUrlBlocklist::get_all(&mut context.pool()).await?;

        let regexes = urls
          .iter()
          .map(|entry| -> LemmyResult<String> {
            let url = &entry.url;
            // Propagate bad rows as errors instead of panicking inside the cache initialiser.
            let parsed = Url::parse(url)?;
            // `host_str` also covers IP-address hosts, for which `domain()` returns `None`.
            let host = parsed
              .host_str()
              .ok_or_else(|| anyhow::anyhow!("Blocked URL `{}` has no host", url))?;
            // A trailing `?` makes the final `/` of the escaped path optional.
            let optional_slash = if url.ends_with('/') { "?" } else { "" };

            Ok(format!(
              "({}://)?{}{}{}",
              escape(parsed.scheme()),
              escape(host),
              escape(parsed.path()),
              optional_slash
            ))
          })
          .collect::<LemmyResult<Vec<_>>>()?;

        let set = RegexSet::new(regexes)?;
        Ok(set)
      })
      .await
      .map_err(|e| anyhow::anyhow!("Failed to build URL blocklist due to `{}`", e))?,
  )
}
pub async fn send_application_approved_email( pub async fn send_application_approved_email(
user: &LocalUserView, user: &LocalUserView,
settings: &Settings, settings: &Settings,
@ -867,9 +915,13 @@ fn limit_expire_time(expires: DateTime<Utc>) -> LemmyResult<Option<DateTime<Utc>
pub async fn process_markdown( pub async fn process_markdown(
text: &str, text: &str,
slur_regex: &Option<Regex>, slur_regex: &Option<Regex>,
url_blocklist: &RegexSet,
context: &LemmyContext, context: &LemmyContext,
) -> LemmyResult<String> { ) -> LemmyResult<String> {
let text = remove_slurs(text, slur_regex); let text = remove_slurs(text, slur_regex);
markdown_check_for_blocked_urls(&text, url_blocklist)?;
if context.settings().pictrs_config()?.image_mode() == PictrsImageMode::ProxyAllImages { if context.settings().pictrs_config()?.image_mode() == PictrsImageMode::ProxyAllImages {
let (text, links) = markdown_rewrite_image_links(text); let (text, links) = markdown_rewrite_image_links(text);
RemoteImage::create(&mut context.pool(), links).await?; RemoteImage::create(&mut context.pool(), links).await?;
@ -882,10 +934,13 @@ pub async fn process_markdown(
pub async fn process_markdown_opt( pub async fn process_markdown_opt(
text: &Option<String>, text: &Option<String>,
slur_regex: &Option<Regex>, slur_regex: &Option<Regex>,
url_blocklist: &RegexSet,
context: &LemmyContext, context: &LemmyContext,
) -> LemmyResult<Option<String>> { ) -> LemmyResult<Option<String>> {
match text { match text {
Some(t) => process_markdown(t, slur_regex, context).await.map(Some), Some(t) => process_markdown(t, slur_regex, url_blocklist, context)
.await
.map(Some),
None => Ok(None), None => Ok(None),
} }
} }

@ -10,6 +10,7 @@ use lemmy_api_common::{
check_post_deleted_or_removed, check_post_deleted_or_removed,
generate_local_apub_endpoint, generate_local_apub_endpoint,
get_post, get_post,
get_url_blocklist,
is_mod_or_admin, is_mod_or_admin,
local_site_to_slur_regex, local_site_to_slur_regex,
process_markdown, process_markdown,
@ -44,7 +45,8 @@ pub async fn create_comment(
let local_site = LocalSite::read(&mut context.pool()).await?; let local_site = LocalSite::read(&mut context.pool()).await?;
let slur_regex = local_site_to_slur_regex(&local_site); let slur_regex = local_site_to_slur_regex(&local_site);
let content = process_markdown(&data.content, &slur_regex, &context).await?; let url_blocklist = get_url_blocklist(&context).await?;
let content = process_markdown(&data.content, &slur_regex, &url_blocklist, &context).await?;
is_valid_body_field(&Some(content.clone()), false)?; is_valid_body_field(&Some(content.clone()), false)?;
// Check for a community ban // Check for a community ban

@ -5,7 +5,12 @@ use lemmy_api_common::{
comment::{CommentResponse, EditComment}, comment::{CommentResponse, EditComment},
context::LemmyContext, context::LemmyContext,
send_activity::{ActivityChannel, SendActivityData}, send_activity::{ActivityChannel, SendActivityData},
utils::{check_community_user_action, local_site_to_slur_regex, process_markdown_opt}, utils::{
check_community_user_action,
get_url_blocklist,
local_site_to_slur_regex,
process_markdown_opt,
},
}; };
use lemmy_db_schema::{ use lemmy_db_schema::{
source::{ source::{
@ -54,7 +59,8 @@ pub async fn update_comment(
.await?; .await?;
let slur_regex = local_site_to_slur_regex(&local_site); let slur_regex = local_site_to_slur_regex(&local_site);
let content = process_markdown_opt(&data.content, &slur_regex, &context).await?; let url_blocklist = get_url_blocklist(&context).await?;
let content = process_markdown_opt(&data.content, &slur_regex, &url_blocklist, &context).await?;
is_valid_body_field(&content, false)?; is_valid_body_field(&content, false)?;
let comment_id = data.comment_id; let comment_id = data.comment_id;

@ -9,6 +9,7 @@ use lemmy_api_common::{
generate_inbox_url, generate_inbox_url,
generate_local_apub_endpoint, generate_local_apub_endpoint,
generate_shared_inbox_url, generate_shared_inbox_url,
get_url_blocklist,
is_admin, is_admin,
local_site_to_slur_regex, local_site_to_slur_regex,
process_markdown_opt, process_markdown_opt,
@ -53,9 +54,11 @@ pub async fn create_community(
} }
let slur_regex = local_site_to_slur_regex(&local_site); let slur_regex = local_site_to_slur_regex(&local_site);
let url_blocklist = get_url_blocklist(&context).await?;
check_slurs(&data.name, &slur_regex)?; check_slurs(&data.name, &slur_regex)?;
check_slurs(&data.title, &slur_regex)?; check_slurs(&data.title, &slur_regex)?;
let description = process_markdown_opt(&data.description, &slur_regex, &context).await?; let description =
process_markdown_opt(&data.description, &slur_regex, &url_blocklist, &context).await?;
let icon = proxy_image_link_api(&data.icon, &context).await?; let icon = proxy_image_link_api(&data.icon, &context).await?;
let banner = proxy_image_link_api(&data.banner, &context).await?; let banner = proxy_image_link_api(&data.banner, &context).await?;

@ -7,6 +7,7 @@ use lemmy_api_common::{
send_activity::{ActivityChannel, SendActivityData}, send_activity::{ActivityChannel, SendActivityData},
utils::{ utils::{
check_community_mod_action, check_community_mod_action,
get_url_blocklist,
local_site_to_slur_regex, local_site_to_slur_regex,
process_markdown_opt, process_markdown_opt,
proxy_image_link_opt_api, proxy_image_link_opt_api,
@ -36,8 +37,10 @@ pub async fn update_community(
let local_site = LocalSite::read(&mut context.pool()).await?; let local_site = LocalSite::read(&mut context.pool()).await?;
let slur_regex = local_site_to_slur_regex(&local_site); let slur_regex = local_site_to_slur_regex(&local_site);
let url_blocklist = get_url_blocklist(&context).await?;
check_slurs_opt(&data.title, &slur_regex)?; check_slurs_opt(&data.title, &slur_regex)?;
let description = process_markdown_opt(&data.description, &slur_regex, &context).await?; let description =
process_markdown_opt(&data.description, &slur_regex, &url_blocklist, &context).await?;
is_valid_body_field(&data.description, false)?; is_valid_body_field(&data.description, false)?;
let description = diesel_option_overwrite(description); let description = diesel_option_overwrite(description);

@ -9,6 +9,7 @@ use lemmy_api_common::{
utils::{ utils::{
check_community_user_action, check_community_user_action,
generate_local_apub_endpoint, generate_local_apub_endpoint,
get_url_blocklist,
honeypot_check, honeypot_check,
local_site_to_slur_regex, local_site_to_slur_regex,
mark_post_as_read, mark_post_as_read,
@ -38,6 +39,7 @@ use lemmy_utils::{
validation::{ validation::{
check_url_scheme, check_url_scheme,
clean_url_params, clean_url_params,
is_url_blocked,
is_valid_alt_text_field, is_valid_alt_text_field,
is_valid_body_field, is_valid_body_field,
is_valid_post_title, is_valid_post_title,
@ -60,8 +62,9 @@ pub async fn create_post(
let slur_regex = local_site_to_slur_regex(&local_site); let slur_regex = local_site_to_slur_regex(&local_site);
check_slurs(&data.name, &slur_regex)?; check_slurs(&data.name, &slur_regex)?;
let url_blocklist = get_url_blocklist(&context).await?;
let body = process_markdown_opt(&data.body, &slur_regex, &context).await?; let body = process_markdown_opt(&data.body, &slur_regex, &url_blocklist, &context).await?;
let data_url = data.url.as_ref(); let data_url = data.url.as_ref();
let url = data_url.map(clean_url_params); // TODO no good way to handle a "clear" let url = data_url.map(clean_url_params); // TODO no good way to handle a "clear"
let custom_thumbnail = data.custom_thumbnail.as_ref().map(clean_url_params); let custom_thumbnail = data.custom_thumbnail.as_ref().map(clean_url_params);
@ -69,6 +72,7 @@ pub async fn create_post(
is_valid_post_title(&data.name)?; is_valid_post_title(&data.name)?;
is_valid_body_field(&body, true)?; is_valid_body_field(&body, true)?;
is_valid_alt_text_field(&data.alt_text)?; is_valid_alt_text_field(&data.alt_text)?;
is_url_blocked(&url, &url_blocklist)?;
check_url_scheme(&url)?; check_url_scheme(&url)?;
check_url_scheme(&custom_thumbnail)?; check_url_scheme(&custom_thumbnail)?;

@ -8,6 +8,7 @@ use lemmy_api_common::{
send_activity::{ActivityChannel, SendActivityData}, send_activity::{ActivityChannel, SendActivityData},
utils::{ utils::{
check_community_user_action, check_community_user_action,
get_url_blocklist,
local_site_to_slur_regex, local_site_to_slur_regex,
process_markdown_opt, process_markdown_opt,
proxy_image_link_opt_apub, proxy_image_link_opt_apub,
@ -30,6 +31,7 @@ use lemmy_utils::{
validation::{ validation::{
check_url_scheme, check_url_scheme,
clean_url_params, clean_url_params,
is_url_blocked,
is_valid_alt_text_field, is_valid_alt_text_field,
is_valid_body_field, is_valid_body_field,
is_valid_post_title, is_valid_post_title,
@ -51,9 +53,11 @@ pub async fn update_post(
let url = data.url.as_ref().map(clean_url_params); let url = data.url.as_ref().map(clean_url_params);
let custom_thumbnail = data.custom_thumbnail.as_ref().map(clean_url_params); let custom_thumbnail = data.custom_thumbnail.as_ref().map(clean_url_params);
let url_blocklist = get_url_blocklist(&context).await?;
let slur_regex = local_site_to_slur_regex(&local_site); let slur_regex = local_site_to_slur_regex(&local_site);
check_slurs_opt(&data.name, &slur_regex)?; check_slurs_opt(&data.name, &slur_regex)?;
let body = process_markdown_opt(&data.body, &slur_regex, &context).await?; let body = process_markdown_opt(&data.body, &slur_regex, &url_blocklist, &context).await?;
if let Some(name) = &data.name { if let Some(name) = &data.name {
is_valid_post_title(name)?; is_valid_post_title(name)?;
@ -61,6 +65,7 @@ pub async fn update_post(
is_valid_body_field(&body, true)?; is_valid_body_field(&body, true)?;
is_valid_alt_text_field(&data.alt_text)?; is_valid_alt_text_field(&data.alt_text)?;
is_url_blocked(&url, &url_blocklist)?;
check_url_scheme(&url)?; check_url_scheme(&url)?;
check_url_scheme(&custom_thumbnail)?; check_url_scheme(&custom_thumbnail)?;

@ -8,6 +8,7 @@ use lemmy_api_common::{
check_person_block, check_person_block,
generate_local_apub_endpoint, generate_local_apub_endpoint,
get_interface_language, get_interface_language,
get_url_blocklist,
local_site_to_slur_regex, local_site_to_slur_regex,
process_markdown, process_markdown,
send_email_to_user, send_email_to_user,
@ -36,7 +37,8 @@ pub async fn create_private_message(
let local_site = LocalSite::read(&mut context.pool()).await?; let local_site = LocalSite::read(&mut context.pool()).await?;
let slur_regex = local_site_to_slur_regex(&local_site); let slur_regex = local_site_to_slur_regex(&local_site);
let content = process_markdown(&data.content, &slur_regex, &context).await?; let url_blocklist = get_url_blocklist(&context).await?;
let content = process_markdown(&data.content, &slur_regex, &url_blocklist, &context).await?;
is_valid_body_field(&Some(content.clone()), false)?; is_valid_body_field(&Some(content.clone()), false)?;
check_person_block( check_person_block(

@ -4,7 +4,7 @@ use lemmy_api_common::{
context::LemmyContext, context::LemmyContext,
private_message::{EditPrivateMessage, PrivateMessageResponse}, private_message::{EditPrivateMessage, PrivateMessageResponse},
send_activity::{ActivityChannel, SendActivityData}, send_activity::{ActivityChannel, SendActivityData},
utils::{local_site_to_slur_regex, process_markdown}, utils::{get_url_blocklist, local_site_to_slur_regex, process_markdown},
}; };
use lemmy_db_schema::{ use lemmy_db_schema::{
source::{ source::{
@ -37,7 +37,8 @@ pub async fn update_private_message(
// Doing the update // Doing the update
let slur_regex = local_site_to_slur_regex(&local_site); let slur_regex = local_site_to_slur_regex(&local_site);
let content = process_markdown(&data.content, &slur_regex, &context).await?; let url_blocklist = get_url_blocklist(&context).await?;
let content = process_markdown(&data.content, &slur_regex, &url_blocklist, &context).await?;
is_valid_body_field(&Some(content.clone()), false)?; is_valid_body_field(&Some(content.clone()), false)?;
let private_message_id = data.private_message_id; let private_message_id = data.private_message_id;

@ -6,6 +6,7 @@ use lemmy_api_common::{
site::{CreateSite, SiteResponse}, site::{CreateSite, SiteResponse},
utils::{ utils::{
generate_shared_inbox_url, generate_shared_inbox_url,
get_url_blocklist,
is_admin, is_admin,
local_site_rate_limit_to_rate_limit_config, local_site_rate_limit_to_rate_limit_config,
local_site_to_slur_regex, local_site_to_slur_regex,
@ -58,7 +59,8 @@ pub async fn create_site(
let keypair = generate_actor_keypair()?; let keypair = generate_actor_keypair()?;
let slur_regex = local_site_to_slur_regex(&local_site); let slur_regex = local_site_to_slur_regex(&local_site);
let sidebar = process_markdown_opt(&data.sidebar, &slur_regex, &context).await?; let url_blocklist = get_url_blocklist(&context).await?;
let sidebar = process_markdown_opt(&data.sidebar, &slur_regex, &url_blocklist, &context).await?;
let icon = proxy_image_link_opt_api(&data.icon, &context).await?; let icon = proxy_image_link_opt_api(&data.icon, &context).await?;
let banner = proxy_image_link_opt_api(&data.banner, &context).await?; let banner = proxy_image_link_opt_api(&data.banner, &context).await?;

@ -6,6 +6,7 @@ use lemmy_api_common::{
use lemmy_db_schema::source::{ use lemmy_db_schema::source::{
actor_language::{LocalUserLanguage, SiteLanguage}, actor_language::{LocalUserLanguage, SiteLanguage},
language::Language, language::Language,
local_site_url_blocklist::LocalSiteUrlBlocklist,
tagline::Tagline, tagline::Tagline,
}; };
use lemmy_db_views::structs::{CustomEmojiView, LocalUserView, SiteView}; use lemmy_db_views::structs::{CustomEmojiView, LocalUserView, SiteView};
@ -47,6 +48,7 @@ pub async fn get_site(
let taglines = Tagline::get_all(&mut context.pool(), site_view.local_site.id).await?; let taglines = Tagline::get_all(&mut context.pool(), site_view.local_site.id).await?;
let custom_emojis = let custom_emojis =
CustomEmojiView::get_all(&mut context.pool(), site_view.local_site.id).await?; CustomEmojiView::get_all(&mut context.pool(), site_view.local_site.id).await?;
let blocked_urls = LocalSiteUrlBlocklist::get_all(&mut context.pool()).await?;
Ok(GetSiteResponse { Ok(GetSiteResponse {
site_view, site_view,
admins, admins,
@ -56,6 +58,7 @@ pub async fn get_site(
discussion_languages, discussion_languages,
taglines, taglines,
custom_emojis, custom_emojis,
blocked_urls,
}) })
}) })
.await .await

@ -4,6 +4,7 @@ use lemmy_api_common::{
context::LemmyContext, context::LemmyContext,
site::{EditSite, SiteResponse}, site::{EditSite, SiteResponse},
utils::{ utils::{
get_url_blocklist,
is_admin, is_admin,
local_site_rate_limit_to_rate_limit_config, local_site_rate_limit_to_rate_limit_config,
local_site_to_slur_regex, local_site_to_slur_regex,
@ -18,6 +19,7 @@ use lemmy_db_schema::{
federation_blocklist::FederationBlockList, federation_blocklist::FederationBlockList,
local_site::{LocalSite, LocalSiteUpdateForm}, local_site::{LocalSite, LocalSiteUpdateForm},
local_site_rate_limit::{LocalSiteRateLimit, LocalSiteRateLimitUpdateForm}, local_site_rate_limit::{LocalSiteRateLimit, LocalSiteRateLimitUpdateForm},
local_site_url_blocklist::LocalSiteUrlBlocklist,
local_user::LocalUser, local_user::LocalUser,
site::{Site, SiteUpdateForm}, site::{Site, SiteUpdateForm},
tagline::Tagline, tagline::Tagline,
@ -34,6 +36,7 @@ use lemmy_utils::{
validation::{ validation::{
build_and_check_regex, build_and_check_regex,
check_site_visibility_valid, check_site_visibility_valid,
check_urls_are_valid,
is_valid_body_field, is_valid_body_field,
site_description_length_check, site_description_length_check,
site_name_length_check, site_name_length_check,
@ -61,7 +64,8 @@ pub async fn update_site(
} }
let slur_regex = local_site_to_slur_regex(&local_site); let slur_regex = local_site_to_slur_regex(&local_site);
let sidebar = process_markdown_opt(&data.sidebar, &slur_regex, &context).await?; let url_blocklist = get_url_blocklist(&context).await?;
let sidebar = process_markdown_opt(&data.sidebar, &slur_regex, &url_blocklist, &context).await?;
let icon = proxy_image_link_opt_api(&data.icon, &context).await?; let icon = proxy_image_link_opt_api(&data.icon, &context).await?;
let banner = proxy_image_link_opt_api(&data.banner, &context).await?; let banner = proxy_image_link_opt_api(&data.banner, &context).await?;
@ -137,6 +141,11 @@ pub async fn update_site(
let blocked = data.blocked_instances.clone(); let blocked = data.blocked_instances.clone();
FederationBlockList::replace(&mut context.pool(), blocked).await?; FederationBlockList::replace(&mut context.pool(), blocked).await?;
if let Some(url_blocklist) = data.blocked_urls.clone() {
let parsed_urls = check_urls_are_valid(&url_blocklist)?;
LocalSiteUrlBlocklist::replace(&mut context.pool(), parsed_urls).await?;
}
// TODO can't think of a better way to do this. // TODO can't think of a better way to do this.
// If the server suddenly requires email verification, or required applications, no old users // If the server suddenly requires email verification, or required applications, no old users
// will be able to log in. It really only wants this to be a requirement for NEW signups. // will be able to log in. It really only wants this to be a requirement for NEW signups.
@ -578,6 +587,7 @@ mod tests {
captcha_difficulty: None, captcha_difficulty: None,
allowed_instances: None, allowed_instances: None,
blocked_instances: None, blocked_instances: None,
blocked_urls: None,
taglines: None, taglines: None,
registration_mode: site_registration_mode, registration_mode: site_registration_mode,
reports_email_admins: None, reports_email_admins: None,

@ -18,7 +18,7 @@ use activitypub_federation::{
use chrono::{DateTime, Utc}; use chrono::{DateTime, Utc};
use lemmy_api_common::{ use lemmy_api_common::{
context::LemmyContext, context::LemmyContext,
utils::{is_mod_or_admin, local_site_opt_to_slur_regex, process_markdown}, utils::{get_url_blocklist, is_mod_or_admin, local_site_opt_to_slur_regex, process_markdown},
}; };
use lemmy_db_schema::{ use lemmy_db_schema::{
source::{ source::{
@ -165,7 +165,8 @@ impl Object for ApubComment {
let local_site = LocalSite::read(&mut context.pool()).await.ok(); let local_site = LocalSite::read(&mut context.pool()).await.ok();
let slur_regex = &local_site_opt_to_slur_regex(&local_site); let slur_regex = &local_site_opt_to_slur_regex(&local_site);
let content = process_markdown(&content, slur_regex, context).await?; let url_blocklist = get_url_blocklist(context).await?;
let content = process_markdown(&content, slur_regex, &url_blocklist, context).await?;
let language_id = let language_id =
LanguageTag::to_language_id_single(note.language, &mut context.pool()).await?; LanguageTag::to_language_id_single(note.language, &mut context.pool()).await?;

@ -21,6 +21,7 @@ use lemmy_api_common::{
generate_featured_url, generate_featured_url,
generate_moderators_url, generate_moderators_url,
generate_outbox_url, generate_outbox_url,
get_url_blocklist,
local_site_opt_to_slur_regex, local_site_opt_to_slur_regex,
process_markdown_opt, process_markdown_opt,
proxy_image_link_opt_apub, proxy_image_link_opt_apub,
@ -141,8 +142,10 @@ impl Object for ApubCommunity {
let local_site = LocalSite::read(&mut context.pool()).await.ok(); let local_site = LocalSite::read(&mut context.pool()).await.ok();
let slur_regex = &local_site_opt_to_slur_regex(&local_site); let slur_regex = &local_site_opt_to_slur_regex(&local_site);
let url_blocklist = get_url_blocklist(context).await?;
let description = read_from_string_or_source_opt(&group.summary, &None, &group.source); let description = read_from_string_or_source_opt(&group.summary, &None, &group.source);
let description = process_markdown_opt(&description, slur_regex, context).await?; let description =
process_markdown_opt(&description, slur_regex, &url_blocklist, context).await?;
let icon = proxy_image_link_opt_apub(group.icon.map(|i| i.url), context).await?; let icon = proxy_image_link_opt_apub(group.icon.map(|i| i.url), context).await?;
let banner = proxy_image_link_opt_apub(group.image.map(|i| i.url), context).await?; let banner = proxy_image_link_opt_apub(group.image.map(|i| i.url), context).await?;

@ -19,7 +19,12 @@ use activitypub_federation::{
use chrono::{DateTime, Utc}; use chrono::{DateTime, Utc};
use lemmy_api_common::{ use lemmy_api_common::{
context::LemmyContext, context::LemmyContext,
utils::{local_site_opt_to_slur_regex, process_markdown_opt, proxy_image_link_opt_apub}, utils::{
get_url_blocklist,
local_site_opt_to_slur_regex,
process_markdown_opt,
proxy_image_link_opt_apub,
},
}; };
use lemmy_db_schema::{ use lemmy_db_schema::{
newtypes::InstanceId, newtypes::InstanceId,
@ -138,8 +143,9 @@ impl Object for ApubSite {
let local_site = LocalSite::read(&mut context.pool()).await.ok(); let local_site = LocalSite::read(&mut context.pool()).await.ok();
let slur_regex = &local_site_opt_to_slur_regex(&local_site); let slur_regex = &local_site_opt_to_slur_regex(&local_site);
let url_blocklist = get_url_blocklist(context).await?;
let sidebar = read_from_string_or_source_opt(&apub.content, &None, &apub.source); let sidebar = read_from_string_or_source_opt(&apub.content, &None, &apub.source);
let sidebar = process_markdown_opt(&sidebar, slur_regex, context).await?; let sidebar = process_markdown_opt(&sidebar, slur_regex, &url_blocklist, context).await?;
let icon = proxy_image_link_opt_apub(apub.icon.map(|i| i.url), context).await?; let icon = proxy_image_link_opt_apub(apub.icon.map(|i| i.url), context).await?;
let banner = proxy_image_link_opt_apub(apub.image.map(|i| i.url), context).await?; let banner = proxy_image_link_opt_apub(apub.image.map(|i| i.url), context).await?;

@ -22,6 +22,7 @@ use lemmy_api_common::{
context::LemmyContext, context::LemmyContext,
utils::{ utils::{
generate_outbox_url, generate_outbox_url,
get_url_blocklist,
local_site_opt_to_slur_regex, local_site_opt_to_slur_regex,
process_markdown_opt, process_markdown_opt,
proxy_image_link_opt_apub, proxy_image_link_opt_apub,
@ -152,8 +153,9 @@ impl Object for ApubPerson {
let local_site = LocalSite::read(&mut context.pool()).await.ok(); let local_site = LocalSite::read(&mut context.pool()).await.ok();
let slur_regex = &local_site_opt_to_slur_regex(&local_site); let slur_regex = &local_site_opt_to_slur_regex(&local_site);
let url_blocklist = get_url_blocklist(context).await?;
let bio = read_from_string_or_source_opt(&person.summary, &None, &person.source); let bio = read_from_string_or_source_opt(&person.summary, &None, &person.source);
let bio = process_markdown_opt(&bio, slur_regex, context).await?; let bio = process_markdown_opt(&bio, slur_regex, &url_blocklist, context).await?;
let avatar = proxy_image_link_opt_apub(person.icon.map(|i| i.url), context).await?; let avatar = proxy_image_link_opt_apub(person.icon.map(|i| i.url), context).await?;
let banner = proxy_image_link_opt_apub(person.image.map(|i| i.url), context).await?; let banner = proxy_image_link_opt_apub(person.image.map(|i| i.url), context).await?;

@ -26,6 +26,7 @@ use lemmy_api_common::{
context::LemmyContext, context::LemmyContext,
request::fetch_link_metadata_opt, request::fetch_link_metadata_opt,
utils::{ utils::{
get_url_blocklist,
local_site_opt_to_sensitive, local_site_opt_to_sensitive,
local_site_opt_to_slur_regex, local_site_opt_to_slur_regex,
process_markdown_opt, process_markdown_opt,
@ -246,9 +247,10 @@ impl Object for ApubPost {
let thumbnail_url = proxy_image_link_opt_apub(thumbnail_url, context).await?; let thumbnail_url = proxy_image_link_opt_apub(thumbnail_url, context).await?;
let slur_regex = &local_site_opt_to_slur_regex(&local_site); let slur_regex = &local_site_opt_to_slur_regex(&local_site);
let url_blocklist = get_url_blocklist(context).await?;
let body = read_from_string_or_source_opt(&page.content, &page.media_type, &page.source); let body = read_from_string_or_source_opt(&page.content, &page.media_type, &page.source);
let body = process_markdown_opt(&body, slur_regex, context).await?; let body = process_markdown_opt(&body, slur_regex, &url_blocklist, context).await?;
let language_id = let language_id =
LanguageTag::to_language_id_single(page.language, &mut context.pool()).await?; LanguageTag::to_language_id_single(page.language, &mut context.pool()).await?;

@ -14,7 +14,7 @@ use activitypub_federation::{
use chrono::{DateTime, Utc}; use chrono::{DateTime, Utc};
use lemmy_api_common::{ use lemmy_api_common::{
context::LemmyContext, context::LemmyContext,
utils::{check_person_block, local_site_opt_to_slur_regex, process_markdown}, utils::{check_person_block, get_url_blocklist, local_site_opt_to_slur_regex, process_markdown},
}; };
use lemmy_db_schema::{ use lemmy_db_schema::{
source::{ source::{
@ -127,8 +127,9 @@ impl Object for ApubPrivateMessage {
let local_site = LocalSite::read(&mut context.pool()).await.ok(); let local_site = LocalSite::read(&mut context.pool()).await.ok();
let slur_regex = &local_site_opt_to_slur_regex(&local_site); let slur_regex = &local_site_opt_to_slur_regex(&local_site);
let url_blocklist = get_url_blocklist(context).await?;
let content = read_from_string_or_source(&note.content, &None, &note.source); let content = read_from_string_or_source(&note.content, &None, &note.source);
let content = process_markdown(&content, slur_regex, context).await?; let content = process_markdown(&content, slur_regex, &url_blocklist, context).await?;
let form = PrivateMessageInsertForm { let form = PrivateMessageInsertForm {
creator_id: creator.id, creator_id: creator.id,

@ -0,0 +1,49 @@
use crate::{
schema::local_site_url_blocklist,
source::local_site_url_blocklist::{LocalSiteUrlBlocklist, LocalSiteUrlBlocklistForm},
utils::{get_conn, DbPool},
};
use diesel::{dsl::insert_into, result::Error};
use diesel_async::{AsyncPgConnection, RunQueryDsl};
impl LocalSiteUrlBlocklist {
pub async fn replace(pool: &mut DbPool<'_>, url_blocklist: Vec<String>) -> Result<(), Error> {
let conn = &mut get_conn(pool).await?;
conn
.build_transaction()
.run(|conn| {
Box::pin(async move {
use crate::schema::local_site_url_blocklist::dsl::local_site_url_blocklist;
Self::clear(conn).await?;
let forms = url_blocklist
.into_iter()
.map(|url| LocalSiteUrlBlocklistForm { url, updated: None })
.collect::<Vec<_>>();
insert_into(local_site_url_blocklist)
.values(forms)
.execute(conn)
.await?;
Ok(())
}) as _
})
.await
}
async fn clear(conn: &mut AsyncPgConnection) -> Result<usize, Error> {
diesel::delete(local_site_url_blocklist::table)
.execute(conn)
.await
}
pub async fn get_all(pool: &mut DbPool<'_>) -> Result<Vec<Self>, Error> {
let conn = &mut get_conn(pool).await?;
local_site_url_blocklist::table
.get_results::<Self>(conn)
.await
}
}

@ -17,6 +17,7 @@ pub mod instance_block;
pub mod language; pub mod language;
pub mod local_site; pub mod local_site;
pub mod local_site_rate_limit; pub mod local_site_rate_limit;
pub mod local_site_url_blocklist;
pub mod local_user; pub mod local_user;
pub mod local_user_vote_display_mode; pub mod local_user_vote_display_mode;
pub mod login_token; pub mod login_token;

@ -409,6 +409,15 @@ diesel::table! {
} }
} }
diesel::table! {
local_site_url_blocklist (id) {
id -> Int4,
url -> Text,
published -> Timestamptz,
updated -> Nullable<Timestamptz>,
}
}
diesel::table! { diesel::table! {
use diesel::sql_types::*; use diesel::sql_types::*;
use super::sql_types::SortTypeEnum; use super::sql_types::SortTypeEnum;
@ -1052,6 +1061,7 @@ diesel::allow_tables_to_appear_in_same_query!(
local_image, local_image,
local_site, local_site,
local_site_rate_limit, local_site_rate_limit,
local_site_url_blocklist,
local_user, local_user,
local_user_language, local_user_language,
local_user_vote_display_mode, local_user_vote_display_mode,

@ -0,0 +1,28 @@
#[cfg(feature = "full")]
use crate::schema::local_site_url_blocklist;
use chrono::{DateTime, Utc};
use serde::{Deserialize, Serialize};
use serde_with::skip_serializing_none;
#[cfg(feature = "full")]
use ts_rs::TS;
/// One row of the `local_site_url_blocklist` table, i.e. a single URL blocklist entry.
#[skip_serializing_none]
#[derive(Clone, PartialEq, Eq, Debug, Serialize, Deserialize)]
#[cfg_attr(feature = "full", derive(Queryable, Selectable, Identifiable, TS))]
#[cfg_attr(feature = "full", diesel(table_name = local_site_url_blocklist))]
#[cfg_attr(feature = "full", diesel(check_for_backend(diesel::pg::Pg)))]
#[cfg_attr(feature = "full", ts(export))]
pub struct LocalSiteUrlBlocklist {
/// Row identifier (`id` column).
pub id: i32,
/// The URL text stored for this entry.
pub url: String,
/// Value of the `published` timestamp column.
pub published: DateTime<Utc>,
/// Value of the nullable `updated` timestamp column; `None` when it is unset.
pub updated: Option<DateTime<Utc>>,
}
/// Form used to write `local_site_url_blocklist` rows; with the `full` feature it
/// derives both `Insertable` and `AsChangeset`.
#[derive(Default, Clone)]
#[cfg_attr(feature = "full", derive(Insertable, AsChangeset))]
#[cfg_attr(feature = "full", diesel(table_name = local_site_url_blocklist))]
pub struct LocalSiteUrlBlocklistForm {
/// The URL text to store.
pub url: String,
/// Optional value for the `updated` timestamp column.
pub updated: Option<DateTime<Utc>>,
}

@ -22,6 +22,7 @@ pub mod instance_block;
pub mod language; pub mod language;
pub mod local_site; pub mod local_site;
pub mod local_site_rate_limit; pub mod local_site_rate_limit;
pub mod local_site_url_blocklist;
pub mod local_user; pub mod local_user;
pub mod local_user_vote_display_mode; pub mod local_user_vote_display_mode;
pub mod login_token; pub mod login_token;

@ -135,6 +135,7 @@ pub enum LemmyErrorType {
CouldntSetAllRegistrationsAccepted, CouldntSetAllRegistrationsAccepted,
CouldntSetAllEmailVerified, CouldntSetAllEmailVerified,
Banned, Banned,
BlockedUrl,
CouldntGetComments, CouldntGetComments,
CouldntGetPosts, CouldntGetPosts,
InvalidUrl, InvalidUrl,

@ -1,6 +1,7 @@
use crate::settings::SETTINGS; use crate::{error::LemmyResult, settings::SETTINGS, LemmyErrorType};
use markdown_it::{plugins::cmark::inline::image::Image, MarkdownIt}; use markdown_it::{plugins::cmark::inline::image::Image, MarkdownIt};
use once_cell::sync::Lazy; use once_cell::sync::Lazy;
use regex::RegexSet;
use url::Url; use url::Url;
use urlencoding::encode; use urlencoding::encode;
@ -98,6 +99,13 @@ pub fn markdown_rewrite_image_links(mut src: String) -> (String, Vec<Url>) {
(src, links) (src, links)
} }
/// Rejects `text` if any pattern in `blocklist` matches anywhere inside it.
///
/// Returns `Ok(())` when nothing matches and a `LemmyErrorType::BlockedUrl`
/// error otherwise. The raw text is scanned as-is; no markdown parsing happens here.
pub fn markdown_check_for_blocked_urls(text: &str, blocklist: &RegexSet) -> LemmyResult<()> {
  if blocklist.is_match(text) {
    Err(LemmyErrorType::BlockedUrl.into())
  } else {
    Ok(())
  }
}
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
#![allow(clippy::unwrap_used)] #![allow(clippy::unwrap_used)]
@ -109,65 +117,65 @@ mod tests {
#[test] #[test]
fn test_basic_markdown() { fn test_basic_markdown() {
let tests: Vec<_> = vec![ let tests: Vec<_> = vec![
( (
"headings", "headings",
"# h1\n## h2\n### h3\n#### h4\n##### h5\n###### h6", "# h1\n## h2\n### h3\n#### h4\n##### h5\n###### h6",
"<h1>h1</h1>\n<h2>h2</h2>\n<h3>h3</h3>\n<h4>h4</h4>\n<h5>h5</h5>\n<h6>h6</h6>\n" "<h1>h1</h1>\n<h2>h2</h2>\n<h3>h3</h3>\n<h4>h4</h4>\n<h5>h5</h5>\n<h6>h6</h6>\n"
), ),
( (
"line breaks", "line breaks",
"First\rSecond", "First\rSecond",
"<p>First\nSecond</p>\n"), "<p>First\nSecond</p>\n"),
( (
"emphasis", "emphasis",
"__bold__ **bold** *italic* ***bold+italic***", "__bold__ **bold** *italic* ***bold+italic***",
"<p><strong>bold</strong> <strong>bold</strong> <em>italic</em> <em><strong>bold+italic</strong></em></p>\n" "<p><strong>bold</strong> <strong>bold</strong> <em>italic</em> <em><strong>bold+italic</strong></em></p>\n"
), ),
( (
"blockquotes", "blockquotes",
"> #### Hello\n > \n > - Hola\n > - 안영 \n>> Goodbye\n", "> #### Hello\n > \n > - Hola\n > - 안영 \n>> Goodbye\n",
"<blockquote>\n<h4>Hello</h4>\n<ul>\n<li>Hola</li>\n<li>안영</li>\n</ul>\n<blockquote>\n<p>Goodbye</p>\n</blockquote>\n</blockquote>\n" "<blockquote>\n<h4>Hello</h4>\n<ul>\n<li>Hola</li>\n<li>안영</li>\n</ul>\n<blockquote>\n<p>Goodbye</p>\n</blockquote>\n</blockquote>\n"
), ),
( (
"lists (ordered, unordered)", "lists (ordered, unordered)",
"1. pen\n2. apple\n3. apple pen\n- pen\n- pineapple\n- pineapple pen", "1. pen\n2. apple\n3. apple pen\n- pen\n- pineapple\n- pineapple pen",
"<ol>\n<li>pen</li>\n<li>apple</li>\n<li>apple pen</li>\n</ol>\n<ul>\n<li>pen</li>\n<li>pineapple</li>\n<li>pineapple pen</li>\n</ul>\n" "<ol>\n<li>pen</li>\n<li>apple</li>\n<li>apple pen</li>\n</ol>\n<ul>\n<li>pen</li>\n<li>pineapple</li>\n<li>pineapple pen</li>\n</ul>\n"
), ),
( (
"code and code blocks", "code and code blocks",
"this is my amazing `code snippet` and my amazing ```code block```", "this is my amazing `code snippet` and my amazing ```code block```",
"<p>this is my amazing <code>code snippet</code> and my amazing <code>code block</code></p>\n" "<p>this is my amazing <code>code snippet</code> and my amazing <code>code block</code></p>\n"
), ),
// Links with added nofollow attribute // Links with added nofollow attribute
( (
"links", "links",
"[Lemmy](https://join-lemmy.org/ \"Join Lemmy!\")", "[Lemmy](https://join-lemmy.org/ \"Join Lemmy!\")",
"<p><a href=\"https://join-lemmy.org/\" rel=\"nofollow\" title=\"Join Lemmy!\">Lemmy</a></p>\n" "<p><a href=\"https://join-lemmy.org/\" rel=\"nofollow\" title=\"Join Lemmy!\">Lemmy</a></p>\n"
), ),
// Remote images with proxy // Remote images with proxy
( (
"images", "images",
"![My linked image](https://example.com/image.png \"image alt text\")", "![My linked image](https://example.com/image.png \"image alt text\")",
"<p><img src=\"https://example.com/image.png\" alt=\"My linked image\" title=\"image alt text\" /></p>\n" "<p><img src=\"https://example.com/image.png\" alt=\"My linked image\" title=\"image alt text\" /></p>\n"
), ),
// Local images without proxy // Local images without proxy
( (
"images", "images",
"![My linked image](https://lemmy-alpha/image.png \"image alt text\")", "![My linked image](https://lemmy-alpha/image.png \"image alt text\")",
"<p><img src=\"https://lemmy-alpha/image.png\" alt=\"My linked image\" title=\"image alt text\" /></p>\n" "<p><img src=\"https://lemmy-alpha/image.png\" alt=\"My linked image\" title=\"image alt text\" /></p>\n"
), ),
// Ensure spoiler plugin is added // Ensure spoiler plugin is added
( (
"basic spoiler", "basic spoiler",
"::: spoiler click to see more\nhow spicy!\n:::\n", "::: spoiler click to see more\nhow spicy!\n:::\n",
"<details><summary>click to see more</summary><p>how spicy!\n</p></details>\n" "<details><summary>click to see more</summary><p>how spicy!\n</p></details>\n"
), ),
( (
"escape html special chars", "escape html special chars",
"<script>alert('xss');</script> hello &\"", "<script>alert('xss');</script> hello &\"",
"<p>&lt;script&gt;alert(xss);&lt;/script&gt; hello &amp;&quot;</p>\n" "<p>&lt;script&gt;alert(xss);&lt;/script&gt; hello &amp;&quot;</p>\n"
) )
]; ];
tests.iter().for_each(|&(msg, input, expected)| { tests.iter().for_each(|&(msg, input, expected)| {
let result = markdown_to_html(input); let result = markdown_to_html(input);
@ -184,46 +192,46 @@ mod tests {
fn test_markdown_proxy_images() { fn test_markdown_proxy_images() {
let tests: Vec<_> = let tests: Vec<_> =
vec![ vec![
( (
"remote image proxied", "remote image proxied",
"![link](http://example.com/image.jpg)", "![link](http://example.com/image.jpg)",
"![link](https://lemmy-alpha/api/v3/image_proxy?url=http%3A%2F%2Fexample.com%2Fimage.jpg)", "![link](https://lemmy-alpha/api/v3/image_proxy?url=http%3A%2F%2Fexample.com%2Fimage.jpg)",
), ),
( (
"local image unproxied", "local image unproxied",
"![link](http://lemmy-alpha/image.jpg)", "![link](http://lemmy-alpha/image.jpg)",
"![link](http://lemmy-alpha/image.jpg)", "![link](http://lemmy-alpha/image.jpg)",
), ),
( (
"multiple image links", "multiple image links",
"![link](http://example.com/image1.jpg) ![link](http://example.com/image2.jpg)", "![link](http://example.com/image1.jpg) ![link](http://example.com/image2.jpg)",
"![link](https://lemmy-alpha/api/v3/image_proxy?url=http%3A%2F%2Fexample.com%2Fimage1.jpg) ![link](https://lemmy-alpha/api/v3/image_proxy?url=http%3A%2F%2Fexample.com%2Fimage2.jpg)", "![link](https://lemmy-alpha/api/v3/image_proxy?url=http%3A%2F%2Fexample.com%2Fimage1.jpg) ![link](https://lemmy-alpha/api/v3/image_proxy?url=http%3A%2F%2Fexample.com%2Fimage2.jpg)",
), ),
( (
"empty link handled", "empty link handled",
"![image]()", "![image]()",
"![image]()" "![image]()"
), ),
( (
"empty label handled", "empty label handled",
"![](http://example.com/image.jpg)", "![](http://example.com/image.jpg)",
"![](https://lemmy-alpha/api/v3/image_proxy?url=http%3A%2F%2Fexample.com%2Fimage.jpg)" "![](https://lemmy-alpha/api/v3/image_proxy?url=http%3A%2F%2Fexample.com%2Fimage.jpg)"
), ),
( (
"invalid image link removed", "invalid image link removed",
"![image](http-not-a-link)", "![image](http-not-a-link)",
"![image]()" "![image]()"
), ),
( (
"label with nested markdown handled", "label with nested markdown handled",
"![a *b* c](http://example.com/image.jpg)", "![a *b* c](http://example.com/image.jpg)",
"![a *b* c](https://lemmy-alpha/api/v3/image_proxy?url=http%3A%2F%2Fexample.com%2Fimage.jpg)" "![a *b* c](https://lemmy-alpha/api/v3/image_proxy?url=http%3A%2F%2Fexample.com%2Fimage.jpg)"
), ),
( (
"custom emoji support", "custom emoji support",
r#"![party-blob](https://www.hexbear.net/pictrs/image/83405746-0620-4728-9358-5f51b040ffee.gif "emoji party-blob")"#, r#"![party-blob](https://www.hexbear.net/pictrs/image/83405746-0620-4728-9358-5f51b040ffee.gif "emoji party-blob")"#,
r#"![party-blob](https://lemmy-alpha/api/v3/image_proxy?url=https%3A%2F%2Fwww.hexbear.net%2Fpictrs%2Fimage%2F83405746-0620-4728-9358-5f51b040ffee.gif "emoji party-blob")"# r#"![party-blob](https://lemmy-alpha/api/v3/image_proxy?url=https%3A%2F%2Fwww.hexbear.net%2Fpictrs%2Fimage%2F83405746-0620-4728-9358-5f51b040ffee.gif "emoji party-blob")"#
) )
]; ];
tests.iter().for_each(|&(msg, input, expected)| { tests.iter().for_each(|&(msg, input, expected)| {
@ -237,6 +245,69 @@ mod tests {
}); });
} }
#[test]
fn test_url_blocking() {
  // Shorthand: does `text` trip the given blocklist?
  let is_blocked =
    |text: &str, set: &RegexSet| markdown_check_for_blocked_urls(text, set).is_err();

  // A single domain pattern, matched inside links, prose and on its own.
  let set = RegexSet::new([r"(https://)?example\.com/?"]).unwrap();
  assert!(is_blocked("[](https://example.com)", &set));
  assert!(is_blocked(
    "Go to https://example.com to get free Robux",
    &set
  ));
  assert!(!is_blocked("[](https://example.blog)", &set));
  assert!(is_blocked("example.com", &set));
  assert!(is_blocked(
    "Odio exercitationem culpa sed sunt\n\
     et. Sit et similique tempora deserunt doloremque. Cupiditate iusto\n\
     repellat et quis qui. Cum veritatis facere quasi repellendus sunt\n\
     eveniet nemo sint. Cumque sit unde est. https://example.com Alias\n\
     repellendus at quos.",
    &set
  ));

  // A pattern that targets one specific file.
  let set = RegexSet::new([r"(https://)?example\.com/spam\.jpg"]).unwrap();
  assert!(is_blocked("![](https://example.com/spam.jpg)", &set));

  // Several subdomains blocked at once; an unlisted subdomain stays allowed.
  let set = RegexSet::new([
    r"(https://)?quo\.example\.com/?",
    r"(https://)?foo\.example\.com/?",
    r"(https://)?bar\.example\.com/?",
  ])
  .unwrap();
  assert!(!is_blocked("https://baz.example.com", &set));
  assert!(is_blocked("https://bar.example.com", &set));

  // Blocking one page must not block a different page on the same host.
  let set = RegexSet::new([r"(https://)?example\.com/banned_page"]).unwrap();
  assert!(!is_blocked("https://example.com/page", &set));

  // Escaped dots are literal: `ex\.mple` must not match `example`.
  let set = RegexSet::new([r"(https://)?ex\.mple\.com/?"]).unwrap();
  assert!(!is_blocked("example.com", &set));
}
#[test] #[test]
fn test_sanitize_html() { fn test_sanitize_html() {
let sanitized = sanitize_html("<script>alert('xss');</script> hello &\"'"); let sanitized = sanitize_html("<script>alert('xss');</script> hello &\"'");

@ -1,8 +1,8 @@
use crate::error::{LemmyErrorExt, LemmyErrorType, LemmyResult}; use crate::error::{LemmyErrorExt, LemmyErrorType, LemmyResult};
use itertools::Itertools; use itertools::Itertools;
use once_cell::sync::Lazy; use once_cell::sync::Lazy;
use regex::{Regex, RegexBuilder}; use regex::{Regex, RegexBuilder, RegexSet};
use url::Url; use url::{ParseError, Url};
// From here: https://github.com/vector-im/element-android/blob/develop/matrix-sdk-android/src/main/java/org/matrix/android/sdk/api/MatrixPatterns.kt#L35 // From here: https://github.com/vector-im/element-android/blob/develop/matrix-sdk-android/src/main/java/org/matrix/android/sdk/api/MatrixPatterns.kt#L35
static VALID_MATRIX_ID_REGEX: Lazy<Regex> = Lazy::new(|| { static VALID_MATRIX_ID_REGEX: Lazy<Regex> = Lazy::new(|| {
@ -299,6 +299,33 @@ pub fn check_url_scheme(url: &Option<Url>) -> LemmyResult<()> {
} }
} }
/// Checks an optional URL against `blocklist`.
///
/// Returns a `LemmyErrorType::BlockedUrl` error when `url` is `Some` and its
/// string form matches any pattern in the set; `None` and non-matching URLs
/// yield `Ok(())`.
pub fn is_url_blocked(url: &Option<Url>, blocklist: &RegexSet) -> LemmyResult<()> {
  match url {
    Some(link) if blocklist.is_match(link.as_str()) => Err(LemmyErrorType::BlockedUrl.into()),
    _ => Ok(()),
  }
}
pub fn check_urls_are_valid(urls: &Vec<String>) -> LemmyResult<Vec<String>> {
let mut parsed_urls = vec![];
for url in urls {
let url = Url::parse(url).or_else(|e| {
if e == ParseError::RelativeUrlWithoutBase {
Url::parse(&format!("https://{url}"))
} else {
Err(e)
}
})?;
parsed_urls.push(url.to_string());
}
Ok(parsed_urls)
}
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
#![allow(clippy::unwrap_used)] #![allow(clippy::unwrap_used)]
@ -310,7 +337,9 @@ mod tests {
build_and_check_regex, build_and_check_regex,
check_site_visibility_valid, check_site_visibility_valid,
check_url_scheme, check_url_scheme,
check_urls_are_valid,
clean_url_params, clean_url_params,
is_url_blocked,
is_valid_actor_name, is_valid_actor_name,
is_valid_bio_field, is_valid_bio_field,
is_valid_display_name, is_valid_display_name,
@ -550,4 +579,38 @@ mod tests {
let magnet_link="magnet:?xt=urn:btih:4b390af3891e323778959d5abfff4b726510f14c&dn=Ravel%20Complete%20Piano%20Sheet%20Music%20-%20Public%20Domain&tr=udp%3A%2F%2Fopen.tracker.cl%3A1337%2Fannounce"; let magnet_link="magnet:?xt=urn:btih:4b390af3891e323778959d5abfff4b726510f14c&dn=Ravel%20Complete%20Piano%20Sheet%20Music%20-%20Public%20Domain&tr=udp%3A%2F%2Fopen.tracker.cl%3A1337%2Fannounce";
assert!(check_url_scheme(&Some(Url::parse(magnet_link).unwrap())).is_ok()); assert!(check_url_scheme(&Some(Url::parse(magnet_link).unwrap())).is_ok());
} }
#[test]
fn test_url_block() {
  let patterns = [
    r"(https://)?example\.org/page/to/article",
    r"(https://)?example\.net/?",
    r"(https://)?example\.com/?",
  ];
  let set = regex::RegexSet::new(patterns).unwrap();
  // Builds the `Option<Url>` argument from a literal.
  let parsed = |raw: &str| Some(Url::parse(raw).unwrap());

  // Unlisted domain, and a listed domain whose blocked path is not present.
  assert!(is_url_blocked(&parsed("https://example.blog"), &set).is_ok());
  assert!(is_url_blocked(&parsed("https://example.org"), &set).is_ok());

  // A missing URL is never blocked.
  assert!(is_url_blocked(&None, &set).is_ok());

  // A listed domain is rejected.
  assert!(is_url_blocked(&parsed("https://example.com"), &set).is_err());
}
#[test]
fn test_url_parsed() {
  // A bare host gains the https scheme and a trailing slash.
  let bare_host = vec!["example.com".to_string()];
  assert_eq!(
    vec!["https://example.com/".to_string()],
    check_urls_are_valid(&bare_host).unwrap()
  );

  // Bare hosts and fully qualified URLs may be mixed in one list.
  let mixed = vec![
    "example.com".to_string(),
    "https://example.blog".to_string(),
  ];
  assert!(check_urls_are_valid(&mixed).is_ok());

  // A space inside the host makes the entry unparseable.
  let with_space = vec!["https://example .com".to_string()];
  assert!(check_urls_are_valid(&with_space).is_err());
}
} }

@ -0,0 +1,3 @@
-- This file should undo anything in `up.sql`
DROP TABLE local_site_url_blocklist;

@ -0,0 +1,7 @@
-- Table holding the site's URL blocklist, one row per entry.
CREATE TABLE local_site_url_blocklist (
-- Auto-incrementing primary key.
id serial NOT NULL PRIMARY KEY,
-- The URL text; the same value cannot be stored twice.
url text NOT NULL UNIQUE,
-- Filled with the current time when the row is inserted without an explicit value.
published timestamp with time zone NOT NULL DEFAULT now(),
-- Nullable timestamp; no default, so it stays NULL unless written explicitly.
updated timestamp with time zone
);
Loading…
Cancel
Save