Add a blocklist for URLs. (#4515)

* Add a blocklist for URLs.

* Fix SQL format

* Make clippy happy.

* Use regex for URL matching.

* Escape regex chars in URLs.

* Use post for modification.

* Make URL block regex static and remove API routes.

* Add date fields to table and use transaction.

* Use Cache for blocklist.

* Rename check_links + move list to parameters of process_markdown.

* SQL format.

* Format, again.

* Remove println.

* Add API test.

* Set a shorter lifetime for regex in debug mode.

* Add missing macro.

* Update lemmy-js-client

* Update api_test/pnpm-lock.yaml

* Don't break other tests

* Use different URL for test

---------

Co-authored-by: Dessalines <dessalines@users.noreply.github.com>
Co-authored-by: Nutomic <me@nutomic.com>
apub-assets-delete-remove-data
flamingos-cant 1 month ago committed by GitHub
parent 0e7080337b
commit 19a1a077c5
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

@ -27,7 +27,7 @@
"eslint": "^8.57.0",
"eslint-plugin-prettier": "^5.0.1",
"jest": "^29.5.0",
"lemmy-js-client": "0.19.4-alpha.6",
"lemmy-js-client": "0.19.4-alpha.8",
"prettier": "^3.2.5",
"ts-jest": "^29.1.0",
"typescript": "^5.3.3"

@ -30,8 +30,8 @@ devDependencies:
specifier: ^29.5.0
version: 29.7.0(@types/node@20.11.22)
lemmy-js-client:
specifier: 0.19.4-alpha.6
version: 0.19.4-alpha.6
specifier: 0.19.4-alpha.8
version: 0.19.4-alpha.8
prettier:
specifier: ^3.2.5
version: 3.2.5
@ -2390,8 +2390,8 @@ packages:
engines: {node: '>=6'}
dev: true
/lemmy-js-client@0.19.4-alpha.6:
resolution: {integrity: sha512-x4htMlpoZ7hzrhrIk82aompVxbpu2ZDWtmWNGraM0+27nUCDf6gYxJH5nb5R/o39BQe5KSHq6zoBdliBwAY40w==}
/lemmy-js-client@0.19.4-alpha.8:
resolution: {integrity: sha512-8vjqUYVOhyUTcmG9FvPLjrWziVwNa2/Zi+kSflTrajJsK0V+5DclJ5dhdVMUQ4DEA70gb0OuNMDlipPG2FoS5A==}
dependencies:
cross-fetch: 4.0.0
form-data: 4.0.0

@ -18,6 +18,7 @@ import {
resolveBetaCommunity,
createComment,
deletePost,
delay,
removePost,
getPost,
unfollowRemotes,
@ -710,3 +711,25 @@ test("Fetch post via redirect", async () => {
expect(gammaPost.post?.post.ap_id).toBe(alphaPost.post_view.post.ap_id);
await unfollowRemotes(alpha);
});
test("Block post that contains banned URL", async () => {
let editSiteForm: EditSite = {
blocked_urls: ["https://evil.com/"],
};
await epsilon.editSite(editSiteForm);
await delay(500);
if (!betaCommunity) {
throw "Missing beta community";
}
expect(
createPost(epsilon, betaCommunity.community.id, "https://evil.com"),
).rejects.toStrictEqual(Error("blocked_url"));
// Later tests need this to be empty
editSiteForm.blocked_urls = [];
await epsilon.editSite(editSiteForm);
});

@ -3,6 +3,7 @@ use lemmy_api_common::{
context::LemmyContext,
person::SaveUserSettings,
utils::{
get_url_blocklist,
local_site_to_slur_regex,
process_markdown_opt,
proxy_image_link_opt_api,
@ -35,7 +36,10 @@ pub async fn save_user_settings(
let site_view = SiteView::read_local(&mut context.pool()).await?;
let slur_regex = local_site_to_slur_regex(&site_view.local_site);
let bio = diesel_option_overwrite(process_markdown_opt(&data.bio, &slur_regex, &context).await?);
let url_blocklist = get_url_blocklist(&context).await?;
let bio = diesel_option_overwrite(
process_markdown_opt(&data.bio, &slur_regex, &url_blocklist, &context).await?,
);
let avatar = proxy_image_link_opt_api(&data.avatar, &context).await?;
let banner = proxy_image_link_opt_api(&data.banner, &context).await?;

@ -4,6 +4,7 @@ use lemmy_db_schema::{
source::{
actor_language::SiteLanguage,
language::Language,
local_site_url_blocklist::LocalSiteUrlBlocklist,
local_user::{LocalUser, LocalUserUpdateForm},
moderator::{ModAdd, ModAddForm},
tagline::Tagline,
@ -62,6 +63,7 @@ pub async fn leave_admin(
let taglines = Tagline::get_all(&mut context.pool(), site_view.local_site.id).await?;
let custom_emojis =
CustomEmojiView::get_all(&mut context.pool(), site_view.local_site.id).await?;
let blocked_urls = LocalSiteUrlBlocklist::get_all(&mut context.pool()).await?;
Ok(Json(GetSiteResponse {
site_view,
@ -72,5 +74,6 @@ pub async fn leave_admin(
discussion_languages,
taglines,
custom_emojis,
blocked_urls,
}))
}

@ -59,6 +59,8 @@ uuid = { workspace = true, optional = true }
tokio = { workspace = true, optional = true }
reqwest = { workspace = true, optional = true }
ts-rs = { workspace = true, optional = true }
moka.workspace = true
anyhow.workspace = true
once_cell = { workspace = true, optional = true }
actix-web = { workspace = true, optional = true }
enum-map = { workspace = true }

@ -6,6 +6,7 @@ use lemmy_db_schema::{
federation_queue_state::FederationQueueState,
instance::Instance,
language::Language,
local_site_url_blocklist::LocalSiteUrlBlocklist,
tagline::Tagline,
},
ListingType,
@ -268,6 +269,8 @@ pub struct EditSite {
pub allowed_instances: Option<Vec<String>>,
/// A list of blocked instances.
pub blocked_instances: Option<Vec<String>>,
/// A list of blocked URLs
pub blocked_urls: Option<Vec<String>>,
/// A list of taglines shown at the top of the front page.
pub taglines: Option<Vec<String>>,
pub registration_mode: Option<RegistrationMode>,
@ -305,6 +308,7 @@ pub struct GetSiteResponse {
pub taglines: Vec<Tagline>,
/// A list of custom emojis your site supports.
pub custom_emojis: Vec<CustomEmojiView>,
pub blocked_urls: Vec<LocalSiteUrlBlocklist>,
}
#[skip_serializing_none]

@ -17,6 +17,7 @@ use lemmy_db_schema::{
instance_block::InstanceBlock,
local_site::LocalSite,
local_site_rate_limit::LocalSiteRateLimit,
local_site_url_blocklist::LocalSiteUrlBlocklist,
password_reset_request::PasswordResetRequest,
person::{Person, PersonUpdateForm},
person_block::PersonBlock,
@ -38,18 +39,24 @@ use lemmy_utils::{
rate_limit::{ActionType, BucketConfig},
settings::structs::{PictrsImageMode, Settings},
utils::{
markdown::markdown_rewrite_image_links,
markdown::{markdown_check_for_blocked_urls, markdown_rewrite_image_links},
slurs::{build_slur_regex, remove_slurs},
},
};
use regex::Regex;
use moka::future::Cache;
use once_cell::sync::Lazy;
use regex::{escape, Regex, RegexSet};
use rosetta_i18n::{Language, LanguageId};
use std::collections::HashSet;
use std::{collections::HashSet, time::Duration};
use tracing::warn;
use url::{ParseError, Url};
use urlencoding::encode;
pub static AUTH_COOKIE_NAME: &str = "jwt";
#[cfg(debug_assertions)]
static URL_BLOCKLIST_RECHECK_DELAY: Duration = Duration::from_millis(500);
#[cfg(not(debug_assertions))]
static URL_BLOCKLIST_RECHECK_DELAY: Duration = Duration::from_secs(60);
#[tracing::instrument(skip_all)]
pub async fn is_mod_or_admin(
@ -516,6 +523,47 @@ pub fn local_site_opt_to_sensitive(local_site: &Option<LocalSite>) -> bool {
.unwrap_or(false)
}
pub async fn get_url_blocklist(context: &LemmyContext) -> LemmyResult<RegexSet> {
static URL_BLOCKLIST: Lazy<Cache<(), RegexSet>> = Lazy::new(|| {
Cache::builder()
.max_capacity(1)
.time_to_live(URL_BLOCKLIST_RECHECK_DELAY)
.build()
});
Ok(
URL_BLOCKLIST
.try_get_with::<_, LemmyError>((), async {
let urls = LocalSiteUrlBlocklist::get_all(&mut context.pool()).await?;
let regexes = urls.iter().map(|url| {
let url = &url.url;
let parsed = Url::parse(url).expect("Coundln't parse URL.");
if url.ends_with('/') {
format!(
"({}://)?{}{}?",
parsed.scheme(),
escape(parsed.domain().expect("No domain.")),
escape(parsed.path())
)
} else {
format!(
"({}://)?{}{}",
parsed.scheme(),
escape(parsed.domain().expect("No domain.")),
escape(parsed.path())
)
}
});
let set = RegexSet::new(regexes)?;
Ok(set)
})
.await
.map_err(|e| anyhow::anyhow!("Failed to build URL blocklist due to `{}`", e))?,
)
}
pub async fn send_application_approved_email(
user: &LocalUserView,
settings: &Settings,
@ -867,9 +915,13 @@ fn limit_expire_time(expires: DateTime<Utc>) -> LemmyResult<Option<DateTime<Utc>
pub async fn process_markdown(
text: &str,
slur_regex: &Option<Regex>,
url_blocklist: &RegexSet,
context: &LemmyContext,
) -> LemmyResult<String> {
let text = remove_slurs(text, slur_regex);
markdown_check_for_blocked_urls(&text, url_blocklist)?;
if context.settings().pictrs_config()?.image_mode() == PictrsImageMode::ProxyAllImages {
let (text, links) = markdown_rewrite_image_links(text);
RemoteImage::create(&mut context.pool(), links).await?;
@ -882,10 +934,13 @@ pub async fn process_markdown(
pub async fn process_markdown_opt(
text: &Option<String>,
slur_regex: &Option<Regex>,
url_blocklist: &RegexSet,
context: &LemmyContext,
) -> LemmyResult<Option<String>> {
match text {
Some(t) => process_markdown(t, slur_regex, context).await.map(Some),
Some(t) => process_markdown(t, slur_regex, url_blocklist, context)
.await
.map(Some),
None => Ok(None),
}
}

@ -10,6 +10,7 @@ use lemmy_api_common::{
check_post_deleted_or_removed,
generate_local_apub_endpoint,
get_post,
get_url_blocklist,
is_mod_or_admin,
local_site_to_slur_regex,
process_markdown,
@ -44,7 +45,8 @@ pub async fn create_comment(
let local_site = LocalSite::read(&mut context.pool()).await?;
let slur_regex = local_site_to_slur_regex(&local_site);
let content = process_markdown(&data.content, &slur_regex, &context).await?;
let url_blocklist = get_url_blocklist(&context).await?;
let content = process_markdown(&data.content, &slur_regex, &url_blocklist, &context).await?;
is_valid_body_field(&Some(content.clone()), false)?;
// Check for a community ban

@ -5,7 +5,12 @@ use lemmy_api_common::{
comment::{CommentResponse, EditComment},
context::LemmyContext,
send_activity::{ActivityChannel, SendActivityData},
utils::{check_community_user_action, local_site_to_slur_regex, process_markdown_opt},
utils::{
check_community_user_action,
get_url_blocklist,
local_site_to_slur_regex,
process_markdown_opt,
},
};
use lemmy_db_schema::{
source::{
@ -54,7 +59,8 @@ pub async fn update_comment(
.await?;
let slur_regex = local_site_to_slur_regex(&local_site);
let content = process_markdown_opt(&data.content, &slur_regex, &context).await?;
let url_blocklist = get_url_blocklist(&context).await?;
let content = process_markdown_opt(&data.content, &slur_regex, &url_blocklist, &context).await?;
is_valid_body_field(&content, false)?;
let comment_id = data.comment_id;

@ -9,6 +9,7 @@ use lemmy_api_common::{
generate_inbox_url,
generate_local_apub_endpoint,
generate_shared_inbox_url,
get_url_blocklist,
is_admin,
local_site_to_slur_regex,
process_markdown_opt,
@ -53,9 +54,11 @@ pub async fn create_community(
}
let slur_regex = local_site_to_slur_regex(&local_site);
let url_blocklist = get_url_blocklist(&context).await?;
check_slurs(&data.name, &slur_regex)?;
check_slurs(&data.title, &slur_regex)?;
let description = process_markdown_opt(&data.description, &slur_regex, &context).await?;
let description =
process_markdown_opt(&data.description, &slur_regex, &url_blocklist, &context).await?;
let icon = proxy_image_link_api(&data.icon, &context).await?;
let banner = proxy_image_link_api(&data.banner, &context).await?;

@ -7,6 +7,7 @@ use lemmy_api_common::{
send_activity::{ActivityChannel, SendActivityData},
utils::{
check_community_mod_action,
get_url_blocklist,
local_site_to_slur_regex,
process_markdown_opt,
proxy_image_link_opt_api,
@ -36,8 +37,10 @@ pub async fn update_community(
let local_site = LocalSite::read(&mut context.pool()).await?;
let slur_regex = local_site_to_slur_regex(&local_site);
let url_blocklist = get_url_blocklist(&context).await?;
check_slurs_opt(&data.title, &slur_regex)?;
let description = process_markdown_opt(&data.description, &slur_regex, &context).await?;
let description =
process_markdown_opt(&data.description, &slur_regex, &url_blocklist, &context).await?;
is_valid_body_field(&data.description, false)?;
let description = diesel_option_overwrite(description);

@ -9,6 +9,7 @@ use lemmy_api_common::{
utils::{
check_community_user_action,
generate_local_apub_endpoint,
get_url_blocklist,
honeypot_check,
local_site_to_slur_regex,
mark_post_as_read,
@ -38,6 +39,7 @@ use lemmy_utils::{
validation::{
check_url_scheme,
clean_url_params,
is_url_blocked,
is_valid_alt_text_field,
is_valid_body_field,
is_valid_post_title,
@ -60,8 +62,9 @@ pub async fn create_post(
let slur_regex = local_site_to_slur_regex(&local_site);
check_slurs(&data.name, &slur_regex)?;
let url_blocklist = get_url_blocklist(&context).await?;
let body = process_markdown_opt(&data.body, &slur_regex, &context).await?;
let body = process_markdown_opt(&data.body, &slur_regex, &url_blocklist, &context).await?;
let data_url = data.url.as_ref();
let url = data_url.map(clean_url_params); // TODO no good way to handle a "clear"
let custom_thumbnail = data.custom_thumbnail.as_ref().map(clean_url_params);
@ -69,6 +72,7 @@ pub async fn create_post(
is_valid_post_title(&data.name)?;
is_valid_body_field(&body, true)?;
is_valid_alt_text_field(&data.alt_text)?;
is_url_blocked(&url, &url_blocklist)?;
check_url_scheme(&url)?;
check_url_scheme(&custom_thumbnail)?;

@ -8,6 +8,7 @@ use lemmy_api_common::{
send_activity::{ActivityChannel, SendActivityData},
utils::{
check_community_user_action,
get_url_blocklist,
local_site_to_slur_regex,
process_markdown_opt,
proxy_image_link_opt_apub,
@ -30,6 +31,7 @@ use lemmy_utils::{
validation::{
check_url_scheme,
clean_url_params,
is_url_blocked,
is_valid_alt_text_field,
is_valid_body_field,
is_valid_post_title,
@ -51,9 +53,11 @@ pub async fn update_post(
let url = data.url.as_ref().map(clean_url_params);
let custom_thumbnail = data.custom_thumbnail.as_ref().map(clean_url_params);
let url_blocklist = get_url_blocklist(&context).await?;
let slur_regex = local_site_to_slur_regex(&local_site);
check_slurs_opt(&data.name, &slur_regex)?;
let body = process_markdown_opt(&data.body, &slur_regex, &context).await?;
let body = process_markdown_opt(&data.body, &slur_regex, &url_blocklist, &context).await?;
if let Some(name) = &data.name {
is_valid_post_title(name)?;
@ -61,6 +65,7 @@ pub async fn update_post(
is_valid_body_field(&body, true)?;
is_valid_alt_text_field(&data.alt_text)?;
is_url_blocked(&url, &url_blocklist)?;
check_url_scheme(&url)?;
check_url_scheme(&custom_thumbnail)?;

@ -8,6 +8,7 @@ use lemmy_api_common::{
check_person_block,
generate_local_apub_endpoint,
get_interface_language,
get_url_blocklist,
local_site_to_slur_regex,
process_markdown,
send_email_to_user,
@ -36,7 +37,8 @@ pub async fn create_private_message(
let local_site = LocalSite::read(&mut context.pool()).await?;
let slur_regex = local_site_to_slur_regex(&local_site);
let content = process_markdown(&data.content, &slur_regex, &context).await?;
let url_blocklist = get_url_blocklist(&context).await?;
let content = process_markdown(&data.content, &slur_regex, &url_blocklist, &context).await?;
is_valid_body_field(&Some(content.clone()), false)?;
check_person_block(

@ -4,7 +4,7 @@ use lemmy_api_common::{
context::LemmyContext,
private_message::{EditPrivateMessage, PrivateMessageResponse},
send_activity::{ActivityChannel, SendActivityData},
utils::{local_site_to_slur_regex, process_markdown},
utils::{get_url_blocklist, local_site_to_slur_regex, process_markdown},
};
use lemmy_db_schema::{
source::{
@ -37,7 +37,8 @@ pub async fn update_private_message(
// Doing the update
let slur_regex = local_site_to_slur_regex(&local_site);
let content = process_markdown(&data.content, &slur_regex, &context).await?;
let url_blocklist = get_url_blocklist(&context).await?;
let content = process_markdown(&data.content, &slur_regex, &url_blocklist, &context).await?;
is_valid_body_field(&Some(content.clone()), false)?;
let private_message_id = data.private_message_id;

@ -6,6 +6,7 @@ use lemmy_api_common::{
site::{CreateSite, SiteResponse},
utils::{
generate_shared_inbox_url,
get_url_blocklist,
is_admin,
local_site_rate_limit_to_rate_limit_config,
local_site_to_slur_regex,
@ -58,7 +59,8 @@ pub async fn create_site(
let keypair = generate_actor_keypair()?;
let slur_regex = local_site_to_slur_regex(&local_site);
let sidebar = process_markdown_opt(&data.sidebar, &slur_regex, &context).await?;
let url_blocklist = get_url_blocklist(&context).await?;
let sidebar = process_markdown_opt(&data.sidebar, &slur_regex, &url_blocklist, &context).await?;
let icon = proxy_image_link_opt_api(&data.icon, &context).await?;
let banner = proxy_image_link_opt_api(&data.banner, &context).await?;

@ -6,6 +6,7 @@ use lemmy_api_common::{
use lemmy_db_schema::source::{
actor_language::{LocalUserLanguage, SiteLanguage},
language::Language,
local_site_url_blocklist::LocalSiteUrlBlocklist,
tagline::Tagline,
};
use lemmy_db_views::structs::{CustomEmojiView, LocalUserView, SiteView};
@ -47,6 +48,7 @@ pub async fn get_site(
let taglines = Tagline::get_all(&mut context.pool(), site_view.local_site.id).await?;
let custom_emojis =
CustomEmojiView::get_all(&mut context.pool(), site_view.local_site.id).await?;
let blocked_urls = LocalSiteUrlBlocklist::get_all(&mut context.pool()).await?;
Ok(GetSiteResponse {
site_view,
admins,
@ -56,6 +58,7 @@ pub async fn get_site(
discussion_languages,
taglines,
custom_emojis,
blocked_urls,
})
})
.await

@ -4,6 +4,7 @@ use lemmy_api_common::{
context::LemmyContext,
site::{EditSite, SiteResponse},
utils::{
get_url_blocklist,
is_admin,
local_site_rate_limit_to_rate_limit_config,
local_site_to_slur_regex,
@ -18,6 +19,7 @@ use lemmy_db_schema::{
federation_blocklist::FederationBlockList,
local_site::{LocalSite, LocalSiteUpdateForm},
local_site_rate_limit::{LocalSiteRateLimit, LocalSiteRateLimitUpdateForm},
local_site_url_blocklist::LocalSiteUrlBlocklist,
local_user::LocalUser,
site::{Site, SiteUpdateForm},
tagline::Tagline,
@ -34,6 +36,7 @@ use lemmy_utils::{
validation::{
build_and_check_regex,
check_site_visibility_valid,
check_urls_are_valid,
is_valid_body_field,
site_description_length_check,
site_name_length_check,
@ -61,7 +64,8 @@ pub async fn update_site(
}
let slur_regex = local_site_to_slur_regex(&local_site);
let sidebar = process_markdown_opt(&data.sidebar, &slur_regex, &context).await?;
let url_blocklist = get_url_blocklist(&context).await?;
let sidebar = process_markdown_opt(&data.sidebar, &slur_regex, &url_blocklist, &context).await?;
let icon = proxy_image_link_opt_api(&data.icon, &context).await?;
let banner = proxy_image_link_opt_api(&data.banner, &context).await?;
@ -137,6 +141,11 @@ pub async fn update_site(
let blocked = data.blocked_instances.clone();
FederationBlockList::replace(&mut context.pool(), blocked).await?;
if let Some(url_blocklist) = data.blocked_urls.clone() {
let parsed_urls = check_urls_are_valid(&url_blocklist)?;
LocalSiteUrlBlocklist::replace(&mut context.pool(), parsed_urls).await?;
}
// TODO can't think of a better way to do this.
// If the server suddenly requires email verification, or required applications, no old users
// will be able to log in. It really only wants this to be a requirement for NEW signups.
@ -578,6 +587,7 @@ mod tests {
captcha_difficulty: None,
allowed_instances: None,
blocked_instances: None,
blocked_urls: None,
taglines: None,
registration_mode: site_registration_mode,
reports_email_admins: None,

@ -18,7 +18,7 @@ use activitypub_federation::{
use chrono::{DateTime, Utc};
use lemmy_api_common::{
context::LemmyContext,
utils::{is_mod_or_admin, local_site_opt_to_slur_regex, process_markdown},
utils::{get_url_blocklist, is_mod_or_admin, local_site_opt_to_slur_regex, process_markdown},
};
use lemmy_db_schema::{
source::{
@ -165,7 +165,8 @@ impl Object for ApubComment {
let local_site = LocalSite::read(&mut context.pool()).await.ok();
let slur_regex = &local_site_opt_to_slur_regex(&local_site);
let content = process_markdown(&content, slur_regex, context).await?;
let url_blocklist = get_url_blocklist(context).await?;
let content = process_markdown(&content, slur_regex, &url_blocklist, context).await?;
let language_id =
LanguageTag::to_language_id_single(note.language, &mut context.pool()).await?;

@ -21,6 +21,7 @@ use lemmy_api_common::{
generate_featured_url,
generate_moderators_url,
generate_outbox_url,
get_url_blocklist,
local_site_opt_to_slur_regex,
process_markdown_opt,
proxy_image_link_opt_apub,
@ -141,8 +142,10 @@ impl Object for ApubCommunity {
let local_site = LocalSite::read(&mut context.pool()).await.ok();
let slur_regex = &local_site_opt_to_slur_regex(&local_site);
let url_blocklist = get_url_blocklist(context).await?;
let description = read_from_string_or_source_opt(&group.summary, &None, &group.source);
let description = process_markdown_opt(&description, slur_regex, context).await?;
let description =
process_markdown_opt(&description, slur_regex, &url_blocklist, context).await?;
let icon = proxy_image_link_opt_apub(group.icon.map(|i| i.url), context).await?;
let banner = proxy_image_link_opt_apub(group.image.map(|i| i.url), context).await?;

@ -19,7 +19,12 @@ use activitypub_federation::{
use chrono::{DateTime, Utc};
use lemmy_api_common::{
context::LemmyContext,
utils::{local_site_opt_to_slur_regex, process_markdown_opt, proxy_image_link_opt_apub},
utils::{
get_url_blocklist,
local_site_opt_to_slur_regex,
process_markdown_opt,
proxy_image_link_opt_apub,
},
};
use lemmy_db_schema::{
newtypes::InstanceId,
@ -138,8 +143,9 @@ impl Object for ApubSite {
let local_site = LocalSite::read(&mut context.pool()).await.ok();
let slur_regex = &local_site_opt_to_slur_regex(&local_site);
let url_blocklist = get_url_blocklist(context).await?;
let sidebar = read_from_string_or_source_opt(&apub.content, &None, &apub.source);
let sidebar = process_markdown_opt(&sidebar, slur_regex, context).await?;
let sidebar = process_markdown_opt(&sidebar, slur_regex, &url_blocklist, context).await?;
let icon = proxy_image_link_opt_apub(apub.icon.map(|i| i.url), context).await?;
let banner = proxy_image_link_opt_apub(apub.image.map(|i| i.url), context).await?;

@ -22,6 +22,7 @@ use lemmy_api_common::{
context::LemmyContext,
utils::{
generate_outbox_url,
get_url_blocklist,
local_site_opt_to_slur_regex,
process_markdown_opt,
proxy_image_link_opt_apub,
@ -152,8 +153,9 @@ impl Object for ApubPerson {
let local_site = LocalSite::read(&mut context.pool()).await.ok();
let slur_regex = &local_site_opt_to_slur_regex(&local_site);
let url_blocklist = get_url_blocklist(context).await?;
let bio = read_from_string_or_source_opt(&person.summary, &None, &person.source);
let bio = process_markdown_opt(&bio, slur_regex, context).await?;
let bio = process_markdown_opt(&bio, slur_regex, &url_blocklist, context).await?;
let avatar = proxy_image_link_opt_apub(person.icon.map(|i| i.url), context).await?;
let banner = proxy_image_link_opt_apub(person.image.map(|i| i.url), context).await?;

@ -26,6 +26,7 @@ use lemmy_api_common::{
context::LemmyContext,
request::fetch_link_metadata_opt,
utils::{
get_url_blocklist,
local_site_opt_to_sensitive,
local_site_opt_to_slur_regex,
process_markdown_opt,
@ -246,9 +247,10 @@ impl Object for ApubPost {
let thumbnail_url = proxy_image_link_opt_apub(thumbnail_url, context).await?;
let slur_regex = &local_site_opt_to_slur_regex(&local_site);
let url_blocklist = get_url_blocklist(context).await?;
let body = read_from_string_or_source_opt(&page.content, &page.media_type, &page.source);
let body = process_markdown_opt(&body, slur_regex, context).await?;
let body = process_markdown_opt(&body, slur_regex, &url_blocklist, context).await?;
let language_id =
LanguageTag::to_language_id_single(page.language, &mut context.pool()).await?;

@ -14,7 +14,7 @@ use activitypub_federation::{
use chrono::{DateTime, Utc};
use lemmy_api_common::{
context::LemmyContext,
utils::{check_person_block, local_site_opt_to_slur_regex, process_markdown},
utils::{check_person_block, get_url_blocklist, local_site_opt_to_slur_regex, process_markdown},
};
use lemmy_db_schema::{
source::{
@ -127,8 +127,9 @@ impl Object for ApubPrivateMessage {
let local_site = LocalSite::read(&mut context.pool()).await.ok();
let slur_regex = &local_site_opt_to_slur_regex(&local_site);
let url_blocklist = get_url_blocklist(context).await?;
let content = read_from_string_or_source(&note.content, &None, &note.source);
let content = process_markdown(&content, slur_regex, context).await?;
let content = process_markdown(&content, slur_regex, &url_blocklist, context).await?;
let form = PrivateMessageInsertForm {
creator_id: creator.id,

@ -0,0 +1,49 @@
use crate::{
schema::local_site_url_blocklist,
source::local_site_url_blocklist::{LocalSiteUrlBlocklist, LocalSiteUrlBlocklistForm},
utils::{get_conn, DbPool},
};
use diesel::{dsl::insert_into, result::Error};
use diesel_async::{AsyncPgConnection, RunQueryDsl};
impl LocalSiteUrlBlocklist {
pub async fn replace(pool: &mut DbPool<'_>, url_blocklist: Vec<String>) -> Result<(), Error> {
let conn = &mut get_conn(pool).await?;
conn
.build_transaction()
.run(|conn| {
Box::pin(async move {
use crate::schema::local_site_url_blocklist::dsl::local_site_url_blocklist;
Self::clear(conn).await?;
let forms = url_blocklist
.into_iter()
.map(|url| LocalSiteUrlBlocklistForm { url, updated: None })
.collect::<Vec<_>>();
insert_into(local_site_url_blocklist)
.values(forms)
.execute(conn)
.await?;
Ok(())
}) as _
})
.await
}
async fn clear(conn: &mut AsyncPgConnection) -> Result<usize, Error> {
diesel::delete(local_site_url_blocklist::table)
.execute(conn)
.await
}
pub async fn get_all(pool: &mut DbPool<'_>) -> Result<Vec<Self>, Error> {
let conn = &mut get_conn(pool).await?;
local_site_url_blocklist::table
.get_results::<Self>(conn)
.await
}
}

@ -17,6 +17,7 @@ pub mod instance_block;
pub mod language;
pub mod local_site;
pub mod local_site_rate_limit;
pub mod local_site_url_blocklist;
pub mod local_user;
pub mod local_user_vote_display_mode;
pub mod login_token;

@ -409,6 +409,15 @@ diesel::table! {
}
}
diesel::table! {
local_site_url_blocklist (id) {
id -> Int4,
url -> Text,
published -> Timestamptz,
updated -> Nullable<Timestamptz>,
}
}
diesel::table! {
use diesel::sql_types::*;
use super::sql_types::SortTypeEnum;
@ -1052,6 +1061,7 @@ diesel::allow_tables_to_appear_in_same_query!(
local_image,
local_site,
local_site_rate_limit,
local_site_url_blocklist,
local_user,
local_user_language,
local_user_vote_display_mode,

@ -0,0 +1,28 @@
#[cfg(feature = "full")]
use crate::schema::local_site_url_blocklist;
use chrono::{DateTime, Utc};
use serde::{Deserialize, Serialize};
use serde_with::skip_serializing_none;
#[cfg(feature = "full")]
use ts_rs::TS;
#[skip_serializing_none]
#[derive(Clone, PartialEq, Eq, Debug, Serialize, Deserialize)]
#[cfg_attr(feature = "full", derive(Queryable, Selectable, Identifiable, TS))]
#[cfg_attr(feature = "full", diesel(table_name = local_site_url_blocklist))]
#[cfg_attr(feature = "full", diesel(check_for_backend(diesel::pg::Pg)))]
#[cfg_attr(feature = "full", ts(export))]
pub struct LocalSiteUrlBlocklist {
pub id: i32,
pub url: String,
pub published: DateTime<Utc>,
pub updated: Option<DateTime<Utc>>,
}
#[derive(Default, Clone)]
#[cfg_attr(feature = "full", derive(Insertable, AsChangeset))]
#[cfg_attr(feature = "full", diesel(table_name = local_site_url_blocklist))]
pub struct LocalSiteUrlBlocklistForm {
pub url: String,
pub updated: Option<DateTime<Utc>>,
}

@ -22,6 +22,7 @@ pub mod instance_block;
pub mod language;
pub mod local_site;
pub mod local_site_rate_limit;
pub mod local_site_url_blocklist;
pub mod local_user;
pub mod local_user_vote_display_mode;
pub mod login_token;

@ -135,6 +135,7 @@ pub enum LemmyErrorType {
CouldntSetAllRegistrationsAccepted,
CouldntSetAllEmailVerified,
Banned,
BlockedUrl,
CouldntGetComments,
CouldntGetPosts,
InvalidUrl,

@ -1,6 +1,7 @@
use crate::settings::SETTINGS;
use crate::{error::LemmyResult, settings::SETTINGS, LemmyErrorType};
use markdown_it::{plugins::cmark::inline::image::Image, MarkdownIt};
use once_cell::sync::Lazy;
use regex::RegexSet;
use url::Url;
use urlencoding::encode;
@ -98,6 +99,13 @@ pub fn markdown_rewrite_image_links(mut src: String) -> (String, Vec<Url>) {
(src, links)
}
pub fn markdown_check_for_blocked_urls(text: &str, blocklist: &RegexSet) -> LemmyResult<()> {
if blocklist.is_match(text) {
Err(LemmyErrorType::BlockedUrl)?
}
Ok(())
}
#[cfg(test)]
mod tests {
#![allow(clippy::unwrap_used)]
@ -109,65 +117,65 @@ mod tests {
#[test]
fn test_basic_markdown() {
let tests: Vec<_> = vec![
(
"headings",
"# h1\n## h2\n### h3\n#### h4\n##### h5\n###### h6",
"<h1>h1</h1>\n<h2>h2</h2>\n<h3>h3</h3>\n<h4>h4</h4>\n<h5>h5</h5>\n<h6>h6</h6>\n"
),
(
"line breaks",
"First\rSecond",
"<p>First\nSecond</p>\n"),
(
"emphasis",
"__bold__ **bold** *italic* ***bold+italic***",
"<p><strong>bold</strong> <strong>bold</strong> <em>italic</em> <em><strong>bold+italic</strong></em></p>\n"
),
(
"blockquotes",
"> #### Hello\n > \n > - Hola\n > - 안영 \n>> Goodbye\n",
"<blockquote>\n<h4>Hello</h4>\n<ul>\n<li>Hola</li>\n<li>안영</li>\n</ul>\n<blockquote>\n<p>Goodbye</p>\n</blockquote>\n</blockquote>\n"
),
(
"lists (ordered, unordered)",
"1. pen\n2. apple\n3. apple pen\n- pen\n- pineapple\n- pineapple pen",
"<ol>\n<li>pen</li>\n<li>apple</li>\n<li>apple pen</li>\n</ol>\n<ul>\n<li>pen</li>\n<li>pineapple</li>\n<li>pineapple pen</li>\n</ul>\n"
),
(
"code and code blocks",
"this is my amazing `code snippet` and my amazing ```code block```",
"<p>this is my amazing <code>code snippet</code> and my amazing <code>code block</code></p>\n"
),
// Links with added nofollow attribute
(
"links",
"[Lemmy](https://join-lemmy.org/ \"Join Lemmy!\")",
"<p><a href=\"https://join-lemmy.org/\" rel=\"nofollow\" title=\"Join Lemmy!\">Lemmy</a></p>\n"
),
// Remote images with proxy
(
"images",
"![My linked image](https://example.com/image.png \"image alt text\")",
"<p><img src=\"https://example.com/image.png\" alt=\"My linked image\" title=\"image alt text\" /></p>\n"
),
// Local images without proxy
(
"images",
"![My linked image](https://lemmy-alpha/image.png \"image alt text\")",
"<p><img src=\"https://lemmy-alpha/image.png\" alt=\"My linked image\" title=\"image alt text\" /></p>\n"
),
// Ensure spoiler plugin is added
(
"basic spoiler",
"::: spoiler click to see more\nhow spicy!\n:::\n",
"<details><summary>click to see more</summary><p>how spicy!\n</p></details>\n"
),
(
"escape html special chars",
"<script>alert('xss');</script> hello &\"",
"<p>&lt;script&gt;alert(xss);&lt;/script&gt; hello &amp;&quot;</p>\n"
)
];
(
"headings",
"# h1\n## h2\n### h3\n#### h4\n##### h5\n###### h6",
"<h1>h1</h1>\n<h2>h2</h2>\n<h3>h3</h3>\n<h4>h4</h4>\n<h5>h5</h5>\n<h6>h6</h6>\n"
),
(
"line breaks",
"First\rSecond",
"<p>First\nSecond</p>\n"),
(
"emphasis",
"__bold__ **bold** *italic* ***bold+italic***",
"<p><strong>bold</strong> <strong>bold</strong> <em>italic</em> <em><strong>bold+italic</strong></em></p>\n"
),
(
"blockquotes",
"> #### Hello\n > \n > - Hola\n > - 안영 \n>> Goodbye\n",
"<blockquote>\n<h4>Hello</h4>\n<ul>\n<li>Hola</li>\n<li>안영</li>\n</ul>\n<blockquote>\n<p>Goodbye</p>\n</blockquote>\n</blockquote>\n"
),
(
"lists (ordered, unordered)",
"1. pen\n2. apple\n3. apple pen\n- pen\n- pineapple\n- pineapple pen",
"<ol>\n<li>pen</li>\n<li>apple</li>\n<li>apple pen</li>\n</ol>\n<ul>\n<li>pen</li>\n<li>pineapple</li>\n<li>pineapple pen</li>\n</ul>\n"
),
(
"code and code blocks",
"this is my amazing `code snippet` and my amazing ```code block```",
"<p>this is my amazing <code>code snippet</code> and my amazing <code>code block</code></p>\n"
),
// Links with added nofollow attribute
(
"links",
"[Lemmy](https://join-lemmy.org/ \"Join Lemmy!\")",
"<p><a href=\"https://join-lemmy.org/\" rel=\"nofollow\" title=\"Join Lemmy!\">Lemmy</a></p>\n"
),
// Remote images with proxy
(
"images",
"![My linked image](https://example.com/image.png \"image alt text\")",
"<p><img src=\"https://example.com/image.png\" alt=\"My linked image\" title=\"image alt text\" /></p>\n"
),
// Local images without proxy
(
"images",
"![My linked image](https://lemmy-alpha/image.png \"image alt text\")",
"<p><img src=\"https://lemmy-alpha/image.png\" alt=\"My linked image\" title=\"image alt text\" /></p>\n"
),
// Ensure spoiler plugin is added
(
"basic spoiler",
"::: spoiler click to see more\nhow spicy!\n:::\n",
"<details><summary>click to see more</summary><p>how spicy!\n</p></details>\n"
),
(
"escape html special chars",
"<script>alert('xss');</script> hello &\"",
"<p>&lt;script&gt;alert(xss);&lt;/script&gt; hello &amp;&quot;</p>\n"
)
];
tests.iter().for_each(|&(msg, input, expected)| {
let result = markdown_to_html(input);
@ -184,46 +192,46 @@ mod tests {
fn test_markdown_proxy_images() {
let tests: Vec<_> =
vec![
(
"remote image proxied",
"![link](http://example.com/image.jpg)",
"![link](https://lemmy-alpha/api/v3/image_proxy?url=http%3A%2F%2Fexample.com%2Fimage.jpg)",
),
(
"local image unproxied",
"![link](http://lemmy-alpha/image.jpg)",
"![link](http://lemmy-alpha/image.jpg)",
),
(
"multiple image links",
"![link](http://example.com/image1.jpg) ![link](http://example.com/image2.jpg)",
"![link](https://lemmy-alpha/api/v3/image_proxy?url=http%3A%2F%2Fexample.com%2Fimage1.jpg) ![link](https://lemmy-alpha/api/v3/image_proxy?url=http%3A%2F%2Fexample.com%2Fimage2.jpg)",
),
(
"empty link handled",
"![image]()",
"![image]()"
),
(
"empty label handled",
"![](http://example.com/image.jpg)",
"![](https://lemmy-alpha/api/v3/image_proxy?url=http%3A%2F%2Fexample.com%2Fimage.jpg)"
),
(
"invalid image link removed",
"![image](http-not-a-link)",
"![image]()"
),
(
"label with nested markdown handled",
"![a *b* c](http://example.com/image.jpg)",
"![a *b* c](https://lemmy-alpha/api/v3/image_proxy?url=http%3A%2F%2Fexample.com%2Fimage.jpg)"
),
(
"custom emoji support",
r#"![party-blob](https://www.hexbear.net/pictrs/image/83405746-0620-4728-9358-5f51b040ffee.gif "emoji party-blob")"#,
r#"![party-blob](https://lemmy-alpha/api/v3/image_proxy?url=https%3A%2F%2Fwww.hexbear.net%2Fpictrs%2Fimage%2F83405746-0620-4728-9358-5f51b040ffee.gif "emoji party-blob")"#
)
(
"remote image proxied",
"![link](http://example.com/image.jpg)",
"![link](https://lemmy-alpha/api/v3/image_proxy?url=http%3A%2F%2Fexample.com%2Fimage.jpg)",
),
(
"local image unproxied",
"![link](http://lemmy-alpha/image.jpg)",
"![link](http://lemmy-alpha/image.jpg)",
),
(
"multiple image links",
"![link](http://example.com/image1.jpg) ![link](http://example.com/image2.jpg)",
"![link](https://lemmy-alpha/api/v3/image_proxy?url=http%3A%2F%2Fexample.com%2Fimage1.jpg) ![link](https://lemmy-alpha/api/v3/image_proxy?url=http%3A%2F%2Fexample.com%2Fimage2.jpg)",
),
(
"empty link handled",
"![image]()",
"![image]()"
),
(
"empty label handled",
"![](http://example.com/image.jpg)",
"![](https://lemmy-alpha/api/v3/image_proxy?url=http%3A%2F%2Fexample.com%2Fimage.jpg)"
),
(
"invalid image link removed",
"![image](http-not-a-link)",
"![image]()"
),
(
"label with nested markdown handled",
"![a *b* c](http://example.com/image.jpg)",
"![a *b* c](https://lemmy-alpha/api/v3/image_proxy?url=http%3A%2F%2Fexample.com%2Fimage.jpg)"
),
(
"custom emoji support",
r#"![party-blob](https://www.hexbear.net/pictrs/image/83405746-0620-4728-9358-5f51b040ffee.gif "emoji party-blob")"#,
r#"![party-blob](https://lemmy-alpha/api/v3/image_proxy?url=https%3A%2F%2Fwww.hexbear.net%2Fpictrs%2Fimage%2F83405746-0620-4728-9358-5f51b040ffee.gif "emoji party-blob")"#
)
];
tests.iter().for_each(|&(msg, input, expected)| {
@ -237,6 +245,69 @@ mod tests {
});
}
#[test]
fn test_url_blocking() {
let set = RegexSet::new(vec![r"(https://)?example\.com/?"]).unwrap();
assert!(
markdown_check_for_blocked_urls(&String::from("[](https://example.com)"), &set).is_err()
);
assert!(markdown_check_for_blocked_urls(
&String::from("Go to https://example.com to get free Robux"),
&set
)
.is_err());
assert!(
markdown_check_for_blocked_urls(&String::from("[](https://example.blog)"), &set).is_ok()
);
assert!(markdown_check_for_blocked_urls(&String::from("example.com"), &set).is_err());
assert!(markdown_check_for_blocked_urls(
"Odio exercitationem culpa sed sunt
et. Sit et similique tempora deserunt doloremque. Cupiditate iusto
repellat et quis qui. Cum veritatis facere quasi repellendus sunt
eveniet nemo sint. Cumque sit unde est. https://example.com Alias
repellendus at quos.",
&set
)
.is_err());
let set = RegexSet::new(vec![r"(https://)?example\.com/spam\.jpg"]).unwrap();
assert!(markdown_check_for_blocked_urls(
&String::from("![](https://example.com/spam.jpg)"),
&set
)
.is_err());
let set = RegexSet::new(vec![
r"(https://)?quo\.example\.com/?",
r"(https://)?foo\.example\.com/?",
r"(https://)?bar\.example\.com/?",
])
.unwrap();
assert!(
markdown_check_for_blocked_urls(&String::from("https://baz.example.com"), &set).is_ok()
);
assert!(
markdown_check_for_blocked_urls(&String::from("https://bar.example.com"), &set).is_err()
);
let set = RegexSet::new(vec![r"(https://)?example\.com/banned_page"]).unwrap();
assert!(
markdown_check_for_blocked_urls(&String::from("https://example.com/page"), &set).is_ok()
);
let set = RegexSet::new(vec![r"(https://)?ex\.mple\.com/?"]).unwrap();
assert!(markdown_check_for_blocked_urls("example.com", &set).is_ok());
}
#[test]
fn test_sanitize_html() {
let sanitized = sanitize_html("<script>alert('xss');</script> hello &\"'");

@ -1,8 +1,8 @@
use crate::error::{LemmyErrorExt, LemmyErrorType, LemmyResult};
use itertools::Itertools;
use once_cell::sync::Lazy;
use regex::{Regex, RegexBuilder};
use url::Url;
use regex::{Regex, RegexBuilder, RegexSet};
use url::{ParseError, Url};
// From here: https://github.com/vector-im/element-android/blob/develop/matrix-sdk-android/src/main/java/org/matrix/android/sdk/api/MatrixPatterns.kt#L35
static VALID_MATRIX_ID_REGEX: Lazy<Regex> = Lazy::new(|| {
@ -299,6 +299,33 @@ pub fn check_url_scheme(url: &Option<Url>) -> LemmyResult<()> {
}
}
pub fn is_url_blocked(url: &Option<Url>, blocklist: &RegexSet) -> LemmyResult<()> {
if let Some(url) = url {
if blocklist.is_match(url.as_str()) {
Err(LemmyErrorType::BlockedUrl)?
}
}
Ok(())
}
pub fn check_urls_are_valid(urls: &Vec<String>) -> LemmyResult<Vec<String>> {
let mut parsed_urls = vec![];
for url in urls {
let url = Url::parse(url).or_else(|e| {
if e == ParseError::RelativeUrlWithoutBase {
Url::parse(&format!("https://{url}"))
} else {
Err(e)
}
})?;
parsed_urls.push(url.to_string());
}
Ok(parsed_urls)
}
#[cfg(test)]
mod tests {
#![allow(clippy::unwrap_used)]
@ -310,7 +337,9 @@ mod tests {
build_and_check_regex,
check_site_visibility_valid,
check_url_scheme,
check_urls_are_valid,
clean_url_params,
is_url_blocked,
is_valid_actor_name,
is_valid_bio_field,
is_valid_display_name,
@ -550,4 +579,38 @@ mod tests {
let magnet_link="magnet:?xt=urn:btih:4b390af3891e323778959d5abfff4b726510f14c&dn=Ravel%20Complete%20Piano%20Sheet%20Music%20-%20Public%20Domain&tr=udp%3A%2F%2Fopen.tracker.cl%3A1337%2Fannounce";
assert!(check_url_scheme(&Some(Url::parse(magnet_link).unwrap())).is_ok());
}
#[test]
fn test_url_block() {
let set = regex::RegexSet::new(vec![
r"(https://)?example\.org/page/to/article",
r"(https://)?example\.net/?",
r"(https://)?example\.com/?",
])
.unwrap();
assert!(is_url_blocked(&Some(Url::parse("https://example.blog").unwrap()), &set).is_ok());
assert!(is_url_blocked(&Some(Url::parse("https://example.org").unwrap()), &set).is_ok());
assert!(is_url_blocked(&None, &set).is_ok());
assert!(is_url_blocked(&Some(Url::parse("https://example.com").unwrap()), &set).is_err());
}
#[test]
fn test_url_parsed() {
assert_eq!(
vec![String::from("https://example.com/")],
check_urls_are_valid(&vec![String::from("example.com")]).unwrap()
);
assert!(check_urls_are_valid(&vec![
String::from("example.com"),
String::from("https://example.blog")
])
.is_ok());
assert!(check_urls_are_valid(&vec![String::from("https://example .com"),]).is_err());
}
}

@ -0,0 +1,3 @@
-- This file should undo anything in `up.sql`
DROP TABLE local_site_url_blocklist;

@ -0,0 +1,7 @@
CREATE TABLE local_site_url_blocklist (
id serial NOT NULL PRIMARY KEY,
url text NOT NULL UNIQUE,
published timestamp with time zone NOT NULL DEFAULT now(),
updated timestamp with time zone
);
Loading…
Cancel
Save