Convert titles for posts from Mastodon to plaintext (fixes #3828) (#4033)

* Convert titles for posts from Mastodon to plaintext (fixes #3828)

* Fix prettier.

* Trigger build

* Convert titles for posts from Mastodon to plaintext (fixes #3828)

* Fix prettier.

* Fix sanitize.

---------

Co-authored-by: Dessalines <dessalines@users.noreply.github.com>
Co-authored-by: Dessalines <tyhou13@gmx.com>
pull/4038/head
Nutomic 8 months ago committed by GitHub
parent 645bf21d54
commit 608bb6b1b4
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

8
Cargo.lock generated

@ -2719,6 +2719,7 @@ dependencies = [
"enum_delegate", "enum_delegate",
"futures", "futures",
"html2md", "html2md",
"html2text",
"http", "http",
"itertools 0.11.0", "itertools 0.11.0",
"lemmy_api_common", "lemmy_api_common",
@ -2734,6 +2735,7 @@ dependencies = [
"serde_json", "serde_json",
"serde_with", "serde_with",
"serial_test", "serial_test",
"stringreader",
"strum_macros", "strum_macros",
"task-local-extensions", "task-local-extensions",
"tokio", "tokio",
@ -4950,6 +4952,12 @@ dependencies = [
"unicode-normalization", "unicode-normalization",
] ]
[[package]]
name = "stringreader"
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "913e7b03d63752f6cdd2df77da36749d82669904798fe8944b9ec3d23f159905"
[[package]] [[package]]
name = "strsim" name = "strsim"
version = "0.10.0" version = "0.10.0"

@ -38,6 +38,8 @@ anyhow = { workspace = true }
reqwest = { workspace = true } reqwest = { workspace = true }
once_cell = { workspace = true } once_cell = { workspace = true }
html2md = "0.2.14" html2md = "0.2.14"
html2text = "0.6.0"
stringreader = "0.1.1"
serde_with = { workspace = true } serde_with = { workspace = true }
enum_delegate = "0.2.0" enum_delegate = "0.2.0"
moka = { version = "0.11", features = ["future"] } moka = { version = "0.11", features = ["future"] }

@ -1,7 +1,7 @@
{ {
"id": "https://enterprise.lemmy.ml/c/tenforward", "id": "https://enterprise.lemmy.ml/c/tenforward",
"type": "Group", "type": "Group",
"preferredUsername": "main", "preferredUsername": "tenforward",
"name": "Ten Forward", "name": "Ten Forward",
"summary": "<p>Lounge and recreation facility</p>\n<hr />\n<p>Welcome to the <a href=\"https://memory-alpha.fandom.com/wiki/USS_Enterprise_(NCC-1701-D)\">Enterprise</a>!.</p>\n", "summary": "<p>Lounge and recreation facility</p>\n<hr />\n<p>Welcome to the <a href=\"https://memory-alpha.fandom.com/wiki/USS_Enterprise_(NCC-1701-D)\">Enterprise</a>!.</p>\n",
"source": { "source": {

@ -11,40 +11,42 @@
"votersCount": "toot:votersCount" "votersCount": "toot:votersCount"
} }
], ],
"id": "https://mastodon.madrid/users/felix/statuses/107224289116410645", "id": "https://dice.camp/users/thekernelinyellow/statuses/110830743680706519",
"type": "Note", "type": "Note",
"summary": null, "summary": null,
"published": "2021-11-05T11:46:50Z", "inReplyTo": null,
"url": "https://mastodon.madrid/@felix/107224289116410645", "published": "2023-08-04T09:55:39Z",
"attributedTo": "https://mastodon.madrid/users/felix", "url": "https://dice.camp/@thekernelinyellow/110830743680706519",
"to": ["https://mastodon.madrid/users/felix/followers"], "attributedTo": "https://dice.camp/users/thekernelinyellow",
"to": ["https://www.w3.org/ns/activitystreams#Public"],
"cc": [ "cc": [
"https://www.w3.org/ns/activitystreams#Public", "https://dice.camp/users/thekernelinyellow/followers",
"https://mamot.fr/users/retiolus" "https://enterprise.lemmy.ml/c/tenforward",
"https://enterprise.lemmy.ml/c/tenforward/followers"
], ],
"sensitive": false, "sensitive": false,
"atomUri": "https://mastodon.madrid/users/felix/statuses/107224289116410645", "atomUri": "https://dice.camp/users/thekernelinyellow/statuses/110830743680706519",
"inReplyToAtomUri": "https://mamot.fr/users/retiolus/statuses/107224244380204526", "inReplyToAtomUri": null,
"conversation": "tag:mamot.fr,2021-11-05:objectId=64635960:objectType=Conversation", "conversation": "tag:dice.camp,2023-08-04:objectId=29969291:objectType=Conversation",
"content": "<p><span class=\"h-card\"><a href=\"https://mamot.fr/@retiolus\" class=\"u-url mention\">@<span>retiolus</span></a></span> i have never been disappointed by a thinkpad. if you want to save money, get a model from a few years ago, there isnt a huge difference anyway.</p>", "content": "<p><span class=\"h-card\" translate=\"no\"><a href=\"https://enterprise.lemmy.ml/c/tenforward\" class=\"u-url mention\">@<span>tenforward</span></a></span> Variable never resetting at refresh</p><p>Hi! I&#39;m using a variable to count elements in my generator but every time I generate a new character, the counter&#39;s value carries on from the previous one. Is there a function to reset it (I set it to 0 at the beginning of the file)</p>",
"contentMap": { "contentMap": {
"en": "<p><span class=\"h-card\"><a href=\"https://mamot.fr/@retiolus\" class=\"u-url mention\">@<span>retiolus</span></a></span> i have never been disappointed by a thinkpad. if you want to save money, get a model from a few years ago, there isnt a huge difference anyway.</p>" "it": "<p><span class=\"h-card\" translate=\"no\"><a href=\"https://enterprise.lemmy.ml/c/tenforward\" class=\"u-url mention\">@<span>tenforward</span></a></span>Variable never resetting at refresh</p><p>Hi! I&#39;m using a variable to count elements in my generator but every time I generate a new character, the counter&#39;s value carries on from the previous one. Is there a function to reset it (I set it to 0 at the beginning of the file)</p>"
}, },
"attachment": [], "attachment": [],
"tag": [ "tag": [
{ {
"type": "Mention", "type": "Mention",
"href": "https://mamot.fr/users/retiolus", "href": "https://enterprise.lemmy.ml/c/tenforward",
"name": "@retiolus@mamot.fr" "name": "@tenforward@enterprise.lemmy.ml"
} }
], ],
"replies": { "replies": {
"id": "https://mastodon.madrid/users/felix/statuses/107224289116410645/replies", "id": "https://dice.camp/users/thekernelinyellow/statuses/110830743680706519/replies",
"type": "Collection", "type": "Collection",
"first": { "first": {
"type": "CollectionPage", "type": "CollectionPage",
"next": "https://mastodon.madrid/users/felix/statuses/107224289116410645/replies?only_other_accounts=true&page=true", "next": "https://dice.camp/users/thekernelinyellow/statuses/110830743680706519/replies?only_other_accounts=true&page=true",
"partOf": "https://mastodon.madrid/users/felix/statuses/107224289116410645/replies", "partOf": "https://dice.camp/users/thekernelinyellow/statuses/110830743680706519/replies",
"items": [] "items": []
} }
} }

@ -7,7 +7,7 @@ use crate::objects::{
use activitypub_federation::{config::Data, fetch::object_id::ObjectId}; use activitypub_federation::{config::Data, fetch::object_id::ObjectId};
use actix_web::web::Json; use actix_web::web::Json;
use futures::{future::try_join_all, StreamExt}; use futures::{future::try_join_all, StreamExt};
use lemmy_api_common::{context::LemmyContext, utils::sanitize_html_api_opt, SuccessResponse}; use lemmy_api_common::{context::LemmyContext, SuccessResponse};
use lemmy_db_schema::{ use lemmy_db_schema::{
newtypes::DbUrl, newtypes::DbUrl,
source::{ source::{
@ -20,6 +20,7 @@ use lemmy_db_schema::{
post::{PostSaved, PostSavedForm}, post::{PostSaved, PostSavedForm},
}, },
traits::{Blockable, Crud, Followable, Saveable}, traits::{Blockable, Crud, Followable, Saveable},
utils::diesel_option_overwrite,
}; };
use lemmy_db_views::structs::LocalUserView; use lemmy_db_views::structs::LocalUserView;
use lemmy_utils::{ use lemmy_utils::{
@ -96,8 +97,8 @@ pub async fn import_settings(
local_user_view: LocalUserView, local_user_view: LocalUserView,
context: Data<LemmyContext>, context: Data<LemmyContext>,
) -> Result<Json<SuccessResponse>, LemmyError> { ) -> Result<Json<SuccessResponse>, LemmyError> {
let display_name = Some(sanitize_html_api_opt(&data.display_name)); let display_name = diesel_option_overwrite(data.display_name.clone());
let bio = Some(sanitize_html_api_opt(&data.bio)); let bio = diesel_option_overwrite(data.bio.clone());
let person_form = PersonUpdateForm { let person_form = PersonUpdateForm {
display_name, display_name,

@ -21,7 +21,7 @@ use activitypub_federation::{
}; };
use anyhow::anyhow; use anyhow::anyhow;
use chrono::{DateTime, Utc}; use chrono::{DateTime, Utc};
use html2md::parse_html; use html2text::{from_read_with_decorator, render::text_renderer::TrivialDecorator};
use lemmy_api_common::{ use lemmy_api_common::{
context::LemmyContext, context::LemmyContext,
request::fetch_site_data, request::fetch_site_data,
@ -48,6 +48,7 @@ use lemmy_utils::{
}, },
}; };
use std::ops::Deref; use std::ops::Deref;
use stringreader::StringReader;
use url::Url; use url::Url;
const MAX_TITLE_LENGTH: usize = 200; const MAX_TITLE_LENGTH: usize = 200;
@ -171,11 +172,21 @@ impl Object for ApubPost {
.name .name
.clone() .clone()
.or_else(|| { .or_else(|| {
// Posts coming from Mastodon or similar platforms don't have a title. Instead we take the
// first line of the content and convert it from HTML to plaintext. We also remove mentions
// of the community name.
page page
.content .content
.clone() .as_deref()
.as_ref() .map(StringReader::new)
.and_then(|c| parse_html(c).lines().next().map(ToString::to_string)) .map(|c| from_read_with_decorator(c, MAX_TITLE_LENGTH, TrivialDecorator::new()))
.and_then(|c| {
c.lines().next().map(|s| {
s.replace(&format!("@{}", community.name), "")
.trim()
.to_string()
})
})
}) })
.ok_or_else(|| anyhow!("Object must have name or content"))?; .ok_or_else(|| anyhow!("Object must have name or content"))?;
if name.chars().count() > MAX_TITLE_LENGTH { if name.chars().count() > MAX_TITLE_LENGTH {
@ -288,8 +299,9 @@ mod tests {
use super::*; use super::*;
use crate::{ use crate::{
objects::{ objects::{
community::tests::parse_lemmy_community, community::{tests::parse_lemmy_community, ApubCommunity},
person::tests::parse_lemmy_person, instance::ApubSite,
person::{tests::parse_lemmy_person, ApubPerson},
post::ApubPost, post::ApubPost,
tests::init_context, tests::init_context,
}, },
@ -318,6 +330,31 @@ mod tests {
assert!(!post.featured_community); assert!(!post.featured_community);
assert_eq!(context.request_count(), 0); assert_eq!(context.request_count(), 0);
cleanup(&context, person, site, community, post).await;
}
// Regression test for deriving a post title from a Mastodon-style object that
// has no explicit `name`: the title is expected to be the first line of the
// content rendered as plaintext, with the community mention stripped.
#[tokio::test]
#[serial]
async fn test_convert_mastodon_post_title() {
let context = init_context().await;
// Set up the person, site and community before parsing the post; presumably the
// fixture refers to them, so they must already exist locally — verify against
// the fixture file.
let (person, site) = parse_lemmy_person(&context).await;
let community = parse_lemmy_community(&context).await;
let json = file_to_json_object("assets/mastodon/objects/page.json").unwrap();
let post = ApubPost::from_json(json, &context).await.unwrap();
// The title must be plain text only: no HTML markup and no leading
// "@<community>" mention.
assert_eq!(post.name, "Variable never resetting at refresh");
// Remove the objects created above by handing them to the shared cleanup helper.
cleanup(&context, person, site, community, post).await;
}
async fn cleanup(
context: &Data<LemmyContext>,
person: ApubPerson,
site: ApubSite,
community: ApubCommunity,
post: ApubPost,
) {
Post::delete(&mut context.pool(), post.id).await.unwrap(); Post::delete(&mut context.pool(), post.id).await.unwrap();
Person::delete(&mut context.pool(), person.id) Person::delete(&mut context.pool(), person.id)
.await .await

Loading…
Cancel
Save