Markdown link rule-dess (#4356)

* Extracting opengraph_data to its own type.

* A few additions for markdown-link-rule.

---------

Co-authored-by: Nutomic <me@nutomic.com>
markdown-link-rule
Dessalines 5 months ago committed by GitHub
parent 1f29e72127
commit 33999171ad
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -247,13 +247,23 @@ pub struct GetSiteMetadataResponse {
#[cfg_attr(feature = "full", ts(export))]
/// Site metadata, from its opengraph tags.
pub struct LinkMetadata {
#[serde(flatten)]
pub opengraph_data: OpenGraphData,
pub content_type: Option<String>,
#[serde(skip)]
pub thumbnail: Option<DbUrl>,
}
#[skip_serializing_none]
#[derive(Debug, Deserialize, Serialize, PartialEq, Eq, Clone, Default)]
#[cfg_attr(feature = "full", derive(TS))]
#[cfg_attr(feature = "full", ts(export))]
/// Site metadata, from its opengraph tags.
pub struct OpenGraphData {
pub title: Option<String>,
pub description: Option<String>,
pub(crate) image: Option<DbUrl>,
pub embed_video_url: Option<DbUrl>,
pub content_type: Option<String>,
#[serde(skip)]
pub thumbnail: Option<DbUrl>,
}
#[skip_serializing_none]

@ -1,5 +1,10 @@
use crate::{context::LemmyContext, post::LinkMetadata, utils::proxy_image_link};
use crate::{
context::LemmyContext,
post::{LinkMetadata, OpenGraphData},
utils::proxy_image_link,
};
use encoding::{all::encodings, DecoderTrap};
use lemmy_db_schema::newtypes::DbUrl;
use lemmy_utils::{
error::{LemmyError, LemmyErrorType},
settings::structs::Settings,
@ -43,29 +48,28 @@ pub async fn fetch_link_metadata(
.get(CONTENT_TYPE)
.and_then(|h| h.to_str().ok())
.and_then(|h| h.parse().ok());
let is_image = content_type.as_ref().unwrap_or(&mime::TEXT_PLAIN).type_() == mime::IMAGE;
// Can't use .text() here, because it only checks the content header, not the actual bytes
// https://github.com/LemmyNet/lemmy/issues/1964
let html_bytes = response.bytes().await.map_err(LemmyError::from)?.to_vec();
let mut metadata = extract_opengraph_data(&html_bytes, url).unwrap_or_default();
let opengraph_data = extract_opengraph_data(&html_bytes, url).unwrap_or_default();
let thumbnail = extract_thumbnail_from_opengraph_data(
url,
&opengraph_data,
&content_type,
generate_thumbnail,
context,
)
.await;
metadata.content_type = content_type.map(|c| c.to_string());
if generate_thumbnail && is_image {
let image_url = metadata
.image
.as_ref()
.map(lemmy_db_schema::newtypes::DbUrl::inner)
.unwrap_or(url);
metadata.thumbnail = generate_pictrs_thumbnail(image_url, context)
.await
.ok()
.map(Into::into);
}
Ok(metadata)
Ok(LinkMetadata {
opengraph_data,
content_type: content_type.map(|c| c.to_string()),
thumbnail,
})
}
#[tracing::instrument(skip_all)]
pub async fn fetch_link_metadata_opt(
url: Option<&Url>,
@ -81,7 +85,7 @@ pub async fn fetch_link_metadata_opt(
}
/// Extract site metadata from HTML Opengraph attributes.
fn extract_opengraph_data(html_bytes: &[u8], url: &Url) -> Result<LinkMetadata, LemmyError> {
fn extract_opengraph_data(html_bytes: &[u8], url: &Url) -> Result<OpenGraphData, LemmyError> {
let html = String::from_utf8_lossy(html_bytes);
// Make sure the first line is doctype html
@ -137,16 +141,38 @@ fn extract_opengraph_data(html_bytes: &[u8], url: &Url) -> Result<LinkMetadata,
// join also works if the target URL is absolute
.and_then(|v| url.join(&v.url).ok());
Ok(LinkMetadata {
Ok(OpenGraphData {
title: og_title.or(page_title),
description: og_description.or(page_description),
image: og_image.map(Into::into),
embed_video_url: og_embed_url.map(Into::into),
content_type: None,
thumbnail: None,
})
}
/// Produce a thumbnail URL for a link, if one was requested and the link is an image.
///
/// A thumbnail is only attempted when `generate_thumbnail` is `true` and `content_type` is
/// present with a top-level type of `image`; in every other case `None` is returned without
/// calling pictrs. The thumbnail source is the opengraph `image` when it is set, otherwise
/// `url` itself. A failure from `generate_pictrs_thumbnail` is discarded and reported as `None`.
#[tracing::instrument(skip_all)]
pub async fn extract_thumbnail_from_opengraph_data(
  url: &Url,
  opengraph_data: &OpenGraphData,
  content_type: &Option<Mime>,
  generate_thumbnail: bool,
  context: &LemmyContext,
) -> Option<DbUrl> {
  // A missing content type is treated as non-image.
  let points_at_image =
    matches!(content_type, Some(mime_type) if mime_type.type_() == mime::IMAGE);
  if !(generate_thumbnail && points_at_image) {
    return None;
  }

  // Prefer the image advertised via opengraph; fall back to the linked URL.
  let thumbnail_source = match opengraph_data.image.as_ref() {
    Some(og_image) => lemmy_db_schema::newtypes::DbUrl::inner(og_image),
    None => url,
  };

  // Thumbnail generation is best-effort: an error simply yields no thumbnail.
  match generate_pictrs_thumbnail(thumbnail_source, context).await {
    Ok(generated) => Some(generated.into()),
    Err(_) => None,
  }
}
#[derive(Deserialize, Debug)]
struct PictrsResponse {
files: Vec<PictrsFile>,
@ -233,15 +259,7 @@ async fn generate_pictrs_thumbnail(
let pictrs_config = context.settings().pictrs_config()?;
if !pictrs_config.cache_external_link_previews {
return Ok(
proxy_image_link(
image_url.clone(),
context.settings().pictrs_config()?.image_proxy,
context,
)
.await?
.into(),
);
return Ok(proxy_image_link(image_url.clone(), context).await?.into());
}
// fetch remote non-pictrs images for persistent thumbnail link
@ -314,11 +332,11 @@ mod tests {
.unwrap();
assert_eq!(
Some("FAQ · Wiki · IzzyOnDroid / repo · GitLab".to_string()),
sample_res.title
sample_res.opengraph_data.title
);
assert_eq!(
Some("The F-Droid compatible repo at https://apt.izzysoft.de/fdroid/".to_string()),
sample_res.description
sample_res.opengraph_data.description
);
assert_eq!(
Some(
@ -326,9 +344,9 @@ mod tests {
.unwrap()
.into()
),
sample_res.image
sample_res.opengraph_data.image
);
assert_eq!(None, sample_res.embed_video_url);
assert_eq!(None, sample_res.opengraph_data.embed_video_url);
assert_eq!(
Some(mime::TEXT_HTML_UTF_8.to_string()),
sample_res.content_type

@ -866,28 +866,33 @@ pub async fn process_markdown_opt(
}
}
/// Rewrite a link to go through `/api/v3/image_proxy` endpoint. This is only for remote urls and
/// if image_proxy setting is enabled.
/// A wrapper for `proxy_image_link` for use in tests.
///
/// The parameter `image_proxy` is the config value of `pictrs.image_proxy`. Its necessary to pass
/// The parameter `force_image_proxy` is the config value of `pictrs.image_proxy`. It is necessary
/// to pass it as a separate parameter so it can be changed in tests.
pub(crate) async fn proxy_image_link(
pub(crate) async fn proxy_image_link_wrapper(
link: Url,
image_proxy: bool,
force_image_proxy: bool,
context: &LemmyContext,
) -> LemmyResult<DbUrl> {
// Dont rewrite links pointing to local domain.
if link.domain() == Some(&context.settings().hostname) || !image_proxy {
return Ok(link.into());
if link.domain() == Some(&context.settings().hostname) || !force_image_proxy {
Ok(link.into())
} else {
let proxied = format!(
"{}/api/v3/image_proxy?url={}",
context.settings().get_protocol_and_hostname(),
encode(link.as_str())
);
RemoteImage::create(&mut context.pool(), vec![link]).await?;
Ok(Url::parse(&proxied)?.into())
}
}
let proxied = format!(
"{}/api/v3/image_proxy?url={}",
context.settings().get_protocol_and_hostname(),
encode(link.as_str())
);
RemoteImage::create(&mut context.pool(), vec![link]).await?;
Ok(Url::parse(&proxied)?.into())
/// Rewrite a link to go through the `/api/v3/image_proxy` endpoint.
///
/// Delegates to `proxy_image_link_wrapper`, passing the `pictrs.image_proxy` config value as the
/// switch, so the link is only rewritten when that setting is enabled (the wrapper additionally
/// leaves links pointing to the local domain untouched).
///
/// Returns an error if the pictrs config cannot be read or if the wrapper itself fails.
pub(crate) async fn proxy_image_link(link: Url, context: &LemmyContext) -> LemmyResult<DbUrl> {
  // The wrapper returns the link unchanged whenever its flag is `false`, so a hard-coded `false`
  // here would silently disable the image proxy for every caller. Use the configured value.
  let image_proxy = context.settings().pictrs_config()?.image_proxy;
  proxy_image_link_wrapper(link, image_proxy, context).await
}
pub async fn proxy_image_link_opt_api(
@ -910,13 +915,7 @@ pub async fn proxy_image_link_api(
None => None,
};
if let Some(l) = link {
proxy_image_link(
l.into(),
context.settings().pictrs_config()?.image_proxy,
context,
)
.await
.map(Some)
proxy_image_link(l.into(), context).await.map(Some)
} else {
Ok(link)
}
@ -927,9 +926,7 @@ pub async fn proxy_image_link_opt_apub(
context: &LemmyContext,
) -> LemmyResult<Option<DbUrl>> {
if let Some(l) = link {
proxy_image_link(l, context.settings().pictrs_config()?.image_proxy, context)
.await
.map(Some)
proxy_image_link(l, context).await.map(Some)
} else {
Ok(None)
}
@ -991,14 +988,14 @@ mod tests {
// image from local domain is unchanged
let local_url = Url::parse("http://lemmy-alpha/image.png").unwrap();
let proxied = proxy_image_link(local_url.clone(), true, &context)
let proxied = proxy_image_link_wrapper(local_url.clone(), true, &context)
.await
.unwrap();
assert_eq!(&local_url, proxied.inner());
// image from remote domain is proxied
let remote_image = Url::parse("http://lemmy-beta/image.png").unwrap();
let proxied = proxy_image_link(remote_image.clone(), true, &context)
let proxied = proxy_image_link_wrapper(remote_image.clone(), true, &context)
.await
.unwrap();
assert_eq!(

@ -116,9 +116,9 @@ pub async fn create_post(
.community_id(data.community_id)
.creator_id(local_user_view.person.id)
.nsfw(data.nsfw)
.embed_title(metadata.title)
.embed_description(metadata.description)
.embed_video_url(metadata.embed_video_url)
.embed_title(metadata.opengraph_data.title)
.embed_description(metadata.opengraph_data.description)
.embed_video_url(metadata.opengraph_data.embed_video_url)
.language_id(language_id)
.thumbnail_url(metadata.thumbnail)
.build();

@ -75,9 +75,9 @@ pub async fn update_post(
Some(url) => {
let metadata = fetch_link_metadata(url, true, &context).await?;
(
Some(metadata.title),
Some(metadata.description),
Some(metadata.embed_video_url),
Some(metadata.opengraph_data.title),
Some(metadata.opengraph_data.description),
Some(metadata.opengraph_data.embed_video_url),
Some(metadata.thumbnail),
)
}

@ -250,9 +250,9 @@ impl Object for ApubPost {
updated: page.updated.map(Into::into),
deleted: Some(false),
nsfw: page.sensitive,
embed_title: metadata.title,
embed_description: metadata.description,
embed_video_url: metadata.embed_video_url,
embed_title: metadata.opengraph_data.title,
embed_description: metadata.opengraph_data.description,
embed_video_url: metadata.opengraph_data.embed_video_url,
thumbnail_url,
ap_id: Some(page.id.clone().into()),
local: Some(false),

Loading…
Cancel
Save