Fix doctype check issue for metadata fetching. Fixes #4468

fix_html_doctype_check
Dessalines 7 months ago
parent f42420809b
commit e838bf3348

@@ -15,7 +15,7 @@ use mime::Mime;
use reqwest::{header::CONTENT_TYPE, Client, ClientBuilder};
use reqwest_middleware::ClientWithMiddleware;
use serde::Deserialize;
-use tracing::info;
+use tracing::{info, warn};
use url::Url;
use urlencoding::encode;
use webpage::HTML;
@@ -53,7 +53,9 @@ pub async fn fetch_link_metadata(
// https://github.com/LemmyNet/lemmy/issues/1964
let html_bytes = response.bytes().await.map_err(LemmyError::from)?.to_vec();
-let opengraph_data = extract_opengraph_data(&html_bytes, url).unwrap_or_default();
+let opengraph_data = extract_opengraph_data(&html_bytes, url)
+.map_err(|e| warn!("{e}"))
+.unwrap_or_default();
let thumbnail = extract_thumbnail_from_opengraph_data(
url,
&opengraph_data,
@@ -96,7 +98,7 @@ fn extract_opengraph_data(html_bytes: &[u8], url: &Url) -> Result<OpenGraphData,
.ok_or(LemmyErrorType::NoLinesInHtml)?
.to_lowercase();
-if !first_line.starts_with("<!doctype html>") {
+if !first_line.starts_with("<!doctype html") {
Err(LemmyErrorType::SiteMetadataPageIsNotDoctypeHtml)?
}

Loading…
Cancel
Save