use crate::settings::SETTINGS;
use markdown_it::{plugins::cmark::inline::image::Image, MarkdownIt};
use once_cell::sync::Lazy;
use url::Url;
use urlencoding::encode;

mod link_rule;
mod spoiler_rule;

/// Shared markdown parser, built on first use.
///
/// Registers the CommonMark and "extra" plugin sets from `markdown_it`, followed by the
/// project-local spoiler and link rules.
static MARKDOWN_PARSER: Lazy<MarkdownIt> = Lazy::new(|| {
  let mut parser = MarkdownIt::new();
  markdown_it::plugins::cmark::add(&mut parser);
  markdown_it::plugins::extra::add(&mut parser);
  spoiler_rule::add(&mut parser);
  link_rule::add(&mut parser);

  parser
});

/// Replace special HTML characters in API parameters to prevent XSS attacks.
///
/// Taken from <https://github.com/OWASP/CheatSheetSeries/blob/master/cheatsheets/Cross_Site_Scripting_Prevention_Cheat_Sheet.md#output-encoding-for-html-contexts>
///
/// `>` is left in place because it is interpreted as markdown quote.
///
/// The characters `&`, `<`, `"` and `'` are replaced by `&amp;`, `&lt;`, `&quot;` and
/// `&#x27;` respectively; every other character is copied through unchanged.
pub fn sanitize_html(text: &str) -> String {
  // Single pass over the input: each character is inspected exactly once, so an `&` that is
  // emitted as part of an entity can never be escaped a second time.
  let mut escaped = String::with_capacity(text.len());
  for c in text.chars() {
    match c {
      '&' => escaped.push_str("&amp;"),
      '<' => escaped.push_str("&lt;"),
      '"' => escaped.push_str("&quot;"),
      '\'' => escaped.push_str("&#x27;"),
      _ => escaped.push(c),
    }
  }
  escaped
}

/// Renders markdown `text` to an HTML string using the shared [`MARKDOWN_PARSER`].
pub fn markdown_to_html(text: &str) -> String {
  MARKDOWN_PARSER.parse(text).xrender()
}

/// Rewrites all image links to remote domains in markdown, so they go through `/api/v3/image_proxy`.
pub fn markdown_rewrite_image_links(mut src: String) -> (String, Vec) { let ast = MARKDOWN_PARSER.parse(&src); let mut links_offsets = vec![]; // Walk the syntax tree to find positions of image links ast.walk(|node, _depth| { if let Some(image) = node.cast::() { // srcmap is always present for image // https://github.com/markdown-it-rust/markdown-it/issues/36#issuecomment-1777844387 let node_offsets = node.srcmap.expect("srcmap is none").get_byte_offsets(); // necessary for custom emojis which look like `![name](url "title")` let start_offset = node_offsets.1 - image.url.len() - 1 - image .title .as_ref() .map(|t| t.len() + 3) .unwrap_or_default(); let end_offset = node_offsets.1 - 1; links_offsets.push((start_offset, end_offset)); } }); let mut links = vec![]; // Go through the collected links in reverse order while let Some((start, end)) = links_offsets.pop() { let content = src.get(start..end).unwrap_or_default(); // necessary for custom emojis which look like `![name](url "title")` let (url, extra) = if content.contains(' ') { let split = content.split_once(' ').expect("split is valid"); (split.0, Some(split.1)) } else { (content, None) }; match Url::parse(url) { Ok(parsed) => { links.push(parsed.clone()); // If link points to remote domain, replace with proxied link if parsed.domain() != Some(&SETTINGS.hostname) { let mut proxied = format!( "{}/api/v3/image_proxy?url={}", SETTINGS.get_protocol_and_hostname(), encode(url), ); // restore custom emoji format if let Some(extra) = extra { proxied = format!("{proxied} {extra}"); } src.replace_range(start..end, &proxied); } } Err(_) => { // If its not a valid url, replace with empty text src.replace_range(start..end, ""); } } } (src, links) } #[cfg(test)] mod tests { #![allow(clippy::unwrap_used)] #![allow(clippy::indexing_slicing)] use super::*; use pretty_assertions::assert_eq; #[test] fn test_basic_markdown() { let tests: Vec<_> = vec![ ( "headings", "# h1\n## h2\n### h3\n#### h4\n##### h5\n###### h6", "

h1

\n

h2

\n

h3

\n

h4

\n
h5
\n
h6
\n" ), ( "line breaks", "First\rSecond", "

First\nSecond

\n"), ( "emphasis", "__bold__ **bold** *italic* ***bold+italic***", "

bold bold italic bold+italic

\n" ), ( "blockquotes", "> #### Hello\n > \n > - Hola\n > - 안영 \n>> Goodbye\n", "
\n

Hello

\n
    \n
  • Hola
  • \n
  • 안영
  • \n
\n
\n

Goodbye

\n
\n
\n" ), ( "lists (ordered, unordered)", "1. pen\n2. apple\n3. apple pen\n- pen\n- pineapple\n- pineapple pen", "
    \n
  1. pen
  2. \n
  3. apple
  4. \n
  5. apple pen
  6. \n
\n\n" ), ( "code and code blocks", "this is my amazing `code snippet` and my amazing ```code block```", "

this is my amazing code snippet and my amazing code block

\n" ), // Links with added nofollow attribute ( "links", "[Lemmy](https://join-lemmy.org/ \"Join Lemmy!\")", "

Lemmy

\n" ), // Remote images with proxy ( "images", "![My linked image](https://example.com/image.png \"image alt text\")", "

\"My

\n" ), // Local images without proxy ( "images", "![My linked image](https://lemmy-alpha/image.png \"image alt text\")", "

\"My

\n" ), // Ensure spoiler plugin is added ( "basic spoiler", "::: spoiler click to see more\nhow spicy!\n:::\n", "
click to see more

how spicy!\n

\n" ), ( "escape html special chars", " hello &\"", "

<script>alert(‘xss’);</script> hello &"

\n" ) ]; tests.iter().for_each(|&(msg, input, expected)| { let result = markdown_to_html(input); assert_eq!( result, expected, "Testing {}, with original input '{}'", msg, input ); }); } #[test] fn test_markdown_proxy_images() { let tests: Vec<_> = vec![ ( "remote image proxied", "![link](http://example.com/image.jpg)", "![link](https://lemmy-alpha/api/v3/image_proxy?url=http%3A%2F%2Fexample.com%2Fimage.jpg)", ), ( "local image unproxied", "![link](http://lemmy-alpha/image.jpg)", "![link](http://lemmy-alpha/image.jpg)", ), ( "multiple image links", "![link](http://example.com/image1.jpg) ![link](http://example.com/image2.jpg)", "![link](https://lemmy-alpha/api/v3/image_proxy?url=http%3A%2F%2Fexample.com%2Fimage1.jpg) ![link](https://lemmy-alpha/api/v3/image_proxy?url=http%3A%2F%2Fexample.com%2Fimage2.jpg)", ), ( "empty link handled", "![image]()", "![image]()" ), ( "empty label handled", "![](http://example.com/image.jpg)", "![](https://lemmy-alpha/api/v3/image_proxy?url=http%3A%2F%2Fexample.com%2Fimage.jpg)" ), ( "invalid image link removed", "![image](http-not-a-link)", "![image]()" ), ( "label with nested markdown handled", "![a *b* c](http://example.com/image.jpg)", "![a *b* c](https://lemmy-alpha/api/v3/image_proxy?url=http%3A%2F%2Fexample.com%2Fimage.jpg)" ), ( "custom emoji support", r#"![party-blob](https://www.hexbear.net/pictrs/image/83405746-0620-4728-9358-5f51b040ffee.gif "emoji party-blob")"#, r#"![party-blob](https://lemmy-alpha/api/v3/image_proxy?url=https%3A%2F%2Fwww.hexbear.net%2Fpictrs%2Fimage%2F83405746-0620-4728-9358-5f51b040ffee.gif "emoji party-blob")"# ) ]; tests.iter().for_each(|&(msg, input, expected)| { let result = markdown_rewrite_image_links(input.to_string()); assert_eq!( result.0, expected, "Testing {}, with original input '{}'", msg, input ); }); } #[test] fn test_sanitize_html() { let sanitized = sanitize_html(" hello &\"'"); let expected = "<script>alert('xss');</script> hello &"'"; assert_eq!(expected, sanitized) } }