mirror of https://github.com/LemmyNet/lemmy
* Organize utils into separate files. Fixes #2295 * Moving tests. * Fix test. * Fix test 2 (pull/2739/head)
parent
8a27978ee1
commit
36aaa50644
@ -1,98 +0,0 @@
|
|||||||
use crate::utils::{
|
|
||||||
is_valid_actor_name,
|
|
||||||
is_valid_display_name,
|
|
||||||
is_valid_matrix_id,
|
|
||||||
is_valid_post_title,
|
|
||||||
remove_slurs,
|
|
||||||
scrape_text_for_mentions,
|
|
||||||
slur_check,
|
|
||||||
slurs_vec_to_str,
|
|
||||||
};
|
|
||||||
use regex::RegexBuilder;
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn test_mentions_regex() {
|
|
||||||
let text = "Just read a great blog post by [@tedu@honk.teduangst.com](/u/test). And another by !test_community@fish.teduangst.com . Another [@lemmy@lemmy-alpha:8540](/u/fish)";
|
|
||||||
let mentions = scrape_text_for_mentions(text);
|
|
||||||
|
|
||||||
assert_eq!(mentions[0].name, "tedu".to_string());
|
|
||||||
assert_eq!(mentions[0].domain, "honk.teduangst.com".to_string());
|
|
||||||
assert_eq!(mentions[1].domain, "lemmy-alpha:8540".to_string());
|
|
||||||
}
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn test_valid_actor_name() {
|
|
||||||
let actor_name_max_length = 20;
|
|
||||||
assert!(is_valid_actor_name("Hello_98", actor_name_max_length));
|
|
||||||
assert!(is_valid_actor_name("ten", actor_name_max_length));
|
|
||||||
assert!(!is_valid_actor_name("Hello-98", actor_name_max_length));
|
|
||||||
assert!(!is_valid_actor_name("a", actor_name_max_length));
|
|
||||||
assert!(!is_valid_actor_name("", actor_name_max_length));
|
|
||||||
}
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn test_valid_display_name() {
|
|
||||||
let actor_name_max_length = 20;
|
|
||||||
assert!(is_valid_display_name("hello @there", actor_name_max_length));
|
|
||||||
assert!(!is_valid_display_name(
|
|
||||||
"@hello there",
|
|
||||||
actor_name_max_length
|
|
||||||
));
|
|
||||||
|
|
||||||
// Make sure zero-space with an @ doesn't work
|
|
||||||
assert!(!is_valid_display_name(
|
|
||||||
&format!("{}@my name is", '\u{200b}'),
|
|
||||||
actor_name_max_length
|
|
||||||
));
|
|
||||||
}
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn test_valid_post_title() {
|
|
||||||
assert!(is_valid_post_title("Post Title"));
|
|
||||||
assert!(is_valid_post_title(" POST TITLE 😃😃😃😃😃"));
|
|
||||||
assert!(!is_valid_post_title("\n \n \n \n ")); // tabs/spaces/newlines
|
|
||||||
}
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn test_valid_matrix_id() {
|
|
||||||
assert!(is_valid_matrix_id("@dess:matrix.org"));
|
|
||||||
assert!(!is_valid_matrix_id("dess:matrix.org"));
|
|
||||||
assert!(!is_valid_matrix_id(" @dess:matrix.org"));
|
|
||||||
assert!(!is_valid_matrix_id("@dess:matrix.org t"));
|
|
||||||
}
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn test_slur_filter() {
|
|
||||||
let slur_regex = Some(RegexBuilder::new(r"(fag(g|got|tard)?\b|cock\s?sucker(s|ing)?|ni((g{2,}|q)+|[gq]{2,})[e3r]+(s|z)?|mudslime?s?|kikes?|\bspi(c|k)s?\b|\bchinks?|gooks?|bitch(es|ing|y)?|whor(es?|ing)|\btr(a|@)nn?(y|ies?)|\b(b|re|r)tard(ed)?s?)").case_insensitive(true).build().unwrap());
|
|
||||||
let test =
|
|
||||||
"faggot test kike tranny cocksucker retardeds. Capitalized Niggerz. This is a bunch of other safe text.";
|
|
||||||
let slur_free = "No slurs here";
|
|
||||||
assert_eq!(
|
|
||||||
remove_slurs(test, &slur_regex),
|
|
||||||
"*removed* test *removed* *removed* *removed* *removed*. Capitalized *removed*. This is a bunch of other safe text."
|
|
||||||
.to_string()
|
|
||||||
);
|
|
||||||
|
|
||||||
let has_slurs_vec = vec![
|
|
||||||
"Niggerz",
|
|
||||||
"cocksucker",
|
|
||||||
"faggot",
|
|
||||||
"kike",
|
|
||||||
"retardeds",
|
|
||||||
"tranny",
|
|
||||||
];
|
|
||||||
let has_slurs_err_str = "No slurs - Niggerz, cocksucker, faggot, kike, retardeds, tranny";
|
|
||||||
|
|
||||||
assert_eq!(slur_check(test, &slur_regex), Err(has_slurs_vec));
|
|
||||||
assert_eq!(slur_check(slur_free, &slur_regex), Ok(()));
|
|
||||||
if let Err(slur_vec) = slur_check(test, &slur_regex) {
|
|
||||||
assert_eq!(&slurs_vec_to_str(&slur_vec), has_slurs_err_str);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// These helped with testing
|
|
||||||
// #[test]
|
|
||||||
// fn test_send_email() {
|
|
||||||
// let result = send_email("not a subject", "test_email@gmail.com", "ur user", "<h1>HI there</h1>");
|
|
||||||
// assert!(result.is_ok());
|
|
||||||
// }
|
|
@ -1,223 +0,0 @@
|
|||||||
use crate::{error::LemmyError, location_info, IpAddr};
|
|
||||||
use actix_web::dev::ConnectionInfo;
|
|
||||||
use anyhow::Context;
|
|
||||||
use chrono::{DateTime, FixedOffset, NaiveDateTime};
|
|
||||||
use itertools::Itertools;
|
|
||||||
use once_cell::sync::Lazy;
|
|
||||||
use rand::{distributions::Alphanumeric, thread_rng, Rng};
|
|
||||||
use regex::{Regex, RegexBuilder};
|
|
||||||
use url::Url;
|
|
||||||
|
|
||||||
static MENTIONS_REGEX: Lazy<Regex> = Lazy::new(|| {
|
|
||||||
Regex::new(r"@(?P<name>[\w.]+)@(?P<domain>[a-zA-Z0-9._:-]+)").expect("compile regex")
|
|
||||||
});
|
|
||||||
static VALID_ACTOR_NAME_REGEX: Lazy<Regex> =
|
|
||||||
Lazy::new(|| Regex::new(r"^[a-zA-Z0-9_]{3,}$").expect("compile regex"));
|
|
||||||
static VALID_POST_TITLE_REGEX: Lazy<Regex> =
|
|
||||||
Lazy::new(|| Regex::new(r".*\S{3,}.*").expect("compile regex"));
|
|
||||||
static VALID_MATRIX_ID_REGEX: Lazy<Regex> = Lazy::new(|| {
|
|
||||||
Regex::new(r"^@[A-Za-z0-9._=-]+:[A-Za-z0-9.-]+\.[A-Za-z]{2,}$").expect("compile regex")
|
|
||||||
});
|
|
||||||
// taken from https://en.wikipedia.org/wiki/UTM_parameters
|
|
||||||
static CLEAN_URL_PARAMS_REGEX: Lazy<Regex> = Lazy::new(|| {
|
|
||||||
Regex::new(r"^utm_source|utm_medium|utm_campaign|utm_term|utm_content|gclid|gclsrc|dclid|fbclid$")
|
|
||||||
.expect("compile regex")
|
|
||||||
});
|
|
||||||
|
|
||||||
pub fn naive_from_unix(time: i64) -> NaiveDateTime {
|
|
||||||
NaiveDateTime::from_timestamp_opt(time, 0).expect("convert datetime")
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn convert_datetime(datetime: NaiveDateTime) -> DateTime<FixedOffset> {
|
|
||||||
DateTime::<FixedOffset>::from_utc(
|
|
||||||
datetime,
|
|
||||||
FixedOffset::east_opt(0).expect("create fixed offset"),
|
|
||||||
)
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn remove_slurs(test: &str, slur_regex: &Option<Regex>) -> String {
|
|
||||||
if let Some(slur_regex) = slur_regex {
|
|
||||||
slur_regex.replace_all(test, "*removed*").to_string()
|
|
||||||
} else {
|
|
||||||
test.to_string()
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
pub(crate) fn slur_check<'a>(
|
|
||||||
test: &'a str,
|
|
||||||
slur_regex: &'a Option<Regex>,
|
|
||||||
) -> Result<(), Vec<&'a str>> {
|
|
||||||
if let Some(slur_regex) = slur_regex {
|
|
||||||
let mut matches: Vec<&str> = slur_regex.find_iter(test).map(|mat| mat.as_str()).collect();
|
|
||||||
|
|
||||||
// Unique
|
|
||||||
matches.sort_unstable();
|
|
||||||
matches.dedup();
|
|
||||||
|
|
||||||
if matches.is_empty() {
|
|
||||||
Ok(())
|
|
||||||
} else {
|
|
||||||
Err(matches)
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn build_slur_regex(regex_str: Option<&str>) -> Option<Regex> {
|
|
||||||
regex_str.map(|slurs| {
|
|
||||||
RegexBuilder::new(slurs)
|
|
||||||
.case_insensitive(true)
|
|
||||||
.build()
|
|
||||||
.expect("compile regex")
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn check_slurs(text: &str, slur_regex: &Option<Regex>) -> Result<(), LemmyError> {
|
|
||||||
if let Err(slurs) = slur_check(text, slur_regex) {
|
|
||||||
Err(LemmyError::from_error_message(
|
|
||||||
anyhow::anyhow!("{}", slurs_vec_to_str(&slurs)),
|
|
||||||
"slurs",
|
|
||||||
))
|
|
||||||
} else {
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn check_slurs_opt(
|
|
||||||
text: &Option<String>,
|
|
||||||
slur_regex: &Option<Regex>,
|
|
||||||
) -> Result<(), LemmyError> {
|
|
||||||
match text {
|
|
||||||
Some(t) => check_slurs(t, slur_regex),
|
|
||||||
None => Ok(()),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
pub(crate) fn slurs_vec_to_str(slurs: &[&str]) -> String {
|
|
||||||
let start = "No slurs - ";
|
|
||||||
let combined = &slurs.join(", ");
|
|
||||||
[start, combined].concat()
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn generate_random_string() -> String {
|
|
||||||
thread_rng()
|
|
||||||
.sample_iter(&Alphanumeric)
|
|
||||||
.map(char::from)
|
|
||||||
.take(30)
|
|
||||||
.collect()
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn markdown_to_html(text: &str) -> String {
|
|
||||||
comrak::markdown_to_html(text, &comrak::ComrakOptions::default())
|
|
||||||
}
|
|
||||||
|
|
||||||
// TODO nothing is done with community / group webfingers yet, so just ignore those for now
|
|
||||||
#[derive(Clone, PartialEq, Eq, Hash)]
|
|
||||||
pub struct MentionData {
|
|
||||||
pub name: String,
|
|
||||||
pub domain: String,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl MentionData {
|
|
||||||
pub fn is_local(&self, hostname: &str) -> bool {
|
|
||||||
hostname.eq(&self.domain)
|
|
||||||
}
|
|
||||||
pub fn full_name(&self) -> String {
|
|
||||||
format!("@{}@{}", &self.name, &self.domain)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn scrape_text_for_mentions(text: &str) -> Vec<MentionData> {
|
|
||||||
let mut out: Vec<MentionData> = Vec::new();
|
|
||||||
for caps in MENTIONS_REGEX.captures_iter(text) {
|
|
||||||
out.push(MentionData {
|
|
||||||
name: caps["name"].to_string(),
|
|
||||||
domain: caps["domain"].to_string(),
|
|
||||||
});
|
|
||||||
}
|
|
||||||
out.into_iter().unique().collect()
|
|
||||||
}
|
|
||||||
|
|
||||||
fn has_newline(name: &str) -> bool {
|
|
||||||
name.contains('\n')
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn is_valid_actor_name(name: &str, actor_name_max_length: usize) -> bool {
|
|
||||||
name.chars().count() <= actor_name_max_length
|
|
||||||
&& VALID_ACTOR_NAME_REGEX.is_match(name)
|
|
||||||
&& !has_newline(name)
|
|
||||||
}
|
|
||||||
|
|
||||||
// Can't do a regex here, reverse lookarounds not supported
|
|
||||||
pub fn is_valid_display_name(name: &str, actor_name_max_length: usize) -> bool {
|
|
||||||
!name.starts_with('@')
|
|
||||||
&& !name.starts_with('\u{200b}')
|
|
||||||
&& name.chars().count() >= 3
|
|
||||||
&& name.chars().count() <= actor_name_max_length
|
|
||||||
&& !has_newline(name)
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn is_valid_matrix_id(matrix_id: &str) -> bool {
|
|
||||||
VALID_MATRIX_ID_REGEX.is_match(matrix_id) && !has_newline(matrix_id)
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn is_valid_post_title(title: &str) -> bool {
|
|
||||||
VALID_POST_TITLE_REGEX.is_match(title) && !has_newline(title)
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn get_ip(conn_info: &ConnectionInfo) -> IpAddr {
|
|
||||||
IpAddr(
|
|
||||||
conn_info
|
|
||||||
.realip_remote_addr()
|
|
||||||
.unwrap_or("127.0.0.1:12345")
|
|
||||||
.split(':')
|
|
||||||
.next()
|
|
||||||
.unwrap_or("127.0.0.1")
|
|
||||||
.to_string(),
|
|
||||||
)
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn clean_url_params(url: &Url) -> Url {
|
|
||||||
let mut url_out = url.clone();
|
|
||||||
if url.query().is_some() {
|
|
||||||
let new_query = url
|
|
||||||
.query_pairs()
|
|
||||||
.filter(|q| !CLEAN_URL_PARAMS_REGEX.is_match(&q.0))
|
|
||||||
.map(|q| format!("{}={}", q.0, q.1))
|
|
||||||
.join("&");
|
|
||||||
url_out.set_query(Some(&new_query));
|
|
||||||
}
|
|
||||||
url_out
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn generate_domain_url(actor_id: &Url) -> Result<String, LemmyError> {
|
|
||||||
Ok(actor_id.host_str().context(location_info!())?.to_string())
|
|
||||||
}
|
|
||||||
|
|
||||||
#[cfg(test)]
|
|
||||||
mod tests {
|
|
||||||
use crate::utils::{clean_url_params, is_valid_post_title};
|
|
||||||
use url::Url;
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn test_clean_url_params() {
|
|
||||||
let url = Url::parse("https://example.com/path/123?utm_content=buffercf3b2&utm_medium=social&username=randomuser&id=123").unwrap();
|
|
||||||
let cleaned = clean_url_params(&url);
|
|
||||||
let expected = Url::parse("https://example.com/path/123?username=randomuser&id=123").unwrap();
|
|
||||||
assert_eq!(expected.to_string(), cleaned.to_string());
|
|
||||||
|
|
||||||
let url = Url::parse("https://example.com/path/123").unwrap();
|
|
||||||
let cleaned = clean_url_params(&url);
|
|
||||||
assert_eq!(url.to_string(), cleaned.to_string());
|
|
||||||
}
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn regex_checks() {
|
|
||||||
assert!(!is_valid_post_title("hi"));
|
|
||||||
assert!(is_valid_post_title("him"));
|
|
||||||
assert!(!is_valid_post_title("n\n\n\n\nanother"));
|
|
||||||
assert!(!is_valid_post_title("hello there!\n this is a test."));
|
|
||||||
assert!(is_valid_post_title("hello there! this is a test."));
|
|
||||||
}
|
|
||||||
}
|
|
@ -0,0 +1,3 @@
|
|||||||
|
pub fn markdown_to_html(text: &str) -> String {
|
||||||
|
comrak::markdown_to_html(text, &comrak::ComrakOptions::default())
|
||||||
|
}
|
@ -0,0 +1,48 @@
|
|||||||
|
use itertools::Itertools;
|
||||||
|
use once_cell::sync::Lazy;
|
||||||
|
use regex::Regex;
|
||||||
|
|
||||||
|
// Matches fediverse user mentions of the form `@name@domain`, capturing
// `name` (word characters and dots) and `domain` (alphanumerics plus `._:-`,
// so host:port forms like `lemmy-alpha:8540` are accepted).
static MENTIONS_REGEX: Lazy<Regex> = Lazy::new(|| {
  Regex::new(r"@(?P<name>[\w.]+)@(?P<domain>[a-zA-Z0-9._:-]+)").expect("compile regex")
});
|
||||||
|
// TODO nothing is done with community / group webfingers yet, so just ignore those for now
/// A user mention scraped out of post or comment text, split into its
/// `name` and `domain` halves.
#[derive(Clone, PartialEq, Eq, Hash)]
pub struct MentionData {
  pub name: String,
  pub domain: String,
}

impl MentionData {
  /// True when the mention's domain matches the given local hostname.
  pub fn is_local(&self, hostname: &str) -> bool {
    self.domain == hostname
  }

  /// Renders the mention back into its canonical `@name@domain` form.
  pub fn full_name(&self) -> String {
    format!("@{}@{}", self.name, self.domain)
  }
}
|
||||||
|
|
||||||
|
pub fn scrape_text_for_mentions(text: &str) -> Vec<MentionData> {
|
||||||
|
let mut out: Vec<MentionData> = Vec::new();
|
||||||
|
for caps in MENTIONS_REGEX.captures_iter(text) {
|
||||||
|
out.push(MentionData {
|
||||||
|
name: caps["name"].to_string(),
|
||||||
|
domain: caps["domain"].to_string(),
|
||||||
|
});
|
||||||
|
}
|
||||||
|
out.into_iter().unique().collect()
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(test)]
mod test {
  use crate::utils::mention::scrape_text_for_mentions;

  // Checks that user mentions are extracted from markdown-style text, that
  // community mentions (`!name@domain`) are ignored, and that domains with a
  // port (`lemmy-alpha:8540`) are captured whole.
  #[test]
  fn test_mentions_regex() {
    let text = "Just read a great blog post by [@tedu@honk.teduangst.com](/u/test). And another by !test_community@fish.teduangst.com . Another [@lemmy@lemmy-alpha:8540](/u/fish)";
    let mentions = scrape_text_for_mentions(text);

    assert_eq!(mentions[0].name, "tedu".to_string());
    assert_eq!(mentions[0].domain, "honk.teduangst.com".to_string());
    assert_eq!(mentions[1].domain, "lemmy-alpha:8540".to_string());
  }
}
|
@ -0,0 +1,5 @@
|
|||||||
|
// Utility submodules, split out of the former monolithic utils file.
pub mod markdown; // markdown -> HTML rendering
pub mod mention; // @user@domain mention scraping
pub mod slurs; // slur-regex filtering and checks
pub mod time; // unix-timestamp / chrono conversions
pub mod validation; // actor-name / title / matrix-id / URL validation
|
@ -0,0 +1,109 @@
|
|||||||
|
use crate::error::LemmyError;
|
||||||
|
use regex::{Regex, RegexBuilder};
|
||||||
|
|
||||||
|
pub fn remove_slurs(test: &str, slur_regex: &Option<Regex>) -> String {
|
||||||
|
if let Some(slur_regex) = slur_regex {
|
||||||
|
slur_regex.replace_all(test, "*removed*").to_string()
|
||||||
|
} else {
|
||||||
|
test.to_string()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub(crate) fn slur_check<'a>(
|
||||||
|
test: &'a str,
|
||||||
|
slur_regex: &'a Option<Regex>,
|
||||||
|
) -> Result<(), Vec<&'a str>> {
|
||||||
|
if let Some(slur_regex) = slur_regex {
|
||||||
|
let mut matches: Vec<&str> = slur_regex.find_iter(test).map(|mat| mat.as_str()).collect();
|
||||||
|
|
||||||
|
// Unique
|
||||||
|
matches.sort_unstable();
|
||||||
|
matches.dedup();
|
||||||
|
|
||||||
|
if matches.is_empty() {
|
||||||
|
Ok(())
|
||||||
|
} else {
|
||||||
|
Err(matches)
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn build_slur_regex(regex_str: Option<&str>) -> Option<Regex> {
|
||||||
|
regex_str.map(|slurs| {
|
||||||
|
RegexBuilder::new(slurs)
|
||||||
|
.case_insensitive(true)
|
||||||
|
.build()
|
||||||
|
.expect("compile regex")
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn check_slurs(text: &str, slur_regex: &Option<Regex>) -> Result<(), LemmyError> {
|
||||||
|
if let Err(slurs) = slur_check(text, slur_regex) {
|
||||||
|
Err(LemmyError::from_error_message(
|
||||||
|
anyhow::anyhow!("{}", slurs_vec_to_str(&slurs)),
|
||||||
|
"slurs",
|
||||||
|
))
|
||||||
|
} else {
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn check_slurs_opt(
|
||||||
|
text: &Option<String>,
|
||||||
|
slur_regex: &Option<Regex>,
|
||||||
|
) -> Result<(), LemmyError> {
|
||||||
|
match text {
|
||||||
|
Some(t) => check_slurs(t, slur_regex),
|
||||||
|
None => Ok(()),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Formats a list of matched slurs as `"No slurs - a, b, c"` for error output.
pub(crate) fn slurs_vec_to_str(slurs: &[&str]) -> String {
  format!("No slurs - {}", slurs.join(", "))
}
|
||||||
|
|
||||||
|
#[cfg(test)]
mod test {
  use crate::utils::slurs::{remove_slurs, slur_check, slurs_vec_to_str};
  use regex::RegexBuilder;

  #[test]
  fn test_slur_filter() {
    // Case-insensitive slur pattern, mirroring the default production config.
    let slur_regex = Some(RegexBuilder::new(r"(fag(g|got|tard)?\b|cock\s?sucker(s|ing)?|ni((g{2,}|q)+|[gq]{2,})[e3r]+(s|z)?|mudslime?s?|kikes?|\bspi(c|k)s?\b|\bchinks?|gooks?|bitch(es|ing|y)?|whor(es?|ing)|\btr(a|@)nn?(y|ies?)|\b(b|re|r)tard(ed)?s?)").case_insensitive(true).build().unwrap());
    let test =
      "faggot test kike tranny cocksucker retardeds. Capitalized Niggerz. This is a bunch of other safe text.";
    let slur_free = "No slurs here";
    // Each match is replaced with "*removed*", including capitalized forms.
    assert_eq!(
      remove_slurs(test, &slur_regex),
      "*removed* test *removed* *removed* *removed* *removed*. Capitalized *removed*. This is a bunch of other safe text."
        .to_string()
    );

    // slur_check reports matches sorted and de-duplicated.
    let has_slurs_vec = vec![
      "Niggerz",
      "cocksucker",
      "faggot",
      "kike",
      "retardeds",
      "tranny",
    ];
    let has_slurs_err_str = "No slurs - Niggerz, cocksucker, faggot, kike, retardeds, tranny";

    assert_eq!(slur_check(test, &slur_regex), Err(has_slurs_vec));
    assert_eq!(slur_check(slur_free, &slur_regex), Ok(()));
    if let Err(slur_vec) = slur_check(test, &slur_regex) {
      assert_eq!(&slurs_vec_to_str(&slur_vec), has_slurs_err_str);
    }
  }

  // These helped with testing
  // #[test]
  // fn test_send_email() {
  //   let result = send_email("not a subject", "test_email@gmail.com", "ur user", "<h1>HI there</h1>");
  //   assert!(result.is_ok());
  // }
}
|
@ -0,0 +1,12 @@
|
|||||||
|
use chrono::{DateTime, FixedOffset, NaiveDateTime};
|
||||||
|
|
||||||
|
/// Converts a unix timestamp in whole seconds into a `NaiveDateTime`.
///
/// # Panics
/// Panics if `time` is outside the range chrono can represent.
pub fn naive_from_unix(time: i64) -> NaiveDateTime {
  NaiveDateTime::from_timestamp_opt(time, 0).expect("convert datetime")
}
|
||||||
|
|
||||||
|
pub fn convert_datetime(datetime: NaiveDateTime) -> DateTime<FixedOffset> {
|
||||||
|
DateTime::<FixedOffset>::from_utc(
|
||||||
|
datetime,
|
||||||
|
FixedOffset::east_opt(0).expect("create fixed offset"),
|
||||||
|
)
|
||||||
|
}
|
@ -0,0 +1,131 @@
|
|||||||
|
use itertools::Itertools;
|
||||||
|
use once_cell::sync::Lazy;
|
||||||
|
use regex::Regex;
|
||||||
|
use url::Url;
|
||||||
|
|
||||||
|
static VALID_ACTOR_NAME_REGEX: Lazy<Regex> =
|
||||||
|
Lazy::new(|| Regex::new(r"^[a-zA-Z0-9_]{3,}$").expect("compile regex"));
|
||||||
|
static VALID_POST_TITLE_REGEX: Lazy<Regex> =
|
||||||
|
Lazy::new(|| Regex::new(r".*\S{3,}.*").expect("compile regex"));
|
||||||
|
static VALID_MATRIX_ID_REGEX: Lazy<Regex> = Lazy::new(|| {
|
||||||
|
Regex::new(r"^@[A-Za-z0-9._=-]+:[A-Za-z0-9.-]+\.[A-Za-z]{2,}$").expect("compile regex")
|
||||||
|
});
|
||||||
|
// taken from https://en.wikipedia.org/wiki/UTM_parameters
|
||||||
|
static CLEAN_URL_PARAMS_REGEX: Lazy<Regex> = Lazy::new(|| {
|
||||||
|
Regex::new(r"^utm_source|utm_medium|utm_campaign|utm_term|utm_content|gclid|gclsrc|dclid|fbclid$")
|
||||||
|
.expect("compile regex")
|
||||||
|
});
|
||||||
|
|
||||||
|
/// True when the string contains a line break; used to reject multi-line
/// names and IDs.
fn has_newline(name: &str) -> bool {
  name.find('\n').is_some()
}
|
||||||
|
|
||||||
|
pub fn is_valid_actor_name(name: &str, actor_name_max_length: usize) -> bool {
|
||||||
|
name.chars().count() <= actor_name_max_length
|
||||||
|
&& VALID_ACTOR_NAME_REGEX.is_match(name)
|
||||||
|
&& !has_newline(name)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Can't do a regex here, reverse lookarounds not supported
|
||||||
|
pub fn is_valid_display_name(name: &str, actor_name_max_length: usize) -> bool {
|
||||||
|
!name.starts_with('@')
|
||||||
|
&& !name.starts_with('\u{200b}')
|
||||||
|
&& name.chars().count() >= 3
|
||||||
|
&& name.chars().count() <= actor_name_max_length
|
||||||
|
&& !has_newline(name)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Matrix IDs must match `@localpart:server.tld` exactly and contain no
/// newlines.
pub fn is_valid_matrix_id(matrix_id: &str) -> bool {
  VALID_MATRIX_ID_REGEX.is_match(matrix_id) && !has_newline(matrix_id)
}
|
||||||
|
|
||||||
|
/// Post titles need at least 3 consecutive non-whitespace characters and no
/// newlines.
pub fn is_valid_post_title(title: &str) -> bool {
  VALID_POST_TITLE_REGEX.is_match(title) && !has_newline(title)
}
|
||||||
|
|
||||||
|
pub fn clean_url_params(url: &Url) -> Url {
|
||||||
|
let mut url_out = url.clone();
|
||||||
|
if url.query().is_some() {
|
||||||
|
let new_query = url
|
||||||
|
.query_pairs()
|
||||||
|
.filter(|q| !CLEAN_URL_PARAMS_REGEX.is_match(&q.0))
|
||||||
|
.map(|q| format!("{}={}", q.0, q.1))
|
||||||
|
.join("&");
|
||||||
|
url_out.set_query(Some(&new_query));
|
||||||
|
}
|
||||||
|
url_out
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(test)]
mod tests {
  use crate::utils::validation::{
    clean_url_params,
    is_valid_actor_name,
    is_valid_display_name,
    is_valid_matrix_id,
    is_valid_post_title,
  };
  use url::Url;

  // Tracking params (utm_*) are stripped while ordinary query params survive;
  // URLs without a query string pass through untouched.
  #[test]
  fn test_clean_url_params() {
    let url = Url::parse("https://example.com/path/123?utm_content=buffercf3b2&utm_medium=social&username=randomuser&id=123").unwrap();
    let cleaned = clean_url_params(&url);
    let expected = Url::parse("https://example.com/path/123?username=randomuser&id=123").unwrap();
    assert_eq!(expected.to_string(), cleaned.to_string());

    let url = Url::parse("https://example.com/path/123").unwrap();
    let cleaned = clean_url_params(&url);
    assert_eq!(url.to_string(), cleaned.to_string());
  }

  // Titles need a run of 3+ non-whitespace characters and no newlines.
  #[test]
  fn regex_checks() {
    assert!(!is_valid_post_title("hi"));
    assert!(is_valid_post_title("him"));
    assert!(!is_valid_post_title("n\n\n\n\nanother"));
    assert!(!is_valid_post_title("hello there!\n this is a test."));
    assert!(is_valid_post_title("hello there! this is a test."));
  }

  // Actor names: 3+ chars of [a-zA-Z0-9_] only; hyphens and short/empty
  // names are rejected.
  #[test]
  fn test_valid_actor_name() {
    let actor_name_max_length = 20;
    assert!(is_valid_actor_name("Hello_98", actor_name_max_length));
    assert!(is_valid_actor_name("ten", actor_name_max_length));
    assert!(!is_valid_actor_name("Hello-98", actor_name_max_length));
    assert!(!is_valid_actor_name("a", actor_name_max_length));
    assert!(!is_valid_actor_name("", actor_name_max_length));
  }

  #[test]
  fn test_valid_display_name() {
    let actor_name_max_length = 20;
    assert!(is_valid_display_name("hello @there", actor_name_max_length));
    // Leading @ is rejected so display names can't impersonate mentions.
    assert!(!is_valid_display_name(
      "@hello there",
      actor_name_max_length
    ));

    // Make sure zero-space with an @ doesn't work
    assert!(!is_valid_display_name(
      &format!("{}@my name is", '\u{200b}'),
      actor_name_max_length
    ));
  }

  #[test]
  fn test_valid_post_title() {
    assert!(is_valid_post_title("Post Title"));
    assert!(is_valid_post_title(" POST TITLE 😃😃😃😃😃"));
    assert!(!is_valid_post_title("\n \n \n \n ")); // tabs/spaces/newlines
  }

  // Matrix IDs must be fully anchored: no missing @, leading space, or
  // trailing text.
  #[test]
  fn test_valid_matrix_id() {
    assert!(is_valid_matrix_id("@dess:matrix.org"));
    assert!(!is_valid_matrix_id("dess:matrix.org"));
    assert!(!is_valid_matrix_id(" @dess:matrix.org"));
    assert!(!is_valid_matrix_id("@dess:matrix.org t"));
  }
}
|
Loading…
Reference in New Issue