/* * melib - sieve module * * Copyright 2022 Manos Pitsidianakis * * This file is part of meli. * * meli is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * meli is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with meli. If not, see . */ //! Parse Sieve scripts into an abstract syntax tree //! //! The main function to use is [`parse_sieve`] which parses text as a list of Sieve rules. //! //! # Example //! //! ```rust //! use melib::{sieve::{parser::parse_sieve, ast::*}, parsec::Parser}; //! //! use ActionCommand::*; //! use AddressOperator::*; //! use CharacterOperator::*; //! use ConditionRule::*; //! use ControlCommand::*; //! use IntegerOperator::*; //! use MatchOperator::*; //! use Rule::*; //! //! assert_eq!( //! parse_sieve().parse( //! r#"require "fileinto"; //! if header :contains "from" "coyote" { //! discard; //! } elsif header :contains ["subject"] ["$$$"] { //! discard; //! } else { //! fileinto "INBOX"; //! }"# //! ), //! Ok(( //! "", //! [ //! Control(Require(["fileinto".to_string()].to_vec())), //! Control(If { //! condition: ( //! Header { //! comparator: None, //! match_type: Some(Contains), //! header_names: ["from".to_string()].to_vec(), //! key_list: ["coyote".to_string()].to_vec() //! }, //! RuleBlock([Action(Discard)].to_vec()) //! ), //! elsif: Some(( //! Header { //! comparator: None, //! match_type: Some(Contains), //! header_names: ["subject".to_string()].to_vec(), //! key_list: ["$$$".to_string()].to_vec() //! }, //! RuleBlock([Action(Discard)].to_vec()) //! )), //! else_: Some(RuleBlock( //! 
[Action(FileInto { //! mailbox: "INBOX".to_string() //! })] //! .to_vec() //! )) //! }) //! ] //! .to_vec() //! )) //! ); //! ``` use super::ast::*; use crate::parsec::*; // Helper macro to generate simple parsers for commands ending in semicolon. macro_rules! parse_action { // Macro for commands without an argument. ($(#[$attrs:meta])* $parser_name:ident, $lit:literal, $t:ty, $action:expr) => { $(#[$attrs])* pub fn $parser_name<'a>() -> impl Parser<'a, $t> { move |input| { map( ws(right(match_literal_anycase($lit), ws(match_literal(";")))), |_| $action, ) .parse(input) } } }; // Alternative macro for commands with a single string argument. ($(#[$attrs:meta])* $parser_name:ident, $lit:literal, $t:ty, $action:expr, $argument:ident) => { $(#[$attrs])* pub fn $parser_name<'a>() -> impl Parser<'a, $t> { move |input| { map( ws(right( parse_token($lit), left(ws(string()), ws(parse_token(";"))), )), |$argument| $action, ) .parse(input) } } }; } parse_action! { /// Parse the `keep` action. parse_sieve_keep, "keep", ActionCommand, ActionCommand::Keep } parse_action! { /// Parse the `discard` action. parse_sieve_discard, "discard", ActionCommand, ActionCommand::Discard } parse_action! { /// Parse the `stop` control command. parse_sieve_stop, "stop", ControlCommand, ControlCommand::Stop } parse_action! { /// Parse the `fileinto` action. parse_sieve_fileinto, "fileinto", ActionCommand, ActionCommand::FileInto { mailbox }, mailbox } parse_action! { /// Parse the `redirect` action. 
parse_sieve_redirect, "redirect", ActionCommand, ActionCommand::Redirect { address }, address } #[inline(always)] fn parse_token<'a>(literal: &'static str) -> impl Parser<'a, ()> { move |input| map(ws(match_literal_anycase(literal)), |_| ()).parse(input) } #[inline(always)] fn ws_inner<'a>() -> impl Parser<'a, ()> { move |input: &'a str| { let mut offset = 0; let input_b = input.as_bytes(); while offset < input_b.len() { while offset < input_b.len() && [b' ', b'\t', b'\n', b'\r'].contains(&input_b[offset]) { offset += 1; } if offset >= input_b.len() { break; } if input_b[offset] == b'#' { while offset < input_b.len() && !input[offset..].starts_with("\r\n") && !input[offset..].starts_with('\n') { offset += 1; } if offset >= input_b.len() { break; } if input[offset..].starts_with("\r\n") { offset += 2; } else if input[offset..].starts_with('\n') { offset += 1; } } else if input[offset..].starts_with("/*") { while offset < input_b.len() && !input[offset..].starts_with("*/") { offset += 1; } if offset >= input_b.len() { break; } if input[offset..].starts_with("*/") { offset += 2; } } else { break; } } Ok((&input[offset..], ())) } } #[inline(always)] /// Take a parser combinator, remove whitespace from the front of the input, apply the parser and /// remove whitespace from what's left. Sieve defines whitespace as regular white space characters /// and comments. pub fn ws<'a, P, A>(parser: P) -> impl Parser<'a, A> where P: Parser<'a, A>, { move |input1| { let (input2, ()) = ws_inner().parse(input1)?; let (input3, res) = parser.parse(input2)?; let (input4, ()) = ws_inner().parse(input3)?; Ok((input4, res)) } } // string = quoted-string / multi-line // // quoted-other = "\" octet-not-qspecial // ; represents just the octet-no-qspecial // ; character. 
SHOULD NOT be used // quoted-safe = CRLF / octet-not-qspecial // ; either a CRLF pair, OR a single octet other // ; than NUL, CR, LF, double-quote, or backslash // quoted-special = "\" (DQUOTE / "\") // ; represents just a double-quote or backslash // quoted-string = DQUOTE quoted-text DQUOTE // quoted-text = *(quoted-safe / quoted-special / quoted-other) /// Parse a Sieve language string. pub fn string<'a>() -> impl Parser<'a, String> { #[inline(always)] fn quoted_text<'a>() -> impl Parser<'a, String> { move |input: &'a str| { let mut offset = 0; let mut unescape_dquote: bool = false; let mut unescape_slash: bool = false; while offset < input.len() { if input.len() >= offset + 2 { if input.starts_with("\r\n") { offset += 2; } else if input.starts_with("\\\"") { unescape_dquote = true; offset += 2; } else if input.starts_with("\\\\") { unescape_slash = true; offset += 2; } } // a single octet other ; than NUL, CR, LF, double-quote, or backslash if [b'\x00', b'\r', b'\n', b'"', b'\\'].contains(&input.as_bytes()[offset]) { break; } offset += 1; } match (unescape_dquote, unescape_slash) { (false, false) => Ok((&input[offset..], input[..offset].to_string())), (true, false) => Ok((&input[offset..], input[..offset].replace("\\\"", "\""))), (false, true) => Ok((&input[offset..], input[..offset].replace("\\\\", "\\"))), (true, true) => Ok(( &input[offset..], input[..offset].replace("\\\"", "\"").replace("\\\\", "\\"), )), } } } #[inline(always)] fn quoted_string<'a>() -> impl Parser<'a, String> { delimited(parse_token("\""), quoted_text(), parse_token("\"")) } //fn multiline() -> impl Parser<'a, String> {} //either(quoted_string(), multiline()) quoted_string() } // number = 1*DIGIT [ QUANTIFIER ] // QUANTIFIER = "K" / "M" / "G" /// Parse a Sieve language number literal. 
pub fn number<'a>() -> impl Parser<'a, u64> { map_res( pair( is_a(b"0123456789"), pred(any_char, |c| { ['k', 'm', 'g'].contains(&c.to_ascii_lowercase()) }), ), |(num_s, quant)| { Ok(match (num_s.parse::(), quant.to_ascii_lowercase()) { (Ok(num), 'k') => num * 1000, (Ok(num), 'm') => num * 1000_000, (Ok(num), 'g') => num * 1000_000_000, _ => return Err(num_s), }) }, ) } /// Parse a Sieve language [integer operator](`IntegerOperator`) (`:over` or `:under`). pub fn parse_sieve_integer_operator<'a>() -> impl Parser<'a, (IntegerOperator, u64)> { move |input| { ws(pair( either( map(parse_token(":over"), |_| IntegerOperator::Over), map(parse_token(":under"), |_| IntegerOperator::Under), ), ws(number()), )) .parse(input) } } // ":comparator" /// Parse a Sieve language [character comparator](`CharacterOperator`) (`:comparator`). pub fn parse_sieve_comparator<'a>() -> impl Parser<'a, CharacterOperator> { move |input| { ws(right( parse_token(":comparator"), ws(map_res(string(), |s| { if s == "i;octet" { Ok(CharacterOperator::Octet) } else if s == "i;ascii-casemap" { Ok(CharacterOperator::AsciiCasemap) } else { Err("invalid comparator") } })), )) .parse(input) } } // MATCH-TYPE = ":is" / ":contains" / ":matches" /// Parse a Sieve language [match type]('MatchOperator'). pub fn parse_sieve_match_type<'a>() -> impl Parser<'a, MatchOperator> { move |input| { either( map(parse_token(":is"), |_| MatchOperator::Is), either( map(parse_token(":contains"), |_| MatchOperator::Contains), map(parse_token(":matches"), |_| MatchOperator::Matches), ), ) .parse(input) } } /* string-list = "[" string *("," string) "]" / string ; if there is only a single string, the brackets ; are optional */ /// Parse a Sieve language string list. If there is only a single string, the brackets are /// optional. 
pub fn parse_string_list<'a>() -> impl Parser<'a, Vec> { move |input| { either( delimited( ws(parse_token("[")), separated_list0(string(), ws(parse_token(",")), false), ws(parse_token("]")), ), map(string(), |s| vec![s]), ) .parse(input) } } /* Usage: header [COMPARATOR] [MATCH-TYPE] * */ /// Parse a Sieve language [header condition](`ConditionRule`). pub fn parse_sieve_header<'a>() -> impl Parser<'a, ConditionRule> { move |input| { map( ws(pair( right(parse_token("header"), move |input| { crate::permutation! { input, comparator, Option, opt(parse_sieve_comparator()), match_type, Option, opt(parse_sieve_match_type()) } }), pair(ws(parse_string_list()), ws(parse_string_list())), )), |((comparator, match_type), (header_names, key_list))| ConditionRule::Header { comparator, match_type, header_names, key_list, }, ) .parse(input) } } // ADDRESS-PART = ":localpart" / ":domain" / ":all" /// Parse a Sieve language [address operator](`AddressOperator`). pub fn parse_sieve_address_type<'a>() -> impl Parser<'a, AddressOperator> { move |input| { either( map(parse_token(":localpart"), |_| AddressOperator::Localpart), either( map(parse_token(":domain"), |_| AddressOperator::Domain), map(parse_token(":all"), |_| AddressOperator::All), ), ) .parse(input) } } // address [COMPARATOR] [ADDRESS-PART] [MATCH-TYPE] /// Parse a Sieve language [address condition](`ConditionRule`). pub fn parse_sieve_address<'a>() -> impl Parser<'a, ConditionRule> { move |input| { map( ws(pair( right(parse_token("address"), move |input| { crate::permutation! 
{ input, match_type, Option, opt(parse_sieve_match_type()), comparator, Option, opt(parse_sieve_comparator()), address_type, Option, opt(parse_sieve_address_type()) } }), pair(ws(parse_string_list()), ws(parse_string_list())), )), |((match_type, comparator, address_part), (header_list, key_list))| { ConditionRule::Address { comparator, address_part, match_type, header_list, key_list, } }, ) .parse(input) } } // Test envelope // Usage: envelope [COMPARATOR] [ADDRESS-PART] [MATCH-TYPE] // /// Parse a Sieve language [envelope condition](`ConditionRule`). pub fn parse_sieve_envelope<'a>() -> impl Parser<'a, ConditionRule> { move |input| { map( ws(pair( right(parse_token("envelope"), move |input| { crate::permutation! { input, match_type, Option, opt(parse_sieve_match_type()), comparator, Option, opt(parse_sieve_comparator()), address_type, Option, opt(parse_sieve_address_type()) } }), pair(ws(parse_string_list()), ws(parse_string_list())), )), |((match_type, comparator, address_part), (envelope_part, key_list))| { ConditionRule::Envelope { comparator, address_part, match_type, envelope_part, key_list, } }, ) .parse(input) } } /// Parse a Sieve language [test condition](`ConditionRule`). 
pub fn parse_sieve_test<'a>() -> impl Parser<'a, ConditionRule> {
    // The combinator tree is built lazily inside the closure because tests are recursive
    // (`not`, `allof`, `anyof` contain further tests).
    move |input| {
        either(
            either(
                map(parse_token("true"), |_| ConditionRule::Literal(true)),
                map(parse_token("false"), |_| ConditionRule::Literal(false)),
            ),
            either(
                either(
                    map(
                        right(ws(parse_token("exists")), ws(parse_string_list())),
                        ConditionRule::Exists,
                    ),
                    map(
                        right(ws(parse_token("size")), ws(parse_sieve_integer_operator())),
                        |(operator, limit)| ConditionRule::Size { operator, limit },
                    ),
                ),
                either(
                    either(
                        map(right(ws(parse_token("not")), parse_sieve_test()), |cond| {
                            ConditionRule::Not(Box::new(cond))
                        }),
                        either(
                            either(parse_sieve_header(), parse_sieve_address()),
                            parse_sieve_envelope(),
                        ),
                    ),
                    either(
                        map(
                            right(ws(parse_token("allof")), parse_test_list()),
                            ConditionRule::AllOf,
                        ),
                        map(
                            right(ws(parse_token("anyof")), parse_test_list()),
                            ConditionRule::AnyOf,
                        ),
                    ),
                ),
            ),
        )
        .parse(input)
    }
}

/* test-list  = "(" test *("," test) ")"
 */
/// Parse a Sieve language list of [test conditions](`ConditionRule`).
pub fn parse_test_list<'a>() -> impl Parser<'a, Vec<ConditionRule>> {
    move |input| {
        delimited(
            ws(parse_token("(")),
            separated_list0(ws(parse_sieve_test()), ws(parse_token(",")), false),
            ws(parse_token(")")),
        )
        .parse(input)
    }
}

/// Parse a Sieve language [rule](`Rule`).
///
/// Control commands (`stop`, `require`, `if`) are tried before the action commands
/// (`keep`, `fileinto`, `redirect`, `discard`).
pub fn parse_sieve_rule<'a>() -> impl Parser<'a, Rule> {
    either(
        map(
            either(
                either(parse_sieve_stop(), parse_sieve_require()),
                parse_sieve_if(),
            ),
            Rule::Control,
        ),
        map(
            either(
                either(parse_sieve_keep(), parse_sieve_fileinto()),
                either(parse_sieve_redirect(), parse_sieve_discard()),
            ),
            Rule::Action,
        ),
    )
}

/// Parse a Sieve language [block](`RuleBlock`).
pub fn parse_sieve_block<'a>() -> impl Parser<'a, RuleBlock> {
    // Built lazily inside the closure: blocks contain rules, which may contain `if`
    // commands, which contain blocks again.
    move |input| {
        map(
            ws(delimited(
                parse_token("{"),
                ws(zero_or_more(parse_sieve_rule())),
                parse_token("}"),
            )),
            RuleBlock,
        )
        .parse(input)
    }
}

/// Parse a Sieve language [if condition](`ControlCommand`).
pub fn parse_sieve_if<'a>() -> impl Parser<'a, ControlCommand> {
    // The `else` branch is an optional suffix. Parsing it with `opt` (instead of trying
    // "if + else" and then re-parsing the bare "if" from scratch on failure) parses the
    // condition and block only once, which keeps nested `if` commands linear.
    map(
        pair(
            parse_sieve_if_bare(),
            opt(ws(right(parse_token("else"), ws(parse_sieve_block())))),
        ),
        |(ifbare, else_)| match ifbare {
            ControlCommand::If {
                condition,
                elsif,
                else_: _,
            } => ControlCommand::If {
                condition,
                elsif,
                else_,
            },
            // `parse_sieve_if_bare` only ever produces `ControlCommand::If`.
            _ => unreachable!(),
        },
    )
}

/// Parse an `if` command with at most one `elsif` branch and no `else` branch.
///
/// The returned [`ControlCommand::If`] always has `else_` set to `None`.
fn parse_sieve_if_bare<'a>() -> impl Parser<'a, ControlCommand> {
    map(
        pair(
            ws(pair(
                ws(right(parse_token("if"), ws(parse_sieve_test()))),
                ws(parse_sieve_block()),
            )),
            // Optional suffix, for the same reason as the `else` branch above.
            opt(ws(pair(
                ws(right(parse_token("elsif"), ws(parse_sieve_test()))),
                ws(parse_sieve_block()),
            ))),
        ),
        |(condition, elsif)| ControlCommand::If {
            condition,
            elsif,
            else_: None,
        },
    )
}

/// Parse a Sieve language [`require` control command](`ControlCommand`).
pub fn parse_sieve_require<'a>() -> impl Parser<'a, ControlCommand> {
    move |input| {
        right(
            ws(parse_token("require")),
            ws(left(
                map(parse_string_list(), ControlCommand::Require),
                ws(parse_token(";")),
            )),
        )
        .parse(input)
    }
}

/// Parse a Sieve language script.
pub fn parse_sieve<'a>() -> impl Parser<'a, Vec> { ws(zero_or_more(ws(parse_sieve_rule()))) } #[cfg(test)] mod test { use super::*; use crate::parsec::Parser; use super::ActionCommand::*; use super::AddressOperator::*; use super::CharacterOperator::*; use super::ConditionRule::*; use super::ControlCommand::*; use super::IntegerOperator::*; use super::MatchOperator::*; use super::Rule::*; use super::RuleBlock; #[test] fn test_sieve_parse_strings() { assert_eq!( parse_string_list().parse(r#"["fileinto", "reject"]"#), Ok(("", vec!["fileinto".to_string(), "reject".to_string()])) ); assert_eq!( parse_string_list().parse(r#""fileinto""#), Ok(("", vec!["fileinto".to_string()])) ); } #[test] fn test_sieve_parse_conditionals() { /* Operators that start with : like :matches are unordered and optional, since they have * defaults. But that means we must handle any order correctly, which is tricky if we use * an optional parser; for an optional parser both None and Some(_) are valid values. */ /* Permutations of two */ let (_, first) = parse_sieve_test() .parse( r#"header :contains :comparator "i;octet" "Subject" "MAKE MONEY FAST""#, ) .unwrap(); assert_eq!( Header { comparator: Some(Octet), match_type: Some(Contains), header_names: ["Subject".to_string()].to_vec(), key_list: ["MAKE MONEY FAST".to_string()].to_vec() }, first ); assert_eq!( parse_sieve_test().parse( r#"header :comparator "i;octet" :contains "Subject" "MAKE MONEY FAST""# ), Ok(("", first)), ); /* Permutations of three */ let (_, first) = parse_sieve_test() .parse(r#"address :DOMAIN :comparator "i;octet" :is ["From", "To"] "example.com""#) .unwrap(); assert_eq!( &Address { comparator: Some(Octet), address_part: Some(Domain), match_type: Some(Is), header_list: ["From".to_string(), "To".to_string()].to_vec(), key_list: ["example.com".to_string()].to_vec() }, &first ); assert_eq!( parse_sieve_test().parse( r#"address :DOMAIN :is :comparator "i;octet" ["From", "To"] "example.com""# ), Ok(("", first.clone())), ); 
assert_eq!( parse_sieve_test().parse( r#"address :is :DOMAIN :comparator "i;octet" ["From", "To"] "example.com""# ), Ok(("", first.clone())), ); assert_eq!( parse_sieve_test() .parse(r#"address :is :comparator "i;octet" :DOMAIN ["From", "To"] "example.com""#), Ok(("", first)), ); } #[test] fn test_sieve_parse_ifs() { assert_eq!( parse_sieve_rule().parse("if true {\nstop ;\n}"), Ok(( "", Control(If { condition: (Literal(true), RuleBlock([Control(Stop)].to_vec())), elsif: None, else_: None }) )) ); assert_eq!( parse_sieve().parse( r#"# Reject all messages that contain the string "ivnten"in the Subject. if header :contains "subject" "ivnten" { discard; } else { keep; }"# ), Ok(( "", [Control(If { condition: ( Header { comparator: None, match_type: Some(Contains), header_names: ["subject".to_string()].to_vec(), key_list: ["ivnten".to_string()].to_vec() }, RuleBlock([Action(Discard)].to_vec()) ), elsif: None, else_: Some(RuleBlock([Action(Keep)].to_vec())) })] .to_vec() )) ); assert_eq!( parse_sieve().parse( r#"# Reject all messages that contain the string "ivnten"in the Subject. if header :contains "subject" "ivnten" { discard; } # Silently discard all messages sent from the tax man elsif address :matches :domain "from" "*hmrc.gov.uk" { keep; }"# ), Ok(( "", [Control(If { condition: ( Header { comparator: None, match_type: Some(Contains), header_names: ["subject".to_string()].to_vec(), key_list: ["ivnten".to_string()].to_vec() }, RuleBlock([Action(Discard)].to_vec()) ), elsif: Some(( Address { comparator: None, address_part: Some(Domain), match_type: Some(Matches), header_list: ["from".to_string()].to_vec(), key_list: ["*hmrc.gov.uk".to_string()].to_vec() }, RuleBlock([Action(Keep)].to_vec()) )), else_: None })] .to_vec() )) ); } #[test] fn test_sieve_parse() { assert_eq!( parse_sieve().parse( r#"# The hash character starts a one-line comment. "# ), Ok(("", vec![])) ); assert_eq!( parse_sieve().parse( r#"# The hash character starts a one-line comment. 
# Everything after a # character until the end of line is ignored. /* this is a bracketed (C-style) comment. This type of comment can stretch * over many lines. A bracketed comment begins with a forward slash, followed * by an asterisk and ends with the inverse sequence: an asterisk followed * by a forward slash. */ "# ), Ok(("", vec![])), ); // Test Lists (allof, anyof) assert_eq!( parse_sieve().parse( r#"# This test checks against Spamassassin's header fields: # If the spam level is 4 or more and the Subject contains too # many illegal characters, then silently discard the mail. if allof (header :contains "X-Spam-Level" "****", header :contains "X-Spam-Report" "FROM_ILLEGAL_CHARS") { discard; } # Discard mails that do not have a Date: or From: header field # or mails that are sent from the marketing department at example.com. elsif anyof (not exists ["from", "date"], header :contains "from" "marketing@example.com") { discard; }"# ), Ok(( "", [Control(If { condition: ( AllOf( [ Header { comparator: None, match_type: Some(Contains), header_names: ["X-Spam-Level".to_string()].to_vec(), key_list: ["****".to_string()].to_vec() }, Header { comparator: None, match_type: Some(Contains), header_names: ["X-Spam-Report".to_string()].to_vec(), key_list: ["FROM_ILLEGAL_CHARS".to_string()].to_vec() } ] .to_vec() ), RuleBlock([Action(Discard)].to_vec()) ), elsif: Some(( AnyOf( [ Not(Box::new(Exists( ["from".to_string(), "date".to_string()].to_vec() ))), Header { comparator: None, match_type: Some(Contains), header_names: ["from".to_string()].to_vec(), key_list: ["marketing@example.com".to_string()].to_vec() } ] .to_vec() ), RuleBlock([Action(Discard)].to_vec()) )), else_: None })] .to_vec() )) ); // Filter on message size assert_eq!( parse_sieve().parse( r#"# Delete messages greater than half a MB if size :over 500K { discard; } # Also delete small mails, under 1k if size :under 1k { discard; }"# ), Ok(( "", [ Control(If { condition: ( Size { operator: Over, limit: 500000 }, 
RuleBlock([Action(Discard)].to_vec()) ), elsif: None, else_: None }), Control(If { condition: ( Size { operator: Under, limit: 1000 }, RuleBlock([Action(Discard)].to_vec()) ), elsif: None, else_: None }) ] .to_vec() )) ); assert_eq!( parse_sieve().parse( r#"require "fileinto"; if header :contains "from" "coyote" { discard; } elsif header :contains ["subject"] ["$$$"] { discard; } else { fileinto "INBOX"; }"# ), Ok(( "", [ Control(Require(["fileinto".to_string()].to_vec())), Control(If { condition: ( Header { comparator: None, match_type: Some(Contains), header_names: ["from".to_string()].to_vec(), key_list: ["coyote".to_string()].to_vec() }, RuleBlock([Action(Discard)].to_vec()) ), elsif: Some(( Header { comparator: None, match_type: Some(Contains), header_names: ["subject".to_string()].to_vec(), key_list: ["$$$".to_string()].to_vec() }, RuleBlock([Action(Discard)].to_vec()) )), else_: Some(RuleBlock( [Action(FileInto { mailbox: "INBOX".to_string() })] .to_vec() )) }) ] .to_vec() )) ); assert_eq!( parse_sieve().parse( r#"require "envelope"; if envelope :all :is "from" "tim@example.com" { discard; } "# ), Ok(( "", [ Control(Require(["envelope".to_string()].to_vec())), Control(If { condition: ( Envelope { comparator: None, address_part: Some(All), match_type: Some(Is), envelope_part: ["from".to_string()].to_vec(), key_list: ["tim@example.com".to_string()].to_vec() }, RuleBlock([Action(Discard)].to_vec()) ), elsif: None, else_: None }) ] .to_vec() )) ); } }