From ac04f1677c99c10cacee53fe6a3d2b263cecc833 Mon Sep 17 00:00:00 2001 From: Manos Pitsidianakis Date: Tue, 27 Dec 2022 19:04:38 +0200 Subject: [PATCH] melib: add basic Sieve parser and interpreter --- melib/Cargo.toml | 3 +- melib/src/lib.rs | 1 + melib/src/sieve.rs | 1217 ++++++++----------------------------- melib/src/sieve/ast.rs | 531 ++++++++++++++++ melib/src/sieve/parser.rs | 1031 +++++++++++++++++++++++++++++++ 5 files changed, 1830 insertions(+), 953 deletions(-) create mode 100644 melib/src/sieve/ast.rs create mode 100644 melib/src/sieve/parser.rs diff --git a/melib/Cargo.toml b/melib/Cargo.toml index 1399b2bf..9e26a949 100644 --- a/melib/Cargo.toml +++ b/melib/Cargo.toml @@ -54,7 +54,7 @@ mailin-embedded = { version = "0.7", features = ["rtls"] } stderrlog = "^0.5" [features] -default = ["unicode_algorithms", "imap_backend", "maildir_backend", "mbox_backend", "vcard", "sqlite3", "smtp", "deflate_compression"] +default = ["unicode_algorithms", "imap_backend", "maildir_backend", "mbox_backend", "vcard", "sqlite3", "smtp", "deflate_compression", "sieve"] debug-tracing = [] deflate_compression = ["flate2", ] @@ -71,3 +71,4 @@ sqlite3 = ["rusqlite", ] tls = ["native-tls"] unicode_algorithms = ["unicode-segmentation"] vcard = [] +sieve = [] diff --git a/melib/src/lib.rs b/melib/src/lib.rs index c50086d2..a0d35664 100644 --- a/melib/src/lib.rs +++ b/melib/src/lib.rs @@ -110,6 +110,7 @@ pub use addressbook::*; pub mod backends; pub use backends::*; mod collection; +#[cfg(feature = "sieve")] pub mod sieve; pub use collection::*; pub mod conf; diff --git a/melib/src/sieve.rs b/melib/src/sieve.rs index d84e07b3..badc3112 100644 --- a/melib/src/sieve.rs +++ b/melib/src/sieve.rs @@ -19,995 +19,308 @@ * along with meli. If not, see . 
*/ -use crate::parsec::*; - -#[derive(Debug, Clone, PartialEq, Eq)] -pub struct RuleBlock(pub Vec); - -/* - MATCH-TYPE =/ COUNT / VALUE - - COUNT = ":count" relational-match - - VALUE = ":value" relational-match - - relational-match = DQUOTE - ("gt" / "ge" / "lt" / "le" / "eq" / "ne") DQUOTE - ; "gt" means "greater than", the C operator ">". - ; "ge" means "greater than or equal", the C operator ">=". - ; "lt" means "less than", the C operator "<". - ; "le" means "less than or equal", the C operator "<=". - ; "eq" means "equal to", the C operator "==". - ; "ne" means "not equal to", the C operator "!=". -*/ -#[derive(Debug, PartialEq, Eq, Clone)] -pub enum ActionCommand { - Keep, - Fileinto { mailbox: String }, - Redirect { address: String }, - Discard, -} - -#[derive(Debug, PartialEq, Eq, Clone)] -pub enum ControlCommand { - Stop, - Require(Vec), - If { - condition: (ConditionRule, RuleBlock), - elsif: Option<(ConditionRule, RuleBlock)>, - else_: Option, - }, -} - -#[derive(Debug, PartialEq, Eq, Clone)] -pub enum Rule { - Block(RuleBlock), - Action(ActionCommand), - Control(ControlCommand), +//! Parsing and interpreting the [RFC 5228 - Sieve: An Email Filtering Language] +//! +//! 
[RFC 5228 - Sieve: An Email Filtering Language]: https://www.rfc-editor.org/rfc/rfc5228.html + +use crate::error::{Error, ErrorKind, Result}; +use crate::parsec::Parser; + +pub mod ast; +pub mod parser; + +use ast::Rule; + +use std::collections::{HashSet, VecDeque}; +use std::convert::TryFrom; + +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +pub enum Capability { + /// "body" + Body, + /// "fileinto" + FileInto, + /// "envelope" + Envelope, + /// "relational" + Relational, + /// "date" + Date, } -#[derive(Debug, Clone, Copy, PartialEq, Eq)] -pub enum AddressOperator { - All, - Localpart, - Domain, +impl TryFrom<&str> for Capability { + type Error = Error; + + fn try_from(value: &str) -> Result { + use Capability::*; + for (literal, ext) in [ + ("body", Body), + ("fileinto", FileInto), + ("envelope", Envelope), + ("relational", Relational), + ("date", Date), + ] { + if value.eq_ignore_ascii_case(literal) { + return Ok(ext); + } + } + Err( + Error::new(format!("Unrecognized Sieve capability: `{}`.", value)) + .set_kind(ErrorKind::NotSupported), + ) + } } -#[derive(Debug, Clone, Copy, PartialEq, Eq)] -pub enum IntegerOperator { - Over, - Under, +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct SieveFilter { + rules: Vec, + capabilities: HashSet, } -#[derive(Debug, Clone, Copy, PartialEq, Eq)] -/// RFC 5231 Sieve Email Filtering: Relational Extension -pub enum RelationalMatch { - /// "gt" means "greater than", the C operator ">". - Gt, - /// "ge" means "greater than or equal", the C operator ">=". - Ge, - /// "lt" means "less than", the C operator "<". - Lt, - /// "le" means "less than or equal", the C operator "<=". - Le, - /// "eq" means "equal to", the C operator "==". - Eq, - /// "ne" means "not equal to", the C operator "!=". - Ne, -} +impl SieveFilter { + /// Parse and create a new Sieve script from string. 
+ pub fn from_str(input: &str) -> Result { + match parser::parse_sieve().parse(input) { + Ok(("", rules)) => Self::new(rules), + Err(unparsed) | Ok((unparsed, _)) => Err(Error::new(format!( + "Could not parse part of Sieve filter input: {:?}.", + unparsed + ))), + } + } -#[derive(Debug, Clone, Copy, PartialEq, Eq)] -pub enum MatchOperator { - Is, - Matches, - Contains, - Count(RelationalMatch), - Value(RelationalMatch), -} + /// Create a new Sieve script from a vector of rules. + pub fn new(rules: Vec) -> Result { + Ok(Self { + capabilities: Self::validate_rules(&rules)?, + rules, + }) + } -#[derive(Debug, Clone, Copy, PartialEq, Eq)] -pub enum CharacterOperator { - /// i;octet, - Octet, - ///i;ascii-casemap - AsciiCasemap, + /// Validate a slice of rules. + /// + /// ```rust + /// use melib::parsec::Parser; + /// use melib::sieve::{parser::parse_sieve, Capability, SieveFilter}; + /// use std::collections::HashSet; + /// + /// assert_eq!( + /// SieveFilter::validate_rules( + /// &parse_sieve() + /// .parse( + /// r#"require "fileinto"; + /// if header :contains "from" "coyote" { + /// discard; + /// } elsif header :contains ["subject"] ["$$$"] { + /// discard; + /// } else { + /// fileinto "INBOX"; + /// }"# + /// ) + /// .unwrap() + /// .1 + /// ) + /// .unwrap(), + /// HashSet::from([Capability::FileInto]) + /// ); + /// + /// // These should err: + /// for s in [ + /// "require \"date\";\nif envelope :all :is \"from\" \"tim@example.com\" {\ndiscard;\n}", + /// "if header :contains \"from\" \"coyote\" {\ndiscard;\n} elsif header :contains [\"subject\"] [\"$$$\"] {\ndiscard;\n} else {\nfileinto \"INBOX\";\n}" + /// ] { + /// assert!( + /// SieveFilter::validate_rules( + /// &parse_sieve() + /// .parse(s) + /// .unwrap() + /// .1 + /// ) + /// .is_err() + /// ); + /// } + /// ``` + pub fn validate_rules(rules: &[Rule]) -> Result> { + use ast::{ControlCommand::*, RequiredCapabilities, Rule::*}; + + let mut capabilities = HashSet::default(); + let mut rule_queue = 
rules.iter().collect::>(); + + while let Some(rule) = rule_queue.pop_front() { + match rule { + Control(Require(ref required)) => { + for ext in required { + capabilities.insert(Capability::try_from(ext.as_str())?); + } + } + other_rule => { + if let Some(required_caps) = other_rule.requires() { + if required_caps.difference(&capabilities).count() > 0 { + return Err(Error::new(format!( + "Rules require capabilities {:?} but + they are not declared with `required`.", + required_caps + .difference(&capabilities) + .collect::>() + ))); + } + } + } + } + } + Ok(capabilities) + } } -#[derive(Debug, Clone, Copy, PartialEq, Eq)] -pub enum ZoneRule { - /// "year" => the year, "0000" .. "9999". - Year, - /// "month" => the month, "01" .. "12". - Month, - /// "day" => the day, "01" .. "31". - Day, - /// "date" => the date in "yyyy-mm-dd" format. - Date, - /// "julian" => the Modified Julian Day, that is, the date - /// expressed as an integer number of days since - /// 00:00 UTC on November 17, 1858 (using the Gregorian - /// calendar). This corresponds to the regular - /// Julian Day minus 2400000.5. Sample routines to - /// convert to and from modified Julian dates are - /// given in Appendix A. - Julian, - /// "hour" => the hour, "00" .. "23". - Hour, - /// "minute" => the minute, "00" .. "59". - Minute, - /// "second" => the second, "00" .. "60". - Second, - /// "time" => the time in "hh:mm:ss" format. - Time, - /// "iso8601" => the date and time in restricted ISO 8601 format. - Iso8601, - /// "std11" => the date and time in a format appropriate - /// for use in a Date: header field [RFC2822]. - Std11, - /// "zone" => the time zone in use. If the user specified a - ///time zone with ":zone", "zone" will - ///contain that value. If :originalzone is specified - ///this value will be the original zone specified - ///in the date-time value. If neither argument is - ///specified the value will be the server's default - ///time zone in offset format "+hhmm" or "-hhmm". 
An - ///offset of 0 (Zulu) always has a positive sign. - Zone, - /// "weekday" => the day of the week expressed as an integer between "0" and "6". "0" is Sunday, "1" is Monday, etc. - Weekday, +/// Possible errors when parsing, validating and/or executing Sieve scripts. +#[derive(Debug, Clone)] +pub enum SieveError { + /// Script validity error. + ValidScriptError { + /// Encapsulated error value. + inner: Error, + }, + /// Script runtime error. + RuntimeScriptError { + /// Encapsulated error value. + inner: Error, + }, + /// Logic bug error. + Bug { + /// Encapsulated error value. + inner: Error, + }, } +/// Succesful outcome of a sieve script execution for an [`Envelope`]. #[derive(Debug, Clone, PartialEq, Eq)] -pub enum ConditionRule { - /// Logical OR operation. - AnyOf(Vec), - /// Logical AND operation. - AllOf(Vec), - /// Header values exist. - Exists(Vec), - Header { - comparator: Option, - match_operator: Option, - header_names: Vec, - key_list: Vec, +pub enum Outcome { + /// Keep. + Keep, + /// Discard. + Discard, + /// File into. + FileInto { + /// Destination + destination_mailbox: String, }, - Date { - comparator: Option, - match_type: Option, - zone: ZoneRule, - header_name: String, - date_part: String, - key_list: Vec, + /// Redirect to address. + Redirect { + /// Destination + destination_address: String, }, - Address { - comparator: Option, - address_part: Option, - match_type: Option, - header_list: Vec, - key_list: Vec, +} + +/// Optional action of a sieve script execution for an [`Envelope`]. +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum Action { + /// Copy. + Copy { + /// Destination + destination_mailbox: String, }, - Not(Box), - Size { - operator: IntegerOperator, - limit: u64, + /// Forward. + Forward { + /// Destination + destination_address: String, }, - Literal(bool), + /// Modify + Modify, } -pub mod parser { - use super::*; - macro_rules! 
parse_action { - ($parser_name:ident, $lit:literal, $t:ty, $action:expr) => { - pub fn $parser_name<'a>() -> impl Parser<'a, $t> { - move |input| { - map( - ws(right(match_literal_anycase($lit), ws(match_literal(";")))), - |_| $action, - ) - .parse(input) +pub trait Sieve { + fn passthrough( + &self, + script: &SieveFilter, + ) -> std::result::Result<(Outcome, Vec), SieveError>; +} + +impl Sieve for crate::Envelope { + fn passthrough( + &self, + script: &SieveFilter, + ) -> std::result::Result<(Outcome, Vec), SieveError> { + use ast::{ActionCommand, ControlCommand::*, Rule::*}; + + // Implicit keep. + let mut outcome: Outcome = Outcome::Keep; + let actions: Vec = Vec::with_capacity(0); + let mut rule_queue = script.rules.iter().collect::>(); + + while let Some(rule) = rule_queue.pop_front() { + match rule { + Action(ActionCommand::Discard) => { + outcome = Outcome::Discard; } - } - }; - ($parser_name:ident, $lit:literal, $t:ty, $action:expr, $argument:ident) => { - pub fn $parser_name<'a>() -> impl Parser<'a, $t> { - move |input| { - map( - ws(right( - parse_token($lit), - left(ws(string()), ws(parse_token(";"))), - )), - |$argument| $action, - ) - .parse(input) + Action(ActionCommand::Keep) => { + outcome = Outcome::Keep; } - } - }; - } - - parse_action! { parse_sieve_keep, "keep", ActionCommand, ActionCommand::Keep } - parse_action! { parse_sieve_discard, "discard", ActionCommand, ActionCommand::Discard } - parse_action! { parse_sieve_stop, "stop", ControlCommand, ControlCommand::Stop } - parse_action! { parse_sieve_fileinto, "fileinto", ActionCommand, ActionCommand::Fileinto { mailbox }, mailbox } - parse_action! 
{ parse_sieve_redirect, "redirect", ActionCommand, ActionCommand::Redirect { address }, address } - - #[inline(always)] - pub fn parse_token<'a>(literal: &'static str) -> impl Parser<'a, ()> { - move |input| map(ws(match_literal_anycase(literal)), |_| ()).parse(input) - } - - #[inline(always)] - fn ws_inner<'a>() -> impl Parser<'a, ()> { - move |input: &'a str| { - let mut offset = 0; - let input_b = input.as_bytes(); - while offset < input_b.len() { - while offset < input_b.len() - && [b' ', b'\t', b'\n', b'\r'].contains(&input_b[offset]) - { - offset += 1; + Action(ActionCommand::Redirect { ref address }) => { + outcome = Outcome::Redirect { + destination_address: address.clone(), + }; } - if offset >= input_b.len() { - break; + Action(ActionCommand::FileInto { ref mailbox }) => { + outcome = Outcome::FileInto { + destination_mailbox: mailbox.clone(), + }; } - if input_b[offset] == b'#' { - while offset < input_b.len() - && !input[offset..].starts_with("\r\n") - && !input[offset..].starts_with('\n') - { - offset += 1; - } - if offset >= input_b.len() { - break; - } - if input[offset..].starts_with("\r\n") { - offset += 2; - } else if input[offset..].starts_with('\n') { - offset += 1; - } - } else if input[offset..].starts_with("/*") { - while offset < input_b.len() && !input[offset..].starts_with("*/") { - offset += 1; - } - if offset >= input_b.len() { - break; - } - if input[offset..].starts_with("*/") { - offset += 2; - } - } else { + Control(Stop) => { break; } - } - Ok((&input[offset..], ())) - } - } - - pub fn ws<'a, P, A>(parser: P) -> impl Parser<'a, A> - where - P: Parser<'a, A>, - { - move |input1| { - let (input2, ()) = ws_inner().parse(input1)?; - let (input3, res) = parser.parse(input2)?; - let (input4, ()) = ws_inner().parse(input3)?; - Ok((input4, res)) - } - } - - // string = quoted-string / multi-line - // - // quoted-other = "\" octet-not-qspecial - // ; represents just the octet-no-qspecial - // ; character. 
SHOULD NOT be used - - // quoted-safe = CRLF / octet-not-qspecial - // ; either a CRLF pair, OR a single octet other - // ; than NUL, CR, LF, double-quote, or backslash - - // quoted-special = "\" (DQUOTE / "\") - // ; represents just a double-quote or backslash - - // quoted-string = DQUOTE quoted-text DQUOTE - - // quoted-text = *(quoted-safe / quoted-special / quoted-other) - - pub fn string<'a>() -> impl Parser<'a, String> { - #[inline(always)] - fn quoted_text<'a>() -> impl Parser<'a, String> { - move |input: &'a str| { - let mut offset = 0; - let mut unescape_dquote: bool = false; - let mut unescape_slash: bool = false; - while offset < input.len() { - if input.len() >= offset + 2 { - if input.starts_with("\r\n") { - offset += 2; - } else if input.starts_with("\\\"") { - unescape_dquote = true; - offset += 2; - } else if input.starts_with("\\\\") { - unescape_slash = true; - offset += 2; + Control(Require(_)) => {} + Control(If { + condition: (ifrule, ifthen), + elsif, + else_, + }) => { + for (cond, block) in Some((Some(ifrule), ifthen)) + .into_iter() + .chain(elsif.as_ref().map(|(c, b)| (Some(c), b)).into_iter()) + .chain(else_.as_ref().map(|b| (None, b)).into_iter()) + { + if let Some(_cond) = cond { + todo!() + } else { + rule_queue.extend(block.0.iter()); + break; } } - // a single octet other ; than NUL, CR, LF, double-quote, or backslash - if [b'\x00', b'\r', b'\n', b'"', b'\\'].contains(&input.as_bytes()[offset]) { - break; - } - offset += 1; } - match (unescape_dquote, unescape_slash) { - (false, false) => Ok((&input[offset..], input[..offset].to_string())), - (true, false) => Ok((&input[offset..], input[..offset].replace("\\\"", "\""))), - (false, true) => Ok((&input[offset..], input[..offset].replace("\\\\", "\\"))), - (true, true) => Ok(( - &input[offset..], - input[..offset].replace("\\\"", "\"").replace("\\\\", "\\"), - )), + Block(ref ruleblock) => { + rule_queue.extend(ruleblock.0.iter()); } } } - - #[inline(always)] - fn quoted_string<'a>() 
-> impl Parser<'a, String> { - delimited(parse_token("\""), quoted_text(), parse_token("\"")) - } - - //fn multiline() -> impl Parser<'a, String> {} - //either(quoted_string(), multiline()) - quoted_string() - } - - // number = 1*DIGIT [ QUANTIFIER ] - // QUANTIFIER = "K" / "M" / "G" - pub fn number<'a>() -> impl Parser<'a, u64> { - map_res( - pair( - is_a(b"0123456789"), - pred(any_char, |c| { - ['k', 'm', 'g'].contains(&c.to_ascii_lowercase()) - }), - ), - |(num_s, quant)| { - Ok(match (num_s.parse::(), quant.to_ascii_lowercase()) { - (Ok(num), 'k') => num * 1000, - (Ok(num), 'm') => num * 1000_000, - (Ok(num), 'g') => num * 1000_000_000, - _ => return Err(num_s), - }) - }, - ) - } - - pub fn parse_sieve_integer_operator<'a>() -> impl Parser<'a, (IntegerOperator, u64)> { - move |input| { - ws(pair( - either( - map(parse_token(":over"), |_| IntegerOperator::Over), - map(parse_token(":under"), |_| IntegerOperator::Under), - ), - ws(number()), - )) - .parse(input) - } - } - // ":comparator" - pub fn parse_sieve_comparator<'a>() -> impl Parser<'a, CharacterOperator> { - move |input| { - ws(right( - parse_token(":comparator"), - ws(map_res(string(), |s| { - if s == "i;octet" { - Ok(CharacterOperator::Octet) - } else if s == "i;ascii-casemap" { - Ok(CharacterOperator::AsciiCasemap) - } else { - Err("invalid comparator") - } - })), - )) - .parse(input) - } - } - - // MATCH-TYPE = ":is" / ":contains" / ":matches" - pub fn parse_sieve_match_type<'a>() -> impl Parser<'a, MatchOperator> { - move |input| { - either( - map(parse_token(":is"), |_| MatchOperator::Is), - either( - map(parse_token(":contains"), |_| MatchOperator::Contains), - map(parse_token(":matches"), |_| MatchOperator::Matches), - ), - ) - .parse(input) - } - } - - /* string-list = "[" string *("," string) "]" / string - ; if there is only a single string, the brackets - ; are optional - */ - pub fn parse_string_list<'a>() -> impl Parser<'a, Vec> { - move |input| { - either( - delimited( - 
ws(parse_token("[")), - separated_list0(string(), ws(parse_token(",")), false), - ws(parse_token("]")), - ), - map(string(), |s| vec![s]), - ) - .parse(input) - } - } - - /* Usage: header [COMPARATOR] [MATCH-TYPE] - * - */ - pub fn parse_sieve_header<'a>() -> impl Parser<'a, ConditionRule> { - move |input| { - map( - ws(pair( - right(parse_token("header"), move |input| { - crate::permutation! { - input, - comparator, Option, opt(parse_sieve_comparator()), - match_type, Option, opt(parse_sieve_match_type()) - } - }), - pair(ws(parse_string_list()), ws(parse_string_list())), - )), - |((comparator, match_operator), (header_names, key_list))| ConditionRule::Header { - comparator, - match_operator, - header_names, - key_list, - }, - ) - .parse(input) - } - } - - // ADDRESS-PART = ":localpart" / ":domain" / ":all" - pub fn parse_sieve_address_type<'a>() -> impl Parser<'a, AddressOperator> { - move |input| { - either( - map(parse_token(":localpart"), |_| AddressOperator::Localpart), - either( - map(parse_token(":domain"), |_| AddressOperator::Domain), - map(parse_token(":all"), |_| AddressOperator::All), - ), - ) - .parse(input) - } - } - - // address [COMPARATOR] [ADDRESS-PART] [MATCH-TYPE] - pub fn parse_sieve_address<'a>() -> impl Parser<'a, ConditionRule> { - move |input| { - map( - ws(pair( - right(parse_token("address"), move |input| { - crate::permutation! 
{ - input, - match_type, Option, opt(parse_sieve_match_type()), - comparator, Option, opt(parse_sieve_comparator()), - address_type, Option, opt(parse_sieve_address_type()) - } - }), - pair(ws(parse_string_list()), ws(parse_string_list())), - )), - |((match_type, comparator, address_part), (header_list, key_list))| { - ConditionRule::Address { - comparator, - address_part, - match_type, - header_list, - key_list, - } - }, - ) - .parse(input) - } - } - - pub fn parse_sieve_test<'a>() -> impl Parser<'a, ConditionRule> { - move |input| { - either( - either( - map(parse_token("true"), |_| ConditionRule::Literal(true)), - map(parse_token("false"), |_| ConditionRule::Literal(false)), - ), - either( - either( - map( - right(ws(parse_token("exists")), ws(parse_string_list())), - |l| ConditionRule::Exists(l), - ), - map( - right(ws(parse_token("size")), ws(parse_sieve_integer_operator())), - |(operator, limit)| ConditionRule::Size { operator, limit }, - ), - ), - either( - either( - map(right(ws(parse_token("not")), parse_sieve_test()), |cond| { - ConditionRule::Not(Box::new(cond)) - }), - either(parse_sieve_header(), parse_sieve_address()), - ), - either( - map(right(ws(parse_token("allof")), parse_test_list()), |l| { - ConditionRule::AllOf(l) - }), - map(right(ws(parse_token("anyof")), parse_test_list()), |l| { - ConditionRule::AnyOf(l) - }), - ), - ), - ), - ) - .parse(input) - } - } - - /* test-list = "(" test *("," test) ")" - */ - pub fn parse_test_list<'a>() -> impl Parser<'a, Vec> { - move |input| { - delimited( - ws(parse_token("(")), - separated_list0(ws(parse_sieve_test()), ws(parse_token(",")), false), - ws(parse_token(")")), - ) - .parse(input) - } - } - - pub fn parse_sieve_rule<'a>() -> impl Parser<'a, Rule> { - either( - map( - either( - either(parse_sieve_stop(), parse_sieve_require()), - parse_sieve_if(), - ), - |c| Rule::Control(c), - ), - map( - either( - either(parse_sieve_keep(), parse_sieve_fileinto()), - either(parse_sieve_redirect(), 
parse_sieve_discard()), - ), - |ac| Rule::Action(ac), - ), - ) - } - - pub fn parse_sieve_block<'a>() -> impl Parser<'a, RuleBlock> { - move |input| { - map( - ws(delimited( - parse_token("{"), - ws(zero_or_more(parse_sieve_rule())), - parse_token("}"), - )), - |v| RuleBlock(v), - ) - .parse(input) - } - } - - pub fn parse_sieve_if<'a>() -> impl Parser<'a, ControlCommand> { - either( - map( - pair( - parse_sieve_if_bare(), - ws(right(parse_token("else"), ws(parse_sieve_block()))), - ), - |(ifbare, else_)| match ifbare { - ControlCommand::If { - condition, - elsif, - else_: _, - } => ControlCommand::If { - condition, - elsif, - else_: Some(else_), - }, - _ => unreachable!(), - }, - ), - parse_sieve_if_bare(), - ) - } - - pub fn parse_sieve_if_bare<'a>() -> impl Parser<'a, ControlCommand> { - either( - map( - pair( - ws(pair( - ws(right(parse_token("if"), ws(parse_sieve_test()))), - ws(parse_sieve_block()), - )), - ws(pair( - ws(right(parse_token("elsif"), ws(parse_sieve_test()))), - ws(parse_sieve_block()), - )), - ), - |(condition, elsif)| ControlCommand::If { - condition, - elsif: Some(elsif), - else_: None, - }, - ), - map( - pair( - ws(right(parse_token("if"), ws(parse_sieve_test()))), - ws(parse_sieve_block()), - ), - |(cond, block)| ControlCommand::If { - condition: (cond, block), - elsif: None, - else_: None, - }, - ), - ) - } - - pub fn parse_sieve_require<'a>() -> impl Parser<'a, ControlCommand> { - move |input| { - right( - ws(parse_token("require")), - ws(left( - map(parse_string_list(), |string_list| { - ControlCommand::Require(string_list) - }), - ws(parse_token(";")), - )), - ) - .parse(input) - } - } - - pub fn parse_sieve<'a>() -> impl Parser<'a, Vec> { - ws(zero_or_more(ws(parse_sieve_rule()))) + Ok((outcome, actions)) } } #[cfg(test)] mod test { - use super::parser::*; - use crate::parsec::Parser; - - use super::ActionCommand::*; - use super::AddressOperator::*; - use super::CharacterOperator::*; - use super::ConditionRule::*; - use 
super::ControlCommand::*; - use super::IntegerOperator::*; - use super::MatchOperator::*; - use super::Rule::*; - use super::RuleBlock; - - #[test] - fn test_sieve_parse_strings() { - assert_eq!( - Ok(("", vec!["fileinto".to_string(), "reject".to_string()])), - parse_string_list().parse(r#"["fileinto", "reject"]"#) - ); - - assert_eq!( - Ok(("", vec!["fileinto".to_string()])), - parse_string_list().parse(r#""fileinto""#) - ); - } - - #[test] - fn test_sieve_parse_conditionals() { - /* Operators that start with : like :matches are unordered and optional, since they have - * defaults. But that means we must handle any order correctly, which is tricky if we use - * an optional parser; for an optional parser both None and Some(_) are valid values. - */ - - /* Permutations of two */ - let raw_input = r#"header :contains :comparator "i;octet" "Subject" - "MAKE MONEY FAST""#; - let (_, first) = parse_sieve_test().parse(raw_input).unwrap(); - assert_eq!( - Header { - comparator: Some(Octet), - match_operator: Some(Contains), - header_names: ["Subject".to_string()].to_vec(), - key_list: ["MAKE MONEY FAST".to_string()].to_vec() - }, - first - ); - - let raw_input = r#"header :comparator "i;octet" :contains "Subject" - "MAKE MONEY FAST""#; - assert_eq!(Ok(("", first)), parse_sieve_test().parse(raw_input)); - - /* Permutations of three */ - let raw_input = r#"address :DOMAIN :comparator "i;octet" :is ["From", "To"] "example.com""#; - let (_, first) = parse_sieve_test().parse(raw_input).unwrap(); - - assert_eq!( - &Address { - comparator: Some(Octet), - address_part: Some(Domain), - match_type: Some(Is), - header_list: ["From".to_string(), "To".to_string()].to_vec(), - key_list: ["example.com".to_string()].to_vec() - }, - &first - ); - - let raw_input = - r#"address :DOMAIN :is :comparator "i;octet" ["From", "To"] "example.com""#; - assert_eq!(Ok(("", first.clone())), parse_sieve_test().parse(raw_input)); - - let raw_input = - r#"address :is :DOMAIN :comparator "i;octet" 
["From", "To"] "example.com""#; - assert_eq!(Ok(("", first.clone())), parse_sieve_test().parse(raw_input)); - - let raw_input = r#"address :is :comparator "i;octet" :DOMAIN ["From", "To"] "example.com""#; - assert_eq!(Ok(("", first)), parse_sieve_test().parse(raw_input)); - } - - #[test] - fn test_sieve_parse_ifs() { - let raw_input = "if true {\nstop ;\n}"; - assert_eq!( - Ok(( - "", - Control(If { - condition: (Literal(true), RuleBlock([Control(Stop)].to_vec())), - elsif: None, - else_: None - }) - )), - parse_sieve_rule().parse(raw_input) - ); - - let raw_input = r#"# Reject all messages that contain the string "ivnten"in the Subject. -if header :contains "subject" "ivnten" -{ - discard; -} else { - keep; -}"#; - - assert_eq!( - Ok(( - "", - [Control(If { - condition: ( - Header { - comparator: None, - match_operator: Some(Contains), - header_names: ["subject".to_string()].to_vec(), - key_list: ["ivnten".to_string()].to_vec() - }, - RuleBlock([Action(Discard)].to_vec()) - ), - elsif: None, - else_: Some(RuleBlock([Action(Keep)].to_vec())) - })] - .to_vec() - )), - parse_sieve().parse(raw_input) - ); - - let raw_input = r#"# Reject all messages that contain the string "ivnten"in the Subject. 
-if header :contains "subject" "ivnten" -{ - discard; -} -# Silently discard all messages sent from the tax man -elsif address :matches :domain "from" "*hmrc.gov.uk" -{ - keep; -}"#; - assert_eq!( - Ok(( - "", - [Control(If { - condition: ( - Header { - comparator: None, - match_operator: Some(Contains), - header_names: ["subject".to_string()].to_vec(), - key_list: ["ivnten".to_string()].to_vec() - }, - RuleBlock([Action(Discard)].to_vec()) - ), - elsif: Some(( - Address { - comparator: None, - address_part: Some(Domain), - match_type: Some(Matches), - header_list: ["from".to_string()].to_vec(), - key_list: ["*hmrc.gov.uk".to_string()].to_vec() - }, - RuleBlock([Action(Keep)].to_vec()) - )), - else_: None - })] - .to_vec() - )), - parse_sieve().parse(raw_input) - ); - } + use super::*; + use crate::Envelope; + + const MESSAGE_A: &str = r#"Date: Tue, 1 Apr 1997 09:06:31 -0800 (PST) +From: coyote@desert.example.org +To: roadrunner@acme.example.com +Subject: I have a present for you + +Look, I'm sorry about the whole anvil thing, and I really +didn't mean to try and drop it on you from the top of the +cliff. I want to try to make it up to you. I've got some +great birdseed over here at my place--top of the line +stuff--and if you come by, I'll have it all wrapped up +for you. I'm really sorry for all the problems I've caused +for you over the years, but I know we can work this out. +-- +Wile E. Coyote "Super Genius" coyote@desert.example.org"#; #[test] - fn test_sieve_parse() { - let raw_input = r#"# The hash character starts a one-line comment. - -"#; - assert_eq!(Ok(("", vec![])), parse_sieve().parse(raw_input)); - - let raw_input = r#"# The hash character starts a one-line comment. -# Everything after a # character until the end of line is ignored. - -/* this is a bracketed (C-style) comment. This type of comment can stretch - * over many lines. 
A bracketed comment begins with a forward slash, followed - * by an asterisk and ends with the inverse sequence: an asterisk followed - * by a forward slash. */ -"#; - - assert_eq!(Ok(("", vec![])), parse_sieve().parse(raw_input)); - // Test Lists (allof, anyof) - - let raw_input = r#"# This test checks against Spamassassin's header fields: -# If the spam level is 4 or more and the Subject contains too -# many illegal characters, then silently discard the mail. -if allof (header :contains "X-Spam-Level" "****", - header :contains "X-Spam-Report" "FROM_ILLEGAL_CHARS") -{ - discard; -} -# Discard mails that do not have a Date: or From: header field -# or mails that are sent from the marketing department at example.com. -elsif anyof (not exists ["from", "date"], - header :contains "from" "marketing@example.com") { - discard; -}"#; - - assert_eq!( - Ok(( - "", - [Control(If { - condition: ( - AllOf( - [ - Header { - comparator: None, - match_operator: Some(Contains), - header_names: ["X-Spam-Level".to_string()].to_vec(), - key_list: ["****".to_string()].to_vec() - }, - Header { - comparator: None, - match_operator: Some(Contains), - header_names: ["X-Spam-Report".to_string()].to_vec(), - key_list: ["FROM_ILLEGAL_CHARS".to_string()].to_vec() - } - ] - .to_vec() - ), - RuleBlock([Action(Discard)].to_vec()) - ), - elsif: Some(( - AnyOf( - [ - Not(Box::new(Exists( - ["from".to_string(), "date".to_string()].to_vec() - ))), - Header { - comparator: None, - match_operator: Some(Contains), - header_names: ["from".to_string()].to_vec(), - key_list: ["marketing@example.com".to_string()].to_vec() - } - ] - .to_vec() - ), - RuleBlock([Action(Discard)].to_vec()) - )), - else_: None - })] - .to_vec() - )), - parse_sieve().parse(raw_input) - ); - // Filter on message size - let raw_input = r#"# Delete messages greater than half a MB -if size :over 500K -{ - discard; -} -# Also delete small mails, under 1k -if size :under 1k -{ - discard; -}"#; - assert_eq!( - Ok(( - "", - [ - 
Control(If { - condition: ( - Size { - operator: Over, - limit: 500000 - }, - RuleBlock([Action(Discard)].to_vec()) - ), - elsif: None, - else_: None - }), - Control(If { - condition: ( - Size { - operator: Under, - limit: 1000 - }, - RuleBlock([Action(Discard)].to_vec()) - ), - elsif: None, - else_: None - }) - ] - .to_vec() - )), - parse_sieve().parse(raw_input) - ); - - assert_eq!( - Ok(( - "", - [ - Control(Require(["fileinto".to_string()].to_vec())), - Control(If { - condition: ( - Header { - comparator: None, - match_operator: Some(Contains), - header_names: ["from".to_string()].to_vec(), - key_list: ["coyote".to_string()].to_vec() - }, - RuleBlock([Action(Discard)].to_vec()) - ), - elsif: Some(( - Header { - comparator: None, - match_operator: Some(Contains), - header_names: ["subject".to_string()].to_vec(), - key_list: ["$$$".to_string()].to_vec() - }, - RuleBlock([Action(Discard)].to_vec()) - )), - else_: Some(RuleBlock( - [Action(Fileinto { - mailbox: "INBOX".to_string() - })] - .to_vec() - )) - }) - ] - .to_vec() - )), - parse_sieve().parse( - r#"require "fileinto"; - if header :contains "from" "coyote" { - discard; - } elsif header :contains ["subject"] ["$$$"] { - discard; - } else { - fileinto "INBOX"; - }"# - ) - ); + fn test_sieve_discard_keep() { + let f = SieveFilter::from_str(r#"keep;"#).unwrap(); + let envelope = + Envelope::from_bytes(MESSAGE_A.as_bytes(), None).expect("Could not parse mail"); + assert_eq!((Outcome::Keep, vec![]), envelope.passthrough(&f).unwrap()); } } diff --git a/melib/src/sieve/ast.rs b/melib/src/sieve/ast.rs new file mode 100644 index 00000000..3758c7ea --- /dev/null +++ b/melib/src/sieve/ast.rs @@ -0,0 +1,531 @@ +/* + * melib - sieve module + * + * Copyright 2022 Manos Pitsidianakis + * + * This file is part of meli. 
+ * + * meli is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * meli is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with meli. If not, see . + */ + +//! Types representing the Sieve's language abstract syntax tree. + +use super::Capability; + +use std::collections::HashSet; + +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +/// A list of [rules](Rule). +pub struct RuleBlock(pub Vec); + +/* + MATCH-TYPE =/ COUNT / VALUE + + COUNT = ":count" relational-match + + VALUE = ":value" relational-match + + relational-match = DQUOTE + ("gt" / "ge" / "lt" / "le" / "eq" / "ne") DQUOTE + ; "gt" means "greater than", the C operator ">". + ; "ge" means "greater than or equal", the C operator ">=". + ; "lt" means "less than", the C operator "<". + ; "le" means "less than or equal", the C operator "<=". + ; "eq" means "equal to", the C operator "==". + ; "ne" means "not equal to", the C operator "!=". +*/ +/// Sieve action commands. +#[derive(Debug, PartialEq, Eq, Clone, Hash)] +pub enum ActionCommand { + /// `keep` + Keep, + /// `fileinto` + FileInto { + /// + mailbox: String, + }, + /// `redirect` + Redirect { + /// + address: String, + }, + /// `discard` + Discard, +} + +#[derive(Debug, PartialEq, Eq, Clone, Hash)] +/// Sieve control commands. +pub enum ControlCommand { + /// `stop` + /// + /// > The "stop" action ends all processing. If the implicit keep has not + /// > been cancelled, then it is taken. + Stop, + /// `require` + Require(Vec), + /// an `if`-`elsif`-`else` condition. 
+ If { + /// + condition: (ConditionRule, RuleBlock), + /// + elsif: Option<(ConditionRule, RuleBlock)>, + /// + else_: Option, + }, +} + +#[derive(Debug, PartialEq, Eq, Clone, Hash)] +/// Sieve rule commands. +pub enum Rule { + /// A list of rules enclosed by braces. + Block(RuleBlock), + /// An action command. + Action(ActionCommand), + /// A control command. + Control(ControlCommand), +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +/// Specifies which part of an e-mail address to examine in conditionals.. +pub enum AddressOperator { + /// The entire address. + All, + /// The localpart (the part before the `@` character). + Localpart, + /// The domain (the part after the `@` character). + Domain, +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +/// Defines what integer operation to perform. +pub enum IntegerOperator { + /// Over + Over, + /// Under + Under, +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +/// RFC 5231 Sieve Email Filtering: Relational Extension +pub enum RelationalMatch { + /// "gt" means "greater than", the C operator ">". + Gt, + /// "ge" means "greater than or equal", the C operator ">=". + Ge, + /// "lt" means "less than", the C operator "<". + Lt, + /// "le" means "less than or equal", the C operator "<=". + Le, + /// "eq" means "equal to", the C operator "==". + Eq, + /// "ne" means "not equal to", the C operator "!=". + Ne, +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +/// Defines what match operation to perform. +pub enum MatchOperator { + /// Exact equality. + Is, + /// Pattern match. + Matches, + /// Content query. + Contains, + /// Count query. + Count(RelationalMatch), + /// Numerical value query. + Value(RelationalMatch), +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +/// Defines how to compare strings/characters. +pub enum CharacterOperator { + /// `i;octet,` compares as raw bytes. + Octet, + /// `i;ascii-casemap` compares case-insensitive. 
+ AsciiCasemap, +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +/// Part of datetime to examine. +pub enum ZoneRule { + /// "year" => the year, "0000" .. "9999". + Year, + /// "month" => the month, "01" .. "12". + Month, + /// "day" => the day, "01" .. "31". + Day, + /// "date" => the date in "yyyy-mm-dd" format. + Date, + /// "julian" => the Modified Julian Day, that is, the date + /// expressed as an integer number of days since + /// 00:00 UTC on November 17, 1858 (using the Gregorian + /// calendar). This corresponds to the regular + /// Julian Day minus 2400000.5. Sample routines to + /// convert to and from modified Julian dates are + /// given in Appendix A. + Julian, + /// "hour" => the hour, "00" .. "23". + Hour, + /// "minute" => the minute, "00" .. "59". + Minute, + /// "second" => the second, "00" .. "60". + Second, + /// "time" => the time in "hh:mm:ss" format. + Time, + /// "iso8601" => the date and time in restricted ISO 8601 format. + Iso8601, + /// "std11" => the date and time in a format appropriate + /// for use in a Date: header field [RFC2822]. + Std11, + /// "zone" => the time zone in use. If the user specified a + ///time zone with ":zone", "zone" will + ///contain that value. If :originalzone is specified + ///this value will be the original zone specified + ///in the date-time value. If neither argument is + ///specified the value will be the server's default + ///time zone in offset format "+hhmm" or "-hhmm". An + ///offset of 0 (Zulu) always has a positive sign. + Zone, + /// "weekday" => the day of the week expressed as an integer between "0" and "6". "0" is Sunday, "1" is Monday, etc. + Weekday, +} + +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +/// Condition rules. +pub enum ConditionRule { + /// Logical OR operation. + AnyOf(Vec), + /// Logical AND operation. + AllOf(Vec), + /// Header values exist. + Exists(Vec), + /// Header value check. 
+ Header { + /// + comparator: Option, + /// + match_type: Option, + /// + header_names: Vec, + /// + key_list: Vec, + }, + /// Date value check. + Date { + /// + comparator: Option, + /// + match_type: Option, + /// + zone: ZoneRule, + /// + header_name: String, + /// + date_part: String, + /// + key_list: Vec, + }, + /// Address value check. + Address { + /// + comparator: Option, + /// + address_part: Option, + /// + match_type: Option, + /// + header_list: Vec, + /// + key_list: Vec, + }, + /// Test envelope ("envelope" capability). + Envelope { + /// + comparator: Option, + /// + address_part: Option, + /// + match_type: Option, + /// + envelope_part: Vec, + /// + key_list: Vec, + }, + /// Invert a conditional. + Not(Box), + /// Check the size of an e-mail. + Size { + /// + operator: IntegerOperator, + /// + limit: u64, + }, + /// Literal `true` or `false`. + Literal(bool), +} + +/// Returns what capabilities an AST item requires, if any. +pub trait RequiredCapabilities { + fn requires(&self) -> Option> { + None + } +} + +impl RequiredCapabilities for ActionCommand { + fn requires(&self) -> Option> { + if matches!(self, ActionCommand::FileInto { .. }) { + Some(HashSet::from([Capability::FileInto])) + } else { + None + } + } +} + +impl RequiredCapabilities for ConditionRule { + fn requires(&self) -> Option> { + macro_rules! 
opt_map { + ($id:ident) => { + $id.as_ref().and_then(RequiredCapabilities::requires) + }; + } + match self { + ConditionRule::Address { + comparator, + match_type, + address_part: _, + header_list: _, + key_list: _, + } + | ConditionRule::Header { + comparator, + match_type, + header_names: _, + key_list: _, + } => { + let ret = IntoIterator::into_iter([opt_map!(comparator), opt_map!(match_type)]) + .filter_map(std::convert::identity) + .flatten() + .collect::>(); + if ret.is_empty() { + None + } else { + Some(ret) + } + } + ConditionRule::Date { + comparator, + match_type, + zone: _, + header_name: _, + date_part: _, + key_list: _, + } => { + let ret = IntoIterator::into_iter([opt_map!(comparator), opt_map!(match_type)]) + .filter_map(std::convert::identity) + .flatten() + .chain(Some(Capability::Date).into_iter()) + .collect::>(); + Some(ret) + } + ConditionRule::Envelope { + comparator, + match_type, + address_part: _, + envelope_part: _, + key_list: _, + } => { + let ret = IntoIterator::into_iter([opt_map!(comparator), opt_map!(match_type)]) + .filter_map(std::convert::identity) + .flatten() + .chain(Some(Capability::Envelope).into_iter()) + .collect::>(); + Some(ret) + } + ConditionRule::Not(ref inner) => inner.requires(), + ConditionRule::AnyOf(ref vec) | ConditionRule::AllOf(ref vec) => { + let ret = vec + .iter() + .filter_map(RequiredCapabilities::requires) + .flatten() + .collect::>(); + if ret.is_empty() { + None + } else { + Some(ret) + } + } + ConditionRule::Literal(_) | ConditionRule::Size { .. 
} | ConditionRule::Exists(_) => { + None + } + } + } +} + +impl RequiredCapabilities for MatchOperator { + fn requires(&self) -> Option> { + if matches!(self, MatchOperator::Count(_) | MatchOperator::Value(_)) { + Some(HashSet::from([Capability::Relational])) + } else { + None + } + } +} + +impl RequiredCapabilities for CharacterOperator {} + +impl RequiredCapabilities for Rule { + fn requires(&self) -> Option> { + match self { + Rule::Block(bl) => bl.requires(), + Rule::Action(cmd) => cmd.requires(), + Rule::Control(cmd) => cmd.requires(), + } + } +} + +impl RequiredCapabilities for RuleBlock { + fn requires(&self) -> Option> { + let ret = self + .0 + .iter() + .filter_map(RequiredCapabilities::requires) + .flatten() + .collect::>(); + if ret.is_empty() { + None + } else { + Some(ret) + } + } +} +impl RequiredCapabilities for ControlCommand { + fn requires(&self) -> Option> { + match self { + ControlCommand::Stop | ControlCommand::Require(_) => None, + ControlCommand::If { + condition: (cond, ruleblock), + elsif, + else_, + } => { + let ret = else_ + .as_ref() + .into_iter() + .filter_map(RequiredCapabilities::requires) + .chain(elsif.as_ref().into_iter().flat_map(|(cond, ruleblock)| { + cond.requires() + .into_iter() + .chain(ruleblock.requires().into_iter()) + })) + .chain(cond.requires().into_iter()) + .chain(ruleblock.requires().into_iter()) + .flatten() + .collect::>(); + if ret.is_empty() { + None + } else { + Some(ret) + } + } + } + } +} + +#[cfg(test)] +mod test { + use std::collections::HashSet; + use std::iter::FromIterator; + + use super::*; + + use super::ActionCommand::*; + use super::AddressOperator::*; + // use super::CharacterOperator::*; + use super::ConditionRule::*; + use super::ControlCommand::*; + // use super::IntegerOperator::*; + use super::MatchOperator::*; + use super::Rule::*; + use super::RuleBlock; + + #[test] + fn test_sieve_capabilities_detect() { + let cond = Envelope { + comparator: None, + address_part: Some(All), + match_type: 
Some(Is), + envelope_part: ["from".to_string()].to_vec(), + key_list: ["tim@example.com".to_string()].to_vec(), + }; + + assert_eq!( + Header { + comparator: None, + match_type: Some(Contains), + header_names: ["from".to_string()].to_vec(), + key_list: ["coyote".to_string()].to_vec() + } + .requires(), + None + ); + + assert_eq!( + Action(FileInto { + mailbox: "INBOX".to_string() + }) + .requires() + .unwrap(), + HashSet::from([Capability::FileInto]) + ); + assert_eq!( + cond.requires().unwrap(), + HashSet::from([Capability::Envelope]) + ); + assert_eq!( + HashSet::from_iter( + Control(If { + condition: ( + Envelope { + comparator: None, + address_part: Some(All), + match_type: Some(Is), + envelope_part: ["from".to_string()].to_vec(), + key_list: ["tim@example.com".to_string()].to_vec() + }, + RuleBlock([Action(Discard)].to_vec()) + ), + elsif: Some(( + Header { + comparator: None, + match_type: Some(Contains), + header_names: ["subject".to_string()].to_vec(), + key_list: ["$$$".to_string()].to_vec() + }, + RuleBlock([Action(Discard)].to_vec()) + )), + else_: Some(RuleBlock( + [Action(FileInto { + mailbox: "INBOX".to_string() + })] + .to_vec() + )) + }) + .requires() + .unwrap() + .into_iter() + ), + HashSet::from([Capability::FileInto, Capability::Envelope]) + ); + } +} diff --git a/melib/src/sieve/parser.rs b/melib/src/sieve/parser.rs new file mode 100644 index 00000000..64a4bf89 --- /dev/null +++ b/melib/src/sieve/parser.rs @@ -0,0 +1,1031 @@ +/* + * melib - sieve module + * + * Copyright 2022 Manos Pitsidianakis + * + * This file is part of meli. + * + * meli is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. 
+ * + * meli is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with meli. If not, see . + */ + +//! Parse Sieve scripts into an abstract syntax tree +//! +//! The main function to use is [`parse_sieve`] which parses text as a list of Sieve rules. +//! +//! # Example +//! +//! ```rust +//! use melib::{sieve::{parser::parse_sieve, ast::*}, parsec::Parser}; +//! +//! use ActionCommand::*; +//! use AddressOperator::*; +//! use CharacterOperator::*; +//! use ConditionRule::*; +//! use ControlCommand::*; +//! use IntegerOperator::*; +//! use MatchOperator::*; +//! use Rule::*; +//! +//! assert_eq!( +//! parse_sieve().parse( +//! r#"require "fileinto"; +//! if header :contains "from" "coyote" { +//! discard; +//! } elsif header :contains ["subject"] ["$$$"] { +//! discard; +//! } else { +//! fileinto "INBOX"; +//! }"# +//! ), +//! Ok(( +//! "", +//! [ +//! Control(Require(["fileinto".to_string()].to_vec())), +//! Control(If { +//! condition: ( +//! Header { +//! comparator: None, +//! match_type: Some(Contains), +//! header_names: ["from".to_string()].to_vec(), +//! key_list: ["coyote".to_string()].to_vec() +//! }, +//! RuleBlock([Action(Discard)].to_vec()) +//! ), +//! elsif: Some(( +//! Header { +//! comparator: None, +//! match_type: Some(Contains), +//! header_names: ["subject".to_string()].to_vec(), +//! key_list: ["$$$".to_string()].to_vec() +//! }, +//! RuleBlock([Action(Discard)].to_vec()) +//! )), +//! else_: Some(RuleBlock( +//! [Action(FileInto { +//! mailbox: "INBOX".to_string() +//! })] +//! .to_vec() +//! )) +//! }) +//! ] +//! .to_vec() +//! )) +//! ); +//! ``` + +use super::ast::*; +use crate::parsec::*; + +// Helper macro to generate simple parsers for commands ending in semicolon. 
+macro_rules! parse_action { + // Macro for commands without an argument. + ($(#[$attrs:meta])* + $parser_name:ident, $lit:literal, $t:ty, $action:expr) => { + $(#[$attrs])* + pub fn $parser_name<'a>() -> impl Parser<'a, $t> { + move |input| { + map( + ws(right(match_literal_anycase($lit), ws(match_literal(";")))), + |_| $action, + ) + .parse(input) + } + } + }; + // Alternative macro for commands with a single string argument. + ($(#[$attrs:meta])* + $parser_name:ident, $lit:literal, $t:ty, $action:expr, $argument:ident) => { + $(#[$attrs])* + pub fn $parser_name<'a>() -> impl Parser<'a, $t> { + move |input| { + map( + ws(right( + parse_token($lit), + left(ws(string()), ws(parse_token(";"))), + )), + |$argument| $action, + ) + .parse(input) + } + } + }; +} + +parse_action! { +/// Parse the `keep` action. +parse_sieve_keep, "keep", ActionCommand, ActionCommand::Keep } +parse_action! { +/// Parse the `discard` action. +parse_sieve_discard, "discard", ActionCommand, ActionCommand::Discard } +parse_action! { +/// Parse the `stop` control command. +parse_sieve_stop, "stop", ControlCommand, ControlCommand::Stop } +parse_action! { +/// Parse the `fileinto` action. +parse_sieve_fileinto, "fileinto", ActionCommand, ActionCommand::FileInto { mailbox }, mailbox } +parse_action! { +/// Parse the `redirect` action. 
+parse_sieve_redirect, "redirect", ActionCommand, ActionCommand::Redirect { address }, address } + +#[inline(always)] +fn parse_token<'a>(literal: &'static str) -> impl Parser<'a, ()> { + move |input| map(ws(match_literal_anycase(literal)), |_| ()).parse(input) +} + +#[inline(always)] +fn ws_inner<'a>() -> impl Parser<'a, ()> { + move |input: &'a str| { + let mut offset = 0; + let input_b = input.as_bytes(); + while offset < input_b.len() { + while offset < input_b.len() && [b' ', b'\t', b'\n', b'\r'].contains(&input_b[offset]) { + offset += 1; + } + if offset >= input_b.len() { + break; + } + if input_b[offset] == b'#' { + while offset < input_b.len() + && !input[offset..].starts_with("\r\n") + && !input[offset..].starts_with('\n') + { + offset += 1; + } + if offset >= input_b.len() { + break; + } + if input[offset..].starts_with("\r\n") { + offset += 2; + } else if input[offset..].starts_with('\n') { + offset += 1; + } + } else if input[offset..].starts_with("/*") { + while offset < input_b.len() && !input[offset..].starts_with("*/") { + offset += 1; + } + if offset >= input_b.len() { + break; + } + if input[offset..].starts_with("*/") { + offset += 2; + } + } else { + break; + } + } + Ok((&input[offset..], ())) + } +} + +#[inline(always)] +/// Take a parser combinator, remove whitespace from the front of the input, apply the parser and +/// remove whitespace from what's left. Sieve defines whitespace as regular white space characters +/// and comments. +pub fn ws<'a, P, A>(parser: P) -> impl Parser<'a, A> +where + P: Parser<'a, A>, +{ + move |input1| { + let (input2, ()) = ws_inner().parse(input1)?; + let (input3, res) = parser.parse(input2)?; + let (input4, ()) = ws_inner().parse(input3)?; + Ok((input4, res)) + } +} + +// string = quoted-string / multi-line +// +// quoted-other = "\" octet-not-qspecial +// ; represents just the octet-no-qspecial +// ; character. 
SHOULD NOT be used + +// quoted-safe = CRLF / octet-not-qspecial +// ; either a CRLF pair, OR a single octet other +// ; than NUL, CR, LF, double-quote, or backslash + +// quoted-special = "\" (DQUOTE / "\") +// ; represents just a double-quote or backslash + +// quoted-string = DQUOTE quoted-text DQUOTE + +// quoted-text = *(quoted-safe / quoted-special / quoted-other) +/// Parse a Sieve language string. +pub fn string<'a>() -> impl Parser<'a, String> { + #[inline(always)] + fn quoted_text<'a>() -> impl Parser<'a, String> { + move |input: &'a str| { + let mut offset = 0; + let mut unescape_dquote: bool = false; + let mut unescape_slash: bool = false; + while offset < input.len() { + if input.len() >= offset + 2 { + if input.starts_with("\r\n") { + offset += 2; + } else if input.starts_with("\\\"") { + unescape_dquote = true; + offset += 2; + } else if input.starts_with("\\\\") { + unescape_slash = true; + offset += 2; + } + } + // a single octet other ; than NUL, CR, LF, double-quote, or backslash + if [b'\x00', b'\r', b'\n', b'"', b'\\'].contains(&input.as_bytes()[offset]) { + break; + } + offset += 1; + } + match (unescape_dquote, unescape_slash) { + (false, false) => Ok((&input[offset..], input[..offset].to_string())), + (true, false) => Ok((&input[offset..], input[..offset].replace("\\\"", "\""))), + (false, true) => Ok((&input[offset..], input[..offset].replace("\\\\", "\\"))), + (true, true) => Ok(( + &input[offset..], + input[..offset].replace("\\\"", "\"").replace("\\\\", "\\"), + )), + } + } + } + + #[inline(always)] + fn quoted_string<'a>() -> impl Parser<'a, String> { + delimited(parse_token("\""), quoted_text(), parse_token("\"")) + } + + //fn multiline() -> impl Parser<'a, String> {} + //either(quoted_string(), multiline()) + quoted_string() +} + +// number = 1*DIGIT [ QUANTIFIER ] +// QUANTIFIER = "K" / "M" / "G" +/// Parse a Sieve language number literal. 
+pub fn number<'a>() -> impl Parser<'a, u64> { + map_res( + pair( + is_a(b"0123456789"), + pred(any_char, |c| { + ['k', 'm', 'g'].contains(&c.to_ascii_lowercase()) + }), + ), + |(num_s, quant)| { + Ok(match (num_s.parse::(), quant.to_ascii_lowercase()) { + (Ok(num), 'k') => num * 1000, + (Ok(num), 'm') => num * 1000_000, + (Ok(num), 'g') => num * 1000_000_000, + _ => return Err(num_s), + }) + }, + ) +} + +/// Parse a Sieve language [integer operator](`IntegerOperator`) (`:over` or `:under`). +pub fn parse_sieve_integer_operator<'a>() -> impl Parser<'a, (IntegerOperator, u64)> { + move |input| { + ws(pair( + either( + map(parse_token(":over"), |_| IntegerOperator::Over), + map(parse_token(":under"), |_| IntegerOperator::Under), + ), + ws(number()), + )) + .parse(input) + } +} + +// ":comparator" +/// Parse a Sieve language [character comparator](`CharacterOperator`) (`:comparator`). +pub fn parse_sieve_comparator<'a>() -> impl Parser<'a, CharacterOperator> { + move |input| { + ws(right( + parse_token(":comparator"), + ws(map_res(string(), |s| { + if s == "i;octet" { + Ok(CharacterOperator::Octet) + } else if s == "i;ascii-casemap" { + Ok(CharacterOperator::AsciiCasemap) + } else { + Err("invalid comparator") + } + })), + )) + .parse(input) + } +} + +// MATCH-TYPE = ":is" / ":contains" / ":matches" +/// Parse a Sieve language [match type]('MatchOperator'). +pub fn parse_sieve_match_type<'a>() -> impl Parser<'a, MatchOperator> { + move |input| { + either( + map(parse_token(":is"), |_| MatchOperator::Is), + either( + map(parse_token(":contains"), |_| MatchOperator::Contains), + map(parse_token(":matches"), |_| MatchOperator::Matches), + ), + ) + .parse(input) + } +} + +/* string-list = "[" string *("," string) "]" / string + ; if there is only a single string, the brackets + ; are optional +*/ +/// Parse a Sieve language string list. If there is only a single string, the brackets are +/// optional. 
+pub fn parse_string_list<'a>() -> impl Parser<'a, Vec> { + move |input| { + either( + delimited( + ws(parse_token("[")), + separated_list0(string(), ws(parse_token(",")), false), + ws(parse_token("]")), + ), + map(string(), |s| vec![s]), + ) + .parse(input) + } +} + +/* Usage: header [COMPARATOR] [MATCH-TYPE] + * + */ +/// Parse a Sieve language [header condition](`ConditionRule`). +pub fn parse_sieve_header<'a>() -> impl Parser<'a, ConditionRule> { + move |input| { + map( + ws(pair( + right(parse_token("header"), move |input| { + crate::permutation! { + input, + comparator, Option, opt(parse_sieve_comparator()), + match_type, Option, opt(parse_sieve_match_type()) + } + }), + pair(ws(parse_string_list()), ws(parse_string_list())), + )), + |((comparator, match_type), (header_names, key_list))| ConditionRule::Header { + comparator, + match_type, + header_names, + key_list, + }, + ) + .parse(input) + } +} + +// ADDRESS-PART = ":localpart" / ":domain" / ":all" +/// Parse a Sieve language [address operator](`AddressOperator`). +pub fn parse_sieve_address_type<'a>() -> impl Parser<'a, AddressOperator> { + move |input| { + either( + map(parse_token(":localpart"), |_| AddressOperator::Localpart), + either( + map(parse_token(":domain"), |_| AddressOperator::Domain), + map(parse_token(":all"), |_| AddressOperator::All), + ), + ) + .parse(input) + } +} + +// address [COMPARATOR] [ADDRESS-PART] [MATCH-TYPE] +/// Parse a Sieve language [address condition](`ConditionRule`). +pub fn parse_sieve_address<'a>() -> impl Parser<'a, ConditionRule> { + move |input| { + map( + ws(pair( + right(parse_token("address"), move |input| { + crate::permutation! 
{ + input, + match_type, Option, opt(parse_sieve_match_type()), + comparator, Option, opt(parse_sieve_comparator()), + address_type, Option, opt(parse_sieve_address_type()) + } + }), + pair(ws(parse_string_list()), ws(parse_string_list())), + )), + |((match_type, comparator, address_part), (header_list, key_list))| { + ConditionRule::Address { + comparator, + address_part, + match_type, + header_list, + key_list, + } + }, + ) + .parse(input) + } +} + +// Test envelope +// Usage: envelope [COMPARATOR] [ADDRESS-PART] [MATCH-TYPE] +// +/// Parse a Sieve language [envelope condition](`ConditionRule`). +pub fn parse_sieve_envelope<'a>() -> impl Parser<'a, ConditionRule> { + move |input| { + map( + ws(pair( + right(parse_token("envelope"), move |input| { + crate::permutation! { + input, + match_type, Option, opt(parse_sieve_match_type()), + comparator, Option, opt(parse_sieve_comparator()), + address_type, Option, opt(parse_sieve_address_type()) + } + }), + pair(ws(parse_string_list()), ws(parse_string_list())), + )), + |((match_type, comparator, address_part), (envelope_part, key_list))| { + ConditionRule::Envelope { + comparator, + address_part, + match_type, + envelope_part, + key_list, + } + }, + ) + .parse(input) + } +} + +/// Parse a Sieve language [test condition](`ConditionRule`). 
+pub fn parse_sieve_test<'a>() -> impl Parser<'a, ConditionRule> { + move |input| { + either( + either( + map(parse_token("true"), |_| ConditionRule::Literal(true)), + map(parse_token("false"), |_| ConditionRule::Literal(false)), + ), + either( + either( + map( + right(ws(parse_token("exists")), ws(parse_string_list())), + |l| ConditionRule::Exists(l), + ), + map( + right(ws(parse_token("size")), ws(parse_sieve_integer_operator())), + |(operator, limit)| ConditionRule::Size { operator, limit }, + ), + ), + either( + either( + map(right(ws(parse_token("not")), parse_sieve_test()), |cond| { + ConditionRule::Not(Box::new(cond)) + }), + either( + either(parse_sieve_header(), parse_sieve_address()), + parse_sieve_envelope(), + ), + ), + either( + map(right(ws(parse_token("allof")), parse_test_list()), |l| { + ConditionRule::AllOf(l) + }), + map(right(ws(parse_token("anyof")), parse_test_list()), |l| { + ConditionRule::AnyOf(l) + }), + ), + ), + ), + ) + .parse(input) + } +} + +/* test-list = "(" test *("," test) ")" + */ +/// Parse a Sieve language list of [test conditions](`ConditionRule`). +pub fn parse_test_list<'a>() -> impl Parser<'a, Vec> { + move |input| { + delimited( + ws(parse_token("(")), + separated_list0(ws(parse_sieve_test()), ws(parse_token(",")), false), + ws(parse_token(")")), + ) + .parse(input) + } +} + +/// Parse a Sieve language [rule](`Rule`). +pub fn parse_sieve_rule<'a>() -> impl Parser<'a, Rule> { + either( + map( + either( + either(parse_sieve_stop(), parse_sieve_require()), + parse_sieve_if(), + ), + |c| Rule::Control(c), + ), + map( + either( + either(parse_sieve_keep(), parse_sieve_fileinto()), + either(parse_sieve_redirect(), parse_sieve_discard()), + ), + |ac| Rule::Action(ac), + ), + ) +} + +/// Parse a Sieve language [block](`RuleBlock`). 
+pub fn parse_sieve_block<'a>() -> impl Parser<'a, RuleBlock> { + move |input| { + map( + ws(delimited( + parse_token("{"), + ws(zero_or_more(parse_sieve_rule())), + parse_token("}"), + )), + |v| RuleBlock(v), + ) + .parse(input) + } +} + +/// Parse a Sieve language [if condition](`ControlCommand`). +pub fn parse_sieve_if<'a>() -> impl Parser<'a, ControlCommand> { + either( + map( + pair( + parse_sieve_if_bare(), + ws(right(parse_token("else"), ws(parse_sieve_block()))), + ), + |(ifbare, else_)| match ifbare { + ControlCommand::If { + condition, + elsif, + else_: _, + } => ControlCommand::If { + condition, + elsif, + else_: Some(else_), + }, + _ => unreachable!(), + }, + ), + parse_sieve_if_bare(), + ) +} + +fn parse_sieve_if_bare<'a>() -> impl Parser<'a, ControlCommand> { + either( + map( + pair( + ws(pair( + ws(right(parse_token("if"), ws(parse_sieve_test()))), + ws(parse_sieve_block()), + )), + ws(pair( + ws(right(parse_token("elsif"), ws(parse_sieve_test()))), + ws(parse_sieve_block()), + )), + ), + |(condition, elsif)| ControlCommand::If { + condition, + elsif: Some(elsif), + else_: None, + }, + ), + map( + pair( + ws(right(parse_token("if"), ws(parse_sieve_test()))), + ws(parse_sieve_block()), + ), + |(cond, block)| ControlCommand::If { + condition: (cond, block), + elsif: None, + else_: None, + }, + ), + ) +} + +/// Parse a Sieve language [`require` control command](`ControlCommand`). +pub fn parse_sieve_require<'a>() -> impl Parser<'a, ControlCommand> { + move |input| { + right( + ws(parse_token("require")), + ws(left( + map(parse_string_list(), |string_list| { + ControlCommand::Require(string_list) + }), + ws(parse_token(";")), + )), + ) + .parse(input) + } +} + +/// Parse a Sieve language script. 
+pub fn parse_sieve<'a>() -> impl Parser<'a, Vec> { + ws(zero_or_more(ws(parse_sieve_rule()))) +} + +#[cfg(test)] +mod test { + use super::*; + use crate::parsec::Parser; + + use super::ActionCommand::*; + use super::AddressOperator::*; + use super::CharacterOperator::*; + use super::ConditionRule::*; + use super::ControlCommand::*; + use super::IntegerOperator::*; + use super::MatchOperator::*; + use super::Rule::*; + use super::RuleBlock; + + #[test] + fn test_sieve_parse_strings() { + assert_eq!( + parse_string_list().parse(r#"["fileinto", "reject"]"#), + Ok(("", vec!["fileinto".to_string(), "reject".to_string()])) + ); + + assert_eq!( + parse_string_list().parse(r#""fileinto""#), + Ok(("", vec!["fileinto".to_string()])) + ); + } + + #[test] + fn test_sieve_parse_conditionals() { + /* Operators that start with : like :matches are unordered and optional, since they have + * defaults. But that means we must handle any order correctly, which is tricky if we use + * an optional parser; for an optional parser both None and Some(_) are valid values. 
+ */ + + /* Permutations of two */ + let (_, first) = parse_sieve_test() + .parse( + r#"header :contains :comparator "i;octet" "Subject" + "MAKE MONEY FAST""#, + ) + .unwrap(); + assert_eq!( + Header { + comparator: Some(Octet), + match_type: Some(Contains), + header_names: ["Subject".to_string()].to_vec(), + key_list: ["MAKE MONEY FAST".to_string()].to_vec() + }, + first + ); + + assert_eq!( + parse_sieve_test().parse( + r#"header :comparator "i;octet" :contains "Subject" + "MAKE MONEY FAST""# + ), + Ok(("", first)), + ); + + /* Permutations of three */ + let (_, first) = parse_sieve_test() + .parse(r#"address :DOMAIN :comparator "i;octet" :is ["From", "To"] "example.com""#) + .unwrap(); + + assert_eq!( + &Address { + comparator: Some(Octet), + address_part: Some(Domain), + match_type: Some(Is), + header_list: ["From".to_string(), "To".to_string()].to_vec(), + key_list: ["example.com".to_string()].to_vec() + }, + &first + ); + + assert_eq!( + parse_sieve_test().parse( + r#"address :DOMAIN :is :comparator "i;octet" ["From", "To"] "example.com""# + ), + Ok(("", first.clone())), + ); + + assert_eq!( + parse_sieve_test().parse( + r#"address :is :DOMAIN :comparator "i;octet" ["From", "To"] "example.com""# + ), + Ok(("", first.clone())), + ); + + assert_eq!( + parse_sieve_test() + .parse(r#"address :is :comparator "i;octet" :DOMAIN ["From", "To"] "example.com""#), + Ok(("", first)), + ); + } + + /* Checks that if / else / elsif control structures parse into Control(If { .. }) rules with the expected condition, block and branch fields. */ #[test] + fn test_sieve_parse_ifs() { + /* A bare `if true` holding a single `stop`, parsed as one rule with parse_sieve_rule(); the expected remaining input is empty. */ assert_eq!( + parse_sieve_rule().parse("if true {\nstop ;\n}"), + Ok(( + "", + Control(If { + condition: (Literal(true), RuleBlock([Control(Stop)].to_vec())), + elsif: None, + else_: None + }) + )) + ); + + /* `if` with an `else` branch, parsed as a whole script: the expected list holds only the If rule, so the leading hash-comment line yields no rule of its own. */ assert_eq!( + parse_sieve().parse( + r#"# Reject all messages that contain the string "ivnten"in the Subject. 
+if header :contains "subject" "ivnten" +{ + discard; +} else { + keep; +}"# + ), + Ok(( + "", + [Control(If { + condition: ( + Header { + comparator: None, + match_type: Some(Contains), + header_names: ["subject".to_string()].to_vec(), + key_list: ["ivnten".to_string()].to_vec() + }, + RuleBlock([Action(Discard)].to_vec()) + ), + elsif: None, + else_: Some(RuleBlock([Action(Keep)].to_vec())) + })] + .to_vec() + )) + ); + + /* `if` followed by `elsif`, with a hash comment between the two blocks: `elsif` is populated and `else_` stays None. The expected action is Keep, mirroring the `keep;` command in the script rather than the wording of its embedded comment. */ assert_eq!( + parse_sieve().parse( + r#"# Reject all messages that contain the string "ivnten"in the Subject. +if header :contains "subject" "ivnten" +{ + discard; +} +# Silently discard all messages sent from the tax man +elsif address :matches :domain "from" "*hmrc.gov.uk" +{ + keep; +}"# + ), + Ok(( + "", + [Control(If { + condition: ( + Header { + comparator: None, + match_type: Some(Contains), + header_names: ["subject".to_string()].to_vec(), + key_list: ["ivnten".to_string()].to_vec() + }, + RuleBlock([Action(Discard)].to_vec()) + ), + elsif: Some(( + Address { + comparator: None, + address_part: Some(Domain), + match_type: Some(Matches), + header_list: ["from".to_string()].to_vec(), + key_list: ["*hmrc.gov.uk".to_string()].to_vec() + }, + RuleBlock([Action(Keep)].to_vec()) + )), + else_: None + })] + .to_vec() + )) + ); + } + + /* Exercises parse_sieve() on whole scripts: comment-only input, allof / anyof test lists, size tests, `require` followed by if / elsif / else, and an envelope test. Every case expects the remaining input to be empty. */ #[test] + fn test_sieve_parse() { + /* A script made only of a hash comment and blank lines parses to an empty rule list. */ assert_eq!( + parse_sieve().parse( + r#"# The hash character starts a one-line comment. + +"# + ), + Ok(("", vec![])) + ); + + /* Hash comments followed by a multi-line bracketed comment also parse to an empty rule list. */ assert_eq!( + parse_sieve().parse( + r#"# The hash character starts a one-line comment. +# Everything after a # character until the end of line is ignored. + +/* this is a bracketed (C-style) comment. This type of comment can stretch + * over many lines. A bracketed comment begins with a forward slash, followed + * by an asterisk and ends with the inverse sequence: an asterisk followed + * by a forward slash. 
*/ +"# + ), + Ok(("", vec![])), + ); + + // Test Lists (allof, anyof) + + assert_eq!( + parse_sieve().parse( + r#"# This test checks against Spamassassin's header fields: +# If the spam level is 4 or more and the Subject contains too +# many illegal characters, then silently discard the mail. +if allof (header :contains "X-Spam-Level" "****", + header :contains "X-Spam-Report" "FROM_ILLEGAL_CHARS") +{ + discard; +} +# Discard mails that do not have a Date: or From: header field +# or mails that are sent from the marketing department at example.com. +elsif anyof (not exists ["from", "date"], + header :contains "from" "marketing@example.com") { + discard; +}"# + ), + Ok(( + "", + [Control(If { + condition: ( + AllOf( + [ + Header { + comparator: None, + match_type: Some(Contains), + header_names: ["X-Spam-Level".to_string()].to_vec(), + key_list: ["****".to_string()].to_vec() + }, + Header { + comparator: None, + match_type: Some(Contains), + header_names: ["X-Spam-Report".to_string()].to_vec(), + key_list: ["FROM_ILLEGAL_CHARS".to_string()].to_vec() + } + ] + .to_vec() + ), + RuleBlock([Action(Discard)].to_vec()) + ), + elsif: Some(( + AnyOf( + [ + Not(Box::new(Exists( + ["from".to_string(), "date".to_string()].to_vec() + ))), + Header { + comparator: None, + match_type: Some(Contains), + header_names: ["from".to_string()].to_vec(), + key_list: ["marketing@example.com".to_string()].to_vec() + } + ] + .to_vec() + ), + RuleBlock([Action(Discard)].to_vec()) + )), + else_: None + })] + .to_vec() + )) + ); + // Filter on message size + /* Two independent `if size` rules are expected as two separate If entries. NOTE(review): the expected limits treat the K / k suffix as a factor of 1000 (500K gives 500000, 1k gives 1000); RFC 5228 section 2.4.1 defines K as 2^10, which would give 512000 and 1024 - confirm which value the parser is meant to produce. */ assert_eq!( + parse_sieve().parse( + r#"# Delete messages greater than half a MB +if size :over 500K +{ + discard; +} +# Also delete small mails, under 1k +if size :under 1k +{ + discard; +}"# + ), + Ok(( + "", + [ + Control(If { + condition: ( + Size { + operator: Over, + limit: 500000 + }, + RuleBlock([Action(Discard)].to_vec()) + ), + elsif: None, + else_: None + }), + Control(If { + condition: ( + Size { + operator: Under, + limit: 
1000 + }, + RuleBlock([Action(Discard)].to_vec()) + ), + elsif: None, + else_: None + }) + ] + .to_vec() + )) + ); + + /* `require` followed by an if / elsif / else chain: the Require rule comes first, and the fileinto action is expected inside the `else_` block. */ assert_eq!( + parse_sieve().parse( + r#"require "fileinto"; + if header :contains "from" "coyote" { + discard; + } elsif header :contains ["subject"] ["$$$"] { + discard; + } else { + fileinto "INBOX"; + }"# + ), + Ok(( + "", + [ + Control(Require(["fileinto".to_string()].to_vec())), + Control(If { + condition: ( + Header { + comparator: None, + match_type: Some(Contains), + header_names: ["from".to_string()].to_vec(), + key_list: ["coyote".to_string()].to_vec() + }, + RuleBlock([Action(Discard)].to_vec()) + ), + elsif: Some(( + Header { + comparator: None, + match_type: Some(Contains), + header_names: ["subject".to_string()].to_vec(), + key_list: ["$$$".to_string()].to_vec() + }, + RuleBlock([Action(Discard)].to_vec()) + )), + else_: Some(RuleBlock( + [Action(FileInto { + mailbox: "INBOX".to_string() + })] + .to_vec() + )) + }) + ] + .to_vec() + )) + ); + + /* An envelope test with the `:all` address part and the `:is` match type, preceded by its `require`; the script ends with a trailing newline and the expected remaining input is still empty. */ assert_eq!( + parse_sieve().parse( + r#"require "envelope"; + if envelope :all :is "from" "tim@example.com" { + discard; + } +"# + ), + Ok(( + "", + [ + Control(Require(["envelope".to_string()].to_vec())), + Control(If { + condition: ( + Envelope { + comparator: None, + address_part: Some(All), + match_type: Some(Is), + envelope_part: ["from".to_string()].to_vec(), + key_list: ["tim@example.com".to_string()].to_vec() + }, + RuleBlock([Action(Discard)].to_vec()) + ), + elsif: None, + else_: None + }) + ] + .to_vec() + )) + ); + } +}