meli/melib/src/email/parser.rs

/*
 * meli - parser module
 *
 * Copyright 2017 - 2020 Manos Pitsidianakis
 *
 * This file is part of meli.
 *
 * meli is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * meli is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with meli. If not, see <http://www.gnu.org/licenses/>.
 */

use crate::error::{MeliError, Result, ResultIntoMeliError};
use nom::{
    branch::alt,
    bytes::complete::{is_a, is_not, tag, take_until, take_while},
    character::is_hex_digit,
    combinator::peek,
    error::ErrorKind,
    multi::{many0, many1, separated_list, separated_nonempty_list},
    number::complete::le_u8,
    sequence::{delimited, preceded, separated_pair, terminated},
    IResult,
};

/// Expands to a boolean expression that is true when the given byte is an
/// ASCII control character or space (any value below 33) or DEL (127).
///
/// The argument is substituted twice, so pass a cheap, side-effect-free
/// expression.
macro_rules! is_ctl_or_space {
    ($byte:expr) => {
        $byte < 33 || $byte == 127
    };
}

/// Expands to a boolean expression that is true when the given byte is a
/// space, horizontal tab, line feed or carriage return.
///
/// The argument is substituted once per comparison, so pass a cheap,
/// side-effect-free expression.
macro_rules! is_whitespace {
    ($byte:expr) => {
        $byte == b' ' || $byte == b'\t' || $byte == b'\n' || $byte == b'\r'
    };
}

/// Byte-slice counterparts of a few `str` conveniences.
///
/// "Whitespace" below means the bytes matched by `is_whitespace!`: space,
/// tab, line feed and carriage return.
pub trait BytesExt {
    /// Returns the input without its trailing whitespace bytes.
    fn rtrim(&self) -> &Self;
    /// Returns the input without its leading whitespace bytes.
    fn ltrim(&self) -> &Self;
    /// Returns the input with whitespace removed from both ends.
    fn trim(&self) -> &Self;
    /// Index of the first occurrence of `needle`, if any. The implementation
    /// for `[u8]` returns `None` for an empty needle.
    fn find(&self, needle: &[u8]) -> Option<usize>;
    /// Index of the last occurrence of `needle`, if any. The implementation
    /// for `[u8]` returns `None` for an empty needle.
    fn rfind(&self, needle: &[u8]) -> Option<usize>;
    /// Returns a copy of the input with `from` replaced by `to`. The
    /// implementation for `[u8]` replaces only the first occurrence.
    fn replace(&self, from: &[u8], to: &[u8]) -> Vec<u8>;
    /// Whether the input both starts and ends with a double quote byte.
    fn is_quoted(&self) -> bool;
}

impl BytesExt for [u8] {
    /// Cuts the slice right after its last non-whitespace byte; a slice made
    /// up only of whitespace (or an empty one) becomes empty.
    fn rtrim(&self) -> &Self {
        match self.iter().rposition(|b| !is_whitespace!(*b)) {
            Some(last) => &self[..=last],
            None => &[],
        }
    }

    /// Cuts the slice right before its first non-whitespace byte; a slice
    /// made up only of whitespace (or an empty one) becomes empty.
    fn ltrim(&self) -> &Self {
        match self.iter().position(|b| !is_whitespace!(*b)) {
            Some(first) => &self[first..],
            None => &[],
        }
    }

    /// Strips whitespace from the right end, then from the left end.
    fn trim(&self) -> &[u8] {
        let without_tail = self.rtrim();
        without_tail.ltrim()
    }

    /// Naive window scan for the first match (see
    /// https://stackoverflow.com/a/35907071). An empty needle never matches.
    fn find(&self, needle: &[u8]) -> Option<usize> {
        if needle.is_empty() {
            None
        } else {
            self.windows(needle.len())
                .position(|candidate| candidate == needle)
        }
    }

    /// Naive window scan for the last match. An empty needle never matches.
    fn rfind(&self, needle: &[u8]) -> Option<usize> {
        if needle.is_empty() {
            None
        } else {
            self.windows(needle.len())
                .rposition(|candidate| candidate == needle)
        }
    }

    /// Copies the slice, substituting `to` for the FIRST occurrence of `from`
    /// only. When `from` does not occur the copy is unchanged.
    fn replace(&self, from: &[u8], to: &[u8]) -> Vec<u8> {
        match self.find(from) {
            Some(idx) => {
                let mut ret = Vec::with_capacity(self.len());
                ret.extend(self[..idx].iter().cloned());
                ret.extend(to.iter().cloned());
                ret.extend(self[idx + from.len()..].iter().cloned());
                ret
            }
            None => self.to_vec(),
        }
    }

    /// True for slices of at least two bytes that begin and end with `"`.
    fn is_quoted(&self) -> bool {
        self.len() > 1 && self.starts_with(b"\"") && self.ends_with(b"\"")
    }
}

/// Re-assembles the pieces produced by splitting a byte slice.
pub trait BytesIterExt {
    /// Consumes the remaining pieces and concatenates them, inserting `sep`
    /// before a piece whenever the output gathered so far is non-empty.
    fn join(&mut self, sep: u8) -> Vec<u8>;
}

impl<'a, P: for<'r> FnMut(&'r u8) -> bool> BytesIterExt for std::slice::Split<'a, u8, P> {
    fn join(&mut self, sep: u8) -> Vec<u8> {
        let mut joined = Vec::new();
        for piece in self {
            // The separator is skipped while nothing has been written yet,
            // so leading empty pieces do not produce leading separators.
            if !joined.is_empty() {
                joined.push(sep);
            }
            joined.extend(piece.iter());
        }
        joined
    }
}

//fn parser(input: I) -> IResult<I, O, E>;
/// Splits a raw message into its parsed header list and its body.
///
/// The input must consist of one or more headers, a single empty line (`\n`
/// or `\r\n`), and then the body, which is taken verbatim to the end of the
/// input.
///
/// # Errors
///
/// Returns a `MeliError` summarised as "Could not parse mail" when the
/// header/body structure cannot be parsed, and an error when any input is
/// left over after parsing.
pub fn mail(input: &[u8]) -> Result<(Vec<(&[u8], &[u8])>, &[u8])> {
    let parsed = separated_pair(
        headers::headers,
        alt((tag(b"\n"), tag(b"\r\n"))),
        take_while(|_| true),
    )(input);
    let (leftover, headers_and_body) = parsed.chain_err_summary(|| "Could not parse mail")?;

    if leftover.is_empty() {
        Ok(headers_and_body)
    } else {
        Err(MeliError::new("Got leftover bytes after parsing mail"))
    }
}

pub mod generic {
    use super::*;
    /// Parses a non-empty list of `<...>` items separated by runs of commas
    /// and returns the text found between each pair of angle brackets.
    ///
    /// Trailing whitespace of `input` is ignored before parsing.
    pub fn angle_bracket_delimeted_list(input: &[u8]) -> IResult<&[u8], Vec<&[u8]>> {
        let trimmed = input.rtrim();
        separated_nonempty_list(
            is_a(","),
            delimited(tag("<"), take_until(">"), tag(">")),
        )(trimmed)
    }

    /// Converts the value of a date header into a UNIX timestamp.
    ///
    /// Parenthesised comments are stripped, the remainder is decoded with
    /// `encodings::phrase`, and the first `-0000` zone is rewritten to
    /// `+0000` before the trimmed text is handed to
    /// `crate::datetime::rfc822_to_timestamp`.
    pub fn date(input: &[u8]) -> Result<crate::datetime::UnixTimestamp> {
        let without_comments = eat_comments(input);
        let (_, mut parsed_result) = encodings::phrase(&without_comments, false)?;

        let negative_zero = parsed_result.find(b"-0000");
        if let Some(sign_idx) = negative_zero {
            parsed_result[sign_idx] = b'+';
        }

        crate::datetime::rfc822_to_timestamp(parsed_result.trim())
    }

    /// Returns a copy of `input` with every parenthesised comment removed.
    ///
    /// Comments may nest: the nesting depth is tracked, so everything from an
    /// opening `(` up to its matching `)` is dropped, inner comments
    /// included. A `)` met outside of any comment is kept as ordinary text,
    /// and an unterminated comment swallows the rest of the input.
    fn eat_comments(input: &[u8]) -> Vec<u8> {
        let mut depth: usize = 0;
        let mut out = Vec::with_capacity(input.len());
        for &byte in input {
            match byte {
                b'(' => depth += 1,
                b')' if depth > 0 => depth -= 1,
                // Any other byte inside a comment is discarded.
                _ if depth > 0 => {}
                _ => out.push(byte),
            }
        }
        out
    }
    use crate::email::address::Address;
    use crate::email::mailto::Mailto;
    /// Parses a `mailto:` link into a [`Mailto`].
    ///
    /// The expected shape is `mailto:<address>[?name=value[&name=value...]]`.
    /// Recognised field names are `subject`, `cc`, `bcc` and `body`; each may
    /// appear at most once.
    ///
    /// Fails with `ErrorKind::Tag` when the `mailto:` prefix is missing, the
    /// address cannot be parsed, a field has no `=`, or a field name is
    /// unknown or repeated.
    ///
    /// NOTE(review): when the loop stops after the last field, `input` has
    /// not been advanced past that field's value, so the returned remainder
    /// still starts at it — confirm callers ignore the remainder.
    pub fn mailto(mut input: &[u8]) -> IResult<&[u8], Mailto> {
        if !input.starts_with(b"mailto:") {
            return Err(nom::Err::Error((input, ErrorKind::Tag)));
        }

        input = &input[b"mailto:".len()..];

        // The address runs up to the first `?`, or to the end when there are
        // no fields.
        let end = input.iter().position(|e| *e == b'?').unwrap_or(input.len());
        let address: Address;

        if let Ok((_, addr)) = crate::email::parser::address::address(&input[..end]) {
            address = addr;
            // Step over the `?` separator when one is present.
            input = if input[end..].is_empty() {
                &input[end..]
            } else {
                &input[end + 1..]
            };
        } else {
            return Err(nom::Err::Error((input, ErrorKind::Tag)));
        }

        let mut subject = None;
        let mut cc = None;
        let mut bcc = None;
        let mut body = None;
        while !input.is_empty() {
            // Field name: everything before the next `=`.
            let tag = if let Some(tag_pos) = input.iter().position(|e| *e == b'=') {
                let ret = &input[0..tag_pos];
                input = &input[tag_pos + 1..];
                ret
            } else {
                return Err(nom::Err::Error((input, ErrorKind::Tag)));
            };

            // Field value: everything before the next `&`, or to the end.
            let value_end = input.iter().position(|e| *e == b'&').unwrap_or(input.len());

            let value = String::from_utf8_lossy(&input[..value_end]).to_string();
            // The `is_none()` guards send a repeated field to the catch-all
            // arm, which rejects the whole link.
            match tag {
                b"subject" if subject.is_none() => {
                    subject = Some(value);
                }
                b"cc" if cc.is_none() => {
                    cc = Some(value);
                }
                b"bcc" if bcc.is_none() => {
                    bcc = Some(value);
                }
                b"body" if body.is_none() => {
                    /* FIXME:
                     * Parse escaped characters properly.
                     */
                    body = Some(value.replace("%20", " ").replace("%0A", "\n"));
                }
                _ => {
                    return Err(nom::Err::Error((input, ErrorKind::Tag)));
                }
            }
            // No `&` after this value: this was the last field.
            if input[value_end..].is_empty() {
                break;
            }
            input = &input[value_end + 1..];
        }
        Ok((
            input,
            Mailto {
                address,
                subject,
                cc,
                bcc,
                body,
            },
        ))
    }

    /// Lazily walks a block of raw header bytes, yielding one `(name, value)`
    /// pair per header.
    ///
    /// The wrapped slice is the input that has not been parsed yet.
    pub struct HeaderIterator<'a>(pub &'a [u8]);

    impl<'a> Iterator for HeaderIterator<'a> {
        type Item = (&'a [u8], &'a [u8]);

        /// Parses the next header with `headers::header`. Iteration ends when
        /// the input is exhausted or as soon as a header fails to parse; in
        /// the latter case the remaining input is discarded.
        fn next(&mut self) -> Option<Self::Item> {
            if self.0.is_empty() {
                return None;
            }

            if let Ok((rest, pair)) = super::headers::header(self.0) {
                self.0 = rest;
                Some(pair)
            } else {
                self.0 = &[];
                None
            }
        }
    }
}

pub mod headers {
    use super::*;

    /// Parses one or more consecutive headers into `(name, value)` pairs.
    /// Fails when not even one header can be parsed.
    pub fn headers(input: &[u8]) -> IResult<&[u8], Vec<(&[u8], &[u8])>> {
        let one_or_more = many1(header);
        one_or_more(input)
    }

    /// Parses a single header as a `(name, value)` pair, trying the
    /// value-less form first and the form with a value second.
    pub fn header(input: &[u8]) -> IResult<&[u8], (&[u8], &[u8])> {
        let either_form = alt((header_without_val, header_with_val));
        either_form(input)
    }

    /// Parses a header that consists of a field name with an empty value.
    ///
    /// On success the result is `(name, b"")` and the remaining input starts
    /// right after the line break that ends the header. That line break must
    /// be followed by at least one more byte, and that byte must not be a
    /// space or a tab (which would mark a folded continuation line).
    ///
    /// Every failure is reported as `nom::Err::Error((input, ErrorKind::Tag))`
    /// with the original input.
    pub fn header_without_val(input: &[u8]) -> IResult<&[u8], (&[u8], &[u8])> {
        // Nothing to parse, or we are sitting on the empty line that ends the
        // header section.
        if input.is_empty() {
            return Err(nom::Err::Error((input, ErrorKind::Tag)));
        } else if input.starts_with(b"\n") || input.starts_with(b"\r\n") {
            return Err(nom::Err::Error((input, ErrorKind::Tag)));
        }
        let mut ptr = 0;
        let mut name: &[u8] = &input[0..0];
        let mut has_colon = false;
        /* field-name  =  1*<any CHAR, excluding CTLs, SPACE, and ":"> */
        for (i, x) in input.iter().enumerate() {
            if input[i..].starts_with(b"\r\n") {
                // Name ended by CRLF: `ptr` moves past the line break and
                // `has_colon` stays false.
                name = &input[0..i];
                ptr = i + 2;
                break;
            } else if *x == b':' || *x == b'\n' {
                // Name ended by `:` or by a bare LF; both set `has_colon`.
                // `ptr` is left ON the terminating byte.
                name = &input[0..i];
                has_colon = true;
                ptr = i;
                break;
            } else if is_ctl_or_space!(*x) {
                return Err(nom::Err::Error((input, ErrorKind::Tag)));
            }
        }
        if name.is_empty() || input.len() <= ptr {
            return Err(nom::Err::Error((input, ErrorKind::Tag)));
        }
        // Step over the colon; something must follow it.
        if input[ptr] == b':' {
            ptr += 1;
            has_colon = true;
            if ptr >= input.len() {
                return Err(nom::Err::Error((input, ErrorKind::Tag)));
            }
        }

        if !has_colon {
            return Err(nom::Err::Error((input, ErrorKind::Tag)));
        }

        // Only spaces (not tabs) are skipped between the colon and the line
        // break.
        while input[ptr] == b' ' {
            ptr += 1;
            if ptr >= input.len() {
                return Err(nom::Err::Error((input, ErrorKind::Tag)));
            }
        }
        if input[ptr..].starts_with(b"\n") {
            ptr += 1;
            if ptr >= input.len() {
                return Err(nom::Err::Error((input, ErrorKind::Tag)));
            }
            // A following line that starts with space/tab is a continuation,
            // which means the header does have a value after all.
            if input.len() > ptr && input[ptr] != b' ' && input[ptr] != b'\t' {
                Ok((&input[ptr..], (name, b"")))
            } else {
                Err(nom::Err::Error((input, ErrorKind::Tag)))
            }
        } else if input[ptr..].starts_with(b"\r\n") {
            ptr += 2;
            if ptr > input.len() {
                return Err(nom::Err::Error((input, ErrorKind::Tag)));
            }
            // Same continuation check as in the LF branch.
            if input.len() > ptr && input[ptr] != b' ' && input[ptr] != b'\t' {
                Ok((&input[ptr..], (name, b"")))
            } else {
                Err(nom::Err::Error((input, ErrorKind::Tag)))
            }
        } else {
            // Anything else after the colon means the header has a value.
            Err(nom::Err::Error((input, ErrorKind::Tag)))
        }
    }

    /* A header can span multiple lines, eg:
     *
     * Received: from -------------------- (-------------------------)
     * 	by --------------------- (--------------------- [------------------]) (-----------------------)
     * 	with ESMTP id ------------ for <------------------->;
     * 	Tue,  5 Jan 2016 21:30:44 +0100 (CET)
     */

    /// Extracts a (possibly folded) header value.
    ///
    /// The value ends at the first `\n` or `\r\n` that is not followed by a
    /// space or a tab; a line break that is the very last thing in the input
    /// ends the value as well. The returned value excludes that line break
    /// and the remaining input starts right after it. Fails with
    /// `ErrorKind::Tag` when no such line break exists.
    pub fn header_value(input: &[u8]) -> IResult<&[u8], &[u8]> {
        let input_len = input.len();
        // `next` is the index of the first byte after a line break. The break
        // terminates the value when the input stops there or when the next
        // line is not a continuation line.
        let ends_value = |next: usize| -> bool {
            next == input_len
                || (next < input_len && input[next] != b' ' && input[next] != b'\t')
        };

        for (i, byte) in input.iter().enumerate() {
            let break_len = if *byte == b'\n' && ends_value(i + 1) {
                1
            } else if input[i..].starts_with(b"\r\n") && ends_value(i + 2) {
                2
            } else {
                continue;
            };
            return Ok((&input[(i + break_len)..], &input[0..i]));
        }
        Err(nom::Err::Error((input, ErrorKind::Tag)))
    }

    /// Parses a single header as a `(name, value)` tuple.
    ///
    /// The name is everything before the first `:` and must not contain
    /// control characters or spaces. After the colon an optional line break
    /// and any run of spaces/tabs are skipped; the rest is parsed by
    /// [`header_value`]. At least one byte must remain after that skipping.
    ///
    /// Failures detected here are reported as
    /// `nom::Err::Error((input, ErrorKind::Tag))`; a failure of
    /// `header_value` is returned unchanged.
    pub fn header_with_val(input: &[u8]) -> IResult<&[u8], (&[u8], &[u8])> {
        // Nothing to parse, or we are sitting on the empty line that ends the
        // header section.
        if input.is_empty() {
            return Err(nom::Err::Error((input, ErrorKind::Tag)));
        } else if input.starts_with(b"\n") || input.starts_with(b"\r\n") {
            return Err(nom::Err::Error((input, ErrorKind::Tag)));
        }
        let mut ptr = 0;
        let mut name: &[u8] = &input[0..0];
        /* field-name  =  1*<any CHAR, excluding CTLs, SPACE, and ":"> */
        for (i, x) in input.iter().enumerate() {
            if *x == b':' {
                name = &input[0..i];
                // `ptr` now points at the first byte after the colon.
                ptr = i + 1;
                break;
            } else if is_ctl_or_space!(*x) {
                return Err(nom::Err::Error((input, ErrorKind::Tag)));
            }
        }
        // No colon found, or the colon was the very first byte.
        if name.is_empty() {
            return Err(nom::Err::Error((input, ErrorKind::Tag)));
        }
        if ptr >= input.len() {
            return Err(nom::Err::Error((input, ErrorKind::Tag)));
        }

        // The value may begin on the line after the name: skip one line
        // break directly following the colon.
        if input[ptr] == b'\n' {
            ptr += 1;
            if ptr >= input.len() {
                return Err(nom::Err::Error((input, ErrorKind::Tag)));
            }
        } else if input[ptr..].starts_with(b"\r\n") {
            ptr += 2;
            if ptr > input.len() {
                return Err(nom::Err::Error((input, ErrorKind::Tag)));
            }
        }
        if ptr >= input.len() {
            return Err(nom::Err::Error((input, ErrorKind::Tag)));
        }
        // Skip the blanks that precede the value.
        while input[ptr] == b' ' || input[ptr] == b'\t' {
            ptr += 1;
            if ptr >= input.len() {
                return Err(nom::Err::Error((input, ErrorKind::Tag)));
            }
        }
        header_value(&input[ptr..]).map(|(rest, value)| (rest, (name, value)))
    }

    /// Splits off the raw header section of a message without parsing it.
    ///
    /// The section ends at the first empty line (`\n\n` or `\r\n\r\n`). The
    /// returned header bytes include the line break that terminates the last
    /// header, and the remaining input starts at the empty line itself (its
    /// `\n` or `\r\n`).
    ///
    /// Fails with `ErrorKind::Tag` on empty input or when no empty line is
    /// found.
    pub fn headers_raw(input: &[u8]) -> IResult<&[u8], &[u8]> {
        if input.is_empty() {
            return Err(nom::Err::Error((input, ErrorKind::Tag)));
        }
        for i in 0..input.len() {
            let tail = &input[i..];
            if tail.starts_with(b"\n\n") {
                return Ok((&input[(i + 1)..], &input[0..=i]));
            } else if tail.starts_with(b"\r\n\r\n") {
                // Keep the complete CRLF of the last header line. Slicing
                // with `0..=i` would cut the pair in half and leave a
                // dangling `\r` at the end of the header section.
                return Ok((&input[(i + 2)..], &input[0..i + 2]));
            }
        }
        Err(nom::Err::Error((input, ErrorKind::Tag)))
    }
}

pub mod attachments {
    use super::*;
    use crate::email::address::*;
    /// Splits an attachment into its parsed headers and its body.
    ///
    /// Zero or more headers are followed by one empty line (`\n` or `\r\n`);
    /// everything after that line is returned verbatim as the body.
    pub fn attachment(input: &[u8]) -> IResult<&[u8], (std::vec::Vec<(&[u8], &[u8])>, &[u8])> {
        let headers_then_body = separated_pair(
            many0(headers::header),
            alt((tag(b"\n"), tag(b"\r\n"))),
            take_while(|_| true),
        );
        headers_then_body(input)
    }

    /// Locates the parts of a multipart body that are delimited by
    /// `--boundary` lines.
    ///
    /// Each part is described by a [`StrBuilder`] whose `offset` counts bytes
    /// from the start of the original `input` and whose `length` covers the
    /// bytes up to the `--` that introduces the next delimiter. When no
    /// further boundary follows, the rest of the input is reported as the
    /// final part.
    ///
    /// Fails with `ErrorKind::Tag` when no opening delimiter line
    /// (`--boundary` followed by `\n` or `\r\n`) exists, when too little
    /// input is left for another part, or when a later occurrence of
    /// `boundary` is not preceded by `--`.
    pub fn multipart_parts<'a>(
        input: &'a [u8],
        boundary: &[u8],
    ) -> IResult<&'a [u8], Vec<StrBuilder>> {
        let mut ret: Vec<_> = Vec::new();
        let mut input = input;
        let mut offset = 0;

        // Phase 1: skip the preamble up to and including the first delimiter
        // line.
        loop {
            let b_start = if let Some(v) = input.find(boundary) {
                v
            } else {
                return Err(nom::Err::Error((input, ErrorKind::Tag)));
            };

            if b_start < 2 {
                return Err(nom::Err::Error((input, ErrorKind::Tag)));
            }
            offset += b_start - 2;
            input = &input[b_start - 2..];
            let is_delimiter = &input[0..2] == b"--";
            // Always move past this occurrence. Leaving `input` untouched
            // when the boundary text is not preceded by `--` would make the
            // next search find the same occurrence again, forever.
            offset += 2 + boundary.len();
            input = &input[2 + boundary.len()..];
            if !is_delimiter {
                continue;
            }
            // `starts_with` (rather than indexing) keeps an input that ends
            // right after the boundary from panicking.
            if input.starts_with(b"\n") {
                offset += 1;
                input = &input[1..];
            } else if input.starts_with(b"\r\n") {
                offset += 2;
                input = &input[2..];
            } else {
                continue;
            }
            break;
        }

        // Phase 2: every following occurrence of the boundary closes a part.
        loop {
            if input.len() < boundary.len() + 4 {
                return Err(nom::Err::Error((input, ErrorKind::Tag)));
            }
            if let Some(end) = input.find(boundary) {
                // `end < 2` would underflow the slice start below.
                if end < 2 || &input[end - 2..end] != b"--" {
                    return Err(nom::Err::Error((input, ErrorKind::Tag)));
                }
                ret.push(StrBuilder {
                    offset,
                    length: end - 2,
                });
                offset += end + boundary.len();
                input = &input[end + boundary.len()..];
                // Stop at the closing delimiter (`--boundary--`) or when the
                // boundary is not followed by a line break. Both LF and CRLF
                // line endings continue with the next part.
                if input.len() < 2
                    || (input[0] != b'\n' && &input[0..2] != b"\r\n")
                    || &input[0..2] == b"--"
                {
                    break;
                }
                if input[0] == b'\n' {
                    offset += 1;
                    input = &input[1..];
                } else if input[0..].starts_with(b"\r\n") {
                    offset += 2;
                    input = &input[2..];
                }
            } else {
                ret.push(StrBuilder {
                    offset,
                    length: input.len(),
                });
                break;
            }
        }
        Ok((input, ret))
    }

    /// Builds a parser that splits a multipart body into the raw bytes of its
    /// parts, using `--boundary` lines as delimiters.
    ///
    /// The returned parser skips everything up to and including the first
    /// delimiter line (`--boundary` followed by `\n` or `\r\n`), then yields
    /// one slice per part; each slice ends right before the `--` that
    /// introduces the next delimiter. When no further boundary follows, the
    /// rest of the input is yielded as the final part.
    ///
    /// It fails with `ErrorKind::Tag` when no opening delimiter line exists,
    /// when too little input is left for another part, or when a later
    /// occurrence of `boundary` is not preceded by `--`.
    fn parts_f(boundary: &[u8]) -> impl Fn(&[u8]) -> IResult<&[u8], Vec<&[u8]>> + '_ {
        move |input: &[u8]| -> IResult<&[u8], Vec<&[u8]>> {
            let mut ret: Vec<&[u8]> = Vec::new();
            let mut input = input;

            // Phase 1: skip the preamble up to and including the first
            // delimiter line.
            loop {
                let b_start = if let Some(v) = input.find(boundary) {
                    v
                } else {
                    return Err(nom::Err::Error((input, ErrorKind::Tag)));
                };

                if b_start < 2 {
                    return Err(nom::Err::Error((input, ErrorKind::Tag)));
                }
                input = &input[b_start - 2..];
                let is_delimiter = &input[0..2] == b"--";
                // Always move past this occurrence. Leaving `input` untouched
                // when the boundary text is not preceded by `--` would make
                // the next search find the same occurrence again, forever.
                input = &input[2 + boundary.len()..];
                if !is_delimiter {
                    continue;
                }
                // `starts_with` (rather than indexing) keeps an input that
                // ends right after the boundary from panicking.
                if input.starts_with(b"\n") {
                    input = &input[1..];
                } else if input.starts_with(b"\r\n") {
                    input = &input[2..];
                } else {
                    continue;
                }
                break;
            }

            // Phase 2: every following occurrence of the boundary closes a
            // part.
            loop {
                if input.len() < boundary.len() + 4 {
                    return Err(nom::Err::Error((input, ErrorKind::Tag)));
                }
                if let Some(end) = input.find(boundary) {
                    // `end < 2` would underflow the slice start below.
                    if end < 2 || &input[end - 2..end] != b"--" {
                        return Err(nom::Err::Error((input, ErrorKind::Tag)));
                    }
                    ret.push(&input[0..end - 2]);
                    input = &input[end + boundary.len()..];
                    // Stop at the closing delimiter (`--boundary--`) or when
                    // the boundary is not followed by a line break.
                    if input.len() < 2
                        || (input[0] != b'\n' && &input[0..2] != b"\r\n")
                        || &input[0..2] == b"--"
                    {
                        break;
                    }
                    if input[0] == b'\n' {
                        input = &input[1..];
                    } else if input[0..].starts_with(b"\r\n") {
                        input = &input[2..];
                    }
                } else {
                    ret.push(input);
                    break;
                }
            }
            Ok((input, ret))
        }
    }

    /// Splits a multipart body into the raw bytes of its parts.
    ///
    /// The regular splitter built by `parts_f` is tried first. If it fails,
    /// the fallback only requires that a `--` and, at or after it, the
    /// boundary text can be found: it then succeeds with an empty list and
    /// leaves the input positioned at the boundary.
    pub fn parts<'a>(input: &'a [u8], boundary: &[u8]) -> IResult<&'a [u8], Vec<&'a [u8]>> {
        let no_parts = |input: &'a [u8]| -> IResult<&'a [u8], Vec<&'a [u8]>> {
            let (input, _) = take_until(&b"--"[..])(input)?;
            let (input, _) = take_until(boundary)(input)?;
            Ok((input, Vec::<&[u8]>::new()))
        };
        alt((parts_f(boundary), no_parts))(input)
    }

    /// Parses one `; name=value` parameter of a Content-Type header.
    ///
    /// Leading whitespace is skipped before the name and before the value.
    /// The value is either the text between a pair of double quotes or, when
    /// unquoted, everything up to the next `;`.
    ///
    /// Caution: values should be passed through `phrase()`.
    pub fn content_type_parameter(input: &[u8]) -> IResult<&[u8], (&[u8], &[u8])> {
        let (after_semicolon, _) = tag(";")(input)?;
        let (after_equals, name) =
            terminated(take_until("="), tag("="))(after_semicolon.ltrim())?;
        let quoted = delimited(tag("\""), take_until("\""), tag("\""));
        let (rest, value) = alt((quoted, is_not(";")))(after_equals.ltrim())?;

        Ok((rest, (name, value)))
    }

    pub fn content_type(input: &[u8]) -> IResult<&[u8], (&[u8], &[u8], Vec<(&[u8], &[u8])>)> {
        let (input, _type) = take_until("/")(input)?;
        let (input, _) = tag("/")(input)?;
        let (input, _subtype) = is_not(";")(input)?;
        let (input, parameters) = many0(content_type_parameter)(input)?;
        Ok((input, (_type, _subtype, parameters)))
        /*
           do_parse!(
               _type: take_until!("/") >>
               tag!("/") >>
               _subtype: is_not!(";") >>
               parameters: many0!(complete!(content_type_parameter)) >>
               ( {
                   (_type, _subtype, parameters)
               } )
               ));
        */
    }
}

pub mod encodings {
    use super::*;
    use crate::email::attachment_types::Charset;
    use data_encoding::BASE64_MIME;
    use encoding::all::*;
    use encoding::{DecoderTrap, Encoding};
    /// Decodes a single quoted-printable unit from the start of `input`.
    ///
    /// Two forms are recognised:
    /// * `=XY`, where `X` and `Y` are hexadecimal digits (either case),
    ///   decodes to the byte `0xXY`;
    /// * a `\r\n` pair decodes to a single `\n`.
    ///
    /// Anything else fails with `ErrorKind::Tag`.
    pub fn quoted_printable_byte(input: &[u8]) -> IResult<&[u8], u8> {
        /// Numeric value of an ASCII hexadecimal digit. Callers must have
        /// checked the byte with `is_hex_digit` first.
        fn hex_value(digit: u8) -> u8 {
            match digit {
                b'0'..=b'9' => digit - b'0',
                b'A'..=b'F' => digit - b'A' + 10,
                _ => digit - b'a' + 10,
            }
        }

        if input.len() >= 3
            && input[0] == b'='
            && is_hex_digit(input[1])
            && is_hex_digit(input[2])
        {
            Ok((&input[3..], hex_value(input[1]) * 16 + hex_value(input[2])))
        } else if input.starts_with(b"\r\n") {
            // Checked independently of the three-byte minimum of the `=XY`
            // form, so that a CRLF which ends the input is normalised like
            // every other CRLF.
            Ok((&input[2..], b'\n'))
        } else {
            Err(nom::Err::Error((input, ErrorKind::Tag)))
        }
    }

    /* Encoded words
     *"=?charset?encoding?encoded text?=".
     */
    /// Decodes one encoded word located at the very start of `input`.
    ///
    /// The encoding byte selects Base64 (`b`/`B`) or quoted-printable
    /// (`q`/`Q`). For the UTF-8 charset the decoded bytes are returned as
    /// they are; for any other charset they are converted through
    /// `decode_charset`. The remaining input starts right after the closing
    /// `?=`.
    ///
    /// An empty input succeeds with an empty result. Malformed words, unknown
    /// encodings and charset conversion failures are reported as
    /// `ErrorKind::Tag`. Text that fails to decode as Base64 is passed
    /// through undecoded instead of producing an error.
    fn encoded_word(input: &[u8]) -> IResult<&[u8], Vec<u8>> {
        if input.is_empty() {
            return Ok((&[], Vec::with_capacity(0)));
        }
        if input.len() < 5 {
            return Err(nom::Err::Error((input, ErrorKind::Tag)));
        } else if input[0] != b'=' || input[1] != b'?' {
            return Err(nom::Err::Error((input, ErrorKind::Tag)));
        }
        /* find end of Charset tag:
         * =?charset?encoding?encoded text?=
         * ---------^
         */
        let mut tag_end_idx = None;
        for (idx, b) in input[2..].iter().enumerate() {
            if *b == b'?' {
                // `idx` is relative to `input[2..]`; store the absolute index.
                tag_end_idx = Some(idx + 2);
                break;
            }
        }
        if tag_end_idx.is_none() {
            return Err(nom::Err::Error((input, ErrorKind::Tag)));
        }
        let tag_end_idx = tag_end_idx.unwrap();

        // The encoding is the single byte at `tag_end_idx + 1`; it has to be
        // followed directly by another `?` at `tag_end_idx + 2`.
        if tag_end_idx + 2 >= input.len() || input[2 + tag_end_idx] != b'?' {
            return Err(nom::Err::Error((input, ErrorKind::Tag)));
        }
        /* See if input ends with "?=" and get ending index
         * =?charset?encoding?encoded text?=
         * -------------------------------^
         */
        let mut encoded_end_idx = None;
        for i in (3 + tag_end_idx)..input.len() {
            if input[i] == b'?' && i + 1 < input.len() && input[i + 1] == b'=' {
                encoded_end_idx = Some(i);
                break;
            }
        }
        if encoded_end_idx.is_none() {
            return Err(nom::Err::Error((input, ErrorKind::Tag)));
        }
        let encoded_end_idx = encoded_end_idx.unwrap();
        let encoded_text = &input[3 + tag_end_idx..encoded_end_idx];

        let s: Vec<u8> = match input[tag_end_idx + 1] {
            b'b' | b'B' => match BASE64_MIME.decode(encoded_text) {
                Ok(v) => v,
                // Invalid Base64 is kept verbatim.
                Err(_) => encoded_text.to_vec(),
            },
            b'q' | b'Q' => match quoted_printable_bytes_header(encoded_text) {
                // Accept the result only when the whole text was consumed.
                Ok((b"", s)) => s,
                _ => return Err(nom::Err::Error((input, ErrorKind::Tag))),
            },
            _ => return Err(nom::Err::Error((input, ErrorKind::Tag))),
        };

        let charset = Charset::from(&input[2..tag_end_idx]);

        if let Charset::UTF8 = charset {
            Ok((&input[encoded_end_idx + 2..], s))
        } else {
            match decode_charset(&s, charset) {
                Ok(v) => Ok((&input[encoded_end_idx + 2..], v.into_bytes())),
                _ => Err(nom::Err::Error((input, ErrorKind::Tag))),
            }
        }
    }

    /// Converts bytes in the given charset into a `String`.
    ///
    /// Charsets backed by a decoder are decoded strictly, so invalid input is
    /// an error. UTF-8 and ASCII, as well as the charsets that have no
    /// decoder wired up here yet (GB2312, UTF-16, BIG5, ISO-2022-JP), are
    /// read as UTF-8 with invalid sequences replaced.
    pub fn decode_charset(s: &[u8], charset: Charset) -> Result<String> {
        let decoded = match charset {
            Charset::ISO8859_1 => ISO_8859_1.decode(s, DecoderTrap::Strict)?,
            Charset::ISO8859_2 => ISO_8859_2.decode(s, DecoderTrap::Strict)?,
            Charset::ISO8859_7 => ISO_8859_7.decode(s, DecoderTrap::Strict)?,
            Charset::ISO8859_15 => ISO_8859_15.decode(s, DecoderTrap::Strict)?,
            Charset::GBK => GBK.decode(s, DecoderTrap::Strict)?,
            Charset::Windows1250 => WINDOWS_1250.decode(s, DecoderTrap::Strict)?,
            Charset::Windows1251 => WINDOWS_1251.decode(s, DecoderTrap::Strict)?,
            Charset::Windows1252 => WINDOWS_1252.decode(s, DecoderTrap::Strict)?,
            Charset::Windows1253 => WINDOWS_1253.decode(s, DecoderTrap::Strict)?,
            // UTF-8/ASCII, plus the unimplemented charsets, share the lossy
            // UTF-8 path.
            Charset::UTF8
            | Charset::Ascii
            | Charset::GB2312
            | Charset::UTF16
            | Charset::BIG5
            | Charset::ISO2022JP => String::from_utf8_lossy(s).to_string(),
        };
        Ok(decoded)
    }

    /// Recognises a quoted-printable soft line break at the start of `input`.
    ///
    /// A soft line break is an `=` placed directly before a line break
    /// (`=\n` or `=\r\n`). It only wraps an encoded line and stands for no
    /// data at all. On success the matched bytes are returned and the
    /// remaining input starts right after the line break; otherwise the
    /// parser fails with `ErrorKind::Tag`.
    fn quoted_printable_soft_break(input: &[u8]) -> IResult<&[u8], &[u8]> {
        if input.starts_with(b"=\n") {
            Ok((&input[2..], &input[0..2]))
        } else if input.starts_with(b"=\r\n") {
            // `starts_with` already guarantees three bytes, so a soft break
            // that is the very last thing in the input is recognised too.
            Ok((&input[3..], &input[0..3]))
        } else {
            Err(nom::Err::Error((input, ErrorKind::Tag)))
        }
    }

    /// Consumes one `_` and yields a space (0x20), which is what an
    /// underscore stands for in quoted-printable header text.
    pub fn qp_underscore_header(input: &[u8]) -> IResult<&[u8], u8> {
        tag(b"_")(input).map(|(rest, _)| (rest, 0x20))
    }

    /// Decodes quoted-printable text taken from a header.
    ///
    /// With MIME, headers in quoted-printable format can contain underscores
    /// that represent spaces; in non-header context an underscore is just a
    /// plain underscore. Each step tries, in order, an `=XY` escape or CRLF,
    /// an underscore, and finally any single byte taken literally.
    pub fn quoted_printable_bytes_header(input: &[u8]) -> IResult<&[u8], Vec<u8>> {
        let decode_one = alt((quoted_printable_byte, qp_underscore_header, le_u8));
        many0(decode_one)(input)
    }

    /// Decodes quoted-printable text in which underscores are literal.
    ///
    /// Each step tries, in order: a soft line break followed by an escape, a
    /// soft line break followed by any byte, an escape (`=XY` or CRLF) on its
    /// own, and finally any single byte taken literally. A soft line break
    /// contributes nothing to the output.
    pub fn quoted_printable_bytes(input: &[u8]) -> IResult<&[u8], Vec<u8>> {
        let decode_one = alt((
            preceded(quoted_printable_soft_break, quoted_printable_byte),
            preceded(quoted_printable_soft_break, le_u8),
            quoted_printable_byte,
            le_u8,
        ));
        many0(decode_one)(input)
    }

    pub fn space(input: &[u8]) -> IResult<&[u8], ()> {
        let (rest, _) =
            take_while(|c: u8| c == b' ' || c == b'\t' || c == b'\r' || c == b'\n')(input)?;
        Ok((rest, ()))
        //eat_separator!());
    }

    /// Parses one or more encoded words separated by optional whitespace and
    /// returns their decoded bytes concatenated, without any separator.
    pub fn encoded_word_list(input: &[u8]) -> IResult<&[u8], Vec<u8>> {
        let (input, words) = separated_nonempty_list(space, encoded_word)(input)?;

        let mut total_len = 0;
        for word in &words {
            total_len += word.len();
        }

        let mut joined = Vec::with_capacity(total_len);
        for word in words {
            joined.extend(word);
        }
        Ok((input, joined))
    }

    /// Takes the plain text that precedes the next encoded word.
    ///
    /// If the input contains ` =?` and a valid encoded word starts right
    /// after that space, the token is the text before the space and the
    /// remaining input starts at the space. Otherwise the whole input is the
    /// token.
    pub fn ascii_token(input: &[u8]) -> IResult<&[u8], Vec<u8>> {
        // TODO: `take_until` replaced the former `take_until1`, so an empty
        // token is accepted here; check that this is intended.
        let (rest, token) = alt((
            terminated(take_until(" =?"), peek(preceded(tag(b" "), encoded_word))),
            take_while(|_| true),
        ))(input)?;
        Ok((rest, token.to_vec()))
    }

    /// Decodes a header phrase that may mix plain text with encoded words
    /// (`=?charset?encoding?text?=`).
    ///
    /// Leading whitespace is dropped. Encoded words are decoded with
    /// `encoded_word`; the text between them is copied through
    /// `ascii_token`. When `multiline` is false, the whitespace after a plain
    /// word is not copied and a single space is pushed in its place when more
    /// input follows; when it is true, that whitespace is copied as part of
    /// the word, newlines included.
    ///
    /// An empty input yields an empty result.
    pub fn phrase(
        input: &[u8],
        multiline: /* preserve newlines */ bool,
    ) -> IResult<&[u8], Vec<u8>> {
        if input.is_empty() {
            return Ok((&[], Vec::with_capacity(0)));
        }

        let mut input = input.ltrim();
        let mut acc: Vec<u8> = Vec::new();
        // Cursor into the current `input`; `input` itself is re-sliced (and
        // `ptr` reset to 0) every time an encoded word is consumed.
        let mut ptr = 0;

        while ptr < input.len() {
            // Set once at least one encoded word was decoded in this round.
            let mut flag = false;
            // Check if word is encoded.
            while let Ok((rest, v)) = encoded_word(&input[ptr..]) {
                flag = true;
                input = rest;
                ptr = 0;
                acc.extend(v);

                // consume whitespace
                while ptr < input.len() && (is_whitespace!(input[ptr])) {
                    ptr += 1;
                }

                if ptr >= input.len() {
                    break;
                }
            }
            // Whitespace was skipped after the last encoded word and plain
            // text follows: keep the two apart with one space.
            if flag && ptr < input.len() && ptr != 0 {
                acc.push(b' ');
            }
            // Plain text extends at most to the next possible encoded word.
            let end = input[ptr..].find(b"=?");

            let end = end.unwrap_or_else(|| input.len() - ptr) + ptr;
            let ascii_s = ptr;
            let mut ascii_e = 0;

            // Advance over one word of plain text.
            while ptr < end && !(is_whitespace!(input[ptr])) {
                ptr += 1;
            }
            if !multiline {
                ascii_e = ptr;
            }

            // Advance over the whitespace that follows the word.
            while ptr < input.len() && (is_whitespace!(input[ptr])) {
                ptr += 1;
            }
            if multiline {
                ascii_e = ptr;
            }
            if ptr >= input.len() {
                acc.extend(ascii_token(&input[ascii_s..ascii_e])?.1);
                break;
            }
            if ascii_s >= ascii_e {
                /* We have the start of an encoded word but not the end, so parse it as ascii */
                // NOTE(review): `find` yields an index relative to
                // `input[ascii_s..]`, while the fallback closure yields an
                // absolute index into `input`. The two disagree whenever
                // `ascii_s > 0`; the check below then rejects the input if
                // the relative index is not greater than `ascii_s`. Simply
                // adding `ascii_s` changes how the following whitespace is
                // handled, so this needs a dedicated fix with tests.
                ascii_e = input[ascii_s..]
                    .find(b" ")
                    .unwrap_or_else(|| ascii_s + input[ascii_s..].len());
                ptr = ascii_e;
            }
            if ascii_s >= ascii_e {
                return Err(nom::Err::Error((input, ErrorKind::Tag)));
            }

            acc.extend(ascii_token(&input[ascii_s..ascii_e])?.1);
            // Whitespace was skipped after the token: represent it by a
            // single space.
            if ptr != ascii_e {
                acc.push(b' ');
            }
        }
        Ok((&input[ptr..], acc))
    }
}

pub mod address {
    use super::*;
    use crate::email::address::*;
    pub fn display_addr(input: &[u8]) -> IResult<&[u8], Address> {
        if input.is_empty() || input.len() < 3 {
            return Err(nom::Err::Error((input, ErrorKind::Tag)));
        } else if !is_whitespace!(input[0]) {
            let mut display_name = StrBuilder {
                offset: 0,
                length: 0,
            };
            let mut flag = false;
            for (i, b) in input[0..].iter().enumerate() {
                if *b == b'<' {
                    display_name.length = i.saturating_sub(1); // if i != 0 { i - 1 } else { 0 };
                    flag = true;
                    break;
                }
            }
            if !flag {
                let (rest, output) = match super::encodings::phrase(input, false) {
                    Ok(v) => v,
                    _ => return Err(nom::Err::Error((input, ErrorKind::Tag))),
                };
                if output.contains(&b'<') {
                    let (_, address) = match display_addr(&output) {
                        Ok(v) => v,
                        _ => return Err(nom::Err::Error((input, ErrorKind::Tag))),
                    };
                    return Ok((rest, address));
                }
                return Err(nom::Err::Error((input, ErrorKind::Tag)));
            }
            let mut end = input.len();
            let mut at_flag = false;
            let mut flag = false;
            for (i, b) in input[display_name.length + 2..].iter().enumerate() {
                match *b {
                    b'@' => at_flag = true,
                    b'>' => {
                        end = i;
                        flag = true;
                        break;
                    }
                    _ => {}
                }
            }
            if at_flag && flag {
                let (_, raw) =
                    super::encodings::phrase(&input[0..end + display_name.length + 3], false)?;
                let display_name_end = raw.find(b"<").unwrap();
                display_name.length = raw[0..display_name_end].trim().len();
                let address_spec = if display_name_end == 0 {
                    StrBuilder {
                        offset: 1,
                        length: end + 1,
                    }
                } else {
                    StrBuilder {
                        offset: display_name_end + 1,
                        length: end,
                    }
                };

                if display_name.display(&raw).as_bytes().is_quoted() {
                    display_name.offset += 1;
                    display_name.length -= 2;
                }

                let rest_start = if input.len() > end + display_name.length + 2 {
                    end + display_name.length + 3
                } else {
                    end + display_name.length + 2
                };

                Ok((
                    input.get(rest_start..).unwrap_or_default(),
                    Address::Mailbox(MailboxAddress {
                        raw,
                        display_name,
                        address_spec,
                    }),
                ))
            } else {
                Err(nom::Err::Error((input, ErrorKind::Tag)))
            }
        } else {
            Err(nom::Err::Error((input, ErrorKind::Tag)))
        }
    }

    /// Parses a bare address (`local@domain`) with no display name.
    ///
    /// The address runs from the first byte up to, but not including, the
    /// first whitespace byte or `,` found after the first byte; with no such
    /// delimiter it is the whole input. The remaining input starts at that
    /// delimiter, so a list separator is left for the caller to consume.
    ///
    /// # Errors
    ///
    /// Returns `ErrorKind::Tag` when the input is shorter than three bytes,
    /// starts with whitespace, or has no `@` after its first byte and before
    /// the delimiter.
    fn addr_spec(input: &[u8]) -> IResult<&[u8], Address> {
        if input.len() < 3 || is_whitespace!(input[0]) {
            return Err(nom::Err::Error((input, ErrorKind::Tag)));
        }

        let mut at_seen = false;
        let mut addr_len = input.len();
        for (i, &b) in input.iter().enumerate().skip(1) {
            if b == b'@' {
                at_seen = true;
            } else if b == b',' || is_whitespace!(b) {
                addr_len = i;
                break;
            }
        }
        if !at_seen {
            return Err(nom::Err::Error((input, ErrorKind::Tag)));
        }

        let addr = &input[..addr_len];
        Ok((
            /* Everything after the address, none of the address bytes themselves. */
            &input[addr_len..],
            Address::Mailbox(MailboxAddress {
                raw: addr.into(),
                display_name: StrBuilder {
                    offset: 0,
                    length: 0,
                },
                address_spec: StrBuilder {
                    offset: 0,
                    length: addr.len(),
                },
            }),
        ))
    }

    /// Parses a single mailbox: `display_addr` is tried first and `addr_spec`
    /// is the fallback.
    pub fn mailbox(input: &[u8]) -> IResult<&[u8], Address> {
        let either_form = alt((display_addr, addr_spec));
        either_form(input)
        //ws!(alt_complete!(display_addr | addr_spec))
    }

    /// Applies `mailbox` repeatedly (zero or more times) and collects the
    /// parsed addresses.
    pub fn mailbox_list(input: &[u8]) -> IResult<&[u8], Vec<Address>> {
        let repeated = many0(mailbox);
        repeated(input)
        // many0!(mailbox));
    }

    /*
     * group of recipients eg. undisclosed-recipients;
     */
    fn group(input: &[u8]) -> IResult<&[u8], Address> {
        let mut flag = false;
        let mut dlength = 0;
        for (i, b) in input.iter().enumerate() {
            if *b == b':' {
                flag = true;
                dlength = i;
                break;
            }
        }
        if !flag {
            return Err(nom::Err::Error((input, ErrorKind::Tag)));
        }

        let (rest, vec) = mailbox_list(&input[dlength..])?;
        let size: usize =
            (rest.as_ptr() as usize).wrapping_sub((&input[0..] as &[u8]).as_ptr() as usize);
        Ok((
            rest,
            Address::Group(GroupAddress {
                raw: input[0..size].into(),
                display_name: StrBuilder {
                    offset: 0,
                    length: dlength,
                },
                mailbox_list: vec,
            }),
        ))
    }

    /// Parses one address from the left-trimmed input: a `mailbox` if
    /// possible, otherwise a `group`.
    pub fn address(input: &[u8]) -> IResult<&[u8], Address> {
        let trimmed = input.ltrim();
        let mailbox_or_group = alt((mailbox, group));
        mailbox_or_group(trimmed)
        // ws!(alt_complete!(mailbox | group))
    }

    /// Parses a list of addresses separated by one or more `,` bytes, starting
    /// from the left-trimmed input.
    pub fn rfc2822address_list(input: &[u8]) -> IResult<&[u8], Vec<Address>> {
        let trimmed = input.ltrim();
        let comma_separated = separated_list(is_a(","), address);
        comma_separated(trimmed)
        // ws!( separated_list!(is_a!(","), address))
    }

    /// Decodes an address list into one display string.
    ///
    /// The input is decoded with `encoded_word_list`, falling back to
    /// `ascii_token`. The decoded bytes are split on `,`; each piece has `\n`
    /// and `\r` removed and `\t` replaced by a space, is trimmed, converted
    /// lossily to UTF-8, and the pieces are joined with single spaces.
    pub fn address_list(input: &[u8]) -> IResult<&[u8], String> {
        let (input, decoded) = alt((
            super::encodings::encoded_word_list,
            super::encodings::ascii_token,
        ))(input)?;
        let parts: Vec<&[u8]> = decoded.split(|&c| c == b',').collect();

        /* Capacity hint: trimmed piece lengths plus one separator between neighbours. */
        let capacity =
            parts.iter().map(|part| part.trim().len()).sum::<usize>() + parts.len() - 1;
        let mut joined = String::with_capacity(capacity);

        for (idx, part) in parts.iter().enumerate() {
            if idx != 0 {
                joined.push_str(" ");
            }
            let cleaned = part
                .replace(b"\n", b"")
                .replace(b"\r", b"")
                .replace(b"\t", b" ");
            joined.push_str(&String::from_utf8_lossy(cleaned.trim()));
        }

        Ok((input, joined))
    }

    /// Parses `<...>` from the left-trimmed input and returns the bytes
    /// between `<` and the first `>`, without the brackets.
    pub fn message_id(input: &[u8]) -> IResult<&[u8], &[u8]> {
        let trimmed = input.ltrim();
        let bracketed = delimited(tag("<"), take_until(">"), tag(">"));
        bracketed(trimmed)
        //complete!(delimited!(ws!(tag!("<")), take_until1!(">"), tag!(">")))
    }

    fn message_id_peek(input: &[u8]) -> IResult<&[u8], &[u8]> {
        let input_length = input.len();
        if input.is_empty() {
            return Err(nom::Err::Error((input, ErrorKind::Tag)));
        } else if input_length == 2 || input[0] != b'<' {
            return Err(nom::Err::Error((input, ErrorKind::Tag)));
        } else {
            for (i, &x) in input.iter().take(input_length).enumerate().skip(1) {
                if x == b'>' {
                    return Ok((&input[i + 1..], &input[0..=i]));
                }
            }
            return Err(nom::Err::Error((input, ErrorKind::Tag)));
        }
    }

    /// Parses a list of bracketed message ids (see `message_id_peek`)
    /// separated by runs of spaces, tabs, `\n` or `\r`.
    pub fn references(input: &[u8]) -> IResult<&[u8], Vec<&[u8]>> {
        let whitespace_separated = separated_list(is_a(" \n\t\r"), message_id_peek);
        whitespace_separated(input)
        // separated_list!(complete!(is_a!(" \n\t\r")), message_id_peek));
    }
}

#[cfg(test)]
mod tests {
    use super::{address::*, encodings::*, generic::*, *};
    use crate::email::address::*;
    use crate::make_address;

    /// Checks `phrase` (called with `multiline == false`) against header values that mix plain
    /// text with encoded words in several charsets and in both `B` and `Q` encodings.
    #[test]
    fn test_phrase() {
        // Three iso-8859-7 `B` encoded words, each continued on a new line with leading spaces.
        let words = b"=?iso-8859-7?B?W215Y291cnNlcy5udHVhLmdyIC0gyvXs4fTp6t4g6uHpIMri4e306ere?=
     =?iso-8859-7?B?INb18+nq3l0gzd3hIMHt4erv3+358+c6IMzF0c/TIMHQz9TFy8XTzMHU?=
      =?iso-8859-7?B?2c0gwiDUzC4gysHNLiDFzsXUwdPH0yAyMDE3LTE4OiDTx8zFydnTxw==?=";
        assert_eq!("[mycourses.ntua.gr - Κυματική και Κβαντική Φυσική] Νέα Ανακοίνωση: ΜΕΡΟΣ ΑΠΟΤΕΛΕΣΜΑΤΩΝ Β ΤΜ. ΚΑΝ. ΕΞΕΤΑΣΗΣ 2017-18: ΣΗΜΕΙΩΣΗ" , std::str::from_utf8(&phrase(words.trim(), false).unwrap().1).unwrap());
        // UTF-8 `Q` encoded words separated by single spaces; the expected text has no space
        // where one encoded word follows another.
        let words = b"=?UTF-8?Q?=CE=A0=CF=81=CF=8C=CF=83=CE=B8=CE=B5?= =?UTF-8?Q?=CF=84=CE=B7_=CE=B5=CE=BE=CE=B5=CF=84?= =?UTF-8?Q?=CE=B1=CF=83=CF=84=CE=B9=CE=BA=CE=AE?=";
        assert_eq!(
            "Πρόσθετη εξεταστική",
            std::str::from_utf8(&phrase(words.trim(), false).unwrap().1).unwrap()
        );
        // Plain-text prefix followed by encoded words separated by "\n\t" escapes.
        let words = b"[Advcomparch] =?utf-8?b?zqPPhc68z4DOtc+BzrnPhs6/z4HOrCDPg861IGZs?=\n\t=?utf-8?b?dXNoIM67z4zOs8+JIG1pc3ByZWRpY3Rpb24gzrrOsc+Ezqwgz4TOt869?=\n\t=?utf-8?b?IM61zrrPhM6tzrvOtc+Dzrcgc3RvcmU=?=";
        assert_eq!(
            "[Advcomparch] Συμπεριφορά σε flush λόγω misprediction κατά την εκτέλεση store",
            std::str::from_utf8(&phrase(words.trim(), false).unwrap().1).unwrap()
        );
        // Same subject with a "Re: " prefix, written with literal newlines and tabs in the source.
        let words = b"Re: [Advcomparch] =?utf-8?b?zqPPhc68z4DOtc+BzrnPhs6/z4HOrCDPg861IGZs?=
	=?utf-8?b?dXNoIM67z4zOs8+JIG1pc3ByZWRpY3Rpb24gzrrOsc+Ezqwgz4TOt869?=
	=?utf-8?b?IM61zrrPhM6tzrvOtc+Dzrcgc3RvcmU=?=";
        assert_eq!(
            "Re: [Advcomparch] Συμπεριφορά σε flush λόγω misprediction κατά την εκτέλεση store",
            std::str::from_utf8(&phrase(words.trim(), false).unwrap().1).unwrap()
        );
        // Plain ASCII without any encoded word is returned unchanged.
        let words = b"sdf";
        assert_eq!(
            "sdf",
            std::str::from_utf8(&phrase(words, false).unwrap().1).unwrap()
        );
        // A `b` encoded word followed by a `q` encoded word of the same charset.
        let words = b"=?iso-8859-7?b?U2VnIGZhdWx0IPP05+0g5er03evl8+cg9O/1?= =?iso-8859-7?q?_example_ru_n_=5Fsniper?=";
        assert_eq!(
            "Seg fault στην εκτέλεση του example ru n _sniper",
            std::str::from_utf8(&phrase(words, false).unwrap().1).unwrap()
        );
        // The same two encoded words after a plain-text prefix, each on its own continuation line.
        let words = b"Re: [Advcomparch]
 =?iso-8859-7?b?U2VnIGZhdWx0IPP05+0g5er03evl8+cg9O/1?=
 =?iso-8859-7?q?_example_ru_n_=5Fsniper?=";

        assert_eq!(
            "Re: [Advcomparch] Seg fault στην εκτέλεση του example ru n _sniper",
            std::str::from_utf8(&phrase(words, false).unwrap().1).unwrap()
        );

        // Plain-text prefix, then one text split across two UTF-8 `B` encoded words.
        let words = r#"[internal] =?UTF-8?B?zp3Orc6/z4Igzp/OtM63zrPPjM+CIM6jz4XOs86zz4E=?=
 =?UTF-8?B?zrHPhs6uz4I=?="#;
        assert_eq!(
            "[internal] Νέος Οδηγός Συγγραφής",
            std::str::from_utf8(&phrase(words.as_bytes(), false).unwrap().1).unwrap()
        );

        // A single word ("EcoHustler") split across two UTF-8 `Q` encoded words.
        let words = r#"=?UTF-8?Q?Re=3a_Climate_crisis_reality_check_=e2=80=93=c2=a0EcoHust?=
 =?UTF-8?Q?ler?="#;
        assert_eq!(
            "Re: Climate crisis reality check –\u{a0}EcoHustler",
            std::str::from_utf8(&phrase(words.as_bytes(), false).unwrap().1).unwrap()
        );

        // Same expected text, with the tail given as windows-1250 `B` encoded words.
        let words = r#"Re: Climate crisis reality check =?windows-1250?B?lqBFY29IdXN0?=
 =?windows-1250?B?bGVy?="#;
        assert_eq!(
            "Re: Climate crisis reality check –\u{a0}EcoHustler",
            std::str::from_utf8(&phrase(words.as_bytes(), false).unwrap().1).unwrap()
        );
    }

    /// Parses a multi-line, comma-separated address list and expects every mailbox to be
    /// recognised with nothing left over.
    #[test]
    fn test_address_list() {
        let header = b"Obit Oppidum <user@domain>,
            list <list@domain.tld>, list2 <list2@domain.tld>,
            Bobit Boppidum <user@otherdomain.com>, Cobit Coppidum <user2@otherdomain.com>, <user@domain.tld>";
        let expected = vec![
            make_address!("Obit Oppidum", "user@domain"),
            make_address!("list", "list@domain.tld"),
            make_address!("list2", "list2@domain.tld"),
            make_address!("Bobit Boppidum", "user@otherdomain.com"),
            make_address!("Cobit Coppidum", "user2@otherdomain.com"),
            make_address!("", "user@domain.tld"),
        ];

        let (rest, parsed) = rfc2822address_list(header).unwrap();
        // The whole input must be consumed.
        assert_eq!(&header[0..0], rest);
        assert_eq!(expected, parsed);
    }

    /// Checks that `date` yields the same value for one timestamp written with a trailing
    /// comment, without it, and as an encoded word, and that a year-0001 date yields 0.
    #[test]
    fn test_date() {
        let with_comment = b"Thu, 31 Aug 2017 13:43:37 +0000 (UTC)";
        let plain = b"Thu, 31 Aug 2017 13:43:37 +0000";
        let encoded = b"=?utf-8?q?Thu=2C_31_Aug_2017_13=3A43=3A37_-0000?=";
        debug!("{:?}, {:?}", date(with_comment), date(plain));
        debug!("{:?}", date(encoded));

        assert_eq!(date(with_comment).unwrap(), date(plain).unwrap());
        assert_eq!(date(plain).unwrap(), date(encoded).unwrap());

        let year_one = b"Fri, 23 Dec 0001 21:20:36 -0800 (PST)";
        assert_eq!(date(year_one).unwrap(), 0);
    }

    /// Placeholder for an attachment-splitting test.
    ///
    /// The test has no input file yet, so it is marked as ignored instead of being reported as
    /// a pass while checking nothing. The intended body is kept below as a sketch.
    #[test]
    #[ignore = "FIXME: add a sample message file with attachments"]
    fn test_attachments() {
        //FIXME: add file
        /*
        use std::io::Read;
        let mut buffer: Vec<u8> = Vec::new();
        let _ = std::fs::File::open("").unwrap().read_to_end(&mut buffer);
        let boundary = b"b1_4382d284f0c601a737bb32aaeda53160";
        let (_, body) = match mail(&buffer) {
            Ok(v) => v,
            Err(_) => panic!(),
        };
        let attachments = parts(body, boundary).unwrap().1;
        assert_eq!(attachments.len(), 4);
        let v: Vec<&str> = attachments
            .iter()
            .map(|v| std::str::from_utf8(v).unwrap())
            .collect();
        println!("attachments {:?}", v);
        */
    }
    /// Checks the display name and address extracted from single mailboxes, with and without
    /// a display name, including an encoded display name.
    #[test]
    fn test_addresses() {
        /// Fails unless `parsed` is a mailbox whose display name and address, resolved against
        /// its own `raw` buffer, equal the expected bytes.
        fn assert_mailbox(parsed: &Address, display_name: &[u8], address_spec: &[u8]) {
            match parsed {
                Address::Mailbox(m) => {
                    assert_eq!(m.display_name.display_bytes(&m.raw), display_name);
                    assert_eq!(m.address_spec.display_bytes(&m.raw), address_spec);
                }
                // Any other variant is a failure, never a silent pass.
                _ => panic!("expected Address::Mailbox"),
            }
        }

        // Encoded (iso-8859-7, base64) display name.
        let s = b"=?iso-8859-7?B?0/Th/fHv8iDM4ev03ebv8g==?= <maltezos@central.ntua.gr>";
        assert_mailbox(
            &mailbox(s).unwrap().1,
            "Σταύρος Μαλτέζος".as_bytes(),
            b"maltezos@central.ntua.gr",
        );

        // Bare address, no display name.
        let s = b"user@domain";
        assert_mailbox(&mailbox(s).unwrap().1, b"", b"user@domain");

        // Plain display name, parsed directly with `display_addr`.
        let s = b"Name <user@domain>";
        assert_mailbox(&display_addr(s).unwrap().1, b"Name", b"user@domain");

        // Bare address once more, as in the original test.
        let s = b"user@domain";
        assert_mailbox(&mailbox(s).unwrap().1, b"", b"user@domain");
    }

    /// Decodes a quoted-printable HTML fragment containing `=XX` escapes and `=` soft line
    /// breaks and compares it with the expected plain text.
    #[test]
    fn test_quoted_printable() {
        let input = r#"<=21-- SEPARATOR  -->
   <tr>
    <td style=3D=22padding-left: 10px;padding-right: 10px;background-color:=
 =23f3f5fa;=22>
     <table width=3D=22100%=22 cellspacing=3D=220=22 cellpadding=3D=220=22 =
border=3D=220=22>
      <tr>
       <td style=3D=22height:5px;background-color: =23f3f5fa;=22>&nbsp;</td>
      </tr>
     </table>
    </td>
   </tr>"#;
        let decoded = quoted_printable_bytes(input.as_bytes());
        assert_eq!(
            decoded
                .as_ref()
                // Checked conversion: invalid UTF-8 from the decoder fails the test instead of
                // being undefined behaviour.
                .map(|(_, b)| std::str::from_utf8(b).unwrap()),
            Ok(r#"<!-- SEPARATOR  -->
   <tr>
    <td style="padding-left: 10px;padding-right: 10px;background-color: #f3f5fa;">
     <table width="100%" cellspacing="0" cellpadding="0" border="0">
      <tr>
       <td style="height:5px;background-color: #f3f5fa;">&nbsp;</td>
      </tr>
     </table>
    </td>
   </tr>"#)
        );
    }
}