@ -1,12 +1,32 @@
//use memmap::{Mmap, Protection};
/*
 * meli - parser module
 *
 * Copyright 2017 Manos Pitsidianakis
 *
 * This file is part of meli.
 *
 * meli is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * meli is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with meli. If not, see <http://www.gnu.org/licenses/>.
 */
use std ;
use std ::str ::from_utf8 ;
use base64 ;
use chrono ;
use nom ::le_u8 ;
use nom ::{ le_u8 , is_hex_digit } ;
/* Wow this sucks! */
/* Decode one quoted-printable escape ("=" followed by exactly two hex digits) into its byte. */
named!(quoted_printable_byte<u8>, do_parse!(
    p: map_res!(preceded!(tag!("="), verify!(complete!(take!(2)), |s: &[u8]| is_hex_digit(s[0]) && is_hex_digit(s[1]))), from_utf8) >>
    ( {
        /* verify! above guarantees two hex digits, and two hex digits always fit in a u8 */
        u8::from_str_radix(p, 16).unwrap()
    } )));
@ -22,51 +42,55 @@ named!(quoted_printable_byte<u8>, do_parse!(
 * Tue, 5 Jan 2016 21:30:44 +0100 (CET)
 */
/*
 * If a header value is a Vec<&str>, this parses one element of the tail of that vector:
 * a line that starts with a tab or a space and runs up to the next newline.
 */
named!(valuelist<&str>,
    map_res!(
        delimited!(
            /* a continuation line is introduced by a single tab or space ... */
            alt_complete!(tag!("\t") | tag!(" ")),
            /* ... and its content is everything up to the newline, which is consumed */
            take_until!("\n"),
            tag!("\n")
        ),
        std::str::from_utf8
    )
);
use nom ::{ IResult , Needed , ErrorKind } ;
/* Parse the value part of the header -> Vec<&str> */
named ! ( value < Vec < & str > > ,
do_parse ! (
head : map_res ! ( terminated ! ( take_until ! ( "\n" ) , tag ! ( "\n" ) ) , std ::str ::from_utf8 ) > >
tail : many0 ! ( valuelist ) > >
( {
let tail_len = tail . len ( ) ;
let tail : Vec < & str > = tail . iter ( ) . map ( | v | { v . trim ( ) } ) . collect ( ) ;
let mut result = Vec ::with_capacity ( 1 + tail . len ( ) ) ;
result . push ( head . trim ( ) ) ;
if tail_len = = 1 & & tail [ 0 ] = = "" {
result
} else {
tail . iter ( ) . fold ( result , | mut acc , x | { acc . push ( x ) ; acc } )
fn header_value ( input : & [ u8 ] ) -> IResult < & [ u8 ] , & str > {
if input . is_empty ( ) | | input [ 0 ] = = b'\n' {
IResult ::Incomplete ( Needed ::Size ( 1 ) )
} else {
let input_len = input . len ( ) ;
for ( i , x ) in input . iter ( ) . enumerate ( ) {
if * x = = b'\n' {
if ( i + 1 ) < input_len & &
( ( input [ i + 1 ] ! = b' ' & & input [ i + 1 ] ! = b'\t' ) | | input [ i + 1 ] = = b'\n' ) {
return match from_utf8 ( & input [ 0 .. i ] ) {
Ok ( v ) = > {
IResult ::Done ( & input [ ( i + 1 ) .. ] , v )
} ,
Err ( _ ) = > {
IResult ::Error ( error_code ! ( ErrorKind ::Custom ( 43 ) ) )
} ,
}
} else if i + 1 > input_len {
return IResult ::Incomplete ( Needed ::Size ( 1 ) ) ;
}
}
} )
) ) ;
}
IResult ::Error ( error_code ! ( ErrorKind ::Custom ( 43 ) ) )
}
}
/* Parse the name part of the header -> &str */
named ! ( name < & str > ,
terminated! ( verify! ( map_res ! ( take_until1 ! ( ":" ) , std::str :: from_utf8) , | v : & str | { ! v . contains ( "\n" ) } ) , tag ! ( ":" ) ) ) ;
named ! ( name < & str > ,
verify! ( map_res ! ( take_until1 ! ( ":" ) , from_utf8) , | v : & str | ! v . contains ( '\n' ) ) ) ;
/* Parse a single header as a tuple -> (&str, Vec<&str>) */
named ! ( header < ( & str , std ::vec ::Vec < & str > ) > ,
pair! ( complete ! ( name ) , complete! ( value) ) ) ;
named ! ( header < ( & str , & str ) > ,
separated_ pair! ( complete ! ( name ) , ws! ( tag ! ( ":" ) ) , complete! ( header_ value) ) ) ;
/* Parse all headers -> Vec<(&str, Vec<&str>)> */
named ! ( headers < std ::vec ::Vec < ( & str , std ::vec ::Vec < & str > ) > > ,
named ! ( headers < std ::vec ::Vec < ( & str , & str ) > > ,
many1 ! ( complete ! ( header ) ) ) ;
named ! ( pub mail < ( std ::vec ::Vec < ( & str , std ::vec ::Vec < & str > ) > , & [ u8 ] ) > ,
named ! ( pub mail < ( std ::vec ::Vec < ( & str , & str ) > , & [ u8 ] ) > ,
separated_pair ! ( headers , tag ! ( "\n" ) , take_while ! ( call ! ( | _ | { true } ) ) ) ) ;
named ! ( pub attachment < ( std ::vec ::Vec < ( & str , std ::vec ::Vec < & str > ) > , & [ u8 ] ) > ,
named ! ( pub attachment < ( std ::vec ::Vec < ( & str , & str ) > , & [ u8 ] ) > ,
do_parse ! (
opt ! ( is_a ! ( " \n\t\r" ) ) > >
pair : pair ! ( many0 ! ( complete ! ( header ) ) , take_while ! ( call ! ( | _ | { true } ) ) ) > >
( { pair } ) ) ) ;
/* try chrono parse_from_str with several formats
 * https://docs.rs/chrono/0.4.0/chrono/struct.DateTime.html#method.parse_from_str
 */
@ -75,55 +99,69 @@ named!(pub attachment<(std::vec::Vec<(&str, std::vec::Vec<&str>)>, &[u8])>,
/* Encoded words
 * "=?charset?encoding?encoded text?=".
 */
named ! ( utf8_token_base64 < String > , do_parse ! (
named ! ( utf8_token_base64 < Vec< u8 > > , do_parse ! (
encoded : complete ! ( delimited ! ( tag_no_case ! ( "=?UTF-8?B?" ) , take_until1 ! ( "?=" ) , tag ! ( "?=" ) ) ) > >
( {
match base64 ::decode ( encoded ) {
Ok ( ref v ) = > { String ::from_utf8_lossy ( v ) . into_owned ( )
Ok ( v ) = > {
v
} ,
Err ( _ ) = > {
encoded . to_vec ( )
} ,
Err ( _ ) = > { String ::from_utf8_lossy ( encoded ) . into_owned ( ) }
}
} )
) ) ;
named ! ( utf8_token_quoted_p_raw < & [ u8 ] , & [ u8 ] > ,
named ! ( utf8_token_quoted_p_raw < & [ u8 ] , & [ u8 ] > ,
complete ! ( delimited ! ( tag_no_case ! ( "=?UTF-8?q?" ) , take_until1 ! ( "?=" ) , tag ! ( "?=" ) ) ) ) ;
//named!(utf8_token_quoted_p<String>, escaped_transform!(call!(alpha), '=', quoted_printable_byte));
named ! ( utf8_token_quoted_p < String > , do_parse ! (
named ! ( qp_underscore_header < u8 > ,
do_parse ! ( tag ! ( "_" ) > > ( { b' ' } ) ) ) ;
named ! ( utf8_token_quoted_p < Vec < u8 > > , do_parse ! (
raw : call ! ( utf8_token_quoted_p_raw ) > >
( {
named ! ( get_bytes < Vec < u8 > > , dbg! ( many0 ! ( alt ! ( quoted_printable_byte | le_u8 ) ) ) ) ;
let bytes = get_bytes ( raw ) . to_full_result ( ) . unwrap ( ) ;
String ::from_utf8_lossy ( & bytes ) . into_owned ( )
named ! ( get_bytes < Vec < u8 > > , many0 ! ( alt_complete ! ( quoted_printable_byte | qp_underscore_header | le_u8 ) ) ) ;
get_bytes ( raw ) . to_full_result ( ) . unwrap ( )
} ) ) ) ;
named ! ( utf8_token < String > , alt_complete ! (
named ! ( utf8_token < Vec< u8 > > , alt_complete ! (
utf8_token_base64 |
call ! ( utf8_token_quoted_p ) ) ) ;
named ! ( utf8_token_list < String > , ws ! ( do_parse ! (
list : separated_nonempty_list ! ( complete ! ( tag ! ( " " ) ) , utf8_token ) > >
( {
( {
let list_len = list . iter ( ) . fold ( 0 , | mut acc , x | { acc + = x . len ( ) ; acc } ) ;
list . iter ( ) . fold ( String ::with_capacity ( list_len ) , | mut acc , x | { acc . push_str ( x ) ; acc } )
let bytes = list . iter ( ) . fold ( Vec ::with_capacity ( list_len ) , | mut acc , x | { acc . append ( & mut x . clone ( ) ) ; acc } ) ;
String ::from_utf8_lossy ( & bytes ) . into_owned ( )
} )
) ) ) ;
/* Take plain text up to the next "=?UTF-8?" encoded-word marker, or the whole remaining
 * input when there is none. The marker itself is only peeked at, not consumed. */
named!(ascii_token<String>, do_parse!(
    word: alt!(terminated!(take_until1!("=?"), peek!(tag_no_case!("=?UTF-8?"))) | take_while!(call!(|_| { true }))) >>
    ( {
        String::from_utf8_lossy(word).into_owned()
    } )));
/* Lots of copying here. TODO: fix it */
named ! ( pub subject < String > , ws ! ( do_parse ! (
list : many0 ! ( alt_complete ! ( utf8_token_list | ascii_token ) ) > >
( {
let list_len = list . iter ( ) . fold ( 0 , | mut acc , x | { acc + = x . len ( ) ; acc } ) ;
let s = list . iter ( ) . fold ( String ::with_capacity ( list_len ) , | mut acc , x | { acc . push_str ( x ) ; acc . push_str ( " " ) ; acc } ) ;
s . trim ( ) . to_string ( )
( {
let string_len = list . iter ( ) . fold ( 0 , | mut acc , x | { acc + = x . len ( ) ; acc } ) + list . len ( ) - 1 ;
let list_len = list . len ( ) ;
let mut i = 0 ;
list . iter ( ) . fold ( String ::with_capacity ( string_len ) ,
| acc , x | {
let mut acc = acc + & x . replace ( "\n" , "" ) ;
if i ! = list_len - 1 {
acc . push_str ( " " ) ;
i + = 1 ;
}
acc
} )
} )
) ) ) ;
@ -159,27 +197,33 @@ fn test_eat_comments() {
let s = "Thu, 31 Aug 2017 13:43:37 +0000 (UTC)" ;
assert_eq! ( eat_comments ( s ) , "Thu, 31 Aug 2017 13:43:37 +0000 " ) ;
}
/*
 * Date should tokenize input and convert the tokens,
 * right now we expect input will have no extra spaces in between tokens
 *
 * We should use a custom parser here
 */
/* Parse an RFC 2822 date, which may be wrapped in encoded words and may carry comments.
 * Returns None when the input cannot be decoded or is not a valid date. */
pub fn date(input: &str) -> Option<chrono::DateTime<chrono::FixedOffset>> {
    let decoded = match subject(eat_comments(input).as_bytes()).to_full_result() {
        Ok(text) => text,
        Err(_) => return None,
    };
    /* Rewrite only the "-0000" zone as "+0000"; rewriting every '-' would turn real
     * negative offsets such as "-0500" into positive ones. */
    let normalized = decoded.replace("-0000", "+0000");
    chrono::DateTime::parse_from_rfc2822(normalized.trim()).ok()
}
/* The same instant written with a trailing comment, plainly, and as an encoded word
 * must parse to equal dates. */
#[test]
fn test_date() {
    let with_comment = "Thu, 31 Aug 2017 13:43:37 +0000 (UTC)";
    let plain = "Thu, 31 Aug 2017 13:43:37 +0000";
    let encoded_word = "=?utf-8?q?Thu=2C_31_Aug_2017_13=3A43=3A37_-0000?=";

    let from_comment = date(with_comment).unwrap();
    let from_plain = date(plain).unwrap();
    assert_eq!(from_comment, from_plain);

    let from_plain_again = date(plain).unwrap();
    let from_encoded = date(encoded_word).unwrap();
    assert_eq!(from_plain_again, from_encoded);
}
/* Parse a message id enclosed in angle brackets -> the text between '<' and '>' */
named!(pub message_id<&str>,
    map_res!(complete!(delimited!(tag!("<"), take_until1!(">"), tag!(">"))), from_utf8)
);
/* Collect every message id in the input, discarding the bytes that precede each '<' */
named!(pub references<Vec<&str>>,
    many0!(
        preceded!(is_not!("<"), message_id)
    )
);
named_args ! ( pub attachments < ' a > ( boundary : & ' a str , boundary_end : & ' a str ) < Vec < & ' this_is_probably_unique_i_hope_please [ u8 ] > > ,
dbg! ( alt_complete ! ( do_parse ! (
named_args ! ( pub attachments < ' a > ( boundary : & ' a str , boundary_end : & ' a str ) < Vec < & ' this_is_probably_unique_i_hope_please [ u8 ] > > ,
alt_complete ! ( do_parse ! (
take_until ! ( boundary ) > >
vecs : many0 ! ( complete ! ( do_parse ! (
tag ! ( boundary ) > >
@ -196,33 +240,41 @@ named_args!(pub attachments<'a>(boundary: &'a str, boundary_end: &'a str) < Vec<
take_until ! ( boundary_end ) > >
tag ! ( boundary_end ) > >
( { Vec ::< & [ u8 ] > ::new ( ) } ) )
) ) ) ;
) ) ;
/* Splits the body of the fixture message on its boundary and expects four parts. */
#[test]
fn test_attachments() {
    use std::io::Read;
    let mut buffer: Vec<u8> = Vec::new();
    /* Fail here if the fixture cannot be read, rather than later with a confusing parse error */
    std::fs::File::open("test/attachment_test").unwrap().read_to_end(&mut buffer).unwrap();
    /* This is the closing delimiter; the delimiter between parts is the same string
     * without the trailing "--". */
    let boundary = "--b1_4382d284f0c601a737bb32aaeda53160--";
    let boundary_len = boundary.len();
    let (_, body) = match mail(&buffer).to_full_result() {
        Ok(v) => v,
        Err(_) => { panic!() }
    };
    //eprintln!("{:?}",std::str::from_utf8(body));
    let attachments = attachments(body, &boundary[0..boundary_len - 2], boundary).to_full_result().unwrap();
    assert_eq!(attachments.len(), 4);
}
/* Parse one ";name=value" parameter of a Content-Type header -> (name, value)
 * The value is either a double-quoted string (quotes stripped) or everything up to the next ';'. */
named!(content_type_parameter<(&str, &str)>,
    do_parse!(
        tag!(";") >>
        name: terminated!(map_res!(ws!(take_until!("=")), from_utf8), tag!("=")) >>
        value: map_res!(ws!(
            alt_complete!(delimited!(tag!("\""), take_until!("\""), tag!("\"")) | is_not!(";"))),
            from_utf8) >>
        ( {
            (name, value)
        } )
    ));
named ! ( pub content_type < ( & str , & str , Vec < ( & str , & str ) > ) > ,
named ! ( pub content_type < ( & str , & str , Vec < ( & str , & str ) > ) > ,
do_parse ! (
_type : map_res ! ( take_until ! ( "/" ) , std ::str ::from_utf8 ) > >
_type : map_res ! ( take_until ! ( "/" ) , from_utf8) > >
tag ! ( "/" ) > >
_subtype : map_res ! ( is_not ! ( ";" ) , std ::str ::from_utf8 ) > >
parameters : many0 ! ( preceded ! ( tag ! ( ";" ) , pair ! (
terminated ! ( map_res ! ( ws ! ( take_until ! ( "=" ) ) , std ::str ::from_utf8 ) , tag ! ( "=" ) ) ,
map_res ! ( ws ! ( alt_complete ! (
delimited ! ( tag ! ( "\"" ) , take_until ! ( "\"" ) , tag ! ( "\"" ) ) | is_not ! ( ";" )
) ) , std ::str ::from_utf8 ) ) ) ) > >
_subtype : map_res ! ( is_not ! ( ";" ) , from_utf8 ) > >
parameters : many0 ! ( complete ! ( content_type_parameter ) ) > >
( {
( _type , _subtype , parameters )
} )