mailto: rewrite parsing

Rewrite the parser to fix an error that occurs when ampersands in a
mailto value are escaped as HTML entities.

Signed-off-by: Manos Pitsidianakis <manos@pitsidianak.is>
pull/480/head
Manos Pitsidianakis 2 months ago
parent a55f65e131
commit 8091583221
No known key found for this signature in database
GPG Key ID: 7729C7707F7E09D0

@ -300,4 +300,21 @@ fn test_email_mailto() {
("To", "user@納豆.example.org"),
("Subject", "Test")
);
test_case!("mailto:1001084@bugs.debian.org?In-Reply-To=%3C168435582154.540248.1403466294276093439%40xxxxxxxxxxxxxx%3E&References=%3C163857393129.1083042.11053317018847169002.reportbug%40xxxxxxxxxxxxxx%3E%0A%20%3C168435582154.540248.1403466294276093439%40xxxxxxxxxxxxxx%3E&subject=Re%3A%20ITP%3A%20meli%20--%20terminal%20mail%20client&body=On%20Wed%2C%2017%20May%202023%2022%3A37%3A01%20%2B0200%20xxxxx%20zzzzzzzzzz%20%3Cdr%40xxxxxxxx%3E%20wrote%3A%0A%3E%200.7.2%2B20230517%20draft%201%20needs%20embedding%208%20crates%20%286%20missing%2C%201%20unwanted%2C%201%20ahead%29%3B%0A%3E%20runs%20and%20seems%20to%20work%20from%20a%20brief%20test%20use.%0A%3E%20%0A%3E%20Main%20tasks%20are%20still%20to%20keep%20package%20up-to-date%20with%20upstream%20releases%2C%20and%0A%3E%20to%20package%20more%20of%20the%20crates%20currently%20embedded.%0A%3E%20%0A%3E%20Here%27s%20how%20you%20can%20help%3A%0A%3E%20%0A%3E%20As%20user%20running%20Debian%2C%20you%20can%20test%20this%20draft%20package%3A%20Either%20build%20it%0A%3E%20yourself%20from%20source%20or%20tell%20%28by%20posting%20to%20this%20bugreport%29%20if%20you%0A%3E%20prefer%20testing%20the%20binary%20packages%20I%20built%20-%20then%20I%20will%20share%20those.%0A%3E%20%0A%3E%20As%20developer%20%28but%20no%20need%20to%20be%20official%20member%20of%20Debian%21%29%2C%20you%20can%0A%3E%20join%20the%20Debian%20Rust%20team%20and%20help%20package%20these%20missing%20crates%3A%0A%3E%20https%3A%2F%2Fsalsa.debian.org%2Fdebian%2Fmeli%2F-%2Fblob%2Fdebian%2Flatest%2Fdebian%2FTODO%0A%3E%20%0A%3E%20%0A%3E%20%20-%20JJJJJ%0A%3E%20%0A%3E%20--%20%0A%3E%20%20%2A%20JJJJJ%20xxxxxxxxxx%20-%20idealist%20%26%20Internet-arkitekt%0A%3E%20%20%2A%20Tlf.%3A%20%2B45%20%20%20Website%3A%20http%3A%2F%2Fxxxxxxxxxxx%2F%0A%3E%20%0A%3E%20%20%5Bx%5D%20quote%20me%20freely%20%20%5B%20%5D%20ask%20before%20reusing%20%20%5B%20%5D%20keep%20private",
addresses => "1001084@bugs.debian.org";
body => Some(
"On Wed, 17 May 2023 22:37:01 +0200 xxxxx zzzzzzzzzz <dr@xxxxxxxx> wrote:\n> 0.7.2+20230517 draft 1 needs embedding 8 crates (6 missing, 1 unwanted, 1 ahead);\n> runs and seems to work from a brief test use.\n> \n> Main tasks are still to keep package up-to-date with upstream releases, and\n> to package more of the crates currently embedded.\n> \n> Here's how you can help:\n> \n> As user running Debian, you can test this draft package: Either build it\n> yourself from source or tell (by posting to this bugreport) if you\n> prefer testing the binary packages I built - then I will share those.\n> \n> As developer (but no need to be official member of Debian!), you can\n> join the Debian Rust team and help package these missing crates:\n> https://salsa.debian.org/debian/meli/-/blob/debian/latest/debian/TODO\n> \n> \n> - JJJJJ\n> \n> -- \n> * JJJJJ xxxxxxxxxx - idealist & Internet-arkitekt\n> * Tlf.: +45 Website: http://xxxxxxxxxxx/\n> \n> [x] quote me freely [ ] ask before reusing [ ] keep private",
);
("To", "1001084@bugs.debian.org"),
("In-Reply-To", "<168435582154.540248.1403466294276093439@xxxxxxxxxxxxxx>"),
("References", "<163857393129.1083042.11053317018847169002.reportbug@xxxxxxxxxxxxxx>\n <168435582154.540248.1403466294276093439@xxxxxxxxxxxxxx>"),
("Subject", "Re: ITP: meli -- terminal mail client")
);
test_case!("mailto:1001084@bugs.debian.org?body=On%20Tue%2C%2023%20Jan%202024%2019%3A46%3A47%20%2B0100%20Jonas%20Smedegaard%20%3Cdr%40jones.dk%3E%20wrote%3A%0A%3E%200.8.5%2B20240101%20draft%201%20needs%20embedding%205%20crates%20%283%20missing%2C%202%20ahead%29%3B%20runs%20and%20seems%20to%20work%20from%20a%20brief%20test%20use.%0A%3E%20%0A%3E%20Main%20tasks%20are%20still%20to%20keep%20package%20up-to-date%20with%20upstream%20releases%2C%0A%3E%20and%20to%20package%20more%20of%20the%20crates%20currently%20embedded.%0A%3E%20%0A%3E%20Here%27s%20how%20you%20can%20help%3A%0A%3E%20%0A%3E%20As%20user%20running%20Debian%2C%20you%20can%20test%20this%20draft%20package%3A%20Either%20build%20it%0A%3E%20yourself%20from%20source%20or%20tell%20%28by%20posting%20to%20this%20bugreport%29%20if%20you%0A%3E%20prefer%20testing%20the%20binary%20packages%20I%20built%20-%20then%20I%20will%20share%20those.%0A%3E%20%0A%3E%20As%20developer%20%28but%20no%20need%20to%20be%20official%20member%20of%20Debian%21%29%2C%20you%20can%0A%3E%20join%20the%20Debian%20Rust%20team%20and%20help%20package%20these%20missing%20crates%3A%0A%3E%20https%3A%2F%2Fsalsa.debian.org%2Fdebian%2Fmeli%2F-%2Fblob%2Fdebian%2Flatest%2Fdebian%2FTODO%0A%3E%20%0A%3E%20%0A%3E%20%20-%20Jonas%0A%3E%20%0A%3E%20--%20%0A%3E%20%20%2A%20Jonas%20Smedegaard%20-%20idealist%20%26%20Internet-arkitekt%0A%3E%20%20%2A%20Tlf.%3A%20%2B45%2040843136%20%20Website%3A%20http%3A%2F%2Fdr.jones.dk%2F%0A%3E%20%0A%3E%20%20%5Bx%5D%20quote%20me%20freely%20%20%5B%20%5D%20ask%20before%20reusing%20%20%5B%20%5D%20keep%20private&In-Reply-To=%3C170603560794.2588145.11750867315250989304%40auryn.jones.dk%3E&References=%3C163857393129.1083042.11053317018847169002.reportbug%40auryn.jones.dk%3E%0A%20%3C170603560794.2588145.11750867315250989304%40auryn.jones.dk%3E&subject=Re%3A%20ITP%3A%20meli%20--%20terminal%20mail%20client",
addresses=> "1001084@bugs.debian.org";
body => Some("On Tue, 23 Jan 2024 19:46:47 +0100 Jonas Smedegaard <dr@jones.dk> wrote:\n> 0.8.5+20240101 draft 1 needs embedding 5 crates (3 missing, 2 ahead); runs and seems to work from a brief test use.\n> \n> Main tasks are still to keep package up-to-date with upstream releases,\n> and to package more of the crates currently embedded.\n> \n> Here's how you can help:\n> \n> As user running Debian, you can test this draft package: Either build it\n> yourself from source or tell (by posting to this bugreport) if you\n> prefer testing the binary packages I built - then I will share those.\n> \n> As developer (but no need to be official member of Debian!), you can\n> join the Debian Rust team and help package these missing crates:\n> https://salsa.debian.org/debian/meli/-/blob/debian/latest/debian/TODO\n> \n> \n> - Jonas\n> \n> -- \n> * Jonas Smedegaard - idealist & Internet-arkitekt\n> * Tlf.: +45 40843136 Website: http://dr.jones.dk/\n> \n> [x] quote me freely [ ] ask before reusing [ ] keep private");
("To", "1001084@bugs.debian.org"), ("Subject", "Re: ITP: meli -- terminal mail client"), ("In-Reply-To", "<170603560794.2588145.11750867315250989304@auryn.jones.dk>"), ("References", "<163857393129.1083042.11053317018847169002.reportbug@auryn.jones.dk>\n <170603560794.2588145.11750867315250989304@auryn.jones.dk>")
);
}

@ -1042,7 +1042,11 @@ pub mod generic {
));
}
input = &input[b"mailto:".len()..];
let mut decoded_owned = percent_decode(input).decode_utf8().unwrap().to_string();
let Ok(mut decoded_owned) = String::from_utf8(input.to_vec()) else {
return Err(nom::Err::Error(
(input, "mailto(): Not valid UTF-8.").into(),
));
};
let mut substitutions = vec![];
for (i, _) in decoded_owned.match_indices('&') {
@ -1063,7 +1067,15 @@ pub mod generic {
let end = decoded.as_bytes().iter().position(|e| *e == b'?');
let end_or_len = end.unwrap_or(decoded.len());
if let Ok(addr) = Address::list_try_from(&decoded[..end_or_len]) {
if let Ok(addr) = percent_decode(decoded[..end_or_len].as_bytes())
.decode_utf8()
.map_err(|_| nom::Err::Error((input, "mailto(): Not valid UTF-8.")))
.and_then(|s| {
Address::list_try_from(s.as_bytes()).map_err(|_| {
nom::Err::Error((input, "mailto(): doesn't start with an address."))
})
})
{
address = addr;
decoded = if decoded[end_or_len..].is_empty() {
&decoded[end_or_len..]
@ -1096,41 +1108,42 @@ pub mod generic {
headers.insert(HeaderName::TO, full_address);
}
while !decoded.is_empty() {
if decoded.starts_with("&amp;") {
decoded = &decoded["&amp;".len()..];
continue;
}
let tag = if let Some(tag_pos) = decoded.as_bytes().iter().position(|e| *e == b'=') {
let ret = &decoded[0..tag_pos];
decoded = &decoded[tag_pos + 1..];
let mut i = 0;
while !decoded[i..].is_empty() {
let tag = if let Some(tag_pos) = decoded[i..].as_bytes().iter().position(|e| *e == b'=')
{
let ret = &decoded[i..][0..tag_pos];
i += tag_pos + 1;
ret
} else if decoded.as_bytes().starts_with(b"body") {
let ret = &decoded[0.."body".len()];
decoded = &decoded[("body".len() + 1)..];
} else if decoded[i..].as_bytes().starts_with(b"body") {
let ret = &decoded[i..][0.."body".len()];
i += "body".len() + 1;
ret
} else {
return Err(nom::Err::Error(
(
input,
format!("mailto(): extra characters found in input: {}", decoded),
format!(
"mailto(): extra characters found in input: {}",
&decoded[i..]
),
)
.into(),
));
};
let value_end = if tag == "body" {
decoded.len()
} else {
decoded
.as_bytes()
.iter()
.position(|e| *e == b'&')
.unwrap_or(decoded.len())
let value_end = decoded[i..]
.as_bytes()
.iter()
.position(|e| *e == b'&')
.unwrap_or_else(|| decoded[i..].len());
let Ok(value) = percent_decode(decoded[i..][..value_end].as_bytes())
.decode_utf8()
.map(|v| v.to_string())
else {
return Err(nom::Err::Error((input, "mailto(): invalid UTF-8.").into()));
};
let value = decoded[..value_end].to_string();
match tag {
"body" if body.is_none() => {
body = Some(value);
@ -1152,15 +1165,14 @@ pub mod generic {
hdr,
value
);
}
if !headers.contains_key(&hdr) {
} else if !headers.contains_key(&hdr) {
headers.insert(hdr, value);
}
}
Ok(hdr) => {
log::warn!(
"parsing mailto(): header {} is not a known header and it will be \
ignored.Value was {:?}",
ignored. Value was {:?}",
hdr,
value
);
@ -1172,10 +1184,10 @@ pub mod generic {
}
},
}
if decoded[value_end..].is_empty() {
if decoded[i..][value_end..].is_empty() {
break;
}
decoded = &decoded[value_end + 1..];
i += value_end + 1;
}
Ok((
input,
@ -2083,12 +2095,11 @@ pub mod encodings {
break;
}
}
if tag_end_idx.is_none() {
let Some(tag_end_idx) = tag_end_idx else {
return Err(nom::Err::Error(
(input, "encoded_word(): expected end tag").into(),
));
}
let tag_end_idx = tag_end_idx.unwrap();
};
if tag_end_idx + 2 >= input.len() || input[2 + tag_end_idx] != b'?' {
return Err(nom::Err::Error(
@ -2106,12 +2117,11 @@ pub mod encodings {
break;
}
}
if encoded_end_idx.is_none() {
let Some(encoded_end_idx) = encoded_end_idx else {
return Err(nom::Err::Error(
(input, "encoded_word(): expected input after end tag").into(),
));
}
let encoded_end_idx = encoded_end_idx.unwrap();
};
let encoded_text = &input[3 + tag_end_idx..encoded_end_idx];
let s: Vec<u8> = match input[tag_end_idx + 1] {

@ -23,8 +23,6 @@
use crate::{
email::{
address::*,
headers::{HeaderMap, HeaderName},
mailto::Mailto,
parser::{
address::*,
dates::rfc5322_date,
@ -139,26 +137,6 @@ fn test_email_parser_address_list() {
);
}
// // [ref:FIXME]: add file
//#[test]
//fn test_email_parser_attachments() {
// use std::io::Read;
// let mut buffer: Vec<u8> = Vec::new();
// let _ = std::fs::File::open("").unwrap().read_to_end(&mut buffer);
// let boundary = b"b1_4382d284f0c601a737bb32aaeda53160";
// let (_, body) = match mail(&buffer) {
// Ok(v) => v,
// Err(_) => panic!(),
// };
// let attachments = parts(body, boundary).unwrap().1;
// assert_eq!(attachments.len(), 4);
// let v: Vec<&str> = attachments
// .iter()
// .map(|v| std::str::from_utf8(v).unwrap())
// .collect();
// //println!("attachments {:?}", v);
//}
#[test]
fn test_email_parser_addresses() {
macro_rules! assert_parse {
@ -464,73 +442,6 @@ fn test_email_parser_msg_id() {
assert_eq!(&msg_id_list(value).unwrap().1, &[a, c]);
}
// Checks that `crate::parser::generic::mailto` turns a long, percent-encoded
// `mailto:` URI (carrying `In-Reply-To`, `References`, `subject` and `body`
// fields) into the expected `Mailto` value.
//
// NOTE(review): rustfmt is skipped presumably to keep the long literals below
// from being re-wrapped -- confirm before removing the attribute.
#[rustfmt::skip]
#[test]
fn test_email_parser_mailto_parse() {
// Input URI. The trailing backslashes are string line continuations, so the
// value is one single line without embedded newlines.
let value = "mailto:1001084@bugs.debian.org?In-Reply-To=%3C168435582154.540248.\
1403466294276093439%40xxxxxxxxxxxxxx%3E&References=%3C163857393129.1083042.\
11053317018847169002.reportbug%40xxxxxxxxxxxxxx%3E%0A%20%3C168435582154.540248.\
1403466294276093439%40xxxxxxxxxxxxxx%3E&subject=Re%3A%20ITP%3A%20meli%20--%\
20terminal%20mail%20client&body=On%20Wed%2C%2017%20May%202023%2022%3A37%3A01%20%\
2B0200%20xxxxx%20zzzzzzzzzz%20%3Cdr%40xxxxxxxx%3E%20wrote%3A%0A%3E%200.7.2%\
2B20230517%20draft%201%20needs%20embedding%208%20crates%20%286%20missing%2C%201%\
20unwanted%2C%201%20ahead%29%3B%0A%3E%20runs%20and%20seems%20to%20work%20from%20a%\
20brief%20test%20use.%0A%3E%20%0A%3E%20Main%20tasks%20are%20still%20to%20keep%\
20package%20up-to-date%20with%20upstream%20releases%2C%20and%0A%3E%20to%20package%\
20more%20of%20the%20crates%20currently%20embedded.%0A%3E%20%0A%3E%20Here%27s%20how%\
20you%20can%20help%3A%0A%3E%20%0A%3E%20As%20user%20running%20Debian%2C%20you%20can%\
20test%20this%20draft%20package%3A%20Either%20build%20it%0A%3E%20yourself%20from%\
20source%20or%20tell%20%28by%20posting%20to%20this%20bugreport%29%20if%20you%0A%3E%\
20prefer%20testing%20the%20binary%20packages%20I%20built%20-%20then%20I%20will%\
20share%20those.%0A%3E%20%0A%3E%20As%20developer%20%28but%20no%20need%20to%20be%\
20official%20member%20of%20Debian%21%29%2C%20you%20can%0A%3E%20join%20the%20Debian%\
20Rust%20team%20and%20help%20package%20these%20missing%20crates%3A%0A%3E%20https%3A%\
2F%2Fsalsa.debian.org%2Fdebian%2Fmeli%2F-%2Fblob%2Fdebian%2Flatest%2Fdebian%2FTODO%\
0A%3E%20%0A%3E%20%0A%3E%20%20-%20JJJJJ%0A%3E%20%0A%3E%20--%20%0A%3E%20%20%2A%20JJJJJ%\
20xxxxxxxxxx%20-%20idealist%20%26%20Internet-arkitekt%0A%3E%20%20%2A%20Tlf.%3A%20%\
2B45%20%20%20Website%3A%20http%3A%2F%2Fxxxxxxxxxxx%2F%0A%3E%20%0A%3E%20%20%5Bx%5D%\
20quote%20me%20freely%20%20%5B%20%5D%20ask%20before%20reusing%20%20%5B%20%5D%20keep%\
20private";
// Parse the URI. On failure the nom error is turned into a readable message
// (error value plus the offending input rendered as lossy UTF-8) so that the
// `unwrap()` below panics with something useful.
let (rest, mailto) = crate::parser::generic::mailto(value.as_bytes())
.map_err(|err| match err {
nom::Err::Failure(err) | nom::Err::Error(err) => {
format!(
"kind {:?} at: {:?}",
err,
String::from_utf8_lossy(err.input)
)
}
nom::Err::Incomplete(_) => "incomplete input".to_string(),
})
.unwrap();
// Debug output: the input left over after parsing and the parsed value.
println!(
"rest = {}, mailto = {:#?}",
String::from_utf8_lossy(rest),
mailto
);
// Expected result: body and header values are percent-decoded, the recipient
// address is also expected as a `To` header, and the lowercase `subject` key
// of the URI is expected under `HeaderName::SUBJECT`.
assert_eq!(
mailto,
Mailto {
address: vec![
Address::new(None, "1001084@bugs.debian.org".to_string())
],
body: Some(
"On Wed, 17 May 2023 22:37:01 +0200 xxxxx zzzzzzzzzz <dr@xxxxxxxx> wrote:\n> 0.7.2+20230517 draft 1 needs embedding 8 crates (6 missing, 1 unwanted, 1 ahead);\n> runs and seems to work from a brief test use.\n> \n> Main tasks are still to keep package up-to-date with upstream releases, and\n> to package more of the crates currently embedded.\n> \n> Here's how you can help:\n> \n> As user running Debian, you can test this draft package: Either build it\n> yourself from source or tell (by posting to this bugreport) if you\n> prefer testing the binary packages I built - then I will share those.\n> \n> As developer (but no need to be official member of Debian!), you can\n> join the Debian Rust team and help package these missing crates:\n> https://salsa.debian.org/debian/meli/-/blob/debian/latest/debian/TODO\n> \n> \n> - JJJJJ\n> \n> -- \n> * JJJJJ xxxxxxxxxx - idealist & Internet-arkitekt\n> * Tlf.: +45 Website: http://xxxxxxxxxxx/\n> \n> [x] quote me freely [ ] ask before reusing [ ] keep private".to_string(),
),
headers: {
let mut map = HeaderMap::new();
map.insert(HeaderName::TO, "1001084@bugs.debian.org".into());
map.insert(HeaderName::IN_REPLY_TO, "<168435582154.540248.1403466294276093439@xxxxxxxxxxxxxx>".into());
map.insert(HeaderName::REFERENCES, "<163857393129.1083042.11053317018847169002.reportbug@xxxxxxxxxxxxxx>\n <168435582154.540248.1403466294276093439@xxxxxxxxxxxxxx>".into());
map.insert(HeaderName::SUBJECT, "Re: ITP: meli -- terminal mail client".into());
map
},
});
}
#[test]
fn test_email_parser_dates_date_new() {
let s = b"Thu, 31 Aug 2017 13:43:37 +0000 (UTC)";

Loading…
Cancel
Save