Add support for AppleMail

This commit is contained in:
Benedikt Terhechte 2021-10-10 08:03:41 +02:00
parent 6bdc412833
commit 471517047a
14 changed files with 118 additions and 101 deletions

8
Cargo.lock generated
View File

@ -538,7 +538,7 @@ checksum = "e78d4f1cc4ae33bbfc157ed5d5a5ef3bc29227303d595861deb238fcec4e9457"
[[package]]
name = "email-parser"
version = "0.5.0"
source = "git+https://github.com/terhechte/email-parser#da8582a266385b8f1b2f7e150ac891e3f850d41a"
source = "git+https://github.com/terhechte/email-parser#dba59d86771f7df67bb9e7f3a2c4b1e36b02d19b"
dependencies = [
"textcode",
"timezone-abbreviations",
@ -552,10 +552,10 @@ checksum = "cca5179aa9d15128cebb79bb56dda73a79cc66b402056ff19a992e54b365e15c"
[[package]]
name = "emlx"
version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5f41d97755f64845fb52dbff2acc01853ba51343e2076c4ab3099bb669d0e71e"
version = "0.1.5"
source = "git+https://github.com/terhechte/emlx#44c2f278551d9e7a9ae0c3c3207c4471da3049fe"
dependencies = [
"email-parser",
"plist",
"thiserror",
]

View File

@ -13,7 +13,7 @@ tracing-subscriber = "0.2.24"
rusqlite = {version = "0.25.3", features = ["chrono", "trace", "serde_json"]}
regex = "1.5.3"
flate2 = "1.0.22"
email-parser = { git = "https://github.com/terhechte/email-parser", features = ["sender", "to", "in-reply-to", "date", "subject", "mime", "allow-duplicate-headers"]}
email-parser = { git = "https://github.com/terhechte/email-parser", features = ["sender", "to", "in-reply-to", "date", "subject", "mime", "allow-duplicate-headers", "compatibility-fixes"]}
rayon = "1.5.1"
chrono = "0.4.19"
lazy_static = "*"
@ -27,7 +27,7 @@ num-format = "0.4.0"
strum = "0.21"
strum_macros = "0.21"
lru = { version = "0.7.0", optional = true }
emlx = "0.1"
emlx = { git = "https://github.com/terhechte/emlx", features = []}
walkdir = "*"
[features]

View File

@ -4,10 +4,7 @@ use eyre::{bail, Report, Result};
use rusqlite::{self, params, Connection, Statement};
use core::panic;
use std::{
path::{Path, PathBuf},
thread::JoinHandle,
};
use std::{path::Path, thread::JoinHandle};
use super::{query::Query, query_result::QueryResult, sql::*, DBMessage};
use crate::{database::RowConversion, importer::EmailEntry};

View File

@ -1,5 +1,3 @@
use std::path::PathBuf;
use eyre::Report;
use crate::importer::EmailEntry;

View File

@ -45,7 +45,7 @@ VALUES
pub const QUERY_ERRORS: &str = r#"
INSERT INTO errors
(message, path)
(message)
VALUES
(?, ?)
(?)
"#;

View File

@ -4,43 +4,46 @@
use eyre::Result;
use rayon::prelude::*;
use tracing::trace;
use walkdir::WalkDir;
use super::super::shared::filesystem::folders_in;
use super::super::{Message, MessageSender};
use super::raw_email::RawEmailEntry;
use super::super::shared::filesystem::emails_in;
use super::super::MessageSender;
use crate::types::Config;
use std::path::Path;
use super::mail::Mail;
use std::path::PathBuf;
/// Debugging helper: prints the path of every entry found while walking `foo`.
///
/// Entries that `walkdir` fails to read are skipped without being reported.
fn test_walkdir() {
    // `WalkDir` only offers a sequential iterator (there is no parallel adaptor on it),
    // so it has to be consumed through `into_iter()`.
    for entry in WalkDir::new("foo").into_iter().filter_map(|e| e.ok()) {
        println!("{}", entry.path().display());
    }
}
pub fn read_emails(config: &Config, sender: MessageSender) -> Result<Vec<RawEmailEntry>> {
Ok(folders_in(&config.emails_folder_path, sender, read_folder)?)
}
fn read_folder(path: &Path, sender: MessageSender) -> Result<Vec<RawEmailEntry>> {
let result = Ok(std::fs::read_dir(path)?
pub fn read_emails(config: &Config, sender: MessageSender) -> Result<Vec<Mail>> {
// As `walkdir` does not support `par_iter` (see https://www.reddit.com/r/rust/comments/6eif7r/walkdir_users_we_need_you/)
// we first collect all folders,
// then all sub-folders in those ending in .mbox, and then iterate over them in parallel
let folders: Vec<PathBuf> = WalkDir::new(&config.emails_folder_path)
.into_iter()
.par_bridge()
.filter_map(|entry| {
let path = entry
.map_err(|e| tracing::error!("{} {:?}", &path.display(), &e))
.ok()?
.path();
if path.is_dir() {
return None;
.filter_map(|e| match e {
Ok(n)
if n.path().is_dir()
&& n.path()
.to_str()
.map(|e| e.contains(".mbox"))
.unwrap_or(false) =>
{
Some(n.path().to_path_buf())
}
trace!("Reading {}", &path.display());
RawEmailEntry::new(path)
_ => None,
})
.collect());
// We're done reading the folder
sender.send(Message::ReadOne).unwrap();
result
.collect();
let mails = folders
.into_par_iter()
.filter_map(
|path| match emails_in(path.clone(), sender.clone(), Mail::new) {
Ok(n) => Some(n),
Err(e) => {
tracing::error!("{} {:?}", path.display(), &e);
None
}
},
)
.flatten()
.collect();
Ok(mails)
}

View File

@ -18,7 +18,12 @@ pub struct Mail {
}
impl Mail {
pub fn new(path: &Path) -> Result<Self> {
pub fn new<P: AsRef<Path>>(path: P) -> Option<Self> {
let path = path.as_ref();
let name = path.file_name()?.to_str()?;
if !name.ends_with(".emlx") {
return None;
}
// find the folder ending with `.mbox` in the path
let ext = ".mbox";
let label = path
@ -27,7 +32,7 @@ impl Mail {
.flatten()
.find(|s| s.ends_with(ext))
.map(|s| s.replace(ext, "").to_string());
Ok(Self {
Some(Self {
path: path.to_path_buf(),
is_seen: false,
label,

View File

@ -1,8 +1,6 @@
mod filesystem;
mod mail;
/// FIXME: Not sure if the number changes with each macOS release?
const DEFAULT_FOLDER: &str = "~/Library/Mail/V8/";
use super::{Config, ImporterFormat, MessageSender, Result};
#[derive(Default)]
@ -11,6 +9,6 @@ pub struct AppleMail {}
impl ImporterFormat for AppleMail {
type Item = mail::Mail;
fn emails(&self, config: &Config, sender: MessageSender) -> Result<Vec<Self::Item>> {
panic!()
filesystem::read_emails(config, sender)
}
}

View File

@ -1,35 +1,14 @@
use eyre::Result;
use rayon::prelude::*;
use tracing::trace;
use super::super::shared::filesystem::folders_in;
use super::super::{Message, MessageSender};
use super::super::shared::filesystem::{emails_in, folders_in};
use super::super::MessageSender;
use super::raw_email::RawEmailEntry;
use crate::types::Config;
use std::path::Path;
pub fn read_emails(config: &Config, sender: MessageSender) -> Result<Vec<RawEmailEntry>> {
Ok(folders_in(&config.emails_folder_path, sender, read_folder)?)
}
fn read_folder(path: &Path, sender: MessageSender) -> Result<Vec<RawEmailEntry>> {
let result = Ok(std::fs::read_dir(path)?
.into_iter()
.par_bridge()
.filter_map(|entry| {
let path = entry
.map_err(|e| tracing::error!("{} {:?}", &path.display(), &e))
.ok()?
.path();
if path.is_dir() {
return None;
}
trace!("Reading {}", &path.display());
RawEmailEntry::new(path)
})
.collect());
// We're done reading the folder
sender.send(Message::ReadOne).unwrap();
result
Ok(folders_in(
&config.emails_folder_path,
sender,
|path, sender| emails_in(path, sender, RawEmailEntry::new),
)?)
}

View File

@ -5,6 +5,7 @@ mod gmailbackup;
mod importer;
pub mod shared;
pub use apple_mail::AppleMail;
pub use gmailbackup::Gmail;
pub use crate::types::Config;

View File

@ -4,7 +4,7 @@ use crate::types::Config;
use super::super::{Message, MessageSender};
use eyre::{bail, Context, Result};
use eyre::{bail, Result};
use rayon::prelude::*;
pub fn into_database<Mail: ParseableEmail + 'static>(
@ -33,23 +33,20 @@ pub fn into_database<Mail: ParseableEmail + 'static>(
//.par_iter()
.par_iter_mut()
// parsing them
.map(|raw_mail| {
// Due to lifetime issues, we can't use raw_mail.path() or raw_mail.path().display()
// or raw_mail.path().to_path_buf().display() as all of those retain a reference to
// `raw_mail`. So we just format the context into a string
parse_email(raw_mail).with_context(|| format!("{}", raw_mail.path().display()))
})
.map(|raw_mail| parse_email(raw_mail))
// and inserting them into SQLite
.for_each(|entry| {
if let Err(e) = tx.send(Message::WriteOne) {
tracing::error!("Channel Failure: {:?}", &e);
}
// Try to write the message into the database
if let Err(e) = match entry {
Ok(mail) => sender.send(DBMessage::Mail(mail)),
Err(e) => sender.send(DBMessage::Error(e)),
} {
tracing::error!("Error Inserting into Database: {:?}", &e);
}
// Signal the write
if let Err(e) = tx.send(Message::WriteOne) {
tracing::error!("Channel Failure: {:?}", &e);
}
});
// Tell SQLite there's no more work coming. This will exit the listening loop

View File

@ -2,7 +2,7 @@ use eyre::{bail, Result};
use rayon::prelude::*;
use tracing::trace;
use std::path::Path;
use std::path::{Path, PathBuf};
use super::super::{Message, MessageSender};
@ -15,7 +15,7 @@ pub fn folders_in<FolderAction, ActionResult, P>(
) -> Result<Vec<ActionResult>>
where
P: AsRef<Path>,
FolderAction: Fn(&Path, MessageSender) -> Result<Vec<ActionResult>> + Send + Sync,
FolderAction: Fn(PathBuf, MessageSender) -> Result<Vec<ActionResult>> + Send + Sync,
ActionResult: Send,
{
let folder = folder.as_ref();
@ -40,10 +40,37 @@ where
}
let sender = sender.clone();
trace!("Reading folder {}", path.display());
action(&path, sender)
.map_err(|e| tracing::error!("{} {:?}", &path.display(), &e))
action(path.clone(), sender)
.map_err(|e| tracing::error!("{} {:?}", path.display(), &e))
.ok()
})
.flatten()
.collect())
}
pub fn emails_in<O, F, P: AsRef<Path>>(path: P, sender: MessageSender, make: F) -> Result<Vec<O>>
where
F: Fn(PathBuf) -> Option<O>,
F: Send + Sync + 'static,
O: Send + Sync,
{
let path = path.as_ref();
let result = Ok(std::fs::read_dir(path)?
.into_iter()
.par_bridge()
.filter_map(|entry| {
let path = entry
.map_err(|e| tracing::error!("{} {:?}", &path.display(), &e))
.ok()?
.path();
if path.is_dir() {
return None;
}
trace!("Reading {}", &path.display());
make(path)
})
.collect());
// We're done reading the folder
sender.send(Message::ReadOne).unwrap();
result
}

View File

@ -2,8 +2,7 @@ use chrono::prelude::*;
use email_parser::address::{Address, EmailAddress, Mailbox};
use eyre::{eyre, Result};
use std::borrow::{Borrow, Cow};
use std::convert::{TryFrom, TryInto};
use std::borrow::Cow;
use std::path::Path;
use super::email::{EmailEntry, EmailMeta};
@ -24,11 +23,15 @@ pub trait ParseableEmail: Send + Sized + Sync {
}
pub fn parse_email<Entry: ParseableEmail>(entry: &mut Entry) -> Result<EmailEntry> {
entry.prepare()?;
if let Err(e) = entry.prepare() {
tracing::error!("Prepare Error: {:?}", e);
return Err(e);
}
let content = entry.message()?;
match email_parser::email::Email::parse(&content) {
Ok(email) => {
let path = entry.path();
tracing::trace!("Parsing {}", path.display());
let (sender_name, _, sender_local_part, sender_domain) =
mailbox_to_string(&email.sender);
@ -69,13 +72,18 @@ pub fn parse_email<Entry: ParseableEmail>(entry: &mut Entry) -> Result<EmailEntr
})
}
Err(error) => {
//let content_string = String::from_utf8(content.clone())?;
//println!("{}|{}", &error, &raw_entry.eml_path.display());
Err(eyre!(
"Could not parse email: {:?} [{}]",
let error = eyre!(
"Could not parse email (trace to see contents): {:?} [{}]",
&error,
entry.path().display()
))
);
tracing::error!("{:?}", &error);
if let Ok(content_string) = String::from_utf8(content.into_owned()) {
tracing::trace!("Contents:\n{}\n---\n", content_string);
} else {
tracing::trace!("Contents:\nInvalid UTF8\n---\n");
}
Err(error)
}
}
}

View File

@ -38,3 +38,7 @@ pub type MessageReceiver = crossbeam_channel::Receiver<Message>;
/// Creates an [`Importer`] for the Gmail format.
///
/// The importer is built from `config` and a default-constructed `formats::Gmail`.
pub fn gmail_importer(config: &Config) -> Importer<formats::Gmail> {
    let format = formats::Gmail::default();
    Importer::new(config, format)
}
/// Creates an [`Importer`] for the Apple Mail format.
///
/// The importer is built from `config` and a default-constructed `formats::AppleMail`.
pub fn applemail_importer(config: &Config) -> Importer<formats::AppleMail> {
    let format = formats::AppleMail::default();
    Importer::new(config, format)
}