Add support for AppleMail

pull/1/head
Benedikt Terhechte 3 years ago
parent 6bdc412833
commit 471517047a

8
Cargo.lock generated

@ -538,7 +538,7 @@ checksum = "e78d4f1cc4ae33bbfc157ed5d5a5ef3bc29227303d595861deb238fcec4e9457"
[[package]] [[package]]
name = "email-parser" name = "email-parser"
version = "0.5.0" version = "0.5.0"
source = "git+https://github.com/terhechte/email-parser#da8582a266385b8f1b2f7e150ac891e3f850d41a" source = "git+https://github.com/terhechte/email-parser#dba59d86771f7df67bb9e7f3a2c4b1e36b02d19b"
dependencies = [ dependencies = [
"textcode", "textcode",
"timezone-abbreviations", "timezone-abbreviations",
@ -552,10 +552,10 @@ checksum = "cca5179aa9d15128cebb79bb56dda73a79cc66b402056ff19a992e54b365e15c"
[[package]] [[package]]
name = "emlx" name = "emlx"
version = "0.1.0" version = "0.1.5"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "git+https://github.com/terhechte/emlx#44c2f278551d9e7a9ae0c3c3207c4471da3049fe"
checksum = "5f41d97755f64845fb52dbff2acc01853ba51343e2076c4ab3099bb669d0e71e"
dependencies = [ dependencies = [
"email-parser",
"plist", "plist",
"thiserror", "thiserror",
] ]

@ -13,7 +13,7 @@ tracing-subscriber = "0.2.24"
rusqlite = {version = "0.25.3", features = ["chrono", "trace", "serde_json"]} rusqlite = {version = "0.25.3", features = ["chrono", "trace", "serde_json"]}
regex = "1.5.3" regex = "1.5.3"
flate2 = "1.0.22" flate2 = "1.0.22"
email-parser = { git = "https://github.com/terhechte/email-parser", features = ["sender", "to", "in-reply-to", "date", "subject", "mime", "allow-duplicate-headers"]} email-parser = { git = "https://github.com/terhechte/email-parser", features = ["sender", "to", "in-reply-to", "date", "subject", "mime", "allow-duplicate-headers", "compatibility-fixes"]}
rayon = "1.5.1" rayon = "1.5.1"
chrono = "0.4.19" chrono = "0.4.19"
lazy_static = "*" lazy_static = "*"
@ -27,7 +27,7 @@ num-format = "0.4.0"
strum = "0.21" strum = "0.21"
strum_macros = "0.21" strum_macros = "0.21"
lru = { version = "0.7.0", optional = true } lru = { version = "0.7.0", optional = true }
emlx = "0.1" emlx = { git = "https://github.com/terhechte/emlx", features = []}
walkdir = "*" walkdir = "*"
[features] [features]

@ -4,10 +4,7 @@ use eyre::{bail, Report, Result};
use rusqlite::{self, params, Connection, Statement}; use rusqlite::{self, params, Connection, Statement};
use core::panic; use core::panic;
use std::{ use std::{path::Path, thread::JoinHandle};
path::{Path, PathBuf},
thread::JoinHandle,
};
use super::{query::Query, query_result::QueryResult, sql::*, DBMessage}; use super::{query::Query, query_result::QueryResult, sql::*, DBMessage};
use crate::{database::RowConversion, importer::EmailEntry}; use crate::{database::RowConversion, importer::EmailEntry};

@ -1,5 +1,3 @@
use std::path::PathBuf;
use eyre::Report; use eyre::Report;
use crate::importer::EmailEntry; use crate::importer::EmailEntry;

@ -45,7 +45,7 @@ VALUES
pub const QUERY_ERRORS: &str = r#" pub const QUERY_ERRORS: &str = r#"
INSERT INTO errors INSERT INTO errors
(message, path) (message)
VALUES VALUES
(?, ?) (?)
"#; "#;

@ -4,43 +4,46 @@
use eyre::Result; use eyre::Result;
use rayon::prelude::*; use rayon::prelude::*;
use tracing::trace;
use walkdir::WalkDir; use walkdir::WalkDir;
use super::super::shared::filesystem::folders_in; use super::super::shared::filesystem::emails_in;
use super::super::{Message, MessageSender}; use super::super::MessageSender;
use super::raw_email::RawEmailEntry;
use crate::types::Config; use crate::types::Config;
use std::path::Path; use super::mail::Mail;
use std::path::PathBuf;
fn test_walkdir() { pub fn read_emails(config: &Config, sender: MessageSender) -> Result<Vec<Mail>> {
for entry in WalkDir::new("foo").int_par_iter().filter_map(|e| e.ok()) { // As `walkdir` does not support `par_iter` (see https://www.reddit.com/r/rust/comments/6eif7r/walkdir_users_we_need_you/)
println!("{}", entry.path().display()); // we first collect all folders,
} // then all sub-folders in those ending in `.mbox`, and then iterate over them in parallel
} let folders: Vec<PathBuf> = WalkDir::new(&config.emails_folder_path)
pub fn read_emails(config: &Config, sender: MessageSender) -> Result<Vec<RawEmailEntry>> {
Ok(folders_in(&config.emails_folder_path, sender, read_folder)?)
}
fn read_folder(path: &Path, sender: MessageSender) -> Result<Vec<RawEmailEntry>> {
let result = Ok(std::fs::read_dir(path)?
.into_iter() .into_iter()
.par_bridge() .filter_map(|e| match e {
.filter_map(|entry| { Ok(n)
let path = entry if n.path().is_dir()
.map_err(|e| tracing::error!("{} {:?}", &path.display(), &e)) && n.path()
.ok()? .to_str()
.path(); .map(|e| e.contains(".mbox"))
if path.is_dir() { .unwrap_or(false) =>
return None; {
Some(n.path().to_path_buf())
} }
trace!("Reading {}", &path.display()); _ => None,
RawEmailEntry::new(path)
}) })
.collect()); .collect();
// We're done reading the folder let mails = folders
sender.send(Message::ReadOne).unwrap(); .into_par_iter()
result .filter_map(
|path| match emails_in(path.clone(), sender.clone(), Mail::new) {
Ok(n) => Some(n),
Err(e) => {
tracing::error!("{} {:?}", path.display(), &e);
None
}
},
)
.flatten()
.collect();
Ok(mails)
} }

@ -18,7 +18,12 @@ pub struct Mail {
} }
impl Mail { impl Mail {
pub fn new(path: &Path) -> Result<Self> { pub fn new<P: AsRef<Path>>(path: P) -> Option<Self> {
let path = path.as_ref();
let name = path.file_name()?.to_str()?;
if !name.ends_with(".emlx") {
return None;
}
// find the folder ending with `.mbox` in the path // find the folder ending with `.mbox` in the path
let ext = ".mbox"; let ext = ".mbox";
let label = path let label = path
@ -27,7 +32,7 @@ impl Mail {
.flatten() .flatten()
.find(|s| s.ends_with(ext)) .find(|s| s.ends_with(ext))
.map(|s| s.replace(ext, "").to_string()); .map(|s| s.replace(ext, "").to_string());
Ok(Self { Some(Self {
path: path.to_path_buf(), path: path.to_path_buf(),
is_seen: false, is_seen: false,
label, label,

@ -1,8 +1,6 @@
mod filesystem;
mod mail; mod mail;
/// FIXME: Not sure if the number changes with each macOS release?
const DEFAULT_FOLDER: &str = "~/Library/Mail/V8/";
use super::{Config, ImporterFormat, MessageSender, Result}; use super::{Config, ImporterFormat, MessageSender, Result};
#[derive(Default)] #[derive(Default)]
@ -11,6 +9,6 @@ pub struct AppleMail {}
impl ImporterFormat for AppleMail { impl ImporterFormat for AppleMail {
type Item = mail::Mail; type Item = mail::Mail;
fn emails(&self, config: &Config, sender: MessageSender) -> Result<Vec<Self::Item>> { fn emails(&self, config: &Config, sender: MessageSender) -> Result<Vec<Self::Item>> {
panic!() filesystem::read_emails(config, sender)
} }
} }

@ -1,35 +1,14 @@
use eyre::Result; use eyre::Result;
use rayon::prelude::*;
use tracing::trace;
use super::super::shared::filesystem::folders_in; use super::super::shared::filesystem::{emails_in, folders_in};
use super::super::{Message, MessageSender}; use super::super::MessageSender;
use super::raw_email::RawEmailEntry; use super::raw_email::RawEmailEntry;
use crate::types::Config; use crate::types::Config;
use std::path::Path;
pub fn read_emails(config: &Config, sender: MessageSender) -> Result<Vec<RawEmailEntry>> { pub fn read_emails(config: &Config, sender: MessageSender) -> Result<Vec<RawEmailEntry>> {
Ok(folders_in(&config.emails_folder_path, sender, read_folder)?) Ok(folders_in(
} &config.emails_folder_path,
sender,
fn read_folder(path: &Path, sender: MessageSender) -> Result<Vec<RawEmailEntry>> { |path, sender| emails_in(path, sender, RawEmailEntry::new),
let result = Ok(std::fs::read_dir(path)? )?)
.into_iter()
.par_bridge()
.filter_map(|entry| {
let path = entry
.map_err(|e| tracing::error!("{} {:?}", &path.display(), &e))
.ok()?
.path();
if path.is_dir() {
return None;
}
trace!("Reading {}", &path.display());
RawEmailEntry::new(path)
})
.collect());
// We're done reading the folder
sender.send(Message::ReadOne).unwrap();
result
} }

@ -5,6 +5,7 @@ mod gmailbackup;
mod importer; mod importer;
pub mod shared; pub mod shared;
pub use apple_mail::AppleMail;
pub use gmailbackup::Gmail; pub use gmailbackup::Gmail;
pub use crate::types::Config; pub use crate::types::Config;

@ -4,7 +4,7 @@ use crate::types::Config;
use super::super::{Message, MessageSender}; use super::super::{Message, MessageSender};
use eyre::{bail, Context, Result}; use eyre::{bail, Result};
use rayon::prelude::*; use rayon::prelude::*;
pub fn into_database<Mail: ParseableEmail + 'static>( pub fn into_database<Mail: ParseableEmail + 'static>(
@ -33,23 +33,20 @@ pub fn into_database<Mail: ParseableEmail + 'static>(
//.par_iter() //.par_iter()
.par_iter_mut() .par_iter_mut()
// parsing them // parsing them
.map(|raw_mail| { .map(|raw_mail| parse_email(raw_mail))
// Due to lifetime issues, we can't use raw_mail.path() or raw_mail.path().display()
// or raw_mail.path().to_path_buf().display() as all of those retain a reference to
// `raw_mail`. So we just format the context into a string
parse_email(raw_mail).with_context(|| format!("{}", raw_mail.path().display()))
})
// and inserting them into SQLite // and inserting them into SQLite
.for_each(|entry| { .for_each(|entry| {
if let Err(e) = tx.send(Message::WriteOne) { // Try to write the message into the database
tracing::error!("Channel Failure: {:?}", &e);
}
if let Err(e) = match entry { if let Err(e) = match entry {
Ok(mail) => sender.send(DBMessage::Mail(mail)), Ok(mail) => sender.send(DBMessage::Mail(mail)),
Err(e) => sender.send(DBMessage::Error(e)), Err(e) => sender.send(DBMessage::Error(e)),
} { } {
tracing::error!("Error Inserting into Database: {:?}", &e); tracing::error!("Error Inserting into Database: {:?}", &e);
} }
// Signal the write
if let Err(e) = tx.send(Message::WriteOne) {
tracing::error!("Channel Failure: {:?}", &e);
}
}); });
// Tell SQLite there's no more work coming. This will exit the listening loop // Tell SQLite there's no more work coming. This will exit the listening loop

@ -2,7 +2,7 @@ use eyre::{bail, Result};
use rayon::prelude::*; use rayon::prelude::*;
use tracing::trace; use tracing::trace;
use std::path::Path; use std::path::{Path, PathBuf};
use super::super::{Message, MessageSender}; use super::super::{Message, MessageSender};
@ -15,7 +15,7 @@ pub fn folders_in<FolderAction, ActionResult, P>(
) -> Result<Vec<ActionResult>> ) -> Result<Vec<ActionResult>>
where where
P: AsRef<Path>, P: AsRef<Path>,
FolderAction: Fn(&Path, MessageSender) -> Result<Vec<ActionResult>> + Send + Sync, FolderAction: Fn(PathBuf, MessageSender) -> Result<Vec<ActionResult>> + Send + Sync,
ActionResult: Send, ActionResult: Send,
{ {
let folder = folder.as_ref(); let folder = folder.as_ref();
@ -40,10 +40,37 @@ where
} }
let sender = sender.clone(); let sender = sender.clone();
trace!("Reading folder {}", path.display()); trace!("Reading folder {}", path.display());
action(&path, sender) action(path.clone(), sender)
.map_err(|e| tracing::error!("{} {:?}", &path.display(), &e)) .map_err(|e| tracing::error!("{} {:?}", path.display(), &e))
.ok() .ok()
}) })
.flatten() .flatten()
.collect()) .collect())
} }
pub fn emails_in<O, F, P: AsRef<Path>>(path: P, sender: MessageSender, make: F) -> Result<Vec<O>>
where
F: Fn(PathBuf) -> Option<O>,
F: Send + Sync + 'static,
O: Send + Sync,
{
let path = path.as_ref();
let result = Ok(std::fs::read_dir(path)?
.into_iter()
.par_bridge()
.filter_map(|entry| {
let path = entry
.map_err(|e| tracing::error!("{} {:?}", &path.display(), &e))
.ok()?
.path();
if path.is_dir() {
return None;
}
trace!("Reading {}", &path.display());
make(path)
})
.collect());
// We're done reading the folder
sender.send(Message::ReadOne).unwrap();
result
}

@ -2,8 +2,7 @@ use chrono::prelude::*;
use email_parser::address::{Address, EmailAddress, Mailbox}; use email_parser::address::{Address, EmailAddress, Mailbox};
use eyre::{eyre, Result}; use eyre::{eyre, Result};
use std::borrow::{Borrow, Cow}; use std::borrow::Cow;
use std::convert::{TryFrom, TryInto};
use std::path::Path; use std::path::Path;
use super::email::{EmailEntry, EmailMeta}; use super::email::{EmailEntry, EmailMeta};
@ -24,11 +23,15 @@ pub trait ParseableEmail: Send + Sized + Sync {
} }
pub fn parse_email<Entry: ParseableEmail>(entry: &mut Entry) -> Result<EmailEntry> { pub fn parse_email<Entry: ParseableEmail>(entry: &mut Entry) -> Result<EmailEntry> {
entry.prepare()?; if let Err(e) = entry.prepare() {
tracing::error!("Prepare Error: {:?}", e);
return Err(e);
}
let content = entry.message()?; let content = entry.message()?;
match email_parser::email::Email::parse(&content) { match email_parser::email::Email::parse(&content) {
Ok(email) => { Ok(email) => {
let path = entry.path(); let path = entry.path();
tracing::trace!("Parsing {}", path.display());
let (sender_name, _, sender_local_part, sender_domain) = let (sender_name, _, sender_local_part, sender_domain) =
mailbox_to_string(&email.sender); mailbox_to_string(&email.sender);
@ -69,13 +72,18 @@ pub fn parse_email<Entry: ParseableEmail>(entry: &mut Entry) -> Result<EmailEntr
}) })
} }
Err(error) => { Err(error) => {
//let content_string = String::from_utf8(content.clone())?; let error = eyre!(
//println!("{}|{}", &error, &raw_entry.eml_path.display()); "Could not parse email (trace to see contents): {:?} [{}]",
Err(eyre!(
"Could not parse email: {:?} [{}]",
&error, &error,
entry.path().display() entry.path().display()
)) );
tracing::error!("{:?}", &error);
if let Ok(content_string) = String::from_utf8(content.into_owned()) {
tracing::trace!("Contents:\n{}\n---\n", content_string);
} else {
tracing::trace!("Contents:\nInvalid UTF8\n---\n");
}
Err(error)
} }
} }
} }

@ -38,3 +38,7 @@ pub type MessageReceiver = crossbeam_channel::Receiver<Message>;
pub fn gmail_importer(config: &Config) -> Importer<formats::Gmail> { pub fn gmail_importer(config: &Config) -> Importer<formats::Gmail> {
Importer::new(config, formats::Gmail::default()) Importer::new(config, formats::Gmail::default())
} }
/// Builds an `Importer` for `config` that uses the default `formats::AppleMail` format.
pub fn applemail_importer(config: &Config) -> Importer<formats::AppleMail> {
    let format = formats::AppleMail::default();
    Importer::new(config, format)
}

Loading…
Cancel
Save