mirror of
https://github.com/terhechte/postsack
synced 2024-11-04 12:00:18 +00:00
Add support for AppleMail
This commit is contained in:
parent
6bdc412833
commit
471517047a
8
Cargo.lock
generated
8
Cargo.lock
generated
@ -538,7 +538,7 @@ checksum = "e78d4f1cc4ae33bbfc157ed5d5a5ef3bc29227303d595861deb238fcec4e9457"
|
||||
[[package]]
|
||||
name = "email-parser"
|
||||
version = "0.5.0"
|
||||
source = "git+https://github.com/terhechte/email-parser#da8582a266385b8f1b2f7e150ac891e3f850d41a"
|
||||
source = "git+https://github.com/terhechte/email-parser#dba59d86771f7df67bb9e7f3a2c4b1e36b02d19b"
|
||||
dependencies = [
|
||||
"textcode",
|
||||
"timezone-abbreviations",
|
||||
@ -552,10 +552,10 @@ checksum = "cca5179aa9d15128cebb79bb56dda73a79cc66b402056ff19a992e54b365e15c"
|
||||
|
||||
[[package]]
|
||||
name = "emlx"
|
||||
version = "0.1.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "5f41d97755f64845fb52dbff2acc01853ba51343e2076c4ab3099bb669d0e71e"
|
||||
version = "0.1.5"
|
||||
source = "git+https://github.com/terhechte/emlx#44c2f278551d9e7a9ae0c3c3207c4471da3049fe"
|
||||
dependencies = [
|
||||
"email-parser",
|
||||
"plist",
|
||||
"thiserror",
|
||||
]
|
||||
|
@ -13,7 +13,7 @@ tracing-subscriber = "0.2.24"
|
||||
rusqlite = {version = "0.25.3", features = ["chrono", "trace", "serde_json"]}
|
||||
regex = "1.5.3"
|
||||
flate2 = "1.0.22"
|
||||
email-parser = { git = "https://github.com/terhechte/email-parser", features = ["sender", "to", "in-reply-to", "date", "subject", "mime", "allow-duplicate-headers"]}
|
||||
email-parser = { git = "https://github.com/terhechte/email-parser", features = ["sender", "to", "in-reply-to", "date", "subject", "mime", "allow-duplicate-headers", "compatibility-fixes"]}
|
||||
rayon = "1.5.1"
|
||||
chrono = "0.4.19"
|
||||
lazy_static = "*"
|
||||
@ -27,7 +27,7 @@ num-format = "0.4.0"
|
||||
strum = "0.21"
|
||||
strum_macros = "0.21"
|
||||
lru = { version = "0.7.0", optional = true }
|
||||
emlx = "0.1"
|
||||
emlx = { git = "https://github.com/terhechte/emlx", features = []}
|
||||
walkdir = "*"
|
||||
|
||||
[features]
|
||||
|
@ -4,10 +4,7 @@ use eyre::{bail, Report, Result};
|
||||
use rusqlite::{self, params, Connection, Statement};
|
||||
|
||||
use core::panic;
|
||||
use std::{
|
||||
path::{Path, PathBuf},
|
||||
thread::JoinHandle,
|
||||
};
|
||||
use std::{path::Path, thread::JoinHandle};
|
||||
|
||||
use super::{query::Query, query_result::QueryResult, sql::*, DBMessage};
|
||||
use crate::{database::RowConversion, importer::EmailEntry};
|
||||
|
@ -1,5 +1,3 @@
|
||||
use std::path::PathBuf;
|
||||
|
||||
use eyre::Report;
|
||||
|
||||
use crate::importer::EmailEntry;
|
||||
|
@ -45,7 +45,7 @@ VALUES
|
||||
|
||||
pub const QUERY_ERRORS: &str = r#"
|
||||
INSERT INTO errors
|
||||
(message, path)
|
||||
(message)
|
||||
VALUES
|
||||
(?, ?)
|
||||
(?)
|
||||
"#;
|
||||
|
@ -4,43 +4,46 @@
|
||||
|
||||
use eyre::Result;
|
||||
use rayon::prelude::*;
|
||||
use tracing::trace;
|
||||
use walkdir::WalkDir;
|
||||
|
||||
use super::super::shared::filesystem::folders_in;
|
||||
use super::super::{Message, MessageSender};
|
||||
use super::raw_email::RawEmailEntry;
|
||||
use super::super::shared::filesystem::emails_in;
|
||||
use super::super::MessageSender;
|
||||
use crate::types::Config;
|
||||
|
||||
use std::path::Path;
|
||||
use super::mail::Mail;
|
||||
use std::path::PathBuf;
|
||||
|
||||
fn test_walkdir() {
|
||||
for entry in WalkDir::new("foo").int_par_iter().filter_map(|e| e.ok()) {
|
||||
println!("{}", entry.path().display());
|
||||
}
|
||||
}
|
||||
|
||||
pub fn read_emails(config: &Config, sender: MessageSender) -> Result<Vec<RawEmailEntry>> {
|
||||
Ok(folders_in(&config.emails_folder_path, sender, read_folder)?)
|
||||
}
|
||||
|
||||
fn read_folder(path: &Path, sender: MessageSender) -> Result<Vec<RawEmailEntry>> {
|
||||
let result = Ok(std::fs::read_dir(path)?
|
||||
pub fn read_emails(config: &Config, sender: MessageSender) -> Result<Vec<Mail>> {
|
||||
// As `walkdir` does not support `par_iter` (see https://www.reddit.com/r/rust/comments/6eif7r/walkdir_users_we_need_you/)
|
||||
// we first collect all folders,
|
||||
// then all sub-folders in those ending in .mbox, and then iterate over them in parallel
|
||||
let folders: Vec<PathBuf> = WalkDir::new(&config.emails_folder_path)
|
||||
.into_iter()
|
||||
.par_bridge()
|
||||
.filter_map(|entry| {
|
||||
let path = entry
|
||||
.map_err(|e| tracing::error!("{} {:?}", &path.display(), &e))
|
||||
.ok()?
|
||||
.path();
|
||||
if path.is_dir() {
|
||||
return None;
|
||||
.filter_map(|e| match e {
|
||||
Ok(n)
|
||||
if n.path().is_dir()
|
||||
&& n.path()
|
||||
.to_str()
|
||||
.map(|e| e.contains(".mbox"))
|
||||
.unwrap_or(false) =>
|
||||
{
|
||||
Some(n.path().to_path_buf())
|
||||
}
|
||||
trace!("Reading {}", &path.display());
|
||||
RawEmailEntry::new(path)
|
||||
_ => None,
|
||||
})
|
||||
.collect());
|
||||
// We're done reading the folder
|
||||
sender.send(Message::ReadOne).unwrap();
|
||||
result
|
||||
.collect();
|
||||
let mails = folders
|
||||
.into_par_iter()
|
||||
.filter_map(
|
||||
|path| match emails_in(path.clone(), sender.clone(), Mail::new) {
|
||||
Ok(n) => Some(n),
|
||||
Err(e) => {
|
||||
tracing::error!("{} {:?}", path.display(), &e);
|
||||
None
|
||||
}
|
||||
},
|
||||
)
|
||||
.flatten()
|
||||
.collect();
|
||||
Ok(mails)
|
||||
}
|
||||
|
@ -18,7 +18,12 @@ pub struct Mail {
|
||||
}
|
||||
|
||||
impl Mail {
|
||||
pub fn new(path: &Path) -> Result<Self> {
|
||||
pub fn new<P: AsRef<Path>>(path: P) -> Option<Self> {
|
||||
let path = path.as_ref();
|
||||
let name = path.file_name()?.to_str()?;
|
||||
if !name.ends_with(".emlx") {
|
||||
return None;
|
||||
}
|
||||
// find the folder ending with `.mbox` in the path
|
||||
let ext = ".mbox";
|
||||
let label = path
|
||||
@ -27,7 +32,7 @@ impl Mail {
|
||||
.flatten()
|
||||
.find(|s| s.ends_with(ext))
|
||||
.map(|s| s.replace(ext, "").to_string());
|
||||
Ok(Self {
|
||||
Some(Self {
|
||||
path: path.to_path_buf(),
|
||||
is_seen: false,
|
||||
label,
|
||||
|
@ -1,8 +1,6 @@
|
||||
mod filesystem;
|
||||
mod mail;
|
||||
|
||||
/// FIXME: Not sure if the number changes with each macOS release?
|
||||
const DEFAULT_FOLDER: &str = "~/Library/Mail/V8/";
|
||||
|
||||
use super::{Config, ImporterFormat, MessageSender, Result};
|
||||
|
||||
#[derive(Default)]
|
||||
@ -11,6 +9,6 @@ pub struct AppleMail {}
|
||||
impl ImporterFormat for AppleMail {
|
||||
type Item = mail::Mail;
|
||||
fn emails(&self, config: &Config, sender: MessageSender) -> Result<Vec<Self::Item>> {
|
||||
panic!()
|
||||
filesystem::read_emails(config, sender)
|
||||
}
|
||||
}
|
||||
|
@ -1,35 +1,14 @@
|
||||
use eyre::Result;
|
||||
use rayon::prelude::*;
|
||||
use tracing::trace;
|
||||
|
||||
use super::super::shared::filesystem::folders_in;
|
||||
use super::super::{Message, MessageSender};
|
||||
use super::super::shared::filesystem::{emails_in, folders_in};
|
||||
use super::super::MessageSender;
|
||||
use super::raw_email::RawEmailEntry;
|
||||
use crate::types::Config;
|
||||
|
||||
use std::path::Path;
|
||||
|
||||
pub fn read_emails(config: &Config, sender: MessageSender) -> Result<Vec<RawEmailEntry>> {
|
||||
Ok(folders_in(&config.emails_folder_path, sender, read_folder)?)
|
||||
}
|
||||
|
||||
fn read_folder(path: &Path, sender: MessageSender) -> Result<Vec<RawEmailEntry>> {
|
||||
let result = Ok(std::fs::read_dir(path)?
|
||||
.into_iter()
|
||||
.par_bridge()
|
||||
.filter_map(|entry| {
|
||||
let path = entry
|
||||
.map_err(|e| tracing::error!("{} {:?}", &path.display(), &e))
|
||||
.ok()?
|
||||
.path();
|
||||
if path.is_dir() {
|
||||
return None;
|
||||
}
|
||||
trace!("Reading {}", &path.display());
|
||||
RawEmailEntry::new(path)
|
||||
})
|
||||
.collect());
|
||||
// We're done reading the folder
|
||||
sender.send(Message::ReadOne).unwrap();
|
||||
result
|
||||
Ok(folders_in(
|
||||
&config.emails_folder_path,
|
||||
sender,
|
||||
|path, sender| emails_in(path, sender, RawEmailEntry::new),
|
||||
)?)
|
||||
}
|
||||
|
@ -5,6 +5,7 @@ mod gmailbackup;
|
||||
mod importer;
|
||||
pub mod shared;
|
||||
|
||||
pub use apple_mail::AppleMail;
|
||||
pub use gmailbackup::Gmail;
|
||||
|
||||
pub use crate::types::Config;
|
||||
|
@ -4,7 +4,7 @@ use crate::types::Config;
|
||||
|
||||
use super::super::{Message, MessageSender};
|
||||
|
||||
use eyre::{bail, Context, Result};
|
||||
use eyre::{bail, Result};
|
||||
use rayon::prelude::*;
|
||||
|
||||
pub fn into_database<Mail: ParseableEmail + 'static>(
|
||||
@ -33,23 +33,20 @@ pub fn into_database<Mail: ParseableEmail + 'static>(
|
||||
//.par_iter()
|
||||
.par_iter_mut()
|
||||
// parsing them
|
||||
.map(|raw_mail| {
|
||||
// Due to lifetime issues, we can't use raw_mail.path() or raw_mail.path().display()
|
||||
// or raw_mail.path().to_path_buf().display() as all of those retain a reference to
|
||||
// `raw_mail`. So we just format the context into a string
|
||||
parse_email(raw_mail).with_context(|| format!("{}", raw_mail.path().display()))
|
||||
})
|
||||
.map(|raw_mail| parse_email(raw_mail))
|
||||
// and inserting them into SQLite
|
||||
.for_each(|entry| {
|
||||
if let Err(e) = tx.send(Message::WriteOne) {
|
||||
tracing::error!("Channel Failure: {:?}", &e);
|
||||
}
|
||||
// Try to write the message into the database
|
||||
if let Err(e) = match entry {
|
||||
Ok(mail) => sender.send(DBMessage::Mail(mail)),
|
||||
Err(e) => sender.send(DBMessage::Error(e)),
|
||||
} {
|
||||
tracing::error!("Error Inserting into Database: {:?}", &e);
|
||||
}
|
||||
// Signal the write
|
||||
if let Err(e) = tx.send(Message::WriteOne) {
|
||||
tracing::error!("Channel Failure: {:?}", &e);
|
||||
}
|
||||
});
|
||||
|
||||
// Tell SQLite there's no more work coming. This will exit the listening loop
|
||||
|
@ -2,7 +2,7 @@ use eyre::{bail, Result};
|
||||
use rayon::prelude::*;
|
||||
use tracing::trace;
|
||||
|
||||
use std::path::Path;
|
||||
use std::path::{Path, PathBuf};
|
||||
|
||||
use super::super::{Message, MessageSender};
|
||||
|
||||
@ -15,7 +15,7 @@ pub fn folders_in<FolderAction, ActionResult, P>(
|
||||
) -> Result<Vec<ActionResult>>
|
||||
where
|
||||
P: AsRef<Path>,
|
||||
FolderAction: Fn(&Path, MessageSender) -> Result<Vec<ActionResult>> + Send + Sync,
|
||||
FolderAction: Fn(PathBuf, MessageSender) -> Result<Vec<ActionResult>> + Send + Sync,
|
||||
ActionResult: Send,
|
||||
{
|
||||
let folder = folder.as_ref();
|
||||
@ -40,10 +40,37 @@ where
|
||||
}
|
||||
let sender = sender.clone();
|
||||
trace!("Reading folder {}", path.display());
|
||||
action(&path, sender)
|
||||
.map_err(|e| tracing::error!("{} {:?}", &path.display(), &e))
|
||||
action(path.clone(), sender)
|
||||
.map_err(|e| tracing::error!("{} {:?}", path.display(), &e))
|
||||
.ok()
|
||||
})
|
||||
.flatten()
|
||||
.collect())
|
||||
}
|
||||
|
||||
pub fn emails_in<O, F, P: AsRef<Path>>(path: P, sender: MessageSender, make: F) -> Result<Vec<O>>
|
||||
where
|
||||
F: Fn(PathBuf) -> Option<O>,
|
||||
F: Send + Sync + 'static,
|
||||
O: Send + Sync,
|
||||
{
|
||||
let path = path.as_ref();
|
||||
let result = Ok(std::fs::read_dir(path)?
|
||||
.into_iter()
|
||||
.par_bridge()
|
||||
.filter_map(|entry| {
|
||||
let path = entry
|
||||
.map_err(|e| tracing::error!("{} {:?}", &path.display(), &e))
|
||||
.ok()?
|
||||
.path();
|
||||
if path.is_dir() {
|
||||
return None;
|
||||
}
|
||||
trace!("Reading {}", &path.display());
|
||||
make(path)
|
||||
})
|
||||
.collect());
|
||||
// We're done reading the folder
|
||||
sender.send(Message::ReadOne).unwrap();
|
||||
result
|
||||
}
|
||||
|
@ -2,8 +2,7 @@ use chrono::prelude::*;
|
||||
use email_parser::address::{Address, EmailAddress, Mailbox};
|
||||
use eyre::{eyre, Result};
|
||||
|
||||
use std::borrow::{Borrow, Cow};
|
||||
use std::convert::{TryFrom, TryInto};
|
||||
use std::borrow::Cow;
|
||||
use std::path::Path;
|
||||
|
||||
use super::email::{EmailEntry, EmailMeta};
|
||||
@ -24,11 +23,15 @@ pub trait ParseableEmail: Send + Sized + Sync {
|
||||
}
|
||||
|
||||
pub fn parse_email<Entry: ParseableEmail>(entry: &mut Entry) -> Result<EmailEntry> {
|
||||
entry.prepare()?;
|
||||
if let Err(e) = entry.prepare() {
|
||||
tracing::error!("Prepare Error: {:?}", e);
|
||||
return Err(e);
|
||||
}
|
||||
let content = entry.message()?;
|
||||
match email_parser::email::Email::parse(&content) {
|
||||
Ok(email) => {
|
||||
let path = entry.path();
|
||||
tracing::trace!("Parsing {}", path.display());
|
||||
let (sender_name, _, sender_local_part, sender_domain) =
|
||||
mailbox_to_string(&email.sender);
|
||||
|
||||
@ -69,13 +72,18 @@ pub fn parse_email<Entry: ParseableEmail>(entry: &mut Entry) -> Result<EmailEntr
|
||||
})
|
||||
}
|
||||
Err(error) => {
|
||||
//let content_string = String::from_utf8(content.clone())?;
|
||||
//println!("{}|{}", &error, &raw_entry.eml_path.display());
|
||||
Err(eyre!(
|
||||
"Could not parse email: {:?} [{}]",
|
||||
let error = eyre!(
|
||||
"Could not parse email (trace to see contents): {:?} [{}]",
|
||||
&error,
|
||||
entry.path().display()
|
||||
))
|
||||
);
|
||||
tracing::error!("{:?}", &error);
|
||||
if let Ok(content_string) = String::from_utf8(content.into_owned()) {
|
||||
tracing::trace!("Contents:\n{}\n---\n", content_string);
|
||||
} else {
|
||||
tracing::trace!("Contents:\nInvalid UTF8\n---\n");
|
||||
}
|
||||
Err(error)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -38,3 +38,7 @@ pub type MessageReceiver = crossbeam_channel::Receiver<Message>;
|
||||
pub fn gmail_importer(config: &Config) -> Importer<formats::Gmail> {
|
||||
Importer::new(config, formats::Gmail::default())
|
||||
}
|
||||
|
||||
pub fn applemail_importer(config: &Config) -> Importer<formats::AppleMail> {
|
||||
Importer::new(config, formats::AppleMail::default())
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user