Initial Mbox support

pull/1/head
Benedikt Terhechte 3 years ago
parent 7a69902d83
commit a606538524

20
Cargo.lock generated

@ -766,6 +766,7 @@ dependencies = [
"flate2",
"lazy_static",
"lru",
"mbox-reader",
"num-format",
"rayon",
"regex",
@ -978,12 +979,31 @@ dependencies = [
"regex-automata",
]
[[package]]
name = "mbox-reader"
version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6231e973c0a8caceed71fac7355555012ba73fe230365989b298b36022e9e2ab"
dependencies = [
"memmap",
]
[[package]]
name = "memchr"
version = "2.4.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "308cc39be01b73d0d18f82a0e7b2a3df85245f84af96fdddc5d202d27e47b86a"
[[package]]
name = "memmap"
version = "0.7.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6585fd95e7bb50d6cc31e20d4cf9afb4e2ba16c5846fc76793f11218da9c475b"
dependencies = [
"libc",
"winapi",
]
[[package]]
name = "memmap2"
version = "0.1.0"

@ -29,6 +29,7 @@ strum_macros = "0.21"
lru = { version = "0.7.0", optional = true }
emlx = { git = "https://github.com/terhechte/emlx", features = []}
walkdir = "*"
mbox-reader = "0.2.0"
[features]
default = ["gui"]

@ -31,6 +31,10 @@ fn main() -> Result<()> {
let importer = gmaildb::importer::gmail_importer(config);
adapter.process(importer)?
}
ImporterFormat::MboxVault => {
let importer = gmaildb::importer::mbox_importer(config);
adapter.process(importer)?
}
};
let mut stdout = stdout();

@ -0,0 +1,85 @@
//! FIXME: Implement our own Mailbox reader. This one is terrible!
//! Use jetscii for efficient searching:
//! https://github.com/shepmaster/jetscii
//! (or aho corasick)
//! Here's the ref: file:///Users/terhechte/Development/Rust/gmaildb/target/doc/src/mbox_reader/lib.rs.html#65-67
//! Make it so that I can hold the mbox in the struct below
use eyre::bail;
use mbox_reader;
use tracing;
use super::{Config, ImporterFormat, MessageSender, Result};
use super::shared::email::EmailMeta;
use super::shared::parse::ParseableEmail;
use std::borrow::Cow;
use std::path::{Path, PathBuf};
/// A single message extracted from an mbox file.
pub struct Mail {
    /// Path of the mbox file this message was read from.
    path: PathBuf,
    /// Owned copy of the message bytes taken from the mbox.
    ///
    /// This is deliberately the simplest possible implementation: every
    /// mail carries its own heap-allocated buffer. That costs one
    /// allocation per message and shows the limits of the current
    /// abstraction; keeping only the headers and ignoring the rest
    /// would be cheaper.
    content: Vec<u8>,
}
/// Marker type selecting the mbox importer format.
///
/// It carries no state; the mbox file location is taken from the `Config`
/// handed to the importer.
#[derive(Default)]
pub struct Mbox;
/// The inner parsing code
fn inner_emails(config: &Config) -> Result<Vec<Mail>> {
if config
.emails_folder_path
.extension()
.and_then(std::ffi::OsStr::to_str)
!= Some("mbox")
{
bail!("Path does not point to an .mbox file")
}
let mbox = mbox_reader::MboxFile::from_file(config.emails_folder_path.as_path())?;
let path = config.emails_folder_path.clone();
Ok(mbox
.iter()
.filter_map(|e| {
let content = match e.message() {
Some(n) => n,
None => {
tracing::error!("Could not parse mail at offset {}", e.offset());
return None;
}
};
Some(Mail {
path: path.clone(),
content: content.to_owned(),
})
})
.collect())
}
/// Importer-format implementation backed by a single mbox file.
impl ImporterFormat for Mbox {
    type Item = Mail;

    /// Returns all mails found in the configured mbox file.
    ///
    /// The message sender is currently unused by this format.
    fn emails(&self, config: &Config, _sender: MessageSender) -> Result<Vec<Self::Item>> {
        let mails = inner_emails(config)?;
        Ok(mails)
    }
}
/// Exposes the in-memory mbox message to the shared email parser.
impl ParseableEmail for Mail {
    /// Nothing to prepare: the bytes are already held in memory.
    fn prepare(&mut self) -> Result<()> {
        Ok(())
    }

    /// Borrows the stored message bytes without copying them.
    fn message(&self) -> Result<Cow<'_, [u8]>> {
        let bytes: &[u8] = &self.content;
        Ok(Cow::Borrowed(bytes))
    }

    /// The path stored alongside this mail.
    fn path(&self) -> &Path {
        &self.path
    }

    /// This format provides no additional metadata.
    fn meta(&self) -> Result<Option<EmailMeta>> {
        Ok(None)
    }
}

@ -2,10 +2,12 @@ pub use eyre::Result;
mod apple_mail;
mod gmailbackup;
mod mbox;
pub mod shared;
pub use apple_mail::AppleMail;
pub use gmailbackup::Gmail;
pub use mbox::Mbox;
pub use crate::types::Config;
use shared::parse::ParseableEmail;

@ -30,7 +30,6 @@ pub fn into_database<Mail: ParseableEmail + 'static>(
// Iterate over the mails..
emails
// in parallel..
//.par_iter()
.par_iter_mut()
// parsing them
.map(|raw_mail| parse_email(raw_mail, config.sender_email.as_str()))

@ -37,9 +37,11 @@ pub type MessageSender = crossbeam_channel::Sender<Message>;
pub type MessageReceiver = crossbeam_channel::Receiver<Message>;
pub fn importer(config: &Config) -> Box<dyn importer::Importerlike> {
use crate::types::ImporterFormat::*;
match config.format {
crate::types::ImporterFormat::AppleMail => Box::new(applemail_importer(config.clone())),
crate::types::ImporterFormat::GmailVault => Box::new(gmail_importer(config.clone())),
AppleMail => Box::new(applemail_importer(config.clone())),
GmailVault => Box::new(gmail_importer(config.clone())),
MboxVault => Box::new(gmail_importer(config.clone())),
}
}
@ -50,3 +52,7 @@ pub fn gmail_importer(config: Config) -> importer::Importer<formats::Gmail> {
/// Creates an importer that reads mails using the Apple Mail format.
pub fn applemail_importer(config: Config) -> importer::Importer<formats::AppleMail> {
    let format = formats::AppleMail::default();
    importer::Importer::new(config, format)
}
/// Creates an importer that reads mails using the mbox format.
pub fn mbox_importer(config: Config) -> importer::Importer<formats::Mbox> {
    let format = formats::Mbox::default();
    importer::Importer::new(config, format)
}

@ -1,9 +1,10 @@
use std::path::{Path, PathBuf};
/// The supported kinds of mail sources.
///
/// `PartialEq`/`Eq` are derived so a format can be compared directly,
/// e.g. to skip the directory check for mbox input.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum ImporterFormat {
    /// Selected by the format string `"apple"`.
    AppleMail,
    /// Selected by the format string `"gmailvault"`.
    GmailVault,
    /// Selected by the format string `"mbox"`; the mails path is not
    /// required to be a directory for this format.
    MboxVault,
}
impl From<&String> for ImporterFormat {
@ -11,6 +12,7 @@ impl From<&String> for ImporterFormat {
match format.as_str() {
"apple" => ImporterFormat::AppleMail,
"gmailvault" => ImporterFormat::GmailVault,
"mbox" => ImporterFormat::MboxVault,
_ => panic!("Unknown format: {}", &format),
}
}
@ -43,7 +45,8 @@ impl Config {
);
}
let emails_folder_path = mails.as_ref().to_path_buf();
if !emails_folder_path.is_dir() {
// For non-mbox files, we make sure we have a directory
if format != ImporterFormat::MboxVault && !emails_folder_path.is_dir() {
panic!(
"Emails Folder Path is not a directory: {}",
&emails_folder_path.display()

Loading…
Cancel
Save