Cleanup, add more fields

pull/2/head
Benedikt Terhechte 3 years ago
parent 976004fbe3
commit 686a25a272

@ -13,7 +13,7 @@ tracing-subscriber = "0.2.24"
rusqlite = {version = "0.25.3", features = ["chrono", "trace"]}
regex = "1.5.3"
flate2 = "1.0.22"
email-parser = { git = "https://github.com/terhechte/email-parser", features = ["sender", "from", "date", "subject", "mime", "allow-duplicate-headers"]}
email-parser = { git = "https://github.com/terhechte/email-parser", features = ["sender", "to", "in-reply-to", "date", "subject", "mime", "allow-duplicate-headers"]}
rayon = "1.5.1"
chrono = "0.4.19"
lazy_static = "*"
@ -21,6 +21,11 @@ serde_json = "*"
serde = { version = "*", features = ["derive"]}
crossbeam-channel = "0.5.1"
[features]
default = []
# Trace all SQL Queries
trace-sql = []
#[profile.release]
#lto = "fat"
#codegen-units = 1

@ -118,13 +118,33 @@ impl Database {
/// Binds the fields of `entry` to the prepared `statement` and executes it.
///
/// Returns an error when executing the statement fails.
fn insert_mail(statement: &mut Statement, entry: &EmailEntry) -> Result<()> {
let path = entry.path.display().to_string();
let domain = &entry.domain;
let local_part = &entry.local_part;
// The date is passed as three separate year / month / day parameters.
let year = entry.datetime.date().year();
let month = entry.datetime.date().month();
let day = entry.datetime.date().day();
let subject = entry.subject.to_string();
statement.execute(params![path, domain, local_part, year, month, day, subject])?;
let e = entry;
// `to_first` is an optional pair; each half becomes its own optional parameter.
let to_name = e.to_first.as_ref().map(|e| &e.0);
let to_address = e.to_first.as_ref().map(|e| &e.1);
// Both values derived from the metadata are `None` when `meta` is absent.
let meta_tags = e.meta.as_ref().map(|e| e.tags_string());
let meta_is_seen = e.meta.as_ref().map(|e| e.is_seen);
// Parameters are positional, so their order here is significant.
let p = params![
path,
e.sender_domain,
e.sender_local_part,
e.sender_name,
year,
month,
day,
e.subject,
e.to_count,
e.to_group,
to_name,
to_address,
e.is_reply,
e.is_send,
meta_tags,
meta_is_seen
];
statement.execute(p)?;
tracing::trace!("Insert Mail {}", &path);
Ok(())
}

@ -1,12 +1,21 @@
/// SQL statement that creates the `emails` table when it does not exist yet.
pub const TBL_EMAILS: &str = r#"
CREATE TABLE IF NOT EXISTS emails (
path TEXT NOT NULL,
domain TEXT NOT NULL,
local_part TEXT NOT NULL,
sender_domain TEXT NOT NULL,
sender_local_part TEXT NOT NULL,
sender_name TEXT NOT NULL,
year INTEGER NOT NULL,
month INTEGER NOT NULL,
day INTEGER NOT NULL,
subject TEXT NOT NULL
subject TEXT NOT NULL,
to_count INTEGER NOT NULL,
to_group TEXT NULL,
to_name TEXT NULL,
to_address TEXT NULL,
is_reply BOOL,
is_send BOOL,
meta_tags TEXT NULL,
meta_is_seen BOOL NULL
);"#;
pub const TBL_ERRORS: &str = r#"
@ -17,9 +26,21 @@ CREATE TABLE IF NOT EXISTS errors (
/// SQL statement that inserts one row into the `emails` table.
///
/// Values are supplied through positional `?` placeholders, one per listed column.
pub const QUERY_EMAILS: &str = r#"
INSERT INTO emails
(path, domain, local_part, year, month, day, subject)
(
path, sender_domain, sender_local_part, sender_name,
year, month, day, subject,
to_count, to_group, to_name, to_address,
is_reply, is_send,
meta_tags, meta_is_seen
)
VALUES
(?, ?, ?, ?, ?, ?, ?)
(
?, ?, ?, ?,
?, ?, ?, ?,
?, ?, ?, ?,
?, ?,
?, ?
)
"#;
pub const QUERY_ERRORS: &str = r#"

@ -1,16 +1,15 @@
use super::gmail_meta;
use crate::database::{DBMessage, Database};
use crate::filesystem::RawEmailEntry;
use crate::types::{Config, EmailEntry};
use chrono::prelude::*;
use email_parser::address::{Address, EmailAddress, Mailbox};
use eyre::{bail, eyre, Result};
use rayon::prelude::*;
use std::thread::JoinHandle;
use std::{
convert::{TryFrom, TryInto},
path::Path,
};
use std::convert::{TryFrom, TryInto};
pub enum ParseMessage {
Total(usize),
@ -96,7 +95,7 @@ fn parse_email(raw_entry: &RawEmailEntry) -> Result<EmailEntry> {
fn parse_email_parser(raw_entry: &RawEmailEntry, content: &Vec<u8>) -> Result<EmailEntry> {
match email_parser::email::Email::parse(&content) {
Ok(email) => (raw_entry.path(), email).try_into(),
Ok(email) => (raw_entry, email).try_into(),
Err(error) => {
//let content_string = String::from_utf8(content.clone())?;
//println!("{}|{}", &error, &raw_entry.eml_path.display());
@ -109,25 +108,102 @@ fn parse_email_parser(raw_entry: &RawEmailEntry, content: &Vec<u8>) -> Result<Em
}
}
// Conversion of a raw entry plus its parsed email into an `EmailEntry`.
impl<'a> TryFrom<(&Path, email_parser::email::Email<'a>)> for EmailEntry {
impl<'a> TryFrom<(&RawEmailEntry, email_parser::email::Email<'a>)> for EmailEntry {
type Error = eyre::Report;
fn try_from(content: (&Path, email_parser::email::Email)) -> Result<Self, Self::Error> {
let (path, email) = content;
let domain = email.sender.address.domain.to_string();
let local_part = email.sender.address.local_part.to_string();
fn try_from(
content: (&RawEmailEntry, email_parser::email::Email),
) -> Result<Self, Self::Error> {
let (entry, email) = content;
let path = entry.path();
// Of the sender only display name, local part and domain are kept;
// the combined address string is discarded.
let (sender_name, _, sender_local_part, sender_domain) = mailbox_to_string(&email.sender);
let datetime = emaildatetime_to_chrono(&email.date);
// A missing subject becomes an empty string.
let subject = email.subject.map(|e| e.to_string()).unwrap_or_default();
// Number of entries in `to`, or 0 when there is none.
let to_count = match email.to.as_ref() {
Some(n) => n.len(),
None => 0,
};
// Only the first `to` entry is inspected for name / address / group.
let to = match email.to.as_ref().map(|v| v.first()).flatten() {
Some(n) => address_to_name_string(n),
None => None,
};
// `to` is (group name, display name, address): split it into the optional
// group and the (display name, address) pair.
let to_group = to.as_ref().map(|e| e.0.clone()).flatten();
let to_first = to.as_ref().map(|e| (e.1.clone(), e.2.clone()));
// Treated as a reply when `in_reply_to` is present and not empty.
let is_reply = email.in_reply_to.map(|v| !v.is_empty()).unwrap_or(false);
// Errors while parsing the metadata are dropped (`.ok()`), leaving `meta` as `None`.
let meta = if entry.has_gmail_meta() {
gmail_meta::parse_meta(&entry).ok().map(|e| e.into())
} else {
None
};
// This is filled out at a later stage
let is_send = false;
Ok(EmailEntry {
path: path.to_path_buf(),
domain,
local_part,
sender_domain,
sender_local_part,
sender_name,
datetime,
subject,
meta,
is_reply,
to_count,
to_group,
to_first,
is_send,
})
}
}
/// Extracts the recipient fields we store from an [`Address`].
///
/// The result is `(group name, display name, email address)`:
/// * for a single mailbox the group name is `None`;
/// * for a group, the first group-name entry (if any) and the first mailbox
///   of the group are used.
///
/// Returns `None` when the address is a group that contains no mailbox.
fn address_to_name_string(address: &Address) -> Option<(Option<String>, String, String)> {
    let (group, first_mailbox) = match address {
        Address::Mailbox(mailbox) => (None, mailbox),
        Address::Group((names, boxes)) => {
            // A group without any mailbox yields no result at all.
            let first_mailbox = boxes.first()?;
            (names.first().map(|name| name.to_string()), first_mailbox)
        }
    };
    let (display_name, email, _, _) = mailbox_to_string(first_mailbox);
    Some((group, display_name, email))
}
/// Splits a [`Mailbox`] into owned strings.
///
/// Returns `(display name, email address, local part, domain)`. The display
/// name is the mailbox's name parts joined by a single space, or an empty
/// string when the mailbox has no name.
fn mailbox_to_string(mailbox: &Mailbox) -> (String, String, String, String) {
    let display_name = mailbox
        .name
        .as_ref()
        .map(|words| {
            let parts: Vec<&str> = words.iter().map(|word| word.as_ref()).collect();
            parts.join(" ")
        })
        .unwrap_or_default();
    let email = emailaddress_to_string(&mailbox.address);
    let local_part = mailbox.address.local_part.to_string();
    let domain = mailbox.address.domain.to_string();
    (display_name, email, local_part, domain)
}
/// Formats an [`EmailAddress`] as `local_part@domain`.
fn emailaddress_to_string(address: &EmailAddress) -> String {
    // The parts are formatted directly; calling `.to_string()` on them first
    // would only allocate two throw-away strings.
    format!("{}@{}", address.local_part, address.domain)
}
fn emaildatetime_to_chrono(dt: &email_parser::time::DateTime) -> chrono::DateTime<Utc> {
Utc.ymd(
dt.date.year as i32,

@ -5,6 +5,7 @@ use serde::Deserialize;
use serde_json;
use crate::filesystem::RawEmailEntry;
use crate::types::EmailMeta;
#[derive(Deserialize, Debug, Clone)]
pub struct Meta {
@ -24,7 +25,17 @@ impl Meta {
}
}
fn parse_meta(raw_entry: &RawEmailEntry, _content: &Vec<u8>) -> Result<Meta> {
impl From<Meta> for EmailMeta {
fn from(meta: Meta) -> Self {
let is_seen = meta.is_seen();
EmailMeta {
tags: meta.labels,
is_seen,
}
}
}
pub fn parse_meta(raw_entry: &RawEmailEntry) -> Result<Meta> {
let content = match raw_entry.read_gmail_meta() {
None => bail!("No Gmail Meta Information Available"),
Some(content) => content?,

@ -1,12 +1,37 @@
use chrono::prelude::*;
use std::path::PathBuf;
/// Extra per-email information that some systems provide in addition to the
/// message itself, such as Gmail labels, Apple Mail tags or Apple XML.
#[derive(Debug)]
pub struct EmailMeta {
    /// Tags attached to the email.
    pub tags: Vec<String>,
    /// Whether the email is marked as seen.
    pub is_seen: bool,
}

impl EmailMeta {
    /// Separator placed between tags by [`EmailMeta::tags_string`].
    const TAG_SEPARATOR: &'static str = ":|:";

    /// Returns all tags as one string, separated by `:|:`.
    ///
    /// An empty tag list yields an empty string.
    pub fn tags_string(&self) -> String {
        self.tags.join(Self::TAG_SEPARATOR)
    }
}
/// Representation of a single parsed email
#[derive(Debug)]
pub struct EmailEntry {
/// Path of the email this entry was created from
pub path: PathBuf,
pub domain: String,
pub local_part: String,
/// Domain part of the sender's address
pub sender_domain: String,
/// Local part of the sender's address
pub sender_local_part: String,
/// Display name of the sender; empty when the sender has no name
pub sender_name: String,
/// Date of the email
pub datetime: chrono::DateTime<Utc>,
/// Subject of the email; empty when the email has no subject
pub subject: String,
/// The number of `To:` addresses
pub to_count: usize,
/// When this email was sent to a group, the group name
pub to_group: Option<String>,
/// The first name and address in `To`, if any
pub to_first: Option<(String, String)>,
/// Whether this email is a reply to another email
pub is_reply: bool,
/// Was this email sent from the account we're importing?
pub is_send: bool,
/// Additional information such as tags, when available
pub meta: Option<EmailMeta>,
}

@ -2,4 +2,4 @@ mod config;
mod email;
pub use config::Config;
pub use email::EmailEntry;
pub use email::{EmailEntry, EmailMeta};

Loading…
Cancel
Save