From 7a0be1657896f2908e4c2c959ffcc264029df64d Mon Sep 17 00:00:00 2001 From: Benedikt Terhechte Date: Wed, 15 Dec 2021 22:45:49 +0100 Subject: [PATCH] Ported the importer crate over --- ps-core/src/database/database_like.rs | 11 +- .../src/database}/db_message.rs | 2 +- ps-core/src/database/mod.rs | 1 + ps-core/src/importer.rs | 7 +- ps-core/src/lib.rs | 3 + ps-importer/Cargo.lock | 984 ++++++++++++++++++ ps-importer/Cargo.toml | 26 + ps-importer/src/format_type.rs | 0 .../src/formats/apple_mail/filesystem.rs | 87 ++ ps-importer/src/formats/apple_mail/mail.rs | 70 ++ ps-importer/src/formats/apple_mail/mod.rs | 24 + ps-importer/src/formats/gmailbackup/meta.rs | 46 + ps-importer/src/formats/gmailbackup/mod.rs | 23 + .../src/formats/gmailbackup/raw_email.rs | 118 +++ ps-importer/src/formats/mbox/mod.rs | 133 +++ ps-importer/src/formats/mod.rs | 30 + ps-importer/src/formats/shared/database.rs | 78 ++ ps-importer/src/formats/shared/filesystem.rs | 76 ++ ps-importer/src/formats/shared/mod.rs | 3 + ps-importer/src/formats/shared/parse.rs | 155 +++ ps-importer/src/importer.rs | 61 ++ ps-importer/src/lib.rs | 29 + ps-importer/src/message_adapter.rs | 151 +++ 23 files changed, 2114 insertions(+), 4 deletions(-) rename {ps-database/src => ps-core/src/database}/db_message.rs (93%) create mode 100644 ps-importer/Cargo.lock create mode 100644 ps-importer/Cargo.toml create mode 100644 ps-importer/src/format_type.rs create mode 100644 ps-importer/src/formats/apple_mail/filesystem.rs create mode 100644 ps-importer/src/formats/apple_mail/mail.rs create mode 100644 ps-importer/src/formats/apple_mail/mod.rs create mode 100644 ps-importer/src/formats/gmailbackup/meta.rs create mode 100644 ps-importer/src/formats/gmailbackup/mod.rs create mode 100644 ps-importer/src/formats/gmailbackup/raw_email.rs create mode 100644 ps-importer/src/formats/mbox/mod.rs create mode 100644 ps-importer/src/formats/mod.rs create mode 100644 ps-importer/src/formats/shared/database.rs create mode 100644 ps-importer/src/formats/shared/filesystem.rs create mode 100644 ps-importer/src/formats/shared/mod.rs create mode 100644 ps-importer/src/formats/shared/parse.rs create mode 100644 ps-importer/src/importer.rs create mode 100644 ps-importer/src/lib.rs create mode 100644 ps-importer/src/message_adapter.rs diff --git a/ps-core/src/database/database_like.rs b/ps-core/src/database/database_like.rs index 14959ba..3baaf15 100644 --- a/ps-core/src/database/database_like.rs +++ b/ps-core/src/database/database_like.rs @@ -1,7 +1,12 @@ -use eyre::Result; use std::path::Path; +use std::thread::JoinHandle; + +use crossbeam_channel::Sender; +use eyre::Result; + +use crate::Config; -use super::{query::Query, query_result::QueryResult}; +use super::{db_message::DBMessage, query::Query, query_result::QueryResult}; pub trait DatabaseLike: Send + Sync { fn new(path: impl AsRef) -> Result @@ -9,4 +14,6 @@ pub trait DatabaseLike: Send + Sync { Self: Sized; fn total_mails(&self) -> Result; fn query(&self, query: &Query) -> Result>; + fn import(self) -> (Sender, JoinHandle>); + fn save_config(&self, config: Config) -> Result<()>; } diff --git a/ps-database/src/db_message.rs b/ps-core/src/database/db_message.rs similarity index 93% rename from ps-database/src/db_message.rs rename to ps-core/src/database/db_message.rs index 9aec0ee..d5e72a9 100644 --- a/ps-database/src/db_message.rs +++ b/ps-core/src/database/db_message.rs @@ -1,6 +1,6 @@ use eyre::Report; -use ps_core::EmailEntry; +use crate::EmailEntry; /// Parameter for sending work to the database during `import`. pub enum DBMessage { diff --git a/ps-core/src/database/mod.rs b/ps-core/src/database/mod.rs index efed614..56b33ce 100644 --- a/ps-core/src/database/mod.rs +++ b/ps-core/src/database/mod.rs @@ -1,3 +1,4 @@ pub mod database_like; +pub mod db_message; pub mod query; pub mod query_result; diff --git a/ps-core/src/importer.rs b/ps-core/src/importer.rs index c0e3b1f..30d8d50 100644 --- a/ps-core/src/importer.rs +++ b/ps-core/src/importer.rs @@ -2,8 +2,13 @@ use crossbeam_channel; use eyre::{Report, Result}; use std::thread::JoinHandle; +use crate::DatabaseLike; + pub trait Importerlike { - fn import(self) -> Result<(MessageReceiver, JoinHandle>)>; + fn import( + self, + database: Database, + ) -> Result<(MessageReceiver, JoinHandle>)>; } /// The message that informs of the importers progress diff --git a/ps-core/src/lib.rs b/ps-core/src/lib.rs index 4f01c37..d2b7b12 100644 --- a/ps-core/src/lib.rs +++ b/ps-core/src/lib.rs @@ -3,8 +3,11 @@ mod importer; mod model; mod types; +pub use database::database_like::DatabaseLike; +pub use database::db_message::DBMessage; pub use database::query::{Field, OtherQuery, Query, ValueField, AMOUNT_FIELD_NAME}; pub use database::query_result::QueryResult; pub use types::{Config, EmailEntry, EmailMeta, FormatType}; pub use crossbeam_channel; +pub use importer::{Importerlike, Message, MessageReceiver, MessageSender}; diff --git a/ps-importer/Cargo.lock b/ps-importer/Cargo.lock new file mode 100644 index 0000000..61c52e0 --- /dev/null +++ b/ps-importer/Cargo.lock @@ -0,0 +1,984 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +version = 3 + +[[package]] +name = "adler" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe" + +[[package]] +name = "ahash" +version = "0.7.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fcb51a0695d8f838b1ee009b3fbf66bda078cd64590202a864a8f3e8c4315c47" +dependencies = [ + "getrandom", + "once_cell", + "version_check", +] + +[[package]] +name = "aho-corasick" +version = "0.7.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e37cfd5e7657ada45f742d6e99ca5788580b5c529dc78faf11ece6dc702656f" +dependencies = [ + "memchr", +] + +[[package]] +name = "ansi_term" +version = "0.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d52a9bb7ec0cf484c551830a7ce27bd20d67eac647e1befb56b0be4ee39a55d2" +dependencies = [ + "winapi", +] + +[[package]] +name = "arrayvec" +version = "0.4.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cd9fd44efafa8690358b7408d253adf110036b88f55672a933f01d616ad9b1b9" +dependencies = [ + "nodrop", +] + +[[package]] +name = "autocfg" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cdb031dd78e28731d87d56cc8ffef4a8f36ca26c38fe2de700543e627f8a464a" + +[[package]] +name = "base64" +version = "0.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "904dfeac50f3cdaba28fc6f57fdcddb75f49ed61346676a78c4ffe55877802fd" + +[[package]] +name = "bitflags" +version = "1.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" + +[[package]] +name = "cfg-if" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" + +[[package]] +name = "chrono" +version = "0.4.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "670ad68c9088c2a963aaa298cb369688cf3f9465ce5e2d4ca10e6e0098a1ce73" +dependencies = [ + "libc", + "num-integer", + "num-traits", + "time 0.1.44", + "winapi", +] + +[[package]] +name = "crc32fast" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "738c290dfaea84fc1ca15ad9c168d083b05a714e1efddd8edaab678dc28d2836" +dependencies = [ + "cfg-if", +] + +[[package]] +name = "crossbeam-channel" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "06ed27e177f16d65f0f0c22a213e17c696ace5dd64b14258b52f9417ccb52db4" +dependencies = [ + "cfg-if", + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-deque" +version = "0.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6455c0ca19f0d2fbf751b908d5c55c1f5cbc65e03c4225427254b46890bdde1e" +dependencies = [ + "cfg-if", + "crossbeam-epoch", + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-epoch" +version = "0.9.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4ec02e091aa634e2c3ada4a392989e7c3116673ef0ac5b72232439094d73b7fd" +dependencies = [ + "cfg-if", + "crossbeam-utils", + "lazy_static", + "memoffset", + "scopeguard", +] + +[[package]] +name = "crossbeam-utils" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d82cfc11ce7f2c3faef78d8a684447b40d503d9681acebed6cb728d45940c4db" +dependencies = [ + "cfg-if", + "lazy_static", +] + +[[package]] +name = "dirs-next" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b98cf8ebf19c3d1b223e151f99a4f9f0690dca41414773390fc824184ac833e1" +dependencies = [ + "cfg-if", + "dirs-sys-next", +] + +[[package]] +name = "dirs-sys-next" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4ebda144c4fe02d1f7ea1a7d9641b6fc6b580adcfa024ae48797ecdeb6825b4d" +dependencies = [ + "libc", + "redox_users", + "winapi", +] + +[[package]] +name = "either" +version = "1.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e78d4f1cc4ae33bbfc157ed5d5a5ef3bc29227303d595861deb238fcec4e9457" + +[[package]] +name = "email-parser" +version = "0.5.0" +source = "git+https://github.com/terhechte/email-parser#dba59d86771f7df67bb9e7f3a2c4b1e36b02d19b" +dependencies = [ + "textcode", + "timezone-abbreviations", +] + +[[package]] +name = "emlx" +version = "0.1.5" +source = "git+https://github.com/terhechte/emlx#44c2f278551d9e7a9ae0c3c3207c4471da3049fe" +dependencies = [ + "email-parser", + "plist", + "thiserror", +] + +[[package]] +name = "eyre" +version = "0.6.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "221239d1d5ea86bf5d6f91c9d6bc3646ffe471b08ff9b0f91c44f115ac969d2b" +dependencies = [ + "indenter", + "once_cell", +] + +[[package]] +name = "flate2" +version = "1.0.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e6988e897c1c9c485f43b47a529cef42fde0547f9d8d41a7062518f1d8fc53f" +dependencies = [ + "cfg-if", + "crc32fast", + "libc", + "miniz_oxide", +] + +[[package]] +name = "getrandom" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7fcd999463524c52659517fe2cea98493cfe485d10565e7b0fb07dbba7ad2753" +dependencies = [ + "cfg-if", + "libc", + "wasi", +] + +[[package]] +name = "hashbrown" +version = "0.11.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ab5ef0d4909ef3724cc8cce6ccc8572c5c817592e9285f5464f8e86f8bd3726e" +dependencies = [ + "ahash", +] + +[[package]] +name = "heck" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6d621efb26863f0e9924c6ac577e8275e5e6b77455db64ffa6c65c904e9e132c" +dependencies = [ + "unicode-segmentation", +] + +[[package]] +name = "hermit-abi" +version = "0.1.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "62b467343b94ba476dcb2500d242dadbb39557df889310ac77c5d99100aaac33" +dependencies = [ + "libc", +] + +[[package]] +name = "indenter" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ce23b50ad8242c51a442f3ff322d56b02f08852c77e4c0b4d3fd684abc89c683" + +[[package]] +name = "indexmap" +version = "1.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bc633605454125dec4b66843673f01c7df2b89479b32e0ed634e43a91cff62a5" +dependencies = [ + "autocfg", + "hashbrown", +] + +[[package]] +name = "itoa" +version = "0.4.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b71991ff56294aa922b450139ee08b3bfc70982c6b2c7562771375cf73542dd4" + +[[package]] +name = "itoa" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1aab8fc367588b89dcee83ab0fd66b72b50b72fa1904d7095045ace2b0c81c35" + +[[package]] +name = "lazy_static" +version = "1.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" + +[[package]] +name = "libc" +version = "0.2.112" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1b03d17f364a3a042d5e5d46b053bbbf82c92c9430c592dd4c064dc6ee997125" + +[[package]] +name = "line-wrap" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f30344350a2a51da54c1d53be93fade8a237e545dbcc4bdbe635413f2117cab9" +dependencies = [ + "safemem", +] + +[[package]] +name = "log" +version = "0.4.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "51b9bbe6c47d51fc3e1a9b945965946b4c44142ab8792c50835a980d362c2710" +dependencies = [ + "cfg-if", +] + +[[package]] +name = "lru" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6c748cfe47cb8da225c37595b3108bea1c198c84aaae8ea0ba76d01dda9fc803" +dependencies = [ + "hashbrown", +] + +[[package]] +name = "mbox-reader" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6231e973c0a8caceed71fac7355555012ba73fe230365989b298b36022e9e2ab" +dependencies = [ + "memmap", +] + +[[package]] +name = "memchr" +version = "2.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "308cc39be01b73d0d18f82a0e7b2a3df85245f84af96fdddc5d202d27e47b86a" + +[[package]] +name = "memmap" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6585fd95e7bb50d6cc31e20d4cf9afb4e2ba16c5846fc76793f11218da9c475b" +dependencies = [ + "libc", + "winapi", +] + +[[package]] +name = "memoffset" +version = "0.6.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5aa361d4faea93603064a027415f07bd8e1d5c88c9fbf68bf56a285428fd79ce" +dependencies = [ + "autocfg", +] + +[[package]] +name = "miniz_oxide" +version = "0.4.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a92518e98c078586bc6c934028adcca4c92a53d6a958196de835170a01d84e4b" +dependencies = [ + "adler", + "autocfg", +] + +[[package]] +name = "nodrop" +version = "0.1.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72ef4a56884ca558e5ddb05a1d1e7e1bfd9a68d9ed024c21704cc98872dae1bb" + +[[package]] +name = "num-format" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bafe4179722c2894288ee77a9f044f02811c86af699344c498b0840c698a2465" +dependencies = [ + "arrayvec", + "itoa 0.4.8", +] + +[[package]] +name = "num-integer" +version = "0.1.44" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d2cc698a63b549a70bc047073d2949cce27cd1c7b0a4a862d08a8031bc2801db" +dependencies = [ + "autocfg", + "num-traits", +] + +[[package]] +name = "num-traits" +version = "0.2.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a64b1ec5cda2586e284722486d802acf1f7dbdc623e2bfc57e65ca1cd099290" +dependencies = [ + "autocfg", +] + +[[package]] +name = "num_cpus" +version = "1.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "05499f3756671c15885fee9034446956fff3f243d6077b91e5767df161f766b3" +dependencies = [ + "hermit-abi", + "libc", +] + +[[package]] +name = "once_cell" +version = "1.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "da32515d9f6e6e489d7bc9d84c71b060db7247dc035bbe44eac88cf87486d8d5" + +[[package]] +name = "phf" +version = "0.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fabbf1ead8a5bcbc20f5f8b939ee3f5b0f6f281b6ad3468b84656b658b455259" +dependencies = [ + "phf_macros", + "phf_shared", + "proc-macro-hack", +] + +[[package]] +name = "phf_generator" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5d5285893bb5eb82e6aaf5d59ee909a06a16737a8970984dd7746ba9283498d6" +dependencies = [ + "phf_shared", + "rand", +] + +[[package]] +name = "phf_macros" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "58fdf3184dd560f160dd73922bea2d5cd6e8f064bf4b13110abd81b03697b4e0" +dependencies = [ + "phf_generator", + "phf_shared", + "proc-macro-hack", + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "phf_shared" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6796ad771acdc0123d2a88dc428b5e38ef24456743ddb1744ed628f9815c096" +dependencies = [ + "siphasher", +] + +[[package]] +name = "pin-project-lite" +version = "0.2.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8d31d11c69a6b52a174b42bdc0c30e5e11670f90788b2c471c31c1d17d449443" + +[[package]] +name = "plist" +version = "1.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bd39bc6cdc9355ad1dc5eeedefee696bb35c34caf21768741e81826c0bbd7225" +dependencies = [ + "base64", + "indexmap", + "line-wrap", + "serde", + "time 0.3.5", + "xml-rs", +] + +[[package]] +name = "ppv-lite86" +version = "0.2.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ed0cfbc8191465bed66e1718596ee0b0b35d5ee1f41c5df2189d0fe8bde535ba" + +[[package]] +name = "proc-macro-hack" +version = "0.5.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dbf0c48bc1d91375ae5c3cd81e3722dff1abcf81a30960240640d223f59fe0e5" + +[[package]] +name = "proc-macro2" +version = "1.0.34" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2f84e92c0f7c9d58328b85a78557813e4bd845130db68d7184635344399423b1" +dependencies = [ + "unicode-xid", +] + +[[package]] +name = "ps-core" +version = "0.2.0" +dependencies = [ + "chrono", + "crossbeam-channel", + "eyre", + "flate2", + "lru", + "num-format", + "once_cell", + "rand", + "rayon", + "regex", + "rsql_builder", + "serde", + "serde_json", + "shellexpand", + "strum", + "strum_macros", + "thiserror", + "tracing", + "tracing-subscriber", + "treemap", +] + +[[package]] +name = "ps-importer" +version = "0.2.0" +dependencies = [ + "chrono", + "email-parser", + "emlx", + "eyre", + "flate2", + "mbox-reader", + "once_cell", + "ps-core", + "rand", + "rayon", + "regex", + "serde", + "serde_json", + "shellexpand", + "thiserror", + "tracing", + "tracing-subscriber", + "walkdir", +] + +[[package]] +name = "quote" +version = "1.0.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "38bc8cc6a5f2e3655e0899c1b848643b2562f853f114bfec7be120678e3ace05" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "rand" +version = "0.8.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2e7573632e6454cf6b99d7aac4ccca54be06da05aca2ef7423d22d27d4d4bcd8" +dependencies = [ + "libc", + "rand_chacha", + "rand_core", + "rand_hc", +] + +[[package]] +name = "rand_chacha" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88" +dependencies = [ + "ppv-lite86", + "rand_core", +] + +[[package]] +name = "rand_core" +version = "0.6.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d34f1408f55294453790c48b2f1ebbb1c5b4b7563eb1f418bcfcfdbb06ebb4e7" +dependencies = [ + "getrandom", +] + +[[package]] +name = "rand_hc" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d51e9f596de227fda2ea6c84607f5558e196eeaf43c986b724ba4fb8fdf497e7" +dependencies = [ + "rand_core", +] + +[[package]] +name = "rayon" +version = "1.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c06aca804d41dbc8ba42dfd964f0d01334eceb64314b9ecf7c5fad5188a06d90" +dependencies = [ + "autocfg", + "crossbeam-deque", + "either", + "rayon-core", +] + +[[package]] +name = "rayon-core" +version = "1.9.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d78120e2c850279833f1dd3582f730c4ab53ed95aeaaaa862a2a5c71b1656d8e" +dependencies = [ + "crossbeam-channel", + "crossbeam-deque", + "crossbeam-utils", + "lazy_static", + "num_cpus", +] + +[[package]] +name = "redox_syscall" +version = "0.2.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8383f39639269cde97d255a32bdb68c047337295414940c68bdd30c2e13203ff" +dependencies = [ + "bitflags", +] + +[[package]] +name = "redox_users" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "528532f3d801c87aec9def2add9ca802fe569e44a544afe633765267840abe64" +dependencies = [ + "getrandom", + "redox_syscall", +] + +[[package]] +name = "regex" +version = "1.5.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d07a8629359eb56f1e2fb1652bb04212c072a87ba68546a04065d525673ac461" +dependencies = [ + "aho-corasick", + "memchr", + "regex-syntax", +] + +[[package]] +name = "regex-syntax" +version = "0.6.25" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f497285884f3fcff424ffc933e56d7cbca511def0c9831a7f9b5f6153e3cc89b" + +[[package]] +name = "rsql_builder" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9dbd5712883cef396d13516bb52b300fd97a29d52ca20361f0a4905bd38a2355" +dependencies = [ + "serde", + "serde_json", +] + +[[package]] +name = "rustversion" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f2cc38e8fa666e2de3c4aba7edeb5ffc5246c1c2ed0e3d17e560aeeba736b23f" + +[[package]] +name = "ryu" +version = "1.0.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "73b4b750c782965c211b42f022f59af1fbceabdd026623714f104152f1ec149f" + +[[package]] +name = "safemem" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ef703b7cb59335eae2eb93ceb664c0eb7ea6bf567079d843e09420219668e072" + +[[package]] +name = "same-file" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502" +dependencies = [ + "winapi-util", +] + +[[package]] +name = "scopeguard" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd" + +[[package]] +name = "serde" +version = "1.0.131" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b4ad69dfbd3e45369132cc64e6748c2d65cdfb001a2b1c232d128b4ad60561c1" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde_derive" +version = "1.0.131" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b710a83c4e0dff6a3d511946b95274ad9ca9e5d3ae497b63fda866ac955358d2" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "serde_json" +version = "1.0.73" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bcbd0344bc6533bc7ec56df11d42fb70f1b912351c0825ccb7211b59d8af7cf5" +dependencies = [ + "itoa 1.0.1", + "ryu", + "serde", +] + +[[package]] +name = "sharded-slab" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "900fba806f70c630b0a382d0d825e17a0f19fcd059a2ade1ff237bcddf446b31" +dependencies = [ + "lazy_static", +] + +[[package]] +name = "shellexpand" +version = "2.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "83bdb7831b2d85ddf4a7b148aa19d0587eddbe8671a436b7bd1182eaad0f2829" +dependencies = [ + "dirs-next", +] + +[[package]] +name = "siphasher" +version = "0.3.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "533494a8f9b724d33625ab53c6c4800f7cc445895924a8ef649222dcb76e938b" + +[[package]] +name = "smallvec" +version = "1.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1ecab6c735a6bb4139c0caafd0cc3635748bbb3acf4550e8138122099251f309" + +[[package]] +name = "strum" +version = "0.23.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cae14b91c7d11c9a851d3fbc80a963198998c2a64eec840477fa92d8ce9b70bb" + +[[package]] +name = "strum_macros" +version = "0.23.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5bb0dc7ee9c15cea6199cde9a127fa16a4c5819af85395457ad72d68edc85a38" +dependencies = [ + "heck", + "proc-macro2", + "quote", + "rustversion", + "syn", +] + +[[package]] +name = "syn" +version = "1.0.82" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8daf5dd0bb60cbd4137b1b587d2fc0ae729bc07cf01cd70b36a1ed5ade3b9d59" +dependencies = [ + "proc-macro2", + "quote", + "unicode-xid", +] + +[[package]] +name = "textcode" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "13de2d432b3eea016f6a010139c8b5a5bf050b5a05b8993d04033ca5232e44a9" + +[[package]] +name = "thiserror" +version = "1.0.30" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "854babe52e4df1653706b98fcfc05843010039b406875930a70e4d9644e5c417" +dependencies = [ + "thiserror-impl", +] + +[[package]] +name = "thiserror-impl" +version = "1.0.30" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "aa32fd3f627f367fe16f893e2597ae3c05020f8bba2666a4e6ea73d377e5714b" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "thread_local" +version = "1.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8018d24e04c95ac8790716a5987d0fec4f8b27249ffa0f7d33f1369bdfb88cbd" +dependencies = [ + "once_cell", +] + +[[package]] +name = "time" +version = "0.1.44" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6db9e6914ab8b1ae1c260a4ae7a49b6c5611b40328a735b21862567685e73255" +dependencies = [ + "libc", + "wasi", + "winapi", +] + +[[package]] +name = "time" +version = "0.3.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "41effe7cfa8af36f439fac33861b66b049edc6f9a32331e2312660529c1c24ad" +dependencies = [ + "itoa 0.4.8", + "libc", +] + +[[package]] +name = "timezone-abbreviations" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ead19eae5d0834473ce509eb859282c262e5b869a0171641d1668cd54c594d8f" +dependencies = [ + "phf", +] + +[[package]] +name = "tracing" +version = "0.1.29" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "375a639232caf30edfc78e8d89b2d4c375515393e7af7e16f01cd96917fb2105" +dependencies = [ + "cfg-if", + "pin-project-lite", + "tracing-attributes", + "tracing-core", +] + +[[package]] +name = "tracing-attributes" +version = "0.1.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f4f480b8f81512e825f337ad51e94c1eb5d3bbdf2b363dcd01e2b19a9ffe3f8e" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "tracing-core" +version = "0.1.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1f4ed65637b8390770814083d20756f87bfa2c21bf2f110babdc5438351746e4" +dependencies = [ + "lazy_static", +] + +[[package]] +name = "tracing-log" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a6923477a48e41c1951f1999ef8bb5a3023eb723ceadafe78ffb65dc366761e3" +dependencies = [ + "lazy_static", + "log", + "tracing-core", +] + +[[package]] +name = "tracing-subscriber" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "245da694cc7fc4729f3f418b304cb57789f1bed2a78c575407ab8a23f53cb4d3" +dependencies = [ + "ansi_term", + "sharded-slab", + "smallvec", + "thread_local", + "tracing-core", + "tracing-log", +] + +[[package]] +name = "treemap" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a1571f89da27a5e1aa83304ee1ab9519ea8c6432b4c8903aaaa6c9a9eecb6f36" + +[[package]] +name = "unicode-segmentation" +version = "1.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8895849a949e7845e06bd6dc1aa51731a103c42707010a5b591c0038fb73385b" + +[[package]] +name = "unicode-xid" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8ccb82d61f80a663efe1f787a51b16b5a51e3314d6ac365b08639f52387b33f3" + +[[package]] +name = "version_check" +version = "0.9.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5fecdca9a5291cc2b8dcf7dc02453fee791a280f3743cb0905f8822ae463b3fe" + +[[package]] +name = "walkdir" +version = "2.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "808cf2735cd4b6866113f648b791c6adc5714537bc222d9347bb203386ffda56" +dependencies = [ + "same-file", + "winapi", + "winapi-util", +] + +[[package]] +name = "wasi" +version = "0.10.0+wasi-snapshot-preview1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1a143597ca7c7793eff794def352d41792a93c481eb1042423ff7ff72ba2c31f" + +[[package]] +name = "winapi" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" +dependencies = [ + "winapi-i686-pc-windows-gnu", + "winapi-x86_64-pc-windows-gnu", +] + +[[package]] +name = "winapi-i686-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" + +[[package]] +name = "winapi-util" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "70ec6ce85bb158151cae5e5c87f95a8e97d2c0c4b001223f33a334e3ce5de178" +dependencies = [ + "winapi", +] + +[[package]] +name = "winapi-x86_64-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" + +[[package]] +name = "xml-rs" +version = "0.8.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d2d7d3948613f75c98fd9328cfdcc45acc4d360655289d0a7d4ec931392200a3" diff --git a/ps-importer/Cargo.toml b/ps-importer/Cargo.toml new file mode 100644 index 0000000..4899d75 --- /dev/null +++ b/ps-importer/Cargo.toml @@ -0,0 +1,26 @@ +[package] +name = "ps-importer" +version = "0.2.0" +edition = "2021" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +eyre = "0.6.5" +thiserror = "1.0.29" +tracing = "0.1.29" +tracing-subscriber = "0.3.0" +regex = "1.5.3" +flate2 = "1.0.22" +once_cell = "1.8.0" +email-parser = { git = "https://github.com/terhechte/email-parser", features = ["sender", "to", "in-reply-to", "date", "subject", "mime", "allow-duplicate-headers", "compatibility-fixes"]} +rayon = "1.5.1" +chrono = "0.4.19" +emlx = { git = "https://github.com/terhechte/emlx", features = []} +walkdir = "2.3.2" +mbox-reader = "0.2.0" +rand = "0.8.4" +shellexpand = "2.1.0" +serde_json = "1.0.70" +serde = { version = "1.0.130", features = ["derive"]} +ps-core = { path = "../ps-core" } \ No newline at end of file diff --git a/ps-importer/src/format_type.rs b/ps-importer/src/format_type.rs new file mode 100644 index 0000000..e69de29 diff --git a/ps-importer/src/formats/apple_mail/filesystem.rs b/ps-importer/src/formats/apple_mail/filesystem.rs new file mode 100644 index 0000000..76d1436 --- /dev/null +++ b/ps-importer/src/formats/apple_mail/filesystem.rs @@ -0,0 +1,87 @@ +//! We use a stubbornly stupid algorithm where we just +//! recursively drill down into the appropriate folder +//! until we find `emlx` files and return those. + +use eyre::{eyre, Result}; +use rayon::prelude::*; +use walkdir::WalkDir; + +use super::super::shared::filesystem::emails_in; +use ps_core::{Config, Message, MessageSender}; + +use super::mail::Mail; +use std::path::PathBuf; + +pub fn read_emails(config: &Config, sender: MessageSender) -> Result> { + // on macOS, we might need permission for the `Library` folder... + match std::fs::read_dir(&config.emails_folder_path) { + Ok(_) => (), + Err(e) => match e.kind() { + #[cfg(target_os = "macos")] + std::io::ErrorKind::PermissionDenied => { + tracing::info!("Could not read folder: {}", e); + if let Err(e) = sender.send(Message::MissingPermissions) { + tracing::error!("Error sending: {}", e); + } + // We should return early now, otherwise the code below will send a different + // error + return Ok(Vec::new()); + } + _ => { + if let Err(e) = sender.send(Message::Error(eyre!("Error: {:?}", &e))) { + tracing::error!("Error sending: {}", e); + } + } + }, + } + + // As `walkdir` does not support `par_iter` (see https://www.reddit.com/r/rust/comments/6eif7r/walkdir_users_we_need_you/) + // - -we first collect all folders, + // then all sub-folders in those ending in mboxending in .mbox and then iterate over them in paralell + let folders: Vec = WalkDir::new(&config.emails_folder_path) + .into_iter() + .filter_map(|e| match e { + Ok(n) + if n.path().is_dir() + && n.path() + .to_str() + .map(|e| e.contains(".mbox")) + .unwrap_or(false) => + { + tracing::trace!("Found folder {}", n.path().display()); + Some(n.path().to_path_buf()) + } + Err(e) => { + tracing::info!("Could not read folder: {}", e); + if let Err(e) = sender.send(Message::Error(eyre!("Could not read folder: {:?}", e))) + { + tracing::error!("Error sending error {}", e); + } + None + } + _ => None, + }) + .collect(); + sender.send(Message::ReadTotal(folders.len()))?; + let mails: Vec = folders + .into_par_iter() + .filter_map( + |path| match emails_in(path.clone(), sender.clone(), Mail::new) { + Ok(n) => Some(n), + Err(e) => { + tracing::error!("{} {:?}", path.display(), &e); + if let Err(e) = sender.send(Message::Error(eyre!( + "Could read mails in {}: {:?}", + path.display(), + e + ))) { + tracing::error!("Error sending error {}", e); + } + None + } + }, + ) + .flatten() + .collect(); + Ok(mails) +} diff --git a/ps-importer/src/formats/apple_mail/mail.rs b/ps-importer/src/formats/apple_mail/mail.rs new file mode 100644 index 0000000..8d23a3b --- /dev/null +++ b/ps-importer/src/formats/apple_mail/mail.rs @@ -0,0 +1,70 @@ +use emlx::parse_emlx; +use eyre::Result; + +use std::borrow::Cow; +use std::path::{Path, PathBuf}; + +use ps_core::EmailMeta; + +use super::super::shared::parse::ParseableEmail; + +pub struct Mail { + path: PathBuf, + // This is parsed out of the `emlx` as it is parsed + is_seen: bool, + // This is parsed out of the `path` + label: Option, + // Maildata + data: Vec, +} + +impl Mail { + pub fn new>(path: P) -> Option { + let path = path.as_ref(); + let name = path.file_name()?.to_str()?; + if !name.ends_with(".emlx") { + return None; + } + // find the folder ending with `.mbox` in the path + let ext = ".mbox"; + let label = path + .iter() + .map(|e| e.to_str()) + .flatten() + .find(|s| s.ends_with(ext)) + .map(|s| s.replace(ext, "")); + Some(Self { + path: path.to_path_buf(), + is_seen: false, + label, + data: Vec::new(), + }) + } +} + +impl ParseableEmail for Mail { + fn prepare(&mut self) -> Result<()> { + let data = std::fs::read(self.path.as_path())?; + let parsed = parse_emlx(&data)?; + self.is_seen = !parsed.flags.is_read; + self.data = parsed.message.to_vec(); + Ok(()) + } + fn message(&self) -> Result> { + Ok(Cow::Borrowed(self.data.as_slice())) + } + fn path(&self) -> &Path { + self.path.as_path() + } + fn meta(&self) -> Result> { + let tags = match self.label { + Some(ref n) => vec![n.clone()], + None => vec![], + }; + let meta = EmailMeta { + tags, + is_seen: self.is_seen, + }; + Ok(Some(meta)) + } +} diff --git a/ps-importer/src/formats/apple_mail/mod.rs b/ps-importer/src/formats/apple_mail/mod.rs new file mode 100644 index 0000000..204177a --- /dev/null +++ b/ps-importer/src/formats/apple_mail/mod.rs @@ -0,0 +1,24 @@ +mod filesystem; +mod mail; + +use shellexpand; +use std::{path::PathBuf, str::FromStr}; + +use super::{ImporterFormat, Result}; +use ps_core::{Config, MessageSender}; + +#[derive(Default)] +pub struct AppleMail {} + +impl ImporterFormat for AppleMail { + type Item = mail::Mail; + + fn default_path() -> Option { + let path = shellexpand::tilde("~/Library/Mail"); + Some(PathBuf::from_str(&path.to_string()).unwrap()) + } + + fn emails(&self, config: &Config, sender: MessageSender) -> Result> { + filesystem::read_emails(config, sender) + } +} diff --git a/ps-importer/src/formats/gmailbackup/meta.rs b/ps-importer/src/formats/gmailbackup/meta.rs new file mode 100644 index 0000000..36d7a57 --- /dev/null +++ b/ps-importer/src/formats/gmailbackup/meta.rs @@ -0,0 +1,46 @@ +use chrono::prelude::*; + +use eyre::{bail, Result}; +use serde::Deserialize; +use serde_json; + +use super::raw_email::RawEmailEntry; +use ps_core::EmailMeta; + +#[derive(Deserialize, Debug, Clone)] +pub struct Meta { + pub msg_id: String, + pub subject: String, + pub labels: Vec, + pub flags: Vec, + internal_date: i64, + + #[serde(skip, default = "Utc::now")] + pub created: DateTime, +} + +impl Meta { + pub fn is_seen(&self) -> bool { + self.labels.contains(&"\\seen".to_owned()) + } +} + +impl From for EmailMeta { + fn from(meta: Meta) -> Self { + let is_seen = meta.is_seen(); + EmailMeta { + tags: meta.labels, + is_seen, + } + } +} + +pub fn parse_meta(raw_entry: &RawEmailEntry) -> Result { + let content = match raw_entry.read_gmail_meta() { + None => bail!("No Gmail Meta Information Available"), + Some(content) => content?, + }; + let mut meta: Meta = serde_json::from_slice(&content)?; + meta.created = Utc.timestamp(meta.internal_date, 0); + Ok(meta) +} diff --git a/ps-importer/src/formats/gmailbackup/mod.rs b/ps-importer/src/formats/gmailbackup/mod.rs new file mode 100644 index 0000000..7aceffc --- /dev/null +++ b/ps-importer/src/formats/gmailbackup/mod.rs @@ -0,0 +1,23 @@ +mod meta; +mod raw_email; + +use super::shared::filesystem::{emails_in, folders_in}; +use super::{Config, ImporterFormat, MessageSender, Result}; +use raw_email::RawEmailEntry; + +#[derive(Default)] +pub struct Gmail {} + +impl ImporterFormat for Gmail { + type Item = raw_email::RawEmailEntry; + + fn default_path() -> Option { + None + } + + fn emails(&self, config: &Config, sender: MessageSender) -> Result> { + folders_in(&config.emails_folder_path, sender, |path, sender| { + emails_in(path, sender, RawEmailEntry::new) + }) + } +} diff --git a/ps-importer/src/formats/gmailbackup/raw_email.rs b/ps-importer/src/formats/gmailbackup/raw_email.rs new file mode 100644 index 0000000..22c6839 --- /dev/null +++ b/ps-importer/src/formats/gmailbackup/raw_email.rs @@ -0,0 +1,118 @@ +use eyre::{eyre, Result}; +use flate2::read::GzDecoder; + +use std::borrow::Cow; +use std::io::Read; +use std::path::{Path, PathBuf}; + +use super::super::shared::parse::ParseableEmail; +use ps_core::EmailMeta; + +/// Raw representation of an email. +/// Contains the paths to the relevant files as well +/// as the name of the folder the email was in. +#[derive(Debug)] +pub struct RawEmailEntry { + #[allow(unused)] + folder_name: String, + eml_path: PathBuf, + gmail_meta_path: Option, + is_compressed: bool, + #[allow(unused)] + size: u64, +} + +impl RawEmailEntry { + pub fn path(&self) -> &Path { + self.eml_path.as_path() + } + + pub fn read(&self) -> Result> { + if self.is_compressed { + let reader = std::fs::File::open(&self.eml_path)?; + let mut decoder = GzDecoder::new(reader); + let mut buffer = Vec::new(); + decoder.read_to_end(&mut buffer)?; + Ok(buffer) + } else { + std::fs::read(&self.eml_path).map_err(|e| eyre!("IO Error: {}", &e)) + } + } + + pub fn has_gmail_meta(&self) -> bool { + self.gmail_meta_path.is_some() + } + + pub fn read_gmail_meta(&self) -> Option>> { + // Just using map here returns a `&Option` whereas we want `Option` + #[allow(clippy::manual_map)] + match &self.gmail_meta_path { + Some(p) => Some(std::fs::read(p).map_err(|e| eyre!("IO Error: {}", &e))), + None => None, + } + } +} + +impl RawEmailEntry { + pub(super) fn new>(path: P) -> Option { + let path = path.as_ref(); + let stem = path.file_stem()?.to_str()?; + let name = path.file_name()?.to_str()?; + let is_eml_gz = name.ends_with(".eml.gz"); + let is_eml = name.ends_with(".eml"); + if !is_eml_gz && !is_eml { + return None; + } + let is_compressed = is_eml_gz; + let folder_name = path.parent()?.file_name()?.to_str()?.to_owned(); + let eml_path = path.to_path_buf(); + + let file_metadata = path.metadata().ok()?; + + // Build a meta path + let meta_path = path + .parent()? + .join(format!("{}.meta", stem.replace(".eml", ""))); + + // Only embed it, if it exists + let gmail_meta_path = if meta_path.exists() { + Some(meta_path) + } else { + None + }; + tracing::trace!( + "Email [c?: {}] {} {:?}", + is_compressed, + eml_path.display(), + gmail_meta_path + ); + Some(RawEmailEntry { + folder_name, + eml_path, + gmail_meta_path, + is_compressed, + size: file_metadata.len(), + }) + } +} + +impl ParseableEmail for RawEmailEntry { + fn prepare(&mut self) -> Result<()> { + Ok(()) + } + fn message(&self) -> Result> { + Ok(Cow::Owned(self.read()?)) + } + + fn path(&self) -> &Path { + self.eml_path.as_path() + } + + fn meta(&self) -> Result> { + if self.has_gmail_meta() { + Ok(Some(super::meta::parse_meta(self)?.into())) + } else { + Ok(None) + } + } +} diff --git a/ps-importer/src/formats/mbox/mod.rs b/ps-importer/src/formats/mbox/mod.rs new file mode 100644 index 0000000..f109dc4 --- /dev/null +++ b/ps-importer/src/formats/mbox/mod.rs @@ -0,0 +1,133 @@ +//! FIXME: Implement our own Mailbox reader that better implements the spec. +//! use jetsci for efficient searching: +//! https://github.com/shepmaster/jetscii +//! (or aho corasick) +//! MBox parsing is also not particularly fast as it currently doesn't use parallelism + +use eyre::eyre; +use mbox_reader; +use rayon::prelude::*; +use tracing; +use walkdir::WalkDir; + +use super::{Config, ImporterFormat, Message, MessageSender, Result}; + +use super::shared::parse::ParseableEmail; +use ps_core::EmailMeta; + +use std::borrow::Cow; +use std::path::{Path, PathBuf}; + +pub struct Mail { + path: PathBuf, + /// For now, we go with a very simple implementation: + /// Each mal will have a heap-allocated vec of the corresponding + /// bytes in the mbox. + /// This wastes a lot of allocations and shows the limits of our current abstraction. + /// It would be better to just save the headers and ignore the rest. + content: Vec, +} + +#[derive(Default)] +pub struct Mbox; + +/// The inner parsing code +fn inner_emails(config: &Config, sender: MessageSender) -> Result> { + // find all files ending in .mbox + let mboxes: Vec = WalkDir::new(&config.emails_folder_path) + .into_iter() + .filter_map(|e| match e { + Ok(n) + if n.path().is_file() + && n.path() + .to_str() + .map(|e| e.contains(".mbox")) + .unwrap_or(false) => + { + tracing::trace!("Found mbox file {}", n.path().display()); + Some(n.path().to_path_buf()) + } + Err(e) => { + tracing::info!("Could not read folder: {}", e); + if let Err(e) = sender.send(Message::Error(eyre!("Could not read folder: {:?}", e))) + { + tracing::error!("Error sending error {}", e); + } + None + } + _ => None, + }) + .collect(); + + let mails: Vec = mboxes + .into_par_iter() + .filter_map(|mbox_file| { + let mbox = match mbox_reader::MboxFile::from_file(&mbox_file) { + Ok(n) => n, + Err(e) => { + tracing::error!( + "Could not open mbox file at {}: {}", + &mbox_file.display(), + e + ); + return None; + } + }; + let inner_mails: Vec = mbox + .iter() + .filter_map(|e| { + let content = match e.message() { + Some(n) => n, + None => { + tracing::error!("Could not parse mail at offset {}", e.offset()); + return None; + } + }; + Some(Mail { + path: mbox_file.clone(), + content: content.to_owned(), + }) + }) + .collect(); + Some(inner_mails) + }) + .flatten() + .collect(); + Ok(mails) +} + +impl ImporterFormat for Mbox { + type Item = Mail; + + fn default_path() -> Option { + None + } + + fn emails(&self, config: &Config, sender: MessageSender) -> Result> { + inner_emails(config, sender) + } +} + +impl ParseableEmail for Mail { + fn prepare(&mut self) -> Result<()> { + Ok(()) + } + fn message(&self) -> Result> { + Ok(self.content.as_slice().into()) + } + fn path(&self) -> &Path { + self.path.as_path() + } + fn meta(&self) -> Result> { + // The filename is a tag, e.g. `INBOX.mbox`, `WORK.mbox` + if let Some(prefix) = self.path.file_stem() { + if let Some(s) = prefix.to_str().map(|s| s.to_owned()) { + return Ok(Some(EmailMeta { + tags: vec![s], + is_seen: false, + })); + } + } + Ok(None) + } +} diff --git a/ps-importer/src/formats/mod.rs b/ps-importer/src/formats/mod.rs new file mode 100644 index 0000000..6164dfb --- /dev/null +++ b/ps-importer/src/formats/mod.rs @@ -0,0 +1,30 @@ +use std::path::PathBuf; + +pub use eyre::Result; + +mod apple_mail; +mod gmailbackup; +mod mbox; +pub mod shared; + +pub use apple_mail::AppleMail; +pub use gmailbackup::Gmail; +pub use mbox::Mbox; + +use shared::parse::ParseableEmail; + +pub use ps_core::{Config, Message, MessageReceiver, MessageSender}; + +/// This is implemented by the various formats +/// to define how they return email data. +pub trait ImporterFormat: Send + Sync { + type Item: ParseableEmail; + + /// The default location path where the data for this format resides + /// on system. If there is none (such as for mbox) return `None` + fn default_path() -> Option; + + /// Return all the emails in this format. + /// Use the sneder to give progress updates via the `ReadProgress` case. + fn emails(&self, config: &Config, sender: MessageSender) -> Result>; +} diff --git a/ps-importer/src/formats/shared/database.rs b/ps-importer/src/formats/shared/database.rs new file mode 100644 index 0000000..5cbef7a --- /dev/null +++ b/ps-importer/src/formats/shared/database.rs @@ -0,0 +1,78 @@ +use super::parse::{parse_email, ParseableEmail}; +use ps_core::{Config, DBMessage, DatabaseLike, Message, MessageSender}; + +use eyre::{bail, Result}; +use rayon::prelude::*; + +pub fn into_database( + config: &Config, + mut emails: Vec, + tx: MessageSender, + database: Database, +) -> Result { + let total = emails.len(); + tracing::info!("Loaded {} emails", &total); + + // First, communicate the total amount of mails received + if let Err(e) = tx.send(Message::WriteTotal(total)) { + bail!("Channel Failure {:?}", &e); + } + + // Save the config into the database + if let Err(e) = database.save_config(config.clone()) { + bail!("Could not save config to database {:?}", &e); + } + + // Consume the connection to begin the import. It will return the `handle` to use for + // waiting for the database to finish importing, and the `sender` to submit work. + let (sender, handle) = database.import(); + + // Iterate over the mails.. + emails + // in paralell.. + .par_iter_mut() + // parsing them + .map(|raw_mail| parse_email(raw_mail, &config.sender_emails)) + // and inserting them into SQLite + .for_each(|entry| { + // Try to write the message into the database + if let Err(e) = match entry { + Ok(mail) => sender.send(DBMessage::Mail(Box::new(mail))), + Err(e) => sender.send(DBMessage::Error(e)), + } { + tracing::error!("Error Inserting into Database: {:?}", &e); + } + // Signal the write + if let Err(e) = tx.send(Message::WriteOne) { + tracing::error!("Channel Failure: {:?}", &e); + } + }); + + // Tell SQLite there's no more work coming. This will exit the listening loop + if let Err(e) = sender.send(DBMessage::Done) { + bail!("Channel Failure {:?}", &e); + } + + // Wait for SQLite to finish parsing + tracing::info!("Waiting for SQLite to finish"); + + if let Err(e) = tx.send(Message::FinishingUp) { + bail!("Channel Failure {:?}", &e); + } + + tracing::trace!("Waiting for database handle..."); + let output = match handle.join() { + Ok(Ok(count)) => Ok(count), + Ok(Err(e)) => Err(e), + Err(e) => Err(eyre::eyre!("Join Error: {:?}", &e)), + }; + + // Tell the caller that we're done processing. This will allow leaving the + // display loop + tracing::trace!("Messaging Done"); + if let Err(e) = tx.send(Message::Done) { + bail!("Channel Failure {:?}", &e); + } + + output +} diff --git a/ps-importer/src/formats/shared/filesystem.rs b/ps-importer/src/formats/shared/filesystem.rs new file mode 100644 index 0000000..eff6f41 --- /dev/null +++ b/ps-importer/src/formats/shared/filesystem.rs @@ -0,0 +1,76 @@ +use eyre::{bail, Result}; +use rayon::prelude::*; +use tracing::trace; + +use std::path::{Path, PathBuf}; + +use ps_core::{Message, MessageSender}; + +/// Call `FolderAction` on all files in all sub folders in +/// folder `folder`. +pub fn folders_in( + folder: P, + sender: MessageSender, + action: FolderAction, +) -> Result> +where + P: AsRef, + FolderAction: Fn(PathBuf, MessageSender) -> Result> + Send + Sync, + ActionResult: Send, +{ + let folder = folder.as_ref(); + if !folder.exists() { + bail!("Folder {} does not exist", &folder.display()); + } + // For progress reporting, we collect the iterator in order to + // know how many items there are. + let items: Vec<_> = std::fs::read_dir(&folder)?.collect(); + let total = items.len(); + sender.send(Message::ReadTotal(total))?; + Ok(items + .into_iter() + .par_bridge() + .filter_map(|entry| { + let path = entry + .map_err(|e| tracing::error!("{} {:?}", &folder.display(), &e)) + .ok()? + .path(); + if !path.is_dir() { + return None; + } + let sender = sender.clone(); + trace!("Reading folder {}", path.display()); + action(path.clone(), sender) + .map_err(|e| tracing::error!("{} {:?}", path.display(), &e)) + .ok() + }) + .flatten() + .collect()) +} + +pub fn emails_in>(path: P, sender: MessageSender, make: F) -> Result> +where + F: Fn(PathBuf) -> Option, + F: Send + Sync + 'static, + O: Send + Sync, +{ + let path = path.as_ref(); + let result = Ok(std::fs::read_dir(path)? + .into_iter() + .par_bridge() + .filter_map(|entry| { + let path = entry + .map_err(|e| tracing::error!("{} {:?}", &path.display(), &e)) + .ok()? + .path(); + if path.is_dir() { + return None; + } + trace!("Reading {}", &path.display()); + make(path) + }) + .collect()); + // We're done reading the folder + sender.send(Message::ReadOne).unwrap(); + result +} diff --git a/ps-importer/src/formats/shared/mod.rs b/ps-importer/src/formats/shared/mod.rs new file mode 100644 index 0000000..0015454 --- /dev/null +++ b/ps-importer/src/formats/shared/mod.rs @@ -0,0 +1,3 @@ +pub mod database; +pub mod filesystem; +pub mod parse; diff --git a/ps-importer/src/formats/shared/parse.rs b/ps-importer/src/formats/shared/parse.rs new file mode 100644 index 0000000..465c970 --- /dev/null +++ b/ps-importer/src/formats/shared/parse.rs @@ -0,0 +1,155 @@ +use chrono::prelude::*; +use email_parser::address::{Address, EmailAddress, Mailbox}; +use eyre::{eyre, Result}; + +use std::borrow::Cow; +use std::collections::HashSet; +use std::path::Path; + +use ps_core::{EmailEntry, EmailMeta}; + +/// Different `importer`s can implement this trait to provide the necessary +/// data to parse their data into a `EmailEntry`. +pub trait ParseableEmail: Send + Sized + Sync { + /// This will be called once before `message`, `path` and `meta` + /// are called. It can be used to perform parsing operations + fn prepare(&mut self) -> Result<()>; + /// The message content as bytes + fn message(&self) -> Result>; + /// The original path of the email in the filesystem + fn path(&self) -> &Path; + /// Optional meta information if they're available. + /// (Depending on the `importer` capabilities and system) + fn meta(&self) -> Result>; +} + +pub fn parse_email( + entry: &mut Entry, + sender_emails: &HashSet, +) -> Result { + if let Err(e) = entry.prepare() { + tracing::error!("Prepare Error: {:?}", e); + return Err(e); + } + let content = entry.message()?; + match email_parser::email::Email::parse(&content) { + Ok(email) => { + let path = entry.path(); + tracing::trace!("Parsing {}", path.display()); + let (sender_name, _, sender_local_part, sender_domain) = + mailbox_to_string(&email.sender); + + let datetime = emaildatetime_to_chrono(&email.date); + let subject = email.subject.map(|e| e.to_string()).unwrap_or_default(); + + let to_count = match email.to.as_ref() { + Some(n) => n.len(), + None => 0, + }; + let to = match email.to.as_ref().map(|v| v.first()).flatten() { + Some(n) => address_to_name_string(n), + None => None, + }; + let to_group = to.as_ref().map(|e| e.0.clone()).flatten(); + let to_first = to.as_ref().map(|e| (e.1.clone(), e.2.clone())); + + let is_reply = email.in_reply_to.map(|v| !v.is_empty()).unwrap_or(false); + + let meta = entry.meta()?; + + // In order to determine the sender, we have to + // build up the address again :-( + let is_send = { + let email = format!("{}@{}", sender_local_part, sender_domain); + sender_emails.contains(&email) + }; + + Ok(EmailEntry { + path: path.to_path_buf(), + sender_domain, + sender_local_part, + sender_name, + datetime, + subject, + meta, + is_reply, + to_count, + to_group, + to_first, + is_send, + }) + } + Err(error) => { + let error = eyre!( + "Could not parse email (trace to see contents): {:?} [{}]", + &error, + entry.path().display() + ); + tracing::error!("{:?}", &error); + if let Ok(content_string) = String::from_utf8(content.into_owned()) { + tracing::trace!("Contents:\n{}\n---\n", content_string); + } else { + tracing::trace!("Contents:\nInvalid UTF8\n---\n"); + } + Err(error) + } + } +} + +/// Returns a conversion from address to the fields we care about: +/// ([group name], display name, email address) +fn address_to_name_string(address: &Address) -> Option<(Option, String, String)> { + match address { + Address::Group((names, boxes)) => match (names.first(), boxes.first()) { + (group_name, Some(mailbox)) => { + let group = group_name.map(|e| e.to_string()); + let (display_name, address, _, _) = mailbox_to_string(mailbox); + Some((group, display_name, address)) + } + _ => None, + }, + Address::Mailbox(mailbox) => { + let (display_name, address, _, _) = mailbox_to_string(mailbox); + Some((None, display_name, address)) + } + } +} + +/// Returns (display name, email address, local part, domain) +fn mailbox_to_string(mailbox: &Mailbox) -> (String, String, String, String) { + let names = match mailbox.name.as_ref() { + Some(n) => n + .iter() + .map(|e| e.as_ref()) + .collect::>() + .join(" "), + None => "".to_owned(), + }; + ( + names, + emailaddress_to_string(&mailbox.address), + mailbox.address.local_part.to_string(), + mailbox.address.domain.to_string(), + ) +} + +fn emailaddress_to_string(address: &EmailAddress) -> String { + format!( + "{}@{}", + address.local_part.to_string(), + address.domain.to_string() + ) +} + +fn emaildatetime_to_chrono(dt: &email_parser::time::DateTime) -> chrono::DateTime { + Utc.ymd( + dt.date.year as i32, + dt.date.month_number() as u32, + dt.date.day as u32, + ) + .and_hms( + dt.time.time.hour as u32, + dt.time.time.minute as u32, + dt.time.time.second as u32, + ) +} diff --git a/ps-importer/src/importer.rs b/ps-importer/src/importer.rs new file mode 100644 index 0000000..a01a689 --- /dev/null +++ b/ps-importer/src/importer.rs @@ -0,0 +1,61 @@ +use std::thread::JoinHandle; + +use super::formats::shared; +use ps_core::{ + crossbeam_channel::{self, unbounded}, + Config, DatabaseLike, Importerlike, Message, MessageReceiver, +}; + +use super::formats::ImporterFormat; + +use eyre::Result; + +pub struct Importer { + config: Config, + format: Format, +} + +impl Importer { + pub fn new(config: Config, format: Format) -> Self { + Self { config, format } + } +} + +impl Importerlike for Importer { + fn import( + self, + database: Database, + ) -> Result<(MessageReceiver, JoinHandle>)> { + let Importer { format, .. } = self; + let (sender, receiver) = unbounded(); + + let config = self.config; + let handle: JoinHandle> = std::thread::spawn(move || { + let outer_sender = sender.clone(); + let processed = move || { + let emails = format.emails(&config, sender.clone())?; + let processed = + shared::database::into_database(&config, emails, sender.clone(), database)?; + + Ok(processed) + }; + let result = processed(); + + // Send the error away and map it to a crossbeam channel error + match result { + Ok(_) => Ok(()), + Err(e) => match outer_sender.send(Message::Error(e)) { + Ok(_) => Ok(()), + Err(e) => Err(eyre::Report::new(e)), + }, + } + }); + Ok((receiver, handle)) + } +} + +// impl Importerlike for Box { +// fn import(self) -> Result<(MessageReceiver, JoinHandle>)> { +// (*self).import() +// } +// } diff --git a/ps-importer/src/lib.rs b/ps-importer/src/lib.rs new file mode 100644 index 0000000..df267ab --- /dev/null +++ b/ps-importer/src/lib.rs @@ -0,0 +1,29 @@ +pub(crate) mod formats; +#[allow(clippy::module_inception)] +mod importer; +mod message_adapter; + +pub use message_adapter::*; +use ps_core::{Config, EmailEntry, EmailMeta, FormatType, Importerlike}; + +use formats::ImporterFormat; + +// pub fn importer(config: &Config) -> Box { +// match config.format { +// AppleMail => Box::new(applemail_importer(config.clone())), +// GmailVault => Box::new(gmail_importer(config.clone())), +// Mbox => Box::new(gmail_importer(config.clone())), +// } +// } + +pub fn gmail_importer(config: Config) -> importer::Importer { + importer::Importer::new(config, formats::Gmail::default()) +} + +pub fn applemail_importer(config: Config) -> importer::Importer { + importer::Importer::new(config, formats::AppleMail::default()) +} + +pub fn mbox_importer(config: Config) -> importer::Importer { + importer::Importer::new(config, formats::Mbox::default()) +} diff --git a/ps-importer/src/message_adapter.rs b/ps-importer/src/message_adapter.rs new file mode 100644 index 0000000..b9bd404 --- /dev/null +++ b/ps-importer/src/message_adapter.rs @@ -0,0 +1,151 @@ +use eyre::{bail, eyre, Report, Result}; + +use std::sync::{Arc, RwLock}; +use std::thread::JoinHandle; + +use super::formats::ImporterFormat; +use ps_core::{DatabaseLike, Importerlike, Message}; + +#[derive(Debug, Default)] +struct Data { + total_read: usize, + read: usize, + total_write: usize, + write: usize, + finishing: bool, + done: bool, + error: Option, + #[cfg(target_os = "macos")] + missing_permissions: bool, +} + +#[derive(Clone, Debug, Copy)] +pub struct Progress { + pub total: usize, + pub count: usize, +} + +#[derive(Clone, Debug, Copy)] +pub struct State { + pub finishing: bool, + pub done: bool, + pub written: usize, + #[cfg(target_os = "macos")] + pub missing_permissions: bool, +} + +/// This can be initialized with a [`MessageSender`] and it will +/// automatically tally up the information into a thread-safe +/// datastructure +pub struct Adapter { + producer_lock: Arc>, + consumer_lock: Arc>, +} + +impl Adapter { + #[allow(clippy::new_without_default)] + pub fn new() -> Self { + let rw_lock = Arc::new(RwLock::default()); + // FIXME: Look up this warning. It looks like the clones are necessary? + #[allow(clippy::redundant_clone)] + let producer_lock = rw_lock.clone(); + #[allow(clippy::redundant_clone)] + let consumer_lock = rw_lock.clone(); + Self { + producer_lock, + consumer_lock, + } + } + + /// Starts up a thread that handles the `MessageReceiver` messages + /// into state that can be accessed via [`read_count`], [`write_count`] and [`finished`] + pub fn process( + &self, + importer: super::importer::Importer, + database: Database, + ) -> Result>> { + let (receiver, handle) = importer.import(database)?; + let lock = self.producer_lock.clone(); + let handle = std::thread::spawn(move || { + 'outer: loop { + let mut write_guard = match lock.write() { + Ok(n) => n, + Err(e) => bail!("RwLock Error: {:?}", e), + }; + for entry in receiver.try_iter() { + match entry { + Message::ReadTotal(n) => write_guard.total_read = n, + Message::ReadOne => { + write_guard.read += 1; + // Depending on the implementation, we may receive read calls before + // the total size is known. We prevent division by zero by + // always setting the total to read + 1 in these cases + if write_guard.total_read <= write_guard.read { + write_guard.total_read = write_guard.read + 1; + } + } + Message::WriteTotal(n) => write_guard.total_write = n, + Message::WriteOne => write_guard.write += 1, + Message::FinishingUp => write_guard.finishing = true, + Message::Done => { + write_guard.done = true; + break 'outer; + } + Message::Error(e) => { + write_guard.error = Some(e); + } + #[cfg(target_os = "macos")] + Message::MissingPermissions => { + write_guard.missing_permissions = true; + } + }; + } + } + + let _ = handle.join().map_err(|op| eyre::eyre!("{:?}", &op))??; + + Ok(()) + }); + Ok(handle) + } + + pub fn read_count(&self) -> Result { + let item = self.consumer_lock.read().map_err(|e| eyre!("{:?}", &e))?; + Ok(Progress { + total: item.total_read, + count: item.read, + }) + } + + pub fn write_count(&self) -> Result { + let item = self.consumer_lock.read().map_err(|e| eyre!("{:?}", &e))?; + Ok(Progress { + total: item.total_write, + count: item.write, + }) + } + + pub fn finished(&self) -> Result { + let item = self.consumer_lock.read().map_err(|e| eyre!("{:?}", &e))?; + Ok(State { + finishing: item.finishing, + done: item.done, + written: item.write, + #[cfg(target_os = "macos")] + missing_permissions: item.missing_permissions, + }) + } + + pub fn error(&self) -> Result> { + // We take the error of out of the write lock only if there is an error. + let item = self.consumer_lock.read().map_err(|e| eyre!("{:?}", &e))?; + let is_error = item.error.is_some(); + drop(item); + if is_error { + let mut item = self.producer_lock.write().map_err(|e| eyre!("{:?}", &e))?; + Ok(item.error.take()) + } else { + Ok(None) + } + } +}