commit e97ec16a041ff17be6a7693e5fa15abdc558f580 Author: Benedikt Terhechte Date: Wed Sep 29 15:29:21 2021 +0200 Initial commit before changes diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..ea8c4bf --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +/target diff --git a/Cargo.lock b/Cargo.lock new file mode 100644 index 0000000..dea82e0 --- /dev/null +++ b/Cargo.lock @@ -0,0 +1,1116 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +version = 3 + +[[package]] +name = "adler" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe" + +[[package]] +name = "ahash" +version = "0.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "43bb833f0bf979d8475d38fbf09ed3b8a55e1885fe93ad3f93239fc6a4f17b98" +dependencies = [ + "getrandom", + "once_cell", + "version_check", +] + +[[package]] +name = "aho-corasick" +version = "0.7.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e37cfd5e7657ada45f742d6e99ca5788580b5c529dc78faf11ece6dc702656f" +dependencies = [ + "memchr", +] + +[[package]] +name = "ansi_term" +version = "0.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d52a9bb7ec0cf484c551830a7ce27bd20d67eac647e1befb56b0be4ee39a55d2" +dependencies = [ + "winapi", +] + +[[package]] +name = "autocfg" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cdb031dd78e28731d87d56cc8ffef4a8f36ca26c38fe2de700543e627f8a464a" + +[[package]] +name = "bitflags" +version = "1.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" + +[[package]] +name = "block-buffer" +version = "0.7.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c0940dc441f31689269e10ac70eb1002a3a1d3ad1390e030043662eb7fe4688b" +dependencies = [ + "block-padding", + "byte-tools", + "byteorder", + "generic-array", +] + +[[package]] +name = "block-padding" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fa79dedbb091f449f1f39e53edf88d5dbe95f895dae6135a8d7b881fb5af73f5" +dependencies = [ + "byte-tools", +] + +[[package]] +name = "bumpalo" +version = "3.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d9df67f7bf9ef8498769f994239c45613ef0c5899415fb58e9add412d2c1a538" + +[[package]] +name = "byte-tools" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e3b5ca7a04898ad4bcd41c90c5285445ff5b791899bb1b0abdd2a2aa791211d7" + +[[package]] +name = "byteorder" +version = "1.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "14c189c53d098945499cdfa7ecc63567cf3886b3332b312a5b4585d8d3a6a610" + +[[package]] +name = "cfg-if" +version = "0.1.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4785bdd1c96b2a846b2bd7cc02e86b6b3dbf14e7e53446c4f54c92a361040822" + +[[package]] +name = "cfg-if" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" + +[[package]] +name = "chrono" +version = "0.4.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "670ad68c9088c2a963aaa298cb369688cf3f9465ce5e2d4ca10e6e0098a1ce73" +dependencies = [ + "libc", + "num-integer", + "num-traits", + "time", + "winapi", +] + +[[package]] +name = "console_error_panic_hook" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b8d976903543e0c48546a91908f21588a680a8c8f984df9a5d69feccb2b2a211" +dependencies = [ + "cfg-if 0.1.10", + "wasm-bindgen", +] + +[[package]] +name = "crc32fast" +version = "1.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "81156fece84ab6a9f2afdb109ce3ae577e42b1228441eded99bd77f627953b1a" +dependencies = [ + "cfg-if 1.0.0", +] + +[[package]] +name = "crossbeam-channel" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "06ed27e177f16d65f0f0c22a213e17c696ace5dd64b14258b52f9417ccb52db4" +dependencies = [ + "cfg-if 1.0.0", + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-deque" +version = "0.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6455c0ca19f0d2fbf751b908d5c55c1f5cbc65e03c4225427254b46890bdde1e" +dependencies = [ + "cfg-if 1.0.0", + "crossbeam-epoch", + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-epoch" +version = "0.9.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4ec02e091aa634e2c3ada4a392989e7c3116673ef0ac5b72232439094d73b7fd" +dependencies = [ + "cfg-if 1.0.0", + "crossbeam-utils", + "lazy_static", + "memoffset", + "scopeguard", +] + +[[package]] +name = "crossbeam-utils" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d82cfc11ce7f2c3faef78d8a684447b40d503d9681acebed6cb728d45940c4db" +dependencies = [ + "cfg-if 1.0.0", + "lazy_static", +] + +[[package]] +name = "digest" +version = "0.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f3d0c8c8752312f9713efd397ff63acb9f85585afbf179282e720e7704954dd5" +dependencies = [ + "generic-array", +] + +[[package]] +name = "either" +version = "1.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e78d4f1cc4ae33bbfc157ed5d5a5ef3bc29227303d595861deb238fcec4e9457" + +[[package]] +name = "email-address-parser" +version = "1.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1840503695adadbe314fe6cedd297fbc406d13f0fe06fd28d02e499a17c2a599" +dependencies = [ + "console_error_panic_hook", + "pest", + "pest_derive", + "quick-xml", + "wasm-bindgen", +] + +[[package]] +name = "email-parser" +version = "0.5.0" +dependencies = [ + "textcode", + "timezone-abbreviations", +] + +[[package]] +name = "eml-parser" +version = "0.1.2" +dependencies = [ + "regex", +] + +[[package]] +name = "eyre" +version = "0.6.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "221239d1d5ea86bf5d6f91c9d6bc3646ffe471b08ff9b0f91c44f115ac969d2b" +dependencies = [ + "indenter", + "once_cell", +] + +[[package]] +name = "fake-simd" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e88a8acf291dafb59c2d96e8f59828f3838bb1a70398823ade51a84de6a6deed" + +[[package]] +name = "fallible-iterator" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4443176a9f2c162692bd3d352d745ef9413eec5782a80d8fd6f8a1ac692a07f7" + +[[package]] +name = "fallible-streaming-iterator" +version = "0.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7360491ce676a36bf9bb3c56c1aa791658183a54d2744120f27285738d90465a" + +[[package]] +name = "flate2" +version = "1.0.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e6988e897c1c9c485f43b47a529cef42fde0547f9d8d41a7062518f1d8fc53f" +dependencies = [ + "cfg-if 1.0.0", + "crc32fast", + "libc", + "miniz_oxide", +] + +[[package]] +name = "generic-array" +version = "0.12.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ffdf9f34f1447443d37393cc6c2b8313aebddcd96906caf34e54c68d8e57d7bd" +dependencies = [ + "typenum", +] + +[[package]] +name = "getrandom" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7fcd999463524c52659517fe2cea98493cfe485d10565e7b0fb07dbba7ad2753" +dependencies = [ + "cfg-if 1.0.0", + "libc", + "wasi", +] + +[[package]] +name = "gmaildb" +version = "0.1.0" +dependencies = [ + "chrono", + "crossbeam-channel", + "email-address-parser", + "email-parser", + "eml-parser", + "eyre", + "flate2", + "lazy_static", + "rayon", + "regex", + "rhymessage", + "rusqlite", + "serde", + "serde_json", + "strum", + "strum_macros", + "thiserror", + "tracing", + "tracing-subscriber", +] + +[[package]] +name = "hashbrown" +version = "0.11.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ab5ef0d4909ef3724cc8cce6ccc8572c5c817592e9285f5464f8e86f8bd3726e" +dependencies = [ + "ahash", +] + +[[package]] +name = "hashlink" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7249a3129cbc1ffccd74857f81464a323a152173cdb134e0fd81bc803b29facf" +dependencies = [ + "hashbrown", +] + +[[package]] +name = "heck" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6d621efb26863f0e9924c6ac577e8275e5e6b77455db64ffa6c65c904e9e132c" +dependencies = [ + "unicode-segmentation", +] + +[[package]] +name = "hermit-abi" +version = "0.1.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "62b467343b94ba476dcb2500d242dadbb39557df889310ac77c5d99100aaac33" +dependencies = [ + "libc", +] + +[[package]] +name = "indenter" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ce23b50ad8242c51a442f3ff322d56b02f08852c77e4c0b4d3fd684abc89c683" + +[[package]] +name = "itoa" +version = "0.4.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b71991ff56294aa922b450139ee08b3bfc70982c6b2c7562771375cf73542dd4" + +[[package]] +name = "lazy_static" +version = "1.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" + +[[package]] +name = "libc" +version = "0.2.103" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dd8f7255a17a627354f321ef0055d63b898c6fb27eff628af4d1b66b7331edf6" + +[[package]] +name = "libsqlite3-sys" +version = "0.22.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "290b64917f8b0cb885d9de0f9959fe1f775d7fa12f1da2db9001c1c8ab60f89d" +dependencies = [ + "pkg-config", + "vcpkg", +] + +[[package]] +name = "log" +version = "0.4.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "51b9bbe6c47d51fc3e1a9b945965946b4c44142ab8792c50835a980d362c2710" +dependencies = [ + "cfg-if 1.0.0", +] + +[[package]] +name = "maplit" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3e2e65a1a2e43cfcb47a895c4c8b10d1f4a61097f9f254f183aee60cad9c651d" + +[[package]] +name = "matchers" +version = "0.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f099785f7595cc4b4553a174ce30dd7589ef93391ff414dbb67f62392b9e0ce1" +dependencies = [ + "regex-automata", +] + +[[package]] +name = "memchr" +version = "2.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "308cc39be01b73d0d18f82a0e7b2a3df85245f84af96fdddc5d202d27e47b86a" + +[[package]] +name = "memoffset" +version = "0.6.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "59accc507f1338036a0477ef61afdae33cde60840f4dfe481319ce3ad116ddf9" +dependencies = [ + "autocfg", +] + +[[package]] +name = "miniz_oxide" +version = "0.4.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a92518e98c078586bc6c934028adcca4c92a53d6a958196de835170a01d84e4b" +dependencies = [ + "adler", + "autocfg", +] + +[[package]] +name = "named_tuple" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "040c7794ae549f63b67c97aa325278ca37c8260226147a1be50d9af96f292430" +dependencies = [ + "proc-macro2 0.4.30", + "quote 0.6.13", + "syn 0.15.44", +] + +[[package]] +name = "num-integer" +version = "0.1.44" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d2cc698a63b549a70bc047073d2949cce27cd1c7b0a4a862d08a8031bc2801db" +dependencies = [ + "autocfg", + "num-traits", +] + +[[package]] +name = "num-traits" +version = "0.2.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a64b1ec5cda2586e284722486d802acf1f7dbdc623e2bfc57e65ca1cd099290" +dependencies = [ + "autocfg", +] + +[[package]] +name = "num_cpus" +version = "1.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "05499f3756671c15885fee9034446956fff3f243d6077b91e5767df161f766b3" +dependencies = [ + "hermit-abi", + "libc", +] + +[[package]] +name = "once_cell" +version = "1.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "692fcb63b64b1758029e0a96ee63e049ce8c5948587f2f7208df04625e5f6b56" + +[[package]] +name = "opaque-debug" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2839e79665f131bdb5782e51f2c6c9599c133c6098982a54c794358bf432529c" + +[[package]] +name = "pest" +version = "2.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "10f4872ae94d7b90ae48754df22fd42ad52ce740b8f370b03da4835417403e53" +dependencies = [ + "ucd-trie", +] + +[[package]] +name = "pest_derive" +version = "2.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "833d1ae558dc601e9a60366421196a8d94bc0ac980476d0b67e1d0988d72b2d0" +dependencies = [ + "pest", + "pest_generator", +] + +[[package]] +name = "pest_generator" +version = "2.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "99b8db626e31e5b81787b9783425769681b347011cc59471e33ea46d2ea0cf55" +dependencies = [ + "pest", + "pest_meta", + "proc-macro2 1.0.29", + "quote 1.0.9", + "syn 1.0.77", +] + +[[package]] +name = "pest_meta" +version = "2.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "54be6e404f5317079812fc8f9f5279de376d8856929e21c184ecf6bbd692a11d" +dependencies = [ + "maplit", + "pest", + "sha-1", +] + +[[package]] +name = "phf" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b9fc3db1018c4b59d7d582a739436478b6035138b6aecbce989fc91c3e98409f" +dependencies = [ + "phf_macros", + "phf_shared", + "proc-macro-hack", +] + +[[package]] +name = "phf_generator" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5d5285893bb5eb82e6aaf5d59ee909a06a16737a8970984dd7746ba9283498d6" +dependencies = [ + "phf_shared", + "rand", +] + +[[package]] +name = "phf_macros" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "58fdf3184dd560f160dd73922bea2d5cd6e8f064bf4b13110abd81b03697b4e0" +dependencies = [ + "phf_generator", + "phf_shared", + "proc-macro-hack", + "proc-macro2 1.0.29", + "quote 1.0.9", + "syn 1.0.77", +] + +[[package]] +name = "phf_shared" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6796ad771acdc0123d2a88dc428b5e38ef24456743ddb1744ed628f9815c096" +dependencies = [ + "siphasher", +] + +[[package]] +name = "pin-project-lite" +version = "0.2.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8d31d11c69a6b52a174b42bdc0c30e5e11670f90788b2c471c31c1d17d449443" + +[[package]] +name = "pkg-config" +version = "0.3.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7c9b1041b4387893b91ee6746cddfc28516aff326a3519fb2adf820932c5e6cb" + +[[package]] +name = "ppv-lite86" +version = "0.2.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac74c624d6b2d21f425f752262f42188365d7b8ff1aff74c82e45136510a4857" + +[[package]] +name = "proc-macro-hack" +version = "0.5.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dbf0c48bc1d91375ae5c3cd81e3722dff1abcf81a30960240640d223f59fe0e5" + +[[package]] +name = "proc-macro2" +version = "0.4.30" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cf3d2011ab5c909338f7887f4fc896d35932e29146c12c8d01da6b22a80ba759" +dependencies = [ + "unicode-xid 0.1.0", +] + +[[package]] +name = "proc-macro2" +version = "1.0.29" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b9f5105d4fdaab20335ca9565e106a5d9b82b6219b5ba735731124ac6711d23d" +dependencies = [ + "unicode-xid 0.2.2", +] + +[[package]] +name = "quick-xml" +version = "0.18.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3cc440ee4802a86e357165021e3e255a9143724da31db1e2ea540214c96a0f82" +dependencies = [ + "memchr", +] + +[[package]] +name = "quote" +version = "0.6.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6ce23b6b870e8f94f81fb0a363d65d86675884b34a09043c81e5562f11c1f8e1" +dependencies = [ + "proc-macro2 0.4.30", +] + +[[package]] +name = "quote" +version = "1.0.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c3d0b9745dc2debf507c8422de05d7226cc1f0644216dfdfead988f9b1ab32a7" +dependencies = [ + "proc-macro2 1.0.29", +] + +[[package]] +name = "rand" +version = "0.8.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2e7573632e6454cf6b99d7aac4ccca54be06da05aca2ef7423d22d27d4d4bcd8" +dependencies = [ + "libc", + "rand_chacha", + "rand_core", + "rand_hc", +] + +[[package]] +name = "rand_chacha" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88" +dependencies = [ + "ppv-lite86", + "rand_core", +] + +[[package]] +name = "rand_core" +version = "0.6.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d34f1408f55294453790c48b2f1ebbb1c5b4b7563eb1f418bcfcfdbb06ebb4e7" +dependencies = [ + "getrandom", +] + +[[package]] +name = "rand_hc" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d51e9f596de227fda2ea6c84607f5558e196eeaf43c986b724ba4fb8fdf497e7" +dependencies = [ + "rand_core", +] + +[[package]] +name = "rayon" +version = "1.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c06aca804d41dbc8ba42dfd964f0d01334eceb64314b9ecf7c5fad5188a06d90" +dependencies = [ + "autocfg", + "crossbeam-deque", + "either", + "rayon-core", +] + +[[package]] +name = "rayon-core" +version = "1.9.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d78120e2c850279833f1dd3582f730c4ab53ed95aeaaaa862a2a5c71b1656d8e" +dependencies = [ + "crossbeam-channel", + "crossbeam-deque", + "crossbeam-utils", + "lazy_static", + "num_cpus", +] + +[[package]] +name = "regex" +version = "1.5.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d07a8629359eb56f1e2fb1652bb04212c072a87ba68546a04065d525673ac461" +dependencies = [ + "aho-corasick", + "memchr", + "regex-syntax", +] + +[[package]] +name = "regex-automata" +version = "0.1.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6c230d73fb8d8c1b9c0b3135c5142a8acee3a0558fb8db5cf1cb65f8d7862132" +dependencies = [ + "regex-syntax", +] + +[[package]] +name = "regex-syntax" +version = "0.6.25" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f497285884f3fcff424ffc933e56d7cbca511def0c9831a7f9b5f6153e3cc89b" + +[[package]] +name = "rhymessage" +version = "1.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9e9d29b9f4112a4525d66aa9d6ec6c62dd48d7acdfd5ab414836727653a72d79" +dependencies = [ + "named_tuple", + "thiserror", +] + +[[package]] +name = "rusqlite" +version = "0.25.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "57adcf67c8faaf96f3248c2a7b419a0dbc52ebe36ba83dd57fe83827c1ea4eb3" +dependencies = [ + "bitflags", + "chrono", + "fallible-iterator", + "fallible-streaming-iterator", + "hashlink", + "libsqlite3-sys", + "memchr", + "smallvec", +] + +[[package]] +name = "ryu" +version = "1.0.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "71d301d4193d031abdd79ff7e3dd721168a9572ef3fe51a1517aba235bd8f86e" + +[[package]] +name = "scopeguard" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd" + +[[package]] +name = "serde" +version = "1.0.130" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f12d06de37cf59146fbdecab66aa99f9fe4f78722e3607577a5375d66bd0c913" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde_derive" +version = "1.0.130" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d7bc1a1ab1961464eae040d96713baa5a724a8152c1222492465b54322ec508b" +dependencies = [ + "proc-macro2 1.0.29", + "quote 1.0.9", + "syn 1.0.77", +] + +[[package]] +name = "serde_json" +version = "1.0.68" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0f690853975602e1bfe1ccbf50504d67174e3bcf340f23b5ea9992e0587a52d8" +dependencies = [ + "itoa", + "ryu", + "serde", +] + +[[package]] +name = "sha-1" +version = "0.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f7d94d0bede923b3cea61f3f1ff57ff8cdfd77b400fb8f9998949e0cf04163df" +dependencies = [ + "block-buffer", + "digest", + "fake-simd", + "opaque-debug", +] + +[[package]] +name = "sharded-slab" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "740223c51853f3145fe7c90360d2d4232f2b62e3449489c207eccde818979982" +dependencies = [ + "lazy_static", +] + +[[package]] +name = "siphasher" +version = "0.3.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "533494a8f9b724d33625ab53c6c4800f7cc445895924a8ef649222dcb76e938b" + +[[package]] +name = "smallvec" +version = "1.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fe0f37c9e8f3c5a4a66ad655a93c74daac4ad00c441533bf5c6e7990bb42604e" + +[[package]] +name = "strum" +version = "0.21.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "aaf86bbcfd1fa9670b7a129f64fc0c9fcbbfe4f1bc4210e9e98fe71ffc12cde2" + +[[package]] +name = "strum_macros" +version = "0.21.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d06aaeeee809dbc59eb4556183dd927df67db1540de5be8d3ec0b6636358a5ec" +dependencies = [ + "heck", + "proc-macro2 1.0.29", + "quote 1.0.9", + "syn 1.0.77", +] + +[[package]] +name = "syn" +version = "0.15.44" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ca4b3b69a77cbe1ffc9e198781b7acb0c7365a883670e8f1c1bc66fba79a5c5" +dependencies = [ + "proc-macro2 0.4.30", + "quote 0.6.13", + "unicode-xid 0.1.0", +] + +[[package]] +name = "syn" +version = "1.0.77" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5239bc68e0fef57495900cfea4e8dc75596d9a319d7e16b1e0a440d24e6fe0a0" +dependencies = [ + "proc-macro2 1.0.29", + "quote 1.0.9", + "unicode-xid 0.2.2", +] + +[[package]] +name = "textcode" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "13de2d432b3eea016f6a010139c8b5a5bf050b5a05b8993d04033ca5232e44a9" + +[[package]] +name = "thiserror" +version = "1.0.29" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "602eca064b2d83369e2b2f34b09c70b605402801927c65c11071ac911d299b88" +dependencies = [ + "thiserror-impl", +] + +[[package]] +name = "thiserror-impl" +version = "1.0.29" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bad553cc2c78e8de258400763a647e80e6d1b31ee237275d756f6836d204494c" +dependencies = [ + "proc-macro2 1.0.29", + "quote 1.0.9", + "syn 1.0.77", +] + +[[package]] +name = "thread_local" +version = "1.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8018d24e04c95ac8790716a5987d0fec4f8b27249ffa0f7d33f1369bdfb88cbd" +dependencies = [ + "once_cell", +] + +[[package]] +name = "time" +version = "0.1.43" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ca8a50ef2360fbd1eeb0ecd46795a87a19024eb4b53c5dc916ca1fd95fe62438" +dependencies = [ + "libc", + "winapi", +] + +[[package]] +name = "timezone-abbreviations" +version = "0.1.0" +dependencies = [ + "phf", +] + +[[package]] +name = "tracing" +version = "0.1.28" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "84f96e095c0c82419687c20ddf5cb3eadb61f4e1405923c9dc8e53a1adacbda8" +dependencies = [ + "cfg-if 1.0.0", + "pin-project-lite", + "tracing-attributes", + "tracing-core", +] + +[[package]] +name = "tracing-attributes" +version = "0.1.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "98863d0dd09fa59a1b79c6750ad80dbda6b75f4e71c437a6a1a8cb91a8bcbd77" +dependencies = [ + "proc-macro2 1.0.29", + "quote 1.0.9", + "syn 1.0.77", +] + +[[package]] +name = "tracing-core" +version = "0.1.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "46125608c26121c81b0c6d693eab5a420e416da7e43c426d2e8f7df8da8a3acf" +dependencies = [ + "lazy_static", +] + +[[package]] +name = "tracing-log" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a6923477a48e41c1951f1999ef8bb5a3023eb723ceadafe78ffb65dc366761e3" +dependencies = [ + "lazy_static", + "log", + "tracing-core", +] + +[[package]] +name = "tracing-serde" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fb65ea441fbb84f9f6748fd496cf7f63ec9af5bca94dd86456978d055e8eb28b" +dependencies = [ + "serde", + "tracing-core", +] + +[[package]] +name = "tracing-subscriber" +version = "0.2.24" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fdd0568dbfe3baf7048b7908d2b32bca0d81cd56bec6d2a8f894b01d74f86be3" +dependencies = [ + "ansi_term", + "chrono", + "lazy_static", + "matchers", + "regex", + "serde", + "serde_json", + "sharded-slab", + "smallvec", + "thread_local", + "tracing", + "tracing-core", + "tracing-log", + "tracing-serde", +] + +[[package]] +name = "typenum" +version = "1.14.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b63708a265f51345575b27fe43f9500ad611579e764c79edbc2037b1121959ec" + +[[package]] +name = "ucd-trie" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "56dee185309b50d1f11bfedef0fe6d036842e3fb77413abef29f8f8d1c5d4c1c" + +[[package]] +name = "unicode-segmentation" +version = "1.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8895849a949e7845e06bd6dc1aa51731a103c42707010a5b591c0038fb73385b" + +[[package]] +name = "unicode-xid" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fc72304796d0818e357ead4e000d19c9c174ab23dc11093ac919054d20a6a7fc" + +[[package]] +name = "unicode-xid" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8ccb82d61f80a663efe1f787a51b16b5a51e3314d6ac365b08639f52387b33f3" + +[[package]] +name = "vcpkg" +version = "0.2.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426" + +[[package]] +name = "version_check" +version = "0.9.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5fecdca9a5291cc2b8dcf7dc02453fee791a280f3743cb0905f8822ae463b3fe" + +[[package]] +name = "wasi" +version = "0.10.2+wasi-snapshot-preview1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fd6fbd9a79829dd1ad0cc20627bf1ed606756a7f77edff7b66b7064f9cb327c6" + +[[package]] +name = "wasm-bindgen" +version = "0.2.78" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "632f73e236b219150ea279196e54e610f5dbafa5d61786303d4da54f84e47fce" +dependencies = [ + "cfg-if 1.0.0", + "wasm-bindgen-macro", +] + +[[package]] +name = "wasm-bindgen-backend" +version = "0.2.78" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a317bf8f9fba2476b4b2c85ef4c4af8ff39c3c7f0cdfeed4f82c34a880aa837b" +dependencies = [ + "bumpalo", + "lazy_static", + "log", + "proc-macro2 1.0.29", + "quote 1.0.9", + "syn 1.0.77", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-macro" +version = "0.2.78" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d56146e7c495528bf6587663bea13a8eb588d39b36b679d83972e1a2dbbdacf9" +dependencies = [ + "quote 1.0.9", + "wasm-bindgen-macro-support", +] + +[[package]] +name = "wasm-bindgen-macro-support" +version = "0.2.78" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7803e0eea25835f8abdc585cd3021b3deb11543c6fe226dcd30b228857c5c5ab" +dependencies = [ + "proc-macro2 1.0.29", + "quote 1.0.9", + "syn 1.0.77", + "wasm-bindgen-backend", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-shared" +version = "0.2.78" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0237232789cf037d5480773fe568aac745bfe2afbc11a863e97901780a6b47cc" + +[[package]] +name = "winapi" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" +dependencies = [ + "winapi-i686-pc-windows-gnu", + "winapi-x86_64-pc-windows-gnu", +] + +[[package]] +name = "winapi-i686-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" + +[[package]] +name = "winapi-x86_64-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" diff --git a/Cargo.toml b/Cargo.toml new file mode 100644 index 0000000..1473a89 --- /dev/null +++ b/Cargo.toml @@ -0,0 +1,27 @@ +[package] +name = "gmaildb" +version = "0.1.0" +edition = "2018" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +eyre = "0.6.5" +thiserror = "1.0.29" +tracing = "0.1.28" +tracing-subscriber = "0.2.24" +rusqlite = {version = "0.25.3", features = ["chrono", "trace"]} +regex = "1.5.3" +flate2 = "1.0.22" +email-parser = { path = "../email-parser/email-parser", features = ["sender", "from", "date", "subject", "mime", "allow-duplicate-headers"]} +rayon = "1.5.1" +chrono = "0.4.19" +eml-parser = { path = "../EmlParser" } +email-address-parser = "1.0.3" +lazy_static = "*" +rhymessage = "1.3.1" +serde_json = "*" +serde = { version = "*", features = ["derive"]} +strum = "0.21" +strum_macros = "0.21" +crossbeam-channel = "0.5.1" \ No newline at end of file diff --git a/src/database.rs b/src/database.rs new file mode 100644 index 0000000..71cce7f --- /dev/null +++ b/src/database.rs @@ -0,0 +1,119 @@ +use std::path::PathBuf; + +use crate::emails::EmailEntry; +use chrono::Datelike; +use crossbeam_channel::{unbounded, Receiver, Sender}; +use eyre::{Report, Result}; +use rusqlite::{self, params, Connection, Error, Row}; + +#[derive(Debug)] +pub struct Database { + connection: Option, +} + +pub enum DBMessage { + Mail(EmailEntry), + Error(Report, PathBuf), + Done, +} + +impl Database { + /// Create a in-memory db. + pub fn new() -> Result { + //let mut connection = Connection::open_in_memory()?; + let mut connection = Connection::open("/tmp/db.sql")?; + Self::create_tables(&connection)?; + //connection.trace(Some(|n| { + // println!("SQL: {}", &n); + //})); + Ok(Database { + connection: Some(connection), + }) + } + + pub fn process(&mut self) -> Sender { + let (sender, receiver) = unbounded(); + let connection = self.connection.take().unwrap(); + std::thread::spawn(move || loop { + let next = match receiver.recv() { + Ok(n) => n, + Err(e) => { + println!("Receiver error: {:?}", &e); + std::process::exit(0); + } + }; + let result = match next { + DBMessage::Mail(mail) => insert_mail(&connection, &mail), + DBMessage::Error(report, path) => insert_error(&connection, &report, &path), + DBMessage::Done => break, + }; + result.unwrap(); + //if let Err(e) = result { + // tracing::error!("SQL Error: {:?}", &e); + //} + }); + sender + } + + fn create_tables(connection: &Connection) -> Result<()> { + let emails_table = r#" +CREATE TABLE IF NOT EXISTS emails ( + path TEXT NOT NULL, + domain TEXT NOT NULL, + local_part TEXT NOT NULL, + year INTEGER NOT NULL, + month INTEGER NOT NULL, + day INTEGER NOT NULL, + kind TEXT NOT NULL +);"#; + connection.execute(&emails_table, params![])?; + let errors_table = r#" +CREATE TABLE IF NOT EXISTS errors ( + message TEXT NOT NULL, + path TEXT NOT NULL, +);"#; + connection.execute(&errors_table, params![])?; + Ok(()) + } +} + +fn insert_mail(connection: &Connection, entry: &EmailEntry) -> Result<()> { + let path = entry.path.display().to_string(); + let domain = &entry.domain; + let local_part = &entry.local_part; + let year = entry.datetime.date().year(); + let month = entry.datetime.date().month(); + let day = entry.datetime.date().day(); + let kind = entry.parser.to_string(); + let sql = "INSERT INTO emails (path, domain, local_part, year, month, day, kind) VALUES (?, ?, ?, ?, ?, ?, ?)"; + let mut prepared = connection.prepare(sql)?; + prepared.execute(params![path, domain, local_part, year, month, day, kind])?; + Ok(()) +} + +fn insert_error(connection: &Connection, message: &Report, path: &PathBuf) -> Result<()> { + let sql = "INSERT INTO errors (message, path) VALUES (?, ?)"; + let mut prepared = connection.prepare(sql)?; + prepared.execute(params![message.to_string(), path.display().to_string()])?; + Ok(()) +} + +pub trait RowConversion: Sized { + fn from_row<'stmt>(row: &Row<'stmt>) -> Result; + fn to_row(&self) -> Result; +} + +/*impl RowConversion for EmailEntry { +fn from_row<'stmt>(row: &Row<'stmt>) -> Result { + let path: String = row.get("path")?; + let domain: String = row.get("domain")?; + let local_part: String = row.get("local_part")?; + let year: usize = row.get("year")?; + let month: usize = row.get("month")?; + let day: usize = row.get("day")?; + let created = email_parser::time::DateTime:: + Ok(EmailEntry { + path, domain, local_part, year, month, day + }) +} +*/ diff --git a/src/emails.rs b/src/emails.rs new file mode 100644 index 0000000..6f66484 --- /dev/null +++ b/src/emails.rs @@ -0,0 +1,530 @@ +use chrono::prelude::*; +use email_address_parser; +use eml_parser::eml::HeaderFieldValue; +use eyre::{bail, eyre, Result, WrapErr}; +use flate2; +use flate2::read::GzDecoder; +use rayon::prelude::*; +use rhymessage; +use serde::Deserialize; +use serde_json; +use strum_macros; + +const SENDER_HEADER_NAMES: &[&str] = &["Sender", "Reply-to", "From"]; +const DATE_HEADER_NAMES: &[&str] = &["Received", "Date"]; + +use std::{ + convert::{TryFrom, TryInto}, + io::Read, + path::{Path, PathBuf}, +}; + +/// We want to know which library was used to parse this email +#[derive(Debug, strum_macros::EnumString, strum_macros::ToString)] +pub enum ParserKind { + EmailParser, + Eml, + Rhymessage, + Meta, +} + +/// Representation of an email +#[derive(Debug)] +pub struct EmailEntry { + pub path: PathBuf, + pub domain: String, + pub local_part: String, + pub datetime: chrono::DateTime, + pub parser: ParserKind, +} + +/// Raw representation of an email. +/// Contains the paths to the relevant files as well +/// as the name of the folder the email was in. +#[derive(Debug)] +pub struct RawEmailEntry { + folder_name: String, + eml_path: PathBuf, + meta_path: PathBuf, +} + +impl RawEmailEntry { + pub fn new>(path: P) -> RawEmailEntry { + let path = path.as_ref(); + let folder_name = path + .parent() + .unwrap() + .file_name() + .unwrap() + .to_str() + .unwrap() + .to_owned(); + let eml_path = path.to_path_buf(); + let meta_path = path + .parent() + .unwrap() + .join(format!( + "{}.meta", + &path + .file_stem() + .unwrap() + .to_str() + .unwrap() + .replace(".eml", "") + )) + .to_path_buf(); + RawEmailEntry { + folder_name, + eml_path, + meta_path, + } + } + + pub fn path(&self) -> PathBuf { + self.eml_path.clone() + } +} + +pub struct Emails { + /// The current index in the Vec of emails + curr: usize, + /// The `Vec` with the `EmailEntry` entries + pub emails: Vec, +} + +impl Emails { + pub fn new>(folder: A) -> Result { + let folder = folder.as_ref(); + if !folder.exists() { + bail!("Folder {} does not exist", &folder.display()); + } + let emails = read_folders(&folder)?; + Ok(Emails { curr: 0, emails }) + } + + pub fn len(&self) -> usize { + self.emails.len() + } +} + +//impl Iterator for Emails { +// // We can refer to this type using Self::Item +// type Item = Result; +// +// fn next(&mut self) -> Option { +// let new_next = self.curr + 1; +// let entry = self.emails.get(self.curr)?; +// self.curr = new_next; +// let email = read_email(&entry); +// Some(email) +// } +//} + +//impl ParallelIterator for Emails { +// type Item = Result; +// +// fn drive_unindexed(self, consumer: C) -> C::Result +// where +// C: rayon::iter::plumbing::UnindexedConsumer, +// { +// self.emails +// .into_par_iter() +// .map(|e| read_email(&e)) +// .drive_unindexed(consumer) +// } +//} + +fn read_folders(folder: &Path) -> Result> { + Ok(std::fs::read_dir(&folder)? + .into_iter() + .par_bridge() + .filter_map(|entry| { + let path = entry + .map_err(|e| tracing::error!("{} {:?}", &folder.display(), &e)) + .ok()? + .path(); + if !path.is_dir() { + return None; + } + read_emails(&path) + .map_err(|e| tracing::error!("{} {:?}", &path.display(), &e)) + .ok() + }) + .flatten() + .collect()) +} + +fn read_emails(folder_path: &Path) -> Result> { + Ok(std::fs::read_dir(folder_path)? + .into_iter() + .par_bridge() + .filter_map(|entry| { + let path = entry + .map_err(|e| tracing::error!("{} {:?}", &folder_path.display(), &e)) + .ok()? + .path(); + if path.is_dir() { + return None; + } + if !path.extension()?.eq("gz") { + return None; + } + Some(RawEmailEntry { + folder_name: folder_path.file_name()?.to_str()?.to_string(), + eml_path: path.clone(), + meta_path: path + .parent()? + .join(format!( + "{}.meta", + &path.file_stem()?.to_str()?.replace(".eml", "") + )) + .to_path_buf(), + }) + }) + .collect()) +} + +pub fn read_email(raw_entry: &RawEmailEntry) -> Result { + let content = unziped_content(&raw_entry.eml_path)?; + // We have to try multiple different email readers as each of them seems to fail in a different way + let email = parse_email_parser(&raw_entry, &content) + .or_else(|e| { + tracing::trace!("Parser Error: {:?}", &e); + parse_rhymessage(&raw_entry, &content) + }) + .or_else(|e| { + tracing::trace!("Parser Error: {:?}", &e); + parse_eml(&raw_entry, &content) + }) + .or_else(|e| { + tracing::trace!("Parser Error: {:?}", &e); + parse_meta(&raw_entry, &content) + }); + + Ok(email.wrap_err_with(|| { + format!( + "{}\n{:?}", + String::from_utf8(content.clone()).unwrap(), + &raw_entry + ) + })?) +} + +fn parse_email_parser(raw_entry: &RawEmailEntry, content: &Vec) -> Result { + let x = match email_parser::email::Email::parse(&content) { + Ok(email) => (&raw_entry.eml_path, email).try_into(), + Err(error) => { + let content_string = String::from_utf8(content.clone())?; + //println!( + // "---\n{}\n---\n{:?}\n---\n{}", + // &content_string, + // &error, + // &raw_entry.eml_path.display() + //); + println!("{}|{}", &error, &raw_entry.eml_path.display()); + //Err(eyre!("Could not `email_parser` email:\n{:?}", &error)) + Err(eyre!("Could not `email_parser` email")) + } + }; + x + //.unwrap(); + //Ok(x) +} + +fn parse_eml(raw_entry: &RawEmailEntry, content: &Vec) -> Result { + let content_string = String::from_utf8(content.clone())?; + match eml_parser::EmlParser::from_string(content_string) + .ignore_body() + .parse() + { + Ok(eml) => (&raw_entry.eml_path, eml).try_into(), + Err(error) => bail!("Could not `eml` parse email:\n{:?}", &error), + } +} + +fn parse_meta(raw_entry: &RawEmailEntry, _content: &Vec) -> Result { + use chrono::prelude::*; + #[derive(Deserialize)] + struct Meta { + msg_id: String, + internal_date: i64, + } + let content = std::fs::read_to_string(&raw_entry.meta_path)?; + let meta: Meta = serde_json::from_str(&content)?; + let parsed = email_address_parser::EmailAddress::parse(&meta.msg_id, None) + .ok_or(eyre!("Cannot Parse Address: {}", &meta.msg_id))?; + let datetime = Utc.timestamp(meta.internal_date, 0); + Ok(EmailEntry { + path: raw_entry.eml_path.to_path_buf(), + domain: parsed.get_domain().to_owned(), + local_part: parsed.get_local_part().to_owned(), + datetime, + parser: ParserKind::Meta, + }) +} + +fn parse_rhymessage(raw_entry: &RawEmailEntry, content: &Vec) -> Result { + use rhymessage::MessageHeaders; + let mut headers = MessageHeaders::new(); + match headers.parse(&content) { + Ok(_) => (), + Err(e) => bail!("Error Parsing Message: {:?}", &e), + } + Ok((&raw_entry.eml_path, headers).try_into()?) +} + +impl<'a> TryFrom<(&PathBuf, email_parser::email::Email<'a>)> for EmailEntry { + type Error = eyre::Report; + fn try_from(content: (&PathBuf, email_parser::email::Email)) -> Result { + let (path, email) = content; + let domain = email.sender.address.domain.to_string(); + let local_part = email.sender.address.local_part.to_string(); + let datetime = emaildatetime_to_chrono(&email.date); + + Ok(EmailEntry { + path: path.to_path_buf(), + domain, + local_part, + datetime, + parser: ParserKind::EmailParser, + }) + } +} + +impl TryFrom<(&PathBuf, rhymessage::MessageHeaders)> for EmailEntry { + type Error = eyre::Report; + fn try_from(content: (&PathBuf, rhymessage::MessageHeaders)) -> Result { + let (path, headers) = content; + + let mut address: Option = None; + let mut date: Option = None; + for entry in headers.headers() { + if address == None && SENDER_HEADER_NAMES.contains(&entry.name.as_ref()) { + address = Some(entry.value.to_string()); + } + if date == None && DATE_HEADER_NAMES.contains(&entry.name.as_ref()) { + date = Some(entry.value.to_string()); + } + if address.is_some() && date.is_some() { + break; + } + } + + let address = address.ok_or(eyre!("Cannot find sender header"))?; + let date = date.ok_or(eyre!("Cannot find date header"))?; + + let parsed_address = email_address_parser::EmailAddress::parse(&address, None) + .ok_or(eyre!("Cannot Parse Address: {}", &address))?; + + let parsed_date = date + .parse::>() + .map_err(|e| eyre!("Cannot Parse Date {}: {:?}", &date, &e))?; + + Ok(EmailEntry { + path: path.to_path_buf(), + domain: parsed_address.get_domain().to_string(), + local_part: parsed_address.get_local_part().to_string(), + datetime: parsed_date, + parser: ParserKind::Rhymessage, + }) + } +} + +impl TryFrom<(&PathBuf, eml_parser::eml::Eml)> for EmailEntry { + type Error = eyre::Report; + fn try_from(content: (&PathBuf, eml_parser::eml::Eml)) -> Result { + use eml_parser::eml::EmailAddress; + let (path, email) = content; + let headers = email.headers; + let sender = email + .from + .as_ref() + .or_else(|| { + // Try to find the address from some other field + headers + .iter() + .find(|f| SENDER_HEADER_NAMES.contains(&f.name.as_str())) + .map(|f| &f.value) + }) + .ok_or(eyre!("Missing From Field"))?; + + let datetime = headers + .iter() + .find(|f| DATE_HEADER_NAMES.contains(&f.name.as_str())) + .map(|f| match &f.value { + HeaderFieldValue::Unstructured(s) => Some(s.clone()), + _ => None, + }) + .flatten() + .ok_or(eyre!("Missing Date Field"))?; + + let parsed_date = datetime + .parse::>() + .map_err(|e| eyre!("Cannot Parse Date {}: {:?}", &datetime, &e))?; + + use eml_parser::eml::HeaderFieldValue::*; + let address = match &sender { + SingleEmailAddress(e) => EmailAddress::AddressOnly { + address: extract_address(e), + }, + MultipleEmailAddresses(e) if !e.is_empty() => EmailAddress::AddressOnly { + address: extract_address(e.get(0).unwrap()), + }, + Unstructured(data) => { + parse_unstructured(&data).ok_or(eyre!("Invalid Unstructered Email: {}", &data))? + } + MultipleEmailAddresses(e) => { + bail!("Email has invalid amount of senders: {:?}", &e) + } + _ => bail!("Email has invalid amount of senders: {:?}", &sender), + }; + + let address = extract_address(&address); + + let parsed = email_address_parser::EmailAddress::parse(&address, None) + .ok_or(eyre!("Cannot Parse Address: {}", &address))?; + + Ok(EmailEntry { + path: path.to_path_buf(), + domain: parsed.get_domain().to_string(), + local_part: parsed.get_local_part().to_string(), + datetime: parsed_date, + parser: ParserKind::Eml, + }) + } +} + +fn emaildatetime_to_chrono(dt: &email_parser::time::DateTime) -> chrono::DateTime { + use email_parser::time::Month::*; + let m = match dt.date.month { + January => 1, + February => 2, + March => 3, + April => 4, + May => 5, + June => 6, + July => 7, + August => 8, + September => 9, + October => 10, + November => 11, + December => 12, + }; + Utc.ymd(dt.date.year as i32, m, dt.date.day as u32).and_hms( + dt.time.time.hour as u32, + dt.time.time.minute as u32, + dt.time.time.second as u32, + ) +} + +fn unziped_content(path: &Path) -> Result> { + let reader = std::fs::File::open(path)?; + let mut decoder = GzDecoder::new(reader); + let mut buffer = Vec::new(); + decoder.read_to_end(&mut buffer)?; + Ok(buffer) +} + +/// Try to parse unstructed data into some sort of +/// email address +fn parse_unstructured(data: &str) -> Option { + use lazy_static::lazy_static; + use regex::Regex; + lazy_static! { + static ref EMAIL_RE: Regex = Regex::new(r#"(?:[a-z0-9!#$%&'*+/=?^_`{|}~-]+(?:\.[a-z0-9!#$%&'*+/=?^_`{|}~-]+)*|"(?:[\x01-\x08\x0b\x0c\x0e-\x1f\x21\x23-\x5b\x5d-\x7f]|\\[\x01-\x09\x0b\x0c\x0e-\x7f])*")@(?:(?:[a-z0-9](?:[a-z0-9-]*[a-z0-9])?\.)+[a-z0-9](?:[a-z0-9-]*[a-z0-9])?|\[(?:(?:(2(5[0-5]|[0-4][0-9])|1[0-9][0-9]|[1-9]?[0-9]))\.){3}(?:(2(5[0-5]|[0-4][0-9])|1[0-9][0-9]|[1-9]?[0-9])|[a-z0-9-]*[a-z0-9]:(?:[\x01-\x08\x0b\x0c\x0e-\x1f\x21-\x5a\x53-\x7f]|\\[\x01-\x09\x0b\x0c\x0e-\x7f])+)\])"#).unwrap(); + } + lazy_static! { + static ref RE: Regex = Regex::new("<(.*?)>").unwrap(); + } + if let Some(capture) = RE.captures(&data).and_then(|f| f.get(1)) { + Some(eml_parser::eml::EmailAddress::AddressOnly { + address: capture.as_str().to_string(), + }) + } else { + let capture = EMAIL_RE.captures(&data).and_then(|f| f.get(0))?; + Some(eml_parser::eml::EmailAddress::AddressOnly { + address: capture.as_str().to_string(), + }) + } +} + +fn extract_address(from: &eml_parser::eml::EmailAddress) -> String { + use eml_parser::eml::EmailAddress::*; + match from { + AddressOnly { address } => address.clone(), + NameAndEmailAddress { name: _, address } => address.clone(), + } +} + +#[cfg(test)] +mod tests { + use std::{path::PathBuf, str::FromStr}; + + use super::RawEmailEntry; + + #[test] + fn test_weird_email1() { + let data = "No Reply , terhechte.5cffa@m.evernote.com"; + let address = super::parse_unstructured(&data).unwrap(); + assert_eq!( + address, + eml_parser::eml::EmailAddress::AddressOnly { + address: "no-reply@evernote.com".to_owned() + } + ); + } + + #[test] + fn test_weird_email2() { + let data = r#"info@sport-news.denReply-To:info"@sport-news.denX-Mailer:Sport-News.de"#; + let address = super::parse_unstructured(&data).unwrap(); + assert_eq!( + address, + eml_parser::eml::EmailAddress::AddressOnly { + address: "info@sport-news.den".to_owned() + } + ); + } + + #[test] + fn test_weird_email3() { + crate::setup(); + let eml_path = PathBuf::from_str( + "/Users/terhechte/Documents/gmail_backup/db/2014-09/1479692635489080640.eml.gz", + ) + .unwrap(); + let meta_path = PathBuf::from_str( + "/Users/terhechte/Documents/gmail_backup/db/2014-09/1479692635489080640.meta", + ) + .unwrap(); + let r = RawEmailEntry { + folder_name: "2014-09".to_owned(), + eml_path, + meta_path, + }; + //let result = super::read_email(&r).expect(""); + let content = Vec::new(); + let result = super::parse_meta(&r, &content).expect(""); + dbg!(&result); + } + + #[test] + fn test_weird_email4() { + crate::setup(); + let eml_path = PathBuf::from_str( + "/Users/terhechte/Documents/gmail_backup/db/2014-08/1475705321427236077.eml.gz", + ) + .unwrap(); + let meta_path = PathBuf::from_str( + "/Users/terhechte/Documents/gmail_backup/db/2014-08/1475705321427236077.meta", + ) + .unwrap(); + let r = RawEmailEntry { + folder_name: "2014-08".to_owned(), + eml_path, + meta_path, + }; + let result = super::read_email(&r).expect(""); + dbg!(&result); + } +} diff --git a/src/main.rs b/src/main.rs new file mode 100644 index 0000000..2aefdd6 --- /dev/null +++ b/src/main.rs @@ -0,0 +1,74 @@ +use eyre::{bail, Result}; +use rayon::prelude::*; +use std::io::prelude::*; +use std::{io, path::PathBuf}; +use thiserror; +use tracing_subscriber::EnvFilter; + +use std::path::Path; +use std::sync::{Arc, Mutex}; + +use crate::database::Database; + +mod database; +mod emails; + +#[derive(Debug, thiserror::Error)] +enum GmailDBError { + #[error("Missing folder argument")] + MissingFolder, +} + +fn main() -> Result<()> { + setup(); + let arguments: Vec = std::env::args().collect(); + let folder = arguments.get(1).ok_or(GmailDBError::MissingFolder)?; + process_folder(&folder)?; + //process_email(&folder)?; + Ok(()) +} + +fn process_email(path: &str) -> Result<()> { + let entry = emails::RawEmailEntry::new(&path); + let mail = emails::read_email(&entry).unwrap(); + Ok(()) +} + +fn process_folder(folder: &str) -> Result<()> { + let emails = emails::Emails::new(&folder)?; + let total = emails.len(); + + println!("Done Loading {} emails", &total); + + let mut database = Database::new().expect("Expect a valid database"); + + let sender = database.process(); + + use database::DBMessage; + emails + .emails + //.par_iter() + .iter() + .map(|raw_mail| (&raw_mail, emails::read_email(&raw_mail))) + .for_each(|(raw_mail, entry)| { + if let Err(e) = match entry { + Ok(mail) => sender.send(DBMessage::Mail(mail)), + Err(e) => sender.send(DBMessage::Error(e, raw_mail.path())), + } { + tracing::info!("Error Inserting into Database: {:?}", &e); + } + }); + + sender.send(database::DBMessage::Done).unwrap(); + while !sender.is_empty() {} + Ok(()) +} + +fn setup() { + if std::env::var("RUST_LOG").is_err() { + std::env::set_var("RUST_LOG", "error") + } + tracing_subscriber::fmt::fmt() + .with_env_filter(EnvFilter::from_default_env()) + .init(); +}