Allow already-parsed emails as importer input

pull/35/head
Benedikt Terhechte 2 years ago
parent 3431e6f79b
commit 3b4e220ae7

@ -6,7 +6,7 @@ use std::path::{Path, PathBuf};
use ps_core::eyre::Result;
use ps_core::EmailMeta;
use super::super::shared::parse::ParseableEmail;
use super::super::shared::parse::{MessageKind, ParseableEmail};
pub struct Mail {
path: PathBuf,
@ -50,8 +50,8 @@ impl ParseableEmail for Mail {
self.data = parsed.message.to_vec();
Ok(())
}
fn message(&self) -> Result<Cow<'_, [u8]>> {
Ok(Cow::Borrowed(self.data.as_slice()))
/// Returns the stored message bytes as `MessageKind::Data`, borrowing
/// from `self.data` instead of copying it.
fn kind(&self) -> MessageKind<'_> {
MessageKind::Data(Cow::Borrowed(self.data.as_slice()))
}
fn path(&self) -> &Path {
self.path.as_path()

@ -6,7 +6,7 @@ use std::borrow::Cow;
use std::io::Read;
use std::path::{Path, PathBuf};
use super::super::shared::parse::ParseableEmail;
use super::super::shared::parse::{MessageKind, ParseableEmail};
use ps_core::EmailMeta;
/// Raw representation of an email.
@ -101,8 +101,11 @@ impl ParseableEmail for RawEmailEntry {
fn prepare(&mut self) -> Result<()> {
Ok(())
}
fn message(&self) -> Result<Cow<'_, [u8]>> {
Ok(Cow::Owned(self.read()?))
fn kind(&self) -> MessageKind<'_> {
match self.read() {
Ok(n) => MessageKind::Data(Cow::Owned(n)),
Err(e) => return MessageKind::Error(e),
}
}
fn path(&self) -> &Path {

@ -5,7 +5,7 @@ use walkdir::{DirEntry, WalkDir};
use super::{Config, ImporterFormat, Message, MessageSender, Result};
use super::shared::parse::ParseableEmail;
use super::shared::parse::{MessageKind, ParseableEmail};
use maildir;
use ps_core::EmailMeta;
@ -153,8 +153,8 @@ impl ParseableEmail for Mail {
fn prepare(&mut self) -> Result<()> {
Ok(())
}
fn message(&self) -> Result<Cow<'_, [u8]>> {
Ok(Cow::Borrowed(self.data.as_slice()))
/// Returns the stored message bytes as `MessageKind::Data`, borrowing
/// from `self.data` instead of copying it.
fn kind(&self) -> MessageKind<'_> {
MessageKind::Data(Cow::Borrowed(self.data.as_slice()))
}
fn path(&self) -> &Path {
self.path.as_path()

@ -5,10 +5,9 @@ use walkdir::WalkDir;
use super::{Config, ImporterFormat, Message, MessageSender, Result};
use super::shared::parse::ParseableEmail;
use super::shared::parse::{MessageKind, ParseableEmail};
use ps_core::EmailMeta;
use std::borrow::Cow;
use std::path::{Path, PathBuf};
pub struct Mail {
@ -105,8 +104,8 @@ impl ParseableEmail for Mail {
fn prepare(&mut self) -> Result<()> {
Ok(())
}
fn message(&self) -> Result<Cow<'_, [u8]>> {
Ok(self.content.as_slice().into())
/// Returns the message content as `MessageKind::Data`; the slice of
/// `self.content` is converted with `.into()`.
fn kind(&self) -> MessageKind<'_> {
MessageKind::Data(self.content.as_slice().into())
}
fn path(&self) -> &Path {
self.path.as_path()

@ -1,4 +1,4 @@
use super::parse::{parse_email, ParseableEmail};
use super::parse::{parse_email, MessageKind, ParseableEmail};
use ps_core::{Config, DBMessage, DatabaseLike, Message, MessageSender};
use ps_core::eyre::{self, bail, Result};
@ -33,7 +33,19 @@ pub fn into_database<Mail: ParseableEmail + 'static, Database: DatabaseLike + 's
// in parallel..
.par_iter_mut()
// parsing them
.map(|raw_mail| parse_email(raw_mail, &config.sender_emails))
.map(|raw_mail| {
raw_mail.prepare()?;
match raw_mail.kind() {
MessageKind::Data(data) => parse_email(
&data,
raw_mail.path(),
raw_mail.meta()?,
&config.sender_emails,
),
MessageKind::Parsed(mail) => Ok(mail),
MessageKind::Error(e) => Err(e),
}
})
// and inserting them into SQLite
.for_each(|entry| {
// Try to write the message into the database

@ -1,7 +1,7 @@
use email_parser::address::{Address, EmailAddress, Mailbox};
use ps_core::chrono;
use ps_core::chrono::prelude::*;
use ps_core::eyre::{eyre, Result};
use ps_core::eyre::{eyre, Report, Result};
use ps_core::tracing;
use std::borrow::Cow;
@ -16,8 +16,13 @@ pub trait ParseableEmail: Send + Sized + Sync {
/// This will be called once before `message`, `path` and `meta`
/// are called. It can be used to perform parsing operations
fn prepare(&mut self) -> Result<()>;
/// The message content as bytes
fn message(&self) -> Result<Cow<'_, [u8]>>;
/// The message, either as raw bytes or already parsed.
/// If the importer supports getting the data this has the benefit
/// of being parsed concurrently already. Some importer types might already
/// return a fully parsed mail, in which case it is easier to
/// just use the parsed type instead of parsing it all again.
fn kind(&self) -> MessageKind<'_>;
//fn message(&self) -> Result<Cow<'_, [u8]>>;
/// The original path of the email in the filesystem
fn path(&self) -> &Path;
/// Optional meta information if they're available.
@ -25,18 +30,21 @@ pub trait ParseableEmail: Send + Sized + Sync {
fn meta(&self) -> Result<Option<EmailMeta>>;
}
pub fn parse_email<Entry: ParseableEmail>(
entry: &mut Entry,
/// What a `ParseableEmail` hands to the importer for a single message.
///
/// `into_database` matches on this value: `Data` is passed on to
/// `parse_email`, `Parsed` is used as-is, and `Error` becomes the
/// `Err` result for that message.
#[derive(Debug)]
pub enum MessageKind<'a> {
/// Raw message bytes that still have to be parsed.
Data(Cow<'a, [u8]>),
/// An entry that has already been parsed.
Parsed(ps_core::EmailEntry),
/// The message could not be provided; carries the error report.
Error(Report),
}
pub fn parse_email(
data: &[u8],
path: &Path,
meta: Option<EmailMeta>,
sender_emails: &HashSet<String>,
) -> Result<EmailEntry> {
if let Err(e) = entry.prepare() {
tracing::error!("Prepare Error: {:?}", e);
return Err(e);
}
let content = entry.message()?;
match email_parser::email::Email::parse(&content) {
match email_parser::email::Email::parse(&data) {
Ok(email) => {
let path = entry.path();
tracing::trace!("Parsing {}", path.display());
let (sender_name, _, sender_local_part, sender_domain) =
mailbox_to_string(&email.sender);
@ -57,8 +65,6 @@ pub fn parse_email<Entry: ParseableEmail>(
let is_reply = email.in_reply_to.map(|v| !v.is_empty()).unwrap_or(false);
let meta = entry.meta()?;
// In order to determine the sender, we have to
// build up the address again :-(
let is_send = {
@ -85,10 +91,10 @@ pub fn parse_email<Entry: ParseableEmail>(
let error = eyre!(
"Could not parse email (trace to see contents): {:?} [{}]",
&error,
entry.path().display()
path.display()
);
tracing::error!("{:?}", &error);
if let Ok(content_string) = String::from_utf8(content.into_owned()) {
if let Ok(content_string) = String::from_utf8(data.to_vec()) {
tracing::trace!("Contents:\n{}\n---\n", content_string);
} else {
tracing::trace!("Contents:\nInvalid UTF8\n---\n");

Loading…
Cancel
Save