2
0
mirror of https://github.com/carlostrub/sisyphus synced 2024-10-31 09:20:15 +00:00

start loading mails and classifying them

This commit is contained in:
Carlo Strub 2017-02-22 23:01:16 +00:00
parent 1694b0b35d
commit da427ae069
3 changed files with 82 additions and 26 deletions

6
glide.lock generated
View File

@ -1,6 +1,8 @@
hash: 0fbdb475136d5d29feb99b15dafd533580a8bbbf4fcfd801ddeeeb24e9a840c0
updated: 2017-02-20T22:35:31.503897743Z
hash: d18ff656d998425dece8d9d7fbdf5e7c0dd9a2a41b0fc791d55b242d1f8915e7
updated: 2017-02-22T20:04:09.761903217Z
imports:
- name: github.com/jbrukh/bayesian
version: bf3f261f9a9c61145c60d47665b0518cc32c774f
- name: github.com/luksen/maildir
version: 5297d9c3091c7d4891c9d4f6fa743d500c038d6f
testImports: []

View File

@ -1,3 +1,4 @@
package: github.com/carlostrub/sisyphus
import:
- package: github.com/luksen/maildir
- package: github.com/jbrukh/bayesian

101
main.go
View File

@ -1,42 +1,95 @@
package main
import (
"fmt"
"bufio"
"log"
"strings"
"github.com/jbrukh/bayesian"
"github.com/luksen/maildir"
)
var (
// Maildirs holds a set of mail directories to handle.
Maildirs []string
// Mail classes, declared as bayesian.Class values.
const (
// good is the class of good mails that are not supposed to be spam.
good bayesian.Class = "Good"
// junk is the class of spam mails.
junk bayesian.Class = "Junk"
)
func main() {
Maildirs = []string{"/usr/home/cs/Maildir.TEST"}
var (
// Maildir holds a set of mail directories to handle.
Maildir = "/usr/home/cs/Maildir.TEST"
var err error
var Bad, Good []string
// processed is a map of e-mail IDs and true if processed already.
processed map[string]bool
)
for _, dir := range Maildirs {
var keysBad, keysGood []string
keysBad, err = maildir.Dir(dir + "/.Junk").Keys()
if err != nil {
log.Fatal(err)
}
// Mails groups the message keys gathered from the mail directory: Junk holds
// the keys read from the ".Junk" sub-directory and Good the keys read from the
// top-level directory.
type Mails struct {
	Junk []string
	Good []string
}
Bad = append(Bad, keysBad...)
// Classifiers bundles two Bayesian classifiers: one for mail subjects and one
// for mail bodies.
type Classifiers struct {
	Subject *bayesian.Classifier
	Body    *bayesian.Classifier
}
keysGood, err = maildir.Dir(dir).Keys()
if err != nil {
log.Fatal(err)
}
// LoadMails loads all mail keys from the Maildir directory for processing.
// It stores the keys of the ".Junk" sub-directory in m.Junk and the keys of
// Maildir itself in m.Good, returning the first error encountered.
func LoadMails() (m Mails, err error) {
// NOTE(review): Good and keysGood are not declared in this function; this
// looks like a removed line left over from the diff view — confirm.
Good = append(Good, keysGood...)
// Keys of the junk folder below Maildir.
m.Junk, err = maildir.Dir(Maildir + "/.Junk").Keys()
if err != nil {
return m, err
}
// NOTE(review): the four Println calls below reference Bad and Good, which
// are not declared here; presumably also removed-diff residue — confirm.
fmt.Println("Bad guys:")
fmt.Println(Bad)
fmt.Println("Good guys:")
fmt.Println(Good)
// Keys of the top-level mail directory.
m.Good, err = maildir.Dir(Maildir).Keys()
if err != nil {
return m, err
}
return m, nil
}
// Learn is intended to classify all mails initially and hand back the
// resulting classifiers. It is currently an unimplemented stub that returns
// the zero Classifiers value and a nil error.
func (m Mails) Learn() (c Classifiers, err error) {
	return c, err
}
// cleanText is a placeholder: regardless of the text t passed in, it currently
// returns an empty string and a nil error.
func cleanText(t string) (c string, err error) {
	return "", nil
}
// getContent reads the mails identified by keys from Maildir and returns the
// words of their subjects (s) and of their bodies (b).
//
// Text is split with strings.Fields, so empty subjects, blank lines and runs
// of spaces or tabs never produce empty tokens. Processing stops at the first
// failure — a message that cannot be loaded or a body that cannot be read
// completely — and that error is returned together with the words collected
// so far.
//
// NOTE(review): messages are always looked up in the top-level Maildir; keys
// that came from the ".Junk" sub-directory will presumably not be found
// here — confirm against the caller.
func getContent(keys []string) (s, b []string, err error) {
	dir := maildir.Dir(Maildir)

	for _, k := range keys {
		msg, loadErr := dir.Message(k)
		if loadErr != nil {
			return s, b, loadErr
		}

		// Subject words.
		s = append(s, strings.Fields(msg.Header.Get("Subject"))...)

		// Body words, read line by line.
		sc := bufio.NewScanner(msg.Body)
		for sc.Scan() {
			b = append(b, strings.Fields(sc.Text())...)
		}
		// Scan ends silently on read errors and on lines that exceed the
		// scanner's buffer; report that instead of passing off a truncated
		// body as complete.
		if scanErr := sc.Err(); scanErr != nil {
			return s, b, scanErr
		}
	}

	return s, b, nil
}
// main loads the mail keys via LoadMails and aborts through log.Fatal when
// loading fails. The loaded keys are discarded for now.
func main() {
	if _, err := LoadMails(); err != nil {
		log.Fatal(err)
	}

	// Create a classifier
	//classifier := bayesian.NewClassifier(Good, Junk)
}