mirror of
https://github.com/carlostrub/sisyphus
synced 2024-10-31 09:20:15 +00:00
start loading mails and classifying them
This commit is contained in:
parent
1694b0b35d
commit
da427ae069
6
glide.lock
generated
6
glide.lock
generated
@ -1,6 +1,8 @@
|
||||
hash: 0fbdb475136d5d29feb99b15dafd533580a8bbbf4fcfd801ddeeeb24e9a840c0
|
||||
updated: 2017-02-20T22:35:31.503897743Z
|
||||
hash: d18ff656d998425dece8d9d7fbdf5e7c0dd9a2a41b0fc791d55b242d1f8915e7
|
||||
updated: 2017-02-22T20:04:09.761903217Z
|
||||
imports:
|
||||
- name: github.com/jbrukh/bayesian
|
||||
version: bf3f261f9a9c61145c60d47665b0518cc32c774f
|
||||
- name: github.com/luksen/maildir
|
||||
version: 5297d9c3091c7d4891c9d4f6fa743d500c038d6f
|
||||
testImports: []
|
||||
|
@ -1,3 +1,4 @@
|
||||
package: github.com/carlostrub/sisyphus
|
||||
import:
|
||||
- package: github.com/luksen/maildir
|
||||
- package: github.com/jbrukh/bayesian
|
||||
|
101
main.go
101
main.go
@ -1,42 +1,95 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"bufio"
|
||||
"log"
|
||||
"strings"
|
||||
|
||||
"github.com/jbrukh/bayesian"
|
||||
"github.com/luksen/maildir"
|
||||
)
|
||||
|
||||
var (
|
||||
// Maildirs holds a set of mail directories to handle.
|
||||
Maildirs []string
|
||||
const (
|
||||
// good is the class of good mails that are not supposed to be Spam
|
||||
good bayesian.Class = "Good"
|
||||
// junk is the class of Spam mails
|
||||
junk bayesian.Class = "Junk"
|
||||
)
|
||||
|
||||
func main() {
|
||||
Maildirs = []string{"/usr/home/cs/Maildir.TEST"}
|
||||
var (
|
||||
// Maildir holds a set of mail directories to handle.
|
||||
Maildir = "/usr/home/cs/Maildir.TEST"
|
||||
|
||||
var err error
|
||||
var Bad, Good []string
|
||||
// processed is a map of e-mail IDs and true if processed already.
|
||||
processed map[string]bool
|
||||
)
|
||||
|
||||
for _, dir := range Maildirs {
|
||||
var keysBad, keysGood []string
|
||||
keysBad, err = maildir.Dir(dir + "/.Junk").Keys()
|
||||
if err != nil {
|
||||
log.Fatal(err)
|
||||
}
|
||||
// Mails holds the message keys collected by LoadMails.
type Mails struct {
	// Junk lists the keys read from the ".Junk" folder of the Maildir.
	Junk []string
	// Good lists the keys read from the Maildir itself.
	Good []string
}
|
||||
|
||||
Bad = append(Bad, keysBad...)
|
||||
// Classifiers contains the classifiers for mail subjects and bodies
|
||||
type Classifiers struct {
|
||||
Subject, Body *bayesian.Classifier
|
||||
}
|
||||
|
||||
keysGood, err = maildir.Dir(dir).Keys()
|
||||
if err != nil {
|
||||
log.Fatal(err)
|
||||
}
|
||||
// LoadMails loads all mail keys from the Maildir directory for processing.
|
||||
func LoadMails() (m Mails, err error) {
|
||||
|
||||
Good = append(Good, keysGood...)
|
||||
m.Junk, err = maildir.Dir(Maildir + "/.Junk").Keys()
|
||||
if err != nil {
|
||||
return m, err
|
||||
}
|
||||
|
||||
fmt.Println("Bad guys:")
|
||||
fmt.Println(Bad)
|
||||
fmt.Println("Good guys:")
|
||||
fmt.Println(Good)
|
||||
m.Good, err = maildir.Dir(Maildir).Keys()
|
||||
if err != nil {
|
||||
return m, err
|
||||
}
|
||||
|
||||
return m, nil
|
||||
}
|
||||
|
||||
// Learn initially classifies all mails and returns the respective classifiers.
|
||||
func (m Mails) Learn() (c Classifiers, err error) {
|
||||
return
|
||||
}
|
||||
|
||||
// cleanText is a placeholder for text normalisation. It currently ignores t
// and returns the empty string with a nil error.
func cleanText(t string) (c string, err error) {
	return c, err
}
|
||||
|
||||
// getContent reads mails' subjects and bodies and returns the respective
|
||||
// slices of strings
|
||||
func getContent(keys []string) (s, b []string, err error) {
|
||||
for _, k := range keys {
|
||||
|
||||
message, err := maildir.Dir(Maildir).Message(k)
|
||||
if err != nil {
|
||||
return s, b, err
|
||||
}
|
||||
|
||||
// get Subject
|
||||
subject := message.Header.Get("Subject")
|
||||
s = append(s, strings.Split(subject, " ")...)
|
||||
|
||||
// get Body
|
||||
bScanner := bufio.NewScanner(message.Body)
|
||||
for bScanner.Scan() {
|
||||
b = append(b, strings.Split(bScanner.Text(), " ")...)
|
||||
}
|
||||
}
|
||||
|
||||
return s, b, nil
|
||||
}
|
||||
|
||||
func main() {
|
||||
|
||||
_, err := LoadMails()
|
||||
if err != nil {
|
||||
log.Fatal(err)
|
||||
}
|
||||
|
||||
// Create a classifier
|
||||
//classifier := bayesian.NewClassifier(Good, Junk)
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user