wrong prior calculation

master
Carlo Strub 7 years ago
parent 359d24caa9
commit 276e665172

@ -15,26 +15,12 @@ import (
// classes.
//
// The prior is computed as gTotal / (gTotal + jTotal), using the global totals
// reported by classificationStatistics. An error is returned when those
// statistics cannot be read or when both totals are zero, i.e. nothing has
// been learned yet.
func classificationPrior(db *bolt.DB) (g float64, err error) {
	gTotal, jTotal, err := classificationStatistics(db)
	if err != nil {
		return 0, err
	}

	// With no learned mails the ratio would be 0/0 (NaN in float64 arithmetic),
	// so report the condition explicitly instead of returning a meaningless
	// prior alongside a nil error.
	total := gTotal + jTotal
	if total == 0 {
		return 0, errors.New("no mails have been classified so far")
	}

	return gTotal / total, nil
}
// classificationLikelihoodWordcounts gets wordcounts from database to be used
@ -69,9 +55,9 @@ func classificationLikelihoodWordcounts(db *bolt.DB, word string) (gN, jN float6
return gN, jN, err
}
// classificationLikelihoodStatistics gets global statistics from database to
// classificationStatistics gets global statistics from database to
// be used in the prior and likelihood calculations
func classificationLikelihoodStatistics(db *bolt.DB, word string) (gTotal, jTotal float64, err error) {
func classificationStatistics(db *bolt.DB) (gTotal, jTotal float64, err error) {
err = db.View(func(tx *bolt.Tx) error {
p := tx.Bucket([]byte("Statistics"))
@ -114,7 +100,7 @@ func classificationLikelihood(db *bolt.DB, word string) (g, j float64, err error
return g, j, err
}
gTotal, jTotal, err := classificationLikelihoodStatistics(db, word)
gTotal, jTotal, err := classificationStatistics(db)
if err != nil {
return g, j, err
}

@ -10,8 +10,8 @@ import (
. "github.com/onsi/gomega"
)
var _ = Describe("Classify", func() {
Context("Classify a new mail", func() {
var _ = Describe("Classify Mails", func() {
Context("Classify one word from the mail that was ", func() {
BeforeEach(func() {
// check whether there exists a DB file
_, oserr := os.Stat("test/Maildir/sisyphus.db")
@ -54,7 +54,7 @@ var _ = Describe("Classify", func() {
Ω(err).ShouldNot(HaveOccurred())
})
It("Classify one word from the mail that was learned before", func() {
It("learned before and is junk", func() {
answer, prob, err := Junk(dbs["test/Maildir"], []string{"london"})
@ -64,7 +64,7 @@ var _ = Describe("Classify", func() {
})
It("Classify one word from the mail that was learned before", func() {
It("learned before and is good", func() {
answer, prob, err := Junk(dbs["test/Maildir"], []string{"localbase"})
@ -74,7 +74,7 @@ var _ = Describe("Classify", func() {
})
It("Classify one word from the mail that was never learned", func() {
It("never learned before", func() {
answer, prob, err := Junk(dbs["test/Maildir"], []string{"abcdefg"})
@ -84,13 +84,13 @@ var _ = Describe("Classify", func() {
})
It("Classify one word from the mail that was learned in good and junk", func() {
It("learned both as good and junk, respectively", func() {
answer, prob, err := Junk(dbs["test/Maildir"], []string{"than"})
Ω(err).ShouldNot(HaveOccurred())
Ω(prob).Should(Equal(0.7795275590551181))
Ω(answer).Should(BeTrue())
Ω(prob).Should(Equal(0.5))
Ω(answer).Should(BeFalse())
})
})

Loading…
Cancel
Save