Implement fisher classifier.

This commit is contained in:
Ray Miller 2024-09-12 15:51:12 +01:00
parent dd1078ea66
commit 55b2804f01

View file

@ -65,8 +65,8 @@
(/ (fcount c f cat)
(catcount c cat))))
(define-method (weighted-prob (c <classifier>) f cat)
(let ((basic-prob (fprob c f cat))
(define-method (weighted-prob (c <classifier>) f cat prf)
(let ((basic-prob (prf c f cat))
(totals (fold (lambda (cat accum) (+ accum (fcount c f cat))) 0.0 (categories c))))
(/ (+ (* (weight c) (assumed-prob c))
(* totals basic-prob))
@ -75,7 +75,7 @@
(define-class <naive-bayes> (<classifier>))
(define-method (doc-prob (c <naive-bayes>) doc cat)
(fold (lambda (feature p) (* p (weighted-prob c feature cat)))
(fold (lambda (feature p) (* p (weighted-prob c feature cat fprob)))
1.0
(get-features c doc)))
@ -116,9 +116,47 @@
(sample-train c)
(loop (1- n)))))
(define (make-test-classifier)
(define (make-test-naive-bayes-classifier)
(let ((c (make <naive-bayes> #:get-features get-words)))
(set-threshold! c 'spam 3.0)
(set-threshold! c 'ham 1.0)
(sample-train c)
c))
(define-class <fisher-classifier> (<classifier>))
(define-method (cprob (c <fisher-classifier>) f cat)
(let ((clf (fprob c f cat)))
(if (zero? clf)
0
(let ((freqsum (fold (lambda (cat accum)
(+ accum (fprob c f cat)))
0.0
(categories c))))
(/ clf freqsum)))))
(define (inv-chi chi df)
(let* ((m (/ chi 2.0))
(sum (exp (- m)))
(term sum))
(for-each (lambda (i)
(set! term (* term (/ m i)))
(set! sum (+ sum term)))
(iota (floor (/ df 2.0)) 1))
(min sum 1.0)))
(define-method (fisher-prob (c <fisher-classifier>) doc cat)
(define features (get-features c doc))
(define p (fold (lambda (f accum)
(* accum (weighted-prob c f cat cprob)))
1.0
features))
(define fscore (* -2.0 (log p)))
(inv-chi fscore (* 2 (length features))))
(define (make-test-fisher-classifier)
(let ((c (make <fisher-classifier> #:get-features get-words)))
(set-threshold! c 'spam 3.0)
(set-threshold! c 'ham 1.0)
(sample-train c)
c))