Compare commits

...

2 commits

Author SHA1 Message Date
Ray Miller
3c5cb254aa Simplify package build. 2024-09-12 17:38:00 +01:00
Ray Miller
55b2804f01 Implement fisher classifier. 2024-09-12 15:51:12 +01:00
5 changed files with 46 additions and 140 deletions

54
HACKING
View file

@ -1,54 +0,0 @@
# -*- mode: org; coding: utf-8; -*-
#+TITLE: Hacking spam-filter
* Contributing
By far the easiest way to hack on spam-filter is to develop using Guix:
#+BEGIN_SRC bash
# Obtain the source code
cd /path/to/source-code
guix shell -Df guix.scm
# In the new shell, run:
hall build --execute && autoreconf -vif && ./configure && make check
#+END_SRC
You may also want to set your directory as an authorized directory for
`guix shell' so it works without arguments. To do that, simply run
#+BEGIN_SRC bash
echo $(pwd) >> $HOME/.config/guix/shell-authorized-directories
#+END_SRC
You can now hack this project's files to your heart's content, whilst
testing them from your `guix shell' shell.
To try out any scripts in the project you can now use
#+BEGIN_SRC bash
./pre-inst-env scripts/${script-name}
#+END_SRC
If you'd like to tidy the project again, but retain the ability to test the
project from the commandline, simply run:
#+BEGIN_SRC bash
./hall clean --skip "scripts/${script-name},pre-inst-env" --execute
#+END_SRC
** Manual Installation
If you do not yet use Guix, you will have to install this project's
dependencies manually:
- autoconf
- automake
- pkg-config
- texinfo
- guile-hall
Once those dependencies are installed you can run:
#+BEGIN_SRC bash
hall build -x && autoreconf -vif && ./configure && make check
#+END_SRC

View file

@ -1,12 +1,7 @@
(use-modules
(gnu packages)
(gnu packages autotools)
(gnu packages guile)
(gnu packages guile-xyz)
(gnu packages pkg-config)
(gnu packages texinfo)
(guix build-system gnu)
(guix download)
(guix build-system guile)
(guix gexp)
((guix licenses) #:prefix license:)
(guix packages)
@ -14,7 +9,7 @@
(package
(name "guile-spam-filter")
(version "0.1")
(version "0.1.0")
(source
(local-file
(dirname (current-filename))
@ -25,12 +20,9 @@
(not (any (lambda (my-string)
(string-contains file my-string))
(list ".git" ".dir-locals.el" "guix.scm"))))))
(build-system gnu-build-system)
(arguments `())
(build-system guile-build-system)
(native-inputs
(list autoconf automake pkg-config texinfo))
(inputs (list guile-3.0))
(propagated-inputs (list))
(list guile-3.0))
(synopsis "")
(description "")
(home-page "")

View file

@ -1,35 +0,0 @@
(hall-description
(name "spam-filter")
(prefix "guile")
(version "0.1")
(author "Ray Miller")
(email "ray@1729.org.uk")
(copyright (2024))
(synopsis "")
(description "")
(home-page "")
(license gpl3+)
(dependencies `())
(skip ())
(features
((guix #f)
(use-guix-specs-for-dependencies #f)
(native-language-support #f)
(licensing #f)))
(files (libraries
((scheme-file "spam-filter")
(directory
"spam-filter"
((scheme-file "hconfig")))))
(tests ((directory "tests" ())))
(programs ((directory "scripts" ())))
(documentation
((org-file "README")
(symlink "README" "README.org")
(text-file "HACKING")
(text-file "COPYING")
(directory "doc" ((texi-file "spam-filter")))))
(infrastructure
((scheme-file "guix")
(text-file ".gitignore")
(scheme-file "hall")))))

View file

@ -65,8 +65,8 @@
(/ (fcount c f cat)
(catcount c cat))))
(define-method (weighted-prob (c <classifier>) f cat)
(let ((basic-prob (fprob c f cat))
(define-method (weighted-prob (c <classifier>) f cat prf)
(let ((basic-prob (prf c f cat))
(totals (fold (lambda (cat accum) (+ accum (fcount c f cat))) 0.0 (categories c))))
(/ (+ (* (weight c) (assumed-prob c))
(* totals basic-prob))
@ -75,7 +75,7 @@
(define-class <naive-bayes> (<classifier>))
(define-method (doc-prob (c <naive-bayes>) doc cat)
(fold (lambda (feature p) (* p (weighted-prob c feature cat)))
(fold (lambda (feature p) (* p (weighted-prob c feature cat fprob)))
1.0
(get-features c doc)))
@ -116,9 +116,47 @@
(sample-train c)
(loop (1- n)))))
(define (make-test-classifier)
(define (make-test-naive-bayes-classifier)
(let ((c (make <naive-bayes> #:get-features get-words)))
(set-threshold! c 'spam 3.0)
(set-threshold! c 'ham 1.0)
(sample-train c)
c))
(define-class <fisher-classifier> (<classifier>))
(define-method (cprob (c <fisher-classifier>) f cat)
(let ((clf (fprob c f cat)))
(if (zero? clf)
0
(let ((freqsum (fold (lambda (cat accum)
(+ accum (fprob c f cat)))
0.0
(categories c))))
(/ clf freqsum)))))
(define (inv-chi chi df)
(let* ((m (/ chi 2.0))
(sum (exp (- m)))
(term sum))
(for-each (lambda (i)
(set! term (* term (/ m i)))
(set! sum (+ sum term)))
(iota (floor (/ df 2.0)) 1))
(min sum 1.0)))
(define-method (fisher-prob (c <fisher-classifier>) doc cat)
(define features (get-features c doc))
(define p (fold (lambda (f accum)
(* accum (weighted-prob c f cat cprob)))
1.0
features))
(define fscore (* -2.0 (log p)))
(inv-chi fscore (* 2 (length features))))
(define (make-test-fisher-classifier)
(let ((c (make <fisher-classifier> #:get-features get-words)))
(set-threshold! c 'spam 3.0)
(set-threshold! c 'ham 1.0)
(sample-train c)
c))

View file

@ -1,35 +0,0 @@
(define-module
(spam-filter hconfig)
#:use-module
(srfi srfi-26)
#:export
(%version
%author
%license
%copyright
%gettext-domain
G_
N_
init-nls
init-locale))
(define %version "0.1")
(define %author "Ray Miller")
(define %license 'gpl3+)
(define %copyright '(2024))
(define %gettext-domain "guile-spam-filter")
(define G_ identity)
(define N_ identity)
(define (init-nls) "Dummy as no NLS is used" #t)
(define (init-locale)
"Dummy as no NLS is used"
#t)