Compare commits
2 commits
dd1078ea66
...
3c5cb254aa
Author | SHA1 | Date | |
---|---|---|---|
|
3c5cb254aa | ||
|
55b2804f01 |
5 changed files with 46 additions and 140 deletions
54
HACKING
54
HACKING
|
@ -1,54 +0,0 @@
|
|||
# -*- mode: org; coding: utf-8; -*-
|
||||
|
||||
#+TITLE: Hacking spam-filter
|
||||
|
||||
* Contributing
|
||||
|
||||
By far the easiest way to hack on spam-filter is to develop using Guix:
|
||||
|
||||
#+BEGIN_SRC bash
|
||||
# Obtain the source code
|
||||
cd /path/to/source-code
|
||||
guix shell -Df guix.scm
|
||||
# In the new shell, run:
|
||||
hall build --execute && autoreconf -vif && ./configure && make check
|
||||
#+END_SRC
|
||||
|
||||
You may also want to set your directory as an authorized directory for
|
||||
`guix shell' so it works without arguments. To do that, simply run
|
||||
|
||||
#+BEGIN_SRC bash
|
||||
echo $(pwd) >> $HOME/.config/guix/shell-authorized-directories
|
||||
#+END_SRC
|
||||
|
||||
You can now hack this project's files to your heart's content, whilst
|
||||
testing them from your `guix shell' shell.
|
||||
|
||||
To try out any scripts in the project you can now use
|
||||
|
||||
#+BEGIN_SRC bash
|
||||
./pre-inst-env scripts/${script-name}
|
||||
#+END_SRC
|
||||
|
||||
If you'd like to tidy the project again, but retain the ability to test the
|
||||
project from the commandline, simply run:
|
||||
|
||||
#+BEGIN_SRC bash
|
||||
./hall clean --skip "scripts/${script-name},pre-inst-env" --execute
|
||||
#+END_SRC
|
||||
|
||||
** Manual Installation
|
||||
|
||||
If you do not yet use Guix, you will have to install this project's
|
||||
dependencies manually:
|
||||
- autoconf
|
||||
- automake
|
||||
- pkg-config
|
||||
- texinfo
|
||||
- guile-hall
|
||||
|
||||
Once those dependencies are installed you can run:
|
||||
|
||||
#+BEGIN_SRC bash
|
||||
hall build -x && autoreconf -vif && ./configure && make check
|
||||
#+END_SRC
|
16
guix.scm
16
guix.scm
|
@ -1,12 +1,7 @@
|
|||
(use-modules
|
||||
(gnu packages)
|
||||
(gnu packages autotools)
|
||||
(gnu packages guile)
|
||||
(gnu packages guile-xyz)
|
||||
(gnu packages pkg-config)
|
||||
(gnu packages texinfo)
|
||||
(guix build-system gnu)
|
||||
(guix download)
|
||||
(guix build-system guile)
|
||||
(guix gexp)
|
||||
((guix licenses) #:prefix license:)
|
||||
(guix packages)
|
||||
|
@ -14,7 +9,7 @@
|
|||
|
||||
(package
|
||||
(name "guile-spam-filter")
|
||||
(version "0.1")
|
||||
(version "0.1.0")
|
||||
(source
|
||||
(local-file
|
||||
(dirname (current-filename))
|
||||
|
@ -25,12 +20,9 @@
|
|||
(not (any (lambda (my-string)
|
||||
(string-contains file my-string))
|
||||
(list ".git" ".dir-locals.el" "guix.scm"))))))
|
||||
(build-system gnu-build-system)
|
||||
(arguments `())
|
||||
(build-system guile-build-system)
|
||||
(native-inputs
|
||||
(list autoconf automake pkg-config texinfo))
|
||||
(inputs (list guile-3.0))
|
||||
(propagated-inputs (list))
|
||||
(list guile-3.0))
|
||||
(synopsis "")
|
||||
(description "")
|
||||
(home-page "")
|
||||
|
|
35
hall.scm
35
hall.scm
|
@ -1,35 +0,0 @@
|
|||
(hall-description
|
||||
(name "spam-filter")
|
||||
(prefix "guile")
|
||||
(version "0.1")
|
||||
(author "Ray Miller")
|
||||
(email "ray@1729.org.uk")
|
||||
(copyright (2024))
|
||||
(synopsis "")
|
||||
(description "")
|
||||
(home-page "")
|
||||
(license gpl3+)
|
||||
(dependencies `())
|
||||
(skip ())
|
||||
(features
|
||||
((guix #f)
|
||||
(use-guix-specs-for-dependencies #f)
|
||||
(native-language-support #f)
|
||||
(licensing #f)))
|
||||
(files (libraries
|
||||
((scheme-file "spam-filter")
|
||||
(directory
|
||||
"spam-filter"
|
||||
((scheme-file "hconfig")))))
|
||||
(tests ((directory "tests" ())))
|
||||
(programs ((directory "scripts" ())))
|
||||
(documentation
|
||||
((org-file "README")
|
||||
(symlink "README" "README.org")
|
||||
(text-file "HACKING")
|
||||
(text-file "COPYING")
|
||||
(directory "doc" ((texi-file "spam-filter")))))
|
||||
(infrastructure
|
||||
((scheme-file "guix")
|
||||
(text-file ".gitignore")
|
||||
(scheme-file "hall")))))
|
|
@ -65,8 +65,8 @@
|
|||
(/ (fcount c f cat)
|
||||
(catcount c cat))))
|
||||
|
||||
(define-method (weighted-prob (c <classifier>) f cat)
|
||||
(let ((basic-prob (fprob c f cat))
|
||||
(define-method (weighted-prob (c <classifier>) f cat prf)
|
||||
(let ((basic-prob (prf c f cat))
|
||||
(totals (fold (lambda (cat accum) (+ accum (fcount c f cat))) 0.0 (categories c))))
|
||||
(/ (+ (* (weight c) (assumed-prob c))
|
||||
(* totals basic-prob))
|
||||
|
@ -75,7 +75,7 @@
|
|||
(define-class <naive-bayes> (<classifier>))
|
||||
|
||||
(define-method (doc-prob (c <naive-bayes>) doc cat)
|
||||
(fold (lambda (feature p) (* p (weighted-prob c feature cat)))
|
||||
(fold (lambda (feature p) (* p (weighted-prob c feature cat fprob)))
|
||||
1.0
|
||||
(get-features c doc)))
|
||||
|
||||
|
@ -116,9 +116,47 @@
|
|||
(sample-train c)
|
||||
(loop (1- n)))))
|
||||
|
||||
(define (make-test-classifier)
|
||||
(define (make-test-naive-bayes-classifier)
|
||||
(let ((c (make <naive-bayes> #:get-features get-words)))
|
||||
(set-threshold! c 'spam 3.0)
|
||||
(set-threshold! c 'ham 1.0)
|
||||
(sample-train c)
|
||||
c))
|
||||
|
||||
(define-class <fisher-classifier> (<classifier>))
|
||||
|
||||
(define-method (cprob (c <fisher-classifier>) f cat)
|
||||
(let ((clf (fprob c f cat)))
|
||||
(if (zero? clf)
|
||||
0
|
||||
(let ((freqsum (fold (lambda (cat accum)
|
||||
(+ accum (fprob c f cat)))
|
||||
0.0
|
||||
(categories c))))
|
||||
(/ clf freqsum)))))
|
||||
|
||||
(define (inv-chi chi df)
|
||||
(let* ((m (/ chi 2.0))
|
||||
(sum (exp (- m)))
|
||||
(term sum))
|
||||
(for-each (lambda (i)
|
||||
(set! term (* term (/ m i)))
|
||||
(set! sum (+ sum term)))
|
||||
(iota (floor (/ df 2.0)) 1))
|
||||
(min sum 1.0)))
|
||||
|
||||
(define-method (fisher-prob (c <fisher-classifier>) doc cat)
|
||||
(define features (get-features c doc))
|
||||
(define p (fold (lambda (f accum)
|
||||
(* accum (weighted-prob c f cat cprob)))
|
||||
1.0
|
||||
features))
|
||||
(define fscore (* -2.0 (log p)))
|
||||
(inv-chi fscore (* 2 (length features))))
|
||||
|
||||
(define (make-test-fisher-classifier)
|
||||
(let ((c (make <fisher-classifier> #:get-features get-words)))
|
||||
(set-threshold! c 'spam 3.0)
|
||||
(set-threshold! c 'ham 1.0)
|
||||
(sample-train c)
|
||||
c))
|
||||
|
|
|
@ -1,35 +0,0 @@
|
|||
(define-module
|
||||
(spam-filter hconfig)
|
||||
#:use-module
|
||||
(srfi srfi-26)
|
||||
#:export
|
||||
(%version
|
||||
%author
|
||||
%license
|
||||
%copyright
|
||||
%gettext-domain
|
||||
G_
|
||||
N_
|
||||
init-nls
|
||||
init-locale))
|
||||
|
||||
(define %version "0.1")
|
||||
|
||||
(define %author "Ray Miller")
|
||||
|
||||
(define %license 'gpl3+)
|
||||
|
||||
(define %copyright '(2024))
|
||||
|
||||
(define %gettext-domain "guile-spam-filter")
|
||||
|
||||
(define G_ identity)
|
||||
|
||||
(define N_ identity)
|
||||
|
||||
(define (init-nls) "Dummy as no NLS is used" #t)
|
||||
|
||||
(define (init-locale)
|
||||
"Dummy as no NLS is used"
|
||||
#t)
|
||||
|
Loading…
Reference in a new issue