diff --git a/HACKING b/HACKING new file mode 100644 index 0000000..4470c0d --- /dev/null +++ b/HACKING @@ -0,0 +1,54 @@ +# -*- mode: org; coding: utf-8; -*- + +#+TITLE: Hacking spam-filter + +* Contributing + +By far the easiest way to hack on spam-filter is to develop using Guix: + +#+BEGIN_SRC bash + # Obtain the source code + cd /path/to/source-code + guix shell -Df guix.scm + # In the new shell, run: + hall build --execute && autoreconf -vif && ./configure && make check +#+END_SRC + +You may also want to set your directory as an authorized directory for +`guix shell' so it works without arguments. To do that, simply run: + +#+BEGIN_SRC bash + echo $(pwd) >> $HOME/.config/guix/shell-authorized-directories +#+END_SRC + +You can now hack this project's files to your heart's content, whilst +testing them from your `guix shell' shell. + +To try out any scripts in the project you can now use: + +#+BEGIN_SRC bash + ./pre-inst-env scripts/${script-name} +#+END_SRC + +If you'd like to tidy the project again, but retain the ability to test the +project from the command line, simply run: + +#+BEGIN_SRC bash + hall clean --skip "scripts/${script-name},pre-inst-env" --execute +#+END_SRC + +** Manual Installation + +If you do not yet use Guix, you will have to install this project's +dependencies manually: + - autoconf + - automake + - pkg-config + - texinfo + - guile-hall + +Once those dependencies are installed you can run: + +#+BEGIN_SRC bash + hall build -x && autoreconf -vif && ./configure && make check +#+END_SRC diff --git a/guix.scm b/guix.scm index 8e495ab..19bc390 100644 --- a/guix.scm +++ b/guix.scm @@ -1,7 +1,12 @@ (use-modules (gnu packages) + (gnu packages autotools) (gnu packages guile) - (guix build-system guile) + (gnu packages guile-xyz) + (gnu packages pkg-config) + (gnu packages texinfo) + (guix build-system gnu) + (guix download) (guix gexp) ((guix licenses) #:prefix license:) (guix packages) @@ -9,7 +14,7 @@ (package (name "guile-spam-filter") - (version 
"0.1.0") + (version "0.1") (source (local-file (dirname (current-filename)) @@ -20,9 +25,12 @@ (not (any (lambda (my-string) (string-contains file my-string)) (list ".git" ".dir-locals.el" "guix.scm")))))) - (build-system guile-build-system) + (build-system gnu-build-system) + (arguments `()) (native-inputs - (list guile-3.0)) + (list autoconf automake pkg-config texinfo)) + (inputs (list guile-3.0)) + (propagated-inputs (list)) (synopsis "") (description "") (home-page "") diff --git a/hall.scm b/hall.scm new file mode 100644 index 0000000..e1ddd9a --- /dev/null +++ b/hall.scm @@ -0,0 +1,35 @@ +(hall-description + (name "spam-filter") + (prefix "guile") + (version "0.1") + (author "Ray Miller") + (email "ray@1729.org.uk") + (copyright (2024)) + (synopsis "") + (description "") + (home-page "") + (license gpl3+) + (dependencies `()) + (skip ()) + (features + ((guix #f) + (use-guix-specs-for-dependencies #f) + (native-language-support #f) + (licensing #f))) + (files (libraries + ((scheme-file "spam-filter") + (directory + "spam-filter" + ((scheme-file "hconfig"))))) + (tests ((directory "tests" ()))) + (programs ((directory "scripts" ()))) + (documentation + ((org-file "README") + (symlink "README" "README.org") + (text-file "HACKING") + (text-file "COPYING") + (directory "doc" ((texi-file "spam-filter"))))) + (infrastructure + ((scheme-file "guix") + (text-file ".gitignore") + (scheme-file "hall"))))) diff --git a/spam-filter/core.scm b/spam-filter/core.scm index 4f672c4..ee831d2 100644 --- a/spam-filter/core.scm +++ b/spam-filter/core.scm @@ -65,8 +65,8 @@ (/ (fcount c f cat) (catcount c cat)))) -(define-method (weighted-prob (c ) f cat prf) - (let ((basic-prob (prf c f cat)) +(define-method (weighted-prob (c ) f cat) + (let ((basic-prob (fprob c f cat)) (totals (fold (lambda (cat accum) (+ accum (fcount c f cat))) 0.0 (categories c)))) (/ (+ (* (weight c) (assumed-prob c)) (* totals basic-prob)) @@ -75,7 +75,7 @@ (define-class ()) (define-method (doc-prob (c ) 
doc cat) - (fold (lambda (feature p) (* p (weighted-prob c feature cat fprob))) + (fold (lambda (feature p) (* p (weighted-prob c feature cat))) 1.0 (get-features c doc))) @@ -116,47 +116,9 @@ (sample-train c) (loop (1- n))))) -(define (make-test-naive-bayes-classifier) +(define (make-test-classifier) (let ((c (make #:get-features get-words))) (set-threshold! c 'spam 3.0) (set-threshold! c 'ham 1.0) (sample-train c) c)) - -(define-class ()) - -(define-method (cprob (c ) f cat) - (let ((clf (fprob c f cat))) - (if (zero? clf) - 0 - (let ((freqsum (fold (lambda (cat accum) - (+ accum (fprob c f cat))) - 0.0 - (categories c)))) - (/ clf freqsum))))) - -(define (inv-chi chi df) - (let* ((m (/ chi 2.0)) - (sum (exp (- m))) - (term sum)) - (for-each (lambda (i) - (set! term (* term (/ m i))) - (set! sum (+ sum term))) - (iota (floor (/ df 2.0)) 1)) - (min sum 1.0))) - -(define-method (fisher-prob (c ) doc cat) - (define features (get-features c doc)) - (define p (fold (lambda (f accum) - (* accum (weighted-prob c f cat cprob))) - 1.0 - features)) - (define fscore (* -2.0 (log p))) - (inv-chi fscore (* 2 (length features)))) - -(define (make-test-fisher-classifier) - (let ((c (make #:get-features get-words))) - (set-threshold! c 'spam 3.0) - (set-threshold! c 'ham 1.0) - (sample-train c) - c)) diff --git a/spam-filter/hconfig.scm b/spam-filter/hconfig.scm new file mode 100644 index 0000000..0c5842f --- /dev/null +++ b/spam-filter/hconfig.scm @@ -0,0 +1,35 @@ +(define-module + (spam-filter hconfig) + #:use-module + (srfi srfi-26) + #:export + (%version + %author + %license + %copyright + %gettext-domain + G_ + N_ + init-nls + init-locale)) + +(define %version "0.1") + +(define %author "Ray Miller") + +(define %license 'gpl3+) + +(define %copyright '(2024)) + +(define %gettext-domain "guile-spam-filter") + +(define G_ identity) + +(define N_ identity) + +(define (init-nls) "Dummy as no NLS is used" #t) + +(define (init-locale) + "Dummy as no NLS is used" + #t) +