(define-module (recipe-search) #:use-module (ice-9 ftw) #:use-module (ice-9 textual-ports) #:use-module (srfi srfi-1) ; lists #:use-module (srfi srfi-26) ; cut/cute #:use-module (srfi srfi-69) ; hash tables #:use-module (text-utils)) (define (get-tokens s) (map porter-stem (remove stop-word? (map (compose remove-diacritics string-downcase) (string-tokenize s char-set:letter))))) (define (digrams xs) (if (< (length xs) 2) '() (map list xs (cdr xs)))) (define (trigrams xs) (if (< (length xs) 3) '() (map list xs (cdr xs) (cddr xs)))) (define stem-index (make-hash-table)) (define digram-index (make-hash-table)) (define trigram-index (make-hash-table)) (define (add-to-index! ix term val) (let ((curr-val (hash-table-ref/default ix term '()))) (hash-table-set! ix term (cons val curr-val)))) (define (frequencies xs) (let ((result (make-hash-table))) (for-each (lambda (x) (hash-table-set! result x (1+ (hash-table-ref/default result x 0)))) xs) (hash-table->alist result))) (define (sort-by-frequency xs) (map car (sort (frequencies xs) (lambda (a b) (> (cdr a) (cdr b)))))) (define (search-index ix xs) (sort-by-frequency (append-map (cut hash-table-ref/default ix <> '()) (delete-duplicates xs)))) (define (search-stem-index tokens) (search-index stem-index tokens)) (define (search-digram-index tokens) (search-index digram-index (digrams tokens))) (define (search-trigram-index tokens) (search-index trigram-index (trigrams tokens))) (define (load-recipe path) (let ((name (basename path)) (tokens (get-tokens (call-with-input-file path get-string-all)))) (for-each (cut add-to-index! stem-index <> name) (delete-duplicates tokens)) (for-each (cut add-to-index! digram-index <> name) (delete-duplicates (digrams tokens))) (for-each (cut add-to-index! trigram-index <> name) (delete-duplicates (trigrams tokens))))) (define (load-recipes dir) (for-each (lambda (filename) (load-recipe (string-append dir "/" filename))) (scandir dir (cut string-suffix? ".txt" <>)))) (define* (find-recipes search-term #:key (num-results 5)) (let ((tokens (get-tokens search-term))) (when (zero? (length tokens)) (error "Please try a more specific search term")) (let ((result (search-trigram-index tokens))) (if (>= (length result) num-results) (take result num-results) (let ((result (delete-duplicates (append result (search-digram-index tokens))))) (if (>= (length result) num-results) (take result num-results) (let ((result (delete-duplicates (append result (search-stem-index tokens))))) (take result (min (length result) num-results))))))))) (define (main args) (load-recipes "./recipes") (find-recipes "broccoli and stilton soup"))