82 lines
2.9 KiB
Scheme
82 lines
2.9 KiB
Scheme
(define-module (recipe-search)
|
|
#:use-module (ice-9 ftw)
|
|
#:use-module (ice-9 textual-ports)
|
|
#:use-module (srfi srfi-1) ; lists
|
|
#:use-module (srfi srfi-26) ; cut/cute
|
|
#:use-module (srfi srfi-69) ; hash tables
|
|
#:use-module (text-utils))
|
|
|
|
(define (get-tokens s)
|
|
(map porter-stem
|
|
(remove stop-word?
|
|
(map (compose remove-diacritics string-downcase)
|
|
(string-tokenize s char-set:letter)))))
|
|
|
|
(define (digrams xs)
|
|
(if (< (length xs) 2)
|
|
'()
|
|
(map list xs (cdr xs))))
|
|
|
|
(define (trigrams xs)
|
|
(if (< (length xs) 3)
|
|
'()
|
|
(map list xs (cdr xs) (cddr xs))))
|
|
|
|
(define stem-index (make-hash-table))
|
|
(define digram-index (make-hash-table))
|
|
(define trigram-index (make-hash-table))
|
|
|
|
(define (add-to-index! ix term val)
|
|
(let ((curr-val (hash-table-ref/default ix term '())))
|
|
(hash-table-set! ix term (cons val curr-val))))
|
|
|
|
(define (frequencies xs)
|
|
(let ((result (make-hash-table)))
|
|
(for-each (lambda (x) (hash-table-set! result x (1+ (hash-table-ref/default result x 0)))) xs)
|
|
(hash-table->alist result)))
|
|
|
|
(define (sort-by-frequency xs)
|
|
(map car
|
|
(sort (frequencies xs)
|
|
(lambda (a b) (> (cdr a) (cdr b))))))
|
|
|
|
(define (search-index ix xs)
|
|
(sort-by-frequency (append-map (cut hash-table-ref/default ix <> '()) (delete-duplicates xs))))
|
|
|
|
(define (search-stem-index tokens)
|
|
(search-index stem-index tokens))
|
|
|
|
(define (search-digram-index tokens)
|
|
(search-index digram-index (digrams tokens)))
|
|
|
|
(define (search-trigram-index tokens)
|
|
(search-index trigram-index (trigrams tokens)))
|
|
|
|
(define (load-recipe path)
|
|
(let ((name (basename path))
|
|
(tokens (get-tokens (call-with-input-file path get-string-all))))
|
|
(for-each (cut add-to-index! stem-index <> name) (delete-duplicates tokens))
|
|
(for-each (cut add-to-index! digram-index <> name) (delete-duplicates (digrams tokens)))
|
|
(for-each (cut add-to-index! trigram-index <> name) (delete-duplicates (trigrams tokens)))))
|
|
|
|
(define (load-recipes dir)
|
|
(for-each (lambda (filename)
|
|
(load-recipe (string-append dir "/" filename)))
|
|
(scandir dir (cut string-suffix? ".txt" <>))))
|
|
|
|
(define* (find-recipes search-term #:key (num-results 5))
|
|
(let ((tokens (get-tokens search-term)))
|
|
(when (zero? (length tokens))
|
|
(error "Please try a more specific search term"))
|
|
(let ((result (search-trigram-index tokens)))
|
|
(if (>= (length result) num-results)
|
|
(take result num-results)
|
|
(let ((result (delete-duplicates (append result (search-digram-index tokens)))))
|
|
(if (>= (length result) num-results)
|
|
(take result num-results)
|
|
(let ((result (delete-duplicates (append result (search-stem-index tokens)))))
|
|
(take result (min (length result) num-results)))))))))
|
|
|
|
(define (main args)
|
|
(load-recipes "./recipes")
|
|
(find-recipes "broccoli and stilton soup"))
|