recipe-search-tech-test/recipe-search.scm
2025-07-01 18:01:08 +01:00

82 lines
2.9 KiB
Scheme

(define-module (recipe-search)
#:use-module (ice-9 ftw)
#:use-module (ice-9 textual-ports)
#:use-module (srfi srfi-1) ; lists
#:use-module (srfi srfi-26) ; cut/cute
#:use-module (srfi srfi-69) ; hash tables
#:use-module (text-utils))
(define (get-tokens s)
(map porter-stem
(remove stop-word?
(map (compose remove-diacritics string-downcase)
(string-tokenize s char-set:letter)))))
(define (digrams xs)
(if (< (length xs) 2)
'()
(map list xs (cdr xs))))
(define (trigrams xs)
(if (< (length xs) 3)
'()
(map list xs (cdr xs) (cddr xs))))
(define stem-index (make-hash-table))
(define digram-index (make-hash-table))
(define trigram-index (make-hash-table))
(define (add-to-index! ix term val)
(let ((curr-val (hash-table-ref/default ix term '())))
(hash-table-set! ix term (cons val curr-val))))
(define (frequencies xs)
(let ((result (make-hash-table)))
(for-each (lambda (x) (hash-table-set! result x (1+ (hash-table-ref/default result x 0)))) xs)
(hash-table->alist result)))
(define (sort-by-frequency xs)
(map car
(sort (frequencies xs)
(lambda (a b) (> (cdr a) (cdr b))))))
(define (search-index ix xs)
(sort-by-frequency (append-map (cut hash-table-ref/default ix <> '()) (delete-duplicates xs))))
(define (search-stem-index tokens)
(search-index stem-index tokens))
(define (search-digram-index tokens)
(search-index digram-index (digrams tokens)))
(define (search-trigram-index tokens)
(search-index trigram-index (trigrams tokens)))
(define (load-recipe path)
(let ((name (basename path))
(tokens (get-tokens (call-with-input-file path get-string-all))))
(for-each (cut add-to-index! stem-index <> name) (delete-duplicates tokens))
(for-each (cut add-to-index! digram-index <> name) (delete-duplicates (digrams tokens)))
(for-each (cut add-to-index! trigram-index <> name) (delete-duplicates (trigrams tokens)))))
(define (load-recipes dir)
(for-each (lambda (filename)
(load-recipe (string-append dir "/" filename)))
(scandir dir (cut string-suffix? ".txt" <>))))
(define* (find-recipes search-term #:key (num-results 5))
(let ((tokens (get-tokens search-term)))
(when (zero? (length tokens))
(error "Please try a more specific search term"))
(let ((result (search-trigram-index tokens)))
(if (>= (length result) num-results)
(take result num-results)
(let ((result (delete-duplicates (append result (search-digram-index tokens)))))
(if (>= (length result) num-results)
(take result num-results)
(let ((result (delete-duplicates (append result (search-stem-index tokens)))))
(take result (min (length result) num-results)))))))))
(define (main args)
(load-recipes "./recipes")
(find-recipes "broccoli and stilton soup"))