Add a filter to ensure we only enqueue HTTP and HTTPS URLs.

This commit is contained in:
Ray Miller 2024-05-13 09:28:01 +01:00
parent 7f8ad24e81
commit f52c1882f1

View file

@@ -8,11 +8,13 @@
(provide crawl host=? delay-upto) (provide crawl host=? delay-upto)
(define (url-without-fragment u) (define (url-without-fragment u)
(url->string (struct-copy url u (fragment #f)))) (struct-copy url u (fragment #f)))
(define (extract-links url x) (define (extract-links url x)
(list->set (map (lambda (u) (url-without-fragment (combine-url/relative url u))) (list->set (map url->string
(se-path*/list '(a @ href) x)))) (filter (lambda (u) (or (string=? (url-scheme u) "http") (string=? (url-scheme u) "https")))
(map (lambda (u) (url-without-fragment (combine-url/relative url u)))
(se-path*/list '(a @ href) x))))))
(define (process url handler) (define (process url handler)
(match (http:get url) (match (http:get url)