Add a filter to ensure we only enqueue http and https URLs.
This commit is contained in:
parent
7f8ad24e81
commit
f52c1882f1
1 changed file with 5 additions and 3 deletions
|
@@ -8,11 +8,13 @@
|
|||
(provide crawl host=? delay-upto)
|
||||
|
||||
(define (url-without-fragment u)
|
||||
(url->string (struct-copy url u (fragment #f))))
|
||||
(struct-copy url u (fragment #f)))
|
||||
|
||||
(define (extract-links url x)
|
||||
(list->set (map (lambda (u) (url-without-fragment (combine-url/relative url u)))
|
||||
(se-path*/list '(a @ href) x))))
|
||||
(list->set (map url->string
|
||||
(filter (lambda (u) (or (string=? (url-scheme u) "http") (string=? (url-scheme u) "https")))
|
||||
(map (lambda (u) (url-without-fragment (combine-url/relative url u)))
|
||||
(se-path*/list '(a @ href) x))))))
|
||||
|
||||
(define (process url handler)
|
||||
(match (http:get url)
|
||||
|
|
Loading…
Reference in a new issue