Move blc code around to better match blog post.
This commit is contained in:
parent
e54d8f16bc
commit
8cf60c5f0d
1 changed files with 57 additions and 57 deletions
|
@ -32,62 +32,6 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|||
(srfi srfi-37)
|
||||
(srfi srfi-71))
|
||||
|
||||
;; List of SXPath expressions to search for links.
|
||||
(define link-paths (map sxpath
|
||||
'((// a @ href)
|
||||
(// img @ src)
|
||||
(// link @ href)
|
||||
(// video @ src)
|
||||
(// video source @ src)
|
||||
(// audio @ src)
|
||||
(// audio source @ src)
|
||||
(// picture source @ srcset)
|
||||
(// img @ srcset))))
|
||||
|
||||
;; Special handling for srcset
|
||||
(define (parse-srcset s)
|
||||
"Parse out the list of URLs from an image srcset, discarding whitespace and the
|
||||
width/density descriptor."
|
||||
(map (compose first (cut string-split <> #\space))
|
||||
(map string-trim-both (string-split s #\,))))
|
||||
|
||||
(define (extract-links body)
|
||||
"Extract links from an HTML document, discarding any tel: or mailto: links."
|
||||
(let ((x (html->sxml body)))
|
||||
(remove (lambda (s) (or (string-prefix? "tel:" s) (string-prefix? "mailto:" s)))
|
||||
(delete-duplicates
|
||||
(fold (lambda (f accum)
|
||||
(fold (lambda (x accum)
|
||||
(match-let (((attr val) x))
|
||||
(if (equal? attr 'srcset)
|
||||
(append (parse-srcset val) accum)
|
||||
(cons val accum))))
|
||||
accum
|
||||
(f x)))
|
||||
'()
|
||||
link-paths)))))
|
||||
|
||||
(define (uri-without-fragment uri)
|
||||
"If uri has a fragment, return a new URI without the fragment."
|
||||
(if (uri-fragment uri)
|
||||
(set-field uri (uri-fragment) #f)
|
||||
uri))
|
||||
|
||||
(define (resolve-relative-uri base-uri s)
|
||||
"If the string s is a relative URI, resolve it relative to base-uri."
|
||||
(let ((uri (string->uri-reference s)))
|
||||
(if (uri-scheme uri)
|
||||
uri
|
||||
(build-uri (uri-scheme base-uri)
|
||||
#:userinfo (or (uri-userinfo uri) (uri-userinfo base-uri))
|
||||
#:host (or (uri-host uri) (uri-host base-uri))
|
||||
#:port (or (uri-port uri) (uri-port base-uri))
|
||||
#:path (cond
|
||||
((string-prefix? "/" (uri-path uri)) (uri-path uri))
|
||||
((string-suffix? "/" (uri-path base-uri)) (string-append (uri-path base-uri) (uri-path uri)))
|
||||
(else (string-append (uri-path base-uri) "/" (uri-path uri))))
|
||||
#:query (uri-query uri)))))
|
||||
|
||||
;; Queue of work to do, which is stored as pairs of (referrer . uri) so we
|
||||
;; can report the referring page if `uri` is invalid
|
||||
(define queue (list))
|
||||
|
@ -172,6 +116,62 @@ width/density descriptor."
|
|||
((uri-port) (uri-port base-uri)))
|
||||
uri))
|
||||
|
||||
;; List of SXPath expressions to search for links.
|
||||
(define link-paths (map sxpath
|
||||
'((// a @ href)
|
||||
(// img @ src)
|
||||
(// link @ href)
|
||||
(// video @ src)
|
||||
(// video source @ src)
|
||||
(// audio @ src)
|
||||
(// audio source @ src)
|
||||
(// picture source @ srcset)
|
||||
(// img @ srcset))))
|
||||
|
||||
;; Special handling for srcset
|
||||
(define (parse-srcset s)
|
||||
"Parse out the list of URLs from an image srcset, discarding whitespace and the
|
||||
width/density descriptor."
|
||||
(map (compose first (cut string-split <> #\space))
|
||||
(map string-trim-both (string-split s #\,))))
|
||||
|
||||
(define (extract-links body)
|
||||
"Extract links from an HTML document, discarding any tel: or mailto: links."
|
||||
(let ((x (html->sxml body)))
|
||||
(remove (lambda (s) (or (string-prefix? "tel:" s) (string-prefix? "mailto:" s)))
|
||||
(delete-duplicates
|
||||
(fold (lambda (f accum)
|
||||
(fold (lambda (x accum)
|
||||
(match-let (((attr val) x))
|
||||
(if (equal? attr 'srcset)
|
||||
(append (parse-srcset val) accum)
|
||||
(cons val accum))))
|
||||
accum
|
||||
(f x)))
|
||||
'()
|
||||
link-paths)))))
|
||||
|
||||
(define (uri-without-fragment uri)
|
||||
"If uri has a fragment, return a new URI without the fragment."
|
||||
(if (uri-fragment uri)
|
||||
(set-field uri (uri-fragment) #f)
|
||||
uri))
|
||||
|
||||
(define (resolve-relative-uri base-uri s)
|
||||
"If the string s is a relative URI, resolve it relative to base-uri."
|
||||
(let ((uri (string->uri-reference s)))
|
||||
(if (uri-scheme uri)
|
||||
uri
|
||||
(build-uri (uri-scheme base-uri)
|
||||
#:userinfo (or (uri-userinfo uri) (uri-userinfo base-uri))
|
||||
#:host (or (uri-host uri) (uri-host base-uri))
|
||||
#:port (or (uri-port uri) (uri-port base-uri))
|
||||
#:path (cond
|
||||
((string-prefix? "/" (uri-path uri)) (uri-path uri))
|
||||
((string-suffix? "/" (uri-path base-uri)) (string-append (uri-path base-uri) (uri-path uri)))
|
||||
(else (string-append (uri-path base-uri) "/" (uri-path uri))))
|
||||
#:query (uri-query uri)))))
|
||||
|
||||
(define (enqueue-links base-uri)
|
||||
(unless (seen? base-uri)
|
||||
(hash-set! seen (uri->string base-uri) #t)
|
||||
|
@ -208,8 +208,8 @@ width/density descriptor."
|
|||
(let ((memo (make-hash-table)))
|
||||
(lambda (uri)
|
||||
(let ((key (uri->string uri)))
|
||||
;;(pk 'http-head-memoized key)
|
||||
(unless (hash-ref memo key #f)
|
||||
(format #t "[DEBUG] checking link ~a~%" key)
|
||||
(with-exception-handler
|
||||
(lambda (e)
|
||||
(hash-set! memo key e))
|
||||
|
|
Loading…
Add table
Reference in a new issue