diff --git a/guile/broken-link-checker.scm b/guile/broken-link-checker.scm index e7bf846..c2ef41a 100755 --- a/guile/broken-link-checker.scm +++ b/guile/broken-link-checker.scm @@ -32,62 +32,6 @@ along with this program. If not, see . (srfi srfi-37) (srfi srfi-71)) -;; List of SXPath expressions to search for links. -(define link-paths (map sxpath - '((// a @ href) - (// img @ src) - (// link @ href) - (// video @ src) - (// video source @ src) - (// audio @ src) - (// audio source @ src) - (// picture source @ srcset) - (// img @ srcset)))) - -;; Special handling for srcset -(define (parse-srcset s) - "Parse out the list of URLs from an image srcset, discarding whitespace and the -width/density descriptor." - (map (compose first (cut string-split <> #\space)) - (map string-trim-both (string-split s #\,)))) - -(define (extract-links body) - "Extract links from an HTML document, discarding any tel: or mailto: links." - (let ((x (html->sxml body))) - (remove (lambda (s) (or (string-prefix? "tel:" s) (string-prefix? "mailto:" s))) - (delete-duplicates - (fold (lambda (f accum) - (fold (lambda (x accum) - (match-let (((attr val) x)) - (if (equal? attr 'srcset) - (append (parse-srcset val) accum) - (cons val accum)))) - accum - (f x))) - '() - link-paths))))) - -(define (uri-without-fragment uri) - "If uri has a fragment, return a new URI without the fragment." - (if (uri-fragment uri) - (set-field uri (uri-fragment) #f) - uri)) - -(define (resolve-relative-uri base-uri s) - "If the string s is a relative URI, resolve it relative to base-uri." - (let ((uri (string->uri-reference s))) - (if (uri-scheme uri) - uri - (build-uri (uri-scheme base-uri) - #:userinfo (or (uri-userinfo uri) (uri-userinfo base-uri)) - #:host (or (uri-host uri) (uri-host base-uri)) - #:port (or (uri-port uri) (uri-port base-uri)) - #:path (cond - ((string-prefix? "/" (uri-path uri)) (uri-path uri)) - ((string-suffix? "/" (uri-path base-uri)) (string-append (uri-path base-uri) (uri-path uri))) - (else (string-append (uri-path base-uri) "/" (uri-path uri)))) - #:query (uri-query uri))))) - ;; Queue of work to do, which is stored as pairs of (referrer . uri) so we ;; can report the referring page if `uri` is invalid (define queue (list)) @@ -172,6 +116,62 @@ width/density descriptor." ((uri-port) (uri-port base-uri))) uri)) +;; List of SXPath expressions to search for links. +(define link-paths (map sxpath + '((// a @ href) + (// img @ src) + (// link @ href) + (// video @ src) + (// video source @ src) + (// audio @ src) + (// audio source @ src) + (// picture source @ srcset) + (// img @ srcset)))) + +;; Special handling for srcset +(define (parse-srcset s) + "Parse out the list of URLs from an image srcset, discarding whitespace and the +width/density descriptor." + (map (compose first (cut string-split <> #\space)) + (map string-trim-both (string-split s #\,)))) + +(define (extract-links body) + "Extract links from an HTML document, discarding any tel: or mailto: links." + (let ((x (html->sxml body))) + (remove (lambda (s) (or (string-prefix? "tel:" s) (string-prefix? "mailto:" s))) + (delete-duplicates + (fold (lambda (f accum) + (fold (lambda (x accum) + (match-let (((attr val) x)) + (if (equal? attr 'srcset) + (append (parse-srcset val) accum) + (cons val accum)))) + accum + (f x))) + '() + link-paths))))) + +(define (uri-without-fragment uri) + "If uri has a fragment, return a new URI without the fragment." + (if (uri-fragment uri) + (set-field uri (uri-fragment) #f) + uri)) + +(define (resolve-relative-uri base-uri s) + "If the string s is a relative URI, resolve it relative to base-uri." + (let ((uri (string->uri-reference s))) + (if (uri-scheme uri) + uri + (build-uri (uri-scheme base-uri) + #:userinfo (or (uri-userinfo uri) (uri-userinfo base-uri)) + #:host (or (uri-host uri) (uri-host base-uri)) + #:port (or (uri-port uri) (uri-port base-uri)) + #:path (cond + ((string-prefix? "/" (uri-path uri)) (uri-path uri)) + ((string-suffix? "/" (uri-path base-uri)) (string-append (uri-path base-uri) (uri-path uri))) + (else (string-append (uri-path base-uri) "/" (uri-path uri)))) + #:query (uri-query uri))))) + (define (enqueue-links base-uri) (unless (seen? base-uri) (hash-set! seen (uri->string base-uri) #t) @@ -208,8 +208,8 @@ width/density descriptor." (let ((memo (make-hash-table))) (lambda (uri) (let ((key (uri->string uri))) - ;;(pk 'http-head-memoized key) (unless (hash-ref memo key #f) + (format #t "[DEBUG] checking link ~a~%" key) (with-exception-handler (lambda (e) (hash-set! memo key e))