From 8c4df8948351db47265db8d2ccd55d513ecc2fed Mon Sep 17 00:00:00 2001
From: Ray Miller
Date: Fri, 31 Jan 2025 15:12:54 +0000
Subject: [PATCH] Update blc script to specify headers as strings

This makes the code a bit more readable. We then map over the list of
strings to produce the parsed format the web client requires.
---
 guile/broken-link-checker.scm | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/guile/broken-link-checker.scm b/guile/broken-link-checker.scm
index dc85b46..968b173 100755
--- a/guile/broken-link-checker.scm
+++ b/guile/broken-link-checker.scm
@@ -23,6 +23,7 @@ along with this program. If not, see <https://www.gnu.org/licenses/>.
              (web client)
              (web response)
              (web uri)
+             ((web http) #:select (parse-header))
              ((htmlprag) #:select (html->sxml))
              ((sxml xpath) #:select (sxpath))
              (srfi srfi-1)
@@ -177,10 +178,12 @@ along with this program. If not, see <https://www.gnu.org/licenses/>.
 
 ;; Some sites return 403 errors for bot requests, these headers make
 ;; us look more like a real browser.
-(define request-headers '((accept . ((text/html) (application/xhtml+xml) (application/xml (q . 900)) (*/* (q . 800))))
-                          (accept-encoding . ((1000 . "gzip") (1000 . "deflate") (1000 . "br") (1000 . "zstd")))
-                          (accept-language . ((1000 . "en-GB") (500 . "en")))
-                          (user-agent . "Mozilla/5.0 (X11; Linux x86_64; rv:133.0) Gecko/20100101 Firefox/133.0")))
+(define request-headers
+  (map (match-lambda ((k . v) (cons k (parse-header k v))))
+       '((accept . "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8")
+         (accept-encoding . "gzip, deflate, br, zstd")
+         (accept-language . "en-GB,en;q=0.5")
+         (user-agent . "Mozilla/5.0 (X11; Linux x86_64; rv:133.0) Gecko/20100101 Firefox/133.0"))))
 
 ;; A memoized version of http-get. This allows us to quickly check a URL we have
 ;; seen before (when it is linked from multiple different pages) without generating