scripts/guile/b2-pitr.scm
2025-06-15 15:03:13 +01:00

255 lines
11 KiB
Scheme
Executable file

#!/usr/bin/guile \
--no-auto-compile -e main -s
!#
(use-modules (gcrypt base64)
(ice-9 binary-ports)
(ice-9 textual-ports)
(ice-9 exceptions)
((ice-9 iconv) #:select (string->bytevector))
(ice-9 match)
(ice-9 threads)
(json)
(oop goops)
(srfi srfi-1) ;; lists
(srfi srfi-19) ;; date/time
(srfi srfi-26) ;; cut
(srfi srfi-71) ;; extended let
(web client)
(web http)
(web response)
(web uri))
(define-exception-type &api-error &external-error
make-api-error
api-error?
(endpoint api-error-endpoint)
(status-code api-error-status-code)
(body api-error-body))
(define (json-response? res)
(eq? 'application/json (car (response-content-type res))))
(define (text-response? res)
(string-prefix? "text/"
(symbol->string (car (response-content-type res)))))
(define-syntax check-api-response
(syntax-rules ()
((check-api-response endpoint res body-port)
(check-api-response endpoint res body-port (<= 100 (response-code res) 399)))
((check-api-response endpoint res body-port ok?)
(unless ok?
(let ((body (cond
((json-response? res) (json->scm body-port))
((text-response? res) (get-string-all body-port))
(else "<binary response body>"))))
(close-port body-port)
(raise-exception (make-api-error endpoint (response-code res) body)))))))
(define (encode-query alist)
(fold (match-lambda*
(((k . v) s)
(if v
(string-append s
(if (zero? (string-length s)) "" "&")
(symbol->string k)
"="
(uri-encode v))
s)))
""
alist))
;; We have to override the default handling of the Authorization header as the
;; Backblaze API expects just a bare string. The b2_authorize_account endpoint
;; works as it's standard Basic Auth, but this work-around is needed for the
;; main API.
(declare-opaque-header! "authorization")
(define (authorize-account key-id app-key)
(define auth-url "https://api.backblazeb2.com/b2api/v4/b2_authorize_account")
(let* ((token (base64-encode (string->bytevector (string-append key-id ":" app-key) "utf-8")))
(res body (http-get auth-url #:headers `((authorization . ,(string-append "Basic " token))) #:streaming? #t)))
(check-api-response "/b2api/v4/b2_authorize_account" res body (and (= 200 (response-code res)) (json-response? res)))
(json->scm body)))
(define (assoc-ref* alist . keys)
(fold (lambda (k alist) (assoc-ref alist k)) alist keys))
(define-class <b2-client> ()
(key-id #:init-keyword #:key-id #:getter b2-key-id)
(app-key #:init-keyword #:app-key #:getter b2-app-key)
(auth-info #:getter b2-auth-info #:setter set-b2-auth-info!))
(define-method (initialize (cli <b2-client>) initargs)
(next-method)
(let ((auth-info (authorize-account (b2-key-id cli) (b2-app-key cli))))
(set-b2-auth-info! cli auth-info)))
(define-method (auth-token (cli <b2-client>))
(assoc-ref (b2-auth-info cli) "authorizationToken"))
(define-method (api-url (cli <b2-client>))
(assoc-ref* (b2-auth-info cli) "apiInfo" "storageApi" "apiUrl"))
(define-method (account-id (cli <b2-client>))
(assoc-ref (b2-auth-info cli) "accountId"))
(define* (api-call cli method endpoint #:key query-params payload raw?)
(let* ((url (if query-params
(string-append (api-url cli) endpoint "?" (encode-query query-params))
(string-append (api-url cli) endpoint)))
(res body-port (method url
#:body (and=> payload scm->json-string)
#:headers `((authorization . ,(auth-token cli))
(content-type . (application/json (charset . "utf-8"))))
#:streaming? #t)))
(check-api-response endpoint res body-port)
(if raw?
body-port
(let ((body (cond
((json-response? res) (json->scm body-port))
((text-response? res) (get-string-all body-port))
(else (get-bytevector-all body-port)))))
(close-port body-port)
body))))
(define* (list-buckets cli #:key bucket-id bucket-name bucket-types)
(let ((params `(("accountId" . ,(account-id cli)))))
(when bucket-id (set! params (assoc-set! params "bucketId" bucket-id)))
(when bucket-name (set! params (assoc-set! params "bucketName" bucket-name)))
(when bucket-types (set! params (assoc-set! params "bucketTypes" bucket-types)))
(assoc-ref (api-call cli
http-post
"/b2api/v4/b2_list_buckets"
#:payload params)
"buckets")))
(define (get-bucket-id cli bucket-name)
(let ((buckets (list-buckets cli #:bucket-name bucket-name)))
(when (zero? (vector-length buckets))
(raise-exception (make-exception
(make-external-error)
(make-exception-with-message "Bucket not found")
(make-exception-with-irritants bucket-name))))
(assoc-ref (vector-ref buckets 0) "bucketId")))
(define* (list-file-versions cli bucket-id #:key prefix delimiter start-file-name start-file-id max-file-count)
(api-call cli http-get "/b2api/v4/b2_list_file_versions"
#:query-params `((bucketId . ,bucket-id)
(prefix . ,prefix)
(delimiter . ,delimiter)
(startFileName . ,start-file-name)
(startFileId . ,start-file-id)
(maxFileCount . ,(and=> max-file-count number->string)))))
(define* (list-all-file-versions cli bucket-id #:key prefix delimiter)
(let loop ((result '()) (page (list-file-versions cli bucket-id
#:prefix prefix
#:delimiter delimiter
#:max-file-count 10000)))
(let ((next-file-name (assoc-ref page "nextFileName"))
(next-file-id (assoc-ref page "nextFileId"))
(result (append result (vector->list (assoc-ref page "files")))))
(if (eq? next-file-name 'null)
result
(loop result (list-file-versions cli bucket-id
#:prefix prefix
#:delimiter delimiter
#:start-file-name next-file-name
#:start-file-id next-file-id
#:max-file-count 10000))))))
;; Borrowed from guix/build/utils.scm
(define (mkdir-p dir)
"Create directory DIR and all its ancestors."
(define absolute?
(string-prefix? "/" dir))
(define not-slash
(char-set-complement (char-set #\/)))
(let loop ((components (string-tokenize dir not-slash))
(root (if absolute?
""
".")))
(match components
((head tail ...)
(let ((path (string-append root "/" head)))
(catch 'system-error
(lambda ()
(mkdir path)
(loop tail path))
(lambda args
(if (= EEXIST (system-error-errno args))
(loop tail path)
(apply throw args))))))
(() #t))))
(define (download-file-by-id cli file-id path)
(format #t "Downloading ~a~%" path)
(let ((in-port (api-call cli
http-get
"/b2api/v4/b2_download_file_by_id"
#:query-params `((fileId . ,file-id)
(b2ContentType . "application/octet-stream"))
#:raw? #t)))
(mkdir-p (dirname path))
(call-with-output-file path
(lambda (out-port)
(let loop ((data (get-bytevector-some in-port)))
(if (eof-object? data)
(close-port in-port)
(begin
(put-bytevector out-port data)
(loop (get-bytevector-some in-port)))))))))
(define (select-for-recovery files cutoff)
"File versions are returned in alphabetical order by filename and reverse of date/time uploaded.
This function selects the latest version of each file that was uploaded before the cutoff.
For each batch with the same filename, we first find the latest verison before the cutoff, then
we check if this was an 'upload' action - if it was a 'hide' action, this means the file was has
been deleted. The 'start' and 'folder' actions are not of interest here.
Backblaze creates a file called .bzEmpty in every directory. We never want to
resore these empty files."
(define (latest-before-cutoff files)
(find (lambda (f) (< (assoc-ref f "uploadTimestamp") cutoff)) files))
(define (bz-empty? f)
(string=? ".bzEmpty" (basename (assoc-ref f "fileName"))))
(define (select-by-filename files result)
(if (null? files)
result
(let* ((file-name (assoc-ref (car files) "fileName"))
(batch remainder (span (lambda (f) (string=? file-name (assoc-ref f "fileName")))
files))
(batch-latest (latest-before-cutoff batch)))
(if (and batch-latest (string=? "upload" (assoc-ref batch-latest "action")))
(select-by-filename remainder (cons batch-latest result))
(select-by-filename remainder result)))))
(select-by-filename (remove bz-empty? files) '()))
(define (restore cli files target-dir)
(n-par-for-each 4 (lambda (f)
(download-file-by-id cli
(assoc-ref f "fileId")
(string-append target-dir "/" (assoc-ref f "fileName"))))
files))
(define* (recover-at-point-in-time cli #:key bucket-name cutoff-time target-dir prefix)
(let* ((bucket-id (get-bucket-id cli bucket-name))
(file-versions (list-all-file-versions cli bucket-id #:prefix prefix))
(to-recover (select-for-recovery file-versions cutoff-time)))
(restore cli to-recover target-dir)))
(define (main args)
;; TODO: parse command-line args for bucket name, prefix, cutoff, and target dir.
(define cli (make <b2-client> #:key-id (getenv "BB_KEY_ID") #:app-key (getenv "BB_APP_KEY")))
(define bucket-name "uk-org-1729-backups")
(define cutoff (* 1000 (time-second (date->time-utc (string->date "2025-06-01 00:00:00+0000" "~Y-~m-~d ~H:~M:~S~z")))))
(define target-dir "/home/ray/Downloads/uk-org-1729-backups")
(recover-at-point-in-time cli #:bucket-name bucket-name #:cutoff-time cutoff #:target-dir target-dir))