From b61996f393eeabe2c5d3c41136695c519ab66fe6 Mon Sep 17 00:00:00 2001 From: Ray Miller Date: Sat, 14 Jun 2025 21:11:19 +0100 Subject: [PATCH] Add script for backblaze point-in-time recovery --- guile/b2-pitr.scm | 244 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 244 insertions(+) create mode 100644 guile/b2-pitr.scm diff --git a/guile/b2-pitr.scm b/guile/b2-pitr.scm new file mode 100644 index 0000000..56950d8 --- /dev/null +++ b/guile/b2-pitr.scm @@ -0,0 +1,244 @@ +#!/usr/bin/guile \ +--no-auto-compile -e main -s +!# + +(use-modules (gcrypt base64) + (ice-9 binary-ports) + (ice-9 textual-ports) + (ice-9 exceptions) + ((ice-9 iconv) #:select (string->bytevector)) + (ice-9 match) + (json) + (oop goops) + (srfi srfi-1) + (srfi srfi-19) + (srfi srfi-26) + (srfi srfi-71) + (web client) + (web http) + (web response) + (web uri)) + +(define (encode-query alist) + (fold (match-lambda* + (((k . v) s) + (if v + (string-append s + (if (zero? (string-length s)) "" "&") + (symbol->string k) + "=" + (uri-encode v)) + s))) + "" + alist)) + +(define (http-ok? res) + (<= 200 (response-code res) 299)) + +(define (json-response? res) + (eq? 'application/json (car (response-content-type res)))) + +(define (text-response? res) + (string-prefix? "text/" + (symbol->string (car (response-content-type res))))) + +;; We have to override the default handling of the Authorization header as the +;; Backblaze API expects just a bare string. The b2_authorize_account endpoint +;; works as it's standard Basic Auth, but this work-around is needed for the +;; main API. + +(declare-opaque-header! "authorization") + +(define (authorize-account key-id app-key) + (define auth-url "https://api.backblazeb2.com/b2api/v4/b2_authorize_account") + (let* ((token (base64-encode (string->bytevector (string-append key-id ":" app-key) "utf-8"))) + (res body (http-get auth-url #:headers `((authorization . ,(string-append "Basic " token))) #:streaming? #t))) + (unless (http-ok? res) + (raise-exception (make-exception + (make-external-error) + (make-exception-with-message "Error response from b2_authorize_account") + (make-exception-with-irritants res)))) + (json->scm body))) + +(define (assoc-ref* alist . keys) + (fold (lambda (k alist) (assoc-ref alist k)) alist keys)) + +(define-class () + (key-id #:init-keyword #:key-id #:getter b2-key-id) + (app-key #:init-keyword #:app-key #:getter b2-app-key) + (auth-info #:getter b2-auth-info #:setter set-b2-auth-info!)) + +(define-method (initialize (cli ) initargs) + (next-method) + (let ((auth-info (authorize-account (b2-key-id cli) (b2-app-key cli)))) + (set-b2-auth-info! cli auth-info))) + +(define-method (auth-token (cli )) + (assoc-ref (b2-auth-info cli) "authorizationToken")) + +(define-method (api-url (cli )) + (assoc-ref* (b2-auth-info cli) "apiInfo" "storageApi" "apiUrl")) + +(define-method (account-id (cli )) + (assoc-ref (b2-auth-info cli) "accountId")) + +(define (check-api-response res body-port err-msg) + (unless (http-ok? res) + (let ((body (cond + ((json-response? res) (json->scm body-port)) + ((text-response? res) (get-string-all body-port)) + (else "Binary response body")))) + (close-port body-port) + (raise-exception (make-exception + (make-external-error) + (make-exception-with-message err-msg) + (make-exception-with-irritants (get-string-all body))))))) + +(define* (api-call cli method path #:key query-params payload raw?) + (let* ((url (if query-params + (string-append (api-url cli) path "?" (encode-query query-params)) + (string-append (api-url cli) path))) + (res body-port (method url + #:body (and=> payload scm->json-string) + #:headers `((authorization . ,(auth-token cli)) + (content-type . (application/json (charset . "utf-8")))) + #:streaming? #t))) + (check-api-response res body-port (string-append "Error from " path)) + (if raw? + body-port + (let ((body (cond + ((json-response? res) (json->scm body-port)) + ((text-response? res) (get-string-all body-port)) + (else (get-bytevector-all body-port))))) + (close-port body-port) + body)))) + +(define (list-buckets cli) + (assoc-ref (api-call cli + http-post + "/b2api/v4/b2_list_buckets" + #:payload `(("accountId" . ,(account-id cli)))) + "buckets")) + +(define* (list-file-versions cli bucket-id #:key prefix delimiter start-file-name start-file-id max-file-count) + (api-call cli http-get "/b2api/v4/b2_list_file_versions" + #:query-params `((bucketId . ,bucket-id) + (prefix . ,prefix) + (delimiter . ,delimiter) + (startFileName . ,start-file-name) + (startFileId . ,start-file-id) + (maxFileCount . ,(and=> max-file-count number->string))))) + +(define* (list-all-file-versions cli bucket-id #:key prefix delimiter) + (let loop ((result '()) (page (list-file-versions cli bucket-id + #:prefix prefix + #:delimiter delimiter + #:max-file-count 10000))) + (let ((next-file-name (assoc-ref page "nextFileName")) + (next-file-id (assoc-ref page "nextFileId")) + (result (append result (vector->list (assoc-ref page "files"))))) + (if (eq? next-file-name 'null) + result + (loop result (list-file-versions cli bucket-id + #:prefix prefix + #:delimiter delimiter + #:start-file-name next-file-name + #:start-file-id next-file-id + #:max-file-count 10000)))))) + +(define (resolve-bucket-id cli bucket-name) + (let ((bucket (find (lambda (b) (string=? bucket-name (assoc-ref b "bucketName"))) + (vector->list (list-buckets cli))))) + (if bucket + (assoc-ref bucket "bucketId") + #f))) + +;; Borrowed from guix/build/utils.scm +(define (mkdir-p dir) + "Create directory DIR and all its ancestors." + (define absolute? + (string-prefix? "/" dir)) + + (define not-slash + (char-set-complement (char-set #\/))) + + (let loop ((components (string-tokenize dir not-slash)) + (root (if absolute? + "" + "."))) + (match components + ((head tail ...) + (let ((path (string-append root "/" head))) + (catch 'system-error + (lambda () + (mkdir path) + (loop tail path)) + (lambda args + (if (= EEXIST (system-error-errno args)) + (loop tail path) + (apply throw args)))))) + (() #t)))) + + +(define (download-file-by-id cli file-id path) + (format #t "Downloading ~a~%" path) + (let ((in-port (api-call cli + http-get + "/b2api/v4/b2_download_file_by_id" + #:query-params `((fileId . ,file-id) + (b2ContentType . "application/octet-stream")) + #:raw? #t))) + (mkdir-p (dirname path)) + (call-with-output-file path + (lambda (out-port) + (let loop ((data (get-bytevector-some in-port))) + (if (eof-object? data) + (close-port in-port) + (begin + (put-bytevector out-port data) + (loop (get-bytevector-some in-port))))))))) + +(define (select-for-recovery files cutoff) + "File versions are returned in alphabetical order by filename and reverse of date/time uploaded. +This function selects the latest version of each file that was uploaded before the cutoff. + +For each batch with the same filename, we first find the latest verison before the cutoff, then +we check if this was an 'upload' action - if it was a 'hide' action, this means the file was has +been deleted. The 'start' and 'folder' actions are not of interest here. + +Backblaze creates a file called .bzEmpty in every directory. We never want to +resore these empty files." + (define (latest-before-cutoff files) + (find (lambda (f) + (< (assoc-ref f "uploadTimestamp") cutoff)) + files)) + (define (select-by-filename files result) + (if (null? files) + result + (let* ((file-name (assoc-ref (car files) "fileName")) + (batch remainder (span (lambda (f) (string=? file-name (assoc-ref f "fileName"))) + files)) + (batch-latest (latest-before-cutoff batch))) + (if (and batch-latest (string=? "upload" (assoc-ref batch-latest "action"))) + (select-by-filename remainder (cons batch-latest result)) + (select-by-filename remainder result))))) + (select-by-filename (remove (lambda (f) (string=? ".bzEmpty" (assoc-ref f "fileName"))) files) '())) + +(define (restore cli files target-dir) + (for-each (lambda (f) + (download-file-by-id cli + (assoc-ref f "fileId") + (string-append target-dir "/" (assoc-ref f "fileName")))) + files)) + +(define (main args) + (define cli (make #:key-id (getenv "BB_KEY_ID") #:app-key (getenv "BB_APP_KEY"))) + (define bucket-id (resolve-bucket-id cli "uk-org-1729-server-backups")) + (define cutoff (* 1000 (time-second (date->time-utc (string->date "2025-06-01 00:00:00+0000" "~Y-~m-~d ~H:~M:~S~z"))))) + + (define file-versions (list-all-file-versions cli bucket-id)) + (define to-recover (select-for-recovery file-versions cutoff)) + (define target-dir "/home/ray/Downloads/uk-org-1729-server-backups") + + #f + )