From b2e883105c2b7d5eae481abc5a0c9a3de913f544 Mon Sep 17 00:00:00 2001 From: Ray Miller Date: Wed, 28 Aug 2024 14:51:43 +0100 Subject: [PATCH] Script to de-blogger markdown. This script downloads images from Blogger and replaces all the image links in the markdown files with links to the local (just-downloaded) files. It also removes Amazon associate links and tracking and replaces links to the original Blogger blog with relative links to the new blog. --- guile/de-blogger.scm | 115 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 115 insertions(+) create mode 100755 guile/de-blogger.scm diff --git a/guile/de-blogger.scm b/guile/de-blogger.scm new file mode 100755 index 0000000..70b90c2 --- /dev/null +++ b/guile/de-blogger.scm @@ -0,0 +1,115 @@ +#!/usr/bin/env -S guile -e main -s +!# + +(use-modules (srfi srfi-26) + (srfi srfi-71) + (srfi srfi-197) + (ice-9 regex) + (ice-9 textual-ports) + (ice-9 binary-ports) + (ice-9 ftw) + (ice-9 format) + (ice-9 string-fun) + (web client) + (web response)) + +(define base-dir "/home/ray/Workspace/personal/start-again-at-zero/") +(define posts-dir (string-append base-dir "content/posts/")) +(define image-dir (string-append base-dir "static/img/")) + +(define md-img-rx (make-regexp "!\\[[^]]*\\]\\((https?[^)]+)\\)")) + +(define md-img-link-rx (make-regexp "\\((https?[^)]+\\.(png|jpg))\\)" regexp/icase)) + +(define img-src-rx (make-regexp " doc)))) + +(define (main args) + (for-each process-file + (map (cute string-append posts-dir <>) + (scandir posts-dir (cute string-suffix? ".md" <>)))))