From 9257de8439817883a5b70b3f056173edc3801569 Mon Sep 17 00:00:00 2001 From: Ray Miller Date: Sat, 10 Feb 2024 15:52:16 +0000 Subject: [PATCH 01/10] Add AWS housekeeping scripts, rename for consistency --- aws/delete_iam_policies.py | 61 +++++++++++++++++++ ...elete-iam-users.py => delete_iam_users.py} | 0 aws/delete_ssm_parameters.py | 46 ++++++++++++++ aws/delete_unused_security_groups.py | 48 +++++++++++++++ 4 files changed, 155 insertions(+) create mode 100755 aws/delete_iam_policies.py rename aws/{delete-iam-users.py => delete_iam_users.py} (100%) create mode 100755 aws/delete_ssm_parameters.py create mode 100755 aws/delete_unused_security_groups.py diff --git a/aws/delete_iam_policies.py b/aws/delete_iam_policies.py new file mode 100755 index 0000000..b669ef0 --- /dev/null +++ b/aws/delete_iam_policies.py @@ -0,0 +1,61 @@ +#!/usr/bin/python3 +# +# Delete IAM policies whose names match a pattern +# + +import argparse +import boto3 +import re +import click + +def list_policies(iam, pattern): + policies=[] + paginator = iam.get_paginator('list_policies') + for page in paginator.paginate(Scope='Local'): + for policy in page['Policies']: + if pattern.match(policy['PolicyName']): + policies.append(policy) + return policies + + +def delete_policy_versions(iam, policy): + paginator = iam.get_paginator('list_policy_versions') + for page in paginator.paginate(PolicyArn=policy['Arn']): + for version in page['Versions']: + if version['IsDefaultVersion']: + continue + print("Deleting version {v}".format(v=version['VersionId'])) + iam.delete_policy_version(PolicyArn=policy['Arn'], VersionId=version['VersionId']) + + +def delete_policy(iam, policy): + print("Deleting policy {name}".format(name=policy['PolicyName'])) + delete_policy_versions(iam, policy) + iam.delete_policy(PolicyArn=policy['Arn']) + + +def confirm_delete(policies): + print("Delete policies:") + for policy in policies: + print(policy['PolicyName']) + return click.confirm("Continue?") + + +def 
delete_matching_policies(pattern): + iam = boto3.client('iam') + policies = list_policies(iam, pattern) + if len(policies) == 0: + print("No matching policies") + return + if confirm_delete(policies): + for policy in policies: + delete_policy(iam, policy) + + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description="Delete IAM policies") + parser.add_argument("--pattern", help="Regex to match policy name", default=".*") + + args = parser.parse_args() + pattern = re.compile(args.pattern) + delete_matching_policies(pattern) diff --git a/aws/delete-iam-users.py b/aws/delete_iam_users.py similarity index 100% rename from aws/delete-iam-users.py rename to aws/delete_iam_users.py diff --git a/aws/delete_ssm_parameters.py b/aws/delete_ssm_parameters.py new file mode 100755 index 0000000..318e2b0 --- /dev/null +++ b/aws/delete_ssm_parameters.py @@ -0,0 +1,46 @@ +#!/usr/bin/python3 +# +# Delete all SSM parameters under the given prefix +# + +import argparse +import boto3 +import click +import sys + +def list_parameters(ssm, prefix): + parameters = [] + filter = {'Key': 'Name', 'Values': [prefix]} + paginator = ssm.get_paginator('describe_parameters') + for page in paginator.paginate(Filters=[filter]): + for param in page['Parameters']: + parameters.append(param['Name']) + return parameters + + +def delete_parameters(ssm, parameter_names): + n = len(parameter_names) + for i in range(0, n, 10): + batch = parameter_names[i:min(i+10, n)] + ssm.delete_parameters(Names=batch) + + +parser = argparse.ArgumentParser(description="Delete SSM Parameters") +parser.add_argument("--region", help="AWS Region name", default="eu-west-1") +parser.add_argument("--prefix", help="Delete parameters with this prefix", required=True) + +args = parser.parse_args() + +ssm = boto3.client('ssm', region_name=args.region) +params = list_parameters(ssm, args.prefix) + +if not params: + print("No parameters with prefix {prefix}".format(prefix=args.prefix)) + sys.exit(0) + +print("Delete
parameters:") +for p in params: + print(" {name}".format(name=p)) + +if click.confirm("Continue?"): + delete_parameters(ssm, params) diff --git a/aws/delete_unused_security_groups.py b/aws/delete_unused_security_groups.py new file mode 100755 index 0000000..b55ed01 --- /dev/null +++ b/aws/delete_unused_security_groups.py @@ -0,0 +1,48 @@ +#!/usr/bin/env python3 + +import boto3 +import botocore +import click + + +def get_interfaces(ec2, groupId): + ifs = ec2.describe_network_interfaces(Filters=[ + {"Name": "group-id", "Values": [groupId]} + ]) + return ifs['NetworkInterfaces'] + + +def list_unused_groups(ec2): + unused = [] + paginator = ec2.get_paginator('describe_security_groups') + for page in paginator.paginate(): + for sg in page['SecurityGroups']: + interfaces = get_interfaces(ec2, sg['GroupId']) + num_attachments = len(interfaces) + if num_attachments == 0: + unused.append(sg) + return unused + + +def delete_security_groups(ec2, security_groups): + for sg in security_groups: + try: + ec2.delete_security_group(GroupId=sg['GroupId']) + print("Deleted security group {id}".format(id=sg['GroupId'])) + except botocore.exceptions.ClientError as err: + print("Security group {id} could not be deleted".format(id=sg['GroupId'])) + print(err) + + +if __name__ == "__main__": + ec2 = boto3.client('ec2') + unused = list_unused_groups(ec2) + for sg in unused: + print(sg['GroupId'], sg['GroupName'], sg['Description']) + if click.confirm("Delete {n} groups?".format(n=len(unused))): + delete_security_groups(ec2, unused) + + + + + From f1d5e236bab34b4978fa3a7ee69d304a5d508f12 Mon Sep 17 00:00:00 2001 From: Ray Miller Date: Sat, 13 Jul 2024 15:04:53 +0100 Subject: [PATCH 02/10] Add script to delete AWS S3 bucket --- aws/delete_s3_bucket.py | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) create mode 100755 aws/delete_s3_bucket.py diff --git a/aws/delete_s3_bucket.py b/aws/delete_s3_bucket.py new file mode 100755 index 0000000..6df0773 --- /dev/null +++ 
b/aws/delete_s3_bucket.py @@ -0,0 +1,35 @@ +#!/usr/bin/python3 +# +# An S3 bucket can only be deleted if it is empty, so all +# objects must be deleted. For a versioned bucket, this includes +# object versions and object deletion markers. +# + +import argparse +import boto3 +import click + + +def delete_s3_bucket(bucket_name, dry_run=True): + s3 = boto3.resource('s3') + bucket = s3.Bucket(bucket_name) + if not bucket.creation_date: + print(f"Bucket {bucket_name} not found") + return + n = 0 + for o in bucket.objects.all(): + n = n+1 + print(f"Delete {o.key}") + if click.confirm(f"Delete {n} objects from {bucket_name}?"): + bucket.objects.all().delete() + bucket.object_versions.all().delete() + bucket.delete() + print(f"Deleted bucket {bucket_name}") + + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description="Delete S3 bucket and its contents") + parser.add_argument("bucket", help="Name of the bucket to delete", nargs=1) + args = parser.parse_args() + for bucket in args.bucket: + delete_s3_bucket(bucket) From 378ef21e3b4b17086cd9ee33f688836b41fa5837 Mon Sep 17 00:00:00 2001 From: Ray Miller Date: Sat, 13 Jul 2024 15:05:41 +0100 Subject: [PATCH 03/10] Script to dump MIME parts (useful for debugging email messages) --- misc/dump-mime-parts.pl | 9 +++++++++ 1 file changed, 9 insertions(+) create mode 100755 misc/dump-mime-parts.pl diff --git a/misc/dump-mime-parts.pl b/misc/dump-mime-parts.pl new file mode 100755 index 0000000..9d69307 --- /dev/null +++ b/misc/dump-mime-parts.pl @@ -0,0 +1,9 @@ +#!/usr/bin/perl + +use MIME::Parser; + +my $parser = new MIME::Parser; + +$parser->output_under("."); +$entity = $parser->parse(\*STDIN) or die "parse failed"; +$entity->dump_skeleton(); From c3123ff07ccb9b5c84a21c5e076c135ee6d0e5e9 Mon Sep 17 00:00:00 2001 From: Ray Miller Date: Sat, 13 Jul 2024 15:07:07 +0100 Subject: [PATCH 04/10] Add script to generate a shadow audit scheme for a PostgreSQL database. 
--- misc/generate-pg-audit-ddl | 261 +++++++++++++++++++++++++++++++++++++ 1 file changed, 261 insertions(+) create mode 100755 misc/generate-pg-audit-ddl diff --git a/misc/generate-pg-audit-ddl b/misc/generate-pg-audit-ddl new file mode 100755 index 0000000..b21e579 --- /dev/null +++ b/misc/generate-pg-audit-ddl @@ -0,0 +1,261 @@ +#!/usr/bin/env perl + +use strict; +use warnings FATAL => 'all'; + +use DBI; +use Getopt::Long; +use Const::Fast; +use Term::ReadPassword; +use Template; +use Getopt::Long; + +const my %IGNORE_TABLE => ( + cached_reports => 1, + crispr_off_targets => 1, + summaries => 1, + fixture_md5 => 1, + crispr_off_target_summaries => 1, + design_attempts => 1, + crisprs => 1, + project_alleles => 1, +); + +const my $MAIN_SCHEMA => 'public'; +const my $AUDIT_SCHEMA => 'audit'; + +const my $CREATE_AUDIT_TABLE_TT => <<'EOT'; +CREATE TABLE [% audit_schema %].[% table_name %] ( +audit_op CHAR(1) NOT NULL CHECK (audit_op IN ('D','I','U')), +audit_user TEXT NOT NULL, +audit_stamp TIMESTAMP NOT NULL, +audit_txid INTEGER NOT NULL, +[% column_spec.join(",\n") %] +); +EOT + +const my $CREATE_AUDIT_FUNCTION_TT => <<'EOT'; +CREATE OR REPLACE FUNCTION [% main_schema %].process_[% table_name %]_audit() +RETURNS TRIGGER AS $[% table_name %]_audit$ + BEGIN + IF (TG_OP = 'DELETE') THEN + INSERT INTO [% audit_schema %].[% table_name %] SELECT 'D', user, now(), txid_current(), OLD.*; + ELSIF (TG_OP = 'UPDATE') THEN + INSERT INTO [% audit_schema %].[% table_name %] SELECT 'U', user, now(), txid_current(), NEW.*; + ELSIF (TG_OP = 'INSERT') THEN + INSERT INTO [% audit_schema %].[% table_name %] SELECT 'I', user, now(), txid_current(), NEW.*; + END IF; + RETURN NULL; + END; +$[% table_name %]_audit$ LANGUAGE plpgsql; +EOT + +const my $CREATE_AUDIT_TRIGGER_TT => <<'EOT'; +CREATE TRIGGER [% table_name %]_audit +AFTER INSERT OR UPDATE OR DELETE ON [% main_schema %].[% table_name %] + FOR EACH ROW EXECUTE PROCEDURE [% main_schema %].process_[% table_name %]_audit(); +EOT + 
+const my $DROP_AUDIT_TABLE_COLUMN_TT => <<'EOT'; +ALTER TABLE [% audit_schema %].[% table_name %] DROP COLUMN [% column_name %]; +EOT + +const my $ADD_AUDIT_TABLE_COLUMN_TT => <<'EOT'; +ALTER TABLE [% audit_schema %].[% table_name %] ADD COLUMN [% column_name %] [% column_type %]; +EOT + +const my $DROP_AUDIT_TABLE_TT => <<'EOT'; +DROP TABLE [% audit_schema %].[% table_name %]; +EOT + +const my %IS_AUDIT_COL => map { $_ => 1 } qw( audit_op audit_user audit_stamp audit_txid ); + +const my %NEEDS_SIZE => map { $_ => 1 } qw( char character varchar ); + +{ + + my $pg_host = $ENV{PGHOST}; + my $pg_port = $ENV{PGPORT}; + my $pg_dbname = $ENV{PGDATABASE}; + my $pg_user = 'lims2'; + + GetOptions( + 'host=s' => \$pg_host, + 'port=s' => \$pg_port, + 'dbname=s' => \$pg_dbname, + 'user=s' => \$pg_user + ) or die "Usage: $0 [OPTIONS]\n"; + + my $pg_password; + while ( not defined $pg_password ) { + $pg_password = read_password("Enter PostgreSQL password for $pg_user: "); + } + + my $dsn = 'dbi:Pg:dbname=' . $pg_dbname; + + if ( defined $pg_host ) { + $dsn .= ";host=" . $pg_host; + } + + if ( defined $pg_port ) { + $dsn .= ";port=" . 
$pg_port; + } + + my $dbh = DBI->connect( $dsn, $pg_user, $pg_password, { AutoCommit => 1, RaiseError => 1, PrintError => 0 } ) + or die "Failed to connect to $dsn: $DBI::errstr\n"; + + const my %VARS => ( + main_schema => $MAIN_SCHEMA, + audit_schema => $AUDIT_SCHEMA, + ); + + my $tt = Template->new; + + my $main_tables = get_tables( $dbh, $MAIN_SCHEMA ); + my $audit_tables = get_tables( $dbh, $AUDIT_SCHEMA ); + + while ( my ( $table_name, $main_table ) = each %{$main_tables} ) { + next if exists $IGNORE_TABLE{$table_name}; + my $audit_table = $audit_tables->{$table_name}; + if ($audit_table) { + diff_tables( $table_name, $main_table, $audit_table, $tt, \%VARS ); + } + else { + initialize_auditing( $table_name, $main_table, $tt, \%VARS ); + } + } + + for my $table_name ( keys %{$audit_tables} ) { + unless ( $main_tables->{$table_name} ) { + $tt->process( \$DROP_AUDIT_TABLE_TT, { %VARS, table_name => $table_name } ); + } + } +} + +sub diff_tables { + my ( $table_name, $col_spec, $audit_col_spec, $tt, $VARS ) = @_; + + my %vars = ( %{$VARS}, table_name => $table_name ); + + my %cols = map { @{$_} } @{$col_spec}; + my %audit_cols = map { @{$_} } @{$audit_col_spec}; + + for my $cs ( @{$col_spec} ) { + my ( $column_name, $column_type ) = @{$cs}; + my $audit_column_type = $audit_cols{$column_name}; + if ($audit_column_type) { + if ( $audit_column_type ne $column_type ) { + warn "Table $table_name column $column_name type mismatch ($column_type vs $audit_column_type)\n"; + } + } + else { + $tt->process( \$ADD_AUDIT_TABLE_COLUMN_TT, + { %vars, column_name => $column_name, column_type => $column_type } ); + } + } + + for my $audit_column_name ( keys %audit_cols ) { + unless ( $cols{$audit_column_name} or exists $IS_AUDIT_COL{$audit_column_name} ) { + $tt->process( \$DROP_AUDIT_TABLE_COLUMN_TT, { %vars, column_name => $audit_column_name } ); + } + } + + return; +} + +sub initialize_auditing { + my ( $table_name, $col_spec, $tt, $VARS ) = @_; + + my %vars = ( + %{$VARS}, + 
table_name => $table_name, + column_spec => [ map { join q{ }, @{$_} } @{$col_spec} ] + ); + + $tt->process( \$CREATE_AUDIT_TABLE_TT, \%vars ); + $tt->process( \$CREATE_AUDIT_FUNCTION_TT, \%vars ); + $tt->process( \$CREATE_AUDIT_TRIGGER_TT, \%vars ); + + return; +} + +sub get_tables { + my ( $dbh, $schema_name ) = @_; + + my $sth = $dbh->table_info( undef, $schema_name, undef, 'TABLE' ); + + my %tables; + + while ( my $r = $sth->fetchrow_hashref ) { + $tables{ $r->{TABLE_NAME} } = get_column_info( $dbh, $schema_name, $r->{TABLE_NAME} ); + } + + return \%tables; +} + +sub get_column_info { + my ( $dbh, $schema_name, $table_name ) = @_; + + my @column_info; + + my $sth = $dbh->column_info( undef, $schema_name, $table_name, undef ); + while ( my $r = $sth->fetchrow_hashref ) { + my $type = $r->{TYPE_NAME}; + if ( exists $NEEDS_SIZE{$type} ) { + # HACK for bpchar type columns in qc_template_well_genotyping_primers and + # qc_template_well_crispr_primers tables ( qc_run_id column ) + my $col_size = $r->{COLUMN_SIZE} ? $r->{COLUMN_SIZE} : '36'; + $type = $type . '(' . $col_size . ')'; + } + push @column_info, [ $r->{COLUMN_NAME}, $type ]; + } + + return \@column_info; +} + +__END__ + +=pod + +=head1 NAME + +generate-pg-audit-ddl + +=head1 SYNOPSIS + + generate-pg-audit-ddl --host pgsrv5 --port 5437 --dbname lims2_devel --user lims2 + +=head1 DESCRIPTION + +This script interrogates the database specified by the C<--dbname> +command-line option and compares the I<main> schema with the +I<audit> schema. It emits SQL to create a table in the I<audit> schema +shadowing each table in the B<main> schema, a function to insert a +row in the corresponding I<audit> table for each C<INSERT>, C<UPDATE> +or C<DELETE> in the I<main> schema, and a trigger that calls this +function. + +=head1 LIMITATIONS + +This script assumes that the I<audit> schema already exists. It +attempts to create SQL that will transition the current state of the +I<audit> schema to the desired state (shadowing the current state of +the I<main> schema). 
Note, however, that it cannot accurately detect +column and table renames. + +If you have renamed a column or table in the main schema, this script +will emit C<DROP> and C<ADD> statements that will B<lose data>. Please review the generated SQL carefully in +case this is not what you intended. + +=head1 SEE ALSO + +The I<audit> tables, functions, and triggers are derived from an +example described here: +L<https://wiki.postgresql.org/wiki/Audit_trigger> + +=head1 AUTHOR + +Ray Miller E<lt>rm7@sanger.ac.ukE<gt> + +=cut From eb39abfd968f8526a27b09baafb1b611a87d84fe Mon Sep 17 00:00:00 2001 From: Ray Miller Date: Sat, 13 Jul 2024 15:07:44 +0100 Subject: [PATCH 05/10] Add script to split mbox exported by GMail. GMail exports the entire account to a single mbox file. This script splits it into multiple mbox files according to the labels. --- misc/sort-mail.py | 60 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 60 insertions(+) create mode 100755 misc/sort-mail.py diff --git a/misc/sort-mail.py b/misc/sort-mail.py new file mode 100755 index 0000000..1ef2429 --- /dev/null +++ b/misc/sort-mail.py @@ -0,0 +1,60 @@ +#!/usr/bin/python3 + +from mailbox import mbox + +targets = [ + "28 Ellesmere Road Purchase", + "3-mobile", + "Anglian Windows", + "Clojure", + "Conveyancing Quotes", + "CTCCambridge", + "CTCCambridgeRoutes", + "CTCOxford", + "Dad's Estate", + "Dad's Memorial", + "Dad's Memorial Service", + "Facebook", + "Golang", + "GreenMetropolis", + "LibDems", + "Nationwide", + "OkCupid", + "Pseudospam", + "Riverford", + "RussianDatingScam", + "Sanger", + "SmileBanking", + "UKUUG", + "Virgin Wines", + "Personal", + "Sent", + "Inbox", + "Archived", + "Spam", + "Bin", +] + +def target(m): + if "X-Gmail-Labels" in m: + labels = m["X-Gmail-Labels"].split(",") + for t in targets: + if t in labels: + return t + return "Uncategorized" + + +incoming = mbox("/home/ray/Mail/Gmail.mbox", create=False) + +destinations = {} + +n = 0 +for m in incoming: + t = target(m) + if t not in destinations: + destinations[t] = mbox(f"/home/ray/Mail/GMail/{t}", 
create=True) + destinations[t].add(m) + +for d in destinations: + d.flush() + From ef791b6be6c4d99bfccf49d62832d059fc86888c Mon Sep 17 00:00:00 2001 From: Ray Miller Date: Sat, 13 Jul 2024 15:10:36 +0100 Subject: [PATCH 06/10] Initial version of script to fix Nationwide credit card statement --- guile/fix-nationwide-statement.scm | 72 ++++++++++++++++++++++++++++++ 1 file changed, 72 insertions(+) create mode 100755 guile/fix-nationwide-statement.scm diff --git a/guile/fix-nationwide-statement.scm b/guile/fix-nationwide-statement.scm new file mode 100755 index 0000000..60be247 --- /dev/null +++ b/guile/fix-nationwide-statement.scm @@ -0,0 +1,72 @@ +#!/usr/bin/env -S guile -e main -s +!# + +(use-modules (ice-9 match) + (ice-9 getopt-long) + (dsv)) + + +(define date-input-format "%d %b %Y") +(define date-output-format "%Y-%m-%d") + +(define (format-date d) + (strftime date-output-format + (car (strptime date-input-format d)))) + +(define (read-statement path) + (call-with-input-file path + (lambda (port) + (dsv->scm port #:format 'rfc4180)))) + +(define currency-charset (string->char-set "0123456789.-")) + +(define (format-amount s) + (string-filter currency-charset s)) + +(define (process-row row) + (match-let (((date description location paid-out paid-in) row)) + (list (format-date date) + description + location + (format-amount paid-out) + (format-amount paid-in)))) + +(define (process-statement input-path output-path) + (match-let (((_ _ _ _ header . data) (read-statement input-path))) + (let ((updated (cons header (map process-row data)))) + (call-with-output-file output-path + (lambda (port) + (scm->dsv updated port #:format 'rfc4180)))))) + +(define* (usage #:optional errmsg) + (with-output-to-port (current-error-port) + (lambda () + (when errmsg + (display "Error: ") + (display errmsg) + (newline)) + (display "\ +Usage: fix-credit-card-statement [options] + -h, --help Display this help. + -i, --input=FILENAME Input file path. 
+ -o, --output=FILENAME Output file path. Required unless --overwrite is given. + -w, --overwrite Overwrite the input file with the updated data. +") + (exit (if errmsg EXIT_FAILURE EXIT_SUCCESS))))) + +(define (main args) + (let* ((option-spec '((help (single-char #\h) (value #f)) + (input (single-char #\i) (value #t)) + (output (single-char #\o) (value #t)) + (overwrite (single-char #\w) (value #f)))) + (options (getopt-long args option-spec)) + (help-wanted (option-ref options 'help #f)) + (input (option-ref options 'input #f)) + (output (option-ref options 'output #f)) + (overwrite (option-ref options 'overwrite #f))) + (cond + (help-wanted (usage)) + ((not input) (usage "input filename is required")) + ((and overwrite output) (usage "output filename cannot be given with --overwrite")) + ((not (or overwrite output)) (usage "output filename is required without --overwrite"))) + (process-statement input (or output input)))) From 98910d33069ed65888a301de6f5e0e7c2144a114 Mon Sep 17 00:00:00 2001 From: Ray Miller Date: Sat, 13 Jul 2024 16:55:52 +0100 Subject: [PATCH 07/10] Make Nationwide statement processing data-driven. Instead of different scripts for credit card and current account statements, define a profile for each that specifies the date and amount columns, and have this control the processing. --- guile/fix-nationwide-statement.scm | 113 +++++++++++++++++++++++------ 1 file changed, 89 insertions(+), 24 deletions(-) diff --git a/guile/fix-nationwide-statement.scm b/guile/fix-nationwide-statement.scm index 60be247..cc45410 100755 --- a/guile/fix-nationwide-statement.scm +++ b/guile/fix-nationwide-statement.scm @@ -1,11 +1,17 @@ #!/usr/bin/env -S guile -e main -s !# -(use-modules (ice-9 match) - (ice-9 getopt-long) +;; Script for updating current account and credit card statements +;; downloaded from Nationwide, who use a date and currency format +;; that Gnucash does not support. 
+ +(use-modules (ice-9 getopt-long) + ((srfi srfi-1) #:select (drop)) (dsv)) - +;; Date appears in Nationwide statements in the format +;; "10 Jan 2024", but this is not understood by Gnucash +;; so we convert it to YYYY-MM-DD format. (define date-input-format "%d %b %Y") (define date-output-format "%Y-%m-%d") @@ -13,31 +19,84 @@ (strftime date-output-format (car (strptime date-input-format d)))) -(define (read-statement path) - (call-with-input-file path - (lambda (port) - (dsv->scm port #:format 'rfc4180)))) - +;; Characters we expect to see in a numeric amount field. The +;; Nationwide statements contain a non-ASCII currency character +;; that we want to delete. (define currency-charset (string->char-set "0123456789.-")) (define (format-amount s) (string-filter currency-charset s)) -(define (process-row row) - (match-let (((date description location paid-out paid-in) row)) - (list (format-date date) - description - location - (format-amount paid-out) - (format-amount paid-in)))) +;; Profiles for the different statemnets. +;; skip: the number of leading rows to skip +;; header: boolean indicating whether or not the first unskipped +;; row is a header +;; date-cols: list of columns containing dates +;; amount-cols: list columns containing amounts +(define profiles + '(("credit-card" . ((skip . 4) + (header . #t) + (date-cols . (0)) + (amount-cols . (3 4)))) + ("current-account" . ((skip . 4) + (header . #t) + (date-cols . (0)) + (amount-cols . (3 4 5)))))) -(define (process-statement input-path output-path) - (match-let (((_ _ _ _ header . data) (read-statement input-path))) - (let ((updated (cons header (map process-row data)))) - (call-with-output-file output-path - (lambda (port) - (scm->dsv updated port #:format 'rfc4180)))))) +;; Predicate for validating the profile option. +(define (valid-profile? p) + (if (assoc p profiles) #t #f)) +;; Update a list by applying the given function to each of the +;; listed columns. 
+(define (update-list lst cols f) + (for-each (lambda (k) + (let ((v (list-ref lst k))) + (list-set! lst k (f v)))) + cols)) + +;; Given a spec listing the date and amount columns, return a +;; function that will apply the corresponding formats to a row. +(define (process-row spec) + (let ((date-cols (assq-ref spec 'date-cols)) + (amount-cols (assq-ref spec 'amount-cols))) + (lambda (row) + (when date-cols + (update-list row date-cols format-date)) + (when amount-cols + (update-list row amount-cols format-amount))))) + +;; Read a CSV from the given path. +(define (read-statement path) + (call-with-input-file path + (lambda (port) + (dsv->scm port #:format 'rfc4180)))) + +;; Write data to the given path in CSV format. +(define (write-statement data path) + (call-with-output-file path + (lambda (port) + (scm->dsv data port #:format 'rfc4180)))) + + +(define (update-data spec data) + (let* ((data (drop data (or (assq-ref spec 'skip) 0))) + (header (if (assq-ref spec 'header) (car data) #f)) + (data (if header (cdr data) data))) + (for-each (process-row spec) data) + (if header + (cons header data) + data))) + +;; Apply the updates defined in `spec` to the statement read +;; from input-path and write the updated data to output-path. +(define (process-statement spec input-path output-path) + (let ((data (read-statement input-path))) + (write-statement (update-data spec data) output-path))) + +;; Display a usage message and (optional) error message to STDERR +;; and exit. If an error message is given the exit code will be +;; non-zero. (define* (usage #:optional errmsg) (with-output-to-port (current-error-port) (lambda () @@ -51,22 +110,28 @@ Usage: fix-credit-card-statement [options] -i, --input=FILENAME Input file path. -o, --output=FILENAME Output file path. Required unless --overwrite is given. -w, --overwrite Overwrite the input file with the updated data. + -p, --profile=PROFILE Profile name [credit-card|current-account]. 
") (exit (if errmsg EXIT_FAILURE EXIT_SUCCESS))))) +;; Process command-line arguments and validate options. +;; If valid, run process-statement with the given options. (define (main args) - (let* ((option-spec '((help (single-char #\h) (value #f)) + (let* ((option-spec `((help (single-char #\h) (value #f)) (input (single-char #\i) (value #t)) (output (single-char #\o) (value #t)) - (overwrite (single-char #\w) (value #f)))) + (overwrite (single-char #\w) (value #f)) + (profile (single-char #\p) (value #t) (predicate ,valid-profile?)))) (options (getopt-long args option-spec)) (help-wanted (option-ref options 'help #f)) + (profile (option-ref options 'profile #f)) (input (option-ref options 'input #f)) (output (option-ref options 'output #f)) (overwrite (option-ref options 'overwrite #f))) (cond (help-wanted (usage)) + ((not profile) (usage "profile is required")) ((not input) (usage "input filename is required")) ((and overwrite output) (usage "output filename cannot be given with --overwrite")) ((not (or overwrite output)) (usage "output filename is required without --overwrite"))) - (process-statement input (or output input)))) + (process-statement (assoc-ref profiles profile) input (or output input)))) From ae2de95d521b5d9a4bd3537c31835f28094ec960 Mon Sep 17 00:00:00 2001 From: Ray Miller Date: Sat, 13 Jul 2024 17:07:43 +0100 Subject: [PATCH 08/10] Document update-data function. --- guile/fix-nationwide-statement.scm | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/guile/fix-nationwide-statement.scm b/guile/fix-nationwide-statement.scm index cc45410..b3b9b83 100755 --- a/guile/fix-nationwide-statement.scm +++ b/guile/fix-nationwide-statement.scm @@ -78,7 +78,10 @@ (lambda (port) (scm->dsv data port #:format 'rfc4180)))) - +;; Apply the specified updates to data (a list of rows +;; read from the CSV). If a 'skip value is specified, drop +;; this many leading rows. If a 'header is present, only +;; apply the updates to the succeeding rows. 
(define (update-data spec data) (let* ((data (drop data (or (assq-ref spec 'skip) 0))) (header (if (assq-ref spec 'header) (car data) #f)) @@ -107,7 +110,7 @@ (display "\ Usage: fix-credit-card-statement [options] -h, --help Display this help. - -i, --input=FILENAME Input file path. + -i, --input=FILENAME Input file path. Required. -o, --output=FILENAME Output file path. Required unless --overwrite is given. -w, --overwrite Overwrite the input file with the updated data. -p, --profile=PROFILE Profile name [credit-card|current-account]. From ec51a0c9193242f7397e8dd633aaf55998c361cd Mon Sep 17 00:00:00 2001 From: Ray Miller Date: Sat, 13 Jul 2024 17:14:16 +0100 Subject: [PATCH 09/10] Simplify the update-data function. --- guile/fix-nationwide-statement.scm | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/guile/fix-nationwide-statement.scm b/guile/fix-nationwide-statement.scm index b3b9b83..c410a3c 100755 --- a/guile/fix-nationwide-statement.scm +++ b/guile/fix-nationwide-statement.scm @@ -81,15 +81,13 @@ ;; Apply the specified updates to data (a list of rows ;; read from the CSV). If a 'skip value is specified, drop ;; this many leading rows. If a 'header is present, only -;; apply the updates to the succeeding rows. +;; apply the updates to the succeeding rows, preserving +;; the header as-is. (define (update-data spec data) - (let* ((data (drop data (or (assq-ref spec 'skip) 0))) - (header (if (assq-ref spec 'header) (car data) #f)) - (data (if header (cdr data) data))) - (for-each (process-row spec) data) - (if header - (cons header data) - data))) + (let* ((skip (assq-ref spec 'skip)) + (data (if skip (drop data skip) data))) + (for-each (process-row spec) (if (assq-ref spec 'header) (cdr data) data)) + data)) ;; Apply the updates defined in `spec` to the statement read ;; from input-path and write the updated data to output-path. 
From 26bf41e073341455e423854d718056f2ec5da575 Mon Sep 17 00:00:00 2001 From: Ray Miller Date: Sat, 13 Jul 2024 17:20:04 +0100 Subject: [PATCH 10/10] Fix typo. --- guile/fix-nationwide-statement.scm | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/guile/fix-nationwide-statement.scm b/guile/fix-nationwide-statement.scm index c410a3c..27854b4 100755 --- a/guile/fix-nationwide-statement.scm +++ b/guile/fix-nationwide-statement.scm @@ -27,7 +27,7 @@ (define (format-amount s) (string-filter currency-charset s)) -;; Profiles for the different statemnets. +;; Profiles for the different statement formats. ;; skip: the number of leading rows to skip ;; header: boolean indicating whether or not the first unskipped ;; row is a header