Compare commits


10 commits

Author SHA1 Message Date
Ray Miller
26bf41e073 Fix typo. 2024-07-13 17:20:04 +01:00
Ray Miller
ec51a0c919 Simplify the update-data function. 2024-07-13 17:14:16 +01:00
Ray Miller
ae2de95d52 Document update-data function. 2024-07-13 17:07:43 +01:00
Ray Miller
98910d3306 Make Nationwide statement processing data-driven.
Instead of different scripts for credit card and current account
statements, define a profile for each that specifies the date
and amount columns, and have this control the processing.
2024-07-13 16:55:52 +01:00
Ray Miller
ef791b6be6 Initial version of script to fix Nationwide credit card statement 2024-07-13 15:10:36 +01:00
Ray Miller
eb39abfd96 Add script to split mbox exported by GMail.
GMail exports the entire account to a single mbox file. This script
splits it into multiple mbox files according to the labels.
2024-07-13 15:07:44 +01:00
Ray Miller
c3123ff07c Add script to generate a shadow audit scheme for a PostgreSQL database. 2024-07-13 15:07:07 +01:00
Ray Miller
378ef21e3b Script to dump MIME parts (useful for debugging email messages) 2024-07-13 15:05:41 +01:00
Ray Miller
f1d5e236ba Add script to delete AWS S3 bucket 2024-07-13 15:04:53 +01:00
Ray Miller
9257de8439 Add AWS housekeeping scripts, rename for consistency 2024-02-10 15:52:16 +00:00
9 changed files with 658 additions and 0 deletions

aws/ Executable file
View file

@ -0,0 +1,61 @@
# Delete IAM policies whose names match a pattern
import argparse
import boto3
import re
import click
def list_policies(iam, pattern):
    """Return the customer-managed IAM policies whose name matches *pattern*.

    Args:
        iam: IAM client providing ``get_paginator('list_policies')``.
        pattern: Compiled regular expression; it is applied with ``match``,
            so it is anchored at the start of the policy name.

    Returns:
        A list of the policy dicts, in the order the API returned them.
    """
    paginator = iam.get_paginator('list_policies')
    policies = []
    # Scope='Local' restricts the listing to customer-managed policies.
    for page in paginator.paginate(Scope='Local'):
        policies.extend(
            policy
            for policy in page['Policies']
            if pattern.match(policy['PolicyName'])
        )
    return policies
def delete_policy_versions(iam, policy):
    """Delete every non-default version of *policy*.

    The default version is skipped: IAM does not allow it to be removed
    with ``delete_policy_version``; it goes away when the policy itself
    is deleted.

    Args:
        iam: IAM client.
        policy: Policy dict containing at least ``'Arn'``.
    """
    paginator = iam.get_paginator('list_policy_versions')
    for page in paginator.paginate(PolicyArn=policy['Arn']):
        for version in page['Versions']:
            if version['IsDefaultVersion']:
                continue
            print("Deleting version {v}".format(v=version['VersionId']))
            iam.delete_policy_version(PolicyArn=policy['Arn'], VersionId=version['VersionId'])
def delete_policy(iam, policy):
    """Delete *policy*: first its non-default versions, then the policy.

    Args:
        iam: IAM client.
        policy: Policy dict containing ``'PolicyName'`` and ``'Arn'``.
    """
    print("Deleting policy {name}".format(name=policy['PolicyName']))
    # Non-default versions have to be removed before the policy can go.
    delete_policy_versions(iam, policy)
    iam.delete_policy(PolicyArn=policy['Arn'])
def confirm_delete(policies):
    """List the policies about to be deleted and ask for confirmation.

    Args:
        policies: Iterable of policy dicts containing ``'PolicyName'``.

    Returns:
        The boolean answer from ``click.confirm``.
    """
    print("Delete policies:")
    for policy in policies:
        print("  {name}".format(name=policy['PolicyName']))
    return click.confirm("Continue?")
def delete_matching_policies(pattern):
    """Find policies matching *pattern* and delete them after confirmation.

    Args:
        pattern: Compiled regular expression matched against policy names.
    """
    iam = boto3.client('iam')
    policies = list_policies(iam, pattern)
    if not policies:
        print("No matching policies")
        # Nothing to confirm or delete.
        return
    if confirm_delete(policies):
        for policy in policies:
            delete_policy(iam, policy)
if __name__ == "__main__":
    # Command-line entry point: compile the --pattern regex and delete the
    # matching policies (after interactive confirmation).
    parser = argparse.ArgumentParser(description="Delete IAM policies")
    parser.add_argument("--pattern", help="Regex to match policy name", default=".*")
    args = parser.parse_args()
    try:
        pattern = re.compile(args.pattern)
    except re.error as err:
        # Report a bad regex as a usage error instead of a traceback.
        parser.error("invalid --pattern: {err}".format(err=err))
    delete_matching_policies(pattern)

aws/ Executable file
View file

@ -0,0 +1,35 @@
# An S3 bucket can only be deleted if it is empty, so all
# objects must be deleted. For a versioned bucket, this includes
# object versions and object deletion markers.
import argparse
import boto3
import click
def delete_s3_bucket(bucket_name, dry_run=True):
    """Empty and delete the named S3 bucket after confirmation.

    Args:
        bucket_name: Name of the bucket.
        dry_run: When true, only list the objects that would be deleted;
            nothing is removed and no prompt is shown.
    """
    s3 = boto3.resource('s3')
    bucket = s3.Bucket(bucket_name)
    if not bucket.creation_date:
        print(f"Bucket {bucket_name} not found")
        return

    n = 0
    for o in bucket.objects.all():
        n += 1
        print(f"Delete {o.key}")

    if dry_run:
        print(f"Dry run: {n} objects in {bucket_name} left untouched")
        return

    if click.confirm(f"Delete {n} objects from {bucket_name}?"):
        # A bucket must be empty before it can be deleted; deleting via
        # object_versions also removes old versions and deletion markers
        # of a versioned bucket.
        bucket.object_versions.delete()
        bucket.delete()
        print(f"Deleted bucket {bucket_name}")
if __name__ == "__main__":
    # Command-line entry point: delete the bucket named on the command line.
    parser = argparse.ArgumentParser(description="Delete S3 bucket and its contents")
    parser.add_argument("bucket", help="Name of the bucket to delete", nargs=1)
    parser.add_argument("--dry-run", action="store_true",
                        help="List the objects without deleting anything")
    args = parser.parse_args()
    # nargs=1 yields a one-element list.
    for bucket in args.bucket:
        delete_s3_bucket(bucket, dry_run=args.dry_run)

aws/ Executable file
View file

@ -0,0 +1,46 @@
# Delete all SSM parameters under the given prefix
import argparse
import boto3
import click
import sys
def list_parameters(ssm, prefix):
    """Return the names of the SSM parameters selected by *prefix*.

    Args:
        ssm: SSM client providing ``get_paginator('describe_parameters')``.
        prefix: Value passed to the ``Name`` filter of ``describe_parameters``.

    Returns:
        A list of parameter names (strings).
    """
    name_filter = {'Key': 'Name', 'Values': [prefix]}
    paginator = ssm.get_paginator('describe_parameters')
    parameters = []
    for page in paginator.paginate(Filters=[name_filter]):
        parameters.extend(param['Name'] for param in page['Parameters'])
    return parameters
def delete_parameters(ssm, parameter_names):
    """Delete the named SSM parameters in batches of ten.

    Args:
        ssm: SSM client.
        parameter_names: List of parameter names to delete.
    """
    batch_size = 10  # parameters are deleted ten names per request
    for i in range(0, len(parameter_names), batch_size):
        batch = parameter_names[i:i + batch_size]
        ssm.delete_parameters(Names=batch)
# Script body: list the parameters selected by --prefix, show them, and
# delete them once the user confirms.
parser = argparse.ArgumentParser(description="Delete SSM Parameters")
parser.add_argument("--region", help="AWS Region name", default="eu-west-1")
parser.add_argument("--prefix", help="Delete parameters with this prefix", required=True)
args = parser.parse_args()

ssm = boto3.client('ssm', region_name=args.region)
params = list_parameters(ssm, args.prefix)
if not params:
    print("No parameters with prefix {prefix}".format(prefix=args.prefix))
    # Nothing to delete, so do not show an empty list and a prompt.
    sys.exit(0)

print("Delete parameters:")
for p in params:
    print(" {name}".format(name=p))
if click.confirm("Continue?"):
    delete_parameters(ssm, params)

View file

@ -0,0 +1,48 @@
#!/usr/bin/env python3
import boto3
import botocore
import click
def get_interfaces(ec2, groupId):
    """Return the network interfaces that reference a security group.

    Args:
        ec2: EC2 client.
        groupId: ID of the security group to look up.

    Returns:
        The ``'NetworkInterfaces'`` list from ``describe_network_interfaces``.
    """
    ifs = ec2.describe_network_interfaces(Filters=[
        {"Name": "group-id", "Values": [groupId]}
    ])
    return ifs['NetworkInterfaces']
def list_unused_groups(ec2):
    """Return the security groups that no network interface references.

    Args:
        ec2: EC2 client.

    Returns:
        A list of security-group dicts as returned by
        ``describe_security_groups``.
    """
    unused = []
    paginator = ec2.get_paginator('describe_security_groups')
    for page in paginator.paginate():
        for sg in page['SecurityGroups']:
            # A group with no attached interface counts as unused.
            if not get_interfaces(ec2, sg['GroupId']):
                unused.append(sg)
    return unused
def delete_security_groups(ec2, security_groups):
    """Delete each security group, reporting the ones that cannot be deleted.

    A failure for one group is printed and does not stop the remaining
    deletions.

    Args:
        ec2: EC2 client.
        security_groups: Iterable of security-group dicts containing
            ``'GroupId'``.
    """
    for sg in security_groups:
        try:
            ec2.delete_security_group(GroupId=sg['GroupId'])
        except botocore.exceptions.ClientError as err:
            print("Security group {id} could not be deleted: {err}".format(id=sg['GroupId'], err=err))
        else:
            print("Deleted security group {id}".format(id=sg['GroupId']))
if __name__ == "__main__":
    # Command-line entry point: list the unused security groups and delete
    # them once the user confirms.
    ec2 = boto3.client('ec2')
    unused = list_unused_groups(ec2)
    if not unused:
        # Avoid asking "Delete 0 groups?".
        print("No unused security groups")
    else:
        for sg in unused:
            print(sg['GroupId'], sg['GroupName'], sg['Description'])
        if click.confirm("Delete {n} groups?".format(n=len(unused))):
            delete_security_groups(ec2, unused)

View file

@ -0,0 +1,138 @@
#!/usr/bin/env -S guile -e main -s
;; Script for updating current account and credit card statements
;; downloaded from Nationwide, who use a date and currency format
;; that Gnucash does not support.
(use-modules (ice-9 getopt-long)
((srfi srfi-1) #:select (drop))
;; Nationwide writes dates as "10 Jan 2024", which Gnucash cannot
;; read; they are rewritten as YYYY-MM-DD.
(define date-input-format "%d %b %Y")
(define date-output-format "%Y-%m-%d")

;; Parse d with the input format and render it in the output format.
(define (format-date d)
  (let ((parsed (strptime date-input-format d)))
    ;; strptime returns a pair whose car is the broken-down time.
    (strftime date-output-format (car parsed))))
;; The characters allowed in a numeric amount. Anything else (such as
;; the non-ASCII currency character in Nationwide statements) is
;; removed from the field.
(define currency-charset (string->char-set "0123456789.-"))

;; Return s with every character outside currency-charset removed.
(define (format-amount s)
  (list->string
   (filter (lambda (ch) (char-set-contains? currency-charset ch))
           (string->list s))))
;; Profiles for the different statement formats, keyed by profile name.
;; Each profile is an association list with these entries:
;; skip: the number of leading rows to skip
;; header: boolean indicating whether or not the first unskipped
;; row is a header
;; date-cols: list of columns containing dates
;; amount-cols: list of columns containing amounts
;; Column numbers are zero-based: they are used as list-ref indices.
(define profiles
'(("credit-card" . ((skip . 4)
(header . #t)
(date-cols . (0))
(amount-cols . (3 4))))
("current-account" . ((skip . 4)
(header . #t)
(date-cols . (0))
(amount-cols . (3 4 5))))))
;; Validation predicate for the profile option: #t when p names an
;; entry in profiles, #f otherwise.
(define (valid-profile? p)
  (pair? (assoc p profiles)))
;; Update lst in place by replacing the element at each index listed
;; in cols with the result of applying f to it. Indices are zero-based.
(define (update-list lst cols f)
  (for-each (lambda (k)
              (let ((v (list-ref lst k)))
                (list-set! lst k (f v))))
            cols))
;; Build a row processor from spec: the returned procedure reformats
;; the date columns and then the amount columns of the row it is
;; given, for whichever of the two column lists the spec defines.
(define (process-row spec)
  (define date-cols (assq-ref spec 'date-cols))
  (define amount-cols (assq-ref spec 'amount-cols))
  (lambda (row)
    (if date-cols
        (update-list row date-cols format-date))
    (if amount-cols
        (update-list row amount-cols format-amount))))
;; Parse the RFC 4180 CSV file at path and return its contents.
(define (read-statement path)
  (define (parse port)
    (dsv->scm port #:format 'rfc4180))
  (call-with-input-file path parse))
;; Serialise data as RFC 4180 CSV into the file at path.
(define (write-statement data path)
  (define (emit port)
    (scm->dsv data port #:format 'rfc4180))
  (call-with-output-file path emit))
;; Apply the updates described by spec to data (a list of rows read
;; from the CSV) and return the resulting list of rows.
;; If spec has a 'skip value, that many leading rows are dropped.
;; If spec has a true 'header value, the first remaining row is left
;; as-is and only the rows after it are updated.
(define (update-data spec data)
  (let* ((skip (assq-ref spec 'skip))
         (data (if skip (drop data skip) data))
         ;; Guard the cdr so a statement with no rows left after
         ;; skipping does not raise an error.
         (body (if (and (assq-ref spec 'header) (pair? data))
                   (cdr data)
                   data)))
    (for-each (process-row spec) body)
    ;; Rows are updated in place; return them for the caller to write.
    data))
;; Read the statement at input-path, apply the updates defined in
;; spec, and write the result to output-path.
(define (process-statement spec input-path output-path)
  (let* ((rows (read-statement input-path))
         (updated (update-data spec rows)))
    (write-statement updated output-path)))
;; Print the usage message to STDERR, preceded by "Error: " and errmsg
;; when one is given, then exit. The exit code is EXIT_FAILURE when an
;; error message was given and EXIT_SUCCESS otherwise.
(define* (usage #:optional errmsg)
  (with-output-to-port (current-error-port)
    (lambda ()
      (when errmsg
        (display "Error: ")
        (display errmsg)
        ;; Keep the usage text off the error line.
        (newline))
      (display "\
Usage: fix-credit-card-statement [options]
  -h, --help              Display this help.
  -i, --input=FILENAME    Input file path. Required.
  -o, --output=FILENAME   Output file path. Required unless --overwrite is given.
  -w, --overwrite         Overwrite the input file with the updated data.
  -p, --profile=PROFILE   Profile name [credit-card|current-account].
")
      (exit (if errmsg EXIT_FAILURE EXIT_SUCCESS)))))
;; Process command-line arguments and validate options.
;; If valid, run process-statement with the given options; with
;; --overwrite the input file is also used as the output file.
(define (main args)
  (let* ((option-spec `((help      (single-char #\h) (value #f))
                        (input     (single-char #\i) (value #t))
                        (output    (single-char #\o) (value #t))
                        (overwrite (single-char #\w) (value #f))
                        (profile   (single-char #\p) (value #t) (predicate ,valid-profile?))))
         (options (getopt-long args option-spec))
         (help-wanted (option-ref options 'help #f))
         (profile (option-ref options 'profile #f))
         (input (option-ref options 'input #f))
         (output (option-ref options 'output #f))
         (overwrite (option-ref options 'overwrite #f)))
    ;; usage exits, so reaching process-statement means every check passed.
    (cond
     (help-wanted (usage))
     ((not profile) (usage "profile is required"))
     ((not input) (usage "input filename is required"))
     ((and overwrite output) (usage "output filename cannot be given with --overwrite"))
     ((not (or overwrite output)) (usage "output filename is required without --overwrite")))
    (process-statement (assoc-ref profiles profile) input (or output input))))

misc/ Executable file
View file

@ -0,0 +1,9 @@
# Parse a MIME message from STDIN and print an outline of its parts.
use strict;
use warnings;
use MIME::Parser;

my $parser = MIME::Parser->new;
my $entity = $parser->parse( \*STDIN ) or die "parse failed";

# Show the structure of the parsed message, one entry per MIME part.
$entity->dump_skeleton;

misc/generate-pg-audit-ddl Executable file
View file

@ -0,0 +1,261 @@
#!/usr/bin/env perl
use strict;
use warnings FATAL => 'all';
use DBI;
use Getopt::Long;
use Const::Fast;
use Term::ReadPassword;
use Template;
use Getopt::Long;
# Tables in the main schema that are not audited.
const my %IGNORE_TABLE => (
    cached_reports              => 1,
    crispr_off_targets          => 1,
    summaries                   => 1,
    fixture_md5                 => 1,
    crispr_off_target_summaries => 1,
    design_attempts             => 1,
    crisprs                     => 1,
    project_alleles             => 1,
);
# Schema being audited and the schema holding the shadow tables.
const my $MAIN_SCHEMA  => 'public';
const my $AUDIT_SCHEMA => 'audit';

# Template Toolkit templates for the generated DDL. The heredocs are
# single-quoted so that '$' and '[% ... %]' reach the template unchanged.

# Shadow table: four audit columns followed by the main table's columns.
# The audit columns match the four leading values the trigger function
# inserts (operation, user, now(), txid_current()). audit_txid is BIGINT
# because txid_current() returns bigint.
const my $CREATE_AUDIT_TABLE_TT => <<'EOT';
CREATE TABLE [% audit_schema %].[% table_name %] (
audit_op CHAR(1) NOT NULL CHECK (audit_op IN ('D','I','U')),
audit_user TEXT NOT NULL,
audit_stamp TIMESTAMP NOT NULL,
audit_txid BIGINT NOT NULL,
[% column_spec.join(",\n") %]
);

EOT

# Trigger function that copies the affected row into the shadow table.
const my $CREATE_AUDIT_FUNCTION_TT => <<'EOT';
CREATE OR REPLACE FUNCTION [% main_schema %].process_[% table_name %]_audit()
RETURNS TRIGGER AS $[% table_name %]_audit$
    BEGIN
        IF (TG_OP = 'DELETE') THEN
            INSERT INTO [% audit_schema %].[% table_name %] SELECT 'D', user, now(), txid_current(), OLD.*;
        ELSIF (TG_OP = 'UPDATE') THEN
            INSERT INTO [% audit_schema %].[% table_name %] SELECT 'U', user, now(), txid_current(), NEW.*;
        ELSIF (TG_OP = 'INSERT') THEN
            INSERT INTO [% audit_schema %].[% table_name %] SELECT 'I', user, now(), txid_current(), NEW.*;
        END IF;
        RETURN NULL;
    END;
$[% table_name %]_audit$ LANGUAGE plpgsql;

EOT

# Trigger that runs the function after every row change.
const my $CREATE_AUDIT_TRIGGER_TT => <<'EOT';
CREATE TRIGGER [% table_name %]_audit
AFTER INSERT OR UPDATE OR DELETE ON [% main_schema %].[% table_name %]
    FOR EACH ROW EXECUTE PROCEDURE [% main_schema %].process_[% table_name %]_audit();

EOT

const my $DROP_AUDIT_TABLE_COLUMN_TT => <<'EOT';
ALTER TABLE [% audit_schema %].[% table_name %] DROP COLUMN [% column_name %];
EOT

const my $ADD_AUDIT_TABLE_COLUMN_TT => <<'EOT';
ALTER TABLE [% audit_schema %].[% table_name %] ADD COLUMN [% column_name %] [% column_type %];
EOT

const my $DROP_AUDIT_TABLE_TT => <<'EOT';
DROP TABLE [% audit_schema %].[% table_name %];
EOT

# Columns that exist only in audit tables; they are never dropped when an
# audit table is compared with its main table.
const my %IS_AUDIT_COL => map { $_ => 1 } qw( audit_op audit_user audit_stamp audit_txid );

# Column types whose declaration needs a size, e.g. varchar(36).
const my %NEEDS_SIZE => map { $_ => 1 } qw( char character varchar );
# Connection settings: defaults come from the standard PG* environment
# variables and can be overridden on the command line.
my $pg_host   = $ENV{PGHOST};
my $pg_port   = $ENV{PGPORT};
my $pg_dbname = $ENV{PGDATABASE};
my $pg_user   = 'lims2';

GetOptions(
    'host=s'   => \$pg_host,
    'port=s'   => \$pg_port,
    'dbname=s' => \$pg_dbname,
    'user=s'   => \$pg_user
) or die "Usage: $0 [OPTIONS]\n";

# Without a database name the DSN below cannot be built.
die "Database name is required (use --dbname or set PGDATABASE)\n"
    unless defined $pg_dbname;

# Prompt until a password has been read.
my $pg_password;
while ( not defined $pg_password ) {
    $pg_password = read_password("Enter PostgreSQL password for $pg_user: ");
}

my $dsn = 'dbi:Pg:dbname=' . $pg_dbname;

if ( defined $pg_host ) {
    $dsn .= ";host=" . $pg_host;
}

if ( defined $pg_port ) {
    $dsn .= ";port=" . $pg_port;
}

my $dbh = DBI->connect( $dsn, $pg_user, $pg_password, { AutoCommit => 1, RaiseError => 1, PrintError => 0 } )
    or die "Failed to connect to $dsn: $DBI::errstr\n";
# Template variables shared by every generated statement.
const my %VARS => (
    main_schema  => $MAIN_SCHEMA,
    audit_schema => $AUDIT_SCHEMA,
);

my $tt = Template->new;

my $main_tables  = get_tables( $dbh, $MAIN_SCHEMA );
my $audit_tables = get_tables( $dbh, $AUDIT_SCHEMA );

# Tables are visited in sorted order so the emitted SQL is the same from
# run to run.
for my $table_name ( sort keys %{$main_tables} ) {
    next if exists $IGNORE_TABLE{$table_name};
    my $main_table  = $main_tables->{$table_name};
    my $audit_table = $audit_tables->{$table_name};
    if ($audit_table) {
        # An audit table already exists: emit only the column changes.
        diff_tables( $table_name, $main_table, $audit_table, $tt, \%VARS );
    }
    else {
        # No audit table yet: emit table, function and trigger.
        initialize_auditing( $table_name, $main_table, $tt, \%VARS );
    }
}

# Audit tables whose main table no longer exists are dropped.
for my $table_name ( sort keys %{$audit_tables} ) {
    unless ( $main_tables->{$table_name} ) {
        $tt->process( \$DROP_AUDIT_TABLE_TT, { %VARS, table_name => $table_name } )
            or die $tt->error, "\n";
    }
}
# Compare a main table with its existing audit table and emit the ALTER
# statements that bring the audit table's columns into line.
#   $table_name     - name of the table in both schemas
#   $col_spec       - arrayref of [ column_name, column_type ] for the main table
#   $audit_col_spec - the same for the audit table
#   $tt             - Template object used to render the SQL
#   $VARS           - hashref of template variables shared by all statements
# A column whose type differs between the two tables only produces a warning.
sub diff_tables {
    my ( $table_name, $col_spec, $audit_col_spec, $tt, $VARS ) = @_;

    my %vars = ( %{$VARS}, table_name => $table_name );

    my %cols       = map { @{$_} } @{$col_spec};
    my %audit_cols = map { @{$_} } @{$audit_col_spec};

    # Columns of the main table: warn on a type mismatch, add if missing.
    for my $cs ( @{$col_spec} ) {
        my ( $column_name, $column_type ) = @{$cs};
        my $audit_column_type = $audit_cols{$column_name};
        if ($audit_column_type) {
            if ( $audit_column_type ne $column_type ) {
                warn "Table $table_name column $column_name type mismatch ($column_type vs $audit_column_type)\n";
            }
        }
        else {
            $tt->process( \$ADD_AUDIT_TABLE_COLUMN_TT,
                { %vars, column_name => $column_name, column_type => $column_type } )
                or die $tt->error, "\n";
        }
    }

    # Columns only in the audit table are dropped, except the audit
    # bookkeeping columns themselves.
    for my $audit_column_name ( sort keys %audit_cols ) {
        unless ( $cols{$audit_column_name} or exists $IS_AUDIT_COL{$audit_column_name} ) {
            $tt->process( \$DROP_AUDIT_TABLE_COLUMN_TT, { %vars, column_name => $audit_column_name } )
                or die $tt->error, "\n";
        }
    }

    return;
}
# Emit the SQL that starts auditing a table: the audit table, the trigger
# function and the trigger.
#   $table_name - name of the main table
#   $col_spec   - arrayref of [ column_name, column_type ] pairs
#   $tt         - Template object used to render the SQL
#   $VARS       - hashref of template variables shared by all statements
sub initialize_auditing {
    my ( $table_name, $col_spec, $tt, $VARS ) = @_;

    my %vars = (
        %{$VARS},    # the templates refer to the shared schema names
        table_name  => $table_name,
        column_spec => [ map { join q{ }, @{$_} } @{$col_spec} ]
    );

    $tt->process( \$CREATE_AUDIT_TABLE_TT,    \%vars ) or die $tt->error, "\n";
    $tt->process( \$CREATE_AUDIT_FUNCTION_TT, \%vars ) or die $tt->error, "\n";
    $tt->process( \$CREATE_AUDIT_TRIGGER_TT,  \%vars ) or die $tt->error, "\n";

    return;
}
# Return a hashref mapping each table name in $schema_name to its column
# information, as returned by get_column_info.
sub get_tables {
    my ( $dbh, $schema_name ) = @_;

    my $sth = $dbh->table_info( undef, $schema_name, undef, 'TABLE' );

    my %tables;

    while ( my $r = $sth->fetchrow_hashref ) {
        $tables{ $r->{TABLE_NAME} } = get_column_info( $dbh, $schema_name, $r->{TABLE_NAME} );
    }

    return \%tables;
}
# Return an arrayref of [ column_name, column_type ] pairs for a table.
# Types listed in %NEEDS_SIZE get their size appended, e.g. varchar(36).
sub get_column_info {
    my ( $dbh, $schema_name, $table_name ) = @_;

    my @column_info;

    my $sth = $dbh->column_info( undef, $schema_name, $table_name, undef );

    while ( my $r = $sth->fetchrow_hashref ) {
        my $type = $r->{TYPE_NAME};
        if ( exists $NEEDS_SIZE{$type} ) {
            # HACK for bpchar type columns in qc_template_well_genotyping_primers and
            # qc_template_well_crispr_primers tables ( qc_run_id column ):
            # fall back to a size of 36 when no column size is reported.
            my $col_size = $r->{COLUMN_SIZE} ? $r->{COLUMN_SIZE} : '36';
            $type = $type . '(' . $col_size . ')';
        }
        push @column_info, [ $r->{COLUMN_NAME}, $type ];
    }

    return \@column_info;
}
=head1 NAME
generate-pg-audit-ddl --host pgsrv5 --port 5437 --dbname lims2_devel --user lims2
This script interrogates the database specified by the C<--dbname>
command-line option and compares the I<public> schema with the
I<audit> schema. It emits SQL to create a table in the I<audit> schema
shadowing each table in the B<public> schema, a function to insert a
row in the corresponding I<audit> table for each C<INSERT>, C<UPDATE>
or C<DELETE> in the I<public> schema, and a trigger that calls this function.
This script assumes that the I<audit> schema already exists. It
attempts to create SQL that will transition the current state of the
I<audit> schema to the desired state (shadowing the current state of
the I<public> schema). Note, however, that it cannot accurately detect
column and table renames.
If you have renamed a column or table in the main schema, this script
will emit C<DROP> and C<CREATE> statements that will B<delete data
from your audit schema>. Please review the generated SQL carefully in
case this is not what you intended.
=head1 SEE ALSO
The I<audit> tables, functions, and triggers are derived from an
example described here:
=head1 AUTHOR
Ray Miller E<lt><gt>

misc/ Executable file
View file

@ -0,0 +1,60 @@
from mailbox import mbox
# GMail labels that get their own mbox file, in order of precedence.
targets = [
    "28 Ellesmere Road Purchase",
    "Anglian Windows",
    "Conveyancing Quotes",
    "Dad's Estate",
    "Dad's Memorial",
    "Dad's Memorial Service",
    "Virgin Wines",
]


def target(m):
    """Return the name of the destination mailbox for message *m*.

    The ``X-Gmail-Labels`` header is split on commas and the first entry
    of ``targets`` found among the labels is returned. A message without
    the header, or with no matching label, goes to ``"Uncategorized"``.
    """
    if "X-Gmail-Labels" not in m:
        return "Uncategorized"
    # Strip each label so stray whitespace around a comma cannot defeat
    # the exact-match comparison.
    labels = {label.strip() for label in m["X-Gmail-Labels"].split(",")}
    for t in targets:
        if t in labels:
            return t
    return "Uncategorized"
incoming = mbox("/home/ray/Mail/Gmail.mbox", create=False)
destinations = {}
n = 0
for m in incoming:
t = target(m)
if t not in destinations:
destinations[t] = mbox(f"/home/ray/Mail/GMail/{t}", create=True)
for d in destinations: