Compare commits

..

10 commits

Author SHA1 Message Date
Ray Miller
26bf41e073 Fix typo. 2024-07-13 17:20:04 +01:00
Ray Miller
ec51a0c919 Simplify the update-data function. 2024-07-13 17:14:16 +01:00
Ray Miller
ae2de95d52 Document update-data function. 2024-07-13 17:07:43 +01:00
Ray Miller
98910d3306 Make Nationwide statement processing data-driven.
Instead of different scripts for credit card and current account
statements, define a profile for each that specifies the date
and amount columns, and have this control the processing.
2024-07-13 16:55:52 +01:00
Ray Miller
ef791b6be6 Initial version of script to fix Nationwide credit card statement 2024-07-13 15:10:36 +01:00
Ray Miller
eb39abfd96 Add script to split mbox exported by GMail.
GMail exports the entire account to a single mbox file. This script
splits it into multiple mbox files according to the labels.
2024-07-13 15:07:44 +01:00
Ray Miller
c3123ff07c Add script to generate a shadow audit scheme for a PostgreSQL database. 2024-07-13 15:07:07 +01:00
Ray Miller
378ef21e3b Script to dump MIME parts (useful for debugging email messages) 2024-07-13 15:05:41 +01:00
Ray Miller
f1d5e236ba Add script to delete AWS S3 bucket 2024-07-13 15:04:53 +01:00
Ray Miller
9257de8439 Add AWS housekeeping scripts, rename for consistency 2024-02-10 15:52:16 +00:00
9 changed files with 658 additions and 0 deletions

61
aws/delete_iam_policies.py Executable file
View file

@ -0,0 +1,61 @@
#!/usr/bin/python3
#
# Delete IAM policies whose names match a pattern
#
import argparse
import boto3
import re
import click
def list_policies(iam, pattern):
    """Return all customer-managed IAM policies whose name matches *pattern*.

    iam     -- a boto3 IAM client
    pattern -- a compiled regular expression tested against PolicyName
    """
    paginator = iam.get_paginator('list_policies')
    matched = []
    # Scope='Local' restricts the listing to customer-managed policies.
    for page in paginator.paginate(Scope='Local'):
        matched.extend(p for p in page['Policies'] if pattern.match(p['PolicyName']))
    return matched
def delete_policy_versions(iam, policy):
    """Delete every non-default version of *policy*.

    AWS refuses to delete a policy that still has extra versions, so all
    non-default versions must go first; the default version is removed
    together with the policy itself.
    """
    arn = policy['Arn']
    paginator = iam.get_paginator('list_policy_versions')
    for page in paginator.paginate(PolicyArn=arn):
        extra_versions = (v for v in page['Versions'] if not v['IsDefaultVersion'])
        for version in extra_versions:
            print("Deleting version {v}".format(v=version['VersionId']))
            iam.delete_policy_version(PolicyArn=arn, VersionId=version['VersionId'])
def delete_policy(iam, policy):
    """Remove *policy* from the account, versions first.

    Non-default versions are deleted before the policy because AWS
    rejects deletion of a policy that still carries extra versions.
    """
    name, arn = policy['PolicyName'], policy['Arn']
    print(f"Deleting policy {name}")
    delete_policy_versions(iam, policy)
    iam.delete_policy(PolicyArn=arn)
def confirm_delete(policies):
    """Show the policies about to be deleted and ask for confirmation.

    Returns True only when the user answers yes at the prompt.
    """
    print("Delete policies:")
    for candidate in policies:
        print(candidate['PolicyName'])
    return click.confirm("Continue?")
def delete_matching_policies(pattern):
    """Find, confirm, and delete IAM policies whose names match *pattern*."""
    iam = boto3.client('iam')
    matching = list_policies(iam, pattern)
    if not matching:
        print("No matching policies")
        return
    if not confirm_delete(matching):
        return
    for policy in matching:
        delete_policy(iam, policy)
if __name__ == "__main__":
    # Entry point: build the regex from --pattern (default matches all
    # policy names) and run the interactive deletion.
    arg_parser = argparse.ArgumentParser(description="Delete IAM policies")
    arg_parser.add_argument("--pattern", default=".*",
                            help="Regex to match policy name")
    cli = arg_parser.parse_args()
    delete_matching_policies(re.compile(cli.pattern))

35
aws/delete_s3_bucket.py Executable file
View file

@ -0,0 +1,35 @@
#!/usr/bin/python3
#
# An S3 bucket can only be deleted if it is empty, so all
# objects must be deleted. For a versioned bucket, this includes
# object versions and object deletion markers.
#
import argparse
import boto3
import click
def delete_s3_bucket(bucket_name, dry_run=False):
    """Delete an S3 bucket and everything in it.

    A bucket can only be deleted once it is empty, so all objects are
    removed first; for a versioned bucket this includes object versions
    and deletion markers.

    bucket_name -- name of the bucket to delete
    dry_run     -- if True, list the objects that would be deleted and
                   stop without deleting anything.

    BUG FIX: the previous version defaulted dry_run=True but never read
    the flag, deleting regardless. The flag is now honoured; the default
    is False so callers that omit it (the script's main) behave as before.
    """
    s3 = boto3.resource('s3')
    bucket = s3.Bucket(bucket_name)
    # Bucket resources are constructed lazily; creation_date is None
    # when the bucket does not actually exist.
    if not bucket.creation_date:
        print(f"Bucket {bucket_name} not found")
        return
    n = 0
    for obj in bucket.objects.all():
        n += 1
        print(f"Delete {obj.key}")
    if dry_run:
        print(f"Dry run: {n} objects in {bucket_name} left untouched")
        return
    if click.confirm(f"Delete {n} objects from {bucket_name}?"):
        bucket.objects.all().delete()
        # For versioned buckets: also remove object versions and
        # deletion markers, otherwise the bucket delete fails.
        bucket.object_versions.all().delete()
        bucket.delete()
        print(f"Deleted bucket {bucket_name}")
if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Delete S3 bucket and its contents")
    # Generalized from nargs=1 to nargs="+": one or more buckets may be
    # given on the command line, and the loop below already handles any
    # number. A single bucket behaves exactly as before.
    parser.add_argument("bucket", help="Name(s) of bucket(s) to delete", nargs="+")
    args = parser.parse_args()
    for bucket in args.bucket:
        delete_s3_bucket(bucket)

46
aws/delete_ssm_parameters.py Executable file
View file

@ -0,0 +1,46 @@
#!/usr/bin/python3
#
# Delete all SSM parameters under the give prefix
#
import argparse
import boto3
import click
import sys
def list_parameters(ssm, prefix):
    """Return the names of all SSM parameters under *prefix*.

    ssm    -- a boto3 SSM client
    prefix -- parameter-name prefix, passed as a Name filter
    """
    # Named 'name_filter' rather than 'filter' so the builtin filter()
    # is not shadowed inside this function.
    name_filter = {'Key': 'Name', 'Values': [prefix]}
    paginator = ssm.get_paginator('describe_parameters')
    return [param['Name']
            for page in paginator.paginate(Filters=[name_filter])
            for param in page['Parameters']]
def delete_parameters(ssm, parameter_names):
    """Delete the named SSM parameters.

    The DeleteParameters API accepts at most 10 names per call, so the
    names are sent in batches of 10; Python slicing clamps at the end of
    the list, so no explicit min() is needed.
    """
    batch_size = 10
    for start in range(0, len(parameter_names), batch_size):
        ssm.delete_parameters(Names=parameter_names[start:start + batch_size])
if __name__ == "__main__":
    # Guarding the driver code means importing this module (e.g. to
    # reuse the helpers above) no longer contacts AWS as a side effect,
    # and matches the convention used by the sibling AWS scripts.
    parser = argparse.ArgumentParser(description="Delete SSM Parameters")
    parser.add_argument("--region", help="AWS Region name", default="eu-west-1")
    parser.add_argument("--prefix", help="Delete parameters with this prefix", required=True)
    args = parser.parse_args()
    ssm = boto3.client('ssm', region_name=args.region)
    params = list_parameters(ssm, args.prefix)
    if not params:
        print("No parameters with prefix {prefix}".format(prefix=args.prefix))
        sys.exit(0)
    print("Delete parameters:")
    for p in params:
        print(" {name}".format(name=p))
    if click.confirm("Continue?"):
        delete_parameters(ssm, params)

View file

@ -0,0 +1,48 @@
#!/usr/bin/env python3
import boto3
import botocore
import click
def get_interfaces(ec2, groupId):
    """Return the network interfaces attached to security group *groupId*."""
    group_filter = {"Name": "group-id", "Values": [groupId]}
    response = ec2.describe_network_interfaces(Filters=[group_filter])
    return response['NetworkInterfaces']
def list_unused_groups(ec2):
    """Return every security group that has no attached network interface."""
    paginator = ec2.get_paginator('describe_security_groups')
    unattached = []
    for page in paginator.paginate():
        for group in page['SecurityGroups']:
            # A group with zero attached interfaces is considered unused.
            if not get_interfaces(ec2, group['GroupId']):
                unattached.append(group)
    return unattached
def delete_security_groups(ec2, security_groups):
    """Attempt to delete each of *security_groups*, reporting failures.

    A group that cannot be deleted (for example, one still referenced by
    another group's rules) is reported and skipped; the run continues.
    """
    for group in security_groups:
        group_id = group['GroupId']
        try:
            ec2.delete_security_group(GroupId=group_id)
        except botocore.exceptions.ClientError as err:
            print("Security group {id} could not be deleted".format(id=group_id))
            print(err)
        else:
            print("Deleted security group {id}".format(id=group_id))
if __name__ == "__main__":
    ec2 = boto3.client('ec2')
    unused = list_unused_groups(ec2)
    # Previously the script prompted "Delete 0 groups?" even when every
    # group was attached; skip the prompt when there is nothing to do.
    if not unused:
        print("No unused security groups")
    else:
        for sg in unused:
            print(sg['GroupId'], sg['GroupName'], sg['Description'])
        if click.confirm("Delete {n} groups?".format(n=len(unused))):
            delete_security_groups(ec2, unused)

View file

@ -0,0 +1,138 @@
#!/usr/bin/env -S guile -e main -s
!#
;; Script for updating current account and credit card statements
;; downloaded from Nationwide, who use a date and currency format
;; that Gnucash does not support.
(use-modules (ice-9 getopt-long)
((srfi srfi-1) #:select (drop))
(dsv))
;; Date appears in Nationwide statements in the format
;; "10 Jan 2024", but this is not understood by Gnucash
;; so we convert it to YYYY-MM-DD format.
;; Nationwide renders dates like "10 Jan 2024"; Gnucash wants ISO
;; YYYY-MM-DD, so statement dates are re-rendered with these formats.
(define date-input-format "%d %b %Y")
(define date-output-format "%Y-%m-%d")

;; Parse a Nationwide-format date string and return it in the ISO
;; format understood by Gnucash.
(define (format-date d)
  (let ((parsed (car (strptime date-input-format d))))
    (strftime date-output-format parsed)))
;; Characters we expect to see in a numeric amount field. The
;; Nationwide statements contain a non-ASCII currency character
;; that we want to delete.
;; Characters allowed in a numeric amount: digits, the decimal point,
;; and the minus sign. Nationwide prefixes amounts with a non-ASCII
;; currency symbol, which this filter removes.
(define currency-charset (string->char-set "0123456789.-"))

;; Keep only digits, "." and "-" from an amount string.
(define (format-amount s)
  (string-filter (lambda (ch) (char-set-contains? currency-charset ch)) s))
;; Profiles for the different statement formats.
;; skip: the number of leading rows to skip
;; header: boolean indicating whether or not the first unskipped
;; row is a header
;; date-cols: list of columns containing dates
;; amount-cols: list columns containing amounts
;; Alist mapping profile name -> statement-format spec (fields are
;; described in the comment above). Both Nationwide formats skip 4
;; leading junk rows, carry a header row, and keep the date in
;; column 0; they differ only in which columns hold amounts.
(define profiles
  '(("credit-card" . ((skip . 4)
                      (header . #t)
                      (date-cols . (0))
                      (amount-cols . (3 4))))
    ("current-account" . ((skip . 4)
                          (header . #t)
                          (date-cols . (0))
                          (amount-cols . (3 4 5))))))
;; Predicate for validating the profile option.
;; Return #t when P names a profile defined in `profiles`, else #f.
(define (valid-profile? p)
  (and (assoc p profiles) #t))
;; Update a list by applying the given function to each of the
;; listed columns.
;; Destructively replace each element of LST at the indices in COLS
;; with the result of applying F to it.
(define (update-list lst cols f)
  (for-each
   (lambda (idx)
     (list-set! lst idx (f (list-ref lst idx))))
   cols))
;; Given a spec listing the date and amount columns, return a
;; function that will apply the corresponding formats to a row.
;; Build a row-updating procedure from SPEC. The returned procedure
;; mutates a row in place, reformatting the configured date columns
;; and amount columns; either list may be absent in the spec.
(define (process-row spec)
  (let ((dates (assq-ref spec 'date-cols))
        (amounts (assq-ref spec 'amount-cols)))
    (lambda (row)
      (when dates (update-list row dates format-date))
      (when amounts (update-list row amounts format-amount)))))
;; Read a CSV from the given path.
;; Read the RFC 4180 CSV file at PATH; return its rows as a list of lists.
(define (read-statement path)
  (call-with-input-file path
    (lambda (in)
      (dsv->scm in #:format 'rfc4180))))
;; Write data to the given path in CSV format.
;; Write DATA (a list of rows) to PATH as RFC 4180 CSV.
(define (write-statement data path)
  (call-with-output-file path
    (lambda (out)
      (scm->dsv data out #:format 'rfc4180))))
;; Apply the specified updates to data (a list of rows
;; read from the CSV). If a 'skip value is specified, drop
;; this many leading rows. If a 'header is present, only
;; apply the updates to the succeeding rows, preserving
;; the header as-is.
(define (update-data spec data)
  (let* ((skip (assq-ref spec 'skip))
         (rows (if skip (drop data skip) data))
         ;; When the spec declares a header row, leave it untouched
         ;; and only update the rows after it.
         (body (if (assq-ref spec 'header) (cdr rows) rows)))
    ;; process-row mutates each row in place, so returning ROWS yields
    ;; the updated data with the (optional) header preserved as-is.
    (for-each (process-row spec) body)
    rows))
;; Apply the updates defined in `spec` to the statement read
;; from input-path and write the updated data to output-path.
;; Read the statement at INPUT-PATH, apply the updates described by
;; SPEC, and write the result to OUTPUT-PATH.
(define (process-statement spec input-path output-path)
  (write-statement (update-data spec (read-statement input-path))
                   output-path))
;; Display a usage message and (optional) error message to STDERR
;; and exit. If an error message is given the exit code will be
;; non-zero.
;; Print a usage message — preceded by "Error: <errmsg>" when ERRMSG is
;; given — to the current error port, then exit. Exit status is
;; EXIT_FAILURE when an error message was supplied, EXIT_SUCCESS
;; otherwise, so --help exits 0 but a validation failure exits non-zero.
(define* (usage #:optional errmsg)
  (with-output-to-port (current-error-port)
    (lambda ()
      (when errmsg
        (display "Error: ")
        (display errmsg)
        (newline))
      ;; User-visible help text; kept verbatim.
      (display "\
Usage: fix-credit-card-statement [options]
-h, --help Display this help.
-i, --input=FILENAME Input file path. Required.
-o, --output=FILENAME Output file path. Required unless --overwrite is given.
-w, --overwrite Overwrite the input file with the updated data.
-p, --profile=PROFILE Profile name [credit-card|current-account].
")
      (exit (if errmsg EXIT_FAILURE EXIT_SUCCESS)))))
;; Process command-line arguments and validate options.
;; If valid, run process-statement with the given options.
;; Entry point (invoked by the shebang's "guile -e main"): parse and
;; validate command-line options, then run process-statement.
;; Invalid combinations exit via `usage`, which never returns.
(define (main args)
  (let* ((option-spec `((help (single-char #\h) (value #f))
                        (input (single-char #\i) (value #t))
                        (output (single-char #\o) (value #t))
                        (overwrite (single-char #\w) (value #f))
                        ;; getopt-long rejects unknown profile names via
                        ;; the valid-profile? predicate.
                        (profile (single-char #\p) (value #t) (predicate ,valid-profile?))))
         (options (getopt-long args option-spec))
         (help-wanted (option-ref options 'help #f))
         (profile (option-ref options 'profile #f))
         (input (option-ref options 'input #f))
         (output (option-ref options 'output #f))
         (overwrite (option-ref options 'overwrite #f)))
    (cond
     (help-wanted (usage))
     ((not profile) (usage "profile is required"))
     ((not input) (usage "input filename is required"))
     ((and overwrite output) (usage "output filename cannot be given with --overwrite"))
     ((not (or overwrite output)) (usage "output filename is required without --overwrite")))
    ;; With --overwrite, the output path defaults to the input path.
    (process-statement (assoc-ref profiles profile) input (or output input))))

9
misc/dump-mime-parts.pl Executable file
View file

@ -0,0 +1,9 @@
#!/usr/bin/perl
#
# Read a MIME message on STDIN, extract each part to a file under the
# current directory, and print a skeleton of the message structure.
#
use strict;            # previously missing: $entity was an undeclared global
use warnings;

use MIME::Parser;

# Direct method call instead of the discouraged indirect-object
# syntax "new MIME::Parser".
my $parser = MIME::Parser->new;
$parser->output_under(".");

# parse() writes each MIME part out under "." and returns the entity tree.
my $entity = $parser->parse( \*STDIN ) or die "parse failed";
$entity->dump_skeleton();

261
misc/generate-pg-audit-ddl Executable file
View file

@ -0,0 +1,261 @@
#!/usr/bin/env perl
use strict;
use warnings FATAL => 'all';
use DBI;
use Getopt::Long;
use Const::Fast;
use Term::ReadPassword;
use Template;
use Getopt::Long;
# Tables excluded from audit generation — presumably large, derived, or
# cache-style tables where a row-level audit trail is not wanted
# (NOTE(review): rationale inferred from the names; confirm with owners).
const my %IGNORE_TABLE => (
    cached_reports              => 1,
    crispr_off_targets          => 1,
    summaries                   => 1,
    fixture_md5                 => 1,
    crispr_off_target_summaries => 1,
    design_attempts             => 1,
    crisprs                     => 1,
    project_alleles             => 1,
);

# The schema being audited and the schema receiving the shadow tables.
const my $MAIN_SCHEMA  => 'public';
const my $AUDIT_SCHEMA => 'audit';

# Template Toolkit template: shadow table with audit bookkeeping columns
# (operation, user, timestamp, transaction id) followed by a copy of
# every column of the audited table.
const my $CREATE_AUDIT_TABLE_TT => <<'EOT';
CREATE TABLE [% audit_schema %].[% table_name %] (
audit_op CHAR(1) NOT NULL CHECK (audit_op IN ('D','I','U')),
audit_user TEXT NOT NULL,
audit_stamp TIMESTAMP NOT NULL,
audit_txid INTEGER NOT NULL,
[% column_spec.join(",\n") %]
);
EOT

# Trigger function: copies the affected row (OLD for DELETE, NEW for
# INSERT/UPDATE) into the shadow table, tagged with the operation.
const my $CREATE_AUDIT_FUNCTION_TT => <<'EOT';
CREATE OR REPLACE FUNCTION [% main_schema %].process_[% table_name %]_audit()
RETURNS TRIGGER AS $[% table_name %]_audit$
BEGIN
IF (TG_OP = 'DELETE') THEN
INSERT INTO [% audit_schema %].[% table_name %] SELECT 'D', user, now(), txid_current(), OLD.*;
ELSIF (TG_OP = 'UPDATE') THEN
INSERT INTO [% audit_schema %].[% table_name %] SELECT 'U', user, now(), txid_current(), NEW.*;
ELSIF (TG_OP = 'INSERT') THEN
INSERT INTO [% audit_schema %].[% table_name %] SELECT 'I', user, now(), txid_current(), NEW.*;
END IF;
RETURN NULL;
END;
$[% table_name %]_audit$ LANGUAGE plpgsql;
EOT

# Row-level trigger wiring the function to the audited table.
const my $CREATE_AUDIT_TRIGGER_TT => <<'EOT';
CREATE TRIGGER [% table_name %]_audit
AFTER INSERT OR UPDATE OR DELETE ON [% main_schema %].[% table_name %]
FOR EACH ROW EXECUTE PROCEDURE [% main_schema %].process_[% table_name %]_audit();
EOT

# ALTER/DROP templates used when re-syncing an existing shadow table.
const my $DROP_AUDIT_TABLE_COLUMN_TT => <<'EOT';
ALTER TABLE [% audit_schema %].[% table_name %] DROP COLUMN [% column_name %];
EOT

const my $ADD_AUDIT_TABLE_COLUMN_TT => <<'EOT';
ALTER TABLE [% audit_schema %].[% table_name %] ADD COLUMN [% column_name %] [% column_type %];
EOT

const my $DROP_AUDIT_TABLE_TT => <<'EOT';
DROP TABLE [% audit_schema %].[% table_name %];
EOT

# Columns that exist only in the audit tables, never in a source table.
const my %IS_AUDIT_COL => map { $_ => 1 } qw( audit_op audit_user audit_stamp audit_txid );

# Column types whose SQL rendering needs an explicit size, e.g. varchar(36).
const my %NEEDS_SIZE => map { $_ => 1 } qw( char character varchar );
# Main program, in a bare block so its lexicals do not leak into the subs.
{
    # Connection settings default to the standard PG* environment variables
    # and may be overridden on the command line.
    my $pg_host = $ENV{PGHOST};
    my $pg_port = $ENV{PGPORT};
    my $pg_dbname = $ENV{PGDATABASE};
    my $pg_user = 'lims2';

    GetOptions(
        'host=s' => \$pg_host,
        'port=s' => \$pg_port,
        'dbname=s' => \$pg_dbname,
        'user=s' => \$pg_user
    ) or die "Usage: $0 [OPTIONS]\n";

    # Re-prompt until a password is entered (read_password can return
    # undef, e.g. on EOF).
    my $pg_password;
    while ( not defined $pg_password ) {
        $pg_password = read_password("Enter PostgreSQL password for $pg_user: ");
    }

    # Assemble the DBI DSN; host and port are optional.
    my $dsn = 'dbi:Pg:dbname=' . $pg_dbname;
    if ( defined $pg_host ) {
        $dsn .= ";host=" . $pg_host;
    }
    if ( defined $pg_port ) {
        $dsn .= ";port=" . $pg_port;
    }

    my $dbh = DBI->connect( $dsn, $pg_user, $pg_password, { AutoCommit => 1, RaiseError => 1, PrintError => 0 } )
        or die "Failed to connect to $dsn: $DBI::errstr\n";

    # Template variables shared by every generated statement.
    const my %VARS => (
        main_schema => $MAIN_SCHEMA,
        audit_schema => $AUDIT_SCHEMA,
    );

    my $tt = Template->new;

    my $main_tables = get_tables( $dbh, $MAIN_SCHEMA );
    my $audit_tables = get_tables( $dbh, $AUDIT_SCHEMA );

    # Emit ALTERs for tables that already have a shadow table, and full
    # CREATE TABLE/FUNCTION/TRIGGER DDL for tables that do not.
    while ( my ( $table_name, $main_table ) = each %{$main_tables} ) {
        next if exists $IGNORE_TABLE{$table_name};
        my $audit_table = $audit_tables->{$table_name};
        if ($audit_table) {
            diff_tables( $table_name, $main_table, $audit_table, $tt, \%VARS );
        }
        else {
            initialize_auditing( $table_name, $main_table, $tt, \%VARS );
        }
    }

    # Shadow tables whose source table no longer exists get dropped
    # (see LIMITATIONS in the POD: renames look like drop + create).
    for my $table_name ( keys %{$audit_tables} ) {
        unless ( $main_tables->{$table_name} ) {
            $tt->process( \$DROP_AUDIT_TABLE_TT, { %VARS, table_name => $table_name } );
        }
    }
}
# Compare a main-schema table with its existing audit shadow and emit
# ALTER TABLE statements to re-sync the shadow: add columns that are new
# in the main table, drop columns that have disappeared. Type mismatches
# are only warned about, never altered.
sub diff_tables {
    my ( $table_name, $col_spec, $audit_col_spec, $tt, $VARS ) = @_;

    my %vars = ( %{$VARS}, table_name => $table_name );

    # Flatten the [name, type] pairs into name => type lookup hashes.
    my %cols = map { @{$_} } @{$col_spec};
    my %audit_cols = map { @{$_} } @{$audit_col_spec};

    for my $cs ( @{$col_spec} ) {
        my ( $column_name, $column_type ) = @{$cs};
        my $audit_column_type = $audit_cols{$column_name};
        if ($audit_column_type) {
            if ( $audit_column_type ne $column_type ) {
                warn "Table $table_name column $column_name type mismatch ($column_type vs $audit_column_type)\n";
            }
        }
        else {
            # Column exists in the main table but not in the shadow: add it.
            $tt->process( \$ADD_AUDIT_TABLE_COLUMN_TT,
                { %vars, column_name => $column_name, column_type => $column_type } );
        }
    }

    # Columns present only in the shadow — and not audit bookkeeping
    # columns — were dropped from the main table: drop them here too.
    for my $audit_column_name ( keys %audit_cols ) {
        unless ( $cols{$audit_column_name} or exists $IS_AUDIT_COL{$audit_column_name} ) {
            $tt->process( \$DROP_AUDIT_TABLE_COLUMN_TT, { %vars, column_name => $audit_column_name } );
        }
    }

    return;
}
# Emit the full DDL needed to start auditing a table: the shadow table,
# the trigger function, and the trigger itself.
sub initialize_auditing {
    my ( $table_name, $col_spec, $tt, $VARS ) = @_;

    # Render "name type" pairs for the shadow table's copied columns.
    my @column_spec = map { join q{ }, @{$_} } @{$col_spec};

    my %vars = (
        %{$VARS},
        table_name  => $table_name,
        column_spec => \@column_spec,
    );

    $tt->process( \$CREATE_AUDIT_TABLE_TT,    \%vars );
    $tt->process( \$CREATE_AUDIT_FUNCTION_TT, \%vars );
    $tt->process( \$CREATE_AUDIT_TRIGGER_TT,  \%vars );

    return;
}
# Return a hashref mapping table name => arrayref of [column, type]
# pairs, for every table in the given schema.
sub get_tables {
    my ( $dbh, $schema_name ) = @_;

    my %tables;
    my $sth = $dbh->table_info( undef, $schema_name, undef, 'TABLE' );
    while ( my $row = $sth->fetchrow_hashref ) {
        my $name = $row->{TABLE_NAME};
        $tables{$name} = get_column_info( $dbh, $schema_name, $name );
    }

    return \%tables;
}
# Return an arrayref of [column_name, column_type] pairs for one table,
# with sized types rendered as e.g. "varchar(36)".
sub get_column_info {
    my ( $dbh, $schema_name, $table_name ) = @_;

    my @column_info;
    my $sth = $dbh->column_info( undef, $schema_name, $table_name, undef );
    while ( my $r = $sth->fetchrow_hashref ) {
        my $type = $r->{TYPE_NAME};
        if ( exists $NEEDS_SIZE{$type} ) {
            # HACK for bpchar type columns in qc_template_well_genotyping_primers and
            # qc_template_well_crispr_primers tables ( qc_run_id column ):
            # fall back to size 36 when COLUMN_SIZE is not reported.
            my $col_size = $r->{COLUMN_SIZE} ? $r->{COLUMN_SIZE} : '36';
            $type = $type . '(' . $col_size . ')';
        }
        push @column_info, [ $r->{COLUMN_NAME}, $type ];
    }

    return \@column_info;
}
__END__
=pod
=head1 NAME
generate-pg-audit-ddl
=head1 SYNOPSIS
generate-pg-audit-ddl --host pgsrv5 --port 5437 --dbname lims2_devel --user lims2
=head1 DESCRIPTION
This script interrogates the database specified by the C<--dbname>
command-line option and compares the I<public> schema with the
I<audit> schema. It emits SQL to create a table in the I<audit> schema
shadowing each table in the I<public> schema, a function to insert a
row in the corresponding I<audit> table for each C<INSERT>, C<UPDATE>
or C<DELETE> in the I<public> schema, and a trigger that calls this
function.
=head1 LIMITATIONS
This script assumes that the I<audit> schema already exists. It
attempts to create SQL that will transition the current state of the
I<audit> schema to the desired state (shadowing the current state of
the I<public> schema). Note, however, that it cannot accurately detect
column and table renames.
If you have renamed a column or table in the main schema, this script
will emit C<DROP> and C<CREATE> statements that will B<delete data
from your audit schema>. Please review the generated SQL carefully in
case this is not what you intended.
=head1 SEE ALSO
The I<audit> tables, functions, and triggers are derived from an
example described here:
L<http://www.postgresql.org/docs/9.0/static/plpgsql-trigger.html>
=head1 AUTHOR
Ray Miller E<lt>rm7@sanger.ac.ukE<gt>
=cut

60
misc/sort-mail.py Executable file
View file

@ -0,0 +1,60 @@
#!/usr/bin/python3
from mailbox import mbox
# GMail label names, in priority order: the first entry in this list
# found among a message's labels decides which mbox it is filed into.
targets = [
    "28 Ellesmere Road Purchase",
    "3-mobile",
    "Anglian Windows",
    "Clojure",
    "Conveyancing Quotes",
    "CTCCambridge",
    "CTCCambridgeRoutes",
    "CTCOxford",
    "Dad's Estate",
    "Dad's Memorial",
    "Dad's Memorial Service",
    "Facebook",
    "Golang",
    "GreenMetropolis",
    "LibDems",
    "Nationwide",
    "OkCupid",
    "Pseudospam",
    "Riverford",
    "RussianDatingScam",
    "Sanger",
    "SmileBanking",
    "UKUUG",
    "Virgin Wines",
    "Personal",
    "Sent",
    "Inbox",
    "Archived",
    "Spam",
    "Bin",
]


def target(m):
    """Return the destination mailbox name for message *m*.

    The X-Gmail-Labels header is split on commas and the first match in
    `targets` order wins; messages without a recognized label (or with
    no label header at all) go to "Uncategorized".
    """
    if "X-Gmail-Labels" not in m:
        return "Uncategorized"
    labels = m["X-Gmail-Labels"].split(",")
    return next((t for t in targets if t in labels), "Uncategorized")
if __name__ == "__main__":
    # Read the single mbox exported by GMail and fan each message out
    # into one mbox per label, creating destination boxes on demand.
    incoming = mbox("/home/ray/Mail/Gmail.mbox", create=False)
    destinations = {}
    for m in incoming:
        t = target(m)
        if t not in destinations:
            destinations[t] = mbox(f"/home/ray/Mail/GMail/{t}", create=True)
        destinations[t].add(m)
    # BUG FIX: iterating a dict yields its KEYS (strings here), and
    # str has no flush() -- the original `for d in destinations:
    # d.flush()` raised AttributeError. Flush the mbox values instead.
    # (Also dropped the unused message counter `n`.)
    for dest in destinations.values():
        dest.flush()