From 9257de8439817883a5b70b3f056173edc3801569 Mon Sep 17 00:00:00 2001 From: Ray Miller Date: Sat, 10 Feb 2024 15:52:16 +0000 Subject: [PATCH 01/10] Add AWS housekeeping scripts, rename for consistency --- aws/delete_iam_policies.py | 61 +++++++++++++++++++ ...elete-iam-users.py => delete_iam_users.py} | 0 aws/delete_ssm_parameters.py | 46 ++++++++++++++ aws/delete_unused_security_groups.py | 48 +++++++++++++++ 4 files changed, 155 insertions(+) create mode 100755 aws/delete_iam_policies.py rename aws/{delete-iam-users.py => delete_iam_users.py} (100%) create mode 100755 aws/delete_ssm_parameters.py create mode 100755 aws/delete_unused_security_groups.py diff --git a/aws/delete_iam_policies.py b/aws/delete_iam_policies.py new file mode 100755 index 0000000..b669ef0 --- /dev/null +++ b/aws/delete_iam_policies.py @@ -0,0 +1,61 @@ +#!/usr/bin/python3 +# +# Delete IAM policies whose names match a pattern +# + +import argparse +import boto3 +import re +import click + +def list_policies(iam, pattern): + policies=[] + paginator = iam.get_paginator('list_policies') + for page in paginator.paginate(Scope='Local'): + for policy in page['Policies']: + if pattern.match(policy['PolicyName']): + policies.append(policy) + return policies + + +def delete_policy_versions(iam, policy): + paginator = iam.get_paginator('list_policy_versions') + for page in paginator.paginate(PolicyArn=policy['Arn']): + for version in page['Versions']: + if version['IsDefaultVersion']: + continue + print("Deleting version {v}".format(v=version['VersionId'])) + iam.delete_policy_version(PolicyArn=policy['Arn'], VersionId=version['VersionId']) + + +def delete_policy(iam, policy): + print("Deleting policy {name}".format(name=policy['PolicyName'])) + delete_policy_versions(iam, policy) + iam.delete_policy(PolicyArn=policy['Arn']) + + +def confirm_delete(policies): + print("Delete policies:") + for policy in policies: + print(policy['PolicyName']) + return click.confirm("Continue?") + + +def 
delete_matching_policies(pattern): + iam = boto3.client('iam') + policies = list_policies(iam, pattern) + if len(policies) == 0: + print("No matching policies") + return + if confirm_delete(policies): + for policy in policies: + delete_policy(iam, policy) + + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description="Delete IAM policies") + parser.add_argument("--pattern", help="Regex to match policy name", default=".*") + + args = parser.parse_args() + pattern = re.compile(args.pattern) + delete_matching_policies(pattern) diff --git a/aws/delete-iam-users.py b/aws/delete_iam_users.py similarity index 100% rename from aws/delete-iam-users.py rename to aws/delete_iam_users.py diff --git a/aws/delete_ssm_parameters.py b/aws/delete_ssm_parameters.py new file mode 100755 index 0000000..318e2b0 --- /dev/null +++ b/aws/delete_ssm_parameters.py @@ -0,0 +1,46 @@ +#!/usr/bin/python3 +# +# Delete all SSM parameters under the given prefix +# + +import argparse +import boto3 +import click +import sys + +def list_parameters(ssm, prefix): + parameters = [] + filter = {'Key': 'Name', 'Values': [prefix]} + paginator = ssm.get_paginator('describe_parameters') + for page in paginator.paginate(Filters=[filter]): + for param in page['Parameters']: + parameters.append(param['Name']) + return parameters + + +def delete_parameters(ssm, parameter_names): + n = len(parameter_names) + for i in range(0, n, 10): + batch = parameter_names[i:min(i+10, n)] + ssm.delete_parameters(Names=batch) + + +parser = argparse.ArgumentParser(description="Delete SSM Parameters") +parser.add_argument("--region", help="AWS Region name", default="eu-west-1") +parser.add_argument("--prefix", help="Delete parameters with this prefix", required=True) + +args = parser.parse_args() + +ssm = boto3.client('ssm', region_name=args.region) +params = list_parameters(ssm, args.prefix) + +if not params: + print("No parameters with prefix {prefix}".format(prefix=args.prefix)) + sys.exit(0) + +print("Delete
parameters:") +for p in params: + print(" {name}".format(name=p)) + +if click.confirm("Continue?"): + delete_parameters(ssm, params) diff --git a/aws/delete_unused_security_groups.py b/aws/delete_unused_security_groups.py new file mode 100755 index 0000000..b55ed01 --- /dev/null +++ b/aws/delete_unused_security_groups.py @@ -0,0 +1,48 @@ +#!/usr/bin/env python3 + +import boto3 +import botocore +import click + + +def get_interfaces(ec2, groupId): + ifs = ec2.describe_network_interfaces(Filters=[ + {"Name": "group-id", "Values": [groupId]} + ]) + return ifs['NetworkInterfaces'] + + +def list_unused_groups(ec2): + unused = [] + paginator = ec2.get_paginator('describe_security_groups') + for page in paginator.paginate(): + for sg in page['SecurityGroups']: + interfaces = get_interfaces(ec2, sg['GroupId']) + num_attachments = len(interfaces) + if num_attachments == 0: + unused.append(sg) + return unused + + +def delete_security_groups(ec2, security_groups): + for sg in security_groups: + try: + ec2.delete_security_group(GroupId=sg['GroupId']) + print("Deleted security group {id}".format(id=sg['GroupId'])) + except botocore.exceptions.ClientError as err: + print("Security group {id} could not be deleted".format(id=sg['GroupId'])) + print(err) + + +if __name__ == "__main__": + ec2 = boto3.client('ec2') + unused = list_unused_groups(ec2) + for sg in unused: + print(sg['GroupId'], sg['GroupName'], sg['Description']) + if click.confirm("Delete {n} groups?".format(n=len(unused))): + delete_security_groups(ec2, unused) + + + + + From f1d5e236bab34b4978fa3a7ee69d304a5d508f12 Mon Sep 17 00:00:00 2001 From: Ray Miller Date: Sat, 13 Jul 2024 15:04:53 +0100 Subject: [PATCH 02/10] Add script to delete AWS S3 bucket --- aws/delete_s3_bucket.py | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) create mode 100755 aws/delete_s3_bucket.py diff --git a/aws/delete_s3_bucket.py b/aws/delete_s3_bucket.py new file mode 100755 index 0000000..6df0773 --- /dev/null +++ 
b/aws/delete_s3_bucket.py @@ -0,0 +1,35 @@ +#!/usr/bin/python3 +# +# An S3 bucket can only be deleted if it is empty, so all +# objects must be deleted. For a versioned bucket, this includes +# object versions and object deletion markers. +# + +import argparse +import boto3 +import click + + +def delete_s3_bucket(bucket_name, dry_run=True): + s3 = boto3.resource('s3') + bucket = s3.Bucket(bucket_name) + if not bucket.creation_date: + print(f"Bucket {bucket_name} not found") + return + n = 0 + for o in bucket.objects.all(): + n = n+1 + print(f"Delete {o.key}") + if click.confirm(f"Delete {n} objects from {bucket_name}?"): + bucket.objects.all().delete() + bucket.object_versions.all().delete() + bucket.delete() + print(f"Deleted bucket {bucket_name}") + + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description="Delete S3 bucket and its contents") + parser.add_argument("bucket", help="Name of the bucket to delete", nargs=1) + args = parser.parse_args() + for bucket in args.bucket: + delete_s3_bucket(bucket) From 378ef21e3b4b17086cd9ee33f688836b41fa5837 Mon Sep 17 00:00:00 2001 From: Ray Miller Date: Sat, 13 Jul 2024 15:05:41 +0100 Subject: [PATCH 03/10] Script to dump MIME parts (useful for debugging email messages) --- misc/dump-mime-parts.pl | 9 +++++++++ 1 file changed, 9 insertions(+) create mode 100755 misc/dump-mime-parts.pl diff --git a/misc/dump-mime-parts.pl b/misc/dump-mime-parts.pl new file mode 100755 index 0000000..9d69307 --- /dev/null +++ b/misc/dump-mime-parts.pl @@ -0,0 +1,9 @@ +#!/usr/bin/perl + +use MIME::Parser; + +my $parser = new MIME::Parser; + +$parser->output_under("."); +$entity = $parser->parse(\*STDIN) or die "parse failed"; +$entity->dump_skeleton(); From c3123ff07ccb9b5c84a21c5e076c135ee6d0e5e9 Mon Sep 17 00:00:00 2001 From: Ray Miller Date: Sat, 13 Jul 2024 15:07:07 +0100 Subject: [PATCH 04/10] Add script to generate a shadow audit scheme for a PostgreSQL database. 
--- misc/generate-pg-audit-ddl | 261 +++++++++++++++++++++++++++++++++++++ 1 file changed, 261 insertions(+) create mode 100755 misc/generate-pg-audit-ddl diff --git a/misc/generate-pg-audit-ddl b/misc/generate-pg-audit-ddl new file mode 100755 index 0000000..b21e579 --- /dev/null +++ b/misc/generate-pg-audit-ddl @@ -0,0 +1,261 @@ +#!/usr/bin/env perl + +use strict; +use warnings FATAL => 'all'; + +use DBI; +use Getopt::Long; +use Const::Fast; +use Term::ReadPassword; +use Template; +use Getopt::Long; + +const my %IGNORE_TABLE => ( + cached_reports => 1, + crispr_off_targets => 1, + summaries => 1, + fixture_md5 => 1, + crispr_off_target_summaries => 1, + design_attempts => 1, + crisprs => 1, + project_alleles => 1, +); + +const my $MAIN_SCHEMA => 'public'; +const my $AUDIT_SCHEMA => 'audit'; + +const my $CREATE_AUDIT_TABLE_TT => <<'EOT'; +CREATE TABLE [% audit_schema %].[% table_name %] ( +audit_op CHAR(1) NOT NULL CHECK (audit_op IN ('D','I','U')), +audit_user TEXT NOT NULL, +audit_stamp TIMESTAMP NOT NULL, +audit_txid INTEGER NOT NULL, +[% column_spec.join(",\n") %] +); +EOT + +const my $CREATE_AUDIT_FUNCTION_TT => <<'EOT'; +CREATE OR REPLACE FUNCTION [% main_schema %].process_[% table_name %]_audit() +RETURNS TRIGGER AS $[% table_name %]_audit$ + BEGIN + IF (TG_OP = 'DELETE') THEN + INSERT INTO [% audit_schema %].[% table_name %] SELECT 'D', user, now(), txid_current(), OLD.*; + ELSIF (TG_OP = 'UPDATE') THEN + INSERT INTO [% audit_schema %].[% table_name %] SELECT 'U', user, now(), txid_current(), NEW.*; + ELSIF (TG_OP = 'INSERT') THEN + INSERT INTO [% audit_schema %].[% table_name %] SELECT 'I', user, now(), txid_current(), NEW.*; + END IF; + RETURN NULL; + END; +$[% table_name %]_audit$ LANGUAGE plpgsql; +EOT + +const my $CREATE_AUDIT_TRIGGER_TT => <<'EOT'; +CREATE TRIGGER [% table_name %]_audit +AFTER INSERT OR UPDATE OR DELETE ON [% main_schema %].[% table_name %] + FOR EACH ROW EXECUTE PROCEDURE [% main_schema %].process_[% table_name %]_audit(); +EOT + 
+const my $DROP_AUDIT_TABLE_COLUMN_TT => <<'EOT'; +ALTER TABLE [% audit_schema %].[% table_name %] DROP COLUMN [% column_name %]; +EOT + +const my $ADD_AUDIT_TABLE_COLUMN_TT => <<'EOT'; +ALTER TABLE [% audit_schema %].[% table_name %] ADD COLUMN [% column_name %] [% column_type %]; +EOT + +const my $DROP_AUDIT_TABLE_TT => <<'EOT'; +DROP TABLE [% audit_schema %].[% table_name %]; +EOT + +const my %IS_AUDIT_COL => map { $_ => 1 } qw( audit_op audit_user audit_stamp audit_txid ); + +const my %NEEDS_SIZE => map { $_ => 1 } qw( char character varchar ); + +{ + + my $pg_host = $ENV{PGHOST}; + my $pg_port = $ENV{PGPORT}; + my $pg_dbname = $ENV{PGDATABASE}; + my $pg_user = 'lims2'; + + GetOptions( + 'host=s' => \$pg_host, + 'port=s' => \$pg_port, + 'dbname=s' => \$pg_dbname, + 'user=s' => \$pg_user + ) or die "Usage: $0 [OPTIONS]\n"; + + my $pg_password; + while ( not defined $pg_password ) { + $pg_password = read_password("Enter PostgreSQL password for $pg_user: "); + } + + my $dsn = 'dbi:Pg:dbname=' . $pg_dbname; + + if ( defined $pg_host ) { + $dsn .= ";host=" . $pg_host; + } + + if ( defined $pg_port ) { + $dsn .= ";port=" . 
$pg_port; + } + + my $dbh = DBI->connect( $dsn, $pg_user, $pg_password, { AutoCommit => 1, RaiseError => 1, PrintError => 0 } ) + or die "Failed to connect to $dsn: $DBI::errstr\n"; + + const my %VARS => ( + main_schema => $MAIN_SCHEMA, + audit_schema => $AUDIT_SCHEMA, + ); + + my $tt = Template->new; + + my $main_tables = get_tables( $dbh, $MAIN_SCHEMA ); + my $audit_tables = get_tables( $dbh, $AUDIT_SCHEMA ); + + while ( my ( $table_name, $main_table ) = each %{$main_tables} ) { + next if exists $IGNORE_TABLE{$table_name}; + my $audit_table = $audit_tables->{$table_name}; + if ($audit_table) { + diff_tables( $table_name, $main_table, $audit_table, $tt, \%VARS ); + } + else { + initialize_auditing( $table_name, $main_table, $tt, \%VARS ); + } + } + + for my $table_name ( keys %{$audit_tables} ) { + unless ( $main_tables->{$table_name} ) { + $tt->process( \$DROP_AUDIT_TABLE_TT, { %VARS, table_name => $table_name } ); + } + } +} + +sub diff_tables { + my ( $table_name, $col_spec, $audit_col_spec, $tt, $VARS ) = @_; + + my %vars = ( %{$VARS}, table_name => $table_name ); + + my %cols = map { @{$_} } @{$col_spec}; + my %audit_cols = map { @{$_} } @{$audit_col_spec}; + + for my $cs ( @{$col_spec} ) { + my ( $column_name, $column_type ) = @{$cs}; + my $audit_column_type = $audit_cols{$column_name}; + if ($audit_column_type) { + if ( $audit_column_type ne $column_type ) { + warn "Table $table_name column $column_name type mismatch ($column_type vs $audit_column_type)\n"; + } + } + else { + $tt->process( \$ADD_AUDIT_TABLE_COLUMN_TT, + { %vars, column_name => $column_name, column_type => $column_type } ); + } + } + + for my $audit_column_name ( keys %audit_cols ) { + unless ( $cols{$audit_column_name} or exists $IS_AUDIT_COL{$audit_column_name} ) { + $tt->process( \$DROP_AUDIT_TABLE_COLUMN_TT, { %vars, column_name => $audit_column_name } ); + } + } + + return; +} + +sub initialize_auditing { + my ( $table_name, $col_spec, $tt, $VARS ) = @_; + + my %vars = ( + %{$VARS}, + 
table_name => $table_name, + column_spec => [ map { join q{ }, @{$_} } @{$col_spec} ] + ); + + $tt->process( \$CREATE_AUDIT_TABLE_TT, \%vars ); + $tt->process( \$CREATE_AUDIT_FUNCTION_TT, \%vars ); + $tt->process( \$CREATE_AUDIT_TRIGGER_TT, \%vars ); + + return; +} + +sub get_tables { + my ( $dbh, $schema_name ) = @_; + + my $sth = $dbh->table_info( undef, $schema_name, undef, 'TABLE' ); + + my %tables; + + while ( my $r = $sth->fetchrow_hashref ) { + $tables{ $r->{TABLE_NAME} } = get_column_info( $dbh, $schema_name, $r->{TABLE_NAME} ); + } + + return \%tables; +} + +sub get_column_info { + my ( $dbh, $schema_name, $table_name ) = @_; + + my @column_info; + + my $sth = $dbh->column_info( undef, $schema_name, $table_name, undef ); + while ( my $r = $sth->fetchrow_hashref ) { + my $type = $r->{TYPE_NAME}; + if ( exists $NEEDS_SIZE{$type} ) { + # HACK for bpchar type columns in qc_template_well_genotyping_primers and + # qc_template_well_crispr_primers tables ( qc_run_id column ) + my $col_size = $r->{COLUMN_SIZE} ? $r->{COLUMN_SIZE} : '36'; + $type = $type . '(' . $col_size . ')'; + } + push @column_info, [ $r->{COLUMN_NAME}, $type ]; + } + + return \@column_info; +} + +__END__ + +=pod + +=head1 NAME + +generate-pg-audit-ddl + +=head1 SYNOPSIS + + generate-pg-audit-ddl --host pgsrv5 --port 5437 --dbname lims2_devel --user lims2 + +=head1 DESCRIPTION + +This script interrogates the database specified by the C<--dbname> +command-line option and compares the I<main> schema with the +I<audit> schema. It emits SQL to create a table in the I<audit> schema +shadowing each table in the B<main> schema, a function to insert a +row in the corresponding I<audit> table for each C<INSERT>, C<UPDATE> +or C<DELETE> in the I<main> schema, and a trigger that calls this +function. + +=head1 LIMITATIONS + +This script assumes that the I<audit> schema already exists. It +attempts to create SQL that will transition the current state of the +I<audit> schema to the desired state (shadowing the current state of +the I<main> schema). 
Note, however, that it cannot accurately detect +column and table renames. + +If you have renamed a column or table in the main schema, this script +will emit C<DROP> and C<ADD> statements that will B<lose data>. Please review the generated SQL carefully in +case this is not what you intended. + +=head1 SEE ALSO + +The I<audit> tables, functions, and triggers are derived from an +example described here: +L<https://wiki.postgresql.org/wiki/Audit_trigger> + +=head1 AUTHOR + +Ray Miller E<lt>rm7@sanger.ac.ukE<gt> + +=cut From eb39abfd968f8526a27b09baafb1b611a87d84fe Mon Sep 17 00:00:00 2001 From: Ray Miller Date: Sat, 13 Jul 2024 15:07:44 +0100 Subject: [PATCH 05/10] Add script to split mbox exported by GMail. GMail exports the entire account to a single mbox file. This script splits it into multiple mbox files according to the labels. --- misc/sort-mail.py | 60 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 60 insertions(+) create mode 100755 misc/sort-mail.py diff --git a/misc/sort-mail.py b/misc/sort-mail.py new file mode 100755 index 0000000..1ef2429 --- /dev/null +++ b/misc/sort-mail.py @@ -0,0 +1,60 @@ +#!/usr/bin/python3 + +from mailbox import mbox + +targets = [ + "28 Ellesmere Road Purchase", + "3-mobile", + "Anglian Windows", + "Clojure", + "Conveyancing Quotes", + "CTCCambridge", + "CTCCambridgeRoutes", + "CTCOxford", + "Dad's Estate", + "Dad's Memorial", + "Dad's Memorial Service", + "Facebook", + "Golang", + "GreenMetropolis", + "LibDems", + "Nationwide", + "OkCupid", + "Pseudospam", + "Riverford", + "RussianDatingScam", + "Sanger", + "SmileBanking", + "UKUUG", + "Virgin Wines", + "Personal", + "Sent", + "Inbox", + "Archived", + "Spam", + "Bin", +] + +def target(m): + if "X-Gmail-Labels" in m: + labels = m["X-Gmail-Labels"].split(",") + for t in targets: + if t in labels: + return t + return "Uncategorized" + + +incoming = mbox("/home/ray/Mail/Gmail.mbox", create=False) + +destinations = {} + +n = 0 +for m in incoming: + t = target(m) + if t not in destinations: + destinations[t] = mbox(f"/home/ray/Mail/GMail/{t}", 
create=True) + destinations[t].add(m) + +for d in destinations: + d.flush() + From ef791b6be6c4d99bfccf49d62832d059fc86888c Mon Sep 17 00:00:00 2001 From: Ray Miller Date: Sat, 13 Jul 2024 15:10:36 +0100 Subject: [PATCH 06/10] Initial version of script to fix Nationwide credit card statement --- guile/fix-nationwide-statement.scm | 72 ++++++++++++++++++++++++++++++ 1 file changed, 72 insertions(+) create mode 100755 guile/fix-nationwide-statement.scm diff --git a/guile/fix-nationwide-statement.scm b/guile/fix-nationwide-statement.scm new file mode 100755 index 0000000..60be247 --- /dev/null +++ b/guile/fix-nationwide-statement.scm @@ -0,0 +1,72 @@ +#!/usr/bin/env -S guile -e main -s +!# + +(use-modules (ice-9 match) + (ice-9 getopt-long) + (dsv)) + + +(define date-input-format "%d %b %Y") +(define date-output-format "%Y-%m-%d") + +(define (format-date d) + (strftime date-output-format + (car (strptime date-input-format d)))) + +(define (read-statement path) + (call-with-input-file path + (lambda (port) + (dsv->scm port #:format 'rfc4180)))) + +(define currency-charset (string->char-set "0123456789.-")) + +(define (format-amount s) + (string-filter currency-charset s)) + +(define (process-row row) + (match-let (((date description location paid-out paid-in) row)) + (list (format-date date) + description + location + (format-amount paid-out) + (format-amount paid-in)))) + +(define (process-statement input-path output-path) + (match-let (((_ _ _ _ header . data) (read-statement input-path))) + (let ((updated (cons header (map process-row data)))) + (call-with-output-file output-path + (lambda (port) + (scm->dsv updated port #:format 'rfc4180)))))) + +(define* (usage #:optional errmsg) + (with-output-to-port (current-error-port) + (lambda () + (when errmsg + (display "Error: ") + (display errmsg) + (newline)) + (display "\ +Usage: fix-credit-card-statement [options] + -h, --help Display this help. + -i, --input=FILENAME Input file path. 
+ -o, --output=FILENAME Output file path. Required unless --overwrite is given. + -w, --overwrite Overwrite the input file with the updated data. +") + (exit (if errmsg EXIT_FAILURE EXIT_SUCCESS))))) + +(define (main args) + (let* ((option-spec '((help (single-char #\h) (value #f)) + (input (single-char #\i) (value #t)) + (output (single-char #\o) (value #t)) + (overwrite (single-char #\w) (value #f)))) + (options (getopt-long args option-spec)) + (help-wanted (option-ref options 'help #f)) + (input (option-ref options 'input #f)) + (output (option-ref options 'output #f)) + (overwrite (option-ref options 'overwrite #f))) + (cond + (help-wanted (usage)) + ((not input) (usage "input filename is required")) + ((and overwrite output) (usage "output filename cannot be given with --overwrite")) + ((not (or overwrite output)) (usage "output filename is required without --overwrite"))) + (process-statement input (or output input)))) From 98910d33069ed65888a301de6f5e0e7c2144a114 Mon Sep 17 00:00:00 2001 From: Ray Miller Date: Sat, 13 Jul 2024 16:55:52 +0100 Subject: [PATCH 07/10] Make Nationwide statement processing data-driven. Instead of different scripts for credit card and current account statements, define a profile for each that specifies the date and amount columns, and have this control the processing. --- guile/fix-nationwide-statement.scm | 113 +++++++++++++++++++++++------ 1 file changed, 89 insertions(+), 24 deletions(-) diff --git a/guile/fix-nationwide-statement.scm b/guile/fix-nationwide-statement.scm index 60be247..cc45410 100755 --- a/guile/fix-nationwide-statement.scm +++ b/guile/fix-nationwide-statement.scm @@ -1,11 +1,17 @@ #!/usr/bin/env -S guile -e main -s !# -(use-modules (ice-9 match) - (ice-9 getopt-long) +;; Script for updating current account and credit card statements +;; downloaded from Nationwide, who use a date and currency format +;; that Gnucash does not support. 
+ +(use-modules (ice-9 getopt-long) + ((srfi srfi-1) #:select (drop)) (dsv)) - +;; Date appears in Nationwide statements in the format +;; "10 Jan 2024", but this is not understood by Gnucash +;; so we convert it to YYYY-MM-DD format. (define date-input-format "%d %b %Y") (define date-output-format "%Y-%m-%d") @@ -13,31 +19,84 @@ (strftime date-output-format (car (strptime date-input-format d)))) -(define (read-statement path) - (call-with-input-file path - (lambda (port) - (dsv->scm port #:format 'rfc4180)))) - +;; Characters we expect to see in a numeric amount field. The +;; Nationwide statements contain a non-ASCII currency character +;; that we want to delete. (define currency-charset (string->char-set "0123456789.-")) (define (format-amount s) (string-filter currency-charset s)) -(define (process-row row) - (match-let (((date description location paid-out paid-in) row)) - (list (format-date date) - description - location - (format-amount paid-out) - (format-amount paid-in)))) +;; Profiles for the different statemnets. +;; skip: the number of leading rows to skip +;; header: boolean indicating whether or not the first unskipped +;; row is a header +;; date-cols: list of columns containing dates +;; amount-cols: list columns containing amounts +(define profiles + '(("credit-card" . ((skip . 4) + (header . #t) + (date-cols . (0)) + (amount-cols . (3 4)))) + ("current-account" . ((skip . 4) + (header . #t) + (date-cols . (0)) + (amount-cols . (3 4 5)))))) -(define (process-statement input-path output-path) - (match-let (((_ _ _ _ header . data) (read-statement input-path))) - (let ((updated (cons header (map process-row data)))) - (call-with-output-file output-path - (lambda (port) - (scm->dsv updated port #:format 'rfc4180)))))) +;; Predicate for validating the profile option. +(define (valid-profile? p) + (if (assoc p profiles) #t #f)) +;; Update a list by applying the given function to each of the +;; listed columns. 
+(define (update-list lst cols f) + (for-each (lambda (k) + (let ((v (list-ref lst k))) + (list-set! lst k (f v)))) + cols)) + +;; Given a spec listing the date and amount columns, return a +;; function that will apply the corresponding formats to a row. +(define (process-row spec) + (let ((date-cols (assq-ref spec 'date-cols)) + (amount-cols (assq-ref spec 'amount-cols))) + (lambda (row) + (when date-cols + (update-list row date-cols format-date)) + (when amount-cols + (update-list row amount-cols format-amount))))) + +;; Read a CSV from the given path. +(define (read-statement path) + (call-with-input-file path + (lambda (port) + (dsv->scm port #:format 'rfc4180)))) + +;; Write data to the given path in CSV format. +(define (write-statement data path) + (call-with-output-file path + (lambda (port) + (scm->dsv data port #:format 'rfc4180)))) + + +(define (update-data spec data) + (let* ((data (drop data (or (assq-ref spec 'skip) 0))) + (header (if (assq-ref spec 'header) (car data) #f)) + (data (if header (cdr data) data))) + (for-each (process-row spec) data) + (if header + (cons header data) + data))) + +;; Apply the updates defined in `spec` to the statement read +;; from input-path and write the updated data to output-path. +(define (process-statement spec input-path output-path) + (let ((data (read-statement input-path))) + (write-statement (update-data spec data) output-path))) + +;; Display a usage message and (optional) error message to STDERR +;; and exit. If an error message is given the exit code will be +;; non-zero. (define* (usage #:optional errmsg) (with-output-to-port (current-error-port) (lambda () @@ -51,22 +110,28 @@ Usage: fix-credit-card-statement [options] -i, --input=FILENAME Input file path. -o, --output=FILENAME Output file path. Required unless --overwrite is given. -w, --overwrite Overwrite the input file with the updated data. + -p, --profile=PROFILE Profile name [credit-card|current-account]. 
") (exit (if errmsg EXIT_FAILURE EXIT_SUCCESS))))) +;; Process command-line arguments and validate options. +;; If valid, run process-statement with the given options. (define (main args) - (let* ((option-spec '((help (single-char #\h) (value #f)) + (let* ((option-spec `((help (single-char #\h) (value #f)) (input (single-char #\i) (value #t)) (output (single-char #\o) (value #t)) - (overwrite (single-char #\w) (value #f)))) + (overwrite (single-char #\w) (value #f)) + (profile (single-char #\p) (value #t) (predicate ,valid-profile?)))) (options (getopt-long args option-spec)) (help-wanted (option-ref options 'help #f)) + (profile (option-ref options 'profile #f)) (input (option-ref options 'input #f)) (output (option-ref options 'output #f)) (overwrite (option-ref options 'overwrite #f))) (cond (help-wanted (usage)) + ((not profile) (usage "profile is required")) ((not input) (usage "input filename is required")) ((and overwrite output) (usage "output filename cannot be given with --overwrite")) ((not (or overwrite output)) (usage "output filename is required without --overwrite"))) - (process-statement input (or output input)))) + (process-statement (assoc-ref profiles profile) input (or output input)))) From ae2de95d521b5d9a4bd3537c31835f28094ec960 Mon Sep 17 00:00:00 2001 From: Ray Miller Date: Sat, 13 Jul 2024 17:07:43 +0100 Subject: [PATCH 08/10] Document update-data function. --- guile/fix-nationwide-statement.scm | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/guile/fix-nationwide-statement.scm b/guile/fix-nationwide-statement.scm index cc45410..b3b9b83 100755 --- a/guile/fix-nationwide-statement.scm +++ b/guile/fix-nationwide-statement.scm @@ -78,7 +78,10 @@ (lambda (port) (scm->dsv data port #:format 'rfc4180)))) - +;; Apply the specified updates to data (a list of rows +;; read from the CSV). If a 'skip value is specified, drop +;; this many leading rows. If a 'header is present, only +;; apply the updates to the succeeding rows. 
(define (update-data spec data) (let* ((data (drop data (or (assq-ref spec 'skip) 0))) (header (if (assq-ref spec 'header) (car data) #f)) @@ -107,7 +110,7 @@ (display "\ Usage: fix-credit-card-statement [options] -h, --help Display this help. - -i, --input=FILENAME Input file path. + -i, --input=FILENAME Input file path. Required. -o, --output=FILENAME Output file path. Required unless --overwrite is given. -w, --overwrite Overwrite the input file with the updated data. -p, --profile=PROFILE Profile name [credit-card|current-account]. From ec51a0c9193242f7397e8dd633aaf55998c361cd Mon Sep 17 00:00:00 2001 From: Ray Miller Date: Sat, 13 Jul 2024 17:14:16 +0100 Subject: [PATCH 09/10] Simplify the update-data function. --- guile/fix-nationwide-statement.scm | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/guile/fix-nationwide-statement.scm b/guile/fix-nationwide-statement.scm index b3b9b83..c410a3c 100755 --- a/guile/fix-nationwide-statement.scm +++ b/guile/fix-nationwide-statement.scm @@ -81,15 +81,13 @@ ;; Apply the specified updates to data (a list of rows ;; read from the CSV). If a 'skip value is specified, drop ;; this many leading rows. If a 'header is present, only -;; apply the updates to the succeeding rows. +;; apply the updates to the succeeding rows, preserving +;; the header as-is. (define (update-data spec data) - (let* ((data (drop data (or (assq-ref spec 'skip) 0))) - (header (if (assq-ref spec 'header) (car data) #f)) - (data (if header (cdr data) data))) - (for-each (process-row spec) data) - (if header - (cons header data) - data))) + (let* ((skip (assq-ref spec 'skip)) + (data (if skip (drop data skip) data))) + (for-each (process-row spec) (if (assq-ref spec 'header) (cdr data) data)) + data)) ;; Apply the updates defined in `spec` to the statement read ;; from input-path and write the updated data to output-path. 
From 26bf41e073341455e423854d718056f2ec5da575 Mon Sep 17 00:00:00 2001 From: Ray Miller Date: Sat, 13 Jul 2024 17:20:04 +0100 Subject: [PATCH 10/10] Fix typo. --- guile/fix-nationwide-statement.scm | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/guile/fix-nationwide-statement.scm b/guile/fix-nationwide-statement.scm index c410a3c..27854b4 100755 --- a/guile/fix-nationwide-statement.scm +++ b/guile/fix-nationwide-statement.scm @@ -27,7 +27,7 @@ (define (format-amount s) (string-filter currency-charset s)) -;; Profiles for the different statemnets. +;; Profiles for the different statement formats. ;; skip: the number of leading rows to skip ;; header: boolean indicating whether or not the first unskipped ;; row is a header