From 2f9e0e66e35b685a26c726ba9e28179d14d2abaa Mon Sep 17 00:00:00 2001
From: Ray Miller
Date: Sun, 19 Apr 2020 14:57:02 +0100
Subject: [PATCH] Add filters to opennames processing. Filter out suburban areas.

---
Review amendments: use log.Fatalf for the formatted usage messages, check the
error returned by ProcessFile in cmd/localtypes, and document the new API.
The blob ids on the "index" lines predate these amendments.

 cmd/localtypes/main.go        | 39 ++++++++++++++++
 cmd/save-gob/main.go          | 28 ++++++++++-
 pkg/openname/io.go            | 87 ++++++++++++++++++++++++-----------
 pkg/openname/parse.go         |  5 ++
 pkg/placenames/placenames.bin |  4 +-
 5 files changed, 132 insertions(+), 31 deletions(-)
 create mode 100644 cmd/localtypes/main.go

diff --git a/cmd/localtypes/main.go b/cmd/localtypes/main.go
new file mode 100644
index 0000000..f199f8f
--- /dev/null
+++ b/cmd/localtypes/main.go
@@ -0,0 +1,39 @@
+package main
+
+import (
+	"fmt"
+	"log"
+	"os"
+	"sort"
+
+	"github.com/ray1729/gpx-utils/pkg/openname"
+)
+
+// main prints "name,localtype" for every populatedPlace record within a radius
+// of 20000 of the hard-coded point (544945, 258410), sorted by name.
+func main() {
+	log.SetFlags(0)
+	if len(os.Args) != 2 {
+		log.Fatalf("Usage: %s OPNAME_CSV_ZIP", os.Args[0])
+	}
+	var records []*openname.Record
+	err := openname.ProcessFile(
+		os.Args[1],
+		func(r *openname.Record) error {
+			records = append(records, r)
+			return nil
+		},
+		openname.FilterType("populatedPlace"),
+		openname.FilterWithinRadius(544945, 258410, 20000),
+	)
+	if err != nil {
+		// Fail loudly rather than print a listing built from a partial read.
+		log.Fatal(err)
+	}
+	sort.Slice(records, func(i, j int) bool {
+		return records[i].Name < records[j].Name
+	})
+	for _, r := range records {
+		fmt.Printf("%s,%s\n", r.Name, r.LocalType)
+	}
+}
diff --git a/cmd/save-gob/main.go b/cmd/save-gob/main.go
index 6608d82..f318730 100644
--- a/cmd/save-gob/main.go
+++ b/cmd/save-gob/main.go
@@ -1,10 +1,12 @@
 package main
 
 import (
+	"encoding/gob"
 	"log"
 	"os"
 
 	"github.com/ray1729/gpx-utils/pkg/openname"
+	"github.com/ray1729/gpx-utils/pkg/placenames"
 )
 
 func main() {
@@ -12,7 +14,29 @@ func main() {
 	if len(os.Args) != 3 {
-		log.Fatal("Usage: %s INFILE OUTFILE", os.Args[0])
+		log.Fatalf("Usage: %s INFILE OUTFILE", os.Args[0])
 	}
-	if err := openname.Save(os.Args[1], os.Args[2]); err != nil {
+	wc, err := os.OpenFile(os.Args[2], os.O_CREATE|os.O_RDWR|os.O_TRUNC, 0644)
+	if err != nil {
+		log.Fatal(err)
+	}
+	defer wc.Close()
+	enc := gob.NewEncoder(wc)
+	err = openname.ProcessFile(
+		os.Args[1],
+		func(r *openname.Record) error {
+			b := placenames.NamedBoundary{
+				Name: r.Name,
+				Xmin: r.MbrXMin,
+				Ymin: r.MbrYMin,
+				Xmax: r.MbrXMax,
+				Ymax: r.MbrYMax}
+			return enc.Encode(b)
+		},
+		// Keep populated places, minus suburban areas, whose Area is positive.
+		openname.FilterType("populatedPlace"),
+		openname.FilterLocalType("Suburban Area").Complement(),
+		openname.FilterAreaGt(0),
+	)
+	if err != nil {
 		log.Fatal(err)
 	}
 }
diff --git a/pkg/openname/io.go b/pkg/openname/io.go
index f3cdf37..d31f4b2 100644
--- a/pkg/openname/io.go
+++ b/pkg/openname/io.go
@@ -2,16 +2,58 @@ package openname
 
 import (
 	"archive/zip"
-	"encoding/gob"
 	"fmt"
-	"os"
+	"math"
 	"strings"
-
-	"github.com/ray1729/gpx-utils/pkg/placenames"
 )
 
+// Handler is the callback ProcessFile invokes for each record it does not filter out.
+type Handler func(*Record) error
+
+// Filter reports whether a record is accepted.
+type Filter func(*Record) bool
+
+// Complement returns a Filter that accepts exactly the records f rejects.
+func (f Filter) Complement() Filter {
+	return func(r *Record) bool {
+		return !f(r)
+	}
+}
+
+// FilterType returns a Filter that accepts records whose Type equals t.
+func FilterType(t string) Filter {
+	return func(r *Record) bool {
+		return r.Type == t
+	}
+}
+
+// FilterLocalType returns a Filter that accepts records whose LocalType equals t.
+func FilterLocalType(t string) Filter {
+	return func(r *Record) bool {
+		return r.LocalType == t
+	}
+}
+
+// FilterWithinRadius returns a Filter that accepts records whose (GeomX, GeomY)
+// point is at most radius away from (x, y) in straight-line distance.
+func FilterWithinRadius(x, y, radius float64) Filter {
+	return func(r *Record) bool {
+		dx := x - r.GeomX
+		dy := y - r.GeomY
+		return math.Sqrt(dx*dx+dy*dy) <= radius
+	}
+}
+
+// FilterAreaGt returns a Filter that accepts records whose Area exceeds a.
+func FilterAreaGt(a float64) Filter {
+	return func(r *Record) bool {
+		return r.Area() > a
+	}
+}
+
-// ProcessFile reads the compressed OS Open Names data set and calls the handler for each record.
-func ProcessFile(filename string, handler func(*Record) error) error {
+// ProcessFile reads the compressed OS Open Names data set and calls the handler
+// for each record that every filter accepts (all records when no filters are given).
+func ProcessFile(filename string, handler Handler, filters ...Filter) error {
 	r, err := zip.OpenReader(filename)
 	if err != nil {
 		return fmt.Errorf("error opening %s for reading: %v", filename, err)
@@ -33,10 +75,11 @@ func ProcessFile(filename string, handler func(*Record) error) error {
 		}
 		for s.Scan() {
 			r := s.Record()
-			if r.Type == "populatedPlace" && r.MbrXMax != r.MbrXMin && r.MbrYMax != r.MbrYMin {
-				if err := handler(r); err != nil {
-					return err
-				}
+			if !applyFilters(r, filters) {
+				continue
+			}
+			if err := handler(r); err != nil {
+				return err
 			}
 		}
 		if err = s.Err(); err != nil {
@@ -48,22 +91,12 @@ func ProcessFile(filename string, handler func(*Record) error) error {
 	return nil
 }
 
-// Save processes the OS OpenNames zip file and outputs bounded places in gob format.
-func Save(inFile string, outFile string) error {
-	wc, err := os.OpenFile(outFile, os.O_CREATE|os.O_RDWR|os.O_TRUNC, 0644)
-	if err != nil {
-		return err
+// applyFilters reports whether every filter accepts r; an empty list accepts r.
+func applyFilters(r *Record, filters []Filter) bool {
+	for _, f := range filters {
+		if !f(r) {
+			return false
+		}
 	}
-	defer wc.Close()
-	enc := gob.NewEncoder(wc)
-	err = ProcessFile(inFile, func(r *Record) error {
-		b := placenames.NamedBoundary{
-			Name: r.Name,
-			Xmin: r.MbrXMin,
-			Ymin: r.MbrYMin,
-			Xmax: r.MbrXMax,
-			Ymax: r.MbrYMax}
-		return enc.Encode(b)
-	})
-	return err
+	return true
 }
diff --git a/pkg/openname/parse.go b/pkg/openname/parse.go
index b0e4461..49270dd 100644
--- a/pkg/openname/parse.go
+++ b/pkg/openname/parse.go
@@ -56,6 +56,11 @@ func (r *Record) Bounds() *rtreego.Rect {
 	return rect
 }
 
+// Area returns (MbrXMax - MbrXMin) * (MbrYMax - MbrYMin) for the record.
+func (r *Record) Area() float64 {
+	return (r.MbrXMax - r.MbrXMin) * (r.MbrYMax - r.MbrYMin)
+}
+
 type Scanner struct {
 	csvReader  *csv.Reader
 	nextRecord *Record
diff --git a/pkg/placenames/placenames.bin b/pkg/placenames/placenames.bin
index c8c35c2..60ebb29 100644
--- a/pkg/placenames/placenames.bin
+++ b/pkg/placenames/placenames.bin
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0db50946fc948c9db77ae4644aaca0ffa6d748b61ab0465840ab814a0b87833c
-size 1711464
+oid sha256:7294ed9778ce80df6bda24d8dc58835fece74071cefbef6b9c93fa3d460b704d
+size 1278382