From 86022818da1dcd8797a73a5a2164db3e5ebf42f7 Mon Sep 17 00:00:00 2001 From: Ray Miller Date: Sat, 18 Apr 2020 12:58:53 +0100 Subject: [PATCH] Store subset of opennames data in gob format and use that to restore index. --- .gitattributes | 1 + .gitignore | 1 + README.md | 26 +++++++++ cmd/analyze-gpx/main.go | 18 ++---- cmd/save-gob/main.go | 18 ++++++ cmd/serve-rwgps/main.go | 14 +---- go.mod | 1 + go.sum | 2 + pkg/openname/index.go | 44 --------------- pkg/openname/io.go | 69 +++++++++++++++++++++++ pkg/placenames/placenames.bin | 3 + pkg/placenames/placenames.go | 57 +++++++++++++++++++ pkg/{openname => placenames}/summarize.go | 18 +++--- 13 files changed, 195 insertions(+), 77 deletions(-) create mode 100644 .gitattributes create mode 100644 cmd/save-gob/main.go delete mode 100644 pkg/openname/index.go create mode 100644 pkg/openname/io.go create mode 100644 pkg/placenames/placenames.bin create mode 100644 pkg/placenames/placenames.go rename pkg/{openname => placenames}/summarize.go (83%) diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..4edd5ac --- /dev/null +++ b/.gitattributes @@ -0,0 +1 @@ +*.bin filter=lfs diff=lfs merge=lfs -text diff --git a/.gitignore b/.gitignore index 5df8d32..7182b33 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,3 @@ /data/ /bin/ +/pkg/placenames/data.go diff --git a/README.md b/README.md index 43335f2..718573e 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,29 @@ # GPX Utils Utilities for analyzing and indexing GPX routes. + +## Compiling placenames.bin + +This step extracts bounding boxes for populated places from the OS Open Names dataset, which is available for free download: https://www.ordnancesurvey.co.uk/business-government/products/open-map-names + + go run ./cmd/save-gob/... opname_csv_gb.zip ./pkg/placenames/placenames.bin + +I have included the compiled extract in this repository so you can skip this step. 
+ +## Binary embedding + +We use [mule](https://github.com/wlbr/mule) to embed the gob data in the compiled binaries: + + go get github.com/wlbr/mule + +Make sure the `mule` command is on your PATH before running `go generate`. + +## Compiling + + mkdir -p bin + go generate ./... + go build -o bin ./... + +## Attribution + +Contains OS data © Crown copyright and database right 2018 diff --git a/cmd/analyze-gpx/main.go b/cmd/analyze-gpx/main.go index 17e2a76..b72fd45 100644 --- a/cmd/analyze-gpx/main.go +++ b/cmd/analyze-gpx/main.go @@ -10,25 +10,17 @@ import ( "os" "path" - "github.com/ray1729/gpx-utils/pkg/openname" + "github.com/ray1729/gpx-utils/pkg/placenames" ) func main() { - openNames := flag.String("opname", "", "Path to Ordnance Server Open Names zip archive") gpxFile := flag.String("gpx", "", "Path to GPX file") dirName := flag.String("dir", "", "Directory to scan for GPX files") flag.Parse() - if *openNames == "" { - log.Fatal("--opname is required") - } if (*gpxFile == "" && *dirName == "") || (*gpxFile != "" && *dirName != "") { log.Fatal("exactly one of --dir or --gpx is required") } - rt, err := openname.BuildIndex(*openNames) - if err != nil { - log.Fatal(err) - } - gs, err := openname.NewGPXSummarizer(rt) + gs, err := placenames.NewGPXSummarizer() if err != nil { log.Fatal(err) } @@ -42,7 +34,7 @@ func main() { } } -func summarizeDirectory(gs *openname.GPXSummarizer, dirName string) error { +func summarizeDirectory(gs *placenames.GPXSummarizer, dirName string) error { files, err := ioutil.ReadDir(dirName) if err != nil { return err @@ -78,7 +70,7 @@ func summarizeDirectory(gs *openname.GPXSummarizer, dirName string) error { return nil } -func summarizeSingleFile(gs *openname.GPXSummarizer, filename string) error { +func summarizeSingleFile(gs *placenames.GPXSummarizer, filename string) error { r, err := os.Open(filename) if err != nil { return fmt.Errorf("error opening %s for reading: %v", filename, err) @@ -93,7 +85,7 @@ func summarizeSingleFile(gs 
*openname.GPXSummarizer, filename string) error { return nil } -func writeSummary(s *openname.TrackSummary, w io.Writer) error { +func writeSummary(s *placenames.TrackSummary, w io.Writer) error { enc := json.NewEncoder(w) enc.SetIndent("", " ") if err := enc.Encode(s); err != nil { diff --git a/cmd/save-gob/main.go b/cmd/save-gob/main.go new file mode 100644 index 0000000..6608d82 --- /dev/null +++ b/cmd/save-gob/main.go @@ -0,0 +1,18 @@ +package main + +import ( + "log" + "os" + + "github.com/ray1729/gpx-utils/pkg/openname" +) + +func main() { + log.SetFlags(0) + if len(os.Args) != 3 { + log.Fatalf("Usage: %s INFILE OUTFILE", os.Args[0]) + } + if err := openname.Save(os.Args[1], os.Args[2]); err != nil { + log.Fatal(err) + } +} diff --git a/cmd/serve-rwgps/main.go b/cmd/serve-rwgps/main.go index f149d44..9f5e7cd 100644 --- a/cmd/serve-rwgps/main.go +++ b/cmd/serve-rwgps/main.go @@ -10,7 +10,7 @@ import ( "os" "strconv" - "github.com/ray1729/gpx-utils/pkg/openname" + "github.com/ray1729/gpx-utils/pkg/placenames" ) func main() { @@ -18,15 +18,7 @@ func main() { if listenAddr == "" { listenAddr = ":8000" } - openNames := os.Getenv("OPNAMES") - if openNames == "" { - log.Fatal("OPNAMES not set") - } - rt, err := openname.BuildIndex(openNames) - if err != nil { - log.Fatal(err) - } - gs, err := openname.NewGPXSummarizer(rt) + gs, err := placenames.NewGPXSummarizer() if err != nil { log.Fatal(err) } @@ -35,7 +27,7 @@ func main() { log.Fatal(http.ListenAndServe(listenAddr, nil)) } -var gpxSummarizer *openname.GPXSummarizer +var gpxSummarizer *placenames.GPXSummarizer func rwgpsHandler(w http.ResponseWriter, r *http.Request) { q := r.URL.Query() diff --git a/go.mod b/go.mod index 355c189..cf8a2b5 100644 --- a/go.mod +++ b/go.mod @@ -6,4 +6,5 @@ require ( github.com/dhconnelly/rtreego v1.0.0 github.com/fofanov/go-osgb v0.0.0-20170711141822-6893d1f95cd9 github.com/twpayne/go-gpx v1.1.1 + github.com/wlbr/mule v0.0.0-20200329114911-0724e1639b62 // indirect ) diff --git 
a/go.sum b/go.sum index b49d757..1b933ff 100644 --- a/go.sum +++ b/go.sum @@ -10,6 +10,8 @@ github.com/twpayne/go-gpx v1.1.1 h1:vbg0lRc/ZKSu8ev84/hJWZtplKJdBbucNmks4TNzSqQ= github.com/twpayne/go-gpx v1.1.1/go.mod h1:fQ+EsiFNgDuErUYyI0ZOgZPB+ACxW58L16oormty798= github.com/twpayne/go-kml v1.0.0/go.mod h1:LlvLIQSfMqYk2O7Nx8vYAbSLv4K9rjMvLlEdUKWdjq0= github.com/twpayne/go-polyline v1.0.0/go.mod h1:ICh24bcLYBX8CknfvNPKqoTbe+eg+MX1NPyJmSBo7pU= +github.com/wlbr/mule v0.0.0-20200329114911-0724e1639b62 h1:vHDdpwOGHzfFKbMLEnnM0s1jnGLjsQ9EPWtCFWMJs8o= +github.com/wlbr/mule v0.0.0-20200329114911-0724e1639b62/go.mod h1:uDXgZTfL0uJWiY/MQKcqI5VPQV8PCooNsWXozHf7CJ8= golang.org/x/net v0.0.0-20180824152047-4bcd98cce591 h1:4S2XUgvg3hUNTvxI307qkFPb9zKHG3Nf9TXFzX/DZZI= golang.org/x/net v0.0.0-20180824152047-4bcd98cce591/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/text v0.3.0 h1:g61tztE5qeGQ89tm6NTjjM9VPIm088od1l6aSorWRWg= diff --git a/pkg/openname/index.go b/pkg/openname/index.go deleted file mode 100644 index 336e2d5..0000000 --- a/pkg/openname/index.go +++ /dev/null @@ -1,44 +0,0 @@ -package openname - -import ( - "archive/zip" - "fmt" - "strings" - - "github.com/dhconnelly/rtreego" -) - -func BuildIndex(filename string) (*rtreego.Rtree, error) { - r, err := zip.OpenReader(filename) - if err != nil { - return nil, fmt.Errorf("error opening %s fo reading: %v", filename, err) - } - defer r.Close() - rt := rtreego.NewTree(2, 25, 50) - for _, f := range r.File { - if !(strings.HasPrefix(f.Name, "DATA/") && strings.HasSuffix(f.Name, ".csv")) { - continue - } - rc, err := f.Open() - if err != nil { - return nil, fmt.Errorf("erorr opening %s: %v", filename, err) - } - s, err := NewScanner(rc) - if err != nil { - rc.Close() - return nil, fmt.Errorf("error reading %s: %v", f.Name, err) - } - for s.Scan() { - r := s.Record() - if r.Type == "populatedPlace" && r.MbrXMax != r.MbrXMin && r.MbrYMax != r.MbrYMin { - rt.Insert(r) - } - } - if err = s.Err(); err != nil { - 
rc.Close() - return nil, fmt.Errorf("error parsing %s: %v", f.Name, err) - } - rc.Close() - } - return rt, nil -} diff --git a/pkg/openname/io.go b/pkg/openname/io.go new file mode 100644 index 0000000..f3cdf37 --- /dev/null +++ b/pkg/openname/io.go @@ -0,0 +1,69 @@ +package openname + +import ( + "archive/zip" + "encoding/gob" + "fmt" + "os" + "strings" + + "github.com/ray1729/gpx-utils/pkg/placenames" +) + +// ProcessFile reads the compressed OS Open Names data set and calls the handler for each populatedPlace record with a non-degenerate bounding box; a handler error aborts processing and is returned. +func ProcessFile(filename string, handler func(*Record) error) error { + r, err := zip.OpenReader(filename) + if err != nil { + return fmt.Errorf("error opening %s for reading: %v", filename, err) + } + defer r.Close() + for _, f := range r.File { + if !(strings.HasPrefix(f.Name, "DATA/") && strings.HasSuffix(f.Name, ".csv")) { + continue + } + rc, err := f.Open() + if err != nil { + return fmt.Errorf("error opening %s: %v", f.Name, err) + } + s, err := NewScanner(rc) + if err != nil { + rc.Close() + return fmt.Errorf("error reading %s: %v", f.Name, err) + } + for s.Scan() { + r := s.Record() + if r.Type == "populatedPlace" && r.MbrXMax != r.MbrXMin && r.MbrYMax != r.MbrYMin { + if err := handler(r); err != nil { + rc.Close() + return err + } + } + } + if err = s.Err(); err != nil { + rc.Close() + return fmt.Errorf("error parsing %s: %v", f.Name, err) + } + rc.Close() + } + return nil +} + +// Save processes the OS OpenNames zip file and outputs bounded places in gob format. 
+func Save(inFile string, outFile string) error { + wc, err := os.OpenFile(outFile, os.O_CREATE|os.O_RDWR|os.O_TRUNC, 0644) + if err != nil { + return err + } + defer wc.Close() + enc := gob.NewEncoder(wc) + err = ProcessFile(inFile, func(r *Record) error { + b := placenames.NamedBoundary{ + Name: r.Name, + Xmin: r.MbrXMin, + Ymin: r.MbrYMin, + Xmax: r.MbrXMax, + Ymax: r.MbrYMax} + return enc.Encode(b) + }) + return err +} diff --git a/pkg/placenames/placenames.bin b/pkg/placenames/placenames.bin new file mode 100644 index 0000000..c8c35c2 --- /dev/null +++ b/pkg/placenames/placenames.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0db50946fc948c9db77ae4644aaca0ffa6d748b61ab0465840ab814a0b87833c +size 1711464 diff --git a/pkg/placenames/placenames.go b/pkg/placenames/placenames.go new file mode 100644 index 0000000..307c279 --- /dev/null +++ b/pkg/placenames/placenames.go @@ -0,0 +1,57 @@ +package placenames + +//go:generate mule -p placenames -o data.go placenames.bin + +import ( + "bytes" + "encoding/gob" + "errors" + "io" + + "github.com/dhconnelly/rtreego" +) + +type NamedBoundary struct { + Name string + Xmin float64 + Ymin float64 + Xmax float64 + Ymax float64 +} + +func (b *NamedBoundary) Bounds() *rtreego.Rect { + r, err := rtreego.NewRect(rtreego.Point{b.Xmin, b.Ymin}, []float64{b.Xmax - b.Xmin, b.Ymax - b.Ymin}) + if err != nil { + panic(err) + } + return r +} + +func (b *NamedBoundary) Contains(p rtreego.Point) bool { + if len(p) != 2 { + panic("Expected a 2-dimensional point") + } + return p[0] >= b.Xmin && p[0] <= b.Xmax && p[1] >= b.Ymin && p[1] <= b.Ymax +} + +// RestoreIndex reads bounded places in gob format and constructs an RTree index. +func RestoreIndex() (*rtreego.Rtree, error) { + data, err := dataResource() + if err != nil { + return nil, err + } + dec := gob.NewDecoder(bytes.NewReader(data)) + var objs []rtreego.Spatial + for { + var b NamedBoundary + if err := dec.Decode(&b); err != nil { + if errors.Is(err, io.EOF) { 
+ break + } + return nil, err + } + objs = append(objs, &b) + } + rt := rtreego.NewTree(2, 25, 50, objs...) + return rt, nil +} diff --git a/pkg/openname/summarize.go b/pkg/placenames/summarize.go similarity index 83% rename from pkg/openname/summarize.go rename to pkg/placenames/summarize.go index 4e31673..2397f72 100644 --- a/pkg/openname/summarize.go +++ b/pkg/placenames/summarize.go @@ -1,4 +1,4 @@ -package openname +package placenames import ( "io" @@ -16,16 +16,16 @@ type GPXSummarizer struct { trans osgb.CoordinateTransformer } -func NewGPXSummarizer(rt *rtreego.Rtree) (*GPXSummarizer, error) { +func NewGPXSummarizer() (*GPXSummarizer, error) { trans, err := osgb.NewOSTN15Transformer() if err != nil { return nil, err } - return &GPXSummarizer{rt: rt, trans: trans}, nil -} - -func insideLoc(p rtreego.Point, loc *Record) bool { - return p[0] >= loc.MbrXMin && p[0] <= loc.MbrXMax && p[1] >= loc.MbrYMin && p[1] <= loc.MbrYMax + rt, err := RestoreIndex() + if err != nil { + return nil, err + } + return &GPXSummarizer{rt, trans}, nil } func distance(p1, p2 rtreego.Point) float64 { @@ -87,7 +87,7 @@ func (gs *GPXSummarizer) SummarizeTrack(r io.Reader) (*TrackSummary, error) { } thisPoint := rtreego.Point{ngCoord.Easting, ngCoord.Northing} thisHeight := ngCoord.Height - nn, _ := gs.rt.NearestNeighbor(thisPoint).(*Record) + nn, _ := gs.rt.NearestNeighbor(thisPoint).(*NamedBoundary) if init { s.Start = nn.Name prevPlace = nn.Name @@ -102,7 +102,7 @@ func (gs *GPXSummarizer) SummarizeTrack(r io.Reader) (*TrackSummary, error) { if ascent := thisHeight - prevHeight; ascent > 0 { s.Ascent += ascent } - if insideLoc(thisPoint, nn) && nn.Name != prevPlace && distance(thisPoint, prevPlacePoint) > 0.2 { + if nn.Contains(thisPoint) && nn.Name != prevPlace && distance(thisPoint, prevPlacePoint) > 0.2 { s.PointsOfInterest = append(s.PointsOfInterest, POI{nn.Name, s.Distance}) prevPlace = nn.Name prevPlacePoint = thisPoint