Rework to embed binary data instead of reading wordlist from storage on start-up

This commit is contained in:
Ray Miller 2023-10-09 09:09:28 +01:00
parent f6920046a8
commit 0b37950a34
7 changed files with 83 additions and 63 deletions

View file

@ -8,18 +8,13 @@ import (
"github.com/ray1729/wordsearch/util" "github.com/ray1729/wordsearch/util"
) )
type DB interface { type DB map[string][]string
FindAnagrams(s string) []string
Add(s string) func New() DB {
return make(DB)
} }
type HashDBImpl map[string][]string func (db DB) Add(s string) {
func New() HashDBImpl {
return make(HashDBImpl)
}
func (db HashDBImpl) Add(s string) {
k := toKey(s) k := toKey(s)
db[k] = append(db[k], s) db[k] = append(db[k], s)
} }
@ -37,12 +32,9 @@ func Load(r io.Reader) (DB, error) {
db.Add(sc.Text()) db.Add(sc.Text())
} }
if err := sc.Err(); err != nil { return db, sc.Err()
return nil, err
}
return db, nil
} }
func (d HashDBImpl) FindAnagrams(s string) []string { func (d DB) FindAnagrams(s string) []string {
return d[toKey(s)] return d[toKey(s)]
} }

View file

@ -1,16 +1,14 @@
package wordsearch package wordsearch
import ( import (
"bufio" "embed"
"context" "encoding/gob"
"fmt" "fmt"
"log" "log"
"net/http" "net/http"
"os"
"sort" "sort"
"text/template" "text/template"
"cloud.google.com/go/storage"
"github.com/GoogleCloudPlatform/functions-framework-go/functions" "github.com/GoogleCloudPlatform/functions-framework-go/functions"
"github.com/ray1729/wordsearch/anagram" "github.com/ray1729/wordsearch/anagram"
"github.com/ray1729/wordsearch/match" "github.com/ray1729/wordsearch/match"
@ -20,37 +18,30 @@ import (
var anagramDB anagram.DB var anagramDB anagram.DB
var matchDB match.DB var matchDB match.DB
func initializeDB(ctx context.Context, bucketName, objectName string) error { func initializeDB() error {
client, err := storage.NewClient(ctx) anagrams, err := fs.Open("data/anagram.bin")
if err != nil { if err != nil {
return fmt.Errorf("error creating storage client: %v", err) return err
} }
r, err := client.Bucket(bucketName).Object(objectName).NewReader(ctx) defer anagrams.Close()
if err := gob.NewDecoder(anagrams).Decode(&anagramDB); err != nil {
return err
}
matches, err := fs.Open("data/match.bin")
if err != nil { if err != nil {
return fmt.Errorf("error opening gs://%s/%s: %v", bucketName, objectName, err) return err
} }
defer r.Close() defer matches.Close()
anagramDB = anagram.New() if err := gob.NewDecoder(matches).Decode(&matchDB); err != nil {
matchDB = match.New() return err
sc := bufio.NewScanner(r)
for sc.Scan() {
s := sc.Text()
anagramDB.Add(s)
matchDB.Add(s)
}
if err := sc.Err(); err != nil {
return fmt.Errorf("error reading gs://%s/%s: %v", bucketName, objectName, err)
} }
return nil return nil
} }
func init() { func init() {
ctx := context.Background()
bucketName := mustGetenv("WORDLIST_BUCKET")
objectName := mustGetenv("WORDLIST_PATH")
log.Println("Initializing databases") log.Println("Initializing databases")
if err := initializeDB(ctx, bucketName, objectName); err != nil { if err := initializeDB(); err != nil {
panic(err) log.Fatal(err)
} }
corsHandler := cors.New(cors.Options{ corsHandler := cors.New(cors.Options{
AllowedOrigins: []string{"*"}, AllowedOrigins: []string{"*"},
@ -65,14 +56,6 @@ func init() {
}) })
} }
func mustGetenv(s string) string {
v := os.Getenv(s)
if len(v) == 0 {
panic(fmt.Sprintf("environment variable %s not set", s))
}
return v
}
func handleFormSubmission(w http.ResponseWriter, r *http.Request) { func handleFormSubmission(w http.ResponseWriter, r *http.Request) {
if err := r.ParseForm(); err != nil { if err := r.ParseForm(); err != nil {
log.Printf("error parsing form: %v", err) log.Printf("error parsing form: %v", err)
@ -145,3 +128,6 @@ var resultsTmpl = template.Must(template.New("results").Parse(`
{{ end }} {{ end }}
</ul> </ul>
`)) `))
//go:embed data/*
var fs embed.FS

50
cmd/generate-data/main.go Normal file
View file

@ -0,0 +1,50 @@
package main
import (
"encoding/gob"
"flag"
"log"
"os"
"path/filepath"
"github.com/ray1729/wordsearch/anagram"
"github.com/ray1729/wordsearch/match"
)
// Command-line flags.
var (
	// wordlist is the path of the input wordlist file that main opens.
	wordlist = flag.String("wordlist", "", "Path to wordlist")
	// dataDir is the directory the generated .bin files are written to.
	dataDir = flag.String("data", "", "Path to output directory")
)
func main() {
flag.Parse()
os.MkdirAll(*dataDir, 0755)
words, err := os.Open(*wordlist)
if err != nil {
log.Fatal(err)
}
anagramDB, err := anagram.Load(words)
if err != nil {
log.Fatal(err)
}
if err := writeData(*dataDir, "anagram.bin", anagramDB); err != nil {
log.Fatal(err)
}
words.Seek(0, 0)
matchDB, err := match.Load(words)
if err != nil {
log.Fatal(err)
}
if err := writeData(*dataDir, "match.bin", matchDB); err != nil {
log.Fatal(err)
}
}
// writeData gob-encodes data into the file fileName inside dirName, creating
// the file if necessary and truncating any previous contents.
//
// It returns the first error from opening the file, encoding the value, or
// closing the file. The Close error is reported because a failure to flush
// the written bytes would otherwise leave a truncated file unnoticed.
func writeData(dirName string, fileName string, data interface{}) error {
	path := filepath.Join(dirName, fileName)
	// The file is only ever written here, so open it write-only.
	f, err := os.OpenFile(path, os.O_CREATE|os.O_WRONLY|os.O_TRUNC, 0644)
	if err != nil {
		return err
	}
	if err := gob.NewEncoder(f).Encode(data); err != nil {
		// The encode error is the one worth reporting; still release the
		// descriptor before returning.
		f.Close()
		return err
	}
	return f.Close()
}

BIN
data/anagram.bin Normal file

Binary file not shown.

BIN
data/match.bin Normal file

Binary file not shown.

View file

@ -4,6 +4,6 @@ set -e
V=$(git describe --tags) V=$(git describe --tags)
zip function.zip go.mod go.sum cloudfn.go match/match.go anagram/anagram.go util/util.go zip function.zip go.mod go.sum cloudfn.go match/match.go anagram/anagram.go util/util.go data/*.bin
gsutil cp function.zip gs://word-search-1729-assets/cloudfn/${V}/ gsutil cp function.zip gs://word-search-1729-assets/cloudfn/${V}/

View file

@ -7,20 +7,15 @@ import (
"github.com/ray1729/wordsearch/util" "github.com/ray1729/wordsearch/util"
) )
type DB interface { type DB struct {
FindMatches(s string) []string
Add(s string)
}
type PrefixTreeImpl struct {
Root *Node Root *Node
} }
func New() PrefixTreeImpl { func New() DB {
return PrefixTreeImpl{Root: &Node{}} return DB{Root: &Node{}}
} }
func (db PrefixTreeImpl) Add(s string) { func (db DB) Add(s string) {
xs := util.LowerCaseAlpha(s) xs := util.LowerCaseAlpha(s)
db.Root.add(xs, s) db.Root.add(xs, s)
} }
@ -57,13 +52,10 @@ func Load(r io.Reader) (DB, error) {
for sc.Scan() { for sc.Scan() {
db.Add(sc.Text()) db.Add(sc.Text())
} }
if err := sc.Err(); err != nil { return db, sc.Err()
return nil, err
}
return db, nil
} }
func (db PrefixTreeImpl) FindMatches(s string) []string { func (db DB) FindMatches(s string) []string {
return db.Root.find(util.LowerCaseAlphaOrDot(s)) return db.Root.find(util.LowerCaseAlphaOrDot(s))
} }