Rework to embed binary data instead of reading wordlist from storage on start-up

This commit is contained in:
Ray Miller 2023-10-09 09:09:28 +01:00
parent f6920046a8
commit 0b37950a34
7 changed files with 83 additions and 63 deletions

View file

@ -8,18 +8,13 @@ import (
"github.com/ray1729/wordsearch/util"
)
type DB interface {
FindAnagrams(s string) []string
Add(s string)
type DB map[string][]string
func New() DB {
return make(DB)
}
type HashDBImpl map[string][]string
func New() HashDBImpl {
return make(HashDBImpl)
}
func (db HashDBImpl) Add(s string) {
func (db DB) Add(s string) {
k := toKey(s)
db[k] = append(db[k], s)
}
@ -37,12 +32,9 @@ func Load(r io.Reader) (DB, error) {
db.Add(sc.Text())
}
if err := sc.Err(); err != nil {
return nil, err
}
return db, nil
return db, sc.Err()
}
func (d HashDBImpl) FindAnagrams(s string) []string {
func (d DB) FindAnagrams(s string) []string {
return d[toKey(s)]
}

View file

@ -1,16 +1,14 @@
package wordsearch
import (
"bufio"
"context"
"embed"
"encoding/gob"
"fmt"
"log"
"net/http"
"os"
"sort"
"text/template"
"cloud.google.com/go/storage"
"github.com/GoogleCloudPlatform/functions-framework-go/functions"
"github.com/ray1729/wordsearch/anagram"
"github.com/ray1729/wordsearch/match"
@ -20,37 +18,30 @@ import (
var anagramDB anagram.DB
var matchDB match.DB
func initializeDB(ctx context.Context, bucketName, objectName string) error {
client, err := storage.NewClient(ctx)
func initializeDB() error {
anagrams, err := fs.Open("data/anagram.bin")
if err != nil {
return fmt.Errorf("error creating storage client: %v", err)
return err
}
r, err := client.Bucket(bucketName).Object(objectName).NewReader(ctx)
defer anagrams.Close()
if err := gob.NewDecoder(anagrams).Decode(&anagramDB); err != nil {
return err
}
matches, err := fs.Open("data/match.bin")
if err != nil {
return fmt.Errorf("error opening gs://%s/%s: %v", bucketName, objectName, err)
return err
}
defer r.Close()
anagramDB = anagram.New()
matchDB = match.New()
sc := bufio.NewScanner(r)
for sc.Scan() {
s := sc.Text()
anagramDB.Add(s)
matchDB.Add(s)
}
if err := sc.Err(); err != nil {
return fmt.Errorf("error reading gs://%s/%s: %v", bucketName, objectName, err)
defer matches.Close()
if err := gob.NewDecoder(matches).Decode(&matchDB); err != nil {
return err
}
return nil
}
func init() {
ctx := context.Background()
bucketName := mustGetenv("WORDLIST_BUCKET")
objectName := mustGetenv("WORDLIST_PATH")
log.Println("Initializing databases")
if err := initializeDB(ctx, bucketName, objectName); err != nil {
panic(err)
if err := initializeDB(); err != nil {
log.Fatal(err)
}
corsHandler := cors.New(cors.Options{
AllowedOrigins: []string{"*"},
@ -65,14 +56,6 @@ func init() {
})
}
func mustGetenv(s string) string {
v := os.Getenv(s)
if len(v) == 0 {
panic(fmt.Sprintf("environment variable %s not set", s))
}
return v
}
func handleFormSubmission(w http.ResponseWriter, r *http.Request) {
if err := r.ParseForm(); err != nil {
log.Printf("error parsing form: %v", err)
@ -145,3 +128,6 @@ var resultsTmpl = template.Must(template.New("results").Parse(`
{{ end }}
</ul>
`))
// fs exposes the files matched by the go:embed pattern below (everything
// directly under data/) as a read-only file system bundled with the program.
// initializeDB opens data/anagram.bin and data/match.bin from it.
//
//go:embed data/*
var fs embed.FS

50
cmd/generate-data/main.go Normal file
View file

@ -0,0 +1,50 @@
package main
import (
"encoding/gob"
"flag"
"log"
"os"
"path/filepath"
"github.com/ray1729/wordsearch/anagram"
"github.com/ray1729/wordsearch/match"
)
// Command-line flags controlling where input is read from and where the
// generated files are written.
var (
	// wordlist is the path of the wordlist file that main opens for reading.
	wordlist = flag.String("wordlist", "", "Path to wordlist")

	// dataDir is the directory that main creates and writes the generated
	// .bin files into.
	dataDir = flag.String("data", "", "Path to output directory")
)
func main() {
flag.Parse()
os.MkdirAll(*dataDir, 0755)
words, err := os.Open(*wordlist)
if err != nil {
log.Fatal(err)
}
anagramDB, err := anagram.Load(words)
if err != nil {
log.Fatal(err)
}
if err := writeData(*dataDir, "anagram.bin", anagramDB); err != nil {
log.Fatal(err)
}
words.Seek(0, 0)
matchDB, err := match.Load(words)
if err != nil {
log.Fatal(err)
}
if err := writeData(*dataDir, "match.bin", matchDB); err != nil {
log.Fatal(err)
}
}
// writeData gob-encodes data into the file fileName inside dirName.
//
// The file is created if it does not exist and truncated if it does. The
// directory itself must already exist. It returns any error from opening
// the file, encoding the value, or closing the file; the close error is
// reported because a failure there can mean the written data never reached
// storage.
func writeData(dirName string, fileName string, data interface{}) error {
	path := filepath.Join(dirName, fileName)
	f, err := os.OpenFile(path, os.O_CREATE|os.O_RDWR|os.O_TRUNC, 0644)
	if err != nil {
		return err
	}
	if err := gob.NewEncoder(f).Encode(data); err != nil {
		// The encode error is the one worth reporting; close only to
		// release the descriptor.
		f.Close()
		return err
	}
	return f.Close()
}

BIN
data/anagram.bin Normal file

Binary file not shown.

BIN
data/match.bin Normal file

Binary file not shown.

View file

@ -4,6 +4,6 @@ set -e
V=$(git describe --tags)
zip function.zip go.mod go.sum cloudfn.go match/match.go anagram/anagram.go util/util.go
zip function.zip go.mod go.sum cloudfn.go match/match.go anagram/anagram.go util/util.go data/*.bin
gsutil cp function.zip gs://word-search-1729-assets/cloudfn/${V}/

View file

@ -7,20 +7,15 @@ import (
"github.com/ray1729/wordsearch/util"
)
type DB interface {
FindMatches(s string) []string
Add(s string)
}
type PrefixTreeImpl struct {
type DB struct {
Root *Node
}
func New() PrefixTreeImpl {
return PrefixTreeImpl{Root: &Node{}}
func New() DB {
return DB{Root: &Node{}}
}
func (db PrefixTreeImpl) Add(s string) {
func (db DB) Add(s string) {
xs := util.LowerCaseAlpha(s)
db.Root.add(xs, s)
}
@ -57,13 +52,10 @@ func Load(r io.Reader) (DB, error) {
for sc.Scan() {
db.Add(sc.Text())
}
if err := sc.Err(); err != nil {
return nil, err
}
return db, nil
return db, sc.Err()
}
func (db PrefixTreeImpl) FindMatches(s string) []string {
func (db DB) FindMatches(s string) []string {
return db.Root.find(util.LowerCaseAlphaOrDot(s))
}