diff options
| author | David Schlachter <t480-debian-git@schlachter.ca> | 2026-01-07 18:18:47 -0500 |
|---|---|---|
| committer | David Schlachter <t480-debian-git@schlachter.ca> | 2026-01-07 18:18:47 -0500 |
| commit | 5f9386c849f5152253960fdb4cba53b090db490d (patch) | |
| tree | d1e8e36091e6b112db6f4045447a8628ef1f59e8 /setup.go | |
| parent | 0675dfdbf1c64356c7d7fd671588a84d5443c275 (diff) | |
Actually populate the sqlite3 database
Diffstat (limited to 'setup.go')
| -rw-r--r-- | setup.go | 147 |
1 files changed, 147 insertions, 0 deletions
diff --git a/setup.go b/setup.go new file mode 100644 index 0000000..c4364de --- /dev/null +++ b/setup.go @@ -0,0 +1,147 @@ +package main + +import ( + "bufio" + "database/sql" + "encoding/json" + "fmt" + "html/template" + "log" + "os" + "strings" +) + +type rawDictionaryEntry struct { + Word string `json:"word"` + LangCode string `json:"lang_code"` + POS string `json:"pos_title"` + Etymology []string `json:"etymology_texts"` + Senses []sense `json:"senses"` + Sounds []sound `json:"sounds"` + Tags []string `json:"tags"` +} + +type sense struct { + Glosses []string `json:"glosses"` + Examples []example `json:"examples"` +} + +type example struct { + Text string `json:"text"` +} + +type sound struct { + IPA string `json:"ipa"` +} + +type templateReadyDictionaryEntry struct { + Word string + POS string + Etymology string + Senses []SenseForDictionaryEntry + Sound string + Gender string +} + +type SenseForDictionaryEntry struct { + Sense string + Example string +} + +func readDictionary(db *sql.DB) error { + log.Printf("preparing list of dictionary words...") + + // Set up the template + tmpl, err := template.New("entry").Parse(`<p>{{ .Word }} {{ .Sound }} <i>{{ .POS }} {{ .Gender }}</i></p> + <ol> + {{ range .Senses}} + <li>{{ .Sense }}<br> + {{ if .Example }} + <ul><li><i>{{ .Example }}</i></li></ul></li> + {{ end }} + {{ end }} + </ol>`) + if err != nil { + panic(err) + } + + // Set up a prepared statement + stmt, err := db.Prepare("insert into words(word, definition) values(?, ?)") + if err != nil { + log.Fatal(err) + } + defer stmt.Close() + + file, err := os.Open(rawDictionary) + if err != nil { + return fmt.Errorf("opening: %w", err) + } + defer file.Close() + + var wordsAdded int + scanner := bufio.NewScanner(file) + + maxCapacity := 1_000_000 + buf := make([]byte, maxCapacity) + scanner.Buffer(buf, maxCapacity) + + for scanner.Scan() { + var result rawDictionaryEntry + json.Unmarshal([]byte(scanner.Text()), &result) + if result.LangCode != "fr" { + continue + } + + // Create the definition text. + entry := templateReadyDictionaryEntry{ + Word: result.Word, + POS: strings.ToLower(result.POS), + } + if len(result.Etymology) > 0 { + entry.Etymology = result.Etymology[0] + } + if len(result.Sounds) > 0 { + entry.Sound = result.Sounds[0].IPA + } + for _, r := range result.Tags { + var genders []string + if r == "masculine" || r == "feminine" { + genders = append(genders, r) + } + entry.Gender = strings.Join(genders, " / ") + } + for _, s := range result.Senses { + var example string + if len(s.Examples) > 0 { + example = s.Examples[0].Text + } + sense := strings.Join(s.Glosses, "; ") + entry.Senses = append(entry.Senses, SenseForDictionaryEntry{Sense: sense, Example: example}) + } + + formattedDefinition := strings.Builder{} + err := tmpl.Execute(&formattedDefinition, entry) + if err != nil { + return fmt.Errorf("failed to render: %w", err) + } + + // Insert the entry + _, err = stmt.Exec(entry.Word, formattedDefinition.String()) + if err != nil { + return fmt.Errorf("inserting '%s': %w", entry.Word, err) + } + + wordsAdded++ + if wordsAdded%1000 == 0 && wordsAdded > 1 { + log.Printf("processed %d lines (most recent word was '%s')", wordsAdded, entry.Word) + } + + } + if err := scanner.Err(); err != nil { + return fmt.Errorf("scanning: %w", err) + } + + log.Printf("prepared %d dictionary entries", wordsAdded) + + return nil +} |
