summary | refs | log | tree | commit | diff
path: root/setup.go
diff options
context:
space:
mode:
Diffstat (limited to 'setup.go')
-rw-r--r-- setup.go 147
1 files changed, 147 insertions, 0 deletions
diff --git a/setup.go b/setup.go
new file mode 100644
index 0000000..c4364de
--- /dev/null
+++ b/setup.go
@@ -0,0 +1,147 @@
+package main
+
+import (
+ "bufio"
+ "database/sql"
+ "encoding/json"
+ "fmt"
+ "html/template"
+ "log"
+ "os"
+ "strings"
+)
+
+type rawDictionaryEntry struct { // one line of the raw dictionary file, decoded from JSON
+ Word string `json:"word"` // copied into the rendered entry and used as the inserted word
+ LangCode string `json:"lang_code"` // readDictionary keeps only entries where this is "fr"
+ POS string `json:"pos_title"` // lower-cased before rendering
+ Etymology []string `json:"etymology_texts"` // only the first element is carried forward
+ Senses []sense `json:"senses"` // each one becomes a list item of the rendered definition
+ Sounds []sound `json:"sounds"` // only the first element's IPA is carried forward
+ Tags []string `json:"tags"` // inspected for "masculine" / "feminine" to derive the gender
+}
+
+type sense struct { // one element of a raw entry's "senses" array
+ Glosses []string `json:"glosses"` // joined with "; " to form the displayed sense text
+ Examples []example `json:"examples"` // only the first example is displayed
+}
+
+type example struct { // one element of a sense's "examples" array
+ Text string `json:"text"` // example text; rendered in italics beneath its sense
+}
+
+type sound struct { // one element of a raw entry's "sounds" array
+ IPA string `json:"ipa"` // value of the "ipa" key; presumably IPA notation — confirm against the data
+}
+
+type templateReadyDictionaryEntry struct { // flattened entry handed to the definition template
+ Word string // headword, copied from the raw entry
+ POS string // lower-cased pos_title of the raw entry
+ Etymology string // first etymology text, if any; the template in readDictionary does not reference it
+ Senses []SenseForDictionaryEntry // one element per raw sense, in source order
+ Sound string // IPA of the first raw sound, if any
+ Gender string // derived from the "masculine" / "feminine" tags; empty when none apply
+}
+
+type SenseForDictionaryEntry struct { // SenseForDictionaryEntry is one list item of a rendered definition
+ Sense string // the sense's glosses joined with "; "
+ Example string // text of the sense's first example; empty when it has none
+}
+
+// readDictionary loads the French entries of the raw dictionary file into the
+// words table.
+//
+// The file named by rawDictionary is read line by line and each line is
+// decoded as one JSON rawDictionaryEntry. Entries whose lang_code is not "fr"
+// are skipped. Every remaining entry is rendered to an HTML definition and
+// inserted, together with its word, through db.
+//
+// Empty lines are ignored and lines that fail to decode are logged and
+// skipped. Every other failure (parsing the template, preparing the
+// statement, opening or scanning the file, rendering, inserting) is returned
+// wrapped. This function does not open a transaction, so rows inserted before
+// a failure are not undone here.
+func readDictionary(db *sql.DB) error {
+	log.Printf("preparing list of dictionary words...")
+
+	// html/template escapes the interpolated values, so glosses and examples
+	// cannot inject markup into the stored definition. The closing </li> sits
+	// outside the {{ if }} so every sense item is closed whether or not it
+	// has an example.
+	tmpl, err := template.New("entry").Parse(`<p>{{ .Word }} {{ .Sound }} <i>{{ .POS }} {{ .Gender }}</i></p>
+	<ol>
+	{{ range .Senses}}
+	<li>{{ .Sense }}<br>
+	{{ if .Example }}
+	<ul><li><i>{{ .Example }}</i></li></ul>
+	{{ end }}
+	</li>
+	{{ end }}
+	</ol>`)
+	if err != nil {
+		return fmt.Errorf("parsing entry template: %w", err)
+	}
+
+	// One prepared statement is reused for every insert.
+	stmt, err := db.Prepare("insert into words(word, definition) values(?, ?)")
+	if err != nil {
+		return fmt.Errorf("preparing insert: %w", err)
+	}
+	defer stmt.Close()
+
+	file, err := os.Open(rawDictionary)
+	if err != nil {
+		return fmt.Errorf("opening: %w", err)
+	}
+	defer file.Close()
+
+	// The default Scanner token limit is too small for long JSON lines.
+	// A line that exceeds maxLineBytes surfaces through scanner.Err below.
+	const maxLineBytes = 1_000_000
+	scanner := bufio.NewScanner(file)
+	scanner.Buffer(make([]byte, maxLineBytes), maxLineBytes)
+
+	var lineNo, wordsAdded, skipped int
+	for scanner.Scan() {
+		lineNo++
+
+		// Bytes is only valid until the next Scan; json.Unmarshal copies
+		// what it keeps, so no defensive copy is needed.
+		line := scanner.Bytes()
+		if len(line) == 0 {
+			continue
+		}
+
+		var raw rawDictionaryEntry
+		if err := json.Unmarshal(line, &raw); err != nil {
+			// Skip rather than insert a partially decoded entry.
+			skipped++
+			log.Printf("skipping line %d: %v", lineNo, err)
+			continue
+		}
+		if raw.LangCode != "fr" {
+			continue
+		}
+
+		entry := newTemplateReadyEntry(raw)
+
+		var definition strings.Builder
+		if err := tmpl.Execute(&definition, entry); err != nil {
+			return fmt.Errorf("rendering '%s': %w", entry.Word, err)
+		}
+
+		if _, err := stmt.Exec(entry.Word, definition.String()); err != nil {
+			return fmt.Errorf("inserting '%s': %w", entry.Word, err)
+		}
+
+		wordsAdded++
+		if wordsAdded%1000 == 0 {
+			log.Printf("added %d entries (most recent word was '%s')", wordsAdded, entry.Word)
+		}
+	}
+	if err := scanner.Err(); err != nil {
+		return fmt.Errorf("scanning: %w", err)
+	}
+
+	if skipped > 0 {
+		log.Printf("skipped %d undecodable lines", skipped)
+	}
+	log.Printf("prepared %d dictionary entries", wordsAdded)
+
+	return nil
+}
+
+// newTemplateReadyEntry flattens a decoded raw entry into the shape the
+// definition template consumes: the first etymology and first IPA sound (when
+// present), the gender derived from the tags, and one sense per raw sense.
+func newTemplateReadyEntry(raw rawDictionaryEntry) templateReadyDictionaryEntry {
+	entry := templateReadyDictionaryEntry{
+		Word:   raw.Word,
+		POS:    strings.ToLower(raw.POS),
+		Gender: genderFromTags(raw.Tags),
+	}
+	if len(raw.Etymology) > 0 {
+		entry.Etymology = raw.Etymology[0]
+	}
+	if len(raw.Sounds) > 0 {
+		entry.Sound = raw.Sounds[0].IPA
+	}
+	if len(raw.Senses) > 0 {
+		entry.Senses = make([]SenseForDictionaryEntry, 0, len(raw.Senses))
+	}
+	for _, s := range raw.Senses {
+		var exampleText string
+		if len(s.Examples) > 0 {
+			exampleText = s.Examples[0].Text
+		}
+		entry.Senses = append(entry.Senses, SenseForDictionaryEntry{
+			Sense:   strings.Join(s.Glosses, "; "),
+			Example: exampleText,
+		})
+	}
+	return entry
+}
+
+// genderFromTags returns the "masculine" and "feminine" tags found in tags,
+// in their original order, joined with " / ". It returns "" when neither tag
+// is present.
+func genderFromTags(tags []string) string {
+	var genders []string
+	for _, tag := range tags {
+		if tag == "masculine" || tag == "feminine" {
+			genders = append(genders, tag)
+		}
+	}
+	return strings.Join(genders, " / ")
+}