summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- main.go 133
-rw-r--r-- setup.go 147
2 files changed, 150 insertions, 130 deletions
diff --git a/main.go b/main.go
index 37516cb..f1598bf 100644
--- a/main.go
+++ b/main.go
@@ -3,14 +3,8 @@
package main
import (
- "bufio"
"database/sql"
- "encoding/json"
- "fmt"
- "html/template"
"log"
- "os"
- "strings"
_ "github.com/mattn/go-sqlite3"
)
@@ -21,11 +15,11 @@ const dictionary = "/home/david/work/french-wiktionary-flashcards/raw-wiktextrac
func main() {
db, err := sql.Open("sqlite3", dictionary)
if err != nil {
- log.Fatalf("opening DB (you probably need to touch '%s'): %s", dictionary, err)
+ log.Fatalf("opening DB '%s': %s", dictionary, err)
}
defer db.Close()
- _, err = db.Exec("create table IF NOT EXISTS words (word text not null primary key, definition text);")
+ _, err = db.Exec("create table IF NOT EXISTS words (word text not null, definition text);")
if err != nil {
log.Fatalf("creating table: %s", err)
}
@@ -37,130 +31,9 @@ func main() {
log.Fatalf("counting rows: %s", err)
}
if count == 0 {
- if err = readDictionary(); err != nil {
+ if err = readDictionary(db); err != nil {
log.Fatalf("failed to prepare dictionary: %s", err)
}
}
}
-
-type rawDictionaryEntry struct {
- Word string `json:"word"`
- LangCode string `json:"lang_code"`
- POS string `json:"pos_title"`
- Etymology []string `json:"etymology_texts"`
- Senses []sense `json:"senses"`
- Sounds []sound `json:"sounds"`
- Tags []string `json:"tags"`
-}
-
-type sense struct {
- Glosses []string `json:"glosses"`
- Examples []example `json:"examples"`
-}
-
-type example struct {
- Text string `json:"text"`
-}
-
-type sound struct {
- IPA string `json:"ipa"`
-}
-
-type templateReadyDictionaryEntry struct {
- Word string
- POS string
- Etymology string
- Senses []SenseForDictionaryEntry
- Sound string
- Gender string
-}
-
-type SenseForDictionaryEntry struct {
- Sense string
- Example string
-}
-
-func readDictionary() error {
- log.Printf("preparing list of dictionary words...")
-
- // Set up the template
- tmpl, err := template.New("entry").Parse(`<p>{{ .Word }} {{ .Sound }} <i>{{ .POS }} {{ .Gender }}</i></p>
- <ol>
- {{ range .Senses}}
- <li>{{ .Sense }}<br><ul><li><i>{{ .Example }}</i></li></ul></li>
- {{ end }}
- </ol>`)
- if err != nil {
- panic(err)
- }
-
- file, err := os.Open(rawDictionary)
- if err != nil {
- return fmt.Errorf("opening: %w", err)
- }
- defer file.Close()
-
- var line int
- scanner := bufio.NewScanner(file)
-
- maxCapacity := 1_000_000
- buf := make([]byte, maxCapacity)
- scanner.Buffer(buf, maxCapacity)
-
- for scanner.Scan() {
- line++
-
- if line%10000 == 0 && line > 1 {
- log.Printf("processed %d lines", line)
- }
-
- var result rawDictionaryEntry
- json.Unmarshal([]byte(scanner.Text()), &result)
- if result.LangCode != "fr" {
- continue
- }
-
- // Create the definition text.
- entry := templateReadyDictionaryEntry{
- Word: result.Word,
- POS: strings.ToLower(result.POS),
- }
- if len(result.Etymology) > 0 {
- entry.Etymology = result.Etymology[0]
- }
- if len(result.Sounds) > 0 {
- entry.Sound = result.Sounds[0].IPA
- }
- for _, r := range result.Tags {
- var genders []string
- if r == "masculine" || r == "feminine" {
- genders = append(genders, r)
- }
- entry.Gender = strings.Join(genders, " / ")
- }
- for _, s := range result.Senses {
- var example string
- if len(s.Examples) > 0 {
- example = s.Examples[0].Text
- }
- sense := strings.Join(s.Glosses, "; ")
- entry.Senses = append(entry.Senses, SenseForDictionaryEntry{Sense: sense, Example: example})
- }
-
- out := strings.Builder{}
- err := tmpl.Execute(&out, entry)
- if err != nil {
- return fmt.Errorf("failed to render: %w", err)
- }
- fmt.Printf("%s", out.String())
-
- }
- if err := scanner.Err(); err != nil {
- return fmt.Errorf("scanning: %w", err)
- }
-
- log.Printf("prepared %d dictionary entries", line)
-
- return nil
-}
diff --git a/setup.go b/setup.go
new file mode 100644
index 0000000..c4364de
--- /dev/null
+++ b/setup.go
@@ -0,0 +1,147 @@
+package main
+
+import (
+ "bufio"
+ "database/sql"
+ "encoding/json"
+ "fmt"
+ "html/template"
+ "log"
+ "os"
+ "strings"
+)
+// rawDictionaryEntry is the subset of one JSON dictionary line that readDictionary decodes.
+type rawDictionaryEntry struct {
+ Word string `json:"word"` // stored as the word column and shown as the headword
+ LangCode string `json:"lang_code"` // readDictionary keeps only entries where this is "fr"
+ POS string `json:"pos_title"` // lower-cased before rendering
+ Etymology []string `json:"etymology_texts"` // only the first element is used
+ Senses []sense `json:"senses"` // each element becomes one rendered list item
+ Sounds []sound `json:"sounds"` // only the first element's IPA is used
+ Tags []string `json:"tags"` // scanned for "masculine" / "feminine"
+}
+// sense is one element of an entry's "senses" array.
+type sense struct {
+ Glosses []string `json:"glosses"` // joined with "; " into a single definition line
+ Examples []example `json:"examples"` // only the first example is rendered
+}
+// example is one element of a sense's "examples" array.
+type example struct {
+ Text string `json:"text"` // decoded from the "text" key
+}
+// sound is one element of an entry's "sounds" array.
+type sound struct {
+ IPA string `json:"ipa"` // rendered right after the headword
+}
+// templateReadyDictionaryEntry is the value readDictionary passes to the entry template.
+type templateReadyDictionaryEntry struct {
+ Word string
+ POS string
+ Etymology string // populated by readDictionary but not referenced by the entry template
+ Senses []SenseForDictionaryEntry
+ Sound string
+ Gender string // gender tags joined with " / "; empty when none were found
+}
+// SenseForDictionaryEntry is one numbered item of the rendered definition.
+type SenseForDictionaryEntry struct {
+ Sense string // glosses joined with "; "
+ Example string // first example text, or "" when the sense has none
+}
+// readDictionary scans the JSON-lines file at rawDictionary, renders each French
+// entry as HTML and inserts it into the words table of db. All inserts share one
+// transaction, so a failed run is rolled back and returns a wrapped error.
+func readDictionary(db *sql.DB) error {
+	log.Printf("preparing list of dictionary words...")
+	// </li> sits outside {{ if }} so senses without an example stay well-formed.
+	tmpl, err := template.New("entry").Parse(`<p>{{ .Word }} {{ .Sound }} <i>{{ .POS }} {{ .Gender }}</i></p>
+	<ol>
+	{{ range .Senses}}
+		<li>{{ .Sense }}<br>
+		{{ if .Example }}
+			<ul><li><i>{{ .Example }}</i></li></ul>
+		{{ end }}</li>
+	{{ end }}
+	</ol>`)
+	if err != nil {
+		return fmt.Errorf("parsing entry template: %w", err)
+	}
+
+	file, err := os.Open(rawDictionary)
+	if err != nil {
+		return fmt.Errorf("opening: %w", err)
+	}
+	defer file.Close()
+
+	// One transaction for the whole import: a mid-run failure must not leave
+	// a partial table behind, and SQLite avoids a commit per inserted row.
+	tx, err := db.Begin()
+	if err != nil {
+		return fmt.Errorf("starting transaction: %w", err)
+	}
+	defer tx.Rollback() // no-op once Commit has succeeded
+	stmt, err := tx.Prepare("insert into words(word, definition) values(?, ?)")
+	if err != nil {
+		return fmt.Errorf("preparing insert: %w", err)
+	}
+	defer stmt.Close()
+
+	// Lines may exceed bufio.Scanner's default token limit, so raise it.
+	const maxCapacity = 1_000_000
+	scanner := bufio.NewScanner(file)
+	scanner.Buffer(make([]byte, maxCapacity), maxCapacity)
+	var wordsAdded int
+	for scanner.Scan() {
+		var result rawDictionaryEntry
+		if err := json.Unmarshal(scanner.Bytes(), &result); err != nil {
+			log.Printf("skipping undecodable dictionary line: %s", err)
+			continue
+		}
+		if result.LangCode != "fr" {
+			continue
+		}
+
+		// Create the definition text.
+		entry := templateReadyDictionaryEntry{Word: result.Word, POS: strings.ToLower(result.POS)}
+		if len(result.Etymology) > 0 {
+			entry.Etymology = result.Etymology[0]
+		}
+		if len(result.Sounds) > 0 {
+			entry.Sound = result.Sounds[0].IPA
+		}
+		// Gather genders over all tags; a slice reset per tag loses earlier hits.
+		var genders []string
+		for _, tag := range result.Tags {
+			if tag == "masculine" || tag == "feminine" {
+				genders = append(genders, tag)
+			}
+		}
+		entry.Gender = strings.Join(genders, " / ")
+		for _, s := range result.Senses {
+			var example string
+			if len(s.Examples) > 0 {
+				example = s.Examples[0].Text
+			}
+			entry.Senses = append(entry.Senses, SenseForDictionaryEntry{Sense: strings.Join(s.Glosses, "; "), Example: example})
+		}
+
+		var formattedDefinition strings.Builder
+		if err := tmpl.Execute(&formattedDefinition, entry); err != nil {
+			return fmt.Errorf("failed to render: %w", err)
+		}
+		if _, err := stmt.Exec(entry.Word, formattedDefinition.String()); err != nil {
+			return fmt.Errorf("inserting '%s': %w", entry.Word, err)
+		}
+		wordsAdded++
+		if wordsAdded%1000 == 0 {
+			log.Printf("processed %d lines (most recent word was '%s')", wordsAdded, entry.Word)
+		}
+	}
+	if err := scanner.Err(); err != nil {
+		return fmt.Errorf("scanning: %w", err)
+	}
+	if err := tx.Commit(); err != nil {
+		return fmt.Errorf("committing: %w", err)
+	}
+	log.Printf("prepared %d dictionary entries", wordsAdded)
+	return nil
+}