summaryrefslogtreecommitdiff
path: root/main.go
diff options
context:
space:
mode:
Diffstat (limited to 'main.go')
-rw-r--r--main.go166
1 files changed, 166 insertions, 0 deletions
diff --git a/main.go b/main.go
new file mode 100644
index 0000000..37516cb
--- /dev/null
+++ b/main.go
@@ -0,0 +1,166 @@
+// This program looks up words from Wiktionary, and creates Anki flashcards
+// from them.
+package main
+
+import (
+ "bufio"
+ "database/sql"
+ "encoding/json"
+ "fmt"
+ "html/template"
+ "log"
+ "os"
+ "strings"
+
+ _ "github.com/mattn/go-sqlite3"
+)
+
+// Hard-coded locations of the files this program works with.
+const (
+	// rawDictionary is the JSON-lines file that readDictionary opens and
+	// scans line by line.
+	rawDictionary = "/home/david/work/french-wiktionary-flashcards/raw-wiktextract-data.jsonl"
+
+	// dictionary is the file handed to the sqlite3 driver in main.
+	dictionary = "/home/david/work/french-wiktionary-flashcards/raw-wiktextract-data.sqlite3"
+)
+
+// main runs the program. Any failure is logged once and the process exits
+// with a non-zero status.
+func main() {
+	if err := run(); err != nil {
+		log.Fatal(err)
+	}
+}
+
+// run opens the SQLite database named by dictionary, makes sure the words
+// table exists, and calls readDictionary when that table has no rows.
+//
+// Errors are returned rather than passed to log.Fatalf here: log.Fatalf
+// exits the process immediately, which would skip the deferred db.Close.
+func run() error {
+	db, err := sql.Open("sqlite3", dictionary)
+	if err != nil {
+		return fmt.Errorf("opening DB (you probably need to touch '%s'): %w", dictionary, err)
+	}
+	defer db.Close()
+
+	if _, err := db.Exec("create table IF NOT EXISTS words (word text not null primary key, definition text);"); err != nil {
+		return fmt.Errorf("creating table: %w", err)
+	}
+
+	var count int
+	if err := db.QueryRow(`SELECT count(*) as count from words`).Scan(&count); err != nil {
+		return fmt.Errorf("counting rows: %w", err)
+	}
+	if count != 0 {
+		// The table already has rows; nothing to prepare.
+		return nil
+	}
+
+	if err := readDictionary(); err != nil {
+		return fmt.Errorf("failed to prepare dictionary: %w", err)
+	}
+	return nil
+}
+
+// Shapes used to decode one JSON line of the raw dictionary file. Only the
+// keys named in the struct tags are picked up; everything else in a line is
+// ignored by encoding/json.
+type (
+	// rawDictionaryEntry is the top-level object of a line.
+	rawDictionaryEntry struct {
+		Word      string   `json:"word"`
+		LangCode  string   `json:"lang_code"` // readDictionary keeps only entries where this is "fr"
+		POS       string   `json:"pos_title"` // lower-cased before rendering
+		Etymology []string `json:"etymology_texts"`
+		Senses    []sense  `json:"senses"`
+		Sounds    []sound  `json:"sounds"`
+		Tags      []string `json:"tags"` // searched for "masculine" / "feminine"
+	}
+
+	// sense is one element of an entry's "senses" array.
+	sense struct {
+		Glosses  []string  `json:"glosses"`
+		Examples []example `json:"examples"`
+	}
+
+	// example is one element of a sense's "examples" array.
+	example struct {
+		Text string `json:"text"`
+	}
+
+	// sound is one element of an entry's "sounds" array.
+	sound struct {
+		IPA string `json:"ipa"`
+	}
+)
+
+// Values handed to the HTML template in readDictionary.
+type (
+	// templateReadyDictionaryEntry is the flattened form of a raw entry.
+	templateReadyDictionaryEntry struct {
+		Word      string
+		POS       string
+		Etymology string // first etymology text, if any; the template does not render it
+		Senses    []SenseForDictionaryEntry
+		Sound     string // IPA of the first sound, if any
+		Gender    string // built from the "masculine" / "feminine" tags
+	}
+
+	// SenseForDictionaryEntry pairs the text of one sense with a single
+	// example sentence. Example is empty when the sense has no examples.
+	SenseForDictionaryEntry struct {
+		Sense   string
+		Example string
+	}
+)
+
+// genderFromTags returns every "masculine" or "feminine" tag found in tags,
+// in their original order, joined with " / ". The result is empty when
+// neither tag is present.
+func genderFromTags(tags []string) string {
+	var genders []string
+	for _, tag := range tags {
+		if tag == "masculine" || tag == "feminine" {
+			genders = append(genders, tag)
+		}
+	}
+	return strings.Join(genders, " / ")
+}
+
+// newTemplateEntry flattens a decoded raw entry into the value rendered by
+// the template: the part of speech is lower-cased, only the first etymology
+// text, first sound and first example of each sense are kept, and the glosses
+// of a sense are joined with "; ".
+func newTemplateEntry(raw rawDictionaryEntry) templateReadyDictionaryEntry {
+	entry := templateReadyDictionaryEntry{
+		Word:   raw.Word,
+		POS:    strings.ToLower(raw.POS),
+		Gender: genderFromTags(raw.Tags),
+	}
+	if len(raw.Etymology) > 0 {
+		entry.Etymology = raw.Etymology[0]
+	}
+	if len(raw.Sounds) > 0 {
+		entry.Sound = raw.Sounds[0].IPA
+	}
+	for _, s := range raw.Senses {
+		var example string
+		if len(s.Examples) > 0 {
+			example = s.Examples[0].Text
+		}
+		entry.Senses = append(entry.Senses, SenseForDictionaryEntry{
+			Sense:   strings.Join(s.Glosses, "; "),
+			Example: example,
+		})
+	}
+	return entry
+}
+
+// readDictionary scans the JSON-lines file at rawDictionary, renders every
+// entry whose lang_code is "fr" through an HTML template, and writes the
+// result to standard output.
+//
+// Lines that cannot be decoded as JSON are logged and skipped. An error is
+// returned when the template cannot be parsed, the file cannot be opened,
+// an entry fails to render, or the scanner fails (for example on a line
+// longer than the 1,000,000-byte buffer).
+func readDictionary() error {
+	log.Printf("preparing list of dictionary words...")
+
+	// Set up the template.
+	tmpl, err := template.New("entry").Parse(`<p>{{ .Word }} {{ .Sound }} <i>{{ .POS }} {{ .Gender }}</i></p>
+	<ol>
+		{{ range .Senses}}
+		<li>{{ .Sense }}<br><ul><li><i>{{ .Example }}</i></li></ul></li>
+		{{ end }}
+	</ol>`)
+	if err != nil {
+		return fmt.Errorf("parsing template: %w", err)
+	}
+
+	file, err := os.Open(rawDictionary)
+	if err != nil {
+		return fmt.Errorf("opening: %w", err)
+	}
+	defer file.Close()
+
+	// Individual lines can be far longer than bufio.Scanner's default token
+	// limit, so give the scanner a larger buffer.
+	const maxLineBytes = 1_000_000
+	scanner := bufio.NewScanner(file)
+	scanner.Buffer(make([]byte, maxLineBytes), maxLineBytes)
+
+	var lines, entries int
+	for scanner.Scan() {
+		lines++
+		if lines%10000 == 0 {
+			log.Printf("processed %d lines", lines)
+		}
+
+		// scanner.Bytes is only valid until the next Scan; Unmarshal copies
+		// what it needs, so no extra string conversion is required.
+		var raw rawDictionaryEntry
+		if err := json.Unmarshal(scanner.Bytes(), &raw); err != nil {
+			log.Printf("skipping line %d: %s", lines, err)
+			continue
+		}
+		if raw.LangCode != "fr" {
+			continue
+		}
+
+		// Render into a buffer first so that a failed render prints nothing
+		// for that entry.
+		var out strings.Builder
+		if err := tmpl.Execute(&out, newTemplateEntry(raw)); err != nil {
+			return fmt.Errorf("failed to render: %w", err)
+		}
+		fmt.Print(out.String())
+		entries++
+	}
+	if err := scanner.Err(); err != nil {
+		return fmt.Errorf("scanning: %w", err)
+	}
+
+	log.Printf("prepared %d dictionary entries", entries)
+
+	return nil
+}