diff options
| author | David Schlachter <t480-debian-git@schlachter.ca> | 2026-01-07 18:18:47 -0500 |
|---|---|---|
| committer | David Schlachter <t480-debian-git@schlachter.ca> | 2026-01-07 18:18:47 -0500 |
| commit | 5f9386c849f5152253960fdb4cba53b090db490d (patch) | |
| tree | d1e8e36091e6b112db6f4045447a8628ef1f59e8 /main.go | |
| parent | 0675dfdbf1c64356c7d7fd671588a84d5443c275 (diff) | |
Actually populate the sqlite3 database
Diffstat (limited to 'main.go')
| -rw-r--r-- | main.go | 133 |
1 files changed, 3 insertions, 130 deletions
@@ -3,14 +3,8 @@ package main import ( - "bufio" "database/sql" - "encoding/json" - "fmt" - "html/template" "log" - "os" - "strings" _ "github.com/mattn/go-sqlite3" ) @@ -21,11 +15,11 @@ const dictionary = "/home/david/work/french-wiktionary-flashcards/raw-wiktextrac func main() { db, err := sql.Open("sqlite3", dictionary) if err != nil { - log.Fatalf("opening DB (you probably need to touch '%s'): %s", dictionary, err) + log.Fatalf("opening DB '%s': %s", dictionary, err) } defer db.Close() - _, err = db.Exec("create table IF NOT EXISTS words (word text not null primary key, definition text);") + _, err = db.Exec("create table IF NOT EXISTS words (word text not null, definition text);") if err != nil { log.Fatalf("creating table: %s", err) } @@ -37,130 +31,9 @@ func main() { log.Fatalf("counting rows: %s", err) } if count == 0 { - if err = readDictionary(); err != nil { + if err = readDictionary(db); err != nil { log.Fatalf("failed to prepare dictionary: %s", err) } } } - -type rawDictionaryEntry struct { - Word string `json:"word"` - LangCode string `json:"lang_code"` - POS string `json:"pos_title"` - Etymology []string `json:"etymology_texts"` - Senses []sense `json:"senses"` - Sounds []sound `json:"sounds"` - Tags []string `json:"tags"` -} - -type sense struct { - Glosses []string `json:"glosses"` - Examples []example `json:"examples"` -} - -type example struct { - Text string `json:"text"` -} - -type sound struct { - IPA string `json:"ipa"` -} - -type templateReadyDictionaryEntry struct { - Word string - POS string - Etymology string - Senses []SenseForDictionaryEntry - Sound string - Gender string -} - -type SenseForDictionaryEntry struct { - Sense string - Example string -} - -func readDictionary() error { - log.Printf("preparing list of dictionary words...") - - // Set up the template - tmpl, err := template.New("entry").Parse(`<p>{{ .Word }} {{ .Sound }} <i>{{ .POS }} {{ .Gender }}</i></p> - <ol> - {{ range .Senses}} - <li>{{ .Sense }}<br><ul><li><i>{{ .Example }}</i></li></ul></li> - {{ end }} - </ol>`) - if err != nil { - panic(err) - } - - file, err := os.Open(rawDictionary) - if err != nil { - return fmt.Errorf("opening: %w", err) - } - defer file.Close() - - var line int - scanner := bufio.NewScanner(file) - - maxCapacity := 1_000_000 - buf := make([]byte, maxCapacity) - scanner.Buffer(buf, maxCapacity) - - for scanner.Scan() { - line++ - - if line%10000 == 0 && line > 1 { - log.Printf("processed %d lines", line) - } - - var result rawDictionaryEntry - json.Unmarshal([]byte(scanner.Text()), &result) - if result.LangCode != "fr" { - continue - } - - // Create the definition text. - entry := templateReadyDictionaryEntry{ - Word: result.Word, - POS: strings.ToLower(result.POS), - } - if len(result.Etymology) > 0 { - entry.Etymology = result.Etymology[0] - } - if len(result.Sounds) > 0 { - entry.Sound = result.Sounds[0].IPA - } - for _, r := range result.Tags { - var genders []string - if r == "masculine" || r == "feminine" { - genders = append(genders, r) - } - entry.Gender = strings.Join(genders, " / ") - } - for _, s := range result.Senses { - var example string - if len(s.Examples) > 0 { - example = s.Examples[0].Text - } - sense := strings.Join(s.Glosses, "; ") - entry.Senses = append(entry.Senses, SenseForDictionaryEntry{Sense: sense, Example: example}) - } - - out := strings.Builder{} - err := tmpl.Execute(&out, entry) - if err != nil { - return fmt.Errorf("failed to render: %w", err) - } - fmt.Printf("%s", out.String()) - - } - if err := scanner.Err(); err != nil { - return fmt.Errorf("scanning: %w", err) - } - - log.Printf("prepared %d dictionary entries", line) - - return nil -} |
