From 5f9386c849f5152253960fdb4cba53b090db490d Mon Sep 17 00:00:00 2001 From: David Schlachter Date: Wed, 7 Jan 2026 18:18:47 -0500 Subject: Actually populate the sqlite3 database --- main.go | 133 ++-------------------------------------------------------------- 1 file changed, 3 insertions(+), 130 deletions(-) (limited to 'main.go') diff --git a/main.go b/main.go index 37516cb..f1598bf 100644 --- a/main.go +++ b/main.go @@ -3,14 +3,8 @@ package main import ( - "bufio" "database/sql" - "encoding/json" - "fmt" - "html/template" "log" - "os" - "strings" _ "github.com/mattn/go-sqlite3" ) @@ -21,11 +15,11 @@ const dictionary = "/home/david/work/french-wiktionary-flashcards/raw-wiktextrac func main() { db, err := sql.Open("sqlite3", dictionary) if err != nil { - log.Fatalf("opening DB (you probably need to touch '%s'): %s", dictionary, err) + log.Fatalf("opening DB '%s': %s", dictionary, err) } defer db.Close() - _, err = db.Exec("create table IF NOT EXISTS words (word text not null primary key, definition text);") + _, err = db.Exec("create table IF NOT EXISTS words (word text not null, definition text);") if err != nil { log.Fatalf("creating table: %s", err) } @@ -37,130 +31,9 @@ func main() { log.Fatalf("counting rows: %s", err) } if count == 0 { - if err = readDictionary(); err != nil { + if err = readDictionary(db); err != nil { log.Fatalf("failed to prepare dictionary: %s", err) } } } - -type rawDictionaryEntry struct { - Word string `json:"word"` - LangCode string `json:"lang_code"` - POS string `json:"pos_title"` - Etymology []string `json:"etymology_texts"` - Senses []sense `json:"senses"` - Sounds []sound `json:"sounds"` - Tags []string `json:"tags"` -} - -type sense struct { - Glosses []string `json:"glosses"` - Examples []example `json:"examples"` -} - -type example struct { - Text string `json:"text"` -} - -type sound struct { - IPA string `json:"ipa"` -} - -type templateReadyDictionaryEntry struct { - Word string - POS string - Etymology string - Senses []SenseForDictionaryEntry - Sound string - Gender string -} - -type SenseForDictionaryEntry struct { - Sense string - Example string -} - -func readDictionary() error { - log.Printf("preparing list of dictionary words...") - - // Set up the template - tmpl, err := template.New("entry").Parse(`

{{ .Word }} {{ .Sound }} {{ .POS }} {{ .Gender }}

-
    - {{ range .Senses}} -
  1. {{ .Sense }}
    • {{ .Example }}
  2. - {{ end }} -
`) - if err != nil { - panic(err) - } - - file, err := os.Open(rawDictionary) - if err != nil { - return fmt.Errorf("opening: %w", err) - } - defer file.Close() - - var line int - scanner := bufio.NewScanner(file) - - maxCapacity := 1_000_000 - buf := make([]byte, maxCapacity) - scanner.Buffer(buf, maxCapacity) - - for scanner.Scan() { - line++ - - if line%10000 == 0 && line > 1 { - log.Printf("processed %d lines", line) - } - - var result rawDictionaryEntry - json.Unmarshal([]byte(scanner.Text()), &result) - if result.LangCode != "fr" { - continue - } - - // Create the definition text. - entry := templateReadyDictionaryEntry{ - Word: result.Word, - POS: strings.ToLower(result.POS), - } - if len(result.Etymology) > 0 { - entry.Etymology = result.Etymology[0] - } - if len(result.Sounds) > 0 { - entry.Sound = result.Sounds[0].IPA - } - for _, r := range result.Tags { - var genders []string - if r == "masculine" || r == "feminine" { - genders = append(genders, r) - } - entry.Gender = strings.Join(genders, " / ") - } - for _, s := range result.Senses { - var example string - if len(s.Examples) > 0 { - example = s.Examples[0].Text - } - sense := strings.Join(s.Glosses, "; ") - entry.Senses = append(entry.Senses, SenseForDictionaryEntry{Sense: sense, Example: example}) - } - - out := strings.Builder{} - err := tmpl.Execute(&out, entry) - if err != nil { - return fmt.Errorf("failed to render: %w", err) - } - fmt.Printf("%s", out.String()) - - } - if err := scanner.Err(); err != nil { - return fmt.Errorf("scanning: %w", err) - } - - log.Printf("prepared %d dictionary entries", line) - - return nil -} -- cgit v1.2.3