package main import ( "bufio" "bytes" "database/sql" "fmt" "html/template" "io" "os" "strings" tea "github.com/charmbracelet/bubbletea" "github.com/goccy/go-json" ) func setupTables(db *sql.DB) error { _, err := db.Exec("create table IF NOT EXISTS words (word text not null, definition text);") if err != nil { return fmt.Errorf("creating table: %s", err) } // Faster import performance. _, err = db.Exec("PRAGMA synchronous = OFF;") if err != nil { return fmt.Errorf("setting risky writes: %s", err) } return nil } func isDatabaseEmpty(db *sql.DB) tea.Cmd { return func() tea.Msg { row := db.QueryRow(`SELECT count(*) as count from words`) var count int err := row.Scan(&count) if err != nil { return errMsg(fmt.Errorf("counting rows: %s", err)) } // Only populate the database if it is empty. return isDictionaryEmptyMsg(count == 0) } } type rawDictionaryEntry struct { Word string `json:"word"` LangCode string `json:"lang_code"` POS string `json:"pos_title"` Etymology []string `json:"etymology_texts"` Senses []sense `json:"senses"` Sounds []sound `json:"sounds"` Tags []string `json:"tags"` } type sense struct { Glosses []string `json:"glosses"` Examples []example `json:"examples"` } type example struct { Text string `json:"text"` } type sound struct { IPA string `json:"ipa"` } type templateReadyDictionaryEntry struct { Word string POS string Etymology string Senses []SenseForDictionaryEntry Sound string Gender string } type SenseForDictionaryEntry struct { Sense string Example string } // dictionaryPopulator contains all the information required to populate the // SQLite dictionary from the raw JSONL data. This is in a struct so that we can // report progress back to the UI, then resume where we left off. type dictionaryPopulator struct { db *sql.DB rawDictionaryPath string langCode string tx *sql.Tx stmt *sql.Stmt tmpl *template.Template fh *os.File scanner *bufio.Scanner totalLines int currentLine int } func setupPopulator(dp *dictionaryPopulator) tea.Cmd { return func() tea.Msg { var err error // Set up the template dp.tmpl, err = template.New("entry").Parse( `
{{ .Word }} {{ .Sound }} {{ .POS }} {{ .Gender }}
Étymologie: {{ .Etymology }}{{ end }}`) if err != nil { return errMsg(fmt.Errorf("preparing template: %w", err)) } dp.tx, err = dp.db.Begin() if err != nil { return errMsg(fmt.Errorf("starting transaction: %w", err)) } // Set up a prepared statement dp.stmt, err = dp.tx.Prepare("insert into words(word, definition) values(?, ?)") if err != nil { return errMsg(fmt.Errorf("preparing statement: %w", err)) } dp.fh, err = os.Open(dp.rawDictionaryPath) if err != nil { return errMsg(fmt.Errorf("opening: %w", err)) } // Figure out how many lines the file has, for reporting import // progress. lines, err := lineCounter(dp.fh) if err != nil { return errMsg(fmt.Errorf("reading lines from file: %w", err)) } dp.totalLines = lines // We've just read through the whole file, reset the read position to // the beginning because we're about to set up a scanner on it. dp.fh.Seek(0, 0) dp.scanner = bufio.NewScanner(dp.fh) maxCapacity := 2_000_000 buf := make([]byte, maxCapacity) dp.scanner.Buffer(buf, maxCapacity) return populatingDictionaryMsg(dp) } } func lineCounter(r io.Reader) (int, error) { buf := make([]byte, 64*1024) count := 0 lineSep := []byte{'\n'} for { c, err := r.Read(buf) count += bytes.Count(buf[:c], lineSep) switch { case err == io.EOF: return count, nil case err != nil: return count, err } } } func populateDictionary(dp *dictionaryPopulator) tea.Cmd { return func() tea.Msg { for dp.scanner.Scan() { dp.currentLine++ var result rawDictionaryEntry json.Unmarshal([]byte(dp.scanner.Text()), &result) if result.LangCode != dp.langCode { continue } // Clean up the word. Replace apostrophes (common in phrases) with // single quotes (more likely to be typed by a user). result.Word = strings.ReplaceAll(result.Word, `’`, `'`) // Create the definition text. entry := templateReadyDictionaryEntry{ Word: result.Word, POS: strings.ToLower(result.POS), } if len(result.Etymology) > 0 { entry.Etymology = strings.TrimSpace(result.Etymology[0]) } if len(result.Sounds) > 0 { entry.Sound = result.Sounds[0].IPA } var genders, numbers []string for _, r := range result.Tags { switch r { case "masculine": genders = append(genders, "masculin") case "feminine": genders = append(genders, "féminin") case "plural": numbers = append(numbers, "pluriel") case "singular": numbers = append(numbers, "singulier") } } entry.Gender = strings.Join( []string{ strings.Join(genders, " / "), strings.Join(numbers, " et "), }, " ", ) for _, s := range result.Senses { var example string if len(s.Examples) > 0 { example = s.Examples[0].Text } sense := strings.Join(s.Glosses, "; ") entry.Senses = append(entry.Senses, SenseForDictionaryEntry{Sense: sense, Example: example}) } formattedDefinition := strings.Builder{} err := dp.tmpl.Execute(&formattedDefinition, entry) if err != nil { return errMsg(fmt.Errorf("failed to render: %w", err)) } // Insert the entry _, err = dp.stmt.Exec(entry.Word, formattedDefinition.String()) if err != nil { return errMsg(fmt.Errorf("inserting '%s': %w", entry.Word, err)) } // Report status every once in a while by breaking out to the caller if dp.currentLine%10000 == 0 { return populatingDictionaryMsg(dp) } } // If we're outside of the loop, we either encountered an error, or it's // time to commit the changes. if err := dp.scanner.Err(); err != nil { return errMsg(fmt.Errorf("scanning: %w", err)) } if err := dp.tx.Commit(); err != nil { return errMsg(fmt.Errorf("committing: %w", err)) } _, err := dp.db.Exec("create index wordindex on words(word);") if err != nil { return errMsg(fmt.Errorf("creating index: %s", err)) } // Clean up resources dp.stmt.Close() dp.fh.Close() return isDictionaryEmptyMsg(false) // We're done! } }