package main
import (
"bufio"
"database/sql"
"fmt"
"html/template"
"log"
"os"
"strings"
"github.com/goccy/go-json"
)
// setupDatabase opens the "sqlite3" database identified by dictionary, makes
// sure the words table exists and, if that table is empty, fills it by calling
// populateDictionary.
//
// On success the caller owns the returned handle. If any step after opening
// fails, the handle is closed before the error is returned so that no
// connection is leaked. Returned errors wrap the underlying cause (%w).
func setupDatabase() (*sql.DB, error) {
	db, err := sql.Open("sqlite3", dictionary)
	if err != nil {
		return nil, fmt.Errorf("opening DB '%s': %w", dictionary, err)
	}

	if err := ensureDictionaryLoaded(db); err != nil {
		// The setup error is the one worth reporting; a failure to close
		// the half-initialised handle adds nothing actionable.
		_ = db.Close()
		return nil, err
	}
	return db, nil
}

// ensureDictionaryLoaded creates the words table when missing and imports the
// dictionary when the table holds no rows.
func ensureDictionaryLoaded(db *sql.DB) error {
	if _, err := db.Exec("create table IF NOT EXISTS words (word text not null, definition text);"); err != nil {
		return fmt.Errorf("creating table: %w", err)
	}

	var count int
	if err := db.QueryRow(`SELECT count(*) as count from words`).Scan(&count); err != nil {
		return fmt.Errorf("counting rows: %w", err)
	}

	// Only populate the database if it is empty.
	if count > 0 {
		return nil
	}

	// Faster import performance, at the cost of durability during the import.
	// NOTE(review): PRAGMA settings are per connection and database/sql pools
	// connections; presumably the import reuses this same connection — confirm,
	// or pin one with db.Conn.
	if _, err := db.Exec("PRAGMA synchronous = OFF;"); err != nil {
		return fmt.Errorf("setting risky writes: %w", err)
	}

	if err := populateDictionary(db); err != nil {
		return fmt.Errorf("failed to prepare dictionary: %w", err)
	}
	return nil
}
// rawDictionaryEntry holds the fields of interest from one JSON object of the
// raw dictionary file. populateDictionary decodes one value per input line and
// only imports entries whose LangCode is "fr".
type rawDictionaryEntry struct {
// Word is the headword (JSON key "word"); it is stored in the words.word column.
Word string `json:"word"`
// LangCode is the value of the "lang_code" key, used to filter entries.
LangCode string `json:"lang_code"`
// POS is the value of the "pos_title" key; it is lower-cased before rendering.
POS string `json:"pos_title"`
// Etymology is the "etymology_texts" list; only its first element is kept.
Etymology []string `json:"etymology_texts"`
// Senses is the "senses" list; each element becomes one rendered sense.
Senses []sense `json:"senses"`
// Sounds is the "sounds" list; only the first element's IPA value is kept.
Sounds []sound `json:"sounds"`
// Tags is the "tags" list; it is scanned for "masculine" and "feminine".
Tags []string `json:"tags"`
}
// sense is one element of a raw entry's "senses" list.
type sense struct {
// Glosses is the "glosses" list; populateDictionary joins it with "; ".
Glosses []string `json:"glosses"`
// Examples is the "examples" list; only the first element's text is kept.
Examples []example `json:"examples"`
}
// example is one element of a sense's "examples" list.
type example struct {
// Text is the value of the "text" key.
Text string `json:"text"`
}
// sound is one element of a raw entry's "sounds" list.
type sound struct {
// IPA is the value of the "ipa" key (presumably an IPA pronunciation
// transcription — confirm against the data source).
IPA string `json:"ipa"`
}
// templateReadyDictionaryEntry is the flattened view of a rawDictionaryEntry
// that populateDictionary passes to the "entry" template to render the
// definition text.
type templateReadyDictionaryEntry struct {
// Word is the headword, copied unchanged from the raw entry.
Word string
// POS is the raw entry's POS, lower-cased.
POS string
// Etymology is the first etymology text, or empty when there is none.
// The template defined in populateDictionary does not reference it.
Etymology string
// Senses holds one element per sense of the raw entry.
Senses []SenseForDictionaryEntry
// Sound is the IPA value of the first sound, or empty when there is none.
Sound string
// Gender is derived from the raw entry's "masculine"/"feminine" tags.
Gender string
}
// SenseForDictionaryEntry is a single sense of a word in the form consumed by
// the "entry" template.
type SenseForDictionaryEntry struct {
// Sense is the sense's glosses joined with "; ".
Sense string
// Example is the text of the sense's first example, or empty when it has
// none. The template only tests this field; it does not print it.
Example string
}
func populateDictionary(db *sql.DB) error {
log.Printf("preparing list of dictionary words...")
// Set up the template
tmpl, err := template.New("entry").Parse(
`
{{ .Word }} {{ .Sound }} {{ .POS }} {{ .Gender }}
{{ range .Senses}}
- {{ .Sense }}
{{ if .Example }}
{{ end }}
{{ end }}
`)
if err != nil {
panic(err)
}
tx, err := db.Begin()
if err != nil {
return fmt.Errorf("starting transaction: %w", err)
}
// Set up a prepared statement
stmt, err := tx.Prepare("insert into words(word, definition) values(?, ?)")
if err != nil {
return fmt.Errorf("preparing statement: %w", err)
}
defer stmt.Close()
file, err := os.Open(rawDictionary)
if err != nil {
return fmt.Errorf("opening: %w", err)
}
defer file.Close()
var wordsAdded int
scanner := bufio.NewScanner(file)
maxCapacity := 2_000_000
buf := make([]byte, maxCapacity)
scanner.Buffer(buf, maxCapacity)
for scanner.Scan() {
var result rawDictionaryEntry
json.Unmarshal([]byte(scanner.Text()), &result)
if result.LangCode != "fr" {
continue
}
// Create the definition text.
entry := templateReadyDictionaryEntry{
Word: result.Word,
POS: strings.ToLower(result.POS),
}
if len(result.Etymology) > 0 {
entry.Etymology = result.Etymology[0]
}
if len(result.Sounds) > 0 {
entry.Sound = result.Sounds[0].IPA
}
for _, r := range result.Tags {
var genders []string
if r == "masculine" || r == "feminine" {
genders = append(genders, r)
}
entry.Gender = strings.Join(genders, " / ")
}
for _, s := range result.Senses {
var example string
if len(s.Examples) > 0 {
example = s.Examples[0].Text
}
sense := strings.Join(s.Glosses, "; ")
entry.Senses = append(entry.Senses, SenseForDictionaryEntry{Sense: sense, Example: example})
}
formattedDefinition := strings.Builder{}
err := tmpl.Execute(&formattedDefinition, entry)
if err != nil {
return fmt.Errorf("failed to render: %w", err)
}
// Insert the entry
_, err = stmt.Exec(entry.Word, formattedDefinition.String())
if err != nil {
return fmt.Errorf("inserting '%s': %w", entry.Word, err)
}
wordsAdded++
if wordsAdded%10_000 == 0 && wordsAdded > 1 {
log.Printf("processed %d lines (most recent word was '%s')", wordsAdded, entry.Word)
}
}
if err := scanner.Err(); err != nil {
return fmt.Errorf("scanning: %w", err)
}
if err := tx.Commit(); err != nil {
return fmt.Errorf("committing: %w", err)
}
_, err = db.Exec("create index wordindex on words(word);")
if err != nil {
return fmt.Errorf("creating index: %s", err)
}
log.Printf("prepared %d dictionary entries", wordsAdded)
return nil
}