// This program looks up words fromm Wiktionary, and creates Anki flashcards // from them. package main import ( "bufio" "database/sql" "encoding/json" "fmt" "html/template" "log" "os" "strings" _ "github.com/mattn/go-sqlite3" ) const rawDictionary = "/home/david/work/french-wiktionary-flashcards/raw-wiktextract-data.jsonl" const dictionary = "/home/david/work/french-wiktionary-flashcards/raw-wiktextract-data.sqlite3" func main() { db, err := sql.Open("sqlite3", dictionary) if err != nil { log.Fatalf("opening DB (you probably need to touch '%s'): %s", dictionary, err) } defer db.Close() _, err = db.Exec("create table IF NOT EXISTS words (word text not null primary key, definition text);") if err != nil { log.Fatalf("creating table: %s", err) } row := db.QueryRow(`SELECT count(*) as count from words`) var count int err = row.Scan(&count) if err != nil { log.Fatalf("counting rows: %s", err) } if count == 0 { if err = readDictionary(); err != nil { log.Fatalf("failed to prepare dictionary: %s", err) } } } type rawDictionaryEntry struct { Word string `json:"word"` LangCode string `json:"lang_code"` POS string `json:"pos_title"` Etymology []string `json:"etymology_texts"` Senses []sense `json:"senses"` Sounds []sound `json:"sounds"` Tags []string `json:"tags"` } type sense struct { Glosses []string `json:"glosses"` Examples []example `json:"examples"` } type example struct { Text string `json:"text"` } type sound struct { IPA string `json:"ipa"` } type templateReadyDictionaryEntry struct { Word string POS string Etymology string Senses []SenseForDictionaryEntry Sound string Gender string } type SenseForDictionaryEntry struct { Sense string Example string } func readDictionary() error { log.Printf("preparing list of dictionary words...") // Set up the template tmpl, err := template.New("entry").Parse(`

{{ .Word }} {{ .Sound }} {{ .POS }} {{ .Gender }}

    {{ range .Senses}}
  1. {{ .Sense }}
  2. {{ end }}
`) if err != nil { panic(err) } file, err := os.Open(rawDictionary) if err != nil { return fmt.Errorf("opening: %w", err) } defer file.Close() var line int scanner := bufio.NewScanner(file) maxCapacity := 1_000_000 buf := make([]byte, maxCapacity) scanner.Buffer(buf, maxCapacity) for scanner.Scan() { line++ if line%10000 == 0 && line > 1 { log.Printf("processed %d lines", line) } var result rawDictionaryEntry json.Unmarshal([]byte(scanner.Text()), &result) if result.LangCode != "fr" { continue } // Create the definition text. entry := templateReadyDictionaryEntry{ Word: result.Word, POS: strings.ToLower(result.POS), } if len(result.Etymology) > 0 { entry.Etymology = result.Etymology[0] } if len(result.Sounds) > 0 { entry.Sound = result.Sounds[0].IPA } for _, r := range result.Tags { var genders []string if r == "masculine" || r == "feminine" { genders = append(genders, r) } entry.Gender = strings.Join(genders, " / ") } for _, s := range result.Senses { var example string if len(s.Examples) > 0 { example = s.Examples[0].Text } sense := strings.Join(s.Glosses, "; ") entry.Senses = append(entry.Senses, SenseForDictionaryEntry{Sense: sense, Example: example}) } out := strings.Builder{} err := tmpl.Execute(&out, entry) if err != nil { return fmt.Errorf("failed to render: %w", err) } fmt.Printf("%s", out.String()) } if err := scanner.Err(); err != nil { return fmt.Errorf("scanning: %w", err) } log.Printf("prepared %d dictionary entries", line) return nil }