diff options
| author | David Schlachter <t480-debian-git@schlachter.ca> | 2026-01-09 01:41:29 -0500 |
|---|---|---|
| committer | David Schlachter <t480-debian-git@schlachter.ca> | 2026-01-09 01:41:29 -0500 |
| commit | 07dea0730b6a1e39881e4b0e1c65531dabd03143 (patch) | |
| tree | 140bf47aab2bf2e0e19ef0344000c81b6b7cd88e | |
| parent | 78986c0f6c054f0b87de9de12db36ad9e97f1fcb (diff) | |
Unescape all HTML entities in definition previews
| -rw-r--r-- | README.md | 2 | ||||
| -rw-r--r-- | ui.go | 11 |
2 files changed, 6 insertions, 7 deletions
```diff
--- a/README.md
+++ b/README.md
@@ -87,8 +87,6 @@ Usage of french-wiktionary-flashcards:
 - allow setting the language for initial processing, so that we could support
   languages other than French
 - general code cleanup & organization
-- render HTML entities in definitions -- do some grepping to identify the most
-  common ones
 - italicise part-of-speech in the TUI (maybe remove numbers from them)
 - better gender tags -- jq through the source data to see what's common
 - some kind of tests?
--- a/ui.go
+++ b/ui.go
@@ -4,6 +4,7 @@ import (
 	"database/sql"
 	"errors"
 	"fmt"
+	"html"
 	"net/http"
 	"regexp"
 	"strings"
@@ -178,15 +179,15 @@ func formatDefinitionForDisplay(policy bluemonday.Policy, definition string, max

 	// Remove all HTML tags
 	str = policy.Sanitize(str)
+	// Some Wiktionary entries have HTML entities in them. That's okay for Anki,
+	// but it's not okay for displaying the plain text in the console interface.
+	str = html.UnescapeString(str)

 	// Add some colour to the start of each definition
 	str = strings.ReplaceAll(str, "\t- ", "\x1b[0;33;49m•\x1b[0m ")

-	// Replace common HTML entities
-	str = strings.ReplaceAll(str, "&#39;", "’")
-	str = strings.ReplaceAll(str, "&amp;", "&")
-
-	// Wrap
+	// Limit the width of the displayed definition to 80 characters, or the
+	// width of the viewport (whichever is smaller).
 	width := min(maxWidth, 80)
 	return wordwrap.String(str, width)
 }
```
