From 07dea0730b6a1e39881e4b0e1c65531dabd03143 Mon Sep 17 00:00:00 2001 From: David Schlachter Date: Fri, 9 Jan 2026 01:41:29 -0500 Subject: Unescape all HTML entities in definition previews --- README.md | 2 -- ui.go | 11 ++++++----- 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/README.md b/README.md index dacf4af..6f21dda 100644 --- a/README.md +++ b/README.md @@ -87,8 +87,6 @@ Usage of french-wiktionary-flashcards: - allow setting the language for initial processing, so that we could support languages other than French - general code cleanup & organization -- render HTML entities in definitions -- do some grepping to identify the most - common ones - italicise part-of-speech in the TUI (maybe remove numbers from them) - better gender tags -- jq through the source data to see what's common - some kind of tests? diff --git a/ui.go b/ui.go index eea1269..3c4dbb7 100644 --- a/ui.go +++ b/ui.go @@ -4,6 +4,7 @@ import ( "database/sql" "errors" "fmt" + "html" "net/http" "regexp" "strings" @@ -178,15 +179,15 @@ func formatDefinitionForDisplay(policy bluemonday.Policy, definition string, max // Remove all HTML tags str = policy.Sanitize(str) + // Some Wiktionary entries have HTML entities in them. That's okay for Anki, + // but it's not okay for displaying the plain text in the console interface. + str = html.UnescapeString(str) // Add some colour to the start of each definition str = strings.ReplaceAll(str, "\t- ", "\x1b[0;33;49m•\x1b[0m ") - // Replace common HTML entities - str = strings.ReplaceAll(str, "&#39;", "’") - str = strings.ReplaceAll(str, "&amp;", "&") - - // Wrap + // Limit the width of the displayed definition to 80 characters, or the + // width of the viewport (whichever is smaller). width := min(maxWidth, 80) return wordwrap.String(str, width) } -- cgit v1.2.3