diff --git a/frontend/util.lua b/frontend/util.lua
index 3e487057f..a2792c61d 100644
--- a/frontend/util.lua
+++ b/frontend/util.lua
@@ -1000,20 +1000,24 @@ This may fail on complex HTML (with styles, scripts, comments), but should be fi
@treturn string plain text
]]
function util.htmlToPlainText(text)
- -- Replace <br> and <p> with \n
+ -- Replace <br> with \n
text = text:gsub("%s*<%s*br%s*/?>%s*", "\n") -- <br> and <br/>
- text = text:gsub("%s*<%s*p%s*>%s*", "\n ") -- <p>
+ -- Replace <p> with \n\t (\t, unlike any combination of spaces,
+ -- ensures a constant indentation when text is justified.)
text = text:gsub("%s*</%s*p%s*>%s*", "\n") -- </p>
text = text:gsub("%s*<%s*p%s*/>%s*", "\n") -- standalone <p/>
+ text = text:gsub("%s*<%s*p%s*>%s*", "\n\t") -- <p>
+ -- (this one last, so \t is not removed by the others' %s)
-- Remove all HTML tags
text = text:gsub("<[^>]*>", "")
-- Convert HTML entities
text = util.htmlEntitiesToUtf8(text)
- -- Trim spaces and new lines at start and end
+ -- Trim spaces and new lines at start and end, including
+ -- the \t we added (this looks fine enough with multiple
+ -- paragraphs, but feels nicer with a single paragraph,
+ -- whether it contains <br>s or not).
text = text:gsub("^[\n%s]*", "")
text = text:gsub("[\n%s]*$", "")
- -- Trim non-breaking spaces from the start
- text = text:gsub("^\xC2\xA0\xC2\xA0\xC2\xA0\xC2\xA0", "")
return text
end