From 011c16868684470f589a6ba53365194955a5e72a Mon Sep 17 00:00:00 2001 From: poire-z Date: Fri, 19 May 2023 16:48:36 +0200 Subject: [PATCH] Wikipedia EPUBs: strip out invalid-XHTML <link> (#10462) --- frontend/ui/wikipedia.lua | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/frontend/ui/wikipedia.lua b/frontend/ui/wikipedia.lua index 9a74c21a2..989fee27b 100644 --- a/frontend/ui/wikipedia.lua +++ b/frontend/ui/wikipedia.lua @@ -1218,6 +1218,12 @@ table { -- OEBPS/content.html -- Some small fixes to Wikipedia HTML to make crengine and the user happier + -- In some articles' HTML, we may get <link rel="mw-deduplicated-inline-style" href="mw-data:..."> + -- (which, by specs, is an empty element) without the proper empty tag ending "/>", which + -- would cause crengine's EPUB XHTML parser to wait for a proper </link>, hiding all the + -- following content... So, just remove them, as we don't make any use of them. + html = html:gsub("<link [^>]*>", "") + -- Most images are in a link to the image info page, which is a useless -- external link for us, so let's remove this link. html = html:gsub("<a[^>]*>%s*(<%s*img [^>]*>)%s*</a>", "%1")