From 638f73f6a288b84184309f0f47b52d776b4ecf21 Mon Sep 17 00:00:00 2001 From: Yuri Baburov Date: Mon, 22 Sep 2014 15:31:31 +0700 Subject: [PATCH] Fix for #52: hidden input elements are not counted any more for "form removal" heuristic. --- readability/readability.py | 1 + 1 file changed, 1 insertion(+) diff --git a/readability/readability.py b/readability/readability.py index bf058ed..9b393d0 100755 --- a/readability/readability.py +++ b/readability/readability.py @@ -452,6 +452,7 @@ class Document: for kind in ['p', 'img', 'li', 'a', 'embed', 'input']: counts[kind] = len(el.findall('.//%s' % kind)) counts["li"] -= 100 + counts["input"] -= len(el.findall('.//input[@type="hidden"]')) # Count the text length excluding any surrounding whitespace content_length = text_length(el)