From e2e78e4d551ca92caa76bbd8036bcdb2641eebdf Mon Sep 17 00:00:00 2001 From: hush-hush Date: Thu, 13 Sep 2012 17:09:14 +0200 Subject: [PATCH] Make lxml clean tree available for user modifications. --- readability/readability.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/readability/readability.py b/readability/readability.py index ef8480d..fc37636 100755 --- a/readability/readability.py +++ b/readability/readability.py @@ -123,6 +123,9 @@ class Document: def short_title(self): return shorten_title(self._html(True)) + def get_clean_html(self): + return clean_attributes(tounicode(self.html)) + def summary(self, html_partial=False): """Generate the summary of the html docuemnt @@ -530,7 +533,8 @@ class Document: #el.attrib = {} #FIXME:Checkout the effects of disabling this pass - return clean_attributes(tounicode(node)) + self.html = node + return self.get_clean_html() class HashableElement():