Fixed indentation, encoding issue and README bug. Thanks to Greg Jastrab. Bump version to 0.2.3
This commit is contained in:
parent
6bf4948e69
commit
11c4d95411
4
README
4
README
@@ -18,9 +18,9 @@ Based on:
|
||||
|
||||
Installation::
|
||||
|
||||
easy_install readability-xml
|
||||
easy_install readability-lxml
|
||||
or
|
||||
pip install readability-xml
|
||||
pip install readability-lxml
|
||||
|
||||
Usage::
|
||||
|
||||
|
@@ -23,4 +23,3 @@ def describe(node, depth=2):
|
||||
if depth and node.getparent() is not None:
|
||||
return name+' - '+describe(node.getparent(), depth-1)
|
||||
return name
|
||||
|
||||
|
@@ -19,4 +19,3 @@ def get_encoding(page):
|
||||
if enc == 'MacCyrillic':
|
||||
enc = 'cp1251'
|
||||
return enc
|
||||
|
||||
|
@@ -121,8 +121,8 @@ class Document:
|
||||
else:
|
||||
logging.debug("Ruthless and lenient parsing did not work. Returning raw html")
|
||||
article = self.html.find('body')
|
||||
if article is None:
|
||||
article = self.html
|
||||
if article is None:
|
||||
article = self.html
|
||||
|
||||
cleaned_article = self.sanitize(article, candidates)
|
||||
of_acceptable_length = len(cleaned_article or '') >= (self.options['retry_length'] or self.RETRY_LENGTH)
|
||||
@@ -497,8 +497,8 @@ def main():
|
||||
import urllib
|
||||
file = urllib.urlopen(options.url)
|
||||
else:
|
||||
file = open(args[0])
|
||||
enc = sys.stdout.encoding or 'utf-8'
|
||||
file = open(args[0], 'rt')
|
||||
enc = sys.__stdout__.encoding or 'utf-8'
|
||||
try:
|
||||
print Document(file.read(), debug=options.verbose).summary().encode(enc, 'replace')
|
||||
finally:
|
||||
|
Loading…
Reference in New Issue
Block a user