Fixed indentation, encoding issue and README bug. Thanks to Greg Jastrab. Bump version to 0.2.3

This commit is contained in:
Yuri Baburov 2011-07-27 01:56:17 +07:00
parent 6bf4948e69
commit 11c4d95411
5 changed files with 7 additions and 9 deletions

4
README
View File

@@ -18,9 +18,9 @@ Based on:
Installation::
-easy_install readability-xml
+easy_install readability-lxml
or
-pip install readability-xml
+pip install readability-lxml
Usage::

View File

@@ -23,4 +23,3 @@ def describe(node, depth=2):
if depth and node.getparent() is not None:
return name+' - '+describe(node.getparent(), depth-1)
return name

View File

@@ -19,4 +19,3 @@ def get_encoding(page):
if enc == 'MacCyrillic':
enc = 'cp1251'
return enc

View File

@@ -121,8 +121,8 @@ class Document:
else:
logging.debug("Ruthless and lenient parsing did not work. Returning raw html")
article = self.html.find('body')
-if article is None:
-article = self.html
+if article is None:
+article = self.html
cleaned_article = self.sanitize(article, candidates)
of_acceptable_length = len(cleaned_article or '') >= (self.options['retry_length'] or self.RETRY_LENGTH)
@@ -497,8 +497,8 @@ def main():
import urllib
file = urllib.urlopen(options.url)
else:
-file = open(args[0])
-enc = sys.stdout.encoding or 'utf-8'
+file = open(args[0], 'rt')
+enc = sys.__stdout__.encoding or 'utf-8'
try:
print Document(file.read(), debug=options.verbose).summary().encode(enc, 'replace')
finally:

View File

@@ -3,7 +3,7 @@ from setuptools import setup, find_packages
setup(
name="readability-lxml",
-version="0.2.2",
+version="0.2.3",
author="Yuri Baburov",
author_email="burchik@gmail.com",
description="fast python port of arc90's readability tool",