Fixed indentation, encoding issue and README bug. Thanks to Greg Jastrab. Bump version to 0.2.3

This commit is contained in:
Yuri Baburov 2011-07-27 01:56:17 +07:00
parent 6bf4948e69
commit 11c4d95411
5 changed files with 7 additions and 9 deletions

4
README
View File

@@ -18,9 +18,9 @@ Based on:
Installation:: Installation::
easy_install readability-xml easy_install readability-lxml
or or
pip install readability-xml pip install readability-lxml
Usage:: Usage::

View File

@@ -23,4 +23,3 @@ def describe(node, depth=2):
if depth and node.getparent() is not None: if depth and node.getparent() is not None:
return name+' - '+describe(node.getparent(), depth-1) return name+' - '+describe(node.getparent(), depth-1)
return name return name

View File

@@ -19,4 +19,3 @@ def get_encoding(page):
if enc == 'MacCyrillic': if enc == 'MacCyrillic':
enc = 'cp1251' enc = 'cp1251'
return enc return enc

View File

@@ -121,8 +121,8 @@ class Document:
else: else:
logging.debug("Ruthless and lenient parsing did not work. Returning raw html") logging.debug("Ruthless and lenient parsing did not work. Returning raw html")
article = self.html.find('body') article = self.html.find('body')
if article is None: if article is None:
article = self.html article = self.html
cleaned_article = self.sanitize(article, candidates) cleaned_article = self.sanitize(article, candidates)
of_acceptable_length = len(cleaned_article or '') >= (self.options['retry_length'] or self.RETRY_LENGTH) of_acceptable_length = len(cleaned_article or '') >= (self.options['retry_length'] or self.RETRY_LENGTH)
@@ -497,8 +497,8 @@ def main():
import urllib import urllib
file = urllib.urlopen(options.url) file = urllib.urlopen(options.url)
else: else:
file = open(args[0]) file = open(args[0], 'rt')
enc = sys.stdout.encoding or 'utf-8' enc = sys.__stdout__.encoding or 'utf-8'
try: try:
print Document(file.read(), debug=options.verbose).summary().encode(enc, 'replace') print Document(file.read(), debug=options.verbose).summary().encode(enc, 'replace')
finally: finally:

View File

@@ -3,7 +3,7 @@ from setuptools import setup, find_packages
setup( setup(
name="readability-lxml", name="readability-lxml",
version="0.2.2", version="0.2.3",
author="Yuri Baburov", author="Yuri Baburov",
author_email="burchik@gmail.com", author_email="burchik@gmail.com",
description="fast python port of arc90's readability tool", description="fast python port of arc90's readability tool",