Fixed indentation, encoding issue and README bug. Thanks to Greg Jastrab. Bump version to 0.2.3
This commit is contained in:
parent
6bf4948e69
commit
11c4d95411
4
README
4
README
@@ -18,9 +18,9 @@ Based on:
|
|||||||
|
|
||||||
Installation::
|
Installation::
|
||||||
|
|
||||||
easy_install readability-xml
|
easy_install readability-lxml
|
||||||
or
|
or
|
||||||
pip install readability-xml
|
pip install readability-lxml
|
||||||
|
|
||||||
Usage::
|
Usage::
|
||||||
|
|
||||||
|
@@ -23,4 +23,3 @@ def describe(node, depth=2):
|
|||||||
if depth and node.getparent() is not None:
|
if depth and node.getparent() is not None:
|
||||||
return name+' - '+describe(node.getparent(), depth-1)
|
return name+' - '+describe(node.getparent(), depth-1)
|
||||||
return name
|
return name
|
||||||
|
|
||||||
|
@@ -19,4 +19,3 @@ def get_encoding(page):
|
|||||||
if enc == 'MacCyrillic':
|
if enc == 'MacCyrillic':
|
||||||
enc = 'cp1251'
|
enc = 'cp1251'
|
||||||
return enc
|
return enc
|
||||||
|
|
||||||
|
@@ -121,8 +121,8 @@ class Document:
|
|||||||
else:
|
else:
|
||||||
logging.debug("Ruthless and lenient parsing did not work. Returning raw html")
|
logging.debug("Ruthless and lenient parsing did not work. Returning raw html")
|
||||||
article = self.html.find('body')
|
article = self.html.find('body')
|
||||||
if article is None:
|
if article is None:
|
||||||
article = self.html
|
article = self.html
|
||||||
|
|
||||||
cleaned_article = self.sanitize(article, candidates)
|
cleaned_article = self.sanitize(article, candidates)
|
||||||
of_acceptable_length = len(cleaned_article or '') >= (self.options['retry_length'] or self.RETRY_LENGTH)
|
of_acceptable_length = len(cleaned_article or '') >= (self.options['retry_length'] or self.RETRY_LENGTH)
|
||||||
@@ -497,8 +497,8 @@ def main():
|
|||||||
import urllib
|
import urllib
|
||||||
file = urllib.urlopen(options.url)
|
file = urllib.urlopen(options.url)
|
||||||
else:
|
else:
|
||||||
file = open(args[0])
|
file = open(args[0], 'rt')
|
||||||
enc = sys.stdout.encoding or 'utf-8'
|
enc = sys.__stdout__.encoding or 'utf-8'
|
||||||
try:
|
try:
|
||||||
print Document(file.read(), debug=options.verbose).summary().encode(enc, 'replace')
|
print Document(file.read(), debug=options.verbose).summary().encode(enc, 'replace')
|
||||||
finally:
|
finally:
|
||||||
|
2
setup.py
2
setup.py
@@ -3,7 +3,7 @@ from setuptools import setup, find_packages
|
|||||||
|
|
||||||
setup(
|
setup(
|
||||||
name="readability-lxml",
|
name="readability-lxml",
|
||||||
version="0.2.2",
|
version="0.2.3",
|
||||||
author="Yuri Baburov",
|
author="Yuri Baburov",
|
||||||
author_email="burchik@gmail.com",
|
author_email="burchik@gmail.com",
|
||||||
description="fast python port of arc90's readability tool",
|
description="fast python port of arc90's readability tool",
|
||||||
|
Loading…
Reference in New Issue
Block a user