diff --git a/breadability/client.py b/breadability/client.py index 74050bf..e57e045 100644 --- a/breadability/client.py +++ b/breadability/client.py @@ -1,96 +1,81 @@ # -*- coding: utf8 -*- +""" +A fast python port of arc90's readability tool + +Usage: + breadability [options] + breadability --version + breadability --help + +Arguments: + URL or file path to process in readable form. + +Options: + -f, --fragment Output html fragment by default. + -b, --browser Open the parsed content in your web browser. + -d, --debug Output the detailed scoring information for debugging + parsing. + -v, --verbose Increase logging verbosity to DEBUG. + --version Display program's version number and exit. + -h, --help Display this help message and exit. +""" + from __future__ import absolute_import +from __future__ import division, print_function, unicode_literals + -import argparse import logging -import codecs import locale -import sys import urllib import webbrowser -from tempfile import mkstemp - +from tempfile import NamedTemporaryFile +from docopt import docopt from ._version import VERSION from .readable import Article def parse_args(): - desc = "A fast python port of arc90's readability tool" - parser = argparse.ArgumentParser(description=desc) - parser.add_argument('--version', - action='version', version=VERSION) - - parser.add_argument('-v', '--verbose', - action='store_true', - default=False, - help='Increase logging verbosity to DEBUG.') - - parser.add_argument('-f', '--fragment', - action='store_false', - default=True, - help='Output html fragment by default.') - -# parser.add_argument('-m', '--metadata', -# action='store_true', -# default=False, -# help='print all metadata as well as content for the content') - - parser.add_argument('-b', '--browser', - action='store_true', - default=False, - help='open the parsed content in your web browser') - - parser.add_argument('-d', '--debug', - action='store_true', - default=False, - help='Output the detailed scoring information for debugging parsing') - - parser.add_argument('path', metavar='P', type=str, nargs=1, - help="The url or file path to process in readable form.") - - args = parser.parse_args() - return args + return docopt(__doc__, version=VERSION) def main(): args = parse_args() logger = logging.getLogger("breadability") - if args.verbose: - logger.seLevel(logging.DEBUG) + if args["--verbose"]: + logger.setLevel(logging.DEBUG) + resource = args[""] + if resource.startswith("www"): + resource = "http://" + resource - target = args.path[0] - logger.debug("Target: %r", target) + url = None + if resource.startswith("http://") or resource.startswith("https://"): + url = resource - if target.startswith('http') or target.startswith('www'): - is_url = True - url = target + response = urllib.urlopen(url) + content = response.read() + response.close() else: - is_url = False - url = None + with open(resource, "r") as file: + content = file.read() - if is_url: - req = urllib.urlopen(target) - content = req.read() - ucontent = unicode(content, 'utf-8') - else: - ucontent = codecs.open(target, "r", "utf-8").read() - - doc = Article(ucontent, url=url, fragment=args.fragment) - if args.browser: - fg, pathname = mkstemp(suffix='.html') - out = codecs.open(pathname, 'w', 'utf-8') - out.write(doc.readable) - out.close() - webbrowser.open(pathname) + document = Article(content, url=url, fragment=args["--fragment"]) + if args["--browser"]: + html_file = NamedTemporaryFile(mode="w", suffix=".html", delete=False) + + content = document.readable.encode("utf8") + html_file.write(content) + + webbrowser.open(html_file.name) + + html_file.close() else: - # Wrap sys.stdout into a StreamWriter to allow writing unicode. - sys.stdout = codecs.getwriter( - locale.getpreferredencoding())(sys.stdout) - sys.stdout.write(doc.readable) + encoding = locale.getpreferredencoding() + content = document.readable.encode(encoding) + print(content) if __name__ == '__main__': diff --git a/breadability/scripts/newtest.py b/breadability/scripts/newtest.py index 8d6eafe..fed3daa 100644 --- a/breadability/scripts/newtest.py +++ b/breadability/scripts/newtest.py @@ -1,18 +1,43 @@ -import argparse -import codecs -from os import mkdir -from os import path +# -*- coding: utf8 -*- + +""" +Helper to generate a new set of article test files for breadability. + +Usage: + breadability_newtest -n + breadability_newtest --version + breadability_newtest --help + +Arguments: + The url of content to fetch for the article.html + +Options: + -n , --name= Name of the test directory. + --version Show program's version number and exit. + -h, --help Show this help message and exit. +""" + +from __future__ import absolute_import +import io + +from os import mkdir +from os.path import join, dirname, pardir +from docopt import docopt from .._version import VERSION from .._py3k import urllib -TESTPATH = path.join( - path.dirname(path.dirname(__file__)), - 'tests', 'test_articles') +TEST_PATH = join( + dirname(__file__), + pardir, + "tests", + "test_articles" +) -TESTTPL = """ +TEST_TEMPLATE = """ import os + try: # Python < 2.7 import unittest2 as unittest @@ -23,86 +48,75 @@ from breadability.readable import Article class TestArticle(unittest.TestCase): - \"\"\"Test the scoring and parsing of the Article\"\"\" + '''Test the scoring and parsing of the Article''' def setUp(self): - \"\"\"Load up the article for us\"\"\" + '''Load up the article for us''' article_path = os.path.join(os.path.dirname(__file__), 'article.html') self.article = open(article_path).read() def tearDown(self): - \"\"\"Drop the article\"\"\" + '''Drop the article''' self.article = None def test_parses(self): - \"\"\"Verify we can parse the document.\"\"\" + '''Verify we can parse the document.''' doc = Article(self.article) self.assertTrue('id="readabilityBody"' in doc.readable) def test_content_exists(self): - \"\"\"Verify that some content exists.\"\"\" - pass + '''Verify that some content exists.''' + raise NotImplementedError() def test_content_does_not_exist(self): - \"\"\"Verify we cleaned out some content that shouldn't exist.\"\"\" - pass + '''Verify we cleaned out some content that shouldn't exist.''' + raise NotImplementedError() """ def parse_args(): - desc = "breadability helper to generate a new set of article test files." - parser = argparse.ArgumentParser(description=desc) - parser.add_argument('--version', - action='version', version=VERSION) - - parser.add_argument('-n', '--name', - action='store', - required=True, - help='Name of the test directory') + return docopt(__doc__, version=VERSION) - parser.add_argument('url', metavar='URL', type=str, nargs=1, - help='The url of content to fetch for the article.html') - args = parser.parse_args() - return args +def make_test_directory(name): + """Generates a new directory for tests.""" + directory_name = "test_" + name.replace(" ", "_") + directory_path = join(TEST_PATH, directory_name) + mkdir(directory_path) + return directory_path -def make_dir(name): - """Generate a new directory for tests. - """ - dir_name = 'test_' + name.replace(' ', '_') - updated_name = path.join(TESTPATH, dir_name) - mkdir(updated_name) - return updated_name - - -def make_files(dirname): - init_file = path.join(dirname, '__init__.py') - test_file = path.join(dirname, 'test.py') +def make_test_files(directory_path): + init_file = join(directory_path, "__init__.py") open(init_file, "a").close() - with open(test_file, 'w') as f: - f.write(TESTTPL) + test_file = join(directory_path, "test.py") + with open(test_file, "w") as file: + file.write(TEST_TEMPLATE) -def fetch_article(dirname, url): + +def fetch_article(directory_path, url): """Get the content of the url and make it the article.html""" opener = urllib.build_opener() opener.addheaders = [('Accept-Charset', 'utf-8')] - url_response = opener.open(url) - dl_html = url_response.read().decode('utf-8') - fh = codecs.open(path.join(dirname, 'article.html'), "w", "utf-8") - fh.write(dl_html) - fh.close() + response = opener.open(url) + html = response.read().decode("utf-8") + response.close() + + path = join(directory_path, "article.html") + file = io.open(path, "w", encoding="utf8") + file.write(html) + file.close() def main(): """Run the script.""" args = parse_args() - new_dir = make_dir(args.name) - make_files(new_dir) - fetch_article(new_dir, args.url[0]) + directory = make_test_directory(args[""]) + make_test_files(directory) + fetch_article(directory, args[""]) if __name__ == '__main__': diff --git a/requirements.txt b/requirements.txt index 2f0a00a..906d379 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,4 @@ +docopt==0.6.* charade lxml coverage diff --git a/setup.py b/setup.py index 6d5e4b6..afe15f1 100644 --- a/setup.py +++ b/setup.py @@ -9,6 +9,7 @@ NEWS = open(os.path.join(here, 'CHANGELOG.rst')).read() version = '0.1.11' install_requires = [ # http://packages.python.org/distribute/setuptools.html#declaring-dependencies + 'docopt==0.6.*', 'charade', 'lxml', ] @@ -19,8 +20,6 @@ tests_require = [ if sys.version_info < (2, 7): - # Require argparse since it's not in the stdlib yet. - install_requires.append('argparse') install_requires.append('unittest2') setup(