Use docopt as an argument parser

pull/21/head
Mišo Belica 12 years ago
parent 8470ef2b45
commit ec88a4efe6

@ -1,96 +1,81 @@
# -*- coding: utf8 -*- # -*- coding: utf8 -*-
"""
A fast python port of arc90's readability tool
Usage:
breadability [options] <resource>
breadability --version
breadability --help
Arguments:
<resource> URL or file path to process in readable form.
Options:
-f, --fragment Output html fragment by default.
-b, --browser Open the parsed content in your web browser.
-d, --debug Output the detailed scoring information for debugging
parsing.
-v, --verbose Increase logging verbosity to DEBUG.
--version Display program's version number and exit.
-h, --help Display this help message and exit.
"""
from __future__ import absolute_import from __future__ import absolute_import
from __future__ import division, print_function, unicode_literals
import argparse
import logging import logging
import codecs
import locale import locale
import sys
import urllib import urllib
import webbrowser import webbrowser
from tempfile import mkstemp from tempfile import NamedTemporaryFile
from docopt import docopt
from ._version import VERSION from ._version import VERSION
from .readable import Article from .readable import Article
def parse_args():
    """Parse the command line against the usage text in the module docstring.

    The module docstring doubles as the docopt specification, so the options
    documented there are exactly the ones accepted here.

    Returns:
        The mapping produced by ``docopt``; callers index it by option or
        argument name (for example ``args["--verbose"]``).
    """
    arguments = docopt(__doc__, version=VERSION)
    return arguments
def main():
    """Run the command-line client.

    Reads ``<resource>`` (a URL or a local file path), builds an ``Article``
    from its content and then either opens the readable HTML in a web browser
    (``--browser``) or prints it to standard output encoded with the locale's
    preferred encoding.
    """
    args = parse_args()
    logger = logging.getLogger("breadability")
    if args["--verbose"]:
        logger.setLevel(logging.DEBUG)

    resource = args["<resource>"]
    # Bare "www..." addresses are treated as plain HTTP URLs.
    if resource.startswith("www"):
        resource = "http://" + resource

    url = None
    if resource.startswith(("http://", "https://")):
        url = resource
        response = urllib.urlopen(url)
        try:
            content = response.read()
        finally:
            # Release the connection even when reading the body fails.
            response.close()
    else:
        with open(resource, "r") as source_file:
            content = source_file.read()

    # NOTE(review): docopt yields False for --fragment when the flag is
    # omitted, while its help text reads "Output html fragment by default."
    # Confirm which default Article is expected to receive.
    document = Article(content, url=url, fragment=args["--fragment"])
    if args["--browser"]:
        html = document.readable.encode("utf8")
        # Binary mode because the payload is already encoded; delete=False
        # keeps the file on disk after closing so the browser can load it.
        html_file = NamedTemporaryFile(mode="wb", suffix=".html", delete=False)
        try:
            html_file.write(html)
        finally:
            html_file.close()
        # Launch the browser only after the file is closed, so the whole
        # document has been flushed to disk before it is read.
        webbrowser.open(html_file.name)
    else:
        encoding = locale.getpreferredencoding()
        print(document.readable.encode(encoding))
if __name__ == '__main__': if __name__ == '__main__':

@ -1,18 +1,43 @@
import argparse # -*- coding: utf8 -*-
import codecs
from os import mkdir """
from os import path Helper to generate a new set of article test files for breadability.
Usage:
breadability_newtest -n <name> <url>
breadability_newtest --version
breadability_newtest --help
Arguments:
<url> The url of content to fetch for the article.html
Options:
-n <name>, --name=<name> Name of the test directory.
--version Show program's version number and exit.
-h, --help Show this help message and exit.
"""
from __future__ import absolute_import
import io
from os import mkdir
from os.path import join, dirname, pardir
from docopt import docopt
from .._version import VERSION from .._version import VERSION
from .._py3k import urllib from .._py3k import urllib
# Directory that holds the per-article test packages, located relative to
# this module: <directory of this file>/../tests/test_articles
TEST_PATH = join(dirname(__file__), pardir, "tests", "test_articles")
TESTTPL = """ TEST_TEMPLATE = """
import os import os
try: try:
# Python < 2.7 # Python < 2.7
import unittest2 as unittest import unittest2 as unittest
@ -23,86 +48,75 @@ from breadability.readable import Article
class TestArticle(unittest.TestCase): class TestArticle(unittest.TestCase):
\"\"\"Test the scoring and parsing of the Article\"\"\" '''Test the scoring and parsing of the Article'''
def setUp(self): def setUp(self):
\"\"\"Load up the article for us\"\"\" '''Load up the article for us'''
article_path = os.path.join(os.path.dirname(__file__), 'article.html') article_path = os.path.join(os.path.dirname(__file__), 'article.html')
self.article = open(article_path).read() self.article = open(article_path).read()
def tearDown(self): def tearDown(self):
\"\"\"Drop the article\"\"\" '''Drop the article'''
self.article = None self.article = None
def test_parses(self): def test_parses(self):
\"\"\"Verify we can parse the document.\"\"\" '''Verify we can parse the document.'''
doc = Article(self.article) doc = Article(self.article)
self.assertTrue('id="readabilityBody"' in doc.readable) self.assertTrue('id="readabilityBody"' in doc.readable)
def test_content_exists(self): def test_content_exists(self):
\"\"\"Verify that some content exists.\"\"\" '''Verify that some content exists.'''
pass raise NotImplementedError()
def test_content_does_not_exist(self): def test_content_does_not_exist(self):
\"\"\"Verify we cleaned out some content that shouldn't exist.\"\"\" '''Verify we cleaned out some content that shouldn't exist.'''
pass raise NotImplementedError()
""" """
def parse_args():
    """Parse the command line against the usage text in the module docstring.

    Returns:
        The mapping produced by ``docopt``, indexed by option or argument
        name (for example ``args["<url>"]``).
    """
    arguments = docopt(__doc__, version=VERSION)
    return arguments
def make_test_directory(name):
    """Create the directory for a new article test and return its path.

    The directory is created under ``TEST_PATH`` and is named ``test_``
    followed by ``name`` with every space replaced by an underscore.
    """
    safe_name = name.replace(" ", "_")
    target = join(TEST_PATH, "test_" + safe_name)
    mkdir(target)
    return target


def make_test_files(directory_path):
    """Populate a test directory with ``__init__.py`` and ``test.py``.

    ``__init__.py`` is only touched, while ``test.py`` is (re)written with
    the contents of ``TEST_TEMPLATE``.
    """
    package_marker = join(directory_path, "__init__.py")
    # Append mode creates the file when it is missing and leaves any
    # existing content untouched.
    open(package_marker, "a").close()

    test_module = join(directory_path, "test.py")
    with open(test_module, "w") as handle:
        handle.write(TEST_TEMPLATE)
def fetch_article(directory_path, url):
    """Get the content of the url and make it the article.html.

    The page is requested with an ``Accept-Charset: utf-8`` header, decoded
    as UTF-8 and written to ``article.html`` inside ``directory_path``.

    Raises:
        UnicodeDecodeError: if the downloaded body is not valid UTF-8.
    """
    opener = urllib.build_opener()
    opener.addheaders = [('Accept-Charset', 'utf-8')]

    response = opener.open(url)
    try:
        html = response.read().decode("utf-8")
    finally:
        # Close the connection even when reading or decoding fails.
        response.close()

    article_path = join(directory_path, "article.html")
    # The context manager closes the file even if the write raises.
    with io.open(article_path, "w", encoding="utf8") as article_file:
        article_file.write(html)
def main():
    """Run the script.

    Creates the test directory named by ``--name``, writes the skeleton test
    files into it and downloads ``<url>`` as its ``article.html``.
    """
    args = parse_args()
    # docopt files the value of "-n <name>, --name=<name>" under the option's
    # long name; there is no "<name>" key in the parsed arguments.
    directory = make_test_directory(args["--name"])
    make_test_files(directory)
    fetch_article(directory, args["<url>"])
if __name__ == '__main__': if __name__ == '__main__':

@ -1,3 +1,4 @@
docopt==0.6.*
charade charade
lxml lxml
coverage coverage

@ -9,6 +9,7 @@ NEWS = open(os.path.join(here, 'CHANGELOG.rst')).read()
version = '0.1.11' version = '0.1.11'
install_requires = [ install_requires = [
# http://packages.python.org/distribute/setuptools.html#declaring-dependencies # http://packages.python.org/distribute/setuptools.html#declaring-dependencies
'docopt==0.6.*',
'charade', 'charade',
'lxml', 'lxml',
] ]
@ -19,8 +20,6 @@ tests_require = [
if sys.version_info < (2, 7): if sys.version_info < (2, 7):
# Require argparse since it's not in the stdlib yet.
install_requires.append('argparse')
install_requires.append('unittest2') install_requires.append('unittest2')
setup( setup(

Loading…
Cancel
Save