You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
119 lines
3.0 KiB
Python
119 lines
3.0 KiB
Python
import argparse
import codecs
import locale
import os
import sys
import urllib
import webbrowser

from tempfile import mkstemp

from breadability import VERSION
from breadability.logconfig import LOG
from breadability.logconfig import LNODE
from breadability.logconfig import set_logging_level
from breadability.readable import Article
|
|
|
|
|
|
# Default logging level name for the command line client.
LOGLEVEL = 'WARNING'

# Agent string sent with outgoing requests; it embeds the package version and
# the project URL so the traffic can be identified.
# NOTE(review): the space before '/' in the template looks unintentional for a
# product/version token -- confirm before changing, servers see this string.
USER_AGENT = 'breadability /{version} ({url})'.format(
    version=VERSION,
    url="https://github.com/mitechie/breadability",
)
|
|
|
|
|
|
# Send a custom user agent with outgoing requests so that servers are happy
# and so that others can identify where the traffic comes from.
class AppURLopener(urllib.FancyURLopener):
    """FancyURLopener whose ``version`` attribute is the client USER_AGENT."""

    version = USER_AGENT


# Install an instance as urllib's module-level opener at import time.
urllib._urlopener = AppURLopener()
|
|
|
|
|
|
def parse_args(argv=None):
    """Build the command line parser and parse the arguments.

    Args:
        argv: Optional list of argument strings to parse. When ``None``
            (the default) argparse reads the process arguments, which is
            what the original zero-argument call did.

    Returns:
        The ``argparse.Namespace`` with the attributes ``verbose``,
        ``fragment``, ``browser``, ``debug`` and ``path`` (a one-item list,
        because of ``nargs=1``).
    """
    desc = "A fast python port of arc90's readability tool"
    parser = argparse.ArgumentParser(description=desc)

    parser.add_argument(
        '--version',
        action='version', version=VERSION)

    parser.add_argument(
        '-v', '--verbose',
        action='store_true',
        default=False,
        help='Increase logging verbosity to DEBUG.')

    # store_false with default=True: args.fragment is True unless -f is
    # passed, in which case it becomes False.
    parser.add_argument(
        '-f', '--fragment',
        action='store_false',
        default=True,
        help='Output html fragment by default.')

    # Disabled option, kept for reference:
    # parser.add_argument('-m', '--metadata',
    #                     action='store_true',
    #                     default=False,
    #                     help='print all metadata as well as content for the content')

    parser.add_argument(
        '-b', '--browser',
        action='store_true',
        default=False,
        help='open the parsed content in your web browser')

    parser.add_argument(
        '-d', '--debug',
        action='store_true',
        default=False,
        help='Output the detailed scoring information for debugging parsing')

    parser.add_argument(
        'path', metavar='P',
        type=str,
        nargs=1,
        help="The url or file path to process in readable form.")

    return parser.parse_args(argv)
|
|
|
|
|
|
def main():
    """Run the command line client.

    Parses the command line, loads the target (fetched over HTTP when it
    looks like a URL, otherwise read as a UTF-8 local file), builds an
    ``Article`` from the content and either opens the readable result in a
    web browser or writes it to stdout.
    """
    args = parse_args()

    if args.verbose:
        set_logging_level('DEBUG')

    if args.debug:
        LNODE.activate()

    target = args.path[0]
    LOG.debug("Target: " + target)

    if target.startswith('www'):
        # urllib treats a string without a scheme as a local file path, so a
        # bare "www..." target needs an explicit scheme to be fetched.
        url = 'http://' + target
    elif target.startswith('http'):
        url = target
    else:
        url = None

    if url is not None:
        req = urllib.urlopen(url)
        try:
            content = req.read()
        finally:
            # Always release the connection, even if the read fails.
            req.close()
        # NOTE(review): the response is assumed to be UTF-8 encoded; other
        # encodings will raise here -- confirm whether that is acceptable.
        ucontent = unicode(content, 'utf-8')
    else:
        with codecs.open(target, "r", "utf-8") as source:
            ucontent = source.read()

    doc = Article(ucontent, url=url, fragment=args.fragment)
    if args.browser:
        fd, pathname = mkstemp(suffix='.html')
        # mkstemp hands back an already-open OS-level descriptor; close it so
        # it is not leaked, then write through a unicode-aware file object.
        os.close(fd)
        with codecs.open(pathname, 'w', 'utf-8') as out:
            out.write(doc.readable)
        webbrowser.open(pathname)
    else:
        # Wrap sys.stdout into a StreamWriter to allow writing unicode.
        sys.stdout = codecs.getwriter(
            locale.getpreferredencoding())(sys.stdout)
        sys.stdout.write(doc.readable)
|
|
|
|
|
|
# Run the client only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
|