Work on adding client.py to pull out cmd line code
This commit is contained in:
parent
1c1cbaefa5
commit
674e5f9ef2
54
src/readability_lxml/client.py
Normal file
54
src/readability_lxml/client.py
Normal file
@ -0,0 +1,54 @@
|
||||
# Standard library
import argparse
import sys

# Package-local: the package is ``readability_lxml``.
from readability_lxml import VERSION
from readability_lxml.readability import Document
|
||||
|
||||
|
||||
def parse_args(argv=None):
    """Build the command line parser and parse the arguments.

    :param argv: Optional list of argument strings to parse.  When left as
        None, argparse reads ``sys.argv[1:]``.
    :returns: The parsed ``argparse.Namespace`` carrying ``verbose`` (bool),
        ``url`` (bool) and ``path`` (a one-element list, because the
        positional argument is declared with ``nargs=1``).

    """
    desc = "fast python port of arc90's readability tool"
    parser = argparse.ArgumentParser(description=desc)
    parser.add_argument('--version',
        action='version', version=VERSION)

    parser.add_argument('-v', '--verbose',
        action='store_true',
        default=False,
        help="Increase logging verbosity to DEBUG.")

    # A plain flag: the location always comes from the positional ``path``
    # argument; this option only says how that location is to be opened.
    parser.add_argument('-u', '--url',
        action='store_true',
        default=False,
        help="Indicate that this is a url path.")

    parser.add_argument('path', metavar='P', type=str, nargs=1,
        help="The url or file path to process in readable form.")

    return parser.parse_args(argv)


def main():
    """Command line entry point: print the summary of one document.

    Opens the positional ``path`` either as a url (when ``-u/--url`` is
    given) or as a local file, feeds its content to ``Document`` and prints
    the encoded result of ``Document.summary()`` to stdout.

    """
    args = parse_args()
    path = args.path[0]

    if args.url:
        import urllib
        target = urllib.urlopen(path)
        # Hand Document the location that was actually fetched.
        doc_url = path
    else:
        target = open(path, 'rt')
        doc_url = None

    # Fall back to utf-8 when stdout reports no encoding.
    enc = sys.__stdout__.encoding or 'utf-8'

    try:
        doc = Document(target.read(),
            debug=args.verbose,
            url=doc_url)
        # Parenthesised single argument: same output as the Python 2 print
        # statement.
        print(doc.summary().encode(enc, 'replace'))
    finally:
        target.close()
|
||||
|
||||
|
||||
# Run the command line client only when this file is executed as a script.
if __name__ == '__main__':
    main()
|
@ -531,59 +531,3 @@ class Document:
|
||||
pass
|
||||
|
||||
return clean_attributes(tounicode(node))
|
||||
|
||||
|
||||
class HashableElement():
    """Wrap a node so it hashes and compares by its path through the tree.

    The path is a tuple of ``(tag, attributes, text)`` triples, collected
    from the wrapped node upwards through every ancestor returned by
    ``getparent()``.  Two wrappers with the same path hash alike and compare
    equal, even when they wrap distinct node objects.  Any attribute not
    found on the wrapper is looked up on the wrapped node.

    """

    def __init__(self, node):
        self.node = node
        # Filled in lazily by _get_path().
        self._path = None

    def _get_path(self):
        """Return the path tuple, computing and caching it on first use."""
        if self._path is None:
            reverse_path = []
            node = self.node
            while node is not None:
                node_id = (node.tag, tuple(node.attrib.items()), node.text)
                reverse_path.append(node_id)
                node = node.getparent()
            self._path = tuple(reverse_path)
        return self._path
    path = property(_get_path)

    def __hash__(self):
        return hash(self.path)

    def __eq__(self, other):
        # Objects without a ``path`` (None, plain nodes, ...) are simply not
        # equal; let Python fall back instead of raising AttributeError.
        try:
            other_path = other.path
        except AttributeError:
            return NotImplemented
        return self.path == other_path

    def __ne__(self, other):
        # Python 2 does not derive != from ==, so keep the two consistent.
        result = self.__eq__(other)
        if result is NotImplemented:
            return result
        return not result

    def __getattr__(self, tag):
        # __getattr__ only runs when normal lookup failed.  If the wrapper's
        # own attributes are missing (instance not initialised yet), asking
        # self.node again would recurse forever.
        if tag in ('node', '_path'):
            raise AttributeError(tag)
        return getattr(self.node, tag)
|
||||
|
||||
|
||||
def main():
    """Command line entry point: print the summary of a file or URL.

    Parses ``-v/--verbose`` and ``-u/--url`` with optparse.  Without a URL,
    exactly one positional file argument is required; otherwise the help
    text is printed and the process exits with status 1.  The content is
    read, passed to ``Document`` and the encoded ``summary()`` is printed.

    """
    from optparse import OptionParser

    parser = OptionParser(usage="%prog: [options] [file]")
    parser.add_option('-v', '--verbose', action='store_true')
    parser.add_option('-u', '--url', default=None, help="use URL instead of a local file")
    options, positional = parser.parse_args()

    # Need either a URL or exactly one file name to work on.
    if len(positional) != 1 and not options.url:
        parser.print_help()
        sys.exit(1)

    if options.url:
        import urllib
        source = urllib.urlopen(options.url)
    else:
        source = open(positional[0], 'rt')

    # Fall back to utf-8 when stdout reports no encoding.
    enc = sys.__stdout__.encoding or 'utf-8'
    try:
        document = Document(source.read(),
                            debug=options.verbose,
                            url=options.url)
        summary = document.summary()
        # Parenthesised single argument: same output as the Python 2 print
        # statement.
        print(summary.encode(enc, 'replace'))
    finally:
        source.close()
|
||||
|
||||
# Invoke the command line entry point only when run as a script.
if __name__ == '__main__':
    main()
|
||||
|
Loading…
Reference in New Issue
Block a user