Update the name of the summary option to enclose_with_html_tag

This commit is contained in:
Richard Harding 2012-04-17 13:59:02 -04:00
parent a19e766900
commit bbb60ed077
2 changed files with 7 additions and 7 deletions

View File

@ -123,10 +123,10 @@ class Document:
def short_title(self):
return shorten_title(self._html(True))
def summary(self, html_partial=False):
def summary(self, enclose_with_html_tag=False):
"""Generate the summary of the html document
:param html_partial: return only the div of the document, don't wrap
:param enclose_with_html_tag: return only the div of the document, don't wrap
in html and body tags.
"""
@ -147,7 +147,7 @@ class Document:
if best_candidate:
article = self.get_article(candidates, best_candidate,
html_partial=html_partial)
enclose_with_html_tag=enclose_with_html_tag)
else:
if ruthless:
log.debug("ruthless removal did not work. ")
@ -180,7 +180,7 @@ class Document:
log.exception('error getting summary: ')
raise Unparseable(str(e)), None, sys.exc_info()[2]
def get_article(self, candidates, best_candidate, html_partial=False):
def get_article(self, candidates, best_candidate, enclose_with_html_tag=False):
# Now that we have the top candidate, look through its siblings for
# content that might also be related.
# Things like preambles, content split by ads that we removed, etc.
@ -188,7 +188,7 @@ class Document:
10,
best_candidate['content_score'] * 0.2])
# create a new html document with a html->body->div
if html_partial:
if enclose_with_html_tag:
output = fragment_fromstring('<div/>')
else:
output = document_fromstring('<div/>')
@ -219,7 +219,7 @@ class Document:
if append:
# We don't want to append directly to output, but the div
# in html->body->div
if html_partial:
if enclose_with_html_tag:
output.append(sibling)
else:
output.getchildren()[0].getchildren()[0].append(sibling)

View File

@ -34,6 +34,6 @@ class TestArticleOnly(unittest.TestCase):
"""Using the si sample, make sure we can get the article alone."""
sample = load_sample('si-game.sample.html')
doc = Document(sample, url='http://sportsillustrated.cnn.com/baseball/mlb/gameflash/2012/04/16/40630_preview.html')
res = doc.summary(html_partial=True)
res = doc.summary(enclose_with_html_tag=True)
self.assertEqual('<div><div class="', res[0:17])