Changed 'readable_annotated_text' -> 'main_text'

pull/21/head
Mišo Belica 11 years ago
parent c47530bfe0
commit 69dd9ef4fd

@ -2,8 +2,8 @@
Changelog for readability
==========================
- Added property ``Article.readable_annotated_text`` for getting DOM
with same semantic tags (<em>, <strong>, ...).
- Added property ``Article.main_text`` for getting DOM with same
semantic tags (<em>, <strong>, ...).
- Join node with 1 child of the same type. From
``<div><div>...</div></div>`` we get ``<div>...</div>``.
- Don't change <div> to <p> if it contains <p> elements.

@ -402,7 +402,7 @@ class Article(object):
return candidates
@cached_property
def readable_annotated_text(self):
def main_text(self):
dom = deepcopy(self.readable_dom)
for node in dom.get_element_by_id("readabilityBody").iterdescendants():
if node.tag not in ANNOTATION_TAGS:

@ -287,28 +287,28 @@ class TestSiblings(unittest.TestCase):
raise NotImplementedError()
class TestAnnotatedText(unittest.TestCase):
class TestMainText(unittest.TestCase):
def test_empty(self):
article = Article("")
dom = article.readable_annotated_text
dom = article.main_text
self.assertEqual(tounicode(dom),
'<div id="readabilityBody" class="parsing-error"/>')
def test_no_annotations(self):
article = Article("<div><p>This is text with no annotations</p></div>")
dom = article.readable_annotated_text
dom = article.main_text
self.assertEqual(tounicode(dom),
'<div id="readabilityBody"><p>This is text with no annotations</p></div>')
def test_one_annotation(self):
article = Article("<div><p>This is text with <del>no</del> annotations</p></div>")
dom = article.readable_annotated_text
dom = article.main_text
self.assertEqual(tounicode(dom),
'<div id="readabilityBody"><p>This is text with <del>no</del> annotations</p></div>')
def test_simple_document(self):
article = Article(load_snippet("annotated_1.html"))
dom = article.readable_annotated_text
dom = article.main_text
self.assertIn("Paragraph is more better", dom.text_content())
self.assertIn("This is not crap so readability me :)", dom.text_content())

Loading…
Cancel
Save