Renamed property of 'OriginalDocument': 'html' -> 'dom'

This commit is contained in:
Mišo Belica 2013-03-23 17:03:54 +01:00
parent 0e748a80a6
commit 7bd7231e25
3 changed files with 8 additions and 8 deletions

View File

@@ -87,10 +87,10 @@ class OriginalDocument(object):
def __unicode__(self):
"""Renders the document as a string."""
return tounicode(self.html)
return tounicode(self.dom)
@cached_property
def html(self):
def dom(self):
"""Parsed HTML document from the input."""
html = self._html
if not isinstance(html, unicode):
@@ -105,12 +105,12 @@ class OriginalDocument(object):
@cached_property
def links(self):
"""Links within the document."""
return self.html.findall(".//a")
return self.dom.findall(".//a")
@cached_property
def title(self):
"""Title attribute of the parsed document."""
title_element = self.html.find(".//title")
title_element = self.dom.find(".//title")
if title_element is None or title_element.text is None:
return ""
else:

View File

@@ -383,10 +383,10 @@ class Article(object):
def dom(self):
"""Parsed lxml tree (Document Object Model) of the given html."""
try:
document = self._original_document.html
dom = self._original_document.dom
# cleaning doesn't return, just wipes in place
html_cleaner(document)
return leaf_div_elements_into_paragraphs(document)
html_cleaner(dom)
return leaf_div_elements_into_paragraphs(dom)
except ValueError:
return None

View File

@@ -45,7 +45,7 @@ class TestOriginalDocument(unittest.TestCase):
def test_no_br_allowed(self):
"""We convert all <br/> tags to <p> tags"""
doc = OriginalDocument(load_snippet('document_min.html'))
self.assertIsNone(doc.html.find('.//br'))
self.assertIsNone(doc.dom.find('.//br'))
def test_empty_title(self):
"""We convert all <br/> tags to <p> tags"""