Renamed property of 'OriginalDocument': 'html' -> 'dom'

This commit is contained in:
Mišo Belica 2013-03-23 17:03:54 +01:00
parent 0e748a80a6
commit 7bd7231e25
3 changed files with 8 additions and 8 deletions

View File

@@ -87,10 +87,10 @@ class OriginalDocument(object):
def __unicode__(self): def __unicode__(self):
"""Renders the document as a string.""" """Renders the document as a string."""
return tounicode(self.html) return tounicode(self.dom)
@cached_property @cached_property
def html(self): def dom(self):
"""Parsed HTML document from the input.""" """Parsed HTML document from the input."""
html = self._html html = self._html
if not isinstance(html, unicode): if not isinstance(html, unicode):
@@ -105,12 +105,12 @@ class OriginalDocument(object):
@cached_property @cached_property
def links(self): def links(self):
"""Links within the document.""" """Links within the document."""
return self.html.findall(".//a") return self.dom.findall(".//a")
@cached_property @cached_property
def title(self): def title(self):
"""Title attribute of the parsed document.""" """Title attribute of the parsed document."""
title_element = self.html.find(".//title") title_element = self.dom.find(".//title")
if title_element is None or title_element.text is None: if title_element is None or title_element.text is None:
return "" return ""
else: else:

View File

@@ -383,10 +383,10 @@ class Article(object):
def dom(self): def dom(self):
"""Parsed lxml tree (Document Object Model) of the given html.""" """Parsed lxml tree (Document Object Model) of the given html."""
try: try:
document = self._original_document.html dom = self._original_document.dom
# cleaning doesn't return, just wipes in place # cleaning doesn't return, just wipes in place
html_cleaner(document) html_cleaner(dom)
return leaf_div_elements_into_paragraphs(document) return leaf_div_elements_into_paragraphs(dom)
except ValueError: except ValueError:
return None return None

View File

@@ -45,7 +45,7 @@ class TestOriginalDocument(unittest.TestCase):
def test_no_br_allowed(self): def test_no_br_allowed(self):
"""We convert all <br/> tags to <p> tags""" """We convert all <br/> tags to <p> tags"""
doc = OriginalDocument(load_snippet('document_min.html')) doc = OriginalDocument(load_snippet('document_min.html'))
self.assertIsNone(doc.html.find('.//br')) self.assertIsNone(doc.dom.find('.//br'))
def test_empty_title(self): def test_empty_title(self):
"""We convert all <br/> tags to <p> tags""" """We convert all <br/> tags to <p> tags"""