mirror of
https://github.com/bookieio/breadability
synced 2024-11-16 12:13:11 +00:00
Renamed property of 'OriginalDocument': 'html' -> 'dom'
This commit is contained in:
parent
0e748a80a6
commit
7bd7231e25
@ -87,10 +87,10 @@ class OriginalDocument(object):
|
|||||||
|
|
||||||
def __unicode__(self):
|
def __unicode__(self):
|
||||||
"""Renders the document as a string."""
|
"""Renders the document as a string."""
|
||||||
return tounicode(self.html)
|
return tounicode(self.dom)
|
||||||
|
|
||||||
@cached_property
|
@cached_property
|
||||||
def html(self):
|
def dom(self):
|
||||||
"""Parsed HTML document from the input."""
|
"""Parsed HTML document from the input."""
|
||||||
html = self._html
|
html = self._html
|
||||||
if not isinstance(html, unicode):
|
if not isinstance(html, unicode):
|
||||||
@ -105,12 +105,12 @@ class OriginalDocument(object):
|
|||||||
@cached_property
|
@cached_property
|
||||||
def links(self):
|
def links(self):
|
||||||
"""Links within the document."""
|
"""Links within the document."""
|
||||||
return self.html.findall(".//a")
|
return self.dom.findall(".//a")
|
||||||
|
|
||||||
@cached_property
|
@cached_property
|
||||||
def title(self):
|
def title(self):
|
||||||
"""Title attribute of the parsed document."""
|
"""Title attribute of the parsed document."""
|
||||||
title_element = self.html.find(".//title")
|
title_element = self.dom.find(".//title")
|
||||||
if title_element is None or title_element.text is None:
|
if title_element is None or title_element.text is None:
|
||||||
return ""
|
return ""
|
||||||
else:
|
else:
|
||||||
|
@ -383,10 +383,10 @@ class Article(object):
|
|||||||
def dom(self):
|
def dom(self):
|
||||||
"""Parsed lxml tree (Document Object Model) of the given html."""
|
"""Parsed lxml tree (Document Object Model) of the given html."""
|
||||||
try:
|
try:
|
||||||
document = self._original_document.html
|
dom = self._original_document.dom
|
||||||
# cleaning doesn't return, just wipes in place
|
# cleaning doesn't return, just wipes in place
|
||||||
html_cleaner(document)
|
html_cleaner(dom)
|
||||||
return leaf_div_elements_into_paragraphs(document)
|
return leaf_div_elements_into_paragraphs(dom)
|
||||||
except ValueError:
|
except ValueError:
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
@ -45,7 +45,7 @@ class TestOriginalDocument(unittest.TestCase):
|
|||||||
def test_no_br_allowed(self):
|
def test_no_br_allowed(self):
|
||||||
"""We convert all <br/> tags to <p> tags"""
|
"""We convert all <br/> tags to <p> tags"""
|
||||||
doc = OriginalDocument(load_snippet('document_min.html'))
|
doc = OriginalDocument(load_snippet('document_min.html'))
|
||||||
self.assertIsNone(doc.html.find('.//br'))
|
self.assertIsNone(doc.dom.find('.//br'))
|
||||||
|
|
||||||
def test_empty_title(self):
|
def test_empty_title(self):
|
||||||
"""We convert all <br/> tags to <p> tags"""
|
"""We convert all <br/> tags to <p> tags"""
|
||||||
|
Loading…
Reference in New Issue
Block a user