diff --git a/breadability/document.py b/breadability/document.py index 31780c7..1d9920d 100644 --- a/breadability/document.py +++ b/breadability/document.py @@ -8,10 +8,21 @@ import re import logging import charade -from lxml.etree import tostring, tounicode, XMLSyntaxError -from lxml.html import document_fromstring, HTMLParser - -from ._compat import unicode, to_bytes, to_unicode, unicode_compatible +from lxml.etree import ( + tounicode, + XMLSyntaxError, +) +from lxml.html import ( + document_fromstring, + HTMLParser, +) + +from ._compat import ( + to_bytes, + to_unicode, + unicode, + unicode_compatible, +) from .utils import cached_property diff --git a/tests/test_readable.py b/tests/test_readable.py index 727c793..483bb6f 100644 --- a/tests/test_readable.py +++ b/tests/test_readable.py @@ -29,6 +29,14 @@ class TestReadableDocument(unittest.TestCase): # We get back the document as a div tag currently by default. self.assertEqual(doc.readable_dom.tag, 'div') + def test_title_loads(self): + """Verify we can fetch the title of the parsed article""" + doc = Article(load_snippet('document_min.html')) + self.assertEqual( + doc._original_document.title, + 'Min Document Title' + ) + def test_doc_no_scripts_styles(self): """Step #1 remove all scripts from the document""" doc = Article(load_snippet('document_scripts.html')) @@ -129,18 +137,26 @@ class TestCleaning(unittest.TestCase): '') test_doc2 = document_fromstring(test_html2) self.assertEqual( - tounicode(leaf_div_elements_into_paragraphs(test_doc2)), - to_unicode('

simplelink

') + tounicode( + leaf_div_elements_into_paragraphs(test_doc2)), + to_unicode( + '

simplelink

') ) def test_dont_transform_div_with_div(self): """Verify that only child <div> element is replaced by <p>.""" dom = document_fromstring(

text
child
aftertext
") + "
text
child
" + "aftertext
" + ) self.assertEqual( - tounicode(leaf_div_elements_into_paragraphs(dom)), - to_unicode("
text

child

aftertext
") + tounicode( + leaf_div_elements_into_paragraphs(dom)), + to_unicode( + "
text

child

" + "aftertext
" + ) ) def test_bad_links(self):