|
|
|
@ -5,7 +5,8 @@ from __future__ import division, print_function, unicode_literals
|
|
|
|
|
|
|
|
|
|
from collections import defaultdict
|
|
|
|
|
from readability._py3k import to_unicode, to_bytes
|
|
|
|
|
from readability.document import OriginalDocument, determine_encoding
|
|
|
|
|
from readability.document import (OriginalDocument, determine_encoding,
|
|
|
|
|
replace_multi_br_to_paragraphs)
|
|
|
|
|
from .compat import unittest
|
|
|
|
|
from .utils import load_snippet
|
|
|
|
|
|
|
|
|
@ -13,6 +14,13 @@ from .utils import load_snippet
|
|
|
|
|
class TestOriginalDocument(unittest.TestCase):
|
|
|
|
|
"""Verify we can process html into a document to work off of."""
|
|
|
|
|
|
|
|
|
|
def test_replace_multi_br_to_paragraphs(self):
    """Check that runs of <br> tags are rewritten as paragraph breaks.

    The fixture contains two runs: a plain ``<br><br>`` and a run whose
    tags are separated by spaces, a tab and a newline. Each run is
    expected to come back as a single ``</p><p>`` pair, with the
    surrounding text (including the newline inside it) left as-is.
    """
    markup = (
        "<div>HI<br><br>How are you?<br><br> \t \n <br>Fine\n I guess</div>"
    )
    expected = "<div>HI</p><p>How are you?</p><p>Fine\n I guess</div>"

    # Actual value first, expected second, so that failure output keeps
    # the same orientation as before.
    self.assertEqual(replace_multi_br_to_paragraphs(markup), expected)
|
|
|
|
|
|
|
|
|
|
def test_readin_min_document(self):
|
|
|
|
|
"""Verify we can read in a min html document"""
|
|
|
|
|
doc = OriginalDocument(load_snippet('document_min.html'))
|
|
|
|
|