Merge pull request #35 from bookieio/port-pytest

Migrate tests to pytest
6 years ago · abc3c0bbb9
parent d91236681e 501c35c8bc
commit abc3c0bbb9
19 changed files with 1094 additions and 1090 deletions
--- a/.gitignore
+++ b/.gitignore
@ -1,6 +1,10 @@
 *.py[co]
 *.prof
-.coverage
+
+# hide all files starting with dot: .idea, .pytest_cache, .coverage, ...
+.*
+!.gitignore
+!.travis.yml

 .installed.cfg
 bin
--- a/.travis.yml
+++ b/.travis.yml
@ -1,13 +1,22 @@
 language: python
 python:
+  # https://github.com/travis-ci/travis-ci/issues/2219#issuecomment-41804942
+  # https://snarky.ca/how-to-use-your-project-travis-to-help-test-python-itself/
  - "2.6"
  - "2.7"
  - "3.2"
  - "3.3"
  - "3.4"
+  - "3.5"
+  - "3.5-dev"
+  - "3.6"
+  - "3.6-dev"
+  - "3.7-dev"
+  - "nightly"
 before_install: sudo apt-get install libxml2-dev libxslt-dev
 # command to install dependencies
 install:
+  - pip install -U pip wheel setuptools
  - python setup.py install
  - make deps
 # command to run tests
--- a/MANIFEST.in
+++ b/MANIFEST.in
@ -2,3 +2,5 @@ include README.rst
 include CHANGELOG.rst
 include LICENSE.rst
 include AUTHORS.txt
+recursive-exclude * __pycache__
+recursive-exclude * *.py[co]
--- a/README.rst
+++ b/README.rst
@ -1,5 +1,5 @@
 breadability - another readability Python (v2.6-v3.3) port
-===========================================================
+==========================================================
 .. image:: https://api.travis-ci.org/bookieio/breadability.png?branch=master
   :target: https://travis-ci.org/bookieio/breadability.py

@ -55,7 +55,7 @@ Tests
 -----
 .. code-block:: bash

-    $ nosetests-2.6 tests && nosetests-3.2 tests && nosetests-2.7 tests && nosetests-3.3 tests
+    $ pytest tests


 Usage
--- a/breadability/document.py
+++ b/breadability/document.py
@ -4,30 +4,15 @@

 from __future__ import absolute_import

-import re
 import logging
-import chardet
-
-from lxml.etree import (
-    tounicode,
-    XMLSyntaxError,
-)
-from lxml.html import (
-    document_fromstring,
-    HTMLParser,
-)
+import re

-from ._compat import (
-    to_bytes,
-    to_unicode,
-    unicode,
-    unicode_compatible,
-)
-from .utils import (
-    cached_property,
-    ignored,
-)
+import chardet
+from lxml.etree import ParserError, XMLSyntaxError, tounicode
+from lxml.html import HTMLParser, document_fromstring

+from ._compat import to_bytes, to_unicode, unicode, unicode_compatible
+from .utils import cached_property, ignored

 logger = logging.getLogger("breadability")

@ -111,7 +96,7 @@ def build_document(html_content, base_href=None):

    try:
        document = document_fromstring(html_content, parser=UTF8_PARSER)
-    except XMLSyntaxError:
+    except (ParserError, XMLSyntaxError):
        raise ValueError("Failed to parse document contents.")

    if base_href:
--- a/breadability/scripts/test_helper.py
+++ b/breadability/scripts/test_helper.py
@ -35,41 +35,41 @@ TEST_PATH = join(

 TEST_TEMPLATE = '''# -*- coding: utf8 -*-

-from __future__ import absolute_import
-from __future__ import division, print_function, unicode_literals
+"""
+Test the scoring and parsing of the article from URL below:
+%(source_url)s
+"""

-from os.path import join, dirname
-from breadability.readable import Article
-from ...compat import unittest
+from __future__ import absolute_import, division, print_function, unicode_literals

+import os

-class TestArticle(unittest.TestCase):
-    """
-    Test the scoring and parsing of the article from URL below:
-    %(source_url)s
-    """
+import pytest

-    def setUp(self):
+from breadability.readable import Article
+
+
+@pytest.fixture(scope="module")
+def article():
    """Load up the article for us"""
-        article_path = join(dirname(__file__), "article.html")
+    article_path = os.path.join(os.path.dirname(__file__), "article.html")
    with open(article_path, "rb") as file:
-            self.document = Article(file.read(), "%(source_url)s")
+        return Article(file.read(), "%(source_url)s")

-    def tearDown(self):
-        """Drop the article"""
-        self.document = None

-    def test_parses(self):
+def test_parses(article):
    """Verify we can parse the document."""
-        self.assertIn('id="readabilityBody"', self.document.readable)
+    assert 'id="readabilityBody"' in article.readable

-    def test_content_exists(self):
+
+def test_content_exists(article):
    """Verify that some content exists."""
-        self.assertIn("#&@#&@#&@", self.document.readable)
+    assert "#&@#&@#&@" in article.readable
+

-    def test_content_does_not_exist(self):
+def test_content_does_not_exist(article):
    """Verify we cleaned out some content that shouldn't exist."""
-        self.assertNotIn("", self.document.readable)
+    assert "" not in article.readable
 '''


--- a/setup.cfg
+++ b/setup.cfg
@ -1,7 +1,5 @@
-[nosetests]
-with-coverage=1
-cover-package=breadability
-cover-erase=1
+[tool:pytest]
+addopts = --quiet --tb=native --color=yes

-[wheel]
-universal=1
+[bdist_wheel]
+universal = 1
--- a/setup.py
+++ b/setup.py
@ -32,9 +32,6 @@ tests_require = [
 ]


-if sys.version_info < (2, 7):
-    install_requires.append("unittest2")
-
 console_script_targets = [
    "breadability = breadability.scripts.client:main",
    "breadability-{0} = breadability.scripts.client:main",
@ -76,6 +73,9 @@ setup(
        "Programming Language :: Python :: 3",
        "Programming Language :: Python :: 3.2",
        "Programming Language :: Python :: 3.3",
+        "Programming Language :: Python :: 3.4",
+        "Programming Language :: Python :: 3.5",
+        "Programming Language :: Python :: 3.6",
        "Programming Language :: Python :: Implementation :: CPython",
        "Topic :: Internet :: WWW/HTTP",
        "Topic :: Software Development :: Pre-processors",
@ -87,7 +87,7 @@ setup(
    zip_safe=False,
    install_requires=install_requires,
    tests_require=tests_require,
-    test_suite="nose.collector",
+    test_suite="tests",
    entry_points={
        "console_scripts": console_script_targets,
    }
--- a/tests/compat.py
+++ b/tests/compat.py
@ -2,8 +2,3 @@

 from __future__ import absolute_import
 from __future__ import division, print_function, unicode_literals
-
-try:
-    import unittest2 as unittest
-except ImportError:
-    import unittest
--- a/tests/test_annotated_text.py
+++ b/tests/test_annotated_text.py
@ -10,38 +10,36 @@ from __future__ import (
 from lxml.html import fragment_fromstring, document_fromstring
 from breadability.readable import Article
 from breadability.annotated_text import AnnotatedTextHandler
-from .compat import unittest
 from .utils import load_snippet, load_article


-class TestAnnotatedText(unittest.TestCase):
-    def test_simple_document(self):
+def test_simple_document():
    dom = fragment_fromstring("<p>This is\n\tsimple\ttext.</p>")
    annotated_text = AnnotatedTextHandler.parse(dom)

-        expected = [
+    assert annotated_text == [
        (
            ("This is\nsimple text.", None),
        ),
    ]
-        self.assertEqual(annotated_text, expected)

-    def test_empty_paragraph(self):
+
+def test_empty_paragraph():
    dom = fragment_fromstring("<div><p>Paragraph <p>\t  \n</div>")
    annotated_text = AnnotatedTextHandler.parse(dom)

-        expected = [
+    assert annotated_text == [
        (
            ("Paragraph", None),
        ),
    ]
-        self.assertEqual(annotated_text, expected)

-    def test_multiple_paragraphs(self):
+
+def test_multiple_paragraphs():
    dom = fragment_fromstring("<div><p> 1 first<p> 2\tsecond <p>3\rthird   </div>")
    annotated_text = AnnotatedTextHandler.parse(dom)

-        expected = [
+    assert annotated_text == [
        (
            ("1 first", None),
        ),
@ -52,13 +50,13 @@ class TestAnnotatedText(unittest.TestCase):
            ("3\nthird", None),
        ),
    ]
-        self.assertEqual(annotated_text, expected)

-    def test_single_annotation(self):
+
+def test_single_annotation():
    dom = fragment_fromstring("<div><p> text <em>emphasis</em> <p> last</div>")
    annotated_text = AnnotatedTextHandler.parse(dom)

-        expected = [
+    assert annotated_text == [
        (
            ("text", None),
            ("emphasis", ("em",)),
@ -67,13 +65,13 @@ class TestAnnotatedText(unittest.TestCase):
            ("last", None),
        ),
    ]
-        self.assertEqual(annotated_text, expected)

-    def test_recursive_annotation(self):
+
+def test_recursive_annotation():
    dom = fragment_fromstring("<div><p> text <em><i><em>emphasis</em></i></em> <p> last</div>")
    annotated_text = AnnotatedTextHandler.parse(dom)

-        expected = [
+    assert annotated_text == [
        (
            ("text", None),
            ("emphasis", ("em", "i")),
@ -82,22 +80,22 @@ class TestAnnotatedText(unittest.TestCase):
            ("last", None),
        ),
    ]
-        self.assertEqual(annotated_text, expected)

-    def test_annotations_without_explicit_paragraph(self):
+
+def test_annotations_without_explicit_paragraph():
    dom = fragment_fromstring("<div>text <strong>emphasis</strong>\t<b>hmm</b> </div>")
    annotated_text = AnnotatedTextHandler.parse(dom)

-        expected = [
+    assert annotated_text == [
        (
            ("text", None),
            ("emphasis", ("strong",)),
            ("hmm", ("b",)),
        ),
    ]
-        self.assertEqual(annotated_text, expected)

-    def test_process_paragraph_with_chunked_text(self):
+
+def test_process_paragraph_with_chunked_text():
    handler = AnnotatedTextHandler()
    paragraph = handler._process_paragraph([
        (" 1", ("b", "del")),
@ -108,18 +106,18 @@ class TestAnnotatedText(unittest.TestCase):
        (" 6", ("em",)),
    ])

-        expected = (
+    assert paragraph == (
        ("1 2", ("b", "del")),
        ("3 4 5", None),
        ("6", ("em",)),
    )
-        self.assertEqual(paragraph, expected)

-    def test_include_heading(self):
+
+def test_include_heading():
    dom = document_fromstring(load_snippet("h1_and_2_paragraphs.html"))
    annotated_text = AnnotatedTextHandler.parse(dom.find("body"))

-        expected = [
+    assert annotated_text == [
        (
            ('Nadpis H1, ktorý chce byť prvý s textom ale predbehol ho "title"', ("h1",)),
            ("Toto je prvý odstavec a to je fajn.", None),
@ -128,13 +126,13 @@ class TestAnnotatedText(unittest.TestCase):
            ("Tento text je tu aby vyplnil prázdne miesto v srdci súboru.\nAj súbory majú predsa city.", None),
        ),
    ]
-        self.assertSequenceEqual(annotated_text, expected)

-    def test_real_article(self):
+
+def test_real_article():
    article = Article(load_article("zdrojak_automaticke_zabezpeceni.html"))
    annotated_text = article.main_text

-        expected = [
+    assert annotated_text == [
        (
            ("Automatické zabezpečení", ("h1",)),
            ("Úroveň zabezpečení aplikace bych rozdělil do tří úrovní:", None),
@ -170,4 +168,3 @@ class TestAnnotatedText(unittest.TestCase):
            (".", None),
        ),
    ]
-        self.assertSequenceEqual(annotated_text, expected)
--- a/tests/test_articles/test_antipope_org/test_article.py
+++ b/tests/test_articles/test_antipope_org/test_article.py
@ -1,42 +1,45 @@
 # -*- coding: utf8 -*-

-from __future__ import absolute_import
-from __future__ import division, print_function, unicode_literals
+"""Test the scoring and parsing of the Blog Post"""
+
+from __future__ import absolute_import, division, print_function, unicode_literals

 import os

-from breadability.readable import Article
-from ...compat import unittest
+import pytest

+from breadability.readable import Article

-class TestAntipopeBlog(unittest.TestCase):
-    """Test the scoring and parsing of the Blog Post"""

-    def setUp(self):
+@pytest.fixture(scope="module")
+def article():
    """Load up the article for us"""
    article_path = os.path.join(os.path.dirname(__file__), 'article.html')
-        self.article = open(article_path).read()
+    with open(article_path) as file:
+        return file.read()

-    def tearDown(self):
-        """Drop the article"""
-        self.article = None

-    def test_parses(self):
+def test_parses(article):
    """Verify we can parse the document."""
-        doc = Article(self.article)
-        self.assertTrue('id="readabilityBody"' in doc.readable)
+    doc = Article(article)
+
+    assert 'id="readabilityBody"' in doc.readable

-    def test_comments_cleaned(self):
+
+def test_comments_cleaned(article):
    """The div with the comments should be removed."""
-        doc = Article(self.article)
-        self.assertTrue('class="comments"' not in doc.readable)
+    doc = Article(article)
+
+    assert 'class="comments"' not in doc.readable

-    def test_beta_removed(self):
+
+def test_beta_removed(article):
    """The id=beta element should be removed

    It's link heavy and causing a lot of garbage content. This should be
    removed.

    """
-        doc = Article(self.article)
-        self.assertTrue('id="beta"' not in doc.readable)
+    doc = Article(article)
+
+    assert 'id="beta"' not in doc.readable
--- a/tests/test_articles/test_businessinsider-com/test_article.py
+++ b/tests/test_articles/test_businessinsider-com/test_article.py
@ -1,33 +1,34 @@
-import os
-try:
-    # Python < 2.7
-    import unittest2 as unittest
-except ImportError:
-    import unittest
+# -*- coding: utf8 -*-

-from breadability.readable import Article
+"""Test the scoring and parsing of the Blog Post"""

+from __future__ import absolute_import, division, print_function, unicode_literals

-class TestBusinessInsiderArticle(unittest.TestCase):
-    """Test the scoring and parsing of the Blog Post"""
+import os

-    def setUp(self):
+import pytest
+
+from breadability.readable import Article

+
+@pytest.fixture(scope="module")
+def article():
    """Load up the article for us"""
    article_path = os.path.join(os.path.dirname(__file__), 'article.html')
-        self.article = open(article_path).read()
+    with open(article_path) as file:
+        return file.read()

-    def tearDown(self):
-        """Drop the article"""
-        self.article = None

-    def test_parses(self):
+def test_parses(article):
    """Verify we can parse the document."""
-        doc = Article(self.article)
-        self.assertTrue('id="readabilityBody"' in doc.readable)
+    doc = Article(article)
+
+    assert 'id="readabilityBody"' in doc.readable

-    def test_images_preserved(self):
+
+def test_images_preserved(article):
    """The div with the comments should be removed."""
-        doc = Article(self.article)
-        self.assertTrue('bharath-kumar-a-co-founder-at-pugmarksme-suggests-working-on-a-sunday-late-night.jpg' in doc.readable)
-        self.assertTrue('bryan-guido-hassin-a-university-professor-and-startup-junkie-uses-airplane-days.jpg' in doc.readable)
+    doc = Article(article)
+
+    assert 'bharath-kumar-a-co-founder-at-pugmarksme-suggests-working-on-a-sunday-late-night.jpg' in doc.readable
+    assert 'bryan-guido-hassin-a-university-professor-and-startup-junkie-uses-airplane-days.jpg' in doc.readable
--- a/tests/test_articles/test_businessinsider_com/test_article.py
+++ b/tests/test_articles/test_businessinsider_com/test_article.py
@ -1,39 +1,33 @@
 # -*- coding: utf8 -*-

-from __future__ import absolute_import
-from __future__ import division, print_function, unicode_literals
+"""
+Test the scoring and parsing of the article from URL below:
+http://www.businessinsider.com/tech-ceos-favorite-productivity-hacks-2013-8
+"""

-from os.path import join, dirname
-from breadability.readable import Article
-from ...compat import unittest
+from __future__ import absolute_import, division, print_function, unicode_literals
+
+import os
+
+import pytest

+from breadability.readable import Article

-class TestArticle(unittest.TestCase):
-    """
-    Test the scoring and parsing of the article from URL below:
-    http://www.businessinsider.com/tech-ceos-favorite-productivity-hacks-2013-8
-    """

-    def setUp(self):
+@pytest.fixture(scope="module")
+def article():
    """Load up the article for us"""
-        article_path = join(dirname(__file__), "article.html")
+    article_path = os.path.join(os.path.dirname(__file__), 'article.html')
    with open(article_path, "rb") as file:
-            self.document = Article(file.read(), "http://www.businessinsider.com/tech-ceos-favorite-productivity-hacks-2013-8")
+        return Article(file.read(), "http://www.businessinsider.com/tech-ceos-favorite-productivity-hacks-2013-8")

-    def tearDown(self):
-        """Drop the article"""
-        self.document = None

-    def test_parses(self):
+def test_parses(article):
    """Verify we can parse the document."""
-        self.assertIn('id="readabilityBody"', self.document.readable)
+    assert 'id="readabilityBody"' in article.readable

-    def test_images_preserved(self):
-        """The div with the comments should be removed."""
-        images = [
-            'bharath-kumar-a-co-founder-at-pugmarksme-suggests-working-on-a-sunday-late-night.jpg',
-            'bryan-guido-hassin-a-university-professor-and-startup-junkie-uses-airplane-days.jpg',
-        ]

-        for image in images:
-            self.assertIn(image, self.document.readable, image)
+def test_images_preserved(article):
+    """The article images should be preserved in the readable output."""
+    assert 'bharath-kumar-a-co-founder-at-pugmarksme-suggests-working-on-a-sunday-late-night.jpg' in article.readable
+    assert 'bryan-guido-hassin-a-university-professor-and-startup-junkie-uses-airplane-days.jpg' in article.readable
--- a/tests/test_articles/test_cz_zdrojak_tests/test_article.py
+++ b/tests/test_articles/test_cz_zdrojak_tests/test_article.py
@ -1,44 +1,44 @@
 # -*- coding: utf8 -*-

-from __future__ import absolute_import
-from __future__ import division, print_function, unicode_literals
+"""
+Test the scoring and parsing of the article from URL below:
+http://www.zdrojak.cz/clanky/jeste-k-testovani/
+"""

-from os.path import join, dirname
-from breadability.readable import Article
-from breadability._compat import unicode
-from ...compat import unittest
+from __future__ import absolute_import, division, print_function, unicode_literals
+
+import os
+
+import pytest

+from breadability._compat import unicode
+from breadability.readable import Article

-class TestArticle(unittest.TestCase):
-    """
-    Test the scoring and parsing of the article from URL below:
-    http://www.zdrojak.cz/clanky/jeste-k-testovani/
-    """

-    def setUp(self):
+@pytest.fixture(scope="module")
+def article():
    """Load up the article for us"""
-        article_path = join(dirname(__file__), "article.html")
+    article_path = os.path.join(os.path.dirname(__file__), 'article.html')
    with open(article_path, "rb") as file:
-            self.document = Article(file.read(), "http://www.zdrojak.cz/clanky/jeste-k-testovani/")
+        return Article(file.read(), "http://www.zdrojak.cz/clanky/jeste-k-testovani/")

-    def tearDown(self):
-        """Drop the article"""
-        self.document = None

-    def test_parses(self):
+def test_parses(article):
    """Verify we can parse the document."""
-        self.assertIn('id="readabilityBody"', self.document.readable)
+    assert 'id="readabilityBody"' in article.readable

-    def test_content_exists(self):
+
+def test_content_exists(article):
    """Verify that some content exists."""
-        self.assertIsInstance(self.document.readable, unicode)
+    assert isinstance(article.readable, unicode)

    text = "S automatizovaným testováním kódu (a ve zbytku článku budu mít na mysli právě to) jsem se setkal v několika firmách."
-        self.assertIn(text, self.document.readable)
+    assert text in article.readable

    text = "Ke čtení naleznete mnoho různých materiálů, od teoretických po praktické ukázky."
-        self.assertIn(text, self.document.readable)
+    assert text in article.readable
+

-    def test_content_does_not_exist(self):
+def test_content_does_not_exist(article):
    """Verify we cleaned out some content that shouldn't exist."""
-        self.assertNotIn("Pokud vás problematika zajímá, využijte možnosti navštívit školení", self.document.readable)
+    assert "Pokud vás problematika zajímá, využijte možnosti navštívit školení" not in article.readable
--- a/tests/test_articles/test_scripting_com/test_article.py
+++ b/tests/test_articles/test_scripting_com/test_article.py
@ -1,72 +1,62 @@
 # -*- coding: utf8 -*-

-from __future__ import (
-    absolute_import,
-    division,
-    print_function,
-    unicode_literals
-)
+"""Test the scoring and parsing of the Article"""

-import os
+from __future__ import absolute_import, division, print_function, unicode_literals

+import os
 from operator import attrgetter
-from breadability.readable import Article
-from breadability.readable import check_siblings
-from breadability.readable import prep_article
-from ...compat import unittest

+import pytest
+
+from breadability.readable import Article, check_siblings, prep_article

-class TestArticle(unittest.TestCase):
-    """Test the scoring and parsing of the Article"""

-    def setUp(self):
+@pytest.fixture(scope="module")
+def article():
    """Load up the article for us"""
    article_path = os.path.join(os.path.dirname(__file__), 'article.html')
-        self.article = open(article_path).read()
+    with open(article_path) as file:
+        return Article(file.read())

-    def tearDown(self):
-        """Drop the article"""
-        self.article = None

-    def test_parses(self):
+def test_parses(article):
    """Verify we can parse the document."""
-        doc = Article(self.article)
-        self.assertTrue('id="readabilityBody"' in doc.readable)
+    assert 'id="readabilityBody"' in article.readable

-    def test_content_exists(self):
+
+def test_content_exists(article):
    """Verify that some content exists."""
-        doc = Article(self.article)
-        self.assertTrue('Amazon and Google' in doc.readable)
-        self.assertFalse('Linkblog updated' in doc.readable)
-        self.assertFalse(
-            '#anExampleGoogleDoesntIntendToShareBlogAndItWill' in doc.readable)
-
-    @unittest.skip("Test fails because of some weird hash.")
-    def test_candidates(self):
+    assert 'Amazon and Google' in article.readable
+    assert not 'Linkblog updated' in article.readable
+    assert not '#anExampleGoogleDoesntIntendToShareBlogAndItWill' in article.readable
+
+
+@pytest.mark.skip("Test fails because of some weird hash.")
+def test_candidates(article):
    """Verify we have candidates."""
-        doc = Article(self.article)
    # from lxml.etree import tounicode
    found = False
    wanted_hash = '04e46055'

-        for node in doc.candidates.values():
+    for node in article.candidates.values():
        if node.hash_id == wanted_hash:
            found = node

-        self.assertTrue(found)
+    assert found

    # we have the right node, it must be deleted for some reason if it's
    # not still there when we need it to be.
    # Make sure it's not in our to drop list.
-        for node in doc._should_drop:
-            self.assertFalse(node == found.node)
+    for node in article._should_drop:
+        assert node != found.node

    by_score = sorted(
-            [c for c in doc.candidates.values()],
+        [c for c in article.candidates.values()],
        key=attrgetter('content_score'), reverse=True)
-        self.assertTrue(by_score[0].node == found.node)
+    assert by_score[0].node == found.node

-        updated_winner = check_siblings(by_score[0], doc.candidates)
+    updated_winner = check_siblings(by_score[0], article.candidates)
    updated_winner.node = prep_article(updated_winner.node)

    # This article hits up against the img > p conditional filtering
--- a/tests/test_articles/test_sweetshark/test_article.py
+++ b/tests/test_articles/test_sweetshark/test_article.py
@ -1,33 +1,32 @@
 # -*- coding: utf8 -*-

-from __future__ import absolute_import
-from __future__ import division, print_function, unicode_literals
+"""
+Test the scoring and parsing of the article from URL below:
+http://sweetshark.livejournal.com/11564.html
+"""

-from os.path import join, dirname
-from breadability.readable import Article
-from ...compat import unittest
+from __future__ import absolute_import, division, print_function, unicode_literals
+
+import os

+import pytest

-class TestSweetsharkBlog(unittest.TestCase):
-    """
-    Test the scoring and parsing of the article from URL below:
-    http://sweetshark.livejournal.com/11564.html
-    """
+from breadability.readable import Article

-    def setUp(self):
+
+@pytest.fixture(scope="module")
+def article():
    """Load up the article for us"""
-        article_path = join(dirname(__file__), "article.html")
+    article_path = os.path.join(os.path.dirname(__file__), 'article.html')
    with open(article_path, "rb") as file:
-            self.document = Article(file.read(), "http://sweetshark.livejournal.com/11564.html")
+        return Article(file.read(), "http://sweetshark.livejournal.com/11564.html")

-    def tearDown(self):
-        """Drop the article"""
-        self.document = None

-    def test_parses(self):
+def test_parses(article):
    """Verify we can parse the document."""
-        self.assertIn('id="readabilityBody"', self.document.readable)
+    assert 'id="readabilityBody"' in article.readable
+

-    def test_content_after_video(self):
+def test_content_after_video(article):
    """The div with the comments should be removed."""
-        self.assertIn('Stay hungry, Stay foolish', self.document.readable)
+    assert 'Stay hungry, Stay foolish' in article.readable
--- a/tests/test_orig_document.py
+++ b/tests/test_orig_document.py
@ -1,5 +1,7 @@
 # -*- coding: utf8 -*-

+"""Verify we can process html into a document to work off of."""
+
 from __future__ import absolute_import
 from __future__ import division, print_function, unicode_literals

@ -15,47 +17,44 @@ from breadability.document import (
    decode_html,
    OriginalDocument,
 )
-from .compat import unittest
 from .utils import load_snippet


-class TestOriginalDocument(unittest.TestCase):
-    """Verify we can process html into a document to work off of."""
-
-    def test_convert_br_tags_to_paragraphs(self):
+def test_convert_br_tags_to_paragraphs():
    returned = convert_breaks_to_paragraphs(
        ("<div>HI<br><br>How are you?<br><br> \t \n  <br>"
         "Fine\n I guess</div>"))

-        self.assertEqual(
-            returned,
-            "<div>HI</p><p>How are you?</p><p>Fine\n I guess</div>")
+    assert returned == "<div>HI</p><p>How are you?</p><p>Fine\n I guess</div>"
+

-    def test_convert_hr_tags_to_paragraphs(self):
+def test_convert_hr_tags_to_paragraphs():
    returned = convert_breaks_to_paragraphs(
        "<div>HI<br><br>How are you?<hr/> \t \n  <br>Fine\n I guess</div>")

-        self.assertEqual(
-            returned,
-            "<div>HI</p><p>How are you?</p><p>Fine\n I guess</div>")
+    assert returned == "<div>HI</p><p>How are you?</p><p>Fine\n I guess</div>"

-    def test_readin_min_document(self):
+
+def test_readin_min_document():
    """Verify we can read in a min html document"""
    doc = OriginalDocument(load_snippet('document_min.html'))
-        self.assertTrue(to_unicode(doc).startswith('<html>'))
-        self.assertEqual(doc.title, 'Min Document Title')

-    def test_readin_with_base_url(self):
+    assert to_unicode(doc).startswith('<html>')
+    assert doc.title == 'Min Document Title'
+
+
+def test_readin_with_base_url():
    """Passing a url should update links to be absolute links"""
    doc = OriginalDocument(
        load_snippet('document_absolute_url.html'),
        url="http://blog.mitechie.com/test.html")
-        self.assertTrue(to_unicode(doc).startswith('<html>'))
+
+    assert to_unicode(doc).startswith('<html>')

    # find the links on the page and make sure each one starts with out
    # base url we told it to use.
    links = doc.links
-        self.assertEqual(len(links), 3)
+    assert len(links) == 3
    # we should have two links that start with our blog url
    # and one link that starts with amazon
    link_counts = defaultdict(int)
@ -65,38 +64,50 @@ class TestOriginalDocument(unittest.TestCase):
        else:
            link_counts['other'] += 1

-        self.assertEqual(link_counts['blog'], 2)
-        self.assertEqual(link_counts['other'], 1)
+    assert link_counts['blog'] == 2
+    assert link_counts['other'] == 1
+

-    def test_no_br_allowed(self):
+def test_no_br_allowed():
    """We convert all <br/> tags to <p> tags"""
    doc = OriginalDocument(load_snippet('document_min.html'))
-        self.assertIsNone(doc.dom.find('.//br'))

-    def test_empty_title(self):
+    assert doc.dom.find('.//br') is None
+
+
+def test_empty_title():
    """We convert all <br/> tags to <p> tags"""
    document = OriginalDocument(
        "<html><head><title></title></head><body></body></html>")
-        self.assertEqual(document.title, "")

-    def test_title_only_with_tags(self):
+    assert document.title == ""
+
+
+def test_title_only_with_tags():
    """We convert all <br/> tags to <p> tags"""
    document = OriginalDocument(
        "<html><head><title><em></em></title></head><body></body></html>")
-        self.assertEqual(document.title, "")

-    def test_no_title(self):
+    assert document.title == ""
+
+
+def test_no_title():
    """We convert all <br/> tags to <p> tags"""
    document = OriginalDocument("<html><head></head><body></body></html>")
-        self.assertEqual(document.title, "")

-    def test_encoding(self):
+    assert document.title == ""
+
+
+def test_encoding():
    text = "ľščťžýáíéäúňôůě".encode("iso-8859-2")
    html = decode_html(text)
-        self.assertEqual(type(html), unicode)

-    def test_encoding_short(self):
+    assert type(html) is unicode
+
+
+def test_encoding_short():
    text = to_bytes("ľščťžýáíé")
    html = decode_html(text)
-        self.assertEqual(type(html), unicode)
-        self.assertEqual(html, "ľščťžýáíé")
+
+    assert type(html) is unicode
+    assert html == "ľščťžýáíé"
--- a/tests/test_readable.py
+++ b/tests/test_readable.py
@ -1,92 +1,95 @@
 # -*- coding: utf8 -*-

-from __future__ import absolute_import
-from __future__ import division, print_function, unicode_literals
+from __future__ import absolute_import, division, print_function, unicode_literals

+import pytest
 from lxml.etree import tounicode
-from lxml.html import document_fromstring
-from lxml.html import fragment_fromstring
+from lxml.html import document_fromstring, fragment_fromstring
+
 from breadability._compat import to_unicode
-from breadability.readable import (
-    Article,
-    get_class_weight,
-    get_link_density,
-    is_bad_link,
-    leaf_div_elements_into_paragraphs,
-    score_candidates,
-)
+from breadability.readable import (Article, get_class_weight, get_link_density, is_bad_link,
+                                   leaf_div_elements_into_paragraphs, score_candidates, )
 from breadability.scoring import ScoredNode
-from .compat import unittest
-from .utils import load_snippet, load_article
+from .utils import load_article, load_snippet

+# TestReadableDocument
+"""Verify we can process html into a document to work off of."""

-class TestReadableDocument(unittest.TestCase):
-    """Verify we can process html into a document to work off of."""

-    def test_load_doc(self):
+def test_load_doc():
    """We get back an element tree from our original doc"""
    doc = Article(load_snippet('document_min.html'))
    # We get back the document as a div tag currently by default.
-        self.assertEqual(doc.readable_dom.tag, 'div')

-    def test_title_loads(self):
+    assert doc.readable_dom.tag == 'div'
+
+
+def test_title_loads():
    """Verify we can fetch the title of the parsed article"""
    doc = Article(load_snippet('document_min.html'))
-        self.assertEqual(
-            doc._original_document.title,
-            'Min Document Title'
-        )

-    def test_doc_no_scripts_styles(self):
+    assert doc._original_document.title == 'Min Document Title'
+
+
+def test_doc_no_scripts_styles():
    """Step #1 remove all scripts from the document"""
    doc = Article(load_snippet('document_scripts.html'))
    readable = doc.readable_dom
-        self.assertEqual(readable.findall(".//script"), [])
-        self.assertEqual(readable.findall(".//style"), [])
-        self.assertEqual(readable.findall(".//link"), [])

-    def test_find_body_exists(self):
+    assert readable.findall(".//script") == []
+    assert readable.findall(".//style") == []
+    assert readable.findall(".//link") == []
+
+
+def test_find_body_exists():
    """If the document has a body, we store that as the readable html

    No sense processing anything other than the body content.

    """
    doc = Article(load_snippet('document_min.html'))
-        self.assertEqual(doc.readable_dom.tag, 'div')
-        self.assertEqual(doc.readable_dom.get('id'), 'readabilityBody')

-    def test_body_doesnt_exist(self):
+    assert doc.readable_dom.tag == 'div'
+    assert doc.readable_dom.get('id') == 'readabilityBody'
+
+
+def test_body_doesnt_exist():
    """If we can't find a body, then we create one.

    We build our doc around the rest of the html we parsed.

    """
    doc = Article(load_snippet('document_no_body.html'))
-        self.assertEqual(doc.readable_dom.tag, 'div')
-        self.assertEqual(doc.readable_dom.get('id'), 'readabilityBody')

-    def test_bare_content(self):
+    assert doc.readable_dom.tag == 'div'
+    assert doc.readable_dom.get('id') == 'readabilityBody'
+
+
+def test_bare_content():
    """If the document is just pure content, no html tags we should be ok

    We build our doc around the rest of the html we parsed.

    """
    doc = Article(load_snippet('document_only_content.html'))
-        self.assertEqual(doc.readable_dom.tag, 'div')
-        self.assertEqual(doc.readable_dom.get('id'), 'readabilityBody')

-    def test_no_content(self):
+    assert doc.readable_dom.tag == 'div'
+    assert doc.readable_dom.get('id') == 'readabilityBody'
+
+
+def test_no_content():
    """Without content we supply an empty unparsed doc."""
    doc = Article('')
-        self.assertEqual(doc.readable_dom.tag, 'div')
-        self.assertEqual(doc.readable_dom.get('id'), 'readabilityBody')
-        self.assertEqual(doc.readable_dom.get('class'), 'parsing-error')

+    assert doc.readable_dom.tag == 'div'
+    assert doc.readable_dom.get('id') == 'readabilityBody'
+    assert doc.readable_dom.get('class') == 'parsing-error'

-class TestCleaning(unittest.TestCase):
-    """Test out our cleaning processing we do."""

-    def test_unlikely_hits(self):
+# Test out our cleaning processing we do.
+
+
+def test_unlikely_hits():
    """Verify we wipe out things from our unlikely list."""
    doc = Article(load_snippet('test_readable_unlikely.html'))
    readable = doc.readable_dom
@ -109,7 +112,7 @@ class TestCleaning(unittest.TestCase):
            for cls in test.get('class').split():
                if cls in want_to_appear:
                    found = True
-                self.assertTrue(found)
+            assert found

        by_ids = readable.get_element_by_id(i, False)
        if by_ids is not False:
@ -117,9 +120,10 @@ class TestCleaning(unittest.TestCase):
            for ids in test.get('id').split():
                if ids in want_to_appear:
                    found = True
-                self.assertTrue(found)
+            assert found

-    def test_misused_divs_transform(self):
+
+def test_misused_divs_transform():
    """Verify we replace leaf node divs with p's

    They should have the same content, just be a p vs a div
@ -127,39 +131,32 @@ class TestCleaning(unittest.TestCase):
    """
    test_html = "<html><body><div>simple</div></body></html>"
    test_doc = document_fromstring(test_html)
-        self.assertEqual(
-            tounicode(
-                leaf_div_elements_into_paragraphs(test_doc)),
-            to_unicode("<html><body><p>simple</p></body></html>")
+    assert tounicode(leaf_div_elements_into_paragraphs(test_doc)) == to_unicode(
+        "<html><body><p>simple</p></body></html>"
    )

    test_html2 = ('<html><body><div>simple<a href="">link</a>'
                  '</div></body></html>')
    test_doc2 = document_fromstring(test_html2)
-        self.assertEqual(
-            tounicode(
-                leaf_div_elements_into_paragraphs(test_doc2)),
-            to_unicode(
-                '<html><body><p>simple<a href="">link</a></p></body></html>')
+    assert tounicode(leaf_div_elements_into_paragraphs(test_doc2)) == to_unicode(
+        '<html><body><p>simple<a href="">link</a></p></body></html>'
    )

-    def test_dont_transform_div_with_div(self):
+
+def test_dont_transform_div_with_div():
    """Verify that only child <div> element is replaced by <p>."""
    dom = document_fromstring(
        "<html><body><div>text<div>child</div>"
        "aftertext</div></body></html>"
    )

-        self.assertEqual(
-            tounicode(
-                leaf_div_elements_into_paragraphs(dom)),
-            to_unicode(
+    assert tounicode(leaf_div_elements_into_paragraphs(dom)) == to_unicode(
        "<html><body><div>text<p>child</p>"
        "aftertext</div></body></html>"
    )
-        )

-    def test_bad_links(self):
+
+def test_bad_links():
    """Some links should just not belong."""
    bad_links = [
        '<a name="amazonAndGoogleHaveMadeAnAudaciousGrabOfNamespaceOnTheInternetAsFarAsICanSeeTheresBeenNoMentionOfThisInTheTechPress">&nbsp;</a>',
@ -169,13 +166,13 @@ class TestCleaning(unittest.TestCase):

    for l in bad_links:
        link = fragment_fromstring(l)
-            self.assertTrue(is_bad_link(link))
+        assert is_bad_link(link)
+

+# Candidate nodes are scoring containers we use.

-class TestCandidateNodes(unittest.TestCase):
-    """Candidate nodes are scoring containers we use."""

-    def test_candidate_scores(self):
+def test_candidate_scores():
    """We should be getting back objects with some scores."""
    fives = ['<div/>']
    threes = ['<pre/>', '<td/>', '<blockquote/>']
@ -184,54 +181,59 @@ class TestCandidateNodes(unittest.TestCase):

    for n in fives:
        doc = fragment_fromstring(n)
-            self.assertEqual(ScoredNode(doc).content_score, 5)
+        assert ScoredNode(doc).content_score == 5

    for n in threes:
        doc = fragment_fromstring(n)
-            self.assertEqual(ScoredNode(doc).content_score, 3)
+        assert ScoredNode(doc).content_score == 3

    for n in neg_threes:
        doc = fragment_fromstring(n)
-            self.assertEqual(ScoredNode(doc).content_score, -3)
+        assert ScoredNode(doc).content_score == -3

    for n in neg_fives:
        doc = fragment_fromstring(n)
-            self.assertEqual(ScoredNode(doc).content_score, -5)
+        assert ScoredNode(doc).content_score == -5

-    def test_article_enables_candidate_access(self):
+
+def test_article_enables_candidate_access():
    """Candidates are accessible after document processing."""
    doc = Article(load_article('ars.001.html'))
-        self.assertTrue(hasattr(doc, 'candidates'))
+
+    assert hasattr(doc, 'candidates')


-class TestClassWeights(unittest.TestCase):
-    """Certain ids and classes get us bonus points."""
+# Certain ids and classes get us bonus points.

-    def test_positive_class(self):
+
+def test_positive_class():
    """Some classes get us bonus points."""
    node = fragment_fromstring('<p class="article">')
-        self.assertEqual(get_class_weight(node), 25)
+    assert get_class_weight(node) == 25
+

-    def test_positive_ids(self):
+def test_positive_ids():
    """Some ids get us bonus points."""
    node = fragment_fromstring('<p id="content">')
-        self.assertEqual(get_class_weight(node), 25)
+    assert get_class_weight(node) == 25

-    def test_negative_class(self):
+
+def test_negative_class():
    """Some classes get us negative points."""
    node = fragment_fromstring('<p class="comment">')
-        self.assertEqual(get_class_weight(node), -25)
+    assert get_class_weight(node) == -25
+

-    def test_negative_ids(self):
+def test_negative_ids():
    """Some ids get us negative points."""
    node = fragment_fromstring('<p id="media">')
-        self.assertEqual(get_class_weight(node), -25)
+    assert get_class_weight(node) == -25


-class TestScoringNodes(unittest.TestCase):
-    """We take out list of potential nodes and score them up."""
+# We take our list of potential nodes and score them up.

-    def test_we_get_candidates(self):
+
+def test_we_get_candidates():
    """Processing candidates should get us a list of nodes to try out."""
    doc = document_fromstring(load_article("ars.001.html"))
    test_nodes = tuple(doc.iter("p", "td", "pre"))
@ -239,13 +241,14 @@ class TestScoringNodes(unittest.TestCase):

    # this might change as we tweak our algorithm, but if it does,
    # it signifies we need to look at what we changed.
-        self.assertEqual(len(candidates.keys()), 37)
+    assert len(candidates.keys()) == 37

    # one of these should have a decent score
    scores = sorted(c.content_score for c in candidates.values())
-        self.assertTrue(scores[-1] > 100)
+    assert scores[-1] > 100
+

-    def test_bonus_score_per_100_chars_in_p(self):
+def test_bonus_score_per_100_chars_in_p():
    """Nodes get 1 point per 100 characters up to max. 3 points."""
    def build_candidates(length):
        html = "<p>%s</p>" % ("c" * length)
@ -269,66 +272,69 @@ class TestScoringNodes(unittest.TestCase):
    candidates = score_candidates(test_nodes)
    pscore_400 = max(c.content_score for c in candidates.values())

-        self.assertAlmostEqual(pscore_50 + 0.5, pscore_100)
-        self.assertAlmostEqual(pscore_100 + 2.0, pscore_300)
-        self.assertAlmostEqual(pscore_300, pscore_400)
+    assert pscore_50 + 0.5 == pscore_100
+    assert pscore_100 + 2.0 == pscore_300
+    assert pscore_300 == pscore_400
+

+# Link density will adjust our candidate scores.

-class TestLinkDensityScoring(unittest.TestCase):
-    """Link density will adjust out candidate scoresself."""

-    def test_link_density(self):
+def test_link_density():
    """Test that we get a link density"""
    doc = document_fromstring(load_article('ars.001.html'))
    for node in doc.iter('p', 'td', 'pre'):
        density = get_link_density(node)

        # the density must be between 0, 1
-            self.assertTrue(density >= 0.0 and density <= 1.0)
+        assert density >= 0.0 and density <= 1.0


-class TestSiblings(unittest.TestCase):
-    """Siblings will be included if their content is related."""
+# Siblings will be included if their content is related.

-    @unittest.skip("Not implemented yet.")
-    def test_bad_siblings_not_counted(self):
+
+@pytest.mark.skip("Not implemented yet.")
+def test_bad_siblings_not_counted():
    raise NotImplementedError()

-    @unittest.skip("Not implemented yet.")
-    def test_good_siblings_counted(self):
+
+@pytest.mark.skip("Not implemented yet.")
+def test_good_siblings_counted():
    raise NotImplementedError()


-class TestMainText(unittest.TestCase):
-    def test_empty(self):
+# TestMainText
+
+def test_empty():
    article = Article("")
    annotated_text = article.main_text

-        self.assertEqual(annotated_text, [])
+    assert annotated_text == []
+

-    def test_no_annotations(self):
+def test_no_annotations():
    article = Article("<div><p>This is text with no annotations</p></div>")
    annotated_text = article.main_text

-        self.assertEqual(annotated_text,
-            [(("This is text with no annotations", None),)])
+    assert annotated_text == [(("This is text with no annotations", None),)]

-    def test_one_annotation(self):
+
+def test_one_annotation():
    article = Article("<div><p>This is text\r\twith <del>no</del> annotations</p></div>")
    annotated_text = article.main_text

-        expected = [(
+    assert annotated_text == [(
        ("This is text\nwith", None),
        ("no", ("del",)),
        ("annotations", None),
    )]
-        self.assertEqual(annotated_text, expected)

-    def test_simple_snippet(self):
+
+def test_simple_snippet():
    snippet = Article(load_snippet("annotated_1.html"))
    annotated_text = snippet.main_text

-        expected = [
+    assert annotated_text == [
        (
            ("Paragraph is more", None),
            ("better", ("em",)),
@ -344,4 +350,3 @@ class TestMainText(unittest.TestCase):
            ("me :)", None),
        )
    ]
-        self.assertEqual(annotated_text, expected)
--- a/tests/test_scoring.py
+++ b/tests/test_scoring.py
@ -1,143 +1,146 @@
 # -*- coding: utf8 -*-

-from __future__ import absolute_import
-from __future__ import division, print_function, unicode_literals
+from __future__ import absolute_import, division, print_function, unicode_literals

 import re
-
 from operator import attrgetter
-from lxml.html import document_fromstring
-from lxml.html import fragment_fromstring
-from breadability.readable import Article
-from breadability.scoring import (
-    check_node_attributes,
-    generate_hash_id,
-    get_class_weight,
-    score_candidates,
-    ScoredNode,
-)
-from breadability.readable import (
-    get_link_density,
-    is_unlikely_node,
-)
-from .compat import unittest
+
+from lxml.html import document_fromstring, fragment_fromstring
+
+from breadability.readable import Article, get_link_density, is_unlikely_node
+from breadability.scoring import (ScoredNode, check_node_attributes, generate_hash_id, get_class_weight,
+                                  score_candidates)
 from .utils import load_snippet


-class TestHashId(unittest.TestCase):
-    def test_generate_hash(self):
+def test_generate_hash():
    dom = fragment_fromstring("<div>ľščťžýáí</div>")
    generate_hash_id(dom)

-    def test_hash_from_id_on_exception(self):
+
+def test_hash_from_id_on_exception():
    generate_hash_id(None)

-    def test_different_hashes(self):
+
+def test_different_hashes():
    dom = fragment_fromstring("<div>ľščťžýáí</div>")
    hash_dom = generate_hash_id(dom)
    hash_none = generate_hash_id(None)

-        self.assertNotEqual(hash_dom, hash_none)
+    assert hash_dom != hash_none

-    def test_equal_hashes(self):
+
+def test_equal_hashes():
    dom1 = fragment_fromstring("<div>ľščťžýáí</div>")
    dom2 = fragment_fromstring("<div>ľščťžýáí</div>")
    hash_dom1 = generate_hash_id(dom1)
    hash_dom2 = generate_hash_id(dom2)
-        self.assertEqual(hash_dom1, hash_dom2)
+    assert hash_dom1 == hash_dom2

    hash_none1 = generate_hash_id(None)
    hash_none2 = generate_hash_id(None)
-        self.assertEqual(hash_none1, hash_none2)
+    assert hash_none1 == hash_none2


-class TestCheckNodeAttr(unittest.TestCase):
-    """Verify a node has a class/id in the given set.
+# Verify a node has a class/id in the given set.
+# The idea is that we have sets of known good/bad ids and classes and need
+# to verify the given node does/doesn't have those classes/ids.

-    The idea is that we have sets of known good/bad ids and classes and need
-    to verify the given node does/doesn't have those classes/ids.

-    """
-    def test_has_class(self):
+def test_has_class():
    """Verify that a node has a class in our set."""
    test_pattern = re.compile('test1|test2', re.I)
    test_node = fragment_fromstring('<div/>')
    test_node.set('class', 'test2 comment')

-        self.assertTrue(
-            check_node_attributes(test_pattern, test_node, 'class'))
+    assert check_node_attributes(test_pattern, test_node, 'class')
+

-    def test_has_id(self):
+def test_has_id():
    """Verify that a node has an id in our set."""
    test_pattern = re.compile('test1|test2', re.I)
    test_node = fragment_fromstring('<div/>')
    test_node.set('id', 'test2')

-        self.assertTrue(check_node_attributes(test_pattern, test_node, 'id'))
+    assert check_node_attributes(test_pattern, test_node, 'id')

-    def test_lacks_class(self):
+
+def test_lacks_class():
    """Verify that a node does not have a class in our set."""
    test_pattern = re.compile('test1|test2', re.I)
    test_node = fragment_fromstring('<div/>')
    test_node.set('class', 'test4 comment')
-        self.assertFalse(
-            check_node_attributes(test_pattern, test_node, 'class'))

-    def test_lacks_id(self):
+    assert not check_node_attributes(test_pattern, test_node, 'class')
+
+
+def test_lacks_id():
    """Verify that a node does not have an id in our set."""
    test_pattern = re.compile('test1|test2', re.I)
    test_node = fragment_fromstring('<div/>')
    test_node.set('id', 'test4')
-        self.assertFalse(check_node_attributes(test_pattern, test_node, 'id'))

+    assert not check_node_attributes(test_pattern, test_node, 'id')
+
+
+# Verify we calc our link density correctly.

-class TestLinkDensity(unittest.TestCase):
-    """Verify we calc our link density correctly."""

-    def test_empty_node(self):
+def test_empty_node():
    """An empty node doesn't have much of a link density"""
    doc = Article("<div></div>")
-        self.assertEqual(get_link_density(doc.readable_dom), 0.0)

-    def test_small_doc_no_links(self):
+    assert get_link_density(doc.readable_dom) == 0.0
+
+
+def test_small_doc_no_links():
    doc = Article(load_snippet('document_min.html'))
-        self.assertEqual(get_link_density(doc.readable_dom), 0.0)

-    def test_several_links(self):
+    assert get_link_density(doc.readable_dom) == 0.0
+
+
+def test_several_links():
    """This doc has a 3 links with the majority of content."""
    doc = Article(load_snippet('document_absolute_url.html'))
-        self.assertAlmostEqual(get_link_density(doc.readable_dom), 22/37)
+
+    assert get_link_density(doc.readable_dom) == 22/37


-class TestClassWeight(unittest.TestCase):
-    """Verify we score nodes correctly based on their class/id attributes."""
+# Verify we score nodes correctly based on their class/id attributes.

-    def test_no_matches_zero(self):
+
+def test_no_matches_zero():
    """If you don't have the attribute then you get a weight of 0"""
    node = fragment_fromstring("<div></div>")
-        self.assertEqual(get_class_weight(node), 0)

-    def test_id_hits(self):
+    assert get_class_weight(node) == 0
+
+
+def test_id_hits():
    """If the id is in the list then it gets a weight"""
    test_div = '<div id="post">Content</div>'
    node = fragment_fromstring(test_div)
-        self.assertEqual(get_class_weight(node), 25)
+
+    assert get_class_weight(node) == 25

    test_div = '<div id="comments">Content</div>'
    node = fragment_fromstring(test_div)
-        self.assertEqual(get_class_weight(node), -25)

-    def test_class_hits(self):
+    assert get_class_weight(node) == -25
+
+
+def test_class_hits():
    """If the class is in the list then it gets a weight"""
    test_div = '<div class="something post">Content</div>'
    node = fragment_fromstring(test_div)
-        self.assertEqual(get_class_weight(node), 25)
+    assert get_class_weight(node) == 25

    test_div = '<div class="something comments">Content</div>'
    node = fragment_fromstring(test_div)
-        self.assertEqual(get_class_weight(node), -25)
+    assert get_class_weight(node) == -25
+

-    def test_scores_collide(self):
+def test_scores_collide():
    """We might hit both positive and negative scores.

    Positive and negative scoring is done independently so it's possible
@ -146,59 +149,65 @@ class TestClassWeight(unittest.TestCase):
    """
    test_div = '<div id="post" class="something comment">Content</div>'
    node = fragment_fromstring(test_div)
-        self.assertEqual(get_class_weight(node), 0)
+    assert get_class_weight(node) == 0

    test_div = '<div id="post" class="post comment">Content</div>'
    node = fragment_fromstring(test_div)
-        self.assertEqual(get_class_weight(node), 25)
+    assert get_class_weight(node) == 25

-    def test_scores_only_once(self):
+
+def test_scores_only_once():
    """Scoring is not cumulative within a class hit."""
    test_div = '<div class="post main">Content</div>'
    node = fragment_fromstring(test_div)
-        self.assertEqual(get_class_weight(node), 25)

+    assert get_class_weight(node) == 25
+
+
+# is_unlikely_node should help verify our node is good/bad.

-class TestUnlikelyNode(unittest.TestCase):
-    """is_unlikely_node should help verify our node is good/bad."""

-    def test_body_is_always_likely(self):
+def test_body_is_always_likely():
    """The body tag is always a likely node."""
    test_div = '<body class="comment"><div>Content</div></body>'
    node = fragment_fromstring(test_div)
-        self.assertFalse(is_unlikely_node(node))

-    def test_is_unlikely(self):
-        "Keywords in the class/id will make us believe this is unlikely."
+    assert not is_unlikely_node(node)
+
+
+def test_is_unlikely():
+    """Keywords in the class/id will make us believe this is unlikely."""
    test_div = '<div class="something comments">Content</div>'
    node = fragment_fromstring(test_div)
-        self.assertTrue(is_unlikely_node(node))
+    assert is_unlikely_node(node)

    test_div = '<div id="comments">Content</div>'
    node = fragment_fromstring(test_div)
-        self.assertTrue(is_unlikely_node(node))
+    assert is_unlikely_node(node)

-    def test_not_unlikely(self):
+
+def test_not_unlikely():
    """Suck it double negatives."""
    test_div = '<div id="post">Content</div>'
    node = fragment_fromstring(test_div)
-        self.assertFalse(is_unlikely_node(node))
+    assert not is_unlikely_node(node)

    test_div = '<div class="something post">Content</div>'
    node = fragment_fromstring(test_div)
-        self.assertFalse(is_unlikely_node(node))
+    assert not is_unlikely_node(node)
+

-    def test_maybe_hits(self):
+def test_maybe_hits():
    """We've got some maybes that will overrule an unlikely node."""
    test_div = '<div id="comments" class="article">Content</div>'
    node = fragment_fromstring(test_div)
-        self.assertFalse(is_unlikely_node(node))
+    assert not is_unlikely_node(node)
+

+# ScoredNodes constructed have initial content_scores, etc.

-class TestScoredNode(unittest.TestCase):
-    """ScoredNodes constructed have initial content_scores, etc."""

-    def test_hash_id(self):
+def test_hash_id():
    """ScoredNodes have a hash_id based on their content

    Since this is based on the html there are chances for collisions, but
@ -209,50 +218,52 @@ class TestScoredNode(unittest.TestCase):
    test_div = '<div id="comments" class="article">Content</div>'
    node = fragment_fromstring(test_div)
    snode = ScoredNode(node)
-        self.assertEqual(snode.hash_id, 'ffa4c519')

-    def test_div_content_score(self):
+    assert snode.hash_id == 'ffa4c519'
+
+
+def test_div_content_score():
    """A div starts out with a score of 5 and modifies from there"""
    test_div = '<div id="" class="">Content</div>'
    node = fragment_fromstring(test_div)
    snode = ScoredNode(node)
-        self.assertEqual(snode.content_score, 5)
+    assert snode.content_score == 5

    test_div = '<div id="article" class="">Content</div>'
    node = fragment_fromstring(test_div)
    snode = ScoredNode(node)
-        self.assertEqual(snode.content_score, 30)
+    assert snode.content_score == 30

    test_div = '<div id="comments" class="">Content</div>'
    node = fragment_fromstring(test_div)
    snode = ScoredNode(node)
-        self.assertEqual(snode.content_score, -20)
+    assert snode.content_score == -20

-    def test_headings_score(self):
+
+def test_headings_score():
    """Heading tags aren't likely candidates, hurt their scores."""
    test_div = '<h2>Heading</h2>'
    node = fragment_fromstring(test_div)
    snode = ScoredNode(node)
-        self.assertEqual(snode.content_score, -5)

-    def test_list_items(self):
+    assert snode.content_score == -5
+
+
+def test_list_items():
    """List items aren't likely candidates, hurt their scores."""
    test_div = '<li>list item</li>'
    node = fragment_fromstring(test_div)
    snode = ScoredNode(node)
-        self.assertEqual(snode.content_score, -3)
+    assert snode.content_score == -3


-class TestScoreCandidates(unittest.TestCase):
-    """The grand daddy of tests to make sure our scoring works
+# The grand daddy of tests to make sure our scoring works
+# Now scoring details will change over time, so the most important thing is
+# to make sure candidates come out in the right order, not necessarily how
+# they scored. Make sure to keep this in mind while getting tests going.

-    Now scoring details will change over time, so the most important thing is
-    to make sure candidates come out in the right order, not necessarily how
-    they scored. Make sure to keep this in mind while getting tests going.
-
-    """

-    def test_simple_candidate_set(self):
+def test_simple_candidate_set():
    """Tests a simple case of two candidate nodes"""
    html = """
        <html>
@ -276,9 +287,9 @@ class TestScoreCandidates(unittest.TestCase):
        (c for c in candidates.values()), reverse=True,
        key=attrgetter("content_score"))

-        self.assertEqual(ordered[0].node.tag, "div")
-        self.assertEqual(ordered[0].node.attrib["class"], "content")
-        self.assertEqual(ordered[1].node.tag, "body")
-        self.assertEqual(ordered[2].node.tag, "html")
-        self.assertEqual(ordered[3].node.tag, "div")
-        self.assertEqual(ordered[3].node.attrib["class"], "footer")
+    assert ordered[0].node.tag == "div"
+    assert ordered[0].node.attrib["class"] == "content"
+    assert ordered[1].node.tag == "body"
+    assert ordered[2].node.tag == "html"
+    assert ordered[3].node.tag == "div"
+    assert ordered[3].node.attrib["class"] == "footer"