Tests migrated into pytest style
parent 48acf389b1
commit aa83825334
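Every test module below follows the same migration pattern: the unittest.TestCase class with its setUp/tearDown is replaced by a module-scoped pytest fixture that loads the article once, test methods become plain functions that take the fixture as an argument, and self.assertTrue/self.assertEqual/self.assertIn calls become bare assert statements. A minimal before/after sketch of that pattern (the class name and layout here are illustrative only, not taken from any single file in the diff):

# Before: unittest style
import os
import unittest

from breadability.readable import Article


class TestSomeArticle(unittest.TestCase):  # illustrative name, not from the diff
    def setUp(self):
        path = os.path.join(os.path.dirname(__file__), 'article.html')
        self.article = open(path).read()

    def test_parses(self):
        doc = Article(self.article)
        self.assertTrue('id="readabilityBody"' in doc.readable)


# After: pytest style
import pytest


@pytest.fixture(scope="module")
def article():
    # load the fixture file once per module instead of once per test
    path = os.path.join(os.path.dirname(__file__), 'article.html')
    with open(path) as file:
        return file.read()


def test_parses(article):
    doc = Article(article)
    assert 'id="readabilityBody"' in doc.readable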
@@ -1,42 +1,45 @@
# -*- coding: utf8 -*-

"""Test the scoring and parsing of the Blog Post"""

from __future__ import absolute_import, division, print_function, unicode_literals

import os

import pytest

from breadability.readable import Article


@pytest.fixture(scope="module")
def article():
    """Load up the article for us"""
    article_path = os.path.join(os.path.dirname(__file__), 'article.html')
    with open(article_path) as file:
        return file.read()


def test_parses(article):
    """Verify we can parse the document."""
    doc = Article(article)

    assert 'id="readabilityBody"' in doc.readable


def test_comments_cleaned(article):
    """The div with the comments should be removed."""
    doc = Article(article)

    assert 'class="comments"' not in doc.readable


def test_beta_removed(article):
    """The id=beta element should be removed

    It's link heavy and causing a lot of garbage content. This should be
    removed.
    """
    doc = Article(article)

    assert 'id="beta"' not in doc.readable
@@ -1,33 +1,34 @@
# -*- coding: utf8 -*-

"""Test the scoring and parsing of the Blog Post"""

from __future__ import absolute_import, division, print_function, unicode_literals

import os

import pytest

from breadability.readable import Article


@pytest.fixture(scope="module")
def article():
    """Load up the article for us"""
    article_path = os.path.join(os.path.dirname(__file__), 'article.html')
    with open(article_path) as file:
        return file.read()


def test_parses(article):
    """Verify we can parse the document."""
    doc = Article(article)

    assert 'id="readabilityBody"' in doc.readable


def test_images_preserved(article):
    """The div with the comments should be removed."""
    doc = Article(article)

    assert 'bharath-kumar-a-co-founder-at-pugmarksme-suggests-working-on-a-sunday-late-night.jpg' in doc.readable
    assert 'bryan-guido-hassin-a-university-professor-and-startup-junkie-uses-airplane-days.jpg' in doc.readable
@@ -1,39 +1,33 @@
# -*- coding: utf8 -*-

"""
Test the scoring and parsing of the article from URL below:
http://www.businessinsider.com/tech-ceos-favorite-productivity-hacks-2013-8
"""

from __future__ import absolute_import, division, print_function, unicode_literals

import os

import pytest

from breadability.readable import Article


@pytest.fixture(scope="module")
def article():
    """Load up the article for us"""
    article_path = os.path.join(os.path.dirname(__file__), 'article.html')
    with open(article_path, "rb") as file:
        return Article(file.read(), "http://www.businessinsider.com/tech-ceos-favorite-productivity-hacks-2013-8")


def test_parses(article):
    """Verify we can parse the document."""
    assert 'id="readabilityBody"' in article.readable


def test_images_preserved(article):
    """The div with the comments should be removed."""
    assert 'bharath-kumar-a-co-founder-at-pugmarksme-suggests-working-on-a-sunday-late-night.jpg' in article.readable
    assert 'bryan-guido-hassin-a-university-professor-and-startup-junkie-uses-airplane-days.jpg' in article.readable
@@ -1,74 +1,64 @@
# -*- coding: utf8 -*-

"""Test the scoring and parsing of the Article"""

from __future__ import absolute_import, division, print_function, unicode_literals

import os
from operator import attrgetter

import pytest

from breadability.readable import Article, check_siblings, prep_article


@pytest.fixture(scope="module")
def article():
    """Load up the article for us"""
    article_path = os.path.join(os.path.dirname(__file__), 'article.html')
    with open(article_path) as file:
        return Article(file.read())


def test_parses(article):
    """Verify we can parse the document."""
    assert 'id="readabilityBody"' in article.readable


def test_content_exists(article):
    """Verify that some content exists."""
    assert 'Amazon and Google' in article.readable
    assert not 'Linkblog updated' in article.readable
    assert not '#anExampleGoogleDoesntIntendToShareBlogAndItWill' in article.readable


@pytest.mark.skip("Test fails because of some weird hash.")
def test_candidates(article):
    """Verify we have candidates."""
    # from lxml.etree import tounicode
    found = False
    wanted_hash = '04e46055'

    for node in article.candidates.values():
        if node.hash_id == wanted_hash:
            found = node

    assert found

    # we have the right node, it must be deleted for some reason if it's
    # not still there when we need it to be.
    # Make sure it's not in our to drop list.
    for node in article._should_drop:
        assert node != found.node

    by_score = sorted(
        [c for c in article.candidates.values()],
        key=attrgetter('content_score'), reverse=True)
    assert by_score[0].node == found.node

    updated_winner = check_siblings(by_score[0], article.candidates)
    updated_winner.node = prep_article(updated_winner.node)

    # This article hits up against the img > p conditional filtering
    # because of the many .gif images in the content. We've removed that
    # rule.
@@ -1,33 +1,32 @@
# -*- coding: utf8 -*-

"""
Test the scoring and parsing of the article from URL below:
http://sweetshark.livejournal.com/11564.html
"""

from __future__ import absolute_import, division, print_function, unicode_literals

import os

import pytest

from breadability.readable import Article


@pytest.fixture(scope="module")
def article():
    """Load up the article for us"""
    article_path = os.path.join(os.path.dirname(__file__), 'article.html')
    with open(article_path, "rb") as file:
        return Article(file.read(), "http://sweetshark.livejournal.com/11564.html")


def test_parses(article):
    """Verify we can parse the document."""
    assert 'id="readabilityBody"' in article.readable


def test_content_after_video(article):
    """The div with the comments should be removed."""
    assert 'Stay hungry, Stay foolish' in article.readable
@@ -1,347 +1,352 @@
# -*- coding: utf8 -*-

from __future__ import absolute_import, division, print_function, unicode_literals

import pytest
from lxml.etree import tounicode
from lxml.html import document_fromstring, fragment_fromstring

from breadability._compat import to_unicode
from breadability.readable import (Article, get_class_weight, get_link_density, is_bad_link,
                                   leaf_div_elements_into_paragraphs, score_candidates, )
from breadability.scoring import ScoredNode
from .utils import load_article, load_snippet


# TestReadableDocument
"""Verify we can process html into a document to work off of."""


def test_load_doc():
    """We get back an element tree from our original doc"""
    doc = Article(load_snippet('document_min.html'))

    # We get back the document as a div tag currently by default.
    assert doc.readable_dom.tag == 'div'


def test_title_loads():
    """Verify we can fetch the title of the parsed article"""
    doc = Article(load_snippet('document_min.html'))

    assert doc._original_document.title == 'Min Document Title'


def test_doc_no_scripts_styles():
    """Step #1 remove all scripts from the document"""
    doc = Article(load_snippet('document_scripts.html'))
    readable = doc.readable_dom

    assert readable.findall(".//script") == []
    assert readable.findall(".//style") == []
    assert readable.findall(".//link") == []


def test_find_body_exists():
    """If the document has a body, we store that as the readable html

    No sense processing anything other than the body content.

    """
    doc = Article(load_snippet('document_min.html'))

    assert doc.readable_dom.tag == 'div'
    assert doc.readable_dom.get('id') == 'readabilityBody'


def test_body_doesnt_exist():
    """If we can't find a body, then we create one.

    We build our doc around the rest of the html we parsed.

    """
    doc = Article(load_snippet('document_no_body.html'))

    assert doc.readable_dom.tag == 'div'
    assert doc.readable_dom.get('id') == 'readabilityBody'


def test_bare_content():
    """If the document is just pure content, no html tags we should be ok

    We build our doc around the rest of the html we parsed.

    """
    doc = Article(load_snippet('document_only_content.html'))

    assert doc.readable_dom.tag == 'div'
    assert doc.readable_dom.get('id') == 'readabilityBody'


def test_no_content():
    """Without content we supply an empty unparsed doc."""
    doc = Article('')

    assert doc.readable_dom.tag == 'div'
    assert doc.readable_dom.get('id') == 'readabilityBody'
    assert doc.readable_dom.get('class') == 'parsing-error'


# Test out our cleaning processing we do.


def test_unlikely_hits():
    """Verify we wipe out things from our unlikely list."""
    doc = Article(load_snippet('test_readable_unlikely.html'))
    readable = doc.readable_dom
    must_not_appear = [
        'comment', 'community', 'disqus', 'extra', 'foot',
        'header', 'menu', 'remark', 'rss', 'shoutbox', 'sidebar',
        'sponsor', 'ad-break', 'agegate', 'pagination' '', 'pager',
        'popup', 'tweet', 'twitter', 'imgBlogpostPermalink']

    want_to_appear = ['and', 'article', 'body', 'column', 'main', 'shadow']

    for i in must_not_appear:
        # we cannot find any class or id with this value
        by_class = readable.find_class(i)

        for test in by_class:
            # if it's here it cannot have the must not class without the
            # want to appear class
            found = False
            for cls in test.get('class').split():
                if cls in want_to_appear:
                    found = True
            assert found

        by_ids = readable.get_element_by_id(i, False)
        if by_ids is not False:
            found = False
            for ids in test.get('id').split():
                if ids in want_to_appear:
                    found = True
            assert found


def test_misused_divs_transform():
    """Verify we replace leaf node divs with p's

    They should have the same content, just be a p vs a div

    """
    test_html = "<html><body><div>simple</div></body></html>"
    test_doc = document_fromstring(test_html)
    assert tounicode(leaf_div_elements_into_paragraphs(test_doc)) == to_unicode(
        "<html><body><p>simple</p></body></html>"
    )

    test_html2 = ('<html><body><div>simple<a href="">link</a>'
                  '</div></body></html>')
    test_doc2 = document_fromstring(test_html2)
    assert tounicode(leaf_div_elements_into_paragraphs(test_doc2)) == to_unicode(
        '<html><body><p>simple<a href="">link</a></p></body></html>'
    )


def test_dont_transform_div_with_div():
    """Verify that only child <div> element is replaced by <p>."""
    dom = document_fromstring(
        "<html><body><div>text<div>child</div>"
        "aftertext</div></body></html>"
    )

    assert tounicode(leaf_div_elements_into_paragraphs(dom)) == to_unicode(
        "<html><body><div>text<p>child</p>"
        "aftertext</div></body></html>"
    )


def test_bad_links():
    """Some links should just not belong."""
    bad_links = [
        '<a name="amazonAndGoogleHaveMadeAnAudaciousGrabOfNamespaceOnTheInternetAsFarAsICanSeeTheresBeenNoMentionOfThisInTheTechPress"> </a>',
        '<a href="#amazonAndGoogleHaveMadeAnAudaciousGrabOfNamespaceOnTheInternetAsFarAsICanSeeTheresBeenNoMentionOfThisInTheTechPress"><img src="http://scripting.com/images/2001/09/20/sharpPermaLink3.gif" class="imgBlogpostPermalink" width="6" height="9" border="0" alt="permalink"></a>',
        '<a href="http://scripting.com/stories/2012/06/15/theTechPressIsOutToLunch.html#anExampleGoogleDoesntIntendToShareBlogAndItWillOnlyBeUsedToPointToBloggerSitesIfYouHaveATumblrOrWordpressBlogYouCantHaveABlogDomainHereIsTheAHrefhttpgtldresulticannorgapplicationresultapplicationstatusapplicationdetails527publicListingaOfGooglesAHrefhttpdropboxscriptingcomdavemiscgoogleblogapplicationhtmlapplicationa"><img src="http://scripting.com/images/2001/09/20/sharpPermaLink3.gif" class="imgBlogpostPermalink" width="6" height="9" border="0" alt="permalink"></a>'
    ]

    for l in bad_links:
        link = fragment_fromstring(l)
        assert is_bad_link(link)


# Candidate nodes are scoring containers we use.


def test_candidate_scores():
    """We should be getting back objects with some scores."""
    fives = ['<div/>']
    threes = ['<pre/>', '<td/>', '<blockquote/>']
    neg_threes = ['<address/>', '<ol/>']
    neg_fives = ['<h1/>', '<h2/>', '<h3/>', '<h4/>']

    for n in fives:
        doc = fragment_fromstring(n)
        assert ScoredNode(doc).content_score == 5

    for n in threes:
        doc = fragment_fromstring(n)
        assert ScoredNode(doc).content_score == 3

    for n in neg_threes:
        doc = fragment_fromstring(n)
        assert ScoredNode(doc).content_score == -3

    for n in neg_fives:
        doc = fragment_fromstring(n)
        assert ScoredNode(doc).content_score == -5


def test_article_enables_candidate_access():
    """Candidates are accessible after document processing."""
    doc = Article(load_article('ars.001.html'))

    assert hasattr(doc, 'candidates')


# Certain ids and classes get us bonus points.


def test_positive_class():
    """Some classes get us bonus points."""
    node = fragment_fromstring('<p class="article">')
    assert get_class_weight(node) == 25


def test_positive_ids():
    """Some ids get us bonus points."""
    node = fragment_fromstring('<p id="content">')
    assert get_class_weight(node) == 25


def test_negative_class():
    """Some classes get us negative points."""
    node = fragment_fromstring('<p class="comment">')
    assert get_class_weight(node) == -25


def test_negative_ids():
    """Some ids get us negative points."""
    node = fragment_fromstring('<p id="media">')
    assert get_class_weight(node) == -25


# We take out list of potential nodes and score them up.


def test_we_get_candidates():
    """Processing candidates should get us a list of nodes to try out."""
    doc = document_fromstring(load_article("ars.001.html"))
    test_nodes = tuple(doc.iter("p", "td", "pre"))
    candidates = score_candidates(test_nodes)

    # this might change as we tweak our algorithm, but if it does,
    # it signifies we need to look at what we changed.
    assert len(candidates.keys()) == 37

    # one of these should have a decent score
    scores = sorted(c.content_score for c in candidates.values())
    assert scores[-1] > 100


def test_bonus_score_per_100_chars_in_p():
    """Nodes get 1 point per 100 characters up to max. 3 points."""
    def build_candidates(length):
        html = "<p>%s</p>" % ("c" * length)
        node = fragment_fromstring(html)

        return [node]

    test_nodes = build_candidates(50)
    candidates = score_candidates(test_nodes)
    pscore_50 = max(c.content_score for c in candidates.values())

    test_nodes = build_candidates(100)
    candidates = score_candidates(test_nodes)
    pscore_100 = max(c.content_score for c in candidates.values())

    test_nodes = build_candidates(300)
    candidates = score_candidates(test_nodes)
    pscore_300 = max(c.content_score for c in candidates.values())

    test_nodes = build_candidates(400)
    candidates = score_candidates(test_nodes)
    pscore_400 = max(c.content_score for c in candidates.values())

    assert pscore_50 + 0.5 == pscore_100
    assert pscore_100 + 2.0 == pscore_300
    assert pscore_300 == pscore_400


# Link density will adjust out candidate scoresself.


def test_link_density():
    """Test that we get a link density"""
    doc = document_fromstring(load_article('ars.001.html'))
    for node in doc.iter('p', 'td', 'pre'):
        density = get_link_density(node)

        # the density must be between 0, 1
        assert density >= 0.0 and density <= 1.0


# Siblings will be included if their content is related.


@pytest.mark.skip("Not implemented yet.")
def test_bad_siblings_not_counted():
    raise NotImplementedError()


@pytest.mark.skip("Not implemented yet.")
def test_good_siblings_counted():
    raise NotImplementedError()


# TestMainText


def test_empty():
    article = Article("")
    annotated_text = article.main_text

    assert annotated_text == []


def test_no_annotations():
    article = Article("<div><p>This is text with no annotations</p></div>")
    annotated_text = article.main_text

    assert annotated_text == [(("This is text with no annotations", None),)]


def test_one_annotation():
    article = Article("<div><p>This is text\r\twith <del>no</del> annotations</p></div>")
    annotated_text = article.main_text

    assert annotated_text == [(
        ("This is text\nwith", None),
        ("no", ("del",)),
        ("annotations", None),
    )]


def test_simple_snippet():
    snippet = Article(load_snippet("annotated_1.html"))
    annotated_text = snippet.main_text

    assert annotated_text == [
        (
            ("Paragraph is more", None),
            ("better", ("em",)),
            (".\nThis text is very", None),
            ("pretty", ("strong",)),
            ("'cause she's girl.", None),
        ),
        (
            ("This is not", None),
            ("crap", ("big",)),
            ("so", None),
            ("readability", ("dfn",)),
            ("me :)", None),
        )
    ]
@@ -1,284 +1,295 @@
# -*- coding: utf8 -*-

from __future__ import absolute_import, division, print_function, unicode_literals

import re

from operator import attrgetter
from lxml.html import document_fromstring, fragment_fromstring

from breadability.readable import Article, get_link_density, is_unlikely_node
from breadability.scoring import (ScoredNode, check_node_attributes, generate_hash_id, get_class_weight,
                                  score_candidates)
from .utils import load_snippet


def test_generate_hash():
    dom = fragment_fromstring("<div>ľščťžýáí</div>")
    generate_hash_id(dom)


def test_hash_from_id_on_exception():
    generate_hash_id(None)


def test_different_hashes():
    dom = fragment_fromstring("<div>ľščťžýáí</div>")
    hash_dom = generate_hash_id(dom)
    hash_none = generate_hash_id(None)

    assert hash_dom != hash_none


def test_equal_hashes():
    dom1 = fragment_fromstring("<div>ľščťžýáí</div>")
    dom2 = fragment_fromstring("<div>ľščťžýáí</div>")
    hash_dom1 = generate_hash_id(dom1)
    hash_dom2 = generate_hash_id(dom2)
    assert hash_dom1 == hash_dom2

    hash_none1 = generate_hash_id(None)
    hash_none2 = generate_hash_id(None)
    assert hash_none1 == hash_none2


# Verify a node has a class/id in the given set.
# The idea is that we have sets of known good/bad ids and classes and need
# to verify the given node does/doesn't have those classes/ids.


def test_has_class():
    """Verify that a node has a class in our set."""
    test_pattern = re.compile('test1|test2', re.I)
    test_node = fragment_fromstring('<div/>')
    test_node.set('class', 'test2 comment')

    assert check_node_attributes(test_pattern, test_node, 'class')


def test_has_id():
    """Verify that a node has an id in our set."""
    test_pattern = re.compile('test1|test2', re.I)
    test_node = fragment_fromstring('<div/>')
    test_node.set('id', 'test2')

    assert check_node_attributes(test_pattern, test_node, 'id')


def test_lacks_class():
    """Verify that a node does not have a class in our set."""
    test_pattern = re.compile('test1|test2', re.I)
    test_node = fragment_fromstring('<div/>')
    test_node.set('class', 'test4 comment')

    assert not check_node_attributes(test_pattern, test_node, 'class')


def test_lacks_id():
    """Verify that a node does not have an id in our set."""
    test_pattern = re.compile('test1|test2', re.I)
    test_node = fragment_fromstring('<div/>')
    test_node.set('id', 'test4')

    assert not check_node_attributes(test_pattern, test_node, 'id')


# Verify we calc our link density correctly.


def test_empty_node():
    """An empty node doesn't have much of a link density"""
    doc = Article("<div></div>")

    assert get_link_density(doc.readable_dom) == 0.0


def test_small_doc_no_links():
    doc = Article(load_snippet('document_min.html'))

    assert get_link_density(doc.readable_dom) == 0.0


def test_several_links():
    """This doc has a 3 links with the majority of content."""
    doc = Article(load_snippet('document_absolute_url.html'))

    assert get_link_density(doc.readable_dom) == 22/37


# Verify we score nodes correctly based on their class/id attributes.


def test_no_matches_zero():
    """If you don't have the attribute then you get a weight of 0"""
    node = fragment_fromstring("<div></div>")

    assert get_class_weight(node) == 0


def test_id_hits():
    """If the id is in the list then it gets a weight"""
    test_div = '<div id="post">Content</div>'
    node = fragment_fromstring(test_div)

    assert get_class_weight(node) == 25

    test_div = '<div id="comments">Content</div>'
    node = fragment_fromstring(test_div)

    assert get_class_weight(node) == -25


def test_class_hits():
    """If the class is in the list then it gets a weight"""
    test_div = '<div class="something post">Content</div>'
    node = fragment_fromstring(test_div)
    assert get_class_weight(node) == 25

    test_div = '<div class="something comments">Content</div>'
    node = fragment_fromstring(test_div)
    assert get_class_weight(node) == -25


def test_scores_collide():
    """We might hit both positive and negative scores.

    Positive and negative scoring is done independently so it's possible
    to hit both positive and negative scores and cancel each other out.

    """
    test_div = '<div id="post" class="something comment">Content</div>'
    node = fragment_fromstring(test_div)
    assert get_class_weight(node) == 0

    test_div = '<div id="post" class="post comment">Content</div>'
    node = fragment_fromstring(test_div)
    assert get_class_weight(node) == 25


def test_scores_only_once():
    """Scoring is not cumulative within a class hit."""
    test_div = '<div class="post main">Content</div>'
    node = fragment_fromstring(test_div)

    assert get_class_weight(node) == 25


# is_unlikely_node should help verify our node is good/bad.


def test_body_is_always_likely():
    """The body tag is always a likely node."""
    test_div = '<body class="comment"><div>Content</div></body>'
    node = fragment_fromstring(test_div)

    assert not is_unlikely_node(node)


def test_is_unlikely():
    """Keywords in the class/id will make us believe this is unlikely."""
    test_div = '<div class="something comments">Content</div>'
    node = fragment_fromstring(test_div)
    assert is_unlikely_node(node)

    test_div = '<div id="comments">Content</div>'
    node = fragment_fromstring(test_div)
    assert is_unlikely_node(node)


def test_not_unlikely():
    """Suck it double negatives."""
    test_div = '<div id="post">Content</div>'
    node = fragment_fromstring(test_div)
    assert not is_unlikely_node(node)

    test_div = '<div class="something post">Content</div>'
    node = fragment_fromstring(test_div)
    assert not is_unlikely_node(node)


def test_maybe_hits():
    """We've got some maybes that will overrule an unlikely node."""
    test_div = '<div id="comments" class="article">Content</div>'
    node = fragment_fromstring(test_div)
    assert not is_unlikely_node(node)


# ScoredNodes constructed have initial content_scores, etc.


def test_hash_id():
    """ScoredNodes have a hash_id based on their content

    Since this is based on the html there are chances for collisions, but
    it helps us follow and identify nodes through the scoring process. Two
    identical nodes would score the same, so meh all good.

    """
    test_div = '<div id="comments" class="article">Content</div>'
    node = fragment_fromstring(test_div)
    snode = ScoredNode(node)

    assert snode.hash_id == 'ffa4c519'


def test_div_content_score():
    """A div starts out with a score of 5 and modifies from there"""
    test_div = '<div id="" class="">Content</div>'
    node = fragment_fromstring(test_div)
    snode = ScoredNode(node)
    assert snode.content_score == 5

    test_div = '<div id="article" class="">Content</div>'
    node = fragment_fromstring(test_div)
    snode = ScoredNode(node)
    assert snode.content_score == 30

    test_div = '<div id="comments" class="">Content</div>'
    node = fragment_fromstring(test_div)
    snode = ScoredNode(node)
    assert snode.content_score == -20


def test_headings_score():
    """Heading tags aren't likely candidates, hurt their scores."""
    test_div = '<h2>Heading</h2>'
    node = fragment_fromstring(test_div)
    snode = ScoredNode(node)

    assert snode.content_score == -5


def test_list_items():
    """Heading tags aren't likely candidates, hurt their scores."""
    test_div = '<li>list item</li>'
    node = fragment_fromstring(test_div)
    snode = ScoredNode(node)
    assert snode.content_score == -3


# The grand daddy of tests to make sure our scoring works
# Now scoring details will change over time, so the most important thing is
# to make sure candidates come out in the right order, not necessarily how
# they scored. Make sure to keep this in mind while getting tests going.


def test_simple_candidate_set():
    """Tests a simple case of two candidate nodes"""
    html = """
        <html>
        <body>
            <div class="content">
                <p>This is a great amount of info</p>
                <p>And more content <a href="/index">Home</a>
            </div>
            <div class="footer">
                <p>This is a footer</p>
                <p>And more content <a href="/index">Home</a>
            </div>
        </body>
        </html>
    """
    dom = document_fromstring(html)
    div_nodes = dom.findall(".//div")

    candidates = score_candidates(div_nodes)
    ordered = sorted(
        (c for c in candidates.values()), reverse=True,
        key=attrgetter("content_score"))

    assert ordered[0].node.tag == "div"
    assert ordered[0].node.attrib["class"] == "content"
    assert ordered[1].node.tag == "body"
    assert ordered[2].node.tag == "html"
    assert ordered[3].node.tag == "div"
    assert ordered[3].node.attrib["class"] == "footer"