diff --git a/.gitignore b/.gitignore index dfb5e01..a66183c 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,5 @@ *.pyc +*.prof .installed.cfg bin diff --git a/src/breadability/scoring.py b/src/breadability/scoring.py index df4d408..2c527a2 100644 --- a/src/breadability/scoring.py +++ b/src/breadability/scoring.py @@ -123,7 +123,7 @@ def score_candidates(nodes): # For every 100 characters in this paragraph, add another point. Up to # 3 points. - length_points = len(innertext) % 100 if innertext else 0 + length_points = len(innertext) / 100 if innertext else 0 if length_points > 3: content_score += 3 else: diff --git a/src/breadability/tests/test_readable.py b/src/breadability/tests/test_readable.py index f0b3456..5576172 100644 --- a/src/breadability/tests/test_readable.py +++ b/src/breadability/tests/test_readable.py @@ -57,7 +57,7 @@ class TestReadableDocument(TestCase): """ doc = Article(load_snippet('document_only_content.html')) - + self.assertEqual(doc._readable.tag, 'div') self.assertEqual(doc._readable.get('id'), 'readabilityBody') @@ -119,6 +119,7 @@ class TestCleaning(TestCase): u'

simplelink

' ) + class TestCandidateNodes(TestCase): """Candidate nodes are scoring containers we use.""" @@ -192,6 +193,35 @@ class TestScoringNodes(TestCase): scores = sorted([c.content_score for c in candidates.values()]) self.assertTrue(scores[-1] > 100) + def test_bonus_score_per_100_chars_in_p(self): + """Nodes get 1pt per 100 characters up to 3 max points""" + def build_doc(length): + div = '

{0}

' + content = 'c' * length + test_div = div.format(content) + doc = document_fromstring('' + test_div + '') + test_nodes = [] + for node in doc.getiterator(): + if node.tag == 'p': + test_nodes.append(node) + return test_nodes + + test_nodes = build_doc(400) + candidates = score_candidates(test_nodes) + pscore_400 = max([c.content_score for c in candidates.values()]) + + test_nodes = build_doc(100) + candidates = score_candidates(test_nodes) + pscore_100 = max([c.content_score for c in candidates.values()]) + + test_nodes = build_doc(50) + candidates = score_candidates(test_nodes) + pscore_50 = max([c.content_score for c in candidates.values()]) + + self.assertEqual(pscore_100, pscore_50 + 1) + self.assertEqual(pscore_400, pscore_50 + 3) + + class TestLinkDensityScoring(TestCase): """Link density will adjust out candidate scoresself."""