Start to add scoring file specific tests

pull/4/merge
Richard Harding 12 years ago
parent ab79d9632b
commit 6b16b7b21f

@ -104,7 +104,7 @@ def check_siblings(candidate_node, candidate_list):
append = True
# Give a bonus if sibling nodes and top candidates have the exact
# same classname
# same class name
if candidate_css and sibling.get('class') == candidate_css:
content_bonus += candidate_node.content_score * 0.2
@ -148,7 +148,6 @@ def prep_article(doc):
- extra tags
"""
def clean_document(node):
"""Remove the style attribute on every element."""
clean_list = ['object', 'h1']
@ -202,9 +201,6 @@ def prep_article(doc):
def clean_conditionally(doc, clean_el):
"""Remove the clean_el if it looks like bad content based on rules."""
def clean_objects():
pass
doc = clean_document(doc)
return doc

@ -0,0 +1,42 @@
from lxml.html import fragment_fromstring
from unittest import TestCase
from breadability.scoring import check_node_attr
class TestCheckNodeAttr(TestCase):
    """Exercise check_node_attr against a set of known names.

    Every test builds a bare ``<div/>`` fragment, sets either its ``class``
    or its ``id`` attribute, and asserts whether check_node_attr reports a
    match for that attribute against the supplied set.

    """

    def _build_div(self, attr_name, attr_value):
        """Return a fresh ``<div/>`` fragment with one attribute set."""
        div = fragment_fromstring('<div/>')
        div.set(attr_name, attr_value)
        return div

    def test_has_class(self):
        """A class value that includes a name from the set is a match."""
        known_names = set(['test1', 'test2'])
        div = self._build_div('class', 'test2 comment')

        matched = check_node_attr(div, 'class', known_names)
        self.assertTrue(matched)

    def test_has_id(self):
        """An id that is one of the names in the set is a match."""
        known_names = set(['test1', 'test2'])
        div = self._build_div('id', 'test2')

        matched = check_node_attr(div, 'id', known_names)
        self.assertTrue(matched)

    def test_lacks_class(self):
        """A class value with no name from the set is not a match."""
        known_names = set(['test1', 'test2'])
        div = self._build_div('class', 'test4 comment')

        matched = check_node_attr(div, 'class', known_names)
        self.assertFalse(matched)

    def test_lacks_id(self):
        """An id that is absent from the set is not a match."""
        known_names = set(['test1', 'test2'])
        div = self._build_div('id', 'test4')

        matched = check_node_attr(div, 'id', known_names)
        self.assertFalse(matched)
Loading…
Cancel
Save