Refactored checking of node's attribute

pull/21/head
Mišo Belica 11 years ago
parent 636a38d705
commit 3f71e1b7d4

@ -26,13 +26,13 @@ CLS_WEIGHT_NEGATIVE = re.compile(('combx|comment|com-|contact|foot|footer|'
logger = logging.getLogger("breadability")
def check_node_attr(node, attr, checkset):
value = node.get(attr) or ""
check = checkset.search(value)
if check:
return True
else:
def check_node_attribute(node, attribute_name, pattern):
    """Return True when the node's attribute matches the given pattern.

    :param node: object exposing ``get(name)`` which yields the attribute
        value, or ``None`` when the attribute is absent.
    :param attribute_name: name of the attribute to look up on the node.
    :param pattern: compiled regular expression searched in the value.
    :returns: ``False`` if the attribute is missing, otherwise whether
        ``pattern.search`` finds a match anywhere in the attribute value.
    """
    attribute = node.get(attribute_name)
    if attribute is None:
        # A missing attribute can never match, so skip the regex entirely.
        return False

    return bool(pattern.search(attribute))
def generate_hash_id(node):
@ -76,14 +76,14 @@ def get_class_weight(node):
"""
weight = 0
if check_node_attr(node, 'class', CLS_WEIGHT_NEGATIVE):
if check_node_attribute(node, 'class', CLS_WEIGHT_NEGATIVE):
weight = weight - 25
if check_node_attr(node, 'class', CLS_WEIGHT_POSITIVE):
if check_node_attribute(node, 'class', CLS_WEIGHT_POSITIVE):
weight = weight + 25
if check_node_attr(node, 'id', CLS_WEIGHT_NEGATIVE):
if check_node_attribute(node, 'id', CLS_WEIGHT_NEGATIVE):
weight = weight - 25
if check_node_attr(node, 'id', CLS_WEIGHT_POSITIVE):
if check_node_attribute(node, 'id', CLS_WEIGHT_POSITIVE):
weight = weight + 25
return weight
@ -96,11 +96,11 @@ def is_unlikely_node(node):
class/id in the likely list then it might need to be removed.
"""
unlikely = check_node_attr(node, 'class', CLS_UNLIKELY) or \
check_node_attr(node, 'id', CLS_UNLIKELY)
unlikely = check_node_attribute(node, 'class', CLS_UNLIKELY) or \
check_node_attribute(node, 'id', CLS_UNLIKELY)
maybe = check_node_attr(node, 'class', CLS_MAYBE) or \
check_node_attr(node, 'id', CLS_MAYBE)
maybe = check_node_attribute(node, 'class', CLS_MAYBE) or \
check_node_attribute(node, 'id', CLS_MAYBE)
if unlikely and not maybe and node.tag != 'body':
return True

@ -15,7 +15,7 @@ except ImportError:
from breadability._py3k import to_unicode
from breadability.readable import Article
from breadability.scoring import check_node_attr
from breadability.scoring import check_node_attribute
from breadability.scoring import get_class_weight
from breadability.scoring import ScoredNode
from breadability.scoring import score_candidates
@ -65,7 +65,7 @@ class TestCheckNodeAttr(unittest.TestCase):
test_node = fragment_fromstring('<div/>')
test_node.set('class', 'test2 comment')
self.assertTrue(check_node_attr(test_node, 'class', test_re))
self.assertTrue(check_node_attribute(test_node, 'class', test_re))
def test_has_id(self):
"""Verify that a node has an id in our set."""
@ -73,21 +73,21 @@ class TestCheckNodeAttr(unittest.TestCase):
test_node = fragment_fromstring('<div/>')
test_node.set('id', 'test2')
self.assertTrue(check_node_attr(test_node, 'id', test_re))
self.assertTrue(check_node_attribute(test_node, 'id', test_re))
def test_lacks_class(self):
"""Verify that a node does not have a class in our set."""
test_re = re.compile('test1|test2', re.I)
test_node = fragment_fromstring('<div/>')
test_node.set('class', 'test4 comment')
self.assertFalse(check_node_attr(test_node, 'class', test_re))
self.assertFalse(check_node_attribute(test_node, 'class', test_re))
def test_lacks_id(self):
"""Verify that a node does not have an id in our set."""
test_re = re.compile('test1|test2', re.I)
test_node = fragment_fromstring('<div/>')
test_node.set('id', 'test4')
self.assertFalse(check_node_attr(test_node, 'id', test_re))
self.assertFalse(check_node_attribute(test_node, 'id', test_re))
class TestLinkDensity(unittest.TestCase):

Loading…
Cancel
Save