This is a great amount of info
And more content Home
# NOTE(review): the two physical lines below hold a whole unittest module
# (imports plus the TestCheckNodeAttr, TestLinkDensity and TestClassWeight
# test cases) with its statements collapsed together. As written this is not
# valid Python: the first line starts with `import re from ...`, and
# everything after `# Python < 2.7` on that line is read as a comment.
#
# What the visible tests assert:
#   * TestCheckNodeAttr - check_node_attr(node, 'class' | 'id', regex) is
#     truthy when the attribute matches re.compile('test1|test2', re.I)
#     ('test2 comment', 'test2') and falsy when it does not
#     ('test4 comment', 'test4').
#   * TestLinkDensity - get_link_density(doc._readable) equals 0 for the
#     "empty_div" article and for the 'document_min.html' snippet, and is
#     0.349 to three places for 'document_absolute_url.html'.
#   * TestClassWeight - get_class_weight(node) equals 0 for a node built from
#     "empty_div"; test_id_hits is cut off in the middle of its `test_div`
#     string literal.
#
# NOTE(review): the literals passed to fragment_fromstring(...) and
# to_unicode(...) are empty (one even spans the line break), although the
# variables are named `empty_div` / `test_div`. Presumably the HTML markup was
# stripped when this text was extracted - TODO restore the fixtures from the
# original file before reformatting or running these tests.
import re from lxml.html import document_fromstring from lxml.html import fragment_fromstring from operator import attrgetter try: # Python < 2.7 import unittest2 as unittest except ImportError: import unittest from breadability._py3k import to_unicode from breadability.readable import Article from breadability.scoring import check_node_attr from breadability.scoring import get_class_weight from breadability.scoring import ScoredNode from breadability.scoring import score_candidates from breadability.readable import get_link_density from breadability.readable import is_unlikely_node from utils import load_snippet class TestCheckNodeAttr(unittest.TestCase): """Verify a node has a class/id in the given set. The idea is that we have sets of known good/bad ids and classes and need to verify the given node does/doesn't have those classes/ids. """ def test_has_class(self): """Verify that a node has a class in our set.""" test_re = re.compile('test1|test2', re.I) test_node = fragment_fromstring('
') test_node.set('class', 'test2 comment') self.assertTrue(check_node_attr(test_node, 'class', test_re)) def test_has_id(self): """Verify that a node has an id in our set.""" test_re = re.compile('test1|test2', re.I) test_node = fragment_fromstring('') test_node.set('id', 'test2') self.assertTrue(check_node_attr(test_node, 'id', test_re)) def test_lacks_class(self): """Verify that a node does not have a class in our set.""" test_re = re.compile('test1|test2', re.I) test_node = fragment_fromstring('') test_node.set('class', 'test4 comment') self.assertFalse(check_node_attr(test_node, 'class', test_re)) def test_lacks_id(self): """Verify that a node does not have an id in our set.""" test_re = re.compile('test1|test2', re.I) test_node = fragment_fromstring('') test_node.set('id', 'test4') self.assertFalse(check_node_attr(test_node, 'id', test_re)) class TestLinkDensity(unittest.TestCase): """Verify we calc our link density correctly.""" def test_empty_node(self): """An empty node doesn't have much of a link density""" empty_div = to_unicode("") doc = Article(empty_div) assert 0 == get_link_density(doc._readable), "Link density is nadda" def test_small_doc_no_links(self): doc = Article(load_snippet('document_min.html')) assert 0 == get_link_density(doc._readable), "Still no link density" def test_several_links(self): """This doc has a 3 links with the majority of content.""" doc = Article(load_snippet('document_absolute_url.html')) self.assertAlmostEqual( get_link_density(doc._readable), 0.349, places=3) class TestClassWeight(unittest.TestCase): """Verify we score nodes correctly based on their class/id attributes.""" def test_no_matches_zero(self): """If you don't have the attribute then you get a weight of 0""" empty_div = to_unicode("") node = fragment_fromstring(empty_div) self.assertEqual(get_class_weight(node), 0) def test_id_hits(self): """If the id is in the list then it gets a weight""" test_div = 'This is a great amount of info
And more content Home