Update to catch back up to Craig's image helper

pull/23/head
Richard Harding 11 years ago
parent 433195e122
commit dc0493f99b

@ -90,7 +90,15 @@ def get_link_density(node, node_text=None):
if text_length == 0:
return 0.0
links_length = sum(map(_get_normalized_text_length, node.findall(".//a")))
link_length = sum([len(a.text_content()) or 0
for a in node.findall(".//a")])
# For each img, discount 50 chars from the total link text length (never below 0).
# Tweaking this 50 down a notch should help if we hit false positives.
links_length = max(link_length -
sum([50 for img in node.findall(".//img")]), 0)
return links_length / text_length

@ -37,7 +37,10 @@ from ..readable import Article
HEADERS = {
"User-Agent": "Readability (Readable content parser) Version/%s" % __version__,
"User-Agent": 'breadability/{version} ({url})'.format(
url="https://github.com/bookieio/breadability",
version=__version__
)
}

@ -5,7 +5,7 @@ try:
except ImportError:
import unittest
from breadability.readable import Article
from readability.readable import Article
class TestBusinessInsiderArticle(unittest.TestCase):

@ -5,7 +5,7 @@ try:
except ImportError:
import unittest
from breadability.readable import Article
from readability.readable import Article
class TestSweetsharkBlog(unittest.TestCase):

Loading…
Cancel
Save