Garden and lint

12 years ago · e83a753b82
parent 6d380712c5
commit e83a753b82
4 changed files with 46 additions and 27 deletions
--- a/src/breadability/logconfig.py
+++ b/src/breadability/logconfig.py
@@ -22,8 +22,6 @@ except ImportError:
 LOGLEVEL = "WARNING"


-
-
 # Logging bits stolen and adapted from:
 # http://www.tornadoweb.org/documentation/_modules/tornado/options.html
 LogOptions = namedtuple('LogOptions', [
@@ -43,7 +41,6 @@ options = LogOptions(
 )


-
 def set_logging_level(level):
    """Adjust the current logging level.

@@ -120,8 +117,8 @@ class LogHelper(object):
            hashed = md5()
            try:
                hashed.update(content.encode('utf-8', errors="replace"))
-            except Exception, e:
-                LOG.error("Cannot hash the current node.")
+            except Exception, exc:
+                LOG.error("Cannot hash the current node." + str(exc))
            hash_id = hashed.hexdigest()[0:8]
            # if hash_id in ['9c880b27', '8393b7d7', '69bfebdd']:
            print(u"{0} :: {1}\n{2}".format(
--- a/src/breadability/readable.py
+++ b/src/breadability/readable.py
@@ -51,11 +51,10 @@ def drop_tag(doc, *tags):
    return doc


-
 def ok_embedded_video(node):
    """Check if this embed/video is an ok one to count."""
    keep_keywords = ['youtube', 'blip.tv', 'vimeo']
-    node_str = tounicode(n)
+    node_str = tounicode(node)
    for key in keep_keywords:
        if key in node_str:
            return True
@@ -305,7 +304,8 @@ def prep_article(doc):
                # we could get around this by checking for caption info in the
                # images to try to do some scoring of good v. bad images.
                # failing example:
-                # arstechnica.com/science/news/2012/05/1859s-great-auroral-stormthe-week-the-sun-touched-the-earth.ars
+                # arstechnica.com/science/news/2012/05/1859s
+                # -great-auroral-stormthe-week-the-sun-touched-the-earth.ars
                LNODE.log(node, 2, 'Conditional drop: img > p')
                remove_node = True
            elif li > p and node.tag != 'ul' and node.tag != 'ol':
@@ -315,16 +315,20 @@ def prep_article(doc):
                LNODE.log(node, 2, 'Conditional drop: inputs > p/3.0')
                remove_node = True
            elif content_length < 25 and (img == 0 or img > 2):
-                LNODE.log(node, 2, 'Conditional drop: len < 25 and 0/>2 images')
+                LNODE.log(node, 2,
+                    'Conditional drop: len < 25 and 0/>2 images')
                remove_node = True
            elif weight < 25 and link_density > 0.2:
-                LNODE.log(node, 2, 'Conditional drop: weight small and link is dense')
+                LNODE.log(node, 2,
+                    'Conditional drop: weight small and link is dense')
                remove_node = True
            elif weight >= 25 and link_density > 0.5:
-                LNODE.log(node, 2, 'Conditional drop: weight big but link heavy')
+                LNODE.log(node, 2,
+                    'Conditional drop: weight big but link heavy')
                remove_node = True
            elif (embed == 1 and content_length < 75) or embed > 1:
-                LNODE.log(node, 2, 'Conditional drop: embed without much content or many embed')
+                LNODE.log(node, 2,
+                    'Conditional drop: embed w/o much content or many embed')
                remove_node = True
            return remove_node

--- a/src/breadability/scoring.py
+++ b/src/breadability/scoring.py
@@ -37,7 +37,8 @@ def get_link_density(node, node_text=None):
    :returns float:

    """
-    link_length = sum([len(a.text_content()) or 0 for a in node.findall(".//a")])
+    link_length = sum([len(a.text_content()) or 0
+        for a in node.findall(".//a")])
    if node_text:
        text_length = len(node_text)
    else:
@@ -98,12 +99,16 @@ def score_candidates(nodes):
        innertext = node.text_content()

        if parent is None or grand is None:
-            LNODE.log(node, 1, "Skipping candidate because parent/grand are none")
+            LNODE.log(
+                node, 1,
+                "Skipping candidate because parent/grand are none")
            continue

        # If this paragraph is less than 25 characters, don't even count it.
        if innertext and len(innertext) < MIN_HIT_LENTH:
-            LNODE.log(node, 1, "Skipping candidate because not enough content.")
+            LNODE.log(
+                node, 1,
+                "Skipping candidate because not enough content.")
            continue

        # Initialize readability data for the parent.
@@ -128,21 +133,36 @@ def score_candidates(nodes):
            content_score += 3
        else:
            content_score += length_points
-        LNODE.log(node, 1, "Length/content points: {0} : {1}".format(length_points, content_score))
+        LNODE.log(
+            node, 1,
+            "Length/content points: {0} : {1}".format(length_points,
+                                                      content_score))

        # Add the score to the parent.
        LNODE.log(node, 1, "From this current node.")
        candidates[parent].content_score += content_score
-        LNODE.log(candidates[parent].node, 1, "Giving parent bonus points: " + str(candidates[parent].content_score))
+        LNODE.log(
+            candidates[parent].node,
+            1,
+            "Giving parent bonus points: " + str(
+                candidates[parent].content_score))
        # The grandparent gets half.
        LNODE.log(candidates[grand].node, 1, "Giving grand bonus points")
        candidates[grand].content_score += (content_score / 2.0)
-        LNODE.log(candidates[parent].node, 1, "Giving grand bonus points: " + str(candidates[grand].content_score))
+        LNODE.log(
+            candidates[parent].node,
+            1,
+            "Giving grand bonus points: " + str(
+                candidates[grand].content_score))

    for candidate in candidates.values():
-        LNODE.log(candidate.node, 1, "Getting link density adjustment: {0} * {1} ".format(
-            candidate.content_score, (1 - get_link_density(candidate.node))))
-        candidate.content_score = candidate.content_score * (1 - get_link_density(candidate.node))
+        adjustment = 1 - get_link_density(candidate.node)
+        LNODE.log(
+            candidate.node,
+            1,
+            "Getting link density adjustment: {0} * {1} ".format(
+                candidate.content_score, adjustment))
+        candidate.content_score = candidate.content_score * (adjustment)

    return candidates

--- a/src/breadability/utils.py
+++ b/src/breadability/utils.py
@@ -1,12 +1,11 @@
 import time

-
-
 #
 # © 2011 Christopher Arndt, MIT License
 #
 class cached_property(object):
-    '''Decorator for read-only properties evaluated only once within TTL period.
+    '''Decorator for read-only properties evaluated only once within TTL
+    period.

    It can be used to created a cached property like this::

@@ -15,8 +14,7 @@ class cached_property(object):
        # the class containing the property must be a new-style class
        class MyClass(object):
            # create property whose value is cached for ten minutes
-            @cached_property(ttl=600)
-            def randint(self):
+            @cached_property(ttl=600) def randint(self):
                # will only be evaluated every 10 min. at maximum.
                return random.randint(0, 100)

@@ -32,7 +30,7 @@ class cached_property(object):
    zero for the cached value to never expire.

    To expire a cached property value manually just do::
-    
+
        del instance._cache[<property name>]

    '''