Replace implementation of 'cached_property'

Parameter 'ttl' isn't needed.
11 years ago · cc00976533
parent e3b6ee2fd6
commit cc00976533
5 changed files with 45 additions and 80 deletions
--- a/breadability/document.py
+++ b/breadability/document.py
@@ -102,17 +102,17 @@ class OriginalDocument(object):
            doc.resolve_base_href()
        return doc

-    @cached_property(ttl=600)
+    @cached_property
    def html(self):
        """The parsed html document from the input"""
        return self._parse(self.orig_html)

-    @cached_property(ttl=600)
+    @cached_property
    def links(self):
        """Links within the document"""
        return self.html.findall(".//a")

-    @cached_property(ttl=600)
+    @cached_property
    def title(self):
        """Pull the title attribute out of the parsed document"""
        titleElem = self.html.find('.//title')
--- a/breadability/readable.py
+++ b/breadability/readable.py
@@ -421,12 +421,12 @@ class Article(object):
        self.fragment = fragment

    def __str__(self):
-        return tostring(self._readable)
+        return tostring(self._readable())

    def __unicode__(self):
-        return tounicode(self._readable)
+        return tounicode(self._readable())

-    @cached_property(ttl=600)
+    @cached_property
    def doc(self):
        """The doc is the parsed xml tree of the given html."""
        try:
@@ -439,7 +439,7 @@ class Article(object):
        except ValueError:
            return None

-    @cached_property(ttl=600)
+    @cached_property
    def candidates(self):
        """Generate the list of candidates from the doc."""
        doc = self.doc
@@ -450,11 +450,14 @@ class Article(object):
        else:
            return None

-    @cached_property(ttl=600)
+    @cached_property
    def readable(self):
-        return tounicode(self._readable)
+        return tounicode(self.readable_dom)
+
+    @cached_property
+    def readable_dom(self):
+        return self._readable()

-    @cached_property(ttl=600)
    def _readable(self):
        """The readable parsed article"""
        if self.candidates:
--- a/breadability/utils.py
+++ b/breadability/utils.py
@@ -1,61 +1,23 @@
-import time
+# -*- coding: utf8 -*-


-#
-# © 2011 Christopher Arndt, MIT License
-#
-class cached_property(object):
-    '''Decorator for read-only properties evaluated only once within TTL
-    period.
+def cached_property(getter):
+    """
+    Decorator that converts a method into memoized property.
+    The decorator will work as expected only for immutable properties.
+    """
+    def decorator(self):
+        if not hasattr(self, "__cached_property_data"):
+            self.__cached_property_data = {}

-    It can be used to created a cached property like this::
+        key = getter.__name__
+        if key not in self.__cached_property_data:
+            self.__cached_property_data[key] = getter(self)

-        import random
+        return self.__cached_property_data[key]

-        # the class containing the property must be a new-style class
-        class MyClass(object):
-            # create property whose value is cached for ten minutes
-            @cached_property(ttl=600) def randint(self):
-                # will only be evaluated every 10 min. at maximum.
-                return random.randint(0, 100)
+    decorator.__name__ = getter.__name__
+    decorator.__module__ = getter.__module__
+    decorator.__doc__ = getter.__doc__

-    The value is cached  in the '_cache' attribute of the object instance that
-    has the property getter method wrapped by this decorator. The '_cache'
-    attribute value is a dictionary which has a key for every property of the
-    object which is wrapped by this decorator. Each entry in the cache is
-    created only when the property is accessed for the first time and is a
-    two-element tuple with the last computed property value and the last time
-    it was updated in seconds since the epoch.
-
-    The default time-to-live (TTL) is 300 seconds (5 minutes). Set the TTL to
-    zero for the cached value to never expire.
-
-    To expire a cached property value manually just do::
-
-        del instance._cache[<property name>]
-
-    '''
-    def __init__(self, ttl=300):
-        self.ttl = ttl
-
-    def __call__(self, fget, doc=None):
-        self.fget = fget
-        self.__doc__ = doc or fget.__doc__
-        self.__name__ = fget.__name__
-        self.__module__ = fget.__module__
-        return self
-
-    def __get__(self, inst, owner):
-        now = time.time()
-        try:
-            value, last_update = inst._cache[self.__name__]
-            if self.ttl > 0 and now - last_update > self.ttl:
-                raise AttributeError
-        except (KeyError, AttributeError):
-            value = self.fget(inst)
-            try:
-                cache = inst._cache
-            except AttributeError:
-                cache = inst._cache = {}
-            cache[self.__name__] = (value, now)
-        return value
+    return property(decorator)
--- a/tests/test_readable.py
+++ b/tests/test_readable.py
@@ -25,12 +25,12 @@ class TestReadableDocument(unittest.TestCase):
        """We get back an element tree from our original doc"""
        doc = Article(load_snippet('document_min.html'))
        # We get back the document as a div tag currently by default.
-        self.assertEqual(doc._readable.tag, 'div')
+        self.assertEqual(doc.readable_dom.tag, 'div')

    def test_doc_no_scripts_styles(self):
        """Step #1 remove all scripts from the document"""
        doc = Article(load_snippet('document_scripts.html'))
-        readable = doc._readable
+        readable = doc.readable_dom
        self.assertEqual(readable.findall(".//script"), [])
        self.assertEqual(readable.findall(".//style"), [])
        self.assertEqual(readable.findall(".//link"), [])
@@ -42,8 +42,8 @@ class TestReadableDocument(unittest.TestCase):

        """
        doc = Article(load_snippet('document_min.html'))
-        self.assertEqual(doc._readable.tag, 'div')
-        self.assertEqual(doc._readable.get('id'), 'readabilityBody')
+        self.assertEqual(doc.readable_dom.tag, 'div')
+        self.assertEqual(doc.readable_dom.get('id'), 'readabilityBody')

    def test_body_doesnt_exist(self):
        """If we can't find a body, then we create one.
@@ -52,8 +52,8 @@ class TestReadableDocument(unittest.TestCase):

        """
        doc = Article(load_snippet('document_no_body.html'))
-        self.assertEqual(doc._readable.tag, 'div')
-        self.assertEqual(doc._readable.get('id'), 'readabilityBody')
+        self.assertEqual(doc.readable_dom.tag, 'div')
+        self.assertEqual(doc.readable_dom.get('id'), 'readabilityBody')

    def test_bare_content(self):
        """If the document is just pure content, no html tags we should be ok
@@ -62,16 +62,16 @@ class TestReadableDocument(unittest.TestCase):

        """
        doc = Article(load_snippet('document_only_content.html'))
-        self.assertEqual(doc._readable.tag, 'div')
-        self.assertEqual(doc._readable.get('id'), 'readabilityBody')
+        self.assertEqual(doc.readable_dom.tag, 'div')
+        self.assertEqual(doc.readable_dom.get('id'), 'readabilityBody')


    def test_no_content(self):
        """Without content we supply an empty unparsed doc."""
        doc = Article('')
-        self.assertEqual(doc._readable.tag, 'div')
-        self.assertEqual(doc._readable.get('id'), 'readabilityBody')
-        self.assertEqual(doc._readable.get('class'), 'parsing-error')
+        self.assertEqual(doc.readable_dom.tag, 'div')
+        self.assertEqual(doc.readable_dom.get('id'), 'readabilityBody')
+        self.assertEqual(doc.readable_dom.get('class'), 'parsing-error')


 class TestCleaning(unittest.TestCase):
@@ -80,7 +80,7 @@ class TestCleaning(unittest.TestCase):
    def test_unlikely_hits(self):
        """Verify we wipe out things from our unlikely list."""
        doc = Article(load_snippet('test_readable_unlikely.html'))
-        readable = doc._readable
+        readable = doc.readable_dom
        must_not_appear = ['comment', 'community', 'disqus', 'extra', 'foot',
                'header', 'menu', 'remark', 'rss', 'shoutbox', 'sidebar',
                'sponsor', 'ad-break', 'agegate', 'pagination' '', 'pager',
--- a/tests/test_scoring.py
+++ b/tests/test_scoring.py
@@ -65,17 +65,17 @@ class TestLinkDensity(unittest.TestCase):
        """An empty node doesn't have much of a link density"""
        empty_div = to_unicode("<div></div>")
        doc = Article(empty_div)
-        assert 0 == get_link_density(doc._readable), "Link density is nadda"
+        assert 0 == get_link_density(doc.readable_dom), "Link density is nadda"

    def test_small_doc_no_links(self):
        doc = Article(load_snippet('document_min.html'))
-        assert 0 == get_link_density(doc._readable), "Still no link density"
+        assert 0 == get_link_density(doc.readable_dom), "Still no link density"

    def test_several_links(self):
        """This doc has a 3 links with the majority of content."""
        doc = Article(load_snippet('document_absolute_url.html'))
        self.assertAlmostEqual(
-                get_link_density(doc._readable), 0.349,
+                get_link_density(doc.readable_dom), 0.349,
                places=3)