Replace implementation of 'cached_property'

Parameter 'ttl' isn't needed.
pull/21/head
Mišo Belica 11 years ago
parent e3b6ee2fd6
commit cc00976533

@ -102,17 +102,17 @@ class OriginalDocument(object):
doc.resolve_base_href()
return doc
@cached_property(ttl=600)
@cached_property
def html(self):
"""The parsed html document from the input"""
return self._parse(self.orig_html)
@cached_property(ttl=600)
@cached_property
def links(self):
"""Links within the document"""
return self.html.findall(".//a")
@cached_property(ttl=600)
@cached_property
def title(self):
"""Pull the title attribute out of the parsed document"""
titleElem = self.html.find('.//title')

@ -421,12 +421,12 @@ class Article(object):
self.fragment = fragment
def __str__(self):
return tostring(self._readable)
return tostring(self._readable())
def __unicode__(self):
return tounicode(self._readable)
return tounicode(self._readable())
@cached_property(ttl=600)
@cached_property
def doc(self):
"""The doc is the parsed xml tree of the given html."""
try:
@ -439,7 +439,7 @@ class Article(object):
except ValueError:
return None
@cached_property(ttl=600)
@cached_property
def candidates(self):
"""Generate the list of candidates from the doc."""
doc = self.doc
@ -450,11 +450,14 @@ class Article(object):
else:
return None
@cached_property(ttl=600)
@cached_property
def readable(self):
return tounicode(self._readable)
return tounicode(self.readable_dom)
@cached_property
def readable_dom(self):
return self._readable()
@cached_property(ttl=600)
def _readable(self):
"""The readable parsed article"""
if self.candidates:

@ -1,61 +1,23 @@
import time
# -*- coding: utf8 -*-
#
# © 2011 Christopher Arndt, MIT License
#
class cached_property(object):
'''Decorator for read-only properties evaluated only once within TTL
period.
def cached_property(getter):
"""
Decorator that converts a method into a memoized property.
The decorator will work as expected only for immutable properties.
"""
def decorator(self):
if not hasattr(self, "__cached_property_data"):
self.__cached_property_data = {}
It can be used to create a cached property like this::
key = getter.__name__
if key not in self.__cached_property_data:
self.__cached_property_data[key] = getter(self)
import random
return self.__cached_property_data[key]
# the class containing the property must be a new-style class
class MyClass(object):
# create property whose value is cached for ten minutes
@cached_property(ttl=600)
def randint(self):
# will only be evaluated every 10 min. at maximum.
return random.randint(0, 100)
decorator.__name__ = getter.__name__
decorator.__module__ = getter.__module__
decorator.__doc__ = getter.__doc__
The value is cached in the '_cache' attribute of the object instance that
has the property getter method wrapped by this decorator. The '_cache'
attribute value is a dictionary which has a key for every property of the
object which is wrapped by this decorator. Each entry in the cache is
created only when the property is accessed for the first time and is a
two-element tuple with the last computed property value and the last time
it was updated in seconds since the epoch.
The default time-to-live (TTL) is 300 seconds (5 minutes). Set the TTL to
zero for the cached value to never expire.
To expire a cached property value manually just do::
del instance._cache[<property name>]
'''
def __init__(self, ttl=300):
self.ttl = ttl
def __call__(self, fget, doc=None):
self.fget = fget
self.__doc__ = doc or fget.__doc__
self.__name__ = fget.__name__
self.__module__ = fget.__module__
return self
def __get__(self, inst, owner):
now = time.time()
try:
value, last_update = inst._cache[self.__name__]
if self.ttl > 0 and now - last_update > self.ttl:
raise AttributeError
except (KeyError, AttributeError):
value = self.fget(inst)
try:
cache = inst._cache
except AttributeError:
cache = inst._cache = {}
cache[self.__name__] = (value, now)
return value
return property(decorator)

@ -25,12 +25,12 @@ class TestReadableDocument(unittest.TestCase):
"""We get back an element tree from our original doc"""
doc = Article(load_snippet('document_min.html'))
# We get back the document as a div tag currently by default.
self.assertEqual(doc._readable.tag, 'div')
self.assertEqual(doc.readable_dom.tag, 'div')
def test_doc_no_scripts_styles(self):
"""Step #1 remove all scripts from the document"""
doc = Article(load_snippet('document_scripts.html'))
readable = doc._readable
readable = doc.readable_dom
self.assertEqual(readable.findall(".//script"), [])
self.assertEqual(readable.findall(".//style"), [])
self.assertEqual(readable.findall(".//link"), [])
@ -42,8 +42,8 @@ class TestReadableDocument(unittest.TestCase):
"""
doc = Article(load_snippet('document_min.html'))
self.assertEqual(doc._readable.tag, 'div')
self.assertEqual(doc._readable.get('id'), 'readabilityBody')
self.assertEqual(doc.readable_dom.tag, 'div')
self.assertEqual(doc.readable_dom.get('id'), 'readabilityBody')
def test_body_doesnt_exist(self):
"""If we can't find a body, then we create one.
@ -52,8 +52,8 @@ class TestReadableDocument(unittest.TestCase):
"""
doc = Article(load_snippet('document_no_body.html'))
self.assertEqual(doc._readable.tag, 'div')
self.assertEqual(doc._readable.get('id'), 'readabilityBody')
self.assertEqual(doc.readable_dom.tag, 'div')
self.assertEqual(doc.readable_dom.get('id'), 'readabilityBody')
def test_bare_content(self):
"""If the document is just pure content, no html tags we should be ok
@ -62,16 +62,16 @@ class TestReadableDocument(unittest.TestCase):
"""
doc = Article(load_snippet('document_only_content.html'))
self.assertEqual(doc._readable.tag, 'div')
self.assertEqual(doc._readable.get('id'), 'readabilityBody')
self.assertEqual(doc.readable_dom.tag, 'div')
self.assertEqual(doc.readable_dom.get('id'), 'readabilityBody')
def test_no_content(self):
"""Without content we supply an empty unparsed doc."""
doc = Article('')
self.assertEqual(doc._readable.tag, 'div')
self.assertEqual(doc._readable.get('id'), 'readabilityBody')
self.assertEqual(doc._readable.get('class'), 'parsing-error')
self.assertEqual(doc.readable_dom.tag, 'div')
self.assertEqual(doc.readable_dom.get('id'), 'readabilityBody')
self.assertEqual(doc.readable_dom.get('class'), 'parsing-error')
class TestCleaning(unittest.TestCase):
@ -80,7 +80,7 @@ class TestCleaning(unittest.TestCase):
def test_unlikely_hits(self):
"""Verify we wipe out things from our unlikely list."""
doc = Article(load_snippet('test_readable_unlikely.html'))
readable = doc._readable
readable = doc.readable_dom
must_not_appear = ['comment', 'community', 'disqus', 'extra', 'foot',
'header', 'menu', 'remark', 'rss', 'shoutbox', 'sidebar',
'sponsor', 'ad-break', 'agegate', 'pagination' '', 'pager',

@ -65,17 +65,17 @@ class TestLinkDensity(unittest.TestCase):
"""An empty node doesn't have much of a link density"""
empty_div = to_unicode("<div></div>")
doc = Article(empty_div)
assert 0 == get_link_density(doc._readable), "Link density is nadda"
assert 0 == get_link_density(doc.readable_dom), "Link density is nadda"
def test_small_doc_no_links(self):
doc = Article(load_snippet('document_min.html'))
assert 0 == get_link_density(doc._readable), "Still no link density"
assert 0 == get_link_density(doc.readable_dom), "Still no link density"
def test_several_links(self):
"""This doc has 3 links with the majority of content."""
doc = Article(load_snippet('document_absolute_url.html'))
self.assertAlmostEqual(
get_link_density(doc._readable), 0.349,
get_link_density(doc.readable_dom), 0.349,
places=3)

Loading…
Cancel
Save