From 60da675da5b4d2e600fe4ba58803035e238a2a25 Mon Sep 17 00:00:00 2001 From: Richard Harding Date: Mon, 27 Aug 2012 17:31:14 -0400 Subject: [PATCH] Reprocess without candidate in case of errors using one - Fixes #10 --- NEWS.txt | 9 +++++++++ setup.py | 2 +- src/breadability/__init__.py | 2 +- src/breadability/readable.py | 31 +++++++++++++++++++++---------- 4 files changed, 32 insertions(+), 12 deletions(-) diff --git a/NEWS.txt b/NEWS.txt index 21da42f..1787072 100644 --- a/NEWS.txt +++ b/NEWS.txt @@ -6,6 +6,15 @@ News ==== +0.1.9 +------ + +* Release date: Aug 27th 2012* + +* In case of an issue dealing with candidates we need to act like we didn't + find any candidates for the article content. #10 + + 0.1.8 ------ diff --git a/setup.py b/setup.py index 4aa06af..e364522 100644 --- a/setup.py +++ b/setup.py @@ -6,7 +6,7 @@ README = open(os.path.join(here, 'README.rst')).read() NEWS = open(os.path.join(here, 'NEWS.txt')).read() -version = '0.1.8' +version = '0.1.9' install_requires = [ # List your project dependencies here. 
# For more details, see: diff --git a/src/breadability/__init__.py b/src/breadability/__init__.py index 3849284..79639a6 100644 --- a/src/breadability/__init__.py +++ b/src/breadability/__init__.py @@ -1,3 +1,3 @@ -VERSION = '0.1.8' +VERSION = '0.1.9' import client from scripts import newtest diff --git a/src/breadability/readable.py b/src/breadability/readable.py index 8497474..fcfff75 100644 --- a/src/breadability/readable.py +++ b/src/breadability/readable.py @@ -479,18 +479,29 @@ class Article(object): updated_winner = check_siblings(winner, self.candidates) LOG.debug('Begin final prep of article') updated_winner.node = prep_article(updated_winner.node) - doc = build_base_document(updated_winner.node, self.fragment) + if updated_winner.node is not None: + doc = build_base_document(updated_winner.node, self.fragment) + else: + LOG.warning('Had candidates but failed to find a cleaned winning doc.') + doc = self._handle_no_candidates() else: LOG.warning('No candidates found: using document.') LOG.debug('Begin final prep of article') - # since we've not found a good candidate we're should help this - if self.doc is not None and len(self.doc): - # cleanup by removing the should_drop we spotted. - [n.drop_tree() for n in self._should_drop] - doc = prep_article(self.doc) - doc = build_base_document(doc, self.fragment) - else: - LOG.warning('No document to use.') - doc = build_error_document(self.fragment) + doc = self._handle_no_candidates() + + return doc + + def _handle_no_candidates(self): + """If we fail to find a good candidate we need to find something else.""" + # since we've not found a good candidate we're should help this + if self.doc is not None and len(self.doc): + # cleanup by removing the should_drop we spotted. 
+ [n.drop_tree() for n in self._should_drop + if n.getparent() is not None] + doc = prep_article(self.doc) + doc = build_base_document(doc, self.fragment) + else: + LOG.warning('No document to use.') + doc = build_error_document(self.fragment) return doc