Tests pass for both Python v2.7 and v3.3

pull/21/head
Mišo Belica 11 years ago
parent 912bb50b76
commit 94f6b0a84e

@@ -2,6 +2,8 @@ language: python
python:
- "2.7"
- "2.6"
- "3.2"
- "3.3"
before_install: sudo apt-get install libxml2-dev libxslt-dev
# command to install dependencies
install: pip install -r requirements.txt --use-mirrors

@@ -1,3 +1,3 @@
VERSION = '0.1.11'
import client
from scripts import newtest
from ._version import VERSION
from .scripts import newtest
from . import client
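Python 3 dropped implicit relative imports, so the bare "import client" that used to resolve to breadability/client.py no longer does; the explicit relative form works on Python 2.6+ and Python 3 alike. A minimal illustration of the two forms, assuming code living inside the breadability package:

import client          # Python 2 only: implicit relative import, ImportError on Python 3
from . import client   # explicit relative import, works on Python 2.6+ and Python 3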

@@ -0,0 +1,95 @@
# -*- coding: utf8 -*-
from __future__ import absolute_import
from __future__ import division, print_function, unicode_literals

from sys import version_info


PY3 = version_info[0] == 3


if PY3:
    bytes = bytes
    unicode = str
else:
    bytes = str
    unicode = unicode
string_types = (bytes, unicode,)


try:
    callable = callable
except NameError:
    def callable(object):
        """Checks if given object is callable."""
        return hasattr(object, "__call__")


try:
    import urllib2 as urllib
except ImportError:
    import urllib.request as urllib


def to_string(object):
    return to_unicode(object) if PY3 else to_bytes(object)


def to_bytes(object):
    try:
        if isinstance(object, bytes):
            return object
        elif isinstance(object, unicode):
            return object.encode("utf8")
        else:
            # try encode instance to bytes
            return instance_to_bytes(object)
    except UnicodeError:
        # recover from codec error and use 'repr' function
        return to_bytes(repr(object))


def to_unicode(object):
    try:
        if isinstance(object, unicode):
            return object
        elif isinstance(object, bytes):
            return object.decode("utf8")
        else:
            # try decode instance to unicode
            return instance_to_unicode(object)
    except UnicodeError:
        # recover from codec error and use 'repr' function
        return to_unicode(repr(object))


def instance_to_bytes(instance):
    if PY3:
        if hasattr(instance, "__bytes__"):
            return bytes(instance)
        elif hasattr(instance, "__str__"):
            return unicode(instance).encode("utf8")
    else:
        if hasattr(instance, "__str__"):
            return bytes(instance)
        elif hasattr(instance, "__unicode__"):
            return unicode(instance).encode("utf8")

    return to_bytes(repr(instance))


def instance_to_unicode(instance):
    if PY3:
        if hasattr(instance, "__str__"):
            return unicode(instance)
        elif hasattr(instance, "__bytes__"):
            return bytes(instance).decode("utf8")
    else:
        if hasattr(instance, "__unicode__"):
            return unicode(instance)
        elif hasattr(instance, "__str__"):
            return bytes(instance).decode("utf8")

    return to_unicode(repr(instance))
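A minimal usage sketch of these helpers (the sample values are illustrative, not from the commit): to_bytes() and to_unicode() normalize any value to a single concrete type, while to_string() always yields the native str of the running interpreter:

from breadability._py3k import to_bytes, to_string, to_unicode, unicode

assert isinstance(to_unicode(b"<html/>"), unicode)  # always text: unicode on Py2, str on Py3
assert isinstance(to_bytes(u"čárky"), bytes)        # always bytes, encoded as UTF-8
assert isinstance(to_string(42), str)               # native str either way, via instance_to_*()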

@@ -0,0 +1 @@
VERSION = '0.1.11'

@@ -1,3 +1,7 @@
# -*- coding: utf8 -*-
from __future__ import absolute_import
import argparse
import codecs
import locale
@@ -7,11 +11,11 @@ import webbrowser
from tempfile import mkstemp
from breadability import VERSION
from breadability.logconfig import LOG
from breadability.logconfig import LNODE
from breadability.logconfig import set_logging_level
from breadability.readable import Article
from ._version import VERSION
from .logconfig import LOG
from .logconfig import LNODE
from .logconfig import set_logging_level
from .readable import Article
LOGLEVEL = 'WARNING'

@@ -1,5 +1,9 @@
# -*- coding: utf8 -*-
"""Generate a clean nice starting html document to process for an article."""
from __future__ import absolute_import
import chardet
import re
from lxml.etree import tostring
@@ -8,8 +12,9 @@ from lxml.etree import XMLSyntaxError
from lxml.html import document_fromstring
from lxml.html import HTMLParser
from breadability.logconfig import LOG
from breadability.utils import cached_property
from ._py3k import unicode, to_string
from .logconfig import LOG
from .utils import cached_property
utf8_parser = HTMLParser(encoding='utf-8')
@@ -60,7 +65,7 @@ def build_doc(page):
            page_unicode.encode('utf-8', 'replace'),
            parser=utf8_parser)
        return doc
    except XMLSyntaxError, exc:
    except XMLSyntaxError as exc:
        LOG.error('Failed to parse: ' + str(exc))
        raise ValueError('Failed to parse document contents.')
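The "except ExceptionType, name:" syntax was removed in Python 3; "except ExceptionType as name:" is accepted by both Python 2.6+ and 3.x, which is why the same substitution appears in the logconfig and scoring hunks below. A generic illustration, not taken from the commit:

try:
    int("not a number")
except ValueError as exc:  # 'except ValueError, exc:' is a SyntaxError on Python 3
    print(exc)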
@@ -75,7 +80,7 @@ class OriginalDocument(object):
    def __str__(self):
        """Render out our document as a string"""
        return tostring(self.html)
        return to_string(tostring(self.html))

    def __unicode__(self):
        """Render out our document as a string"""

@@ -121,7 +121,7 @@ class LogHelper(object):
        hashed = md5()
        try:
            hashed.update(content.encode('utf-8', errors="replace"))
        except Exception, exc:
        except Exception as exc:
            LOG.error("Cannot hash the current node." + str(exc))
        hash_id = hashed.hexdigest()[0:8]
        # if hash_id in ['9c880b27', '8393b7d7', '69bfebdd']:
@@ -162,7 +162,7 @@ class _LogFormatter(logging.Formatter):
    def format(self, record):
        try:
            record.message = record.getMessage()
        except Exception, e:
        except Exception as e:
            record.message = "Bad message (%r): %r" % (e, record.__dict__)
        record.asctime = time.strftime(
            "%y%m%d %H:%M:%S", self.converter(record.created))

@@ -1,3 +1,7 @@
# -*- coding: utf8 -*-
from __future__ import absolute_import
import re
from lxml.etree import tounicode
from lxml.etree import tostring
@@ -7,14 +11,14 @@ from lxml.html import fromstring
from operator import attrgetter
from pprint import PrettyPrinter
from breadability.document import OriginalDocument
from breadability.logconfig import LOG
from breadability.logconfig import LNODE
from breadability.scoring import score_candidates
from breadability.scoring import get_link_density
from breadability.scoring import get_class_weight
from breadability.scoring import is_unlikely_node
from breadability.utils import cached_property
from .document import OriginalDocument
from .logconfig import LOG
from .logconfig import LNODE
from .scoring import score_candidates
from .scoring import get_link_density
from .scoring import get_class_weight
from .scoring import is_unlikely_node
from .utils import cached_property
html_cleaner = Cleaner(scripts=True, javascript=True, comments=True,

@@ -1,10 +1,15 @@
# -*- coding: utf8 -*-
"""Handle dealing with scoring nodes and content for our parsing."""
from __future__ import absolute_import
import re
from hashlib import md5
from lxml.etree import tounicode
from breadability.logconfig import LNODE
from breadability.logconfig import LOG
from .logconfig import LNODE
from .logconfig import LOG
# A series of sets of attributes we check to help in determining if a node is
# a potential candidate or not.
@@ -38,7 +43,7 @@ def generate_hash_id(node):
    hashed = md5()
    try:
        hashed.update(content.encode('utf-8', "replace"))
    except Exception, e:
    except Exception as e:
        LOG.error("BOOM! " + str(e))
    return hashed.hexdigest()[0:8]
@@ -153,7 +158,7 @@ def score_candidates(nodes):
        # For every 100 characters in this paragraph, add another point.
        # Up to 3 points.
        length_points = len(innertext) / 100
        length_points = len(innertext) // 100
        if length_points > 3:
            content_score += 3
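On Python 3 the / operator performs true division even between integers, so the per-100-characters bonus would become a float; // keeps the integer result the Python 2 code produced. For example:

>>> 250 / 100   # true division on Python 3
2.5
>>> 250 // 100  # floor division, matches the original integer scoring
2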

@@ -1,10 +1,10 @@
import argparse
import codecs
import urllib2
from os import mkdir
from os import path
from breadability import VERSION
from .._version import VERSION
from .._py3k import urllib
TESTPATH = path.join(
@@ -87,7 +87,7 @@ def make_files(dirname):
def fetch_article(dirname, url):
    """Get the content of the url and make it the article.html"""
    opener = urllib2.build_opener()
    opener = urllib.build_opener()
    opener.addheaders = [('Accept-Charset', 'utf-8')]
    url_response = opener.open(url)
    dl_html = url_response.read().decode('utf-8')
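build_opener() lives in urllib2 on Python 2 and in urllib.request on Python 3; because _py3k imports whichever is available under the single name urllib, the fetch code stays identical on both. A sketch with a hypothetical URL:

from breadability._py3k import urllib  # urllib2 on Python 2, urllib.request on Python 3

opener = urllib.build_opener()
opener.addheaders = [('Accept-Charset', 'utf-8')]
html = opener.open("http://example.com/").read().decode('utf-8')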

@@ -6,6 +6,7 @@ try:
except ImportError:
    import unittest
from breadability._py3k import to_unicode
from breadability.document import OriginalDocument
from utils import load_snippet
@@ -17,7 +18,7 @@ class TestOriginalDocument(unittest.TestCase):
    def test_readin_min_document(self):
        """Verify we can read in a min html document"""
        doc = OriginalDocument(load_snippet('document_min.html'))
        self.assertTrue(str(doc).startswith(u'<html>'))
        self.assertTrue(to_unicode(doc).startswith(u'<html>'))
        self.assertEqual(doc.title, 'Min Document Title')

    def test_readin_with_base_url(self):
@@ -25,7 +26,7 @@ class TestOriginalDocument(unittest.TestCase):
        doc = OriginalDocument(
            load_snippet('document_absolute_url.html'),
            url="http://blog.mitechie.com/test.html")
        self.assertTrue(str(doc).startswith(u'<html>'))
        self.assertTrue(to_unicode(doc).startswith(u'<html>'))
        # find the links on the page and make sure each one starts with our
        # base url we told it to use.
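The assertions switch to to_unicode(doc) because to_unicode() falls back to instance_to_unicode() for arbitrary objects, calling the document's __unicode__ on Python 2 and __str__ on Python 3, so the comparison always runs against text. Roughly:

doc = OriginalDocument(load_snippet('document_min.html'))
text = to_unicode(doc)  # unicode(doc) on Python 2, str(doc) on Python 3
assert text.startswith(u'<html>')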
