From 347f3ea0b59d57a99829ad7bca60238747bceacd Mon Sep 17 00:00:00 2001 From: Richard Harding Date: Thu, 2 Jan 2014 21:24:37 -0500 Subject: [PATCH] Lint --- Makefile | 7 +++++++ breadability/_compat.py | 4 ++++ breadability/document.py | 11 +++++++++-- 3 files changed, 20 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index eaf10c1..45a8ee3 100644 --- a/Makefile +++ b/Makefile @@ -49,6 +49,13 @@ clean_all: clean_venv fi +bin/flake8: venv + bin/pip install flake8 + +lint: bin/flake8 + flake8 breadability + + # ########### # Deploy # ########### diff --git a/breadability/_compat.py b/breadability/_compat.py index 76f5c98..dcc7d47 100644 --- a/breadability/_compat.py +++ b/breadability/_compat.py @@ -19,9 +19,13 @@ string_types = (bytes, unicode,) try: + # Assert to hush pyflakes about the unused import. This is a _compat + # module and we expect this to aid in other code importing urllib. import urllib2 as urllib + assert urllib except ImportError: import urllib.request as urllib + assert urllib def unicode_compatible(cls): diff --git a/breadability/document.py b/breadability/document.py index 1d9920d..8c08523 100644 --- a/breadability/document.py +++ b/breadability/document.py @@ -30,6 +30,9 @@ logger = logging.getLogger("breadability") TAG_MARK_PATTERN = re.compile(to_bytes(r"]*>\s*")) +UTF8_PARSER = HTMLParser(encoding="utf8") + + def determine_encoding(page): encoding = "utf8" text = TAG_MARK_PATTERN.sub(to_bytes(" "), page) @@ -54,7 +57,12 @@ def determine_encoding(page): return encoding -BREAK_TAGS_PATTERN = re.compile(to_unicode(r"(?:<\s*[bh]r[^>]*>\s*)+"), re.IGNORECASE) +BREAK_TAGS_PATTERN = re.compile( + to_unicode(r"(?:<\s*[bh]r[^>]*>\s*)+"), + re.IGNORECASE +) + + def convert_breaks_to_paragraphs(html): """ Converts
<hr> tag and multiple
<br> tags into paragraph. @@ -75,7 +83,6 @@ def _replace_break_tags(match): return tags -UTF8_PARSER = HTMLParser(encoding="utf8") def build_document(html_content, base_href=None): """Requires that the `html_content` not be None""" assert html_content is not None