Use chardet rather than charade.

The changes from charade have been merged into upstream chardet,
and chardet is available in Debian/Ubuntu whereas charade is not.
pull/26/head
Jelmer Vernooij 10 years ago
parent e2f3391dc3
commit 6f912830c0

@@ -6,7 +6,7 @@ from __future__ import absolute_import
import re import re
import logging import logging
import charade import chardet
from lxml.etree import ( from lxml.etree import (
tounicode, tounicode,
@@ -43,7 +43,7 @@ CHARSET_META_TAG_PATTERN = re.compile(
def decode_html(html): def decode_html(html):
""" """
Converts bytes stream containing an HTML page into Unicode. Converts bytes stream containing an HTML page into Unicode.
Tries to guess character encoding from meta tag of by "charade" library. Tries to guess character encoding from meta tag of by "chardet" library.
""" """
if isinstance(html, unicode): if isinstance(html, unicode):
return html return html
@@ -69,7 +69,7 @@ def decode_html(html):
# try detect encoding # try detect encoding
encoding = "utf8" encoding = "utf8"
encoding_detector = charade.detect(text) encoding_detector = chardet.detect(text)
if encoding_detector["encoding"]: if encoding_detector["encoding"]:
encoding = encoding_detector["encoding"] encoding = encoding_detector["encoding"]

@@ -1,5 +1,5 @@
docopt>=0.6.1,<0.7 docopt>=0.6.1,<0.7
charade chardet
lxml lxml
nose-selecttests nose-selecttests

@@ -16,7 +16,7 @@ with open(join(CURRENT_DIRECTORY, "README.rst")) as readme:
install_requires = [ install_requires = [
"docopt>=0.6.1,<0.7", "docopt>=0.6.1,<0.7",
"charade", "chardet",
"lxml>=2.0", "lxml>=2.0",
] ]
tests_require = [ tests_require = [

Loading…
Cancel
Save