Use chardet rather than charade.

The changes from charade have been merged into upstream chardet,
and chardet is available in Debian/Ubuntu whereas charade is not.
pull/26/head
Jelmer Vernooij 10 years ago
parent e2f3391dc3
commit 6f912830c0

@ -6,7 +6,7 @@ from __future__ import absolute_import
import re
import logging
import charade
import chardet
from lxml.etree import (
tounicode,
@ -43,7 +43,7 @@ CHARSET_META_TAG_PATTERN = re.compile(
def decode_html(html):
"""
Converts bytes stream containing an HTML page into Unicode.
Tries to guess character encoding from meta tag of by "charade" library.
Tries to guess character encoding from meta tag or by "chardet" library.
"""
if isinstance(html, unicode):
return html
@ -69,7 +69,7 @@ def decode_html(html):
# try detect encoding
encoding = "utf8"
encoding_detector = charade.detect(text)
encoding_detector = chardet.detect(text)
if encoding_detector["encoding"]:
encoding = encoding_detector["encoding"]

@ -1,5 +1,5 @@
docopt>=0.6.1,<0.7
charade
chardet
lxml
nose-selecttests

@ -16,7 +16,7 @@ with open(join(CURRENT_DIRECTORY, "README.rst")) as readme:
install_requires = [
"docopt>=0.6.1,<0.7",
"charade",
"chardet",
"lxml>=2.0",
]
tests_require = [

Loading…
Cancel
Save