pull/26/merge
Jelmer Vernooij 10 years ago
commit badf625184

@ -6,7 +6,7 @@ from __future__ import absolute_import
import re
import logging
import charade
import chardet
from lxml.etree import (
tounicode,
@ -43,7 +43,7 @@ CHARSET_META_TAG_PATTERN = re.compile(
def decode_html(html):
"""
Converts bytes stream containing an HTML page into Unicode.
Tries to guess character encoding from meta tag of by "charade" library.
Tries to guess character encoding from meta tag or by "chardet" library.
"""
if isinstance(html, unicode):
return html
@ -69,7 +69,7 @@ def decode_html(html):
# try detect encoding
encoding = "utf8"
encoding_detector = charade.detect(text)
encoding_detector = chardet.detect(text)
if encoding_detector["encoding"]:
encoding = encoding_detector["encoding"]

@ -1,5 +1,5 @@
docopt>=0.6.1,<0.7
charade
chardet
lxml
nose-selecttests

@ -16,7 +16,7 @@ with open(join(CURRENT_DIRECTORY, "README.rst")) as readme:
install_requires = [
"docopt>=0.6.1,<0.7",
"charade",
"chardet",
"lxml>=2.0",
]
tests_require = [

Loading…
Cancel
Save