pull/23/merge
Jeffrey Nappi 10 years ago
commit a4813821cf

@@ -88,7 +88,7 @@ def build_document(html_content, base_href=None):
assert html_content is not None
if isinstance(html_content, unicode):
-html_content = html_content.encode("utf8", "replace")
+html_content = html_content.encode("utf8", "ignore")
try:
document = document_fromstring(html_content, parser=UTF8_PARSER)
@@ -126,7 +126,7 @@ class OriginalDocument(object):
html = self._html
if not isinstance(html, unicode):
encoding = determine_encoding(html)
-html = html.decode(encoding)
+html = html.decode(encoding, "ignore")
html = convert_breaks_to_paragraphs(html)
document = build_document(html, self._url)

Loading…
Cancel
Save