From 0751fe0c97a0cd5a4a5e099ee7d17d2fc73d3596 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mi=C5=A1o=20Belica?= Date: Sat, 31 Mar 2018 12:38:36 +0200 Subject: [PATCH] Fixed failing tests Problem was that when the input document was empty, lxml raised ParserError instead of XMLSyntaxError in new versions. --- breadability/document.py | 29 +++++++---------------------- 1 file changed, 7 insertions(+), 22 deletions(-) diff --git a/breadability/document.py b/breadability/document.py index b14c2f1..980fc1e 100644 --- a/breadability/document.py +++ b/breadability/document.py @@ -4,30 +4,15 @@ from __future__ import absolute_import -import re import logging -import chardet - -from lxml.etree import ( - tounicode, - XMLSyntaxError, -) -from lxml.html import ( - document_fromstring, - HTMLParser, -) +import re -from ._compat import ( - to_bytes, - to_unicode, - unicode, - unicode_compatible, -) -from .utils import ( - cached_property, - ignored, -) +import chardet +from lxml.etree import ParserError, XMLSyntaxError, tounicode +from lxml.html import HTMLParser, document_fromstring +from ._compat import to_bytes, to_unicode, unicode, unicode_compatible +from .utils import cached_property, ignored logger = logging.getLogger("breadability") @@ -111,7 +96,7 @@ def build_document(html_content, base_href=None): try: document = document_fromstring(html_content, parser=UTF8_PARSER) - except XMLSyntaxError: + except (ParserError, XMLSyntaxError): raise ValueError("Failed to parse document contents.") if base_href: