Allow passing unicode objects
This commit is contained in:
parent
ad38fac40a
commit
f3d0a8d842
@@ -10,9 +10,12 @@ logging.getLogger().setLevel(logging.DEBUG)
|
|||||||
utf8_parser = lxml.html.HTMLParser(encoding='utf-8')
|
utf8_parser = lxml.html.HTMLParser(encoding='utf-8')
|
||||||
|
|
||||||
def build_doc(page):
|
def build_doc(page):
|
||||||
|
if type(page) != unicode:
|
||||||
enc = get_encoding(page)
|
enc = get_encoding(page)
|
||||||
page_enc = page.decode(enc, 'replace').encode('utf-8')
|
page_enc = page.decode(enc, 'replace')
|
||||||
doc = lxml.html.document_fromstring(page_enc, parser=utf8_parser)
|
else:
|
||||||
|
page_enc = page
|
||||||
|
doc = lxml.html.document_fromstring(page_enc.encode('utf-8'), parser=utf8_parser)
|
||||||
return doc
|
return doc
|
||||||
|
|
||||||
def js_re(src, pattern, flags, repl):
|
def js_re(src, pattern, flags, repl):
|
||||||
|
Loading…
Reference in New Issue
Block a user