Allow passing unicode objects

This commit is contained in:
Lee Semel 2011-06-28 00:54:36 +08:00 committed by Yuri Baburov
parent ad38fac40a
commit f3d0a8d842

View File

@ -10,9 +10,12 @@ logging.getLogger().setLevel(logging.DEBUG)
utf8_parser = lxml.html.HTMLParser(encoding='utf-8') utf8_parser = lxml.html.HTMLParser(encoding='utf-8')
def build_doc(page):
    """Parse an HTML page into an lxml document.

    ``page`` may be either a byte string or a ``unicode`` object.  A byte
    string is decoded using the encoding reported by ``get_encoding``;
    a ``unicode`` object is used as-is.

    Returns the document produced by ``lxml.html.document_fromstring``.
    """
    # isinstance (rather than an exact type comparison) so that unicode
    # subclasses are also treated as already-decoded text and are never
    # passed through .decode(), which would implicitly ASCII-encode them.
    if isinstance(page, unicode):
        page_unicode = page
    else:
        enc = get_encoding(page)
        # 'replace' substitutes undecodable bytes instead of raising.
        page_unicode = page.decode(enc, 'replace')
    # utf8_parser is configured with encoding='utf-8', so always hand it
    # UTF-8 encoded bytes regardless of the input's original form.
    doc = lxml.html.document_fromstring(page_unicode.encode('utf-8'), parser=utf8_parser)
    return doc
def js_re(src, pattern, flags, repl): def js_re(src, pattern, flags, repl):