Merge pull request #56 from nathanathan/patch-1

Defaulting to utf-8 when chardet returns None
This commit is contained in:
Yuri Baburov 2014-12-20 02:11:53 +05:00
commit 2d4cfdb2c8

View File

@@ -26,7 +26,7 @@ def get_encoding(page):
if not text.strip() or len(text) < 10:
return enc # can't guess
res = chardet.detect(text)
enc = res['encoding']
enc = res['encoding'] or 'utf-8'
#print '->', enc, "%.2f" % res['confidence']
enc = custom_decode(enc)
return enc
@@ -45,4 +45,4 @@ def custom_decode(encoding):
if encoding in alternates:
return alternates[encoding]
else:
return encoding
return encoding