Merge pull request #56 from nathanathan/patch-1
Defaulting to utf-8 when chardet returns None
This commit is contained in:
commit
2d4cfdb2c8
@@ -26,7 +26,7 @@ def get_encoding(page):
|
||||
if not text.strip() or len(text) < 10:
|
||||
return enc # can't guess
|
||||
res = chardet.detect(text)
|
||||
- enc = res['encoding']
|
||||
+ enc = res['encoding'] or 'utf-8'
|
||||
#print '->', enc, "%.2f" % res['confidence']
|
||||
enc = custom_decode(enc)
|
||||
return enc
|
||||
@@ -45,4 +45,4 @@ def custom_decode(encoding):
|
||||
if encoding in alternates:
|
||||
return alternates[encoding]
|
||||
else:
|
||||
- return encoding
|
||||
+ return encoding
|
||||
|
Loading…
Reference in New Issue
Block a user