|
|
|
@ -155,13 +155,18 @@ def clean_text( text ):
|
|
|
|
|
|
|
|
|
|
""" Removes characters that can cause trouble """
|
|
|
|
|
|
|
|
|
|
if six.PY2:
|
|
|
|
|
# Python 2 Fix
|
|
|
|
|
# TODO: Provide a proper fix that doesn't revolve around removing characters
|
|
|
|
|
text = text.encode('ascii', 'ignore').decode('ascii')
|
|
|
|
|
|
|
|
|
|
text = text.strip()
|
|
|
|
|
|
|
|
|
|
if r'&' in text:
|
|
|
|
|
text = text.replace(r'&', '&')
|
|
|
|
|
text = text.replace(r'&', r'&')
|
|
|
|
|
|
|
|
|
|
if r'&#' in text:
|
|
|
|
|
# replace common ascii codes, will expand if needed
|
|
|
|
|
text = text.replace(r'"', '"').replace(r'&', '&').replace(r''', '’')
|
|
|
|
|
text = text.replace(r'"', r'"').replace(r'&', r'&').replace(r''', r"'")
|
|
|
|
|
|
|
|
|
|
return text
|
|
|
|
|