|
|
@ -4,7 +4,6 @@ import urllib.parse as urlparse
|
|
|
|
from urllib.parse import parse_qs
|
|
|
|
from urllib.parse import parse_qs
|
|
|
|
import re
|
|
|
|
import re
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
SKIP_ARGS = ['ref_src', 'utm']
|
|
|
|
SKIP_ARGS = ['ref_src', 'utm']
|
|
|
|
SKIP_PREFIX = ['//www.', '//mobile.', '//m.']
|
|
|
|
SKIP_PREFIX = ['//www.', '//mobile.', '//m.']
|
|
|
|
GOOG_STATIC = 'www.gstatic.com'
|
|
|
|
GOOG_STATIC = 'www.gstatic.com'
|
|
|
@ -14,7 +13,6 @@ BLANK_B64 = ('data:image/png;base64,'
|
|
|
|
'iVBORw0KGgoAAAANSUhEUgAAAAoAAAAKCAQAAAAnOwc2AAAAD0lEQVR42mNkw'
|
|
|
|
'iVBORw0KGgoAAAANSUhEUgAAAAoAAAAKCAQAAAAnOwc2AAAAD0lEQVR42mNkw'
|
|
|
|
'AIYh7IgAAVVAAuInjI5AAAAAElFTkSuQmCC')
|
|
|
|
'AIYh7IgAAVVAAuInjI5AAAAAElFTkSuQmCC')
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Ad keywords
|
|
|
|
# Ad keywords
|
|
|
|
BLACKLIST = [
|
|
|
|
BLACKLIST = [
|
|
|
|
'ad', 'anuncio', 'annuncio', 'annonce', 'Anzeige', '广告', '廣告', 'Reklama',
|
|
|
|
'ad', 'anuncio', 'annuncio', 'annonce', 'Anzeige', '广告', '廣告', 'Reklama',
|
|
|
|