|
|
@ -1223,20 +1223,17 @@ def avoidWikimediaProjects(config={}, other={}):
|
|
|
|
def getWikiEngine(url=''):
    """ Returns the wiki engine of a URL, if known

    Requests the page at ``url`` and searches the response text for
    engine-specific HTML markers.

    Returns 'DokuWiki', 'MediaWiki' or 'MoinMoin' for the first marker that
    matches, or 'Unknown' when none does. Exceptions raised by ``requests``
    (connection errors, timeouts) are not caught here.
    """

    # Checked in order; the first matching signature wins.
    signatures = (
        ('DokuWiki',
         r'(?im)(<meta name="generator" content="DokuWiki)'),
        ('MediaWiki',
         r'(?im)(alt="Powered by MediaWiki"|<meta name="generator" content="MediaWiki)'),
        ('MoinMoin',
         r'(?im)(>MoinMoin Powered</a>)'),
    )

    session = requests.Session()
    try:
        # update() keeps the session's default headers (notably
        # Accept-Encoding) instead of replacing them with a one-entry dict.
        session.headers.update({'User-Agent': getUserAgent()})
        # A timeout keeps an unresponsive host from blocking forever.
        r = session.post(url=url, timeout=30)
        # Some servers refuse POST on plain pages (405) or answer with an
        # empty body; retry with GET before giving up on detection.
        if r.status_code == 405 or r.text == '':
            r = session.get(url=url, timeout=30)
        result = r.text
    finally:
        # Release pooled connections even when the request raises.
        session.close()

    for wikiengine, signature in signatures:
        if re.search(signature, result):
            return wikiengine
    return 'Unknown'
|
|
|
|