2
0
mirror of https://github.com/WikiTeam/wikiteam synced 2024-11-10 13:10:27 +00:00

adding wiki engine detector

This commit is contained in:
Emilio J. Rodríguez-Posada 2014-07-03 18:17:02 +02:00
parent eb97cf1adf
commit 9553e3550c
2 changed files with 52 additions and 10 deletions

View File

@ -881,13 +881,15 @@ def getParameters(params=[]):
parser = argparse.ArgumentParser(description='') parser = argparse.ArgumentParser(description='')
parser.add_argument('-v', '--version', action='version', version=(params[0] + " version " + getVersion())) parser.add_argument('-v', '--version', action='version', version=getVersion())
parser.add_argument('--cookies', metavar="cookies.txt", help="path to a cookies.txt file") parser.add_argument('--cookies', metavar="cookies.txt", help="path to a cookies.txt file")
parser.add_argument('--delay', metavar=5, default=0, help="adds a delay (in seconds)") parser.add_argument('--delay', metavar=5, default=0, help="adds a delay (in seconds)")
parser.add_argument('--get-wiki-engine', action='store_true', help="returns the wiki engine")
groupAPIOrIndex = parser.add_mutually_exclusive_group(required=True) groupWikiOrAPIOrIndex = parser.add_mutually_exclusive_group(required=True)
groupAPIOrIndex.add_argument('--api', help="URL to api.php") groupWikiOrAPIOrIndex.add_argument('wiki', default='', nargs='?', help="URL to wiki")
groupAPIOrIndex.add_argument('--index', help="URL to index.php") groupWikiOrAPIOrIndex.add_argument('--api', help="URL to api.php")
groupWikiOrAPIOrIndex.add_argument('--index', help="URL to index.php")
groupXMLOrImages = parser.add_argument_group() groupXMLOrImages = parser.add_argument_group()
groupXMLOrImages.add_argument('--xml', action='store_true', help="generates a full history XML dump (--xml --curonly for current revisions only)") groupXMLOrImages.add_argument('--xml', action='store_true', help="generates a full history XML dump (--xml --curonly for current revisions only)")
@ -902,16 +904,24 @@ def getParameters(params=[]):
parser.add_argument('--exnamespaces', metavar="1,2,3", help='comma-separated value of namespaces to exclude') parser.add_argument('--exnamespaces', metavar="1,2,3", help='comma-separated value of namespaces to exclude')
args = parser.parse_args() args = parser.parse_args()
#print args
# Execute excluding args
if args.get_wiki_engine and args.wiki and (args.wiki.startswith('http://') or args.wiki.startswith('https://')):
print getWikiEngine(url=args.wiki)
sys.exit()
# End execute excluding args
# check API URL # check API URL
if args.api and (not args.api.startswith('http://') and not args.api.startswith('https://')): if args.api and (not args.api.startswith('http://') and not args.api.startswith('https://')):
print 'api.php must start with http:// or https://\n' print args.api
print 'ERROR: URL to api.php must start with http:// or https://\n'
parser.print_usage() parser.print_usage()
sys.exit(1) sys.exit(1)
# check index URL # check index URL
if args.index and (not args.index.startswith('http://') and not args.index.startswith('https://')): if args.index and (not args.index.startswith('http://') and not args.index.startswith('https://')):
print 'index.php must start with http:// or https://\n' print 'ERROR: URL to index.php must start with http:// or https://\n'
parser.print_usage() parser.print_usage()
sys.exit(1) sys.exit(1)
@ -999,7 +1009,6 @@ def getParameters(params=[]):
print 'Error in index.php, please, provide a correct path to index.php' print 'Error in index.php, please, provide a correct path to index.php'
sys.exit() sys.exit()
#calculating path, if not defined by user with --path= #calculating path, if not defined by user with --path=
if not config['path']: if not config['path']:
config['path'] = './%s-%s-wikidump' % (domain2prefix(config=config), config['date']) config['path'] = './%s-%s-wikidump' % (domain2prefix(config=config), config['date'])
@ -1290,13 +1299,35 @@ def avoidWikimediaProjects(config={}, other={}):
print 'Thanks!' print 'Thanks!'
sys.exit() sys.exit()
def getWikiEngine(url=''):
    """ Returns the wiki engine of a URL, if known.

    Fetches the page at `url` and sniffs the HTML for well-known
    engine fingerprints. Returns one of 'DokuWiki', 'MediaWiki',
    'MoinMoin', or 'Unknown' when no marker matches.
    """
    # Advertise gzip support to reduce transfer size; getUserAgent() is a
    # module-level helper defined elsewhere in this file.
    req = urllib2.Request(url=url, headers={'User-Agent': getUserAgent(), 'Accept-Encoding': 'gzip'})
    f = urllib2.urlopen(req)
    # Transparently decompress when the server honoured Accept-Encoding.
    if f.headers.get('Content-Encoding') and 'gzip' in f.headers.get('Content-Encoding'):
        raw = gzip.GzipFile(fileobj=StringIO.StringIO(f.read())).read()
    else:
        raw = f.read()
    f.close()
    wikiengine = 'Unknown'
    # Order matters only in that the first matching marker wins; each test
    # looks for a generator <meta> tag or a "Powered by" footer badge.
    if re.search(ur'(?im)(<meta name="generator" content="DokuWiki)', raw):
        wikiengine = 'DokuWiki'
    elif re.search(ur'(?im)(alt="Powered by MediaWiki"|<meta name="generator" content="MediaWiki)', raw):
        wikiengine = 'MediaWiki'
    elif re.search(ur'(?im)(>MoinMoin Powered</a>)', raw):
        wikiengine = 'MoinMoin'
    return wikiengine
def main(params=[]): def main(params=[]):
""" Main function """ """ Main function """
print welcome()
configfilename = 'config.txt' configfilename = 'config.txt'
config, other = getParameters(params=params) config, other = getParameters(params=params)
avoidWikimediaProjects(config=config, other=other) avoidWikimediaProjects(config=config, other=other)
print welcome()
print 'Analysing %s' % (config['api'] and config['api'] or config['index']) print 'Analysing %s' % (config['api'] and config['api'] or config['index'])
#creating path or resuming if desired #creating path or resuming if desired

View File

@ -22,7 +22,7 @@ import time
import unittest import unittest
import urllib import urllib
import urllib2 import urllib2
from dumpgenerator import delay, getImageFilenamesURL, getImageFilenamesURLAPI, getUserAgent from dumpgenerator import delay, getImageFilenamesURL, getImageFilenamesURLAPI, getUserAgent, getWikiEngine
class TestDumpgenerator(unittest.TestCase): class TestDumpgenerator(unittest.TestCase):
#Documentation #Documentation
@ -73,8 +73,19 @@ class TestDumpgenerator(unittest.TestCase):
self.assertTrue(len(result_index) == imagecount) self.assertTrue(len(result_index) == imagecount)
self.assertTrue(filetocheck in [filename for filename, url, uploader in result_index]) self.assertTrue(filetocheck in [filename for filename, url, uploader in result_index])
def test_getWikiEngine(self):
    # Live-network smoke test: each listed wiki must be classified as the
    # expected engine by getWikiEngine. Requires internet access, and will
    # break if any of these sites migrates to a different wiki engine.
    tests = [
        ['https://www.dokuwiki.org', 'DokuWiki'],
        ['http://wiki.openwrt.org', 'DokuWiki'],
        ['http://moinmo.in', 'MoinMoin'],
        ['https://wiki.debian.org', 'MoinMoin'],
    ]
    for wiki, engine in tests:
        print 'Testing', wiki
        self.assertTrue(getWikiEngine(wiki) == engine)
if __name__ == '__main__': if __name__ == '__main__':
#copying dumpgenerator.py to this directory #copying dumpgenerator.py to this directory
shutil.copy2('../dumpgenerator.py', './dumpgenerator.py') #shutil.copy2('../dumpgenerator.py', './dumpgenerator.py')
unittest.main() unittest.main()