mirror of
https://github.com/WikiTeam/wikiteam
synced 2024-11-10 13:10:27 +00:00
adding wiki engine detector
This commit is contained in:
parent
eb97cf1adf
commit
9553e3550c
@ -881,13 +881,15 @@ def getParameters(params=[]):
|
||||
|
||||
parser = argparse.ArgumentParser(description='')
|
||||
|
||||
parser.add_argument('-v', '--version', action='version', version=(params[0] + " version " + getVersion()))
|
||||
parser.add_argument('-v', '--version', action='version', version=getVersion())
|
||||
parser.add_argument('--cookies', metavar="cookies.txt", help="path to a cookies.txt file")
|
||||
parser.add_argument('--delay', metavar=5, default=0, help="adds a delay (in seconds)")
|
||||
parser.add_argument('--get-wiki-engine', action='store_true', help="returns the wiki engine")
|
||||
|
||||
groupAPIOrIndex = parser.add_mutually_exclusive_group(required=True)
|
||||
groupAPIOrIndex.add_argument('--api', help="URL to api.php")
|
||||
groupAPIOrIndex.add_argument('--index', help="URL to index.php")
|
||||
groupWikiOrAPIOrIndex = parser.add_mutually_exclusive_group(required=True)
|
||||
groupWikiOrAPIOrIndex.add_argument('wiki', default='', nargs='?', help="URL to wiki")
|
||||
groupWikiOrAPIOrIndex.add_argument('--api', help="URL to api.php")
|
||||
groupWikiOrAPIOrIndex.add_argument('--index', help="URL to index.php")
|
||||
|
||||
groupXMLOrImages = parser.add_argument_group()
|
||||
groupXMLOrImages.add_argument('--xml', action='store_true', help="generates a full history XML dump (--xml --curonly for current revisions only)")
|
||||
@ -902,16 +904,24 @@ def getParameters(params=[]):
|
||||
parser.add_argument('--exnamespaces', metavar="1,2,3", help='comma-separated value of namespaces to exclude')
|
||||
|
||||
args = parser.parse_args()
|
||||
#print args
|
||||
|
||||
# Execute excluding args
|
||||
if args.get_wiki_engine and args.wiki and (args.wiki.startswith('http://') or args.wiki.startswith('https://')):
|
||||
print getWikiEngine(url=args.wiki)
|
||||
sys.exit()
|
||||
# End execute excluding args
|
||||
|
||||
# check API URL
|
||||
if args.api and (not args.api.startswith('http://') and not args.api.startswith('https://')):
|
||||
print 'api.php must start with http:// or https://\n'
|
||||
print args.api
|
||||
print 'ERROR: URL to api.php must start with http:// or https://\n'
|
||||
parser.print_usage()
|
||||
sys.exit(1)
|
||||
|
||||
# check index URL
|
||||
if args.index and (not args.index.startswith('http://') and not args.index.startswith('https://')):
|
||||
print 'index.php must start with http:// or https://\n'
|
||||
print 'ERROR: URL to index.php must start with http:// or https://\n'
|
||||
parser.print_usage()
|
||||
sys.exit(1)
|
||||
|
||||
@ -999,7 +1009,6 @@ def getParameters(params=[]):
|
||||
print 'Error in index.php, please, provide a correct path to index.php'
|
||||
sys.exit()
|
||||
|
||||
|
||||
#calculating path, if not defined by user with --path=
|
||||
if not config['path']:
|
||||
config['path'] = './%s-%s-wikidump' % (domain2prefix(config=config), config['date'])
|
||||
@ -1290,13 +1299,35 @@ def avoidWikimediaProjects(config={}, other={}):
|
||||
print 'Thanks!'
|
||||
sys.exit()
|
||||
|
||||
def getWikiEngine(url=''):
|
||||
""" Returns the wiki engine of a URL, if known """
|
||||
|
||||
req = urllib2.Request(url=url, headers={'User-Agent': getUserAgent(), 'Accept-Encoding': 'gzip'})
|
||||
f = urllib2.urlopen(req)
|
||||
if f.headers.get('Content-Encoding') and 'gzip' in f.headers.get('Content-Encoding'):
|
||||
raw = gzip.GzipFile(fileobj=StringIO.StringIO(f.read())).read()
|
||||
else:
|
||||
raw = f.read()
|
||||
f.close()
|
||||
|
||||
wikiengine = 'Unknown'
|
||||
if re.search(ur'(?im)(<meta name="generator" content="DokuWiki)', raw):
|
||||
wikiengine = 'DokuWiki'
|
||||
elif re.search(ur'(?im)(alt="Powered by MediaWiki"|<meta name="generator" content="MediaWiki)', raw):
|
||||
wikiengine = 'MediaWiki'
|
||||
elif re.search(ur'(?im)(>MoinMoin Powered</a>)', raw):
|
||||
wikiengine = 'MoinMoin'
|
||||
|
||||
return wikiengine
|
||||
|
||||
def main(params=[]):
|
||||
""" Main function """
|
||||
|
||||
print welcome()
|
||||
configfilename = 'config.txt'
|
||||
config, other = getParameters(params=params)
|
||||
avoidWikimediaProjects(config=config, other=other)
|
||||
|
||||
print welcome()
|
||||
print 'Analysing %s' % (config['api'] and config['api'] or config['index'])
|
||||
|
||||
#creating path or resuming if desired
|
||||
|
@ -22,7 +22,7 @@ import time
|
||||
import unittest
|
||||
import urllib
|
||||
import urllib2
|
||||
from dumpgenerator import delay, getImageFilenamesURL, getImageFilenamesURLAPI, getUserAgent
|
||||
from dumpgenerator import delay, getImageFilenamesURL, getImageFilenamesURLAPI, getUserAgent, getWikiEngine
|
||||
|
||||
class TestDumpgenerator(unittest.TestCase):
|
||||
#Documentation
|
||||
@ -73,8 +73,19 @@ class TestDumpgenerator(unittest.TestCase):
|
||||
self.assertTrue(len(result_index) == imagecount)
|
||||
self.assertTrue(filetocheck in [filename for filename, url, uploader in result_index])
|
||||
|
||||
def test_getWikiEngine(self):
|
||||
tests = [
|
||||
['https://www.dokuwiki.org', 'DokuWiki'],
|
||||
['http://wiki.openwrt.org', 'DokuWiki'],
|
||||
['http://moinmo.in', 'MoinMoin'],
|
||||
['https://wiki.debian.org', 'MoinMoin'],
|
||||
]
|
||||
for wiki, engine in tests:
|
||||
print 'Testing', wiki
|
||||
self.assertTrue(getWikiEngine(wiki) == engine)
|
||||
|
||||
if __name__ == '__main__':
|
||||
#copying dumpgenerator.py to this directory
|
||||
shutil.copy2('../dumpgenerator.py', './dumpgenerator.py')
|
||||
#shutil.copy2('../dumpgenerator.py', './dumpgenerator.py')
|
||||
|
||||
unittest.main()
|
||||
|
Loading…
Reference in New Issue
Block a user