mirror of
https://github.com/WikiTeam/wikiteam
synced 2024-11-10 13:10:27 +00:00
Add wiki engine detector
This commit is contained in:
parent
eb97cf1adf
commit
9553e3550c
@ -881,13 +881,15 @@ def getParameters(params=[]):
|
|||||||
|
|
||||||
parser = argparse.ArgumentParser(description='')
|
parser = argparse.ArgumentParser(description='')
|
||||||
|
|
||||||
parser.add_argument('-v', '--version', action='version', version=(params[0] + " version " + getVersion()))
|
parser.add_argument('-v', '--version', action='version', version=getVersion())
|
||||||
parser.add_argument('--cookies', metavar="cookies.txt", help="path to a cookies.txt file")
|
parser.add_argument('--cookies', metavar="cookies.txt", help="path to a cookies.txt file")
|
||||||
parser.add_argument('--delay', metavar=5, default=0, help="adds a delay (in seconds)")
|
parser.add_argument('--delay', metavar=5, default=0, help="adds a delay (in seconds)")
|
||||||
|
parser.add_argument('--get-wiki-engine', action='store_true', help="returns the wiki engine")
|
||||||
|
|
||||||
groupAPIOrIndex = parser.add_mutually_exclusive_group(required=True)
|
groupWikiOrAPIOrIndex = parser.add_mutually_exclusive_group(required=True)
|
||||||
groupAPIOrIndex.add_argument('--api', help="URL to api.php")
|
groupWikiOrAPIOrIndex.add_argument('wiki', default='', nargs='?', help="URL to wiki")
|
||||||
groupAPIOrIndex.add_argument('--index', help="URL to index.php")
|
groupWikiOrAPIOrIndex.add_argument('--api', help="URL to api.php")
|
||||||
|
groupWikiOrAPIOrIndex.add_argument('--index', help="URL to index.php")
|
||||||
|
|
||||||
groupXMLOrImages = parser.add_argument_group()
|
groupXMLOrImages = parser.add_argument_group()
|
||||||
groupXMLOrImages.add_argument('--xml', action='store_true', help="generates a full history XML dump (--xml --curonly for current revisions only)")
|
groupXMLOrImages.add_argument('--xml', action='store_true', help="generates a full history XML dump (--xml --curonly for current revisions only)")
|
||||||
@ -902,16 +904,24 @@ def getParameters(params=[]):
|
|||||||
parser.add_argument('--exnamespaces', metavar="1,2,3", help='comma-separated value of namespaces to exclude')
|
parser.add_argument('--exnamespaces', metavar="1,2,3", help='comma-separated value of namespaces to exclude')
|
||||||
|
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
|
#print args
|
||||||
|
|
||||||
|
# Execute excluding args
|
||||||
|
if args.get_wiki_engine and args.wiki and (args.wiki.startswith('http://') or args.wiki.startswith('https://')):
|
||||||
|
print getWikiEngine(url=args.wiki)
|
||||||
|
sys.exit()
|
||||||
|
# End execute excluding args
|
||||||
|
|
||||||
# check API URL
|
# check API URL
|
||||||
if args.api and (not args.api.startswith('http://') and not args.api.startswith('https://')):
|
if args.api and (not args.api.startswith('http://') and not args.api.startswith('https://')):
|
||||||
print 'api.php must start with http:// or https://\n'
|
print args.api
|
||||||
|
print 'ERROR: URL to api.php must start with http:// or https://\n'
|
||||||
parser.print_usage()
|
parser.print_usage()
|
||||||
sys.exit(1)
|
sys.exit(1)
|
||||||
|
|
||||||
# check index URL
|
# check index URL
|
||||||
if args.index and (not args.index.startswith('http://') and not args.index.startswith('https://')):
|
if args.index and (not args.index.startswith('http://') and not args.index.startswith('https://')):
|
||||||
print 'index.php must start with http:// or https://\n'
|
print 'ERROR: URL to index.php must start with http:// or https://\n'
|
||||||
parser.print_usage()
|
parser.print_usage()
|
||||||
sys.exit(1)
|
sys.exit(1)
|
||||||
|
|
||||||
@ -999,7 +1009,6 @@ def getParameters(params=[]):
|
|||||||
print 'Error in index.php, please, provide a correct path to index.php'
|
print 'Error in index.php, please, provide a correct path to index.php'
|
||||||
sys.exit()
|
sys.exit()
|
||||||
|
|
||||||
|
|
||||||
#calculating path, if not defined by user with --path=
|
#calculating path, if not defined by user with --path=
|
||||||
if not config['path']:
|
if not config['path']:
|
||||||
config['path'] = './%s-%s-wikidump' % (domain2prefix(config=config), config['date'])
|
config['path'] = './%s-%s-wikidump' % (domain2prefix(config=config), config['date'])
|
||||||
@ -1290,13 +1299,35 @@ def avoidWikimediaProjects(config={}, other={}):
|
|||||||
print 'Thanks!'
|
print 'Thanks!'
|
||||||
sys.exit()
|
sys.exit()
|
||||||
|
|
||||||
|
def getWikiEngine(url=''):
|
||||||
|
""" Returns the wiki engine of a URL, if known """
|
||||||
|
|
||||||
|
req = urllib2.Request(url=url, headers={'User-Agent': getUserAgent(), 'Accept-Encoding': 'gzip'})
|
||||||
|
f = urllib2.urlopen(req)
|
||||||
|
if f.headers.get('Content-Encoding') and 'gzip' in f.headers.get('Content-Encoding'):
|
||||||
|
raw = gzip.GzipFile(fileobj=StringIO.StringIO(f.read())).read()
|
||||||
|
else:
|
||||||
|
raw = f.read()
|
||||||
|
f.close()
|
||||||
|
|
||||||
|
wikiengine = 'Unknown'
|
||||||
|
if re.search(ur'(?im)(<meta name="generator" content="DokuWiki)', raw):
|
||||||
|
wikiengine = 'DokuWiki'
|
||||||
|
elif re.search(ur'(?im)(alt="Powered by MediaWiki"|<meta name="generator" content="MediaWiki)', raw):
|
||||||
|
wikiengine = 'MediaWiki'
|
||||||
|
elif re.search(ur'(?im)(>MoinMoin Powered</a>)', raw):
|
||||||
|
wikiengine = 'MoinMoin'
|
||||||
|
|
||||||
|
return wikiengine
|
||||||
|
|
||||||
def main(params=[]):
|
def main(params=[]):
|
||||||
""" Main function """
|
""" Main function """
|
||||||
|
|
||||||
print welcome()
|
|
||||||
configfilename = 'config.txt'
|
configfilename = 'config.txt'
|
||||||
config, other = getParameters(params=params)
|
config, other = getParameters(params=params)
|
||||||
avoidWikimediaProjects(config=config, other=other)
|
avoidWikimediaProjects(config=config, other=other)
|
||||||
|
|
||||||
|
print welcome()
|
||||||
print 'Analysing %s' % (config['api'] and config['api'] or config['index'])
|
print 'Analysing %s' % (config['api'] and config['api'] or config['index'])
|
||||||
|
|
||||||
#creating path or resuming if desired
|
#creating path or resuming if desired
|
||||||
|
@ -22,7 +22,7 @@ import time
|
|||||||
import unittest
|
import unittest
|
||||||
import urllib
|
import urllib
|
||||||
import urllib2
|
import urllib2
|
||||||
from dumpgenerator import delay, getImageFilenamesURL, getImageFilenamesURLAPI, getUserAgent
|
from dumpgenerator import delay, getImageFilenamesURL, getImageFilenamesURLAPI, getUserAgent, getWikiEngine
|
||||||
|
|
||||||
class TestDumpgenerator(unittest.TestCase):
|
class TestDumpgenerator(unittest.TestCase):
|
||||||
#Documentation
|
#Documentation
|
||||||
@ -73,8 +73,19 @@ class TestDumpgenerator(unittest.TestCase):
|
|||||||
self.assertTrue(len(result_index) == imagecount)
|
self.assertTrue(len(result_index) == imagecount)
|
||||||
self.assertTrue(filetocheck in [filename for filename, url, uploader in result_index])
|
self.assertTrue(filetocheck in [filename for filename, url, uploader in result_index])
|
||||||
|
|
||||||
|
def test_getWikiEngine(self):
|
||||||
|
tests = [
|
||||||
|
['https://www.dokuwiki.org', 'DokuWiki'],
|
||||||
|
['http://wiki.openwrt.org', 'DokuWiki'],
|
||||||
|
['http://moinmo.in', 'MoinMoin'],
|
||||||
|
['https://wiki.debian.org', 'MoinMoin'],
|
||||||
|
]
|
||||||
|
for wiki, engine in tests:
|
||||||
|
print 'Testing', wiki
|
||||||
|
self.assertTrue(getWikiEngine(wiki) == engine)
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
#copying dumpgenerator.py to this directory
|
#copying dumpgenerator.py to this directory
|
||||||
shutil.copy2('../dumpgenerator.py', './dumpgenerator.py')
|
#shutil.copy2('../dumpgenerator.py', './dumpgenerator.py')
|
||||||
|
|
||||||
unittest.main()
|
unittest.main()
|
||||||
|
Loading…
Reference in New Issue
Block a user