|
|
|
@ -26,7 +26,7 @@ import time
|
|
|
|
|
import unittest
|
|
|
|
|
import urllib
|
|
|
|
|
import urllib2
|
|
|
|
|
from dumpgenerator import delay, getImageNames, getPageTitles, getUserAgent, getWikiEngine
|
|
|
|
|
from dumpgenerator import delay, getImageNames, getPageTitles, getUserAgent, getWikiEngine, mwGetAPIAndIndex
|
|
|
|
|
|
|
|
|
|
class TestDumpgenerator(unittest.TestCase):
|
|
|
|
|
# Documentation
|
|
|
|
@ -37,7 +37,7 @@ class TestDumpgenerator(unittest.TestCase):
|
|
|
|
|
# - Check one wiki per wikifarm at least (page titles & images, with/out API)
|
|
|
|
|
|
|
|
|
|
def test_delay(self):
|
|
|
|
|
""" This test checks several delays """
|
|
|
|
|
# This test checks several delays
|
|
|
|
|
|
|
|
|
|
print '#'*73, '\n', 'test_delay', '\n', '#'*73
|
|
|
|
|
for i in [0, 1, 2, 3]:
|
|
|
|
@ -49,10 +49,10 @@ class TestDumpgenerator(unittest.TestCase):
|
|
|
|
|
self.assertTrue(t2 > i and t2 < i + 1)
|
|
|
|
|
|
|
|
|
|
def test_getImages(self):
|
|
|
|
|
""" This test download the image list using API and index.php """
|
|
|
|
|
""" Compare both lists in length and file by file """
|
|
|
|
|
""" Check the presence of some special files, like odd chars filenames """
|
|
|
|
|
""" The tested wikis are from different wikifarms and some alone """
|
|
|
|
|
# This test downloads the image list using API and index.php
|
|
|
|
|
# Compare both lists in length and file by file
|
|
|
|
|
# Check the presence of some special files, like odd chars filenames
|
|
|
|
|
# The tested wikis are from different wikifarms and some alone
|
|
|
|
|
|
|
|
|
|
print '#'*73, '\n', 'test_getImages', '\n', '#'*73
|
|
|
|
|
tests = [
|
|
|
|
@ -74,7 +74,7 @@ class TestDumpgenerator(unittest.TestCase):
|
|
|
|
|
['http://digimon.neoseeker.com/w/index.php', 'http://digimon.neoseeker.com/w/api.php', u'Ogremon card.png'],
|
|
|
|
|
|
|
|
|
|
# Orain wikifarm
|
|
|
|
|
['http://mc.orain.org/w/index.php', 'http://mc.orain.org/w/api.php', u'Mojang logo.svg'],
|
|
|
|
|
#['http://mc.orain.org/w/index.php', 'http://mc.orain.org/w/api.php', u'Mojang logo.svg'],
|
|
|
|
|
|
|
|
|
|
# Referata wikifarm
|
|
|
|
|
['http://wikipapers.referata.com/w/index.php', 'http://wikipapers.referata.com/w/api.php', u'Avbot logo.png'],
|
|
|
|
@ -91,28 +91,27 @@ class TestDumpgenerator(unittest.TestCase):
|
|
|
|
|
session = requests.Session()
|
|
|
|
|
session.headers = {'User-Agent': getUserAgent()}
|
|
|
|
|
for index, api, filetocheck in tests:
|
|
|
|
|
print '\n'
|
|
|
|
|
# Testing with API
|
|
|
|
|
print '\nTesting', api
|
|
|
|
|
config_api = {'api': api, 'delay': 0}
|
|
|
|
|
req = urllib2.Request(url=api, data=urllib.urlencode({'action': 'query', 'meta': 'siteinfo', 'siprop': 'statistics', 'format': 'json'}), headers={'User-Agent': getUserAgent()})
|
|
|
|
|
f = urllib2.urlopen(req)
|
|
|
|
|
imagecount = int(json.loads(f.read())['query']['statistics']['images'])
|
|
|
|
|
f.close()
|
|
|
|
|
|
|
|
|
|
print 'Testing', config_api['api']
|
|
|
|
|
|
|
|
|
|
print 'Trying to parse', filetocheck, 'with API'
|
|
|
|
|
result_api = getImageNames(config=config_api, session=session)
|
|
|
|
|
self.assertEqual(len(result_api), imagecount)
|
|
|
|
|
self.assertTrue(filetocheck in [filename for filename, url, uploader in result_api])
|
|
|
|
|
|
|
|
|
|
# Testing with index
|
|
|
|
|
print '\nTesting', index
|
|
|
|
|
config_index = {'index': index, 'delay': 0}
|
|
|
|
|
req = urllib2.Request(url=api, data=urllib.urlencode({'action': 'query', 'meta': 'siteinfo', 'siprop': 'statistics', 'format': 'json'}), headers={'User-Agent': getUserAgent()})
|
|
|
|
|
f = urllib2.urlopen(req)
|
|
|
|
|
imagecount = int(json.loads(f.read())['query']['statistics']['images'])
|
|
|
|
|
f.close()
|
|
|
|
|
|
|
|
|
|
print 'Testing', config_index['index']
|
|
|
|
|
print 'Trying to parse', filetocheck, 'with index'
|
|
|
|
|
result_index = getImageNames(config=config_index, session=session)
|
|
|
|
|
#print 111, set([filename for filename, url, uploader in result_api]) - set([filename for filename, url, uploader in result_index])
|
|
|
|
@ -128,10 +127,10 @@ class TestDumpgenerator(unittest.TestCase):
|
|
|
|
|
c += 1
|
|
|
|
|
|
|
|
|
|
def test_getPageTitles(self):
|
|
|
|
|
""" This test download the title list using API and index.php """
|
|
|
|
|
""" Compare both lists in length and title by title """
|
|
|
|
|
""" Check the presence of some special titles, like odd chars """
|
|
|
|
|
""" The tested wikis are from different wikifarms and some alone """
|
|
|
|
|
# This test downloads the title list using API and index.php
|
|
|
|
|
# Compare both lists in length and title by title
|
|
|
|
|
# Check the presence of some special titles, like odd chars
|
|
|
|
|
# The tested wikis are from different wikifarms and some alone
|
|
|
|
|
|
|
|
|
|
print '#'*73, '\n', 'test_getPageTitles', '\n', '#'*73
|
|
|
|
|
tests = [
|
|
|
|
@ -146,18 +145,17 @@ class TestDumpgenerator(unittest.TestCase):
|
|
|
|
|
session = requests.Session()
|
|
|
|
|
session.headers = {'User-Agent': getUserAgent()}
|
|
|
|
|
for index, api, pagetocheck in tests:
|
|
|
|
|
print '\n'
|
|
|
|
|
# Testing with API
|
|
|
|
|
config_api = {'api': api, 'delay': 0, 'namespaces': ['all'], 'exnamespaces': []}
|
|
|
|
|
print 'Testing', config_api['api']
|
|
|
|
|
print '\nTesting', api
|
|
|
|
|
print 'Trying to parse', pagetocheck, 'with API'
|
|
|
|
|
config_api = {'api': api, 'delay': 0, 'namespaces': ['all'], 'exnamespaces': []}
|
|
|
|
|
result_api = getPageTitles(config=config_api, session=session)
|
|
|
|
|
self.assertTrue(pagetocheck in result_api)
|
|
|
|
|
|
|
|
|
|
# Testing with index
|
|
|
|
|
config_index = {'index': index, 'delay': 0, 'namespaces': ['all'], 'exnamespaces': []}
|
|
|
|
|
print 'Testing', config_index['index']
|
|
|
|
|
print 'Testing', index
|
|
|
|
|
print 'Trying to parse', pagetocheck, 'with index'
|
|
|
|
|
config_index = {'index': index, 'delay': 0, 'namespaces': ['all'], 'exnamespaces': []}
|
|
|
|
|
result_index = getPageTitles(config=config_index, session=session)
|
|
|
|
|
self.assertTrue(pagetocheck in result_index)
|
|
|
|
|
self.assertEqual(len(result_api), len(result_index))
|
|
|
|
@ -178,8 +176,47 @@ class TestDumpgenerator(unittest.TestCase):
|
|
|
|
|
]
|
|
|
|
|
for wiki, engine in tests:
|
|
|
|
|
print 'Testing', wiki
|
|
|
|
|
self.assertTrue(getWikiEngine(wiki) == engine)
|
|
|
|
|
|
|
|
|
|
self.assertEqual(getWikiEngine(wiki), engine)
|
|
|
|
|
|
|
|
|
|
def test_mwGetAPIAndIndex(self):
    # Check that mwGetAPIAndIndex() returns the expected api.php and
    # index.php URLs when given only the base URL of a wiki.
    # Each case is (base URL, expected API URL, expected index URL).
    # Wikifarms whose case is commented out (or absent) are kept as
    # placeholders together with the note explaining why.
    # NOTE(review): presumably mwGetAPIAndIndex() contacts each wiki over
    # the network -- confirm against dumpgenerator.py.
    cases = (
        # Alone wikis
        ('http://archiveteam.org',
         'http://archiveteam.org/api.php',
         'http://archiveteam.org/index.php'),
        ('http://skilledtests.com/wiki/',
         'http://skilledtests.com/wiki/api.php',
         'http://skilledtests.com/wiki/index.php5'),

        # Editthis wikifarm
        # It has a page view limit

        # Gamepedia wikifarm
        ('http://dawngate.gamepedia.com',
         'http://dawngate.gamepedia.com/api.php',
         'http://dawngate.gamepedia.com/index.php'),

        # Gentoo wikifarm
        ('http://wiki.gentoo.org',
         'http://wiki.gentoo.org/api.php',
         'http://wiki.gentoo.org/index.php'),

        # Neoseeker wikifarm
        #['http://digimon.neoseeker.com', 'http://digimon.neoseeker.com/w/api.php', 'http://digimon.neoseeker.com/w/index.php'],

        # Orain wikifarm
        #['http://mc.orain.org', 'http://mc.orain.org/w/api.php', 'http://mc.orain.org/w/index.php'],

        # Referata wikifarm
        ('http://wikipapers.referata.com',
         'http://wikipapers.referata.com/w/api.php',
         'http://wikipapers.referata.com/w/index.php'),

        # ShoutWiki wikifarm
        ('http://commandos.shoutwiki.com',
         'http://commandos.shoutwiki.com/w/api.php',
         'http://commandos.shoutwiki.com/w/index.php'),

        # Wiki-site wikifarm
        #['http://minlingo.wiki-site.com', 'http://minlingo.wiki-site.com/api.php', 'http://minlingo.wiki-site.com/index.php'],

        # Wikkii wikifarm
        # It seems offline
    )
    for base_url, expected_api, expected_index in cases:
        # A single pre-joined argument prints the same line as
        # "print 'Testing', <url>" would.
        print(' '.join(('Testing', base_url)))
        detected_api, detected_index = mwGetAPIAndIndex(base_url)
        # API is checked first, then index, so the first failure reported
        # for a wiki is the API mismatch when both are wrong.
        self.assertEqual(expected_api, detected_api)
        self.assertEqual(expected_index, detected_index)
|
|
|
|
|
|
|
|
|
|
if __name__ == '__main__':
|
|
|
|
|
#copying dumpgenerator.py to this directory
|
|
|
|
|
#shutil.copy2('../dumpgenerator.py', './dumpgenerator.py')
|
|
|
|
|