added test_mwGetAPIAndIndex; commented out slow/404-error wikis

pull/163/head^2
Emilio J. Rodríguez-Posada 10 years ago
parent 51806f5a3d
commit 386c6be036
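
For context, mwGetAPIAndIndex resolves a wiki's api.php and index.php URLs from its homepage; that is what the new test below exercises. A minimal sketch of one way such discovery can work, relying only on the EditURI <link> tag that MediaWiki pages emit; the helper name is hypothetical and this is not the project's actual implementation:

import re
import urllib2

def guess_api_and_index(url):
    # Hypothetical helper, for illustration only. MediaWiki homepages emit
    # <link rel="EditURI" ... href=".../api.php?action=rsd">, which locates api.php.
    html = urllib2.urlopen(url).read()
    api, index = '', ''
    m = re.search(r'rel="EditURI"[^>]+href="([^"?]+)\?action=rsd"', html)
    if m:
        api = m.group(1)
        # Heuristic: assume index.php sits beside api.php; real wikis vary
        # (the skilledtests.com case in this commit expects index.php5).
        index = api.rsplit('/', 1)[0] + '/index.php'
    return api, index

As the skilledtests.com entry in the test data shows, plain '/index.php' string building is not always enough, so a real implementation needs more than this heuristic.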

@@ -26,7 +26,7 @@ import time
 import unittest
 import urllib
 import urllib2
-from dumpgenerator import delay, getImageNames, getPageTitles, getUserAgent, getWikiEngine
+from dumpgenerator import delay, getImageNames, getPageTitles, getUserAgent, getWikiEngine, mwGetAPIAndIndex
 
 class TestDumpgenerator(unittest.TestCase):
     # Documentation
@@ -37,7 +37,7 @@ class TestDumpgenerator(unittest.TestCase):
     # - Check one wiki per wikifarm at least (page titles & images, with/out API)
 
     def test_delay(self):
-        """ This test checks several delays """
+        # This test checks several delays
         print '#'*73, '\n', 'test_delay', '\n', '#'*73
         for i in [0, 1, 2, 3]:
@@ -49,10 +49,10 @@ class TestDumpgenerator(unittest.TestCase):
         self.assertTrue(t2 > i and t2 < i + 1)
 
     def test_getImages(self):
-        """ This test downloads the image list using API and index.php """
-        """ Compare both lists in length and file by file """
-        """ Check the presence of some special files, like filenames with odd chars """
-        """ The tested wikis are from different wikifarms, plus some standalone ones """
+        # This test downloads the image list using API and index.php
+        # Compare both lists in length and file by file
+        # Check the presence of some special files, like filenames with odd chars
+        # The tested wikis are from different wikifarms, plus some standalone ones
         print '#'*73, '\n', 'test_getImages', '\n', '#'*73
         tests = [
@@ -74,7 +74,7 @@ class TestDumpgenerator(unittest.TestCase):
             ['http://digimon.neoseeker.com/w/index.php', 'http://digimon.neoseeker.com/w/api.php', u'Ogremon card.png'],
             # Orain wikifarm
-            ['http://mc.orain.org/w/index.php', 'http://mc.orain.org/w/api.php', u'Mojang logo.svg'],
+            #['http://mc.orain.org/w/index.php', 'http://mc.orain.org/w/api.php', u'Mojang logo.svg'],
             # Referata wikifarm
             ['http://wikipapers.referata.com/w/index.php', 'http://wikipapers.referata.com/w/api.php', u'Avbot logo.png'],
@@ -91,28 +91,27 @@ class TestDumpgenerator(unittest.TestCase):
         session = requests.Session()
         session.headers = {'User-Agent': getUserAgent()}
         for index, api, filetocheck in tests:
-            print '\n'
             # Testing with API
+            print '\nTesting', api
             config_api = {'api': api, 'delay': 0}
             req = urllib2.Request(url=api, data=urllib.urlencode({'action': 'query', 'meta': 'siteinfo', 'siprop': 'statistics', 'format': 'json'}), headers={'User-Agent': getUserAgent()})
             f = urllib2.urlopen(req)
             imagecount = int(json.loads(f.read())['query']['statistics']['images'])
             f.close()
-            print 'Testing', config_api['api']
             print 'Trying to parse', filetocheck, 'with API'
             result_api = getImageNames(config=config_api, session=session)
             self.assertEqual(len(result_api), imagecount)
             self.assertTrue(filetocheck in [filename for filename, url, uploader in result_api])
 
             # Testing with index
+            print '\nTesting', index
             config_index = {'index': index, 'delay': 0}
             req = urllib2.Request(url=api, data=urllib.urlencode({'action': 'query', 'meta': 'siteinfo', 'siprop': 'statistics', 'format': 'json'}), headers={'User-Agent': getUserAgent()})
             f = urllib2.urlopen(req)
             imagecount = int(json.loads(f.read())['query']['statistics']['images'])
             f.close()
-            print 'Testing', config_index['index']
             print 'Trying to parse', filetocheck, 'with index'
             result_index = getImageNames(config=config_index, session=session)
             #print 111, set([filename for filename, url, uploader in result_api]) - set([filename for filename, url, uploader in result_index])
@@ -128,10 +127,10 @@ class TestDumpgenerator(unittest.TestCase):
                 c += 1
 
     def test_getPageTitles(self):
-        """ This test downloads the title list using API and index.php """
-        """ Compare both lists in length and title by title """
-        """ Check the presence of some special titles, like titles with odd chars """
-        """ The tested wikis are from different wikifarms, plus some standalone ones """
+        # This test downloads the title list using API and index.php
+        # Compare both lists in length and title by title
+        # Check the presence of some special titles, like titles with odd chars
+        # The tested wikis are from different wikifarms, plus some standalone ones
         print '#'*73, '\n', 'test_getPageTitles', '\n', '#'*73
         tests = [
@@ -146,18 +145,17 @@ class TestDumpgenerator(unittest.TestCase):
         session = requests.Session()
         session.headers = {'User-Agent': getUserAgent()}
         for index, api, pagetocheck in tests:
-            print '\n'
             # Testing with API
-            config_api = {'api': api, 'delay': 0, 'namespaces': ['all'], 'exnamespaces': []}
-            print 'Testing', config_api['api']
+            print '\nTesting', api
             print 'Trying to parse', pagetocheck, 'with API'
+            config_api = {'api': api, 'delay': 0, 'namespaces': ['all'], 'exnamespaces': []}
             result_api = getPageTitles(config=config_api, session=session)
             self.assertTrue(pagetocheck in result_api)
 
             # Testing with index
-            config_index = {'index': index, 'delay': 0, 'namespaces': ['all'], 'exnamespaces': []}
-            print 'Testing', config_index['index']
+            print 'Testing', index
+            config_index = {'index': index, 'delay': 0, 'namespaces': ['all'], 'exnamespaces': []}
             print 'Trying to parse', pagetocheck, 'with index'
             result_index = getPageTitles(config=config_index, session=session)
             self.assertTrue(pagetocheck in result_index)
             self.assertEqual(len(result_api), len(result_index))
@@ -178,8 +176,47 @@ class TestDumpgenerator(unittest.TestCase):
         ]
         for wiki, engine in tests:
             print 'Testing', wiki
-            self.assertTrue(getWikiEngine(wiki) == engine)
+            self.assertEqual(getWikiEngine(wiki), engine)
+
+    def test_mwGetAPIAndIndex(self):
+        tests = [
+            # Standalone wikis
+            ['http://archiveteam.org', 'http://archiveteam.org/api.php', 'http://archiveteam.org/index.php'],
+            ['http://skilledtests.com/wiki/', 'http://skilledtests.com/wiki/api.php', 'http://skilledtests.com/wiki/index.php5'],
+            # Editthis wikifarm
+            # It has a page view limit
+            # Gamepedia wikifarm
+            ['http://dawngate.gamepedia.com', 'http://dawngate.gamepedia.com/api.php', 'http://dawngate.gamepedia.com/index.php'],
+            # Gentoo wikifarm
+            ['http://wiki.gentoo.org', 'http://wiki.gentoo.org/api.php', 'http://wiki.gentoo.org/index.php'],
+            # Neoseeker wikifarm
+            #['http://digimon.neoseeker.com', 'http://digimon.neoseeker.com/w/api.php', 'http://digimon.neoseeker.com/w/index.php'],
+            # Orain wikifarm
+            #['http://mc.orain.org', 'http://mc.orain.org/w/api.php', 'http://mc.orain.org/w/index.php'],
+            # Referata wikifarm
+            ['http://wikipapers.referata.com', 'http://wikipapers.referata.com/w/api.php', 'http://wikipapers.referata.com/w/index.php'],
+            # ShoutWiki wikifarm
+            ['http://commandos.shoutwiki.com', 'http://commandos.shoutwiki.com/w/api.php', 'http://commandos.shoutwiki.com/w/index.php'],
+            # Wiki-site wikifarm
+            #['http://minlingo.wiki-site.com', 'http://minlingo.wiki-site.com/api.php', 'http://minlingo.wiki-site.com/index.php'],
+            # Wikkii wikifarm
+            # It seems offline
+        ]
+        for wiki, api, index in tests:
+            print 'Testing', wiki
+            api2, index2 = mwGetAPIAndIndex(wiki)
+            self.assertEqual(api, api2)
+            self.assertEqual(index, index2)
 
 if __name__ == '__main__':
     #copying dumpgenerator.py to this directory
     #shutil.copy2('../dumpgenerator.py', './dumpgenerator.py')
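
Assuming the file keeps its standard unittest.main() entry point (not shown in this hunk) and the usual test_dumpgenerator.py filename, the new test can be run on its own with:

python test_dumpgenerator.py TestDumpgenerator.test_mwGetAPIAndIndex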
