From 386c6be0366d50c5e7395b029ff9ec6d422e9d65 Mon Sep 17 00:00:00 2001
From: Emilio J. Rodríguez-Posada
Date: Sun, 13 Jul 2014 12:34:09 +0200
Subject: [PATCH] added test_mwGetAPIAndIndex; commented out slow/404 error wikis

---
 testing/test_dumpgenerator.py | 81 +++++++++++++++++++++++++----------
 1 file changed, 59 insertions(+), 22 deletions(-)

diff --git a/testing/test_dumpgenerator.py b/testing/test_dumpgenerator.py
index 5880d1a..bd97e75 100644
--- a/testing/test_dumpgenerator.py
+++ b/testing/test_dumpgenerator.py
@@ -26,7 +26,7 @@ import time
 import unittest
 import urllib
 import urllib2
-from dumpgenerator import delay, getImageNames, getPageTitles, getUserAgent, getWikiEngine
+from dumpgenerator import delay, getImageNames, getPageTitles, getUserAgent, getWikiEngine, mwGetAPIAndIndex
 
 class TestDumpgenerator(unittest.TestCase):
     # Documentation
@@ -37,7 +37,7 @@ class TestDumpgenerator(unittest.TestCase):
     # - Check one wiki per wikifarm at least (page titles & images, with/out API)
 
     def test_delay(self):
-        """ This test checks several delays """
+        # This test checks several delays
 
         print '#'*73, '\n', 'test_delay', '\n', '#'*73
         for i in [0, 1, 2, 3]:
@@ -49,10 +49,10 @@ class TestDumpgenerator(unittest.TestCase):
             self.assertTrue(t2 > i and t2 < i + 1)
 
     def test_getImages(self):
-        """ This test download the image list using API and index.php """
-        """ Compare both lists in length and file by file """
-        """ Check the presence of some special files, like odd chars filenames """
-        """ The tested wikis are from different wikifarms and some alone """
+        # This test downloads the image list using API and index.php
+        # Compare both lists in length and file by file
+        # Check the presence of some special files, like filenames with odd chars
+        # The tested wikis are from different wikifarms, plus some standalone wikis
 
         print '#'*73, '\n', 'test_getImages', '\n', '#'*73
         tests = [
@@ -74,7 +74,7 @@ class TestDumpgenerator(unittest.TestCase):
             ['http://digimon.neoseeker.com/w/index.php', 'http://digimon.neoseeker.com/w/api.php', u'Ogremon card.png'],
 
             # Orain wikifarm
-            ['http://mc.orain.org/w/index.php', 'http://mc.orain.org/w/api.php', u'Mojang logo.svg'],
+            #['http://mc.orain.org/w/index.php', 'http://mc.orain.org/w/api.php', u'Mojang logo.svg'],
 
             # Referata wikifarm
             ['http://wikipapers.referata.com/w/index.php', 'http://wikipapers.referata.com/w/api.php', u'Avbot logo.png'],
@@ -91,28 +91,27 @@ class TestDumpgenerator(unittest.TestCase):
         session = requests.Session()
         session.headers = {'User-Agent': getUserAgent()}
         for index, api, filetocheck in tests:
-            print '\n'
             # Testing with API
+            print '\nTesting', api
             config_api = {'api': api, 'delay': 0}
             req = urllib2.Request(url=api, data=urllib.urlencode({'action': 'query', 'meta': 'siteinfo', 'siprop': 'statistics', 'format': 'json'}), headers={'User-Agent': getUserAgent()})
             f = urllib2.urlopen(req)
             imagecount = int(json.loads(f.read())['query']['statistics']['images'])
             f.close()
-
-            print 'Testing', config_api['api']
+            print 'Trying to parse', filetocheck, 'with API'
             result_api = getImageNames(config=config_api, session=session)
             self.assertEqual(len(result_api), imagecount)
             self.assertTrue(filetocheck in [filename for filename, url, uploader in result_api])
 
             # Testing with index
+            print '\nTesting', index
             config_index = {'index': index, 'delay': 0}
             req = urllib2.Request(url=api, data=urllib.urlencode({'action': 'query', 'meta': 'siteinfo', 'siprop': 'statistics', 'format': 'json'}), headers={'User-Agent': getUserAgent()})
             f = urllib2.urlopen(req)
             imagecount = int(json.loads(f.read())['query']['statistics']['images'])
             f.close()
-            print 'Testing', config_index['index']
             print 'Trying to parse', filetocheck, 'with index'
             result_index = getImageNames(config=config_index, session=session)
             #print 111, set([filename for filename, url, uploader in result_api]) - set([filename for filename, url, uploader in result_index])
@@ -128,10 +127,10 @@ class TestDumpgenerator(unittest.TestCase):
             c += 1
 
     def test_getPageTitles(self):
-        """ This test download the title list using API and index.php """
-        """ Compare both lists in length and title by title """
-        """ Check the presence of some special titles, like odd chars """
-        """ The tested wikis are from different wikifarms and some alone """
+        # This test downloads the title list using API and index.php
+        # Compare both lists in length and title by title
+        # Check the presence of some special titles, like titles with odd chars
+        # The tested wikis are from different wikifarms, plus some standalone wikis
 
         print '#'*73, '\n', 'test_getPageTitles', '\n', '#'*73
         tests = [
@@ -146,18 +145,17 @@ class TestDumpgenerator(unittest.TestCase):
         session = requests.Session()
         session.headers = {'User-Agent': getUserAgent()}
         for index, api, pagetocheck in tests:
-            print '\n'
             # Testing with API
-            config_api = {'api': api, 'delay': 0, 'namespaces': ['all'], 'exnamespaces': []}
-            print 'Testing', config_api['api']
+            print '\nTesting', api
             print 'Trying to parse', pagetocheck, 'with API'
+            config_api = {'api': api, 'delay': 0, 'namespaces': ['all'], 'exnamespaces': []}
            result_api = getPageTitles(config=config_api, session=session)
             self.assertTrue(pagetocheck in result_api)
 
             # Testing with index
-            config_index = {'index': index, 'delay': 0, 'namespaces': ['all'], 'exnamespaces': []}
-            print 'Testing', config_index['index']
+            print 'Testing', index
             print 'Trying to parse', pagetocheck, 'with index'
+            config_index = {'index': index, 'delay': 0, 'namespaces': ['all'], 'exnamespaces': []}
             result_index = getPageTitles(config=config_index, session=session)
             self.assertTrue(pagetocheck in result_index)
             self.assertEqual(len(result_api), len(result_index))
@@ -178,8 +176,47 @@ class TestDumpgenerator(unittest.TestCase):
         ]
         for wiki, engine in tests:
             print 'Testing', wiki
-            self.assertTrue(getWikiEngine(wiki) == engine)
-
+            self.assertEqual(getWikiEngine(wiki), engine)
+
+    def test_mwGetAPIAndIndex(self):
+        tests = [
+            # Standalone wikis
+            ['http://archiveteam.org', 'http://archiveteam.org/api.php', 'http://archiveteam.org/index.php'],
+            ['http://skilledtests.com/wiki/', 'http://skilledtests.com/wiki/api.php', 'http://skilledtests.com/wiki/index.php5'],
+
+            # Editthis wikifarm
+            # It has a page view limit
+
+            # Gamepedia wikifarm
+            ['http://dawngate.gamepedia.com', 'http://dawngate.gamepedia.com/api.php', 'http://dawngate.gamepedia.com/index.php'],
+
+            # Gentoo wikifarm
+            ['http://wiki.gentoo.org', 'http://wiki.gentoo.org/api.php', 'http://wiki.gentoo.org/index.php'],
+
+            # Neoseeker wikifarm
+            #['http://digimon.neoseeker.com', 'http://digimon.neoseeker.com/w/api.php', 'http://digimon.neoseeker.com/w/index.php'],
+
+            # Orain wikifarm
+            #['http://mc.orain.org', 'http://mc.orain.org/w/api.php', 'http://mc.orain.org/w/index.php'],
+
+            # Referata wikifarm
+            ['http://wikipapers.referata.com', 'http://wikipapers.referata.com/w/api.php', 'http://wikipapers.referata.com/w/index.php'],
+
+            # ShoutWiki wikifarm
+            ['http://commandos.shoutwiki.com', 'http://commandos.shoutwiki.com/w/api.php', 'http://commandos.shoutwiki.com/w/index.php'],
+
+            # Wiki-site wikifarm
+            #['http://minlingo.wiki-site.com', 'http://minlingo.wiki-site.com/api.php', 'http://minlingo.wiki-site.com/index.php'],
+
+            # Wikkii wikifarm
+            # It seems offline
+        ]
+        for wiki, api, index in tests:
+            print 'Testing', wiki
+            api2, index2 = mwGetAPIAndIndex(wiki)
+            self.assertEqual(api, api2)
+            self.assertEqual(index, index2)
+
 if __name__ == '__main__':
     #copying dumpgenerator.py to this directory
     #shutil.copy2('../dumpgenerator.py', './dumpgenerator.py')
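
Note for reviewers: in test_getImages the expected image count comes from MediaWiki's siteinfo statistics, fetched before calling getImageNames. A minimal standalone sketch of that request, using the same urllib2 calls as the test; the API URL is just the first example from the test table:

    import json
    import urllib
    import urllib2

    api = 'http://archiveteam.org/api.php'  # example endpoint from the test table
    data = urllib.urlencode({'action': 'query', 'meta': 'siteinfo',
                             'siprop': 'statistics', 'format': 'json'})
    f = urllib2.urlopen(urllib2.Request(url=api, data=data))
    # The statistics block includes the wiki's total image count
    print json.loads(f.read())['query']['statistics']['images']
    f.close()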
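
The new test relies on mwGetAPIAndIndex taking a wiki URL and returning the resolved api.php and index.php URLs as a pair; that is what the unpacking in the loop assumes. A minimal usage sketch under that assumption, with the URL and expected output taken from the test table above:

    from dumpgenerator import mwGetAPIAndIndex

    api, index = mwGetAPIAndIndex('http://archiveteam.org')
    print api    # expected: http://archiveteam.org/api.php
    print index  # expected: http://archiveteam.org/index.php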
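
To run only the new test rather than the whole suite, the standard unittest selector should work, assuming the module keeps its unittest.main() entry point:

    python test_dumpgenerator.py TestDumpgenerator.test_mwGetAPIAndIndex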