From 78f04a4bcdf8195b64a36c20a08110294aa24040 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Emilio=20J=2E=20Rodr=C3=ADguez-Posada?= Date: Fri, 4 Jul 2014 19:45:34 +0200 Subject: [PATCH] adding more tests for image lists --- testing/test_dumpgenerator.py | 73 ++++++++++++++++++++++++++++++----- 1 file changed, 63 insertions(+), 10 deletions(-) diff --git a/testing/test_dumpgenerator.py b/testing/test_dumpgenerator.py index a127a79..4a74b70 100644 --- a/testing/test_dumpgenerator.py +++ b/testing/test_dumpgenerator.py @@ -16,6 +16,10 @@ # along with this program. If not, see <http://www.gnu.org/licenses/>. import json +try: + from hashlib import md5 +except ImportError: # Python 2.4 compatibility + from md5 import new as md5 import requests import shutil import time @@ -25,9 +29,12 @@ import urllib2 from dumpgenerator import delay, getImageFilenamesURL, getImageFilenamesURLAPI, getUserAgent, getWikiEngine class TestDumpgenerator(unittest.TestCase): - #Documentation - #http://revista.python.org.ar/1/html/unittest.html - #https://docs.python.org/2/library/unittest.html + # Documentation + # http://revista.python.org.ar/1/html/unittest.html + # https://docs.python.org/2/library/unittest.html + + # Ideas: + # - Check one wiki per wikifarm at least (page titles & images, with/out API) def test_delay(self): print '#'*73, '\n', 'test_delay', '\n', '#'*73 @@ -40,14 +47,49 @@ class TestDumpgenerator(unittest.TestCase): self.assertTrue(t2 > i and t2 < i + 1) def test_getImages(self): + """ This test downloads the image list using API and index.php """ + """ Compare both lists in length and file by file """ + """ Check the presence of some special files, like odd chars filenames """ + """ The tested wikis are one for every wikifarm and some alone """ + print '#'*73, '\n', 'test_getImages', '\n', '#'*73 tests = [ - ['http://wiki.annotation.jp/index.php', 'http://wiki.annotation.jp/api.php', u'かずさアノテーション - ソーシャル・ゲノム・アノテーション.jpg'], - ['http://archiveteam.org/index.php', 'http://archiveteam.org/api.php', u'Archive-is 
2013-07-02 17-05-40.png'], + # Alone wikis + #['http://wiki.annotation.jp/index.php', 'http://wiki.annotation.jp/api.php', u'かずさアノテーション - ソーシャル・ゲノム・アノテーション.jpg'], + ['http://archiveteam.org/index.php', 'http://archiveteam.org/api.php', u'Archive-is 2013-07-02 17-05-40.png'], + + # Editthis wikifarm + # It has a page view limit + + # Gamepedia wikifarm + ['http://dawngate.gamepedia.com/index.php', 'http://dawngate.gamepedia.com/api.php', u'Spell Vanquish.png'], + + # Gentoo wikifarm + ['http://wiki.gentoo.org/index.php', 'http://wiki.gentoo.org/api.php', u'Openclonk screenshot1.png'], + + # Neoseeker wikifarm + ['http://digimon.neoseeker.com/w/index.php', 'http://digimon.neoseeker.com/w/api.php', u'Ogremon card.png'], + + # Orain wikifarm + ['http://mc.orain.org/w/index.php', 'http://mc.orain.org/w/api.php', u'Mojang logo.svg'], + + # Referata wikifarm + ['http://wikipapers.referata.com/w/index.php', 'http://wikipapers.referata.com/w/api.php', u'Avbot logo.png'], + + # ShoutWiki wikifarm + ['http://commandos.shoutwiki.com/w/index.php', 'http://commandos.shoutwiki.com/w/api.php', u'Night of the Wolves loading.png'], + + # Wiki-site wikifarm + ['http://minlingo.wiki-site.com/index.php', 'http://minlingo.wiki-site.com/api.php', u'一 (書方灋ᅗᅩ).png'], + + # Wikkii wikifarm + # It seems offline ] + session = requests.Session() + session.headers = {'User-Agent': getUserAgent()} for index, api, filetocheck in tests: print '\n' - #testing with API + # Testing with API config_api = {'api': api, 'delay': 0} req = urllib2.Request(url=api, data=urllib.urlencode({'action': 'query', 'meta': 'siteinfo', 'siprop': 'statistics', 'format': 'json'}), headers={'User-Agent': getUserAgent()}) f = urllib2.urlopen(req) @@ -56,11 +98,11 @@ class TestDumpgenerator(unittest.TestCase): print 'Testing', config_api['api'] print 'Trying to parse', filetocheck, 'with API' - result_api = getImageFilenamesURLAPI(config=config_api) + result_api = getImageFilenamesURLAPI(config=config_api, 
session=session) self.assertTrue(len(result_api) == imagecount) self.assertTrue(filetocheck in [filename for filename, url, uploader in result_api]) - #testing with index + # Testing with index config_index = {'index': index, 'delay': 0} req = urllib2.Request(url=api, data=urllib.urlencode({'action': 'query', 'meta': 'siteinfo', 'siprop': 'statistics', 'format': 'json'}), headers={'User-Agent': getUserAgent()}) f = urllib2.urlopen(req) @@ -69,14 +111,25 @@ class TestDumpgenerator(unittest.TestCase): print 'Testing', config_index['index'] print 'Trying to parse', filetocheck, 'with index' - result_index = getImageFilenamesURL(config=config_index) + result_index = getImageFilenamesURL(config=config_index, session=session) + + #print 111, set([filename for filename, url, uploader in result_api]) - set([filename for filename, url, uploader in result_index]) + self.assertTrue(len(result_index) == imagecount) self.assertTrue(filetocheck in [filename for filename, url, uploader in result_index]) + + # Compare every image in both lists, with/without API + c = 0 + for filename_api, url_api, uploader_api in result_api: + self.assertEqual(filename_api, result_index[c][0], u'{0} and {1} are different'.format(filename_api, result_index[c][0])) + self.assertEqual(url_api, result_index[c][1], u'{0} and {1} are different'.format(url_api, result_index[c][1])) + self.assertEqual(uploader_api, result_index[c][2], u'{0} and {1} are different'.format(uploader_api, result_index[c][2])) + c += 1 def test_getWikiEngine(self): tests = [ ['https://www.dokuwiki.org', 'DokuWiki'], - ['http://wiki.openwrt.org', 'DokuWiki'], + #['http://wiki.openwrt.org', 'DokuWiki'], ['http://moinmo.in', 'MoinMoin'], ['https://wiki.debian.org', 'MoinMoin'], ]