pull/248/head
Tim 9 years ago
commit 8d07d48e44

@ -15,11 +15,13 @@
# You should have received a copy of the GNU General Public License # You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>. # along with this program. If not, see <http://www.gnu.org/licenses/>.
import datetime
import json import json
try: try:
from hashlib import md5 from hashlib import md5
except ImportError: # Python 2.4 compatibility except ImportError: # Python 2.4 compatibility
from md5 import new as md5 from md5 import new as md5
import os
import requests import requests
import shutil import shutil
import time import time
@ -27,8 +29,7 @@ import unittest
import urllib import urllib
import urllib2 import urllib2
import tempfile import tempfile
import os from dumpgenerator import delay, domain2prefix, getImageNames, getPageTitles, getUserAgent, getWikiEngine, mwGetAPIAndIndex
from dumpgenerator import delay, getImageNames, getPageTitles, getUserAgent, getWikiEngine, mwGetAPIAndIndex, domain2prefix
class TestDumpgenerator(unittest.TestCase): class TestDumpgenerator(unittest.TestCase):
# Documentation # Documentation
@ -41,7 +42,7 @@ class TestDumpgenerator(unittest.TestCase):
def test_delay(self): def test_delay(self):
# This test checks several delays # This test checks several delays
print '#'*73, '\n', 'test_delay', '\n', '#'*73 print '\n', '#'*73, '\n', 'test_delay', '\n', '#'*73
for i in [0, 1, 2, 3]: for i in [0, 1, 2, 3]:
print 'Testing delay:', i print 'Testing delay:', i
config = {'delay': i} config = {'delay': i}
@ -57,7 +58,7 @@ class TestDumpgenerator(unittest.TestCase):
# Check the presence of some special files, like odd chars filenames # Check the presence of some special files, like odd chars filenames
# The tested wikis are from different wikifarms and some alone # The tested wikis are from different wikifarms and some alone
print '#'*73, '\n', 'test_getImages', '\n', '#'*73 print '\n', '#'*73, '\n', 'test_getImages', '\n', '#'*73
tests = [ tests = [
# Alone wikis # Alone wikis
#['http://wiki.annotation.jp/index.php', 'http://wiki.annotation.jp/api.php', u'かずさアノテーション - ソーシャル・ゲノム・アノテーション.jpg'], #['http://wiki.annotation.jp/index.php', 'http://wiki.annotation.jp/api.php', u'かずさアノテーション - ソーシャル・ゲノム・アノテーション.jpg'],
@ -142,7 +143,7 @@ class TestDumpgenerator(unittest.TestCase):
# Check the presence of some special titles, like odd chars # Check the presence of some special titles, like odd chars
# The tested wikis are from different wikifarms and some alone # The tested wikis are from different wikifarms and some alone
print '#'*73, '\n', 'test_getPageTitles', '\n', '#'*73 print '\n', '#'*73, '\n', 'test_getPageTitles', '\n', '#'*73
tests = [ tests = [
# Alone wikis # Alone wikis
['http://archiveteam.org/index.php', 'http://archiveteam.org/api.php', u'April Fools\' Day'], ['http://archiveteam.org/index.php', 'http://archiveteam.org/api.php', u'April Fools\' Day'],
@ -161,42 +162,32 @@ class TestDumpgenerator(unittest.TestCase):
# Testing with API # Testing with API
print '\nTesting', api print '\nTesting', api
print 'Trying to parse', pagetocheck, 'with API' print 'Trying to parse', pagetocheck, 'with API'
config_api = {'api': api, config_api = {'api': api, 'index': '', 'delay': 0, 'namespaces': ['all'], 'exnamespaces': [], 'date': datetime.datetime.now().strftime('%Y%m%d'), 'path': '.'}
'delay': 0, getPageTitles(config=config_api, session=session)
'namespaces': ['all'], titles_api = './%s-%s-titles.txt' % (domain2prefix(config=config_api), config_api['date'])
'exnamespaces': [], result_api = open(titles_api, 'r').read().splitlines()
'retries': 5, os.remove(titles_api)
'date': "20150807", self.assertTrue(pagetocheck in result_api)
}
config_api['path'] = tempfile.mkdtemp()
result_api = getPageTitles(config=config_api, session=session)
titles_api = open(os.path.join(config_api['path'], result_api), "rt").read().decode("utf-8").split("\n")
self.assertTrue(pagetocheck in titles_api)
# Testing with index # Testing with index
print 'Testing', index print 'Testing', index
print 'Trying to parse', pagetocheck, 'with index' print 'Trying to parse', pagetocheck, 'with index'
config_index = {'index': index, config_index = {'index': index, 'api': '', 'delay': 0, 'namespaces': ['all'], 'exnamespaces': [], 'date': datetime.datetime.now().strftime('%Y%m%d'), 'path': '.'}
'delay': 0, getPageTitles(config=config_index, session=session)
'namespaces': ['all'], titles_index = './%s-%s-titles.txt' % (domain2prefix(config=config_index), config_index['date'])
'exnamespaces': [], result_index = open(titles_index, 'r').read().splitlines()
'retries': 5, os.remove(titles_index)
'date': "20150807", self.assertTrue(pagetocheck in result_index)
'api': False self.assertEqual(len(result_api), len(result_index))
}
config_index['path'] = tempfile.mkdtemp()
result_index = getPageTitles(config=config_index, session=session)
titles_index = open(os.path.join(config_api['path'], result_index), "rt").read().decode("utf-8").split("\n")
self.assertTrue(pagetocheck in titles_index)
self.assertEqual(len(titles_api), len(titles_index))
# Compare every page in both lists, with/without API # Compare every page in both lists, with/without API
c = 0 c = 0
for pagename_api in result_api: for pagename_api in result_api:
self.assertEqual(pagename_api, result_index[c], u'{0} and {1} are different'.format(pagename_api, result_index[c])) self.assertEqual(pagename_api.decode('utf8'), result_index[c].decode('utf8'), u'{0} and {1} are different'.format(pagename_api.decode('utf8'), result_index[c].decode('utf8')))
c += 1 c += 1
def test_getWikiEngine(self): def test_getWikiEngine(self):
print '\n', '#'*73, '\n', 'test_getWikiEngine', '\n', '#'*73
tests = [ tests = [
['https://www.dokuwiki.org', 'DokuWiki'], ['https://www.dokuwiki.org', 'DokuWiki'],
['http://wiki.openwrt.org', 'DokuWiki'], ['http://wiki.openwrt.org', 'DokuWiki'],
@ -221,11 +212,11 @@ class TestDumpgenerator(unittest.TestCase):
['https://confluence.atlassian.com/', 'Confluence'], ['https://confluence.atlassian.com/', 'Confluence'],
#['https://wiki.hybris.com/dashboard.action', 'Confluence'], #['https://wiki.hybris.com/dashboard.action', 'Confluence'],
['https://confluence.sakaiproject.org/', 'Confluence'], ['https://confluence.sakaiproject.org/', 'Confluence'],
#['http://demo.bananadance.org/', 'Banana Dance'], ['http://demo.bananadance.org/', 'Banana Dance'],
['http://wagn.org/', 'Wagn'], ['http://wagn.org/', 'Wagn'],
['http://wiki.ace-mod.net/', 'Wagn'], ['http://wiki.ace-mod.net/', 'Wagn'],
#['https://success.mindtouch.com/', 'MindTouch'], ['https://success.mindtouch.com/', 'MindTouch'],
#['https://jspwiki.apache.org/', 'JSPWiki'], ['https://jspwiki.apache.org/', 'JSPWiki'],
['http://www.ihear.com/FreeCLAS/', 'JSPWiki'], ['http://www.ihear.com/FreeCLAS/', 'JSPWiki'],
['http://www.wikkawiki.org/HomePage', 'WikkaWiki'], ['http://www.wikkawiki.org/HomePage', 'WikkaWiki'],
['http://puppylinux.org/wikka/', 'WikkaWiki'], ['http://puppylinux.org/wikka/', 'WikkaWiki'],
@ -260,6 +251,7 @@ class TestDumpgenerator(unittest.TestCase):
self.assertEqual(guess_engine, engine) self.assertEqual(guess_engine, engine)
def test_mwGetAPIAndIndex(self): def test_mwGetAPIAndIndex(self):
print '\n', '#'*73, '\n', 'test_mwGetAPIAndIndex', '\n', '#'*73
tests = [ tests = [
# Alone wikis # Alone wikis
['http://archiveteam.org', 'http://archiveteam.org/api.php', 'http://archiveteam.org/index.php'], ['http://archiveteam.org', 'http://archiveteam.org/api.php', 'http://archiveteam.org/index.php'],

Loading…
Cancel
Save