From ff0d230d0807656e4ebcb3b34d60a0b8d0eec54f Mon Sep 17 00:00:00 2001 From: "Pi R. Squared" Date: Sun, 14 Sep 2014 11:10:43 -0400 Subject: [PATCH] Get as much information from siteinfo as possible Properly fixes #74. Algorithm: 1. Try all siteinfo props. If this gives an error, continue. Otherwise, stop. 2. Try MediaWiki 1.11-1.12 siteinfo props. If this gives an error, continue. Otherwise, stop. 3. Try minimal siteinfo props. Stop. Not using sishowalldb=1 to avoid possible error (by default), since this data is of little use anyway. --- dumpgenerator.py | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/dumpgenerator.py b/dumpgenerator.py index bb9fa58..f1e4312 100644 --- a/dumpgenerator.py +++ b/dumpgenerator.py @@ -1430,8 +1430,25 @@ def saveSiteInfo(config={}, session=None): print 'siteinfo.json exists, do not overwrite' else: print 'Downloading site info as siteinfo.json' + + # MediaWiki 1.13+ r = session.post(url=config['api'], data={ - 'action': 'query', 'meta': 'siteinfo', 'format': 'json'}) + 'action': 'query', + 'meta': 'siteinfo', + 'siprop': 'general|namespaces|statistics|dbrepllag|interwikimap|namespacealiases|specialpagealiases|usergroups|extensions|skins|magicwords|fileextensions|rightsinfo', + 'sinumberingroup': 1, + 'format': 'json'}) + # MediaWiki 1.11-1.12 + if not 'query' in json.loads(r.text): + r = session.post(url=config['api'], data={ + 'action': 'query', + 'meta': 'siteinfo', + 'siprop': 'general|namespaces|statistics|dbrepllag|interwikimap', + 'format': 'json'}) + # MediaWiki 1.8-1.10 + if not 'query' in json.loads(r.text): + r = session.post(url=config['api'], data={ + 'action': 'query', 'meta': 'siteinfo', 'siprop': 'general|namespaces', 'format': 'json'}) result = json.loads(r.text) delay(config=config, session=session) with open('%s/siteinfo.json' % (config['path']), 'w') as outfile: