def saveSiteInfo(config={}):
    """ Save a file with site info

    Sends an action=query / meta=siteinfo / format=json request to the API
    at config['api'] and writes the raw response body, unmodified, to
    <config['path']>/siteinfo.json.

    An existing siteinfo.json is never overwritten, and no request is made
    when config has no (or an empty) 'api' entry.

    config: dict; 'path' and 'api' are read here, and the whole dict is
            passed on to delay(). It is never modified, so the shared {}
            default is harmless.
    Returns None. Errors raised by urllib2 or by the file write propagate
    to the caller.
    """

    # Build the target path once; it is needed for both the check and the write.
    filename = '%s/siteinfo.json' % (config['path'])

    # Single-argument print(...) behaves exactly like the print statement
    # in Python 2, so the messages below are unchanged.
    if os.path.exists(filename):
        print('siteinfo.json exists, do not overwrite')
        return

    if not config.get('api'):
        # There is nothing to query without an API URL; skip instead of
        # letting the request blow up on an empty URL.
        print('No API URL available, skipping site info')
        return

    print('Downloading site info')
    params = urllib.urlencode({'action': 'query', 'meta': 'siteinfo', 'format': 'json'})
    req = urllib2.Request(url=config['api'], data=params, headers={'User-Agent': getUserAgent()})
    response = urllib2.urlopen(req)
    try:
        result = response.read()
    finally:
        # Close the connection even if read() fails part-way through.
        response.close()
    delay(config=config)
    # 'with' guarantees the file handle is released if write() raises.
    with open(filename, 'w') as f:
        f.write(result)