From 80288cf49e42f90d6017cb939d5c190be8dc3811 Mon Sep 17 00:00:00 2001 From: Federico Leva Date: Mon, 21 May 2018 16:40:09 +0300 Subject: [PATCH] Catch allpages and namespaces API without query results --- dumpgenerator.py | 22 +++++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) diff --git a/dumpgenerator.py b/dumpgenerator.py index 0519f4a..3e65582 100755 --- a/dumpgenerator.py +++ b/dumpgenerator.py @@ -210,25 +210,32 @@ def getNamespacesAPI(config={}, session=None): ) result = getJSON(r) delay(config=config, session=session) + try: + nsquery = result['query']['namespaces'] + except KeyError: + print "Error: could not get namespaces from the API request" + print "HTTP %d" % r.status_code + print r.text + return None if 'all' in namespaces: namespaces = [] - for i in result['query']['namespaces'].keys(): + for i in nsquery.keys(): if int(i) < 0: # -1: Special, -2: Media, excluding continue namespaces.append(int(i)) - namespacenames[int(i)] = result['query']['namespaces'][i]['*'] + namespacenames[int(i)] = nsquery[i]['*'] else: # check if those namespaces really exist in this wiki namespaces2 = [] - for i in result['query']['namespaces'].keys(): + for i in nsquery.keys(): bi = i i = int(i) if i < 0: # -1: Special, -2: Media, excluding continue if i in namespaces: namespaces2.append(i) - namespacenames[i] = result['query']['namespaces'][bi]['*'] + namespacenames[i] = nsquery[bi]['*'] namespaces = namespaces2 else: namespaces = [0] @@ -289,7 +296,12 @@ def getPageTitlesAPI(config={}, session=None): # print apfrom # print jsontitles - allpages = jsontitles['query']['allpages'] + try: + allpages = jsontitles['query']['allpages'] + except KeyError: + print "The allpages API returned nothing. Exit." + sys.exit(1) + # Hack for old versions of MediaWiki API where result is dict if isinstance(allpages, dict): allpages = allpages.values()