From d543f7d4ddeaf01d690d9d66e2913cdf26222ec8 Mon Sep 17 00:00:00 2001 From: Federico Leva Date: Thu, 13 Feb 2020 15:45:17 +0200 Subject: [PATCH] Check the API URL against mwclient too, so it doesn't fail later Change the protocol from HTTP to HTTPS if needed. Fixes: http://nimiarkisto.fi/w/api.php --- dumpgenerator.py | 57 +++++++++++++++++++++++++++++++++++------------- 1 file changed, 42 insertions(+), 15 deletions(-) diff --git a/dumpgenerator.py b/dumpgenerator.py index f861aef..3fe6637 100755 --- a/dumpgenerator.py +++ b/dumpgenerator.py @@ -782,6 +782,7 @@ def getXMLRevisions(config={}, session=None, allpages=False): # FIXME: force the protocol we asked for! Or don't verify SSL if we asked HTTP? # https://github.com/WikiTeam/wikiteam/issues/358 site = mwclient.Site(apiurl.netloc, apiurl.path.replace("api.php", ""), scheme=apiurl.scheme) + if not 'all' in config['namespaces']: namespaces = config['namespaces'] else: @@ -1735,23 +1736,13 @@ def getParameters(params=[]): index2 = None if api: - retry = 0 - maxretries = args.retries - retrydelay = 20 - check = None - while retry < maxretries: - try: - check = checkAPI(api=api, session=session) - break - except requests.exceptions.ConnectionError as e: - print 'Connection error: %s'%(str(e)) - retry += 1 - print "Start retry attempt %d in %d seconds."%(retry+1, retrydelay) - time.sleep(retrydelay) + check, checkedapi = checkRetryAPI(api, args.retries, args.xmlrevisions, session) + if api and check: + # Replace the index URL we got from the API check index2 = check[1] - api = check[2] - print 'API is OK: ' + api + api = checkedapi + print 'API is OK: ' + checkedapi else: if index and not args.wiki: print 'API not available. Trying with index.php only.' 
def checkRetryAPI(api=None, retries=5, apiclient=False, session=None):
    """ Call checkAPI with retries and, if requested, validate the URL with mwclient.

    api: URL of the wiki's api.php.
    retries: maximum number of checkAPI attempts made when the connection fails.
    apiclient: when true, also try to open the site with mwclient, switching
        between HTTP and HTTPS once if the first attempt raises KeyError.
    session: passed through to checkAPI.

    Returns a tuple (check, api):
    check is whatever checkAPI returned, None if every attempt ended in a
    connection error, or False if mwclient raised KeyError with both protocols;
    api is the URL that was last tried (its protocol may have been switched).
    """
    retry = 0
    retrydelay = 20
    check = None
    while retry < retries:
        try:
            check = checkAPI(api, session=session)
            break
        except requests.exceptions.ConnectionError as e:
            print('Connection error: %s' % (str(e)))
            retry += 1
            # Only announce and wait when another attempt will really happen;
            # sleeping after the final failure just delays giving up.
            if retry < retries:
                print("Start retry attempt %d in %d seconds." % (retry + 1, retrydelay))
                time.sleep(retrydelay)

    if check and apiclient:
        apiurl = urlparse(api)
        sitepath = apiurl.path.replace("api.php", "")
        try:
            # The Site object is not kept: constructing it is the check.
            mwclient.Site(apiurl.netloc, sitepath, scheme=apiurl.scheme)
        except KeyError:
            # Probably KeyError: 'query'
            if apiurl.scheme == "https":
                oldscheme, newscheme = "https", "http"
            else:
                oldscheme, newscheme = "http", "https"
            # Swap the leading scheme only, so that a URL embedded later in
            # the string (e.g. in a query parameter) is left untouched.
            api = api.replace(oldscheme + "://", newscheme + "://", 1)
            print("WARNING: The provided API URL did not work with mwclient. Switched protocol to: {}".format(newscheme))

            try:
                mwclient.Site(apiurl.netloc, sitepath, scheme=newscheme)
            except KeyError:
                # Neither protocol works with mwclient: report the API as unusable.
                check = False

    return check, api