Catch HTTP Error 405 and switch from POST to GET for API requests

Seen on http://wiki.ainigma.eu/index.php?title=Hlavn%C3%AD_strana:
HTTPError: HTTP Error 405: Method Not Allowed
pull/359/head
Federico Leva 4 years ago
parent 8b5378f991
commit 49017e3f20

@ -840,7 +840,12 @@ def getXMLRevisions(config={}, session=None, allpages=False):
print("Trying to get wikitext from the allrevisions API and to build the XML") print("Trying to get wikitext from the allrevisions API and to build the XML")
while True: while True:
try: try:
arvrequest = site.api(**arvparams) arvrequest = site.api(http_method=config['http_method'], **arvparams)
except requests.exceptions.HTTPError as e:
if e.response.status_code == 405 and config['http_method'] == "POST":
print("POST request to the API failed, retrying with GET")
config['http_method'] = "GET"
continue
except requests.exceptions.ReadTimeout as err: except requests.exceptions.ReadTimeout as err:
# Hopefully temporary, just wait a bit and continue with the same request. # Hopefully temporary, just wait a bit and continue with the same request.
# No point putting a limit to retries, we'd need to abort everything. # No point putting a limit to retries, we'd need to abort everything.
@ -865,7 +870,13 @@ def getXMLRevisions(config={}, session=None, allpages=False):
print("Trying to list the revisions and to export them one by one") print("Trying to list the revisions and to export them one by one")
# We only need the revision ID, all the rest will come from the raw export # We only need the revision ID, all the rest will come from the raw export
arvparams['arvprop'] = 'ids' arvparams['arvprop'] = 'ids'
arvrequest = site.api(**arvparams) try:
arvrequest = site.api(http_method=config['http_method'], **arvparams)
except requests.exceptions.HTTPError as e:
if e.response.status_code == 405 and config['http_method'] == "POST":
print("POST request to the API failed, retrying with GET")
config['http_method'] = "GET"
continue
exportparams = { exportparams = {
'action': 'query', 'action': 'query',
'export': '1', 'export': '1',
@ -888,7 +899,14 @@ def getXMLRevisions(config={}, session=None, allpages=False):
# chooses to give us only the latest for each page # chooses to give us only the latest for each page
for revid in revids: for revid in revids:
exportparams['revids'] = revid exportparams['revids'] = revid
exportrequest = site.api(**exportparams) try:
exportrequest = site.api(http_method=config['http_method'], **exportparams)
except requests.exceptions.HTTPError as e:
if e.response.status_code == 405 and config['http_method'] == "POST":
print("POST request to the API failed, retrying with GET")
config['http_method'] = "GET"
exportrequest = site.api(http_method=config['http_method'], **exportparams)
# This gives us a self-standing <mediawiki> element # This gives us a self-standing <mediawiki> element
# but we only need the inner <page>: we can live with # but we only need the inner <page>: we can live with
# duplication and non-ordering of page titles, but the # duplication and non-ordering of page titles, but the
@ -900,7 +918,12 @@ def getXMLRevisions(config={}, session=None, allpages=False):
# Get the new ones # Get the new ones
arvparams['arvcontinue'] = arvrequest['continue']['arvcontinue'] arvparams['arvcontinue'] = arvrequest['continue']['arvcontinue']
try: try:
arvrequest = site.api(**arvparams) arvrequest = site.api(http_method=config['http_method'], **arvparams)
except requests.exceptions.HTTPError as e:
if e.response.status_code == 405 and config['http_method'] == "POST":
print("POST request to the API failed, retrying with GET")
config['http_method'] = "GET"
arvrequest = site.api(http_method=config['http_method'], **arvparams)
except requests.exceptions.ReadTimeout as err: except requests.exceptions.ReadTimeout as err:
# As above # As above
print("ERROR: {}".format(str(err))) print("ERROR: {}".format(str(err)))
@ -932,7 +955,14 @@ def getXMLRevisions(config={}, session=None, allpages=False):
'titles': title, 'titles': title,
'export': '1', 'export': '1',
} }
exportrequest = site.api(**exportparams) try:
exportrequest = site.api(http_method=config['http_method'], **exportparams)
except requests.exceptions.HTTPError as e:
if e.response.status_code == 405 and config['http_method'] == "POST":
print("POST request to the API failed, retrying with GET")
config['http_method'] = "GET"
exportrequest = site.api(http_method=config['http_method'], **exportparams)
xml = exportrequest['query']['export']['*'] xml = exportrequest['query']['export']['*']
c += 1 c += 1
if c % 10 == 0: if c % 10 == 0:
@ -959,7 +989,14 @@ def getXMLRevisions(config={}, session=None, allpages=False):
'rvlimit': 50, 'rvlimit': 50,
'rvprop': 'ids|timestamp|user|userid|size|sha1|contentmodel|comment|content', 'rvprop': 'ids|timestamp|user|userid|size|sha1|contentmodel|comment|content',
} }
prequest = site.api(**pparams) try:
prequest = site.api(http_method=config['http_method'], **pparams)
except requests.exceptions.HTTPError as e:
if e.response.status_code == 405 and config['http_method'] == "POST":
print("POST request to the API failed, retrying with GET")
config['http_method'] = "GET"
prequest = site.api(http_method=config['http_method'], **pparams)
c += 1 c += 1
if c % 10 == 0: if c % 10 == 0:
print('Downloaded {} pages'.format(c)) print('Downloaded {} pages'.format(c))
@ -987,7 +1024,13 @@ def getXMLRevisions(config={}, session=None, allpages=False):
if 'continue' in prequest.keys(): if 'continue' in prequest.keys():
print("Getting more revisions for page {}".format(title)) print("Getting more revisions for page {}".format(title))
pparams['rvcontinue'] = prequest['continue']['rvcontinue'] pparams['rvcontinue'] = prequest['continue']['rvcontinue']
prequest = site.api(**pparams) try:
prequest = site.api(http_method=config['http_method'], **pparams)
except requests.exceptions.HTTPError as e:
if e.response.status_code == 405 and config['http_method'] == "POST":
print("POST request to the API failed, retrying with GET")
config['http_method'] = "GET"
prequest = site.api(http_method=config['http_method'], **pparams)
# mwclient seems to rewrite query-continue # mwclient seems to rewrite query-continue
#if 'query-continue' in prequest.keys(): #if 'query-continue' in prequest.keys():
# pparams['rvcontinue'] = prequest['query-continue']['revisions']['rvcontinue'] # pparams['rvcontinue'] = prequest['query-continue']['revisions']['rvcontinue']
@ -1826,6 +1869,7 @@ def getParameters(params=[]):
'date': datetime.datetime.now().strftime('%Y%m%d'), 'date': datetime.datetime.now().strftime('%Y%m%d'),
'api': api, 'api': api,
'failfast': args.failfast, 'failfast': args.failfast,
'http_method': "POST",
'index': index, 'index': index,
'images': args.images, 'images': args.images,
'logs': False, 'logs': False,

Loading…
Cancel
Save