Consistently pass request parameters to session.post() via params= instead of data=

Also match URLs which end exactly in "index.php" or "api.php" (nothing after ".php") in domain2prefix().
pull/319/head
Federico Leva 6 years ago
parent 754027de42
commit d4f0869ecc

@ -159,7 +159,7 @@ def getNamespacesScraper(config={}, session=None):
namespacenames = {0: ''} # main is 0, no prefix
if namespaces:
r = session.post(
url=config['index'], data={'title': 'Special:Allpages'}, timeout=30)
url=config['index'], params={'title': 'Special:Allpages'}, timeout=30)
raw = r.text
delay(config=config, session=session)
@ -474,7 +474,7 @@ def getXMLHeader(config={}, session=None):
print "Trying the local name for the Special namespace instead"
r = session.post(
url=config['api'],
data={
params={
'action': 'query',
'meta': 'siteinfo',
'siprop': 'namespaces',
@ -578,7 +578,7 @@ def getXMLPageCore(headers={}, params={}, config={}, session=None):
return '' # empty xml
# FIXME HANDLE HTTP Errors HERE
try:
r = session.post(url=config['index'], data=params, headers=headers, timeout=10)
r = session.post(url=config['index'], params=params, headers=headers, timeout=10)
handleStatusCode(r)
xml = fixBOM(r)
except requests.exceptions.ConnectionError as e:
@ -948,7 +948,7 @@ def getImageNamesScraper(config={}, session=None):
# http://www.memoryarchive.org/en/index.php?title=Special:Imagelist&sort=byname&limit=50&wpIlMatch=
r = session.post(
url=config['index'],
data={
params={
'title': 'Special:Imagelist',
'limit': limit,
'offset': offset},
@ -1053,7 +1053,7 @@ def getImageNamesAPI(config={}, session=None):
'format': 'json',
'ailimit': 500}
# FIXME Handle HTTP Errors HERE
r = session.post(url=config['api'], data=params, timeout=30)
r = session.post(url=config['api'], params=params, timeout=30)
handleStatusCode(r)
jsonimages = getJSON(r)
delay(config=config, session=session)
@ -1111,7 +1111,7 @@ def getImageNamesAPI(config={}, session=None):
'iiprop': 'user|url',
'format': 'json'}
# FIXME Handle HTTP Errors HERE
r = session.post(url=config['api'], data=params, timeout=30)
r = session.post(url=config['api'], params=params, timeout=30)
handleStatusCode(r)
jsonimages = getJSON(r)
delay(config=config, session=session)
@ -1260,7 +1260,7 @@ def domain2prefix(config={}, session=None):
domain = config['index']
domain = domain.lower()
domain = re.sub(r'(https?://|www\.|/index\.php.+|/api\.php.+)', '', domain)
domain = re.sub(r'(https?://|www\.|/index\.php.*|/api\.php.*)', '', domain)
domain = re.sub(r'/', '_', domain)
domain = re.sub(r'\.', '', domain)
domain = re.sub(r'[^A-Za-z0-9]', '_', domain)
@ -1666,7 +1666,7 @@ def checkAPI(api=None, session=None):
def checkIndex(index=None, cookies=None, session=None):
""" Checking index.php availability """
r = session.post(url=index, data={'title': 'Special:Version'}, timeout=30)
r = session.post(url=index, params={'title': 'Special:Version'}, timeout=30)
raw = r.text
print 'Checking index.php...', index
# Workaround for issue 71
@ -1928,7 +1928,7 @@ def saveSpecialVersion(config={}, session=None):
else:
print 'Downloading Special:Version with extensions and other related info'
r = session.post(
url=config['index'], data={'title': 'Special:Version'}, timeout=10)
url=config['index'], params={'title': 'Special:Version'}, timeout=10)
raw = r.text
delay(config=config, session=session)
raw = removeIP(raw=raw)
@ -1943,7 +1943,7 @@ def saveIndexPHP(config={}, session=None):
print 'index.html exists, do not overwrite'
else:
print 'Downloading index.php (Main Page) as index.html'
r = session.post(url=config['index'], data={}, timeout=10)
r = session.post(url=config['index'], params={}, timeout=10)
raw = r.text
delay(config=config, session=session)
raw = removeIP(raw=raw)
@ -1963,7 +1963,7 @@ def saveSiteInfo(config={}, session=None):
# MediaWiki 1.13+
r = session.post(
url=config['api'],
data={
params={
'action': 'query',
'meta': 'siteinfo',
'siprop': 'general|namespaces|statistics|dbrepllag|interwikimap|namespacealiases|specialpagealiases|usergroups|extensions|skins|magicwords|fileextensions|rightsinfo',
@ -1974,7 +1974,7 @@ def saveSiteInfo(config={}, session=None):
if not 'query' in getJSON(r):
r = session.post(
url=config['api'],
data={
params={
'action': 'query',
'meta': 'siteinfo',
'siprop': 'general|namespaces|statistics|dbrepllag|interwikimap',
@ -1984,7 +1984,7 @@ def saveSiteInfo(config={}, session=None):
if not 'query' in getJSON(r):
r = session.post(
url=config['api'],
data={
params={
'action': 'query',
'meta': 'siteinfo',
'siprop': 'general|namespaces',

Loading…
Cancel
Save