For old MediaWiki releases, use rawcontinue and wikitools query()

Otherwise the query continuation may fail and only the top revisions
will be exported. Tested with Wikia:
http://clubpenguin.wikia.com/api.php?action=query&prop=revisions&titles=Club_Penguin_Wiki

Also add parentid to the exported revision XML, since the API already returns it.

https://github.com/WikiTeam/wikiteam/issues/311#issuecomment-391957783
pull/319/head
Federico Leva 6 years ago
parent d11df60516
commit 73902d39c0

@@ -805,7 +805,7 @@ def getXMLRevisions(config={}, session=None, allpages=False):
try:
for namespace in namespaces:
print "Exporting revisions from namespace %s" % namespace
print "Trying to export all revisions from namespace %s" % namespace
arvparams = {
'action': 'query',
'list': 'allrevisions',
@@ -864,22 +864,22 @@ def getXMLRevisions(config={}, session=None, allpages=False):
'titles': title,
'prop': 'revisions',
'rvlimit': 'max',
'rvprop': 'ids|timestamp|user|userid|size|sha1|contentmodel|comment|content'
'rvprop': 'ids|timestamp|user|userid|size|sha1|contentmodel|comment|content',
'rawcontinue': 'yes'
}
prequest = wikitools.api.APIRequest(site, pparams)
results = prequest.queryGen()
for result in results:
pages = result['query']['pages']
for page in pages:
try:
xml = makeXmlFromPage(pages[page])
except PageMissingError:
logerror(
config=config,
text=u'Error: empty revision from API. Could not export page: %s' % (title.decode('utf-8'))
)
continue
yield xml
results = prequest.query()
pages = results['query']['pages']
for page in pages:
try:
xml = makeXmlFromPage(pages[page])
except PageMissingError:
logerror(
config=config,
text=u'Error: empty revision from API. Could not export page: %s' % (title.decode('utf-8'))
)
continue
yield xml
except wikitools.api.APIError:
print "This wikitools version seems not to work for us. Exiting."
@@ -896,6 +896,7 @@ def makeXmlFromPage(page):
for rev in page['revisions']:
revision = E.revision(
E.id(to_unicode(rev['revid'])),
E.parentid(to_unicode(rev['parentid'])),
E.timestamp(rev['timestamp']),
E.contributor(
E.id(to_unicode(rev['userid'])),

Loading…
Cancel
Save