For old MediaWiki releases, use rawcontinue and wikitools query()

Otherwise the query continuation may fail on those releases, in which case
only the first batch of revisions returned for each page (the top, i.e. most
recent, ones) will be exported. Tested with Wikia:
http://clubpenguin.wikia.com/api.php?action=query&prop=revisions&titles=Club_Penguin_Wiki

Also add parentid to the exported revision XML, since the API already returns it as part of the 'ids' rvprop.

https://github.com/WikiTeam/wikiteam/issues/311#issuecomment-391957783
pull/319/head
Federico Leva 6 years ago
parent d11df60516
commit 73902d39c0

@ -805,7 +805,7 @@ def getXMLRevisions(config={}, session=None, allpages=False):
try: try:
for namespace in namespaces: for namespace in namespaces:
print "Exporting revisions from namespace %s" % namespace print "Trying to export all revisions from namespace %s" % namespace
arvparams = { arvparams = {
'action': 'query', 'action': 'query',
'list': 'allrevisions', 'list': 'allrevisions',
@ -864,22 +864,22 @@ def getXMLRevisions(config={}, session=None, allpages=False):
'titles': title, 'titles': title,
'prop': 'revisions', 'prop': 'revisions',
'rvlimit': 'max', 'rvlimit': 'max',
'rvprop': 'ids|timestamp|user|userid|size|sha1|contentmodel|comment|content' 'rvprop': 'ids|timestamp|user|userid|size|sha1|contentmodel|comment|content',
'rawcontinue': 'yes'
} }
prequest = wikitools.api.APIRequest(site, pparams) prequest = wikitools.api.APIRequest(site, pparams)
results = prequest.queryGen() results = prequest.query()
for result in results: pages = results['query']['pages']
pages = result['query']['pages'] for page in pages:
for page in pages: try:
try: xml = makeXmlFromPage(pages[page])
xml = makeXmlFromPage(pages[page]) except PageMissingError:
except PageMissingError: logerror(
logerror( config=config,
config=config, text=u'Error: empty revision from API. Could not export page: %s' % (title.decode('utf-8'))
text=u'Error: empty revision from API. Could not export page: %s' % (title.decode('utf-8')) )
) continue
continue yield xml
yield xml
except wikitools.api.APIError: except wikitools.api.APIError:
print "This wikitools version seems not to work for us. Exiting." print "This wikitools version seems not to work for us. Exiting."
@ -896,6 +896,7 @@ def makeXmlFromPage(page):
for rev in page['revisions']: for rev in page['revisions']:
revision = E.revision( revision = E.revision(
E.id(to_unicode(rev['revid'])), E.id(to_unicode(rev['revid'])),
E.parentid(to_unicode(rev['parentid'])),
E.timestamp(rev['timestamp']), E.timestamp(rev['timestamp']),
E.contributor( E.contributor(
E.id(to_unicode(rev['userid'])), E.id(to_unicode(rev['userid'])),

Loading…
Cancel
Save