From 73902d39c0d8043c6ebd62abddca377a0feb71b6 Mon Sep 17 00:00:00 2001 From: Federico Leva Date: Fri, 25 May 2018 10:53:45 +0300 Subject: [PATCH] For old MediaWiki releases, use rawcontinue and wikitools query() Otherwise the query continuation may fail and only the top revisions will be exported. Tested with Wikia: http://clubpenguin.wikia.com/api.php?action=query&prop=revisions&titles=Club_Penguin_Wiki Also add parentid since it's available after all. https://github.com/WikiTeam/wikiteam/issues/311#issuecomment-391957783 --- dumpgenerator.py | 31 ++++++++++++++++--------------- 1 file changed, 16 insertions(+), 15 deletions(-) diff --git a/dumpgenerator.py b/dumpgenerator.py index bf3ec3e..598cf32 100755 --- a/dumpgenerator.py +++ b/dumpgenerator.py @@ -805,7 +805,7 @@ def getXMLRevisions(config={}, session=None, allpages=False): try: for namespace in namespaces: - print "Exporting revisions from namespace %s" % namespace + print "Trying to export all revisions from namespace %s" % namespace arvparams = { 'action': 'query', 'list': 'allrevisions', @@ -864,22 +864,22 @@ def getXMLRevisions(config={}, session=None, allpages=False): 'titles': title, 'prop': 'revisions', 'rvlimit': 'max', - 'rvprop': 'ids|timestamp|user|userid|size|sha1|contentmodel|comment|content' + 'rvprop': 'ids|timestamp|user|userid|size|sha1|contentmodel|comment|content', + 'rawcontinue': 'yes' } prequest = wikitools.api.APIRequest(site, pparams) - results = prequest.queryGen() - for result in results: - pages = result['query']['pages'] - for page in pages: - try: - xml = makeXmlFromPage(pages[page]) - except PageMissingError: - logerror( - config=config, - text=u'Error: empty revision from API. Could not export page: %s' % (title.decode('utf-8')) - ) - continue - yield xml + results = prequest.query() + pages = results['query']['pages'] + for page in pages: + try: + xml = makeXmlFromPage(pages[page]) + except PageMissingError: + logerror( + config=config, + text=u'Error: empty revision from API. Could not export page: %s' % (title.decode('utf-8')) + ) + continue + yield xml except wikitools.api.APIError: print "This wikitools version seems not to work for us. Exiting." @@ -896,6 +896,7 @@ def makeXmlFromPage(page): for rev in page['revisions']: revision = E.revision( E.id(to_unicode(rev['revid'])), + E.parentid(to_unicode(rev['parentid'])), E.timestamp(rev['timestamp']), E.contributor( E.id(to_unicode(rev['userid'])),