diff --git a/dumpgenerator.py b/dumpgenerator.py index bf3ec3e..598cf32 100755 --- a/dumpgenerator.py +++ b/dumpgenerator.py @@ -805,7 +805,7 @@ def getXMLRevisions(config={}, session=None, allpages=False): try: for namespace in namespaces: - print "Exporting revisions from namespace %s" % namespace + print "Trying to export all revisions from namespace %s" % namespace arvparams = { 'action': 'query', 'list': 'allrevisions', @@ -864,22 +864,22 @@ def getXMLRevisions(config={}, session=None, allpages=False): 'titles': title, 'prop': 'revisions', 'rvlimit': 'max', - 'rvprop': 'ids|timestamp|user|userid|size|sha1|contentmodel|comment|content' + 'rvprop': 'ids|timestamp|user|userid|size|sha1|contentmodel|comment|content', + 'rawcontinue': 'yes' } prequest = wikitools.api.APIRequest(site, pparams) - results = prequest.queryGen() - for result in results: - pages = result['query']['pages'] - for page in pages: - try: - xml = makeXmlFromPage(pages[page]) - except PageMissingError: - logerror( - config=config, - text=u'Error: empty revision from API. Could not export page: %s' % (title.decode('utf-8')) - ) - continue - yield xml + results = prequest.query() + pages = results['query']['pages'] + for page in pages: + try: + xml = makeXmlFromPage(pages[page]) + except PageMissingError: + logerror( + config=config, + text=u'Error: empty revision from API. Could not export page: %s' % (title.decode('utf-8')) + ) + continue + yield xml except wikitools.api.APIError: print "This wikitools version seems not to work for us. Exiting." @@ -896,6 +896,7 @@ def makeXmlFromPage(page): for rev in page['revisions']: revision = E.revision( E.id(to_unicode(rev['revid'])), + E.parentid(to_unicode(rev['parentid'])), E.timestamp(rev['timestamp']), E.contributor( E.id(to_unicode(rev['userid'])),