Actually convert the titles query method to mwclient too

4 years ago · 6b12e20a9d
parent f10adb71af
commit 6b12e20a9d
1 changed file with 26 additions and 13 deletions
--- a/dumpgenerator.py
+++ b/dumpgenerator.py
@@ -892,7 +892,8 @@ def getXMLRevisions(config={}, session=None, allpages=False):
        # TODO: check whether the KeyError was really for a missing arv API
        print "Warning. Could not use allrevisions. Wiki too old?"
        if config['curonly']:
-            # The raw XML export in the API gets a title and gives the latest revision
+            # The raw XML export in the API gets a title and gives the latest revision.
            # We could also use the allpages API as generator but let's be consistent.
            for title in readTitles(config):
                # TODO: as we're doing one page and revision at a time, we might
                # as well use xml format and exportnowrap=1 to use the string of,
@@ -909,6 +910,8 @@ def getXMLRevisions(config={}, session=None, allpages=False):
        else:
            # This is the closest to what we usually do with Special:Export:
            # take one title at a time and try to get all revisions exported.
            # It differs from the allrevisions method because it actually needs
            # to be input the page titles; otherwise, the requests are similar.
            # The XML needs to be made manually because the export=1 option
            # refuses to return an arbitrary number of revisions (see above).
            for title in readTitles(config):
@@ -920,21 +923,31 @@ def getXMLRevisions(config={}, session=None, allpages=False):
                    'rvprop': 'ids|timestamp|user|userid|size|sha1|contentmodel|comment|content',
                }
                prequest = site.api(**pparams)
                # The array is called "pages" even if there's only one.
                # TODO: we could actually batch titles a bit here if desired. How many?
                try:
-                    results = prequest.query()
+                    pages = prequest['query']['pages']
                    pages = results['query']['pages']
                except KeyError:
                    raise PageMissingError(title, xml='')
-                for page in pages:
+                # Be ready to iterate if there is continuation.
-                    try:
+                while True:
-                        xml = makeXmlFromPage(pages[page])
+                    # Go through the data we got to build the XML.
-                    except PageMissingError:
+                    for page in pages:
-                        logerror(
+                        try:
-                            config=config,
+                            xml = makeXmlFromPage(pages[page])
-                            text=u'Error: empty revision from API. Could not export page: %s' % (title.decode('utf-8'))
+                        except PageMissingError:
-                        )
+                            logerror(
-                        continue
+                                config=config,
-                    yield xml
+                                text=u'Error: empty revision from API. Could not export page: %s' % (title.decode('utf-8'))
                            )
                            continue
                        yield xml
                    # Get next batch of revisions if there's more.
                    if 'continue' in prequest:
                        pparams['rvcontinue'] = prequest['rvcontinue']
                        prequest = site.api(**pparams)
    except mwclient.errors.MwClientError:
        print "This mwclient version seems not to work for us. Exiting."