Raise and catch PageMissingError when revisions API result is incomplete

https://github.com/WikiTeam/wikiteam/issues/317
pull/319/head
Federico Leva 6 years ago
parent 7a655f0074
commit d76b4b4e01

@ -871,7 +871,15 @@ def getXMLRevisions(config={}, session=None, allpages=False):
for result in results:
pages = result['query']['pages']
for page in pages:
yield makeXmlFromPage(pages[page])
try:
xml = makeXmlFromPage(pages[page])
except PageMissingError:
logerror(
config=config,
text=u'Error: empty revision from API. Could not export page: %s' % (title.decode('utf-8'))
)
continue
yield xml
except wikitools.api.APIError:
print "This wikitools version seems not to work for us. Exiting."
@ -879,28 +887,31 @@ def getXMLRevisions(config={}, session=None, allpages=False):
def makeXmlFromPage(page):
    """ Output an XML document as a string from a page as in the API JSON

    Builds a <page> element (title, ns, id) and appends one <revision>
    child per entry of page['revisions'], then serialises the tree with
    etree.tostring(..., pretty_print=True).

    page: mapping for a single page of an API result; the keys read here
          are 'title', 'ns', 'pageid' and 'revisions'. Each revision is
          read for 'revid', 'timestamp', 'userid', 'user', 'comment', '*'
          and 'size', plus the optional 'contentmodel' and 'sha1'.

    Raises PageMissingError when any of the required keys is absent, so
    that the caller can log the page and move on to the next one.
    """
    try:
        p = E.page(
            E.title(page['title']),
            E.ns(to_unicode(page['ns'])),
            E.id(to_unicode(page['pageid'])),
        )
        for rev in page['revisions']:
            revision = E.revision(
                E.id(to_unicode(rev['revid'])),
                E.timestamp(rev['timestamp']),
                E.contributor(
                    E.id(to_unicode(rev['userid'])),
                    E.username(to_unicode(rev['user'])),
                ),
                E.comment(rev['comment']),
                E.text(rev['*'], space="preserve", bytes=to_unicode(rev['size'])),
            )
            if 'contentmodel' in rev:
                revision.append(E.model(rev['contentmodel']))
            # The sha1 may not have been backfilled on older wikis or lack for other reasons (Wikia).
            if 'sha1' in rev:
                revision.append(E.sha1(rev['sha1']))
            p.append(revision)
    except KeyError:
        # The missing key may be 'title' itself; indexing it again here would
        # raise a second KeyError that callers catching PageMissingError
        # would not see, so fall back to an empty title instead.
        # NOTE(review): assumes page is a dict-like object exposing .get() -- confirm.
        raise PageMissingError(page.get('title', ''), '')
    return etree.tostring(p, pretty_print=True)
def readTitles(config={}, start=None):

Loading…
Cancel
Save