From 376e8a11a3282474c84f11ef904040d8b4971574 Mon Sep 17 00:00:00 2001 From: Daniel Oaks Date: Thu, 22 Oct 2015 23:19:50 +1000 Subject: [PATCH] Avoid out-of-memory error in two extra places --- dumpgenerator.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/dumpgenerator.py b/dumpgenerator.py index daa65b9..41dcb53 100755 --- a/dumpgenerator.py +++ b/dumpgenerator.py @@ -600,8 +600,13 @@ def getXMLPage(config={}, title='', verbose=True, session=None): while not truncated and params['offset']: # next chunk # get the last timestamp from the acum XML params['offset'] = re.findall(r_timestamp, xml)[-1] - xml2 = getXMLPageCore( - params=params, config=config, session=session) + try: + xml2 = getXMLPageCore( + params=params, config=config, session=session) + except MemoryError: + print "The page's history exceeds our memory, halving limit." + params['limit'] = params['limit'] / 2 + continue # are there more edits in this next XML chunk or no ? if re.findall(r_timestamp, xml2): @@ -626,8 +631,8 @@ def getXMLPage(config={}, title='', verbose=True, session=None): """ # offset is OK in this wiki, merge with the previous chunk # of this page history and continue - xml2 = xml2.split("")[0] try: + xml2 = xml2.split("")[0] yield ' ' + (''.join(xml2.split('')[1:])) except MemoryError: print "The page's history exceeds our memory, halving limit."