diff --git a/dumpgenerator.py b/dumpgenerator.py
index 08c8efb..fb08e2b 100644
--- a/dumpgenerator.py
+++ b/dumpgenerator.py
@@ -921,6 +921,22 @@ def removeIP(raw=''):
raw = re.sub(r'(?i)[\da-f]{0,4}:[\da-f]{0,4}:[\da-f]{0,4}:[\da-f]{0,4}:[\da-f]{0,4}:[\da-f]{0,4}:[\da-f]{0,4}:[\da-f]{0,4}', '0:0:0:0:0:0:0:0', raw)
return raw
+def checkXMLIntegrity(config={}, titles=None):
+    """ Check that the XML dump has balanced page markup; regenerate it if not """
+    # config: dict; only config['path'] (the dump directory) is read here.
+    # titles: page titles handed to generateXMLDump() when the dump has to be
+    #         rebuilt. Optional so existing callers that pass only config keep
+    #         working; without it the corruption is reported but not repaired.
+    # Returns True when the <title>, <page> and </page> line counts agree.
+    #
+    # The counts are taken in Python (lines containing each tag, which is what
+    # `grep -c` reports). The earlier os.system('grep ... -c > /dev/null') calls
+    # returned the shell exit status rather than the count, so the comparison
+    # succeeded whenever every tag appeared at least once.
+    # NOTE(review): the tag literals below are inferred from the former variable
+    # names (checktitles, checkpageopen, checkpageclose) -- confirm.
+    # The revision open/close counts were computed but never compared, so they
+    # are no longer gathered.
+    import glob  # local import: nothing else in this function's scope needs it
+
+    print "Verifying dump..."
+    tags = ('<title>', '<page>', '</page>')
+    counts = dict.fromkeys(tags, 0)
+    # Build paths from config['path'] instead of os.chdir(path)/os.chdir('..'),
+    # which left the process in the wrong directory for nested or absolute paths.
+    for xmlfilename in glob.glob(os.path.join(config['path'], '*.xml')):
+        with open(xmlfilename, 'r') as xmlfile:
+            # Stream line by line: dumps can be far larger than memory.
+            for line in xmlfile:
+                for tag in tags:
+                    if tag in line:
+                        counts[tag] += 1
+
+    xmlisgood = counts['<title>'] == counts['<page>'] == counts['</page>']
+    if not xmlisgood:
+        if titles is None:
+            # Previously this branch referenced an undefined name `titles`
+            # and died with NameError before any regeneration could start.
+            print "XML dump is corrupted, but no titles were supplied to regenerate it"
+        else:
+            print "XML dump is corrupted, regenerating a new dump"
+            generateXMLDump(config=config, titles=titles)
+    return xmlisgood
+
def main(params=[]):
""" Main function """
welcome()
@@ -1081,6 +1097,7 @@ def main(params=[]):
titles += getPageTitles(config=config)
saveTitles(config=config, titles=titles)
generateXMLDump(config=config, titles=titles)
+ checkXMLIntegrity(config=config)
if config['images']:
if config['api']:
images += getImageFilenamesURLAPI(config=config)
@@ -1122,4 +1139,4 @@ def main(params=[]):
bye()
if __name__ == "__main__":
- main()
\ No newline at end of file
+ main()