From bbffb24a0c94575eaf69dbd5a94c3470a0778bf8 Mon Sep 17 00:00:00 2001 From: Hydriz Date: Fri, 22 Jun 2012 11:34:27 +0000 Subject: [PATCH] (Issue 34) XML integrity check inside the code An attempt to integrate an XML integrity checker into the script. If the dump integrity check fails, the dump will be regenerated from the start. git-svn-id: https://wikiteam.googlecode.com/svn/trunk@710 31edc4fc-5e31-b4c4-d58b-c8bc928bcb95 --- dumpgenerator.py | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/dumpgenerator.py b/dumpgenerator.py index 08c8efb..fb08e2b 100644 --- a/dumpgenerator.py +++ b/dumpgenerator.py @@ -921,6 +921,22 @@ def removeIP(raw=''): raw = re.sub(r'(?i)[\da-f]{0,4}:[\da-f]{0,4}:[\da-f]{0,4}:[\da-f]{0,4}:[\da-f]{0,4}:[\da-f]{0,4}:[\da-f]{0,4}:[\da-f]{0,4}', '0:0:0:0:0:0:0:0', raw) return raw +def checkXMLIntegrity(config={}): + print "Verifying dump..." + os.chdir(config['path']) + checktitles = os.system('grep "" *.xml -c > /dev/null') + checkpageopen = os.system('grep "<page>" *.xml -c > /dev/null') + checkpageclose = os.system('grep "</page>" *.xml -c > /dev/null') + checkrevisionopen = os.system('grep "<revision>" *.xml -c > /dev/null') + checkrevisionclose = os.system('grep "</revision>" *.xml -c > /dev/null') + os.chdir('..') + if (checktitles == checkpageopen and checktitles == checkpageclose and checkpageopen == checkpageclose): + xmlisgood = True + else: + xmlisgood = False + print "XML dump is corrupted, regenerating a new dump" + generateXMLDump(config=config, titles=titles) + def main(params=[]): """ Main function """ welcome() @@ -1081,6 +1097,7 @@ def main(params=[]): titles += getPageTitles(config=config) saveTitles(config=config, titles=titles) generateXMLDump(config=config, titles=titles) + checkXMLIntegrity(config=config) if config['images']: if config['api']: images += getImageFilenamesURLAPI(config=config) @@ -1122,4 +1139,4 @@ def main(params=[]): bye() if __name__ == "__main__": - main() \ No newline
at end of file + main()