2
0
mirror of https://github.com/WikiTeam/wikiteam synced 2024-11-04 12:00:28 +00:00

Pause and re-download the page XML when no </siteinfo> is found in the response

git-svn-id: https://wikiteam.googlecode.com/svn/trunk@90 31edc4fc-5e31-b4c4-d58b-c8bc928bcb95
This commit is contained in:
emijrp 2011-04-16 13:54:00 +00:00
parent 6c69e5b718
commit 727e4dbd7c

View File

@@ -324,7 +324,9 @@ def generateXMLDump(config={}, titles=[], start=''):
xmlfile = open('%s/%s' % (config['path'], xmlfilename), 'a') xmlfile = open('%s/%s' % (config['path'], xmlfilename), 'a')
c = 1 c = 1
print len(titles)
for title in titles: for title in titles:
print '-->',title
if title == start: #start downloading from start, included if title == start: #start downloading from start, included
lock = False lock = False
if lock: if lock:
@@ -333,6 +335,10 @@ def generateXMLDump(config={}, titles=[], start=''):
if c % 10 == 0: if c % 10 == 0:
print ' Downloaded %d pages' % (c) print ' Downloaded %d pages' % (c)
xml = getXMLPage(config=config, title=title) xml = getXMLPage(config=config, title=title)
while not re.search(r'</siteinfo>', xml): #empty xml by server? retry...
print ' XML for this page is wrong. Waiting some seconds and reloading...'
time.sleep(30)
xml = getXMLPage(config=config, title=title)
xml = cleanXML(xml=xml) xml = cleanXML(xml=xml)
xmlfile.write(xml) xmlfile.write(xml)
c += 1 c += 1