mirror of
https://github.com/WikiTeam/wikiteam
synced 2024-11-04 12:00:28 +00:00
Pause and reload the XML when no </siteinfo> is found
git-svn-id: https://wikiteam.googlecode.com/svn/trunk@90 31edc4fc-5e31-b4c4-d58b-c8bc928bcb95
This commit is contained in:
parent
6c69e5b718
commit
727e4dbd7c
@ -324,7 +324,9 @@ def generateXMLDump(config={}, titles=[], start=''):
|
|||||||
|
|
||||||
xmlfile = open('%s/%s' % (config['path'], xmlfilename), 'a')
|
xmlfile = open('%s/%s' % (config['path'], xmlfilename), 'a')
|
||||||
c = 1
|
c = 1
|
||||||
|
print len(titles)
|
||||||
for title in titles:
|
for title in titles:
|
||||||
|
print '-->',title
|
||||||
if title == start: #start downloading from start, included
|
if title == start: #start downloading from start, included
|
||||||
lock = False
|
lock = False
|
||||||
if lock:
|
if lock:
|
||||||
@ -333,6 +335,10 @@ def generateXMLDump(config={}, titles=[], start=''):
|
|||||||
if c % 10 == 0:
|
if c % 10 == 0:
|
||||||
print ' Downloaded %d pages' % (c)
|
print ' Downloaded %d pages' % (c)
|
||||||
xml = getXMLPage(config=config, title=title)
|
xml = getXMLPage(config=config, title=title)
|
||||||
|
while not re.search(r'</siteinfo>', xml): #empty xml by server? retry...
|
||||||
|
print ' XML for this page is wrong. Waiting some seconds and reloading...'
|
||||||
|
time.sleep(30)
|
||||||
|
xml = getXMLPage(config=config, title=title)
|
||||||
xml = cleanXML(xml=xml)
|
xml = cleanXML(xml=xml)
|
||||||
xmlfile.write(xml)
|
xmlfile.write(xml)
|
||||||
c += 1
|
c += 1
|
||||||
|
Loading…
Reference in New Issue
Block a user