|
|
@ -297,6 +297,7 @@ def generateXMLDump(config={}, titles=[], start=''):
|
|
|
|
|
|
|
|
|
|
|
|
xmlfile = open('%s/%s' % (config['path'], xmlfilename), 'a')
|
|
|
|
xmlfile = open('%s/%s' % (config['path'], xmlfilename), 'a')
|
|
|
|
c = 1
|
|
|
|
c = 1
|
|
|
|
|
|
|
|
total = len(titles)
|
|
|
|
for title in titles:
|
|
|
|
for title in titles:
|
|
|
|
if title == start: #start downloading from start, included
|
|
|
|
if title == start: #start downloading from start, included
|
|
|
|
lock = False
|
|
|
|
lock = False
|
|
|
@ -304,7 +305,7 @@ def generateXMLDump(config={}, titles=[], start=''):
|
|
|
|
continue
|
|
|
|
continue
|
|
|
|
delay(config=config)
|
|
|
|
delay(config=config)
|
|
|
|
if c % 10 == 0:
|
|
|
|
if c % 10 == 0:
|
|
|
|
print ' Downloaded %d pages' % (c)
|
|
|
|
print ' Downloaded %d pages of %d (%.1f%%)' % (c, total, c/(total/100))
|
|
|
|
xml = getXMLPage(config=config, title=title)
|
|
|
|
xml = getXMLPage(config=config, title=title)
|
|
|
|
xml = cleanXML(xml=xml)
|
|
|
|
xml = cleanXML(xml=xml)
|
|
|
|
xmlfile.write(xml)
|
|
|
|
xmlfile.write(xml)
|
|
|
|