moving code to functions; tiny changes in comments

git-svn-id: https://wikiteam.googlecode.com/svn/trunk@814 31edc4fc-5e31-b4c4-d58b-c8bc928bcb95
pull/117/head
emijrp 11 years ago
parent 79a310c470
commit 8295990df0

@ -710,14 +710,14 @@ def saveConfig(config={}, configfilename=''):
f.close()
def welcome():
""" """
""" Opening message """
print "#"*73
print """# Welcome to DumpGenerator 0.1 by WikiTeam (GPL v3) #
# More info at: http://code.google.com/p/wikiteam/ #"""
print "#"*73
print ''
print "#"*73
print """# Copyright (C) 2011-2012 WikiTeam #
print """# Copyright (C) 2011-2013 WikiTeam #
# This program is free software: you can redistribute it and/or modify #
# it under the terms of the GNU General Public License as published by #
# the Free Software Foundation, either version 3 of the License, or #
@ -734,7 +734,7 @@ def welcome():
print ''
def bye():
""" """
""" Closing message """
print "---> Congratulations! Your dump is complete <---"
print "If you found any bug, report a new issue here (Google account required): http://code.google.com/p/wikiteam/issues/list"
print "If this is a public wiki, please, consider publishing this dump. Do it yourself as explained in http://code.google.com/p/wikiteam/wiki/NewTutorial#Publishing_the_dump or contact us at http://code.google.com/p/wikiteam"
@ -933,54 +933,28 @@ def checkXMLIntegrity(config={}):
print "XML dump is corrupted, regenerating a new dump"
generateXMLDump(config=config, titles=titles)
def main(params=[]):
""" Main function """
welcome()
configfilename = 'config.txt'
config, other = getParameters(params=params)
#notice about wikipedia dumps
if re.findall(r'(?i)(wikipedia|wikisource|wiktionary|wikibooks|wikiversity|wikimedia|wikispecies|wikiquote|wikinews|wikidata|wikivoyage)\.org', config['api']+config['index']):
print 'PLEASE, DO NOT USE THIS SCRIPT TO DOWNLOAD WIKIMEDIA PROJECTS!'
print 'Download the dumps from http://dumps.wikimedia.org'
if not other['force']:
print 'Thanks!'
sys.exit()
print 'Analysing %s' % (config['api'] and config['api'] or config['index'])
#creating path or resuming if desired
c = 2
originalpath = config['path'] # to avoid concat blabla-2, blabla-2-3, and so on...
while not other['resume'] and os.path.isdir(config['path']): #do not enter if resume is requested from begining
print '\nWarning!: "%s" path exists' % (config['path'])
reply = ''
while reply.lower() not in ['yes', 'y', 'no', 'n']:
reply = raw_input('There is a dump in "%s", probably incomplete.\nIf you choose resume, to avoid conflicts, the parameters you have chosen in the current session will be ignored\nand the parameters available in "%s/%s" will be loaded.\nDo you want to resume ([yes, y], [no, n])? ' % (config['path'], config['path'], configfilename))
if reply.lower() in ['yes', 'y']:
if not os.path.isfile('%s/%s' % (config['path'], configfilename)):
print 'No config file found. I can\'t resume. Aborting.'
sys.exit()
print 'You have selected: YES'
other['resume'] = True
break
elif reply.lower() in ['no', 'n']:
print 'You have selected: NO'
other['resume'] = False
config['path'] = '%s-%d' % (originalpath, c)
print 'Trying to use path "%s"...' % (config['path'])
c += 1
if other['resume']:
print 'Loading config file...'
config = loadConfig(config=config, configfilename=configfilename)
def createNewDump(config={}):
titles = []
images = []
print 'Trying generating a new dump into a new directory...'
if config['xml']:
titles += getPageTitles(config=config)
saveTitles(config=config, titles=titles)
generateXMLDump(config=config, titles=titles)
checkXMLIntegrity(config=config)
if config['images']:
if config['api']:
images += getImageFilenamesURLAPI(config=config)
else:
os.mkdir(config['path'])
saveConfig(config=config, configfilename=configfilename)
images += getImageFilenamesURL(config=config)
saveImageFilenamesURL(config=config, images=images)
generateImageDump(config=config, other=other, images=images)
if config['logs']:
saveLogs(config=config)
def resumePreviousDump(config={}):
titles = []
images = []
if other['resume']:
print 'Resuming previous dump process...'
if config['xml']:
#load titles
@ -1088,23 +1062,23 @@ def main(params=[]):
if config['logs']:
#fix
pass
def saveSpecialVersion(config={}):
#save Special:Version as .html, to preserve extensions details
if os.path.exists('%s/Special:Version.html' % (config['path'])):
print 'Special:Version.html exists, do not overwrite'
else:
print 'Trying generating a new dump into a new directory...'
if config['xml']:
titles += getPageTitles(config=config)
saveTitles(config=config, titles=titles)
generateXMLDump(config=config, titles=titles)
checkXMLIntegrity(config=config)
if config['images']:
if config['api']:
images += getImageFilenamesURLAPI(config=config)
else:
images += getImageFilenamesURL(config=config)
saveImageFilenamesURL(config=config, images=images)
generateImageDump(config=config, other=other, images=images)
if config['logs']:
saveLogs(config=config)
print 'Downloading Special:Version with extensions and other related info'
req = urllib2.Request(url=config['index'], data=urllib.urlencode({'title': 'Special:Version', }), headers={'User-Agent': getUserAgent()})
f = urllib2.urlopen(req)
raw = f.read()
f.close()
raw = removeIP(raw=raw)
f = open('%s/Special:Version.html' % (config['path']), 'w')
f.write(raw)
f.close()
def saveIndexPHP(config={}):
#save index.php as .html, to preserve license details available at the bottom of the page
if os.path.exists('%s/index.html' % (config['path'])):
print 'index.html exists, do not overwrite'
@ -1119,20 +1093,59 @@ def main(params=[]):
f.write(raw)
f.close()
#save Special:Version as .html, to preserve extensions details
if os.path.exists('%s/Special:Version.html' % (config['path'])):
print 'Special:Version.html exists, do not overwrite'
def avoidWikimediaProjects(config={}, other=None):
    """ Print a notice, and normally stop, when the target is a Wikimedia project

    config: dict whose 'api' and 'index' URLs are inspected. A missing or
            empty entry is treated as an empty string.
    other:  optional dict of session options; only its 'force' entry is read.
            When omitted (or when 'force' is absent) force is taken as False.

    Returns None when the URLs do not match a Wikimedia project domain, or
    when force is true. Otherwise calls sys.exit() (raises SystemExit) after
    printing the notice.
    """
    # 'other' has to arrive as an argument: nothing inside this function
    # defines it, so reading it as a free variable fails with NameError.
    force = bool(other) and bool(other.get('force'))
    # Either URL may be unset, so fall back to '' before concatenating.
    urls = (config.get('api') or '') + (config.get('index') or '')
    #notice about wikipedia dumps
    if re.search(r'(?i)(wikipedia|wikisource|wiktionary|wikibooks|wikiversity|wikimedia|wikispecies|wikiquote|wikinews|wikidata|wikivoyage)\.org', urls):
        # Single-argument print(...) produces the same output under Python 2.
        print('PLEASE, DO NOT USE THIS SCRIPT TO DOWNLOAD WIKIMEDIA PROJECTS!')
        print('Download the dumps from http://dumps.wikimedia.org')
        if not force:
            print('Thanks!')
            sys.exit()
def main(params=[]):
""" Main function """
welcome()
configfilename = 'config.txt'
config, other = getParameters(params=params)
avoidWikimediaProjects(config=config)
print 'Analysing %s' % (config['api'] and config['api'] or config['index'])
#creating path or resuming if desired
c = 2
originalpath = config['path'] # to avoid concat blabla-2, blabla-2-3, and so on...
while not other['resume'] and os.path.isdir(config['path']): #do not enter if resume is requested from begining
print '\nWarning!: "%s" path exists' % (config['path'])
reply = ''
while reply.lower() not in ['yes', 'y', 'no', 'n']:
reply = raw_input('There is a dump in "%s", probably incomplete.\nIf you choose resume, to avoid conflicts, the parameters you have chosen in the current session will be ignored\nand the parameters available in "%s/%s" will be loaded.\nDo you want to resume ([yes, y], [no, n])? ' % (config['path'], config['path'], configfilename))
if reply.lower() in ['yes', 'y']:
if not os.path.isfile('%s/%s' % (config['path'], configfilename)):
print 'No config file found. I can\'t resume. Aborting.'
sys.exit()
print 'You have selected: YES'
other['resume'] = True
break
elif reply.lower() in ['no', 'n']:
print 'You have selected: NO'
other['resume'] = False
config['path'] = '%s-%d' % (originalpath, c)
print 'Trying to use path "%s"...' % (config['path'])
c += 1
if other['resume']:
print 'Loading config file...'
config = loadConfig(config=config, configfilename=configfilename)
else:
print 'Downloading Special:Version with extensions and other related info'
req = urllib2.Request(url=config['index'], data=urllib.urlencode({'title': 'Special:Version', }), headers={'User-Agent': getUserAgent()})
f = urllib2.urlopen(req)
raw = f.read()
f.close()
raw = removeIP(raw=raw)
f = open('%s/Special:Version.html' % (config['path']), 'w')
f.write(raw)
f.close()
os.mkdir(config['path'])
saveConfig(config=config, configfilename=configfilename)
if other['resume']:
resumePreviousDump(config=config)
else:
createNewDump(config=config)
saveIndexPHP(config=config)
saveSpecialVersion(config=config)
bye()
if __name__ == "__main__":

Loading…
Cancel
Save