Mirror of https://github.com/WikiTeam/wikiteam (synced 2024-11-04 12:00:28 +00:00)
adding support for excluding namespaces
git-svn-id: https://wikiteam.googlecode.com/svn/trunk@77 31edc4fc-5e31-b4c4-d58b-c8bc928bcb95
This commit is contained in:
parent: 0bec4c7e69
commit: a618c228db
@ -93,6 +93,10 @@ def getPageTitlesAPI(config={}):
|
||||
titles = []
|
||||
namespaces, namespacenames = getNamespaces(config=config)
|
||||
for namespace in namespaces:
|
||||
if namespace in config['exnamespaces']:
|
||||
print ' Skiping namespace =', namespace
|
||||
continue
|
||||
|
||||
c = 0
|
||||
print ' Retrieving titles in the namespace', namespace
|
||||
headers = {'User-Agent': getUserAgent()}
|
||||
@ -187,7 +191,8 @@ def getPageTitles(config={}):
|
||||
#http://en.wikipedia.org/wiki/Special:AllPages
|
||||
#http://archiveteam.org/index.php?title=Special:AllPages
|
||||
#http://www.wikanda.es/wiki/Especial:Todas
|
||||
print 'Loading page titles from namespaces =', ','.join([str(i) for i in config['namespaces']])
|
||||
print 'Loading page titles from namespaces = %s' % (config['namespaces'] and ','.join([str(i) for i in config['namespaces']]) or 'None')
|
||||
print 'Excluding titles from namespaces = %s' % (config['exnamespaces'] and ','.join([str(i) for i in config['exnamespaces']]) or 'None')
|
||||
|
||||
titles = []
|
||||
if config['api']:
|
||||
@ -478,6 +483,7 @@ def domain2prefix(config={}):
|
||||
domain = config['api']
|
||||
elif config['index']:
|
||||
domain = config['index']
|
||||
domain = domain.lower()
|
||||
domain = re.sub(r'(http://|www\.|/index\.php|/api\.php)', '', domain)
|
||||
domain = re.sub(r'/', '_', domain)
|
||||
domain = re.sub(r'\.', '', domain)
|
||||
@ -536,6 +542,7 @@ def getParameters():
|
||||
'logs': False,
|
||||
'xml': False,
|
||||
'namespaces': ['all'],
|
||||
'exnamespaces': [],
|
||||
'path': '',
|
||||
'threads': 1, #fix not coded yet
|
||||
'delay': 0,
|
||||
@ -546,7 +553,7 @@ def getParameters():
|
||||
}
|
||||
#console params
|
||||
try:
|
||||
opts, args = getopt.getopt(sys.argv[1:], "", ["h", "help", "path=", "api=", "index=", "images", "logs", "xml", "curonly", "threads=", "resume", "delay=" ])
|
||||
opts, args = getopt.getopt(sys.argv[1:], "", ["h", "help", "path=", "api=", "index=", "images", "logs", "xml", "curonly", "threads=", "resume", "delay=", "namespaces=", "exnamespaces=", ])
|
||||
except getopt.GetoptError, err:
|
||||
# print help information and exit:
|
||||
print str(err) # will print something like "option -a not recognized"
|
||||
@ -583,6 +590,25 @@ def getParameters():
|
||||
other["resume"] = True
|
||||
elif o in ("--delay"):
|
||||
config["delay"] = int(a)
|
||||
elif o in ("--namespaces"):
|
||||
if re.search(r'[^\d, \-]', a) and a.lower() != 'all':
|
||||
print "Invalid namespaces values.\nValid format is integer(s) splitted by commas"
|
||||
sys.exit()
|
||||
a = re.sub(' ', '', a)
|
||||
if a.lower() == 'all':
|
||||
config["namespaces"] = 'all'
|
||||
else:
|
||||
config["namespaces"] = [int(i) for i in a.split(',')]
|
||||
elif o in ("--exnamespaces"):
|
||||
if re.search(r'[^\d, \-]', a):
|
||||
print "Invalid exnamespaces values.\nValid format is integer(s) splitted by commas"
|
||||
sys.exit()
|
||||
a = re.sub(' ', '', a)
|
||||
if a.lower() == 'all':
|
||||
print 'You have excluded all namespaces. Error.'
|
||||
sys.exit()
|
||||
else:
|
||||
config["exnamespaces"] = [int(i) for i in a.split(',')]
|
||||
else:
|
||||
assert False, "unhandled option"
|
||||
|
||||
@ -603,7 +629,10 @@ And one of these, or two or three:
|
||||
|
||||
You can resume previous incomplete dumps:
|
||||
--resume: it resumes previous incomplete dump. When using --resume, --path is mandatory (path to directory where incomplete dump is).
|
||||
|
||||
|
||||
You can exclude namespaces:
|
||||
--exnamespaces: write the number of the namespaces you want to exclude, splitted by commas.
|
||||
|
||||
Write --help for help."""
|
||||
sys.exit()
|
||||
#usage()
|
||||
|
Loading…
Reference in New Issue
Block a user