From e39fe4a10abff6fe0b48024be69813b266fca468 Mon Sep 17 00:00:00 2001
From: emijrp
Date: Thu, 4 Aug 2016 23:43:49 +0200
Subject: [PATCH] --get-namespaces works now

---
Review notes (this area is ignored when the patch is applied):

* mwGetNamespaces(): the 'mwindex' fallback now calls
  mwGetNamespacesScraper() instead of mwGetImageNamesScraper(), which
  is the image-list scraper and not a namespace source.
* mwGetNamespaces() / getNamespaces(): namespacenames now defaults to
  {} rather than [], because printNamespaces() iterates the result
  with .items(); the list default raised AttributeError whenever no
  API/index was configured or the wiki engine was not mediawiki.
* Both fixes are same-line substitutions, so hunk line counts and the
  diffstat below are unchanged.
* Line breaks and indentation were reconstructed from a
  whitespace-collapsed copy; if a hunk does not match the target file
  exactly, apply with `git apply --ignore-whitespace`.

 wikiteam/mediawiki.py | 15 ++++++++++++++-
 wikiteam/wikiteam.py  | 30 ++++++++++++++++++++++++++++--
 2 files changed, 42 insertions(+), 3 deletions(-)

diff --git a/wikiteam/mediawiki.py b/wikiteam/mediawiki.py
index 8222140..0217efc 100644
--- a/wikiteam/mediawiki.py
+++ b/wikiteam/mediawiki.py
@@ -409,6 +409,20 @@ def mwGetIndex(config={}):
     index = '/'.join(mwapi.split('/')[:-1]) + '/index.php'
     return index
 
+def mwGetNamespaces(config={}):
+    """ Get list of namespaces """
+
+    sys.stderr.write('Retrieving namespaces\n')
+    namespaces = []
+    namespacenames = {}
+    if 'mwapi' in config and config['mwapi']:
+        namespaces, namespacenames = mwGetNamespacesAPI(config=config)
+    elif 'mwindex' in config and config['mwindex']:
+        namespaces, namespacenames = mwGetNamespacesScraper(config=config)
+    namespaces.sort()
+    sys.stderr.write('%d namespaces loaded\n' % (len(namespaces)))
+    return namespaces, namespacenames
+
 def mwGetNamespacesAPI(config={}):
     """ Uses the API to get the list of namespaces names and ids """
     namespaces = config['namespaces']
@@ -445,7 +459,6 @@ def mwGetNamespacesAPI(config={}):
     sys.stderr.write('%d namespaces found\n' % (len(namespaces)))
     return namespaces, namespacenames
 
-
 def mwGetNamespacesScraper(config={}):
     """ Hackishly gets the list of namespaces names and ids from the dropdown in the HTML of Special:AllPages """
     """ Function called if no API is available """
diff --git a/wikiteam/wikiteam.py b/wikiteam/wikiteam.py
index 19c1fe1..560cd47 100644
--- a/wikiteam/wikiteam.py
+++ b/wikiteam/wikiteam.py
@@ -156,6 +156,17 @@ def getJSON(request):
     return request.json()"""
     return json.loads(request)
 
+def getNamespaces(config={}):
+    """ Returns list of namespaces for this wiki """
+
+    namespaces = []
+    namespacenames = {}
+    if config['wikiengine'] == 'mediawiki':
+        import mediawiki
+        namespaces, namespacenames = mediawiki.mwGetNamespaces(config=config)
+
+    return namespacenames
+
 def getPageTitles(config={}):
     """ Returns page titles for this wiki """
 
@@ -260,6 +271,10 @@ def getParameters(params=[]):
         '--get-image-names',
         action='store_true',
         help="Returns wiki image names.")
+    groupMeta.add_argument(
+        '--get-namespaces',
+        action='store_true',
+        help="Returns wiki namespaces.")
     groupMeta.add_argument(
         '--get-wiki-engine',
         action='store_true',
@@ -276,14 +291,14 @@ def getParameters(params=[]):
 
     # Don't mix download params and meta info params
     if (args.pages or args.images) and \
-        (args.get_api or args.get_index or args.get_page_titles or args.get_image_names or args.get_wiki_engine):
+        (args.get_api or args.get_index or args.get_page_titles or args.get_image_names or args.get_namespaces or args.get_wiki_engine):
         sys.stderr.write('ERROR: Don\'t mix download params and meta info params\n')
         parser.print_help()
         sys.exit(1)
 
     # No download params and no meta info params? Exit
     if (not args.pages and not args.images) and \
-        (not args.get_api and not args.get_index and not args.get_page_titles and not args.get_image_names and not args.get_wiki_engine):
+        (not args.get_api and not args.get_index and not args.get_page_titles and not args.get_image_names and not args.get_namespaces and not args.get_wiki_engine):
         sys.stderr.write('ERROR: Use at least one download param or meta info param\n')
         parser.print_help()
         sys.exit(1)
@@ -327,6 +342,8 @@ def getParameters(params=[]):
         metainfo = 'get_page_titles'
     elif args.get_image_names:
         metainfo = 'get_image_names'
+    elif args.get_namespaces:
+        metainfo = 'get_namespaces'
     elif args.get_wiki_engine:
         metainfo = 'get_wiki_engine'
 
@@ -576,6 +593,13 @@ def printImageNames(config={}):
     for imagename in getImageNames(config=config):
         sys.stdout.write('%s\n' % (imagename))
 
+def printNamespaces(config={}):
+    """ Print list of namespaces for this wiki """
+
+    namespacenames = getNamespaces(config=config)
+    for namespaceid, namespacename in namespacenames.items():
+        sys.stdout.write('%s %s\n' % (namespaceid, namespacename))
+
 def printPageTitles(config={}):
     """ Print list of page titles for this wiki """
 
@@ -756,6 +780,8 @@ def main(params=[]):
             printPageTitles(config=config)
         elif config['metainfo'] == 'get_image_names':
             printImageNames(config=config)
+        elif config['metainfo'] == 'get_namespaces':
+            printNamespaces(config=config)
         elif config['metainfo'] == 'get_wiki_engine':
             print(config['wikiengine'])
         sys.exit()