Merge pull request #155 from balr0g/api-or-index-fix

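Fix --index: choose the API or index.php code path only when the corresponding config value is present and non-empty, so an empty 'api' entry no longer shadows 'index'.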
pull/157/head
Emilio J. Rodríguez-Posada 10 years ago
commit 971404ac4c

@@ -321,9 +321,9 @@ def getPageTitles(config={}, session=None):
print 'Excluding titles from namespaces = %s' % (config['exnamespaces'] and ','.join([str(i) for i in config['exnamespaces']]) or 'None') print 'Excluding titles from namespaces = %s' % (config['exnamespaces'] and ','.join([str(i) for i in config['exnamespaces']]) or 'None')
titles = [] titles = []
if 'api' in config: if 'api' in config and config['api']:
titles = getPageTitlesAPI(config=config, session=session) titles = getPageTitlesAPI(config=config, session=session)
elif 'index' in config: elif 'index' in config and config['index']:
titles = getPageTitlesScraper(config=config, session=session) titles = getPageTitlesScraper(config=config, session=session)
# removing dupes (e.g. in CZ appears Widget:AddThis two times (main # removing dupes (e.g. in CZ appears Widget:AddThis two times (main
@@ -340,9 +340,9 @@ def getImageNames(config={}, session=None):
print 'Retrieving image filenames' print 'Retrieving image filenames'
images = [] images = []
if 'api' in config: if 'api' in config and config['api']:
images = getImageNamesAPI(config=config, session=session) images = getImageNamesAPI(config=config, session=session)
elif 'index' in config: elif 'index' in config and config['index']:
images = getImageNamesScraper(config=config, session=session) images = getImageNamesScraper(config=config, session=session)
#images = list(set(images)) # it is a list of lists #images = list(set(images)) # it is a list of lists
@@ -622,10 +622,10 @@ def saveImageNames(config={}, images=[], session=None):
def curateImageURL(config={}, url=''): def curateImageURL(config={}, url=''):
""" Returns an absolute URL for an image, adding the domain if missing """ """ Returns an absolute URL for an image, adding the domain if missing """
if 'index' in config: if 'index' in config and config['index']:
#remove from :// (http or https) until the first / after domain #remove from :// (http or https) until the first / after domain
domainalone = config['index'].split('://')[0] + '://' + config['index'].split('://')[1].split('/')[0] domainalone = config['index'].split('://')[0] + '://' + config['index'].split('://')[1].split('/')[0]
elif 'api' in config: elif 'api' in config and config['api']:
domainalone = config['api'].split('://')[0] + '://' + config['api'].split('://')[1].split('/')[0] domainalone = config['api'].split('://')[0] + '://' + config['api'].split('://')[1].split('/')[0]
else: else:
print 'ERROR: no index nor API' print 'ERROR: no index nor API'

Loading…
Cancel
Save