2
0
mirror of https://github.com/WikiTeam/wikiteam synced 2024-11-12 07:12:41 +00:00

Save and upload logos in uploader.py

This commit is contained in:
PiRSquared17 2015-03-29 21:30:15 +00:00
parent 4e57430605
commit 109528384b

View File

@@ -23,6 +23,8 @@ import sys
import time
import urllib
import urllib2
import urlparse
import StringIO
from xml.sax.saxutils import quoteattr
from internetarchive import get_item
@@ -51,11 +53,12 @@ def getParameters(params=[]):
config = {
'prune-directories': False,
'prune-wikidump': False,
'collection': collection
'collection': collection,
'update': False,
}
#console params
try:
opts, args = getopt.getopt(params, "", ["h", "help", "prune-directories", "prune-wikidump", "admin"])
opts, args = getopt.getopt(params, "", ["h", "help", "prune-directories", "prune-wikidump", "admin", "update"])
except getopt.GetoptError, err:
# print help information and exit:
print str(err) # will print something like "option -a not recognized"
@@ -71,6 +74,8 @@ def getParameters(params=[]):
config['prune-wikidump'] = True
elif o in ("--admin"):
config['collection'] = "wikiteam"
elif o in ("--update"):
config['update'] = True
return config
def usage():
@@ -146,9 +151,12 @@ def upload(wikis, config={}):
# Does the item exist already?
ismissingitem = not item.exists
# Logo path
logourl = ''
# We don't know a way to fix/overwrite metadata if item exists already:
# just pass bogus data and save some time
if ismissingitem:
if ismissingitem or config['update']:
#get metadata from api.php
#first sitename and base url
params = {'action': 'query', 'meta': 'siteinfo', 'format': 'xml'}
@@ -205,15 +213,16 @@ def upload(wikis, config={}):
except:
pass
raw = ''
try:
f = urllib.urlopen(baseurl)
raw = f.read()
f.close()
except:
pass
#or copyright info from #footer in mainpage
if baseurl and not rightsinfourl and not rightsinfotext:
raw = ''
try:
f = urllib.urlopen(baseurl)
raw = f.read()
f.close()
except:
pass
rightsinfotext = ''
rightsinfourl = ''
try:
@@ -226,6 +235,11 @@ def upload(wikis, config={}):
pass
if rightsinfotext and not rightsinfourl:
rightsinfourl = baseurl + '#footer'
try:
logourl = re.findall(ur'p-logo["\'][^>]*>\s*<a [^>]*background-image:\s*(?:url\()?([^;)"]+)', raw)[0]
except:
pass
print logourl
#retrieve some info from the wiki
wikititle = "Wiki - %s" % (sitename) # Wiki - ECGpedia
@@ -257,7 +271,7 @@ def upload(wikis, config={}):
'language': lang,
'last-updated-date': wikidate_text,
'subject': '; '.join(wikikeys), # Keywords should be separated by ; but it doesn't matter much; the alternative is to set one per field with subject[0], subject[1], ...
'licenseurl': wikilicenseurl,
'licenseurl': urlparse.urljoin(wiki, wikilicenseurl),
'rights': wikirights,
'originalurl': wikiurl,
}
@@ -266,10 +280,16 @@ def upload(wikis, config={}):
#TODO: not needed for the second file in an item
try:
item.upload(dump, metadata=md, access_key=accesskey, secret_key=secretkey, verbose=True)
if logourl:
logo = StringIO.StringIO(urllib.urlopen(urlparse.urljoin(wiki, logourl)).read())
logoextension = logourl.split('.')[-1] if logourl.split('.') else 'unknown'
logo.name = 'wiki-' + wikiname + '_logo.' + logoextension
item.upload(logo, access_key=accesskey, secret_key=secretkey, verbose=True)
uploadeddumps.append(dump)
log(wiki, dump, 'ok')
except:
log(wiki, dump, 'error?')
c += 1
def main(params=[]):