diff --git a/uploader.py b/uploader.py index 7574548..748775b 100644 --- a/uploader.py +++ b/uploader.py @@ -24,7 +24,11 @@ import os import re import subprocess import sys +import time import urllib +import urllib2 + +import dumpgenerator # Configuration goes here accesskey = open('keys.txt', 'r').readlines()[0].strip() @@ -33,22 +37,63 @@ collection = 'opensource' # Replace with "wikiteam" if you're an admin of the co # Nothing to change below def upload(wikis): - for wiki, dumps in wikis.items(): - wikiname = '-'.join(wiki.split('-')[:-1]) # The identifier has to match this pattern: ^[a-zA-Z0-9][a-zA-Z0-9_.-]{4,100}$ - wikidate = wiki.split('-')[-1] + for wiki in wikis: + print "#"*73 + print "# Uploading", wiki + print "#"*73 + wiki = wiki.lower() + prefix = dumpgenerator.domain2prefix(config={'api': wiki}) + + wikiname = prefix.split('-')[0] + dumps = [] + for dirname, dirnames, filenames in os.walk('.'): + if dirname == '.': + for f in filenames: + if f.startswith('%s-' % (wikiname)) and (f.endswith('-wikidump.7z') or f.endswith('-history.xml.7z')): + dumps.append(f) + break + c = 0 for dump in dumps: + time.sleep(0.1) + wikidate = dump.split('-')[1] print wiki, wikiname, wikidate, dump - #get api.php - pass + + #get metadata from api.php + headers = {'User-Agent': dumpgenerator.getUserAgent()} + params = {'action': 'query', 'meta': 'siteinfo', 'siprop': 'general|rightsinfo', 'format': 'xml'} + data = urllib.urlencode(params) + req = urllib2.Request(url=wiki, data=data, headers=headers) + try: + f = urllib2.urlopen(req) + except: + print "Error while retrieving metadata from API, skiping this wiki..." + break + xml = f.read() + f.close() + + sitename = '' + rightsinfourl = '' + rightsinfotext = '' + try: + sitename = re.findall(ur"sitename=\"([^\"]+)\"", xml)[0] + rightsinfourl = re.findall(ur"rightsinfo url=\"([^\"]+)\"", xml)[0] + rightsinfotext = re.findall(ur"text=\"([^\"]+)\"", xml)[0] + except: + pass + + if not sitename or not rightsinfourl or not rightsinfotext: + print "Error while retrieving metadata from API, skiping this wiki..." + break #retrieve some info from the wiki - wikititle = "Wiki - " # Wiki - ECGpedia - wikidesc = "... Dumped with WikiTeam tools." # "ECGpedia,: a free electrocardiography (ECG) tutorial and textbook to which anyone can contribute, designed for medical professionals such as cardiac care nurses and physicians. Dumped with WikiTeam tools." - wikikeys = ['wiki', 'wikiteam', 'MediaWiki'] # ecg; ECGpedia; wiki; wikiteam; MediaWiki - wikilicenseurl = "" # http://creativecommons.org/licenses/by-nc-sa/3.0/ - wikirights = "" # e.g. http://en.ecgpedia.org/wiki/Frequently_Asked_Questions : hard to fetch automatically, could be the output of API's rightsinfo if it's not a usable licenseurl or "Unknown copyright status" if nothing is found. - wikiurl = "" # we use api here http://en.ecgpedia.org/api.php + wikititle = "Wiki - %s" % (sitename) # Wiki - ECGpedia + wikidesc = "Dumped with WikiTeam tools." # "ECGpedia,: a free electrocardiography (ECG) tutorial and textbook to which anyone can contribute, designed for medical professionals such as cardiac care nurses and physicians. Dumped with WikiTeam tools." + wikikeys = ['wiki', 'wikiteam', 'MediaWiki', sitename, wikiname] # ecg; ECGpedia; wiki; wikiteam; MediaWiki + print wikikeys + wikilicenseurl = rightsinfourl # http://creativecommons.org/licenses/by-nc-sa/3.0/ + wikirights = rightsinfotext # e.g. http://en.ecgpedia.org/wiki/Frequently_Asked_Questions : hard to fetch automatically, could be the output of API's rightsinfo if it's not a usable licenseurl or "Unknown copyright status" if nothing is found. + wikiurl = wiki # we use api here http://en.ecgpedia.org/api.php #creates curl command curl = ['curl', '--location', @@ -75,18 +120,8 @@ def upload(wikis): os.system(curlline) c += 1 -wikis = {} def main(): - for dirname, dirnames, filenames in os.walk('.'): - if dirname == '.': - for f in filenames: - if f.endswith('-wikidump.7z') or f.endswith('-history.xml.7z'): - wiki = f.split('-wikidump.7z')[0].split('-history.xml.7z')[0] - if not wikis.has_key(wiki): - wikis[wiki] = [] - wikis[wiki].append(f) - break - + wikis = open(sys.argv[1], 'r').read().splitlines() upload(wikis) if __name__ == "__main__":