wikiteam/uploader.py

#!/usr/bin/python
# -*- coding: utf-8 -*-

# Copyright (C) 2011-2012 WikiTeam
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
# 
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
# 
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

# Keys: http://archive.org/account/s3.php
# Documentation: http://archive.org/help/abouts3.txt
# https://wiki.archive.org/twiki/bin/view/Main/IAS3BulkUploader
# http://en.ecgpedia.org/api.php?action=query&meta=siteinfo&siprop=general|rightsinfo&format=xml

import os
import re
import subprocess
import sys
import time
import urllib
import urllib2

import dumpgenerator

# Text file given as first command-line argument; it lists one wiki api.php URL per line
listfile = sys.argv[1]

# Configuration goes here
# You need a file named keys.txt with access and secret keys, in two different lines
# The file is read once and closed right away; an IndexError is raised, as before,
# when it holds fewer than two lines.
with open('keys.txt', 'r') as keysfile:
    keylines = keysfile.readlines()
accesskey = keylines[0].strip()
secretkey = keylines[1].strip()
collection = 'wikiteam' # Replace with "opensource" if you are not an admin of the collection

# Nothing to change below
def log(wiki, dump, msg):
    """Append one 'wiki;dump;msg' record to the uploader log.

    The log file is named 'uploader-<listfile>.log' and is opened in append
    mode, so records from earlier runs are kept. Each record is written with a
    leading newline.

    wiki -- URL of the wiki the dump belongs to
    dump -- file name of the dump that was processed
    msg  -- outcome to record for that dump
    """
    # 'with' closes the file even if the write fails
    with open('uploader-%s.log' % (listfile), 'a') as logfile:
        logfile.write('\n%s;%s;%s' % (wiki, dump, msg))

def _first_match(pattern, text):
    """Return the first capture of regex pattern in text, or '' when there is none."""
    found = re.findall(pattern, text)
    if found:
        return found[0]
    return ''


def _read_url(opener, target, label):
    """Fetch target with the given urlopen callable; return the body or '' on failure.

    opener -- urllib2.urlopen or urllib.urlopen
    target -- whatever that callable accepts (a Request object or a URL string)
    label  -- human-readable name of the target, only used in the warning
    """
    try:
        response = opener(target)
        try:
            return response.read()
        finally:
            response.close()
    except Exception as error:
        # The metadata is optional: warn and carry on with fallbacks instead of
        # aborting the upload (Ctrl-C is no longer swallowed, unlike a bare except).
        print('Warning: could not fetch %s: %s' % (label, error))
        return ''


def _find_dumps(wikiname):
    """Return the dump archives of wikiname that sit directly in the current directory."""
    # Only the first os.walk() entry is used, so subdirectories are never explored
    filenames = next(os.walk('.'), ('.', [], []))[2]
    start = '%s-' % (wikiname)
    endings = ('-wikidump.7z', '-history.xml.7z')
    return [name for name in filenames
            if name.startswith(start) and name.endswith(endings)]


def _wiki_metadata(wiki, wikiname):
    """Return (sitename, baseurl, rightsinfourl, rightsinfotext) for a wiki.

    Everything is best-effort: sitename falls back to wikiname, baseurl falls
    back to the API URL without '/api.php', and the rights values are '' when
    neither the API nor the main page footer provides them.
    """
    headers = {'User-Agent': dumpgenerator.getUserAgent()}

    #get metadata from api.php
    #first sitename and base url
    params = {'action': 'query', 'meta': 'siteinfo', 'format': 'xml'}
    req = urllib2.Request(url=wiki, data=urllib.urlencode(params), headers=headers)
    xml = _read_url(urllib2.urlopen, req, wiki)
    # Each attribute is looked up on its own, so a missing sitename does not
    # prevent the base URL from being read.
    sitename = _first_match(r'sitename="([^"]+)"', xml)
    baseurl = _first_match(r'base="([^"]+)"', xml)
    if not sitename:
        sitename = wikiname
    if not baseurl:
        baseurl = re.sub(r"(?im)/api\.php", r"", wiki)

    #now copyright info from API
    # siprop belongs to the siteinfo module, so meta=siteinfo must be sent with it;
    # without it the API ignores siprop and never returns the rightsinfo element.
    params = {'action': 'query', 'meta': 'siteinfo', 'siprop': 'general|rightsinfo', 'format': 'xml'}
    req = urllib2.Request(url=wiki, data=urllib.urlencode(params), headers=headers)
    xml = _read_url(urllib2.urlopen, req, wiki)
    rightsinfourl = _first_match(r'rightsinfo url="([^"]+)"', xml)
    rightsinfotext = _first_match(r'text="([^"]+)"', xml)

    #or copyright info from #footer in mainpage
    if baseurl and not rightsinfourl and not rightsinfotext:
        raw = _read_url(urllib.urlopen, baseurl, baseurl)
        rightsinfourl = _first_match(r'<link rel="copyright" href="([^"]+)" />', raw)
        rightsinfotext = _first_match(r'<li id="copyright">([^\n\r]*?)</li>', raw)
        if rightsinfotext and not rightsinfourl:
            rightsinfourl = baseurl + '#footer'

    return sitename, baseurl, rightsinfourl, rightsinfotext


def _curl_header(line):
    """Return the curl arguments that send one 'name:value' request header.

    Line breaks are replaced by spaces: part of the text comes from the remote
    wiki and a CR/LF inside it would split the value into extra headers.
    """
    return ['--header', re.sub(r'[\r\n]+', ' ', line)]


def upload(wikis):
    """Upload the dumps of every listed wiki to the Internet Archive through curl.

    For each API URL in wikis, the '<wikiname>-*-wikidump.7z' and
    '<wikiname>-*-history.xml.7z' files of the current directory are sent to
    the item 'wiki-<wikiname>'. The item metadata (title, description, subject,
    rights...) is attached to the first upload that succeeds. One record per
    dump is appended to the log: 'ok', or an error message when curl fails.

    wikis -- iterable of api.php URLs
    """
    for wiki in wikis:
        print("#" * 73)
        print("# Uploading %s" % (wiki))
        print("#" * 73)
        wiki = wiki.lower()
        prefix = dumpgenerator.domain2prefix(config={'api': wiki})

        wikiname = prefix.split('-')[0]
        dumps = _find_dumps(wikiname)
        if not dumps:
            print('No dumps found for %s' % (wikiname))
            continue

        # The wiki metadata does not depend on the dump, so it is fetched once per wiki
        sitename, baseurl, rightsinfourl, rightsinfotext = _wiki_metadata(wiki, wikiname)

        #retrieve some info from the wiki
        wikititle = "Wiki - %s" % (sitename) # Wiki - ECGpedia
        wikidesc = "<a href=\"%s\">%s</a> dumped with <a href=\"http://code.google.com/p/wikiteam/\" rel=\"nofollow\">WikiTeam</a> tools." % (baseurl, sitename)
        wikikeys = ['wiki', 'wikiteam', 'MediaWiki', sitename, wikiname] # ecg; ECGpedia; wiki; wikiteam; MediaWiki
        if not rightsinfourl and not rightsinfotext:
            wikikeys.append('unknowncopyright')

        wikilicenseurl = rightsinfourl # http://creativecommons.org/licenses/by-nc-sa/3.0/
        wikirights = rightsinfotext # output of API's rightsinfo or of the main page footer; empty if nothing is found
        wikiurl = wiki # we use api here http://en.ecgpedia.org/api.php

        # Stays False until one upload succeeds, so that a failed first upload
        # does not leave the item without metadata.
        metadata_sent = False
        for dump in dumps:
            time.sleep(0.1)
            wikidate = dump.split('-')[1]
            wikidate_text = wikidate[0:4]+'-'+wikidate[4:6]+'-'+wikidate[6:8]
            print('%s %s %s %s' % (wiki, wikiname, wikidate, dump))

            #creates curl command
            # It is an argument list run without a shell: the metadata comes from
            # the remote wiki and must never be interpreted as shell syntax.
            curl = ['curl', '--location']
            # Creates the item automatically, need to give some time for the item to correctly be created on archive.org, or everything else will fail, showing "bucket not found" error
            curl += _curl_header('x-amz-auto-make-bucket:1')
            curl += _curl_header('x-archive-queue-derive:0')
            curl += _curl_header('x-archive-size-hint:%d' % (os.path.getsize(dump)))
            curl += _curl_header('authorization: LOW %s:%s' % (accesskey, secretkey))
            if not metadata_sent:
                curl += _curl_header('x-archive-meta-mediatype:web')
                curl += _curl_header('x-archive-meta-collection:%s' % (collection))
                curl += _curl_header('x-archive-meta-title:%s' % (wikititle))
                curl += _curl_header('x-archive-meta-description:%s' % (wikidesc))
                curl += _curl_header('x-archive-meta-last-updated-date:%s' % (wikidate_text))
                # Keywords should be separated by ; but it doesn't matter much; the alternative is to set one per field with subject[0], subject[1], ...
                curl += _curl_header('x-archive-meta-subject:%s' % ('; '.join(wikikeys)))
                curl += _curl_header('x-archive-meta-licenseurl:%s' % (wikilicenseurl))
                curl += _curl_header('x-archive-meta-rights:%s' % (wikirights))
                curl += _curl_header('x-archive-meta-originalurl:%s' % (wikiurl))

            # It could happen that the identifier is taken by another user; only wikiteam collection admins will be able to upload more files to it, curl will fail immediately and get a permissions error by s3.
            curl += ['--upload-file', dump,
                "http://s3.us.archive.org/wiki-%s/%s" % (wikiname, dump),
            ]

            try:
                status = subprocess.call(curl)
            except OSError as error:
                # curl is missing or cannot be executed
                log(wiki, dump, 'error: could not run curl (%s)' % (error))
                continue

            # NOTE: status 0 means curl finished the transfer; without --fail an
            # HTTP error answered by the server does not change the exit status.
            if status == 0:
                metadata_sent = True
                log(wiki, dump, 'ok')
            else:
                log(wiki, dump, 'error: curl exited with status %d' % (status))

def main():
    """Read the list of wikis from listfile and upload the dumps of each one.

    Every non-blank line of the file is taken as one wiki API URL; surrounding
    whitespace is removed and blank lines are skipped so that they are not
    handed to upload() as empty URLs.
    """
    # 'with' closes the list file as soon as it has been read
    with open(listfile, 'r') as wikilist:
        lines = wikilist.read().splitlines()
    wikis = [line.strip() for line in lines if line.strip()]
    upload(wikis)

# Start uploading only when this file is executed as a script
if __name__ == "__main__":
    main()
working on the uploader to Internet Archive S3 git-svn-id: https://wikiteam.googlecode.com/svn/trunk@611 31edc4fc-5e31-b4c4-d58b-c8bc928bcb95 12 years ago			`#!/usr/bin/python`
			`# -- coding: utf-8 --`

			`# Copyright (C) 2011-2012 WikiTeam`
			`# This program is free software: you can redistribute it and/or modify`
			`# it under the terms of the GNU General Public License as published by`
			`# the Free Software Foundation, either version 3 of the License, or`
			`# (at your option) any later version.`
			`#`
			`# This program is distributed in the hope that it will be useful,`
			`# but WITHOUT ANY WARRANTY; without even the implied warranty of`
			`# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the`
			`# GNU General Public License for more details.`
			`#`
			`# You should have received a copy of the GNU General Public License`
			`# along with this program. If not, see <http://www.gnu.org/licenses/>.`

Adding comments and making file look nicer. The response to archive.org's S3 is okay though git-svn-id: https://wikiteam.googlecode.com/svn/trunk@647 31edc4fc-5e31-b4c4-d58b-c8bc928bcb95 12 years ago			`# Keys: http://archive.org/account/s3.php`
			`# Documentation: http://archive.org/help/abouts3.txt`
working on the uploader to Internet Archive S3 git-svn-id: https://wikiteam.googlecode.com/svn/trunk@611 31edc4fc-5e31-b4c4-d58b-c8bc928bcb95 12 years ago			`# https://wiki.archive.org/twiki/bin/view/Main/IAS3BulkUploader`
uploader git-svn-id: https://wikiteam.googlecode.com/svn/trunk@738 31edc4fc-5e31-b4c4-d58b-c8bc928bcb95 12 years ago			`# http://en.ecgpedia.org/api.php?action=query&meta=siteinfo&siprop=general\|rightsinfo&format=xml`
working on the uploader to Internet Archive S3 git-svn-id: https://wikiteam.googlecode.com/svn/trunk@611 31edc4fc-5e31-b4c4-d58b-c8bc928bcb95 12 years ago
working on the uploader to Internet Archive S3; launcher.py now do not explore subdirectories, just the current one '.' git-svn-id: https://wikiteam.googlecode.com/svn/trunk@613 31edc4fc-5e31-b4c4-d58b-c8bc928bcb95 12 years ago			`import os`
			`import re`
working on the uploader to Internet Archive S3 git-svn-id: https://wikiteam.googlecode.com/svn/trunk@612 31edc4fc-5e31-b4c4-d58b-c8bc928bcb95 12 years ago			`import subprocess`
git-svn-id: https://wikiteam.googlecode.com/svn/trunk@731 31edc4fc-5e31-b4c4-d58b-c8bc928bcb95 12 years ago			`import sys`
uploader git-svn-id: https://wikiteam.googlecode.com/svn/trunk@736 31edc4fc-5e31-b4c4-d58b-c8bc928bcb95 12 years ago			`import time`
working on the uploader to Internet Archive S3; launcher.py now do not explore subdirectories, just the current one '.' git-svn-id: https://wikiteam.googlecode.com/svn/trunk@613 31edc4fc-5e31-b4c4-d58b-c8bc928bcb95 12 years ago			`import urllib`
uploader git-svn-id: https://wikiteam.googlecode.com/svn/trunk@736 31edc4fc-5e31-b4c4-d58b-c8bc928bcb95 12 years ago			`import urllib2`

			`import dumpgenerator`
working on the uploader to Internet Archive S3 git-svn-id: https://wikiteam.googlecode.com/svn/trunk@612 31edc4fc-5e31-b4c4-d58b-c8bc928bcb95 12 years ago
uploader git-svn-id: https://wikiteam.googlecode.com/svn/trunk@737 31edc4fc-5e31-b4c4-d58b-c8bc928bcb95 12 years ago			`listfile = sys.argv[1]`

Adding comments and making file look nicer. The response to archive.org's S3 is okay though git-svn-id: https://wikiteam.googlecode.com/svn/trunk@647 31edc4fc-5e31-b4c4-d58b-c8bc928bcb95 12 years ago			`# Configuration goes here`
uploader git-svn-id: https://wikiteam.googlecode.com/svn/trunk@737 31edc4fc-5e31-b4c4-d58b-c8bc928bcb95 12 years ago			`# You need a file named keys.txt with access and secret keys, in two different lines`
git-svn-id: https://wikiteam.googlecode.com/svn/trunk@731 31edc4fc-5e31-b4c4-d58b-c8bc928bcb95 12 years ago			`accesskey = open('keys.txt', 'r').readlines()[0].strip()`
			`secretkey = open('keys.txt', 'r').readlines()[1].strip()`
uploader git-svn-id: https://wikiteam.googlecode.com/svn/trunk@737 31edc4fc-5e31-b4c4-d58b-c8bc928bcb95 12 years ago			`collection = 'wikiteam' # Replace with "opensource" if you are not an admin of the collection`
working on the uploader to Internet Archive S3 git-svn-id: https://wikiteam.googlecode.com/svn/trunk@612 31edc4fc-5e31-b4c4-d58b-c8bc928bcb95 12 years ago
Moving the collection parameter to the top of the script so as to allow the user to easily configure that setting. git-svn-id: https://wikiteam.googlecode.com/svn/trunk@666 31edc4fc-5e31-b4c4-d58b-c8bc928bcb95 12 years ago			`# Nothing to change below`
uploader git-svn-id: https://wikiteam.googlecode.com/svn/trunk@738 31edc4fc-5e31-b4c4-d58b-c8bc928bcb95 12 years ago			`def log(wiki, dump, msg):`
uploader git-svn-id: https://wikiteam.googlecode.com/svn/trunk@737 31edc4fc-5e31-b4c4-d58b-c8bc928bcb95 12 years ago			`f = open('uploader-%s.log' % (listfile), 'a')`
uploader git-svn-id: https://wikiteam.googlecode.com/svn/trunk@738 31edc4fc-5e31-b4c4-d58b-c8bc928bcb95 12 years ago			`f.write('\n%s;%s;%s' % (wiki, dump, msg))`
uploader git-svn-id: https://wikiteam.googlecode.com/svn/trunk@737 31edc4fc-5e31-b4c4-d58b-c8bc928bcb95 12 years ago			`f.close()`

working on the uploader to Internet Archive S3 git-svn-id: https://wikiteam.googlecode.com/svn/trunk@615 31edc4fc-5e31-b4c4-d58b-c8bc928bcb95 12 years ago			`def upload(wikis):`
uploader git-svn-id: https://wikiteam.googlecode.com/svn/trunk@736 31edc4fc-5e31-b4c4-d58b-c8bc928bcb95 12 years ago			`for wiki in wikis:`
			`print "#"*73`
			`print "# Uploading", wiki`
			`print "#"*73`
			`wiki = wiki.lower()`
			`prefix = dumpgenerator.domain2prefix(config={'api': wiki})`

			`wikiname = prefix.split('-')[0]`
			`dumps = []`
			`for dirname, dirnames, filenames in os.walk('.'):`
			`if dirname == '.':`
			`for f in filenames:`
			`if f.startswith('%s-' % (wikiname)) and (f.endswith('-wikidump.7z') or f.endswith('-history.xml.7z')):`
			`dumps.append(f)`
			`break`

working on the uploader to Internet Archive S3 git-svn-id: https://wikiteam.googlecode.com/svn/trunk@616 31edc4fc-5e31-b4c4-d58b-c8bc928bcb95 12 years ago			`c = 0`
working on the uploader to Internet Archive S3 git-svn-id: https://wikiteam.googlecode.com/svn/trunk@615 31edc4fc-5e31-b4c4-d58b-c8bc928bcb95 12 years ago			`for dump in dumps:`
uploader git-svn-id: https://wikiteam.googlecode.com/svn/trunk@736 31edc4fc-5e31-b4c4-d58b-c8bc928bcb95 12 years ago			`time.sleep(0.1)`
			`wikidate = dump.split('-')[1]`
uploader git-svn-id: https://wikiteam.googlecode.com/svn/trunk@739 31edc4fc-5e31-b4c4-d58b-c8bc928bcb95 12 years ago			`wikidate_text = wikidate[0:4]+'-'+wikidate[4:6]+'-'+wikidate[6:8]`
working on the uploader to Internet Archive S3 git-svn-id: https://wikiteam.googlecode.com/svn/trunk@615 31edc4fc-5e31-b4c4-d58b-c8bc928bcb95 12 years ago			`print wiki, wikiname, wikidate, dump`
uploader git-svn-id: https://wikiteam.googlecode.com/svn/trunk@736 31edc4fc-5e31-b4c4-d58b-c8bc928bcb95 12 years ago
			`#get metadata from api.php`
uploader git-svn-id: https://wikiteam.googlecode.com/svn/trunk@738 31edc4fc-5e31-b4c4-d58b-c8bc928bcb95 12 years ago			`#first sitename and base url`
uploader git-svn-id: https://wikiteam.googlecode.com/svn/trunk@736 31edc4fc-5e31-b4c4-d58b-c8bc928bcb95 12 years ago			`headers = {'User-Agent': dumpgenerator.getUserAgent()}`
uploader git-svn-id: https://wikiteam.googlecode.com/svn/trunk@738 31edc4fc-5e31-b4c4-d58b-c8bc928bcb95 12 years ago			`params = {'action': 'query', 'meta': 'siteinfo', 'format': 'xml'}`
uploader git-svn-id: https://wikiteam.googlecode.com/svn/trunk@736 31edc4fc-5e31-b4c4-d58b-c8bc928bcb95 12 years ago			`data = urllib.urlencode(params)`
			`req = urllib2.Request(url=wiki, data=data, headers=headers)`
uploader git-svn-id: https://wikiteam.googlecode.com/svn/trunk@739 31edc4fc-5e31-b4c4-d58b-c8bc928bcb95 12 years ago			`xml = ''`
uploader git-svn-id: https://wikiteam.googlecode.com/svn/trunk@736 31edc4fc-5e31-b4c4-d58b-c8bc928bcb95 12 years ago			`try:`
			`f = urllib2.urlopen(req)`
uploader git-svn-id: https://wikiteam.googlecode.com/svn/trunk@739 31edc4fc-5e31-b4c4-d58b-c8bc928bcb95 12 years ago			`xml = f.read()`
			`f.close()`
uploader git-svn-id: https://wikiteam.googlecode.com/svn/trunk@736 31edc4fc-5e31-b4c4-d58b-c8bc928bcb95 12 years ago			`except:`
uploader git-svn-id: https://wikiteam.googlecode.com/svn/trunk@739 31edc4fc-5e31-b4c4-d58b-c8bc928bcb95 12 years ago			`pass`
uploader git-svn-id: https://wikiteam.googlecode.com/svn/trunk@736 31edc4fc-5e31-b4c4-d58b-c8bc928bcb95 12 years ago
			`sitename = ''`
uploader git-svn-id: https://wikiteam.googlecode.com/svn/trunk@737 31edc4fc-5e31-b4c4-d58b-c8bc928bcb95 12 years ago			`baseurl = ''`
uploader git-svn-id: https://wikiteam.googlecode.com/svn/trunk@736 31edc4fc-5e31-b4c4-d58b-c8bc928bcb95 12 years ago			`try:`
			`sitename = re.findall(ur"sitename=\"([^\"]+)\"", xml)[0]`
uploader git-svn-id: https://wikiteam.googlecode.com/svn/trunk@738 31edc4fc-5e31-b4c4-d58b-c8bc928bcb95 12 years ago			`baseurl = re.findall(ur"base=\"([^\"]+)\"", xml)[0]`
			`except:`
			`pass`

uploader git-svn-id: https://wikiteam.googlecode.com/svn/trunk@739 31edc4fc-5e31-b4c4-d58b-c8bc928bcb95 12 years ago			`if not sitename:`
			`sitename = wikiname`
			`if not baseurl:`
			`baseurl = re.sub(ur"(?im)/api\.php", ur"", wiki)`

uploader git-svn-id: https://wikiteam.googlecode.com/svn/trunk@738 31edc4fc-5e31-b4c4-d58b-c8bc928bcb95 12 years ago			`#now copyright info from API`
			`params = {'action': 'query', 'siprop': 'general\|rightsinfo', 'format': 'xml'}`
			`data = urllib.urlencode(params)`
			`req = urllib2.Request(url=wiki, data=data, headers=headers)`
			`xml = ''`
			`try:`
			`f = urllib2.urlopen(req)`
			`xml = f.read()`
			`f.close()`
			`except:`
			`pass`

			`rightsinfourl = ''`
			`rightsinfotext = ''`
			`try:`
uploader git-svn-id: https://wikiteam.googlecode.com/svn/trunk@736 31edc4fc-5e31-b4c4-d58b-c8bc928bcb95 12 years ago			`rightsinfourl = re.findall(ur"rightsinfo url=\"([^\"]+)\"", xml)[0]`
			`rightsinfotext = re.findall(ur"text=\"([^\"]+)\"", xml)[0]`
			`except:`
			`pass`

uploader git-svn-id: https://wikiteam.googlecode.com/svn/trunk@738 31edc4fc-5e31-b4c4-d58b-c8bc928bcb95 12 years ago			`#or copyright info from #footer in mainpage`
uploader git-svn-id: https://wikiteam.googlecode.com/svn/trunk@739 31edc4fc-5e31-b4c4-d58b-c8bc928bcb95 12 years ago			`if baseurl and not rightsinfourl and not rightsinfotext:`
uploader git-svn-id: https://wikiteam.googlecode.com/svn/trunk@744 31edc4fc-5e31-b4c4-d58b-c8bc928bcb95 12 years ago			`raw = ''`
			`try:`
			`f = urllib.urlopen(baseurl)`
			`raw = f.read()`
			`f.close()`
			`except:`
			`pass`
uploader git-svn-id: https://wikiteam.googlecode.com/svn/trunk@738 31edc4fc-5e31-b4c4-d58b-c8bc928bcb95 12 years ago			`rightsinfotext = ''`
			`rightsinfourl = ''`
			`try:`
			`rightsinfourl = re.findall(ur"<link rel=\"copyright\" href=\"([^\"]+)\" />", raw)[0]`
			`except:`
			`pass`
			`try:`
			`rightsinfotext = re.findall(ur"<li id=\"copyright\">([^\n\r]*?)</li>", raw)[0]`
			`except:`
			`pass`
			`if rightsinfotext and not rightsinfourl:`
			`rightsinfourl = baseurl + '#footer'`

working on the uploader to Internet Archive S3 git-svn-id: https://wikiteam.googlecode.com/svn/trunk@616 31edc4fc-5e31-b4c4-d58b-c8bc928bcb95 12 years ago			`#retrieve some info from the wiki`
uploader git-svn-id: https://wikiteam.googlecode.com/svn/trunk@736 31edc4fc-5e31-b4c4-d58b-c8bc928bcb95 12 years ago			`wikititle = "Wiki - %s" % (sitename) # Wiki - ECGpedia`
uploader git-svn-id: https://wikiteam.googlecode.com/svn/trunk@737 31edc4fc-5e31-b4c4-d58b-c8bc928bcb95 12 years ago			`wikidesc = "<a href=\"%s\">%s</a> dumped with <a href=\"http://code.google.com/p/wikiteam/\" rel=\"nofollow\">WikiTeam</a> tools." % (baseurl, sitename)# "<a href=\"http://en.ecgpedia.org/\" rel=\"nofollow\">ECGpedia,</a>: a free electrocardiography (ECG) tutorial and textbook to which anyone can contribute, designed for medical professionals such as cardiac care nurses and physicians. Dumped with <a href=\"http://code.google.com/p/wikiteam/\" rel=\"nofollow\">WikiTeam</a> tools."`
uploader git-svn-id: https://wikiteam.googlecode.com/svn/trunk@736 31edc4fc-5e31-b4c4-d58b-c8bc928bcb95 12 years ago			`wikikeys = ['wiki', 'wikiteam', 'MediaWiki', sitename, wikiname] # ecg; ECGpedia; wiki; wikiteam; MediaWiki`
uploader git-svn-id: https://wikiteam.googlecode.com/svn/trunk@739 31edc4fc-5e31-b4c4-d58b-c8bc928bcb95 12 years ago			`if not rightsinfourl and not rightsinfotext:`
			`wikikeys.append('unknowncopyright')`

uploader git-svn-id: https://wikiteam.googlecode.com/svn/trunk@736 31edc4fc-5e31-b4c4-d58b-c8bc928bcb95 12 years ago			`wikilicenseurl = rightsinfourl # http://creativecommons.org/licenses/by-nc-sa/3.0/`
			`wikirights = rightsinfotext # e.g. http://en.ecgpedia.org/wiki/Frequently_Asked_Questions : hard to fetch automatically, could be the output of API's rightsinfo if it's not a usable licenseurl or "Unknown copyright status" if nothing is found.`
			`wikiurl = wiki # we use api here http://en.ecgpedia.org/api.php`
working on the uploader to Internet Archive S3 git-svn-id: https://wikiteam.googlecode.com/svn/trunk@616 31edc4fc-5e31-b4c4-d58b-c8bc928bcb95 12 years ago
			`#creates curl command`
			`curl = ['curl', '--location',`
Adding comments and making file look nicer. The response to archive.org's S3 is okay though git-svn-id: https://wikiteam.googlecode.com/svn/trunk@647 31edc4fc-5e31-b4c4-d58b-c8bc928bcb95 12 years ago			`'--header', "'x-amz-auto-make-bucket:1'", # Creates the item automatically, need to give some time for the item to correctly be created on archive.org, or everything else will fail, showing "bucket not found" error`
Some missing apex; more comments. git-svn-id: https://wikiteam.googlecode.com/svn/trunk@631 31edc4fc-5e31-b4c4-d58b-c8bc928bcb95 12 years ago			`'--header', "'x-archive-queue-derive:0'",`
working on the uploader to Internet Archive S3 git-svn-id: https://wikiteam.googlecode.com/svn/trunk@616 31edc4fc-5e31-b4c4-d58b-c8bc928bcb95 12 years ago			`'--header', "'x-archive-size-hint:%d'" % (os.path.getsize(dump)),`
			`'--header', "'authorization: LOW %s:%s'" % (accesskey, secretkey),`
			`]`
			`if c == 0:`
			`curl += ['--header', "'x-archive-meta-mediatype:web'",`
Moving the collection parameter to the top of the script so as to allow the user to easily configure that setting. git-svn-id: https://wikiteam.googlecode.com/svn/trunk@666 31edc4fc-5e31-b4c4-d58b-c8bc928bcb95 12 years ago			`'--header', "'x-archive-meta-collection:%s'" % (collection),`
working on the uploader to Internet Archive S3 git-svn-id: https://wikiteam.googlecode.com/svn/trunk@616 31edc4fc-5e31-b4c4-d58b-c8bc928bcb95 12 years ago			`'--header', "'x-archive-meta-title:%s'" % (wikititle),`
			`'--header', "'x-archive-meta-description:%s'" % (wikidesc),`
uploader git-svn-id: https://wikiteam.googlecode.com/svn/trunk@740 31edc4fc-5e31-b4c4-d58b-c8bc928bcb95 12 years ago			`'--header', "'x-archive-meta-last-updated-date:%s'" % (wikidate_text),`
Adding comments and making file look nicer. The response to archive.org's S3 is okay though git-svn-id: https://wikiteam.googlecode.com/svn/trunk@647 31edc4fc-5e31-b4c4-d58b-c8bc928bcb95 12 years ago			`'--header', "'x-archive-meta-subject:%s'" % ('; '.join(wikikeys)), # Keywords should be separated by ; but it doesn't matter much; the alternative is to set one per field with subject[0], subject[1], ...`
working on the uploader to Internet Archive S3 git-svn-id: https://wikiteam.googlecode.com/svn/trunk@616 31edc4fc-5e31-b4c4-d58b-c8bc928bcb95 12 years ago			`'--header', "'x-archive-meta-licenseurl:%s'" % (wikilicenseurl),`
			`'--header', "'x-archive-meta-rights:%s'" % (wikirights),`
			`'--header', "'x-archive-meta-originalurl:%s'" % (wikiurl),`
			`]`

			`curl += ['--upload-file', "%s" % (dump),`
Adding comments and making file look nicer. The response to archive.org's S3 is okay though git-svn-id: https://wikiteam.googlecode.com/svn/trunk@647 31edc4fc-5e31-b4c4-d58b-c8bc928bcb95 12 years ago			`"http://s3.us.archive.org/wiki-%s/%s" % (wikiname, dump), # It could happen that the identifier is taken by another user; only wikiteam collection admins will be able to upload more files to it, curl will fail immediately and get a permissions error by s3.`
working on the uploader to Internet Archive S3 git-svn-id: https://wikiteam.googlecode.com/svn/trunk@616 31edc4fc-5e31-b4c4-d58b-c8bc928bcb95 12 years ago			`]`
git-svn-id: https://wikiteam.googlecode.com/svn/trunk@731 31edc4fc-5e31-b4c4-d58b-c8bc928bcb95 12 years ago			`curlline = ' '.join(curl)`
			`os.system(curlline)`
working on the uploader to Internet Archive S3 git-svn-id: https://wikiteam.googlecode.com/svn/trunk@616 31edc4fc-5e31-b4c4-d58b-c8bc928bcb95 12 years ago			`c += 1`
uploader git-svn-id: https://wikiteam.googlecode.com/svn/trunk@738 31edc4fc-5e31-b4c4-d58b-c8bc928bcb95 12 years ago			`log(wiki, dump, 'ok')`
working on the uploader to Internet Archive S3; launcher.py now do not explore subdirectories, just the current one '.' git-svn-id: https://wikiteam.googlecode.com/svn/trunk@613 31edc4fc-5e31-b4c4-d58b-c8bc928bcb95 12 years ago
			`def main():`
uploader git-svn-id: https://wikiteam.googlecode.com/svn/trunk@738 31edc4fc-5e31-b4c4-d58b-c8bc928bcb95 12 years ago			`wikis = open(listfile, 'r').read().strip().splitlines()`
working on the uploader to Internet Archive S3 git-svn-id: https://wikiteam.googlecode.com/svn/trunk@615 31edc4fc-5e31-b4c4-d58b-c8bc928bcb95 12 years ago			`upload(wikis)`
working on the uploader to Internet Archive S3; launcher.py now do not explore subdirectories, just the current one '.' git-svn-id: https://wikiteam.googlecode.com/svn/trunk@613 31edc4fc-5e31-b4c4-d58b-c8bc928bcb95 12 years ago
			`if __name__ == "__main__":`
			`main()`