Reduce requests for existing items and remove whitespace: tested with wiki-smackdownneoseekercom_w

git-svn-id: https://wikiteam.googlecode.com/svn/trunk@939 31edc4fc-5e31-b4c4-d58b-c8bc928bcb95
pull/117/head
nemobis 11 years ago
parent 54f9798be0
commit b74d6f79ce

@@ -98,9 +98,19 @@ def upload(wikis):
wikidate_text = wikidate[0:4]+'-'+wikidate[4:6]+'-'+wikidate[6:8]
print wiki, wikiname, wikidate, dump
# Does the item exist already?
headers = {'User-Agent': dumpgenerator.getUserAgent()}
itemdata = urllib2.Request(url='http://archive.org/metadata/wiki-' + wikiname, headers=headers)
if urllib2.urlopen(itemdata).read() == '{}':
ismissingitem = True
else:
ismissingitem = False
# We don't know a way to fix/overwrite metadata if item exists already:
# just pass bogus data and save some time
if ismissingitem:
#get metadata from api.php
#first sitename and base url
headers = {'User-Agent': dumpgenerator.getUserAgent()}
params = {'action': 'query', 'meta': 'siteinfo', 'format': 'xml'}
data = urllib.urlencode(params)
req = urllib2.Request(url=wiki, data=data, headers=headers)
@@ -187,6 +197,14 @@ def upload(wikis):
wikilicenseurl = rightsinfourl # http://creativecommons.org/licenses/by-nc-sa/3.0/
wikirights = rightsinfotext # e.g. http://en.ecgpedia.org/wiki/Frequently_Asked_Questions : hard to fetch automatically, could be the output of API's rightsinfo if it's not a usable licenseurl or "Unknown copyright status" if nothing is found.
wikiurl = wiki # we use api here http://en.ecgpedia.org/api.php
else:
lang = 'foo'
wikititle = 'foo'
wikidesc = 'foo'
wikikeys = 'foo'
wikilicenseurl = 'foo'
wikirights = 'foo'
wikiurl = 'foo'
#creates curl command
curl = ['curl', '--location',
@@ -228,6 +246,7 @@ def upload(wikis):
if not os.system(curlline):
uploadeddumps.append(dump)
log(wiki, dump, 'ok')
if not ismissingitem:
os.system(curlmetaline)
c += 1

Loading…
Cancel
Save