comments and https

pull/287/head
emijrp 8 years ago
parent 80a565fcdd
commit 1a0368b84e

@ -1,7 +1,7 @@
#!/usr/bin/env python2
# -*- coding: utf8 -*-
# Copyright (C) 2011-2012 WikiTeam
# Copyright (C) 2011-2016 WikiTeam developers
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
@ -30,11 +30,11 @@ def welcome():
""" """
print "#"*73
print """# Welcome to CommonsDownloader 0.1 by WikiTeam (GPL v3) #
# More info at: http://code.google.com/p/wikiteam/ #"""
# More info: https://github.com/WikiTeam/wikiteam #"""
print "#"*73
print ''
print "#"*73
print """# Copyright (C) 2011-2012 WikiTeam #
print """# Copyright (C) 2011-2016 WikiTeam #
# This program is free software: you can redistribute it and/or modify #
# it under the terms of the GNU General Public License as published by #
# the Free Software Foundation, either version 3 of the License, or #
@ -61,7 +61,6 @@ def main():
welcome()
filenamefeed = 'commonssql.csv' # feed
#filenamefeed = 'a.csv'
startdate = ''
enddate = ''
delta = datetime.timedelta(days=1) #chunks by day
@ -122,7 +121,7 @@ def main():
#wget file
if original_name != img_name: #the image is an old version, download using /archive/ path in server
os.system('wget -c "http://upload.wikimedia.org/wikipedia/commons/archive/%s/%s/%s" -O "%s/%s"' % (md5hash[0], md5hash[0:2], img_name_quoted, savepath, img_saved_as_))
os.system('wget -c "https://upload.wikimedia.org/wikipedia/commons/archive/%s/%s/%s" -O "%s/%s"' % (md5hash[0], md5hash[0:2], img_name_quoted, savepath, img_saved_as_))
try:
if not os.path.getsize('%s/%s' % (savepath, img_saved_as_)): #empty file?...
#probably a false 20101005024534! timestamp prefix: the original filename itself begins like this, e.g. http://commons.wikimedia.org/wiki/File:20041028210012!Pilar.jpg
@ -131,16 +130,16 @@ def main():
original_name_ = re.sub(r'"', r'\"', re.sub(r' ', r'_', original_name.encode('utf-8')))
md5hash = md5(re.sub(' ', '_', original_name.encode("utf-8"))).hexdigest()
#redownload, now without /archive/ subpath
os.system('wget -c "http://upload.wikimedia.org/wikipedia/commons/%s/%s/%s" -O "%s/%s"' % (md5hash[0], md5hash[0:2], img_name_quoted, savepath, img_saved_as_))
os.system('wget -c "https://upload.wikimedia.org/wikipedia/commons/%s/%s/%s" -O "%s/%s"' % (md5hash[0], md5hash[0:2], img_name_quoted, savepath, img_saved_as_))
except OSError:
pass
else:
# Issue #66 : try your.org first
os.system('wget -c "http://ftpmirror.your.org/pub/wikimedia/images/wikipedia/commons/%s/%s/%s" -O "%s/%s"' % (md5hash[0], md5hash[0:2], img_name_quoted, savepath, img_saved_as_))
os.system('wget -c "http://upload.wikimedia.org/wikipedia/commons/%s/%s/%s" -O "%s/%s"' % (md5hash[0], md5hash[0:2], img_name_quoted, savepath, img_saved_as_))
os.system('wget -c "https://upload.wikimedia.org/wikipedia/commons/%s/%s/%s" -O "%s/%s"' % (md5hash[0], md5hash[0:2], img_name_quoted, savepath, img_saved_as_))
#curl .xml description page with full history
os.system('curl -d "&pages=File:%s&history=1&action=submit" http://commons.wikimedia.org/w/index.php?title=Special:Export -o "%s/%s.xml"' % (original_name_, savepath, img_saved_as_))
os.system('curl -d "&pages=File:%s&history=1&action=submit" https://commons.wikimedia.org/w/index.php?title=Special:Export -o "%s/%s.xml"' % (original_name_, savepath, img_saved_as_))
#save csv info
g = csv.writer(open(filenamecsv, 'a'), delimiter='|', quotechar='"', quoting=csv.QUOTE_MINIMAL)

Loading…
Cancel
Save