diff --git a/wikimediacommons/commonsdownloader.py b/wikimediacommons/commonsdownloader.py index 6237726..d1ee89d 100644 --- a/wikimediacommons/commonsdownloader.py +++ b/wikimediacommons/commonsdownloader.py @@ -1,7 +1,7 @@ #!/usr/bin/env python2 # -*- coding: utf8 -*- -# Copyright (C) 2011-2012 WikiTeam +# Copyright (C) 2011-2016 WikiTeam developers # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation, either version 3 of the License, or @@ -30,11 +30,11 @@ def welcome(): """ """ print "#"*73 print """# Welcome to CommonsDownloader 0.1 by WikiTeam (GPL v3) # -# More info at: http://code.google.com/p/wikiteam/ #""" +# More info: https://github.com/WikiTeam/wikiteam #""" print "#"*73 print '' print "#"*73 - print """# Copyright (C) 2011-2012 WikiTeam # + print """# Copyright (C) 2011-2016 WikiTeam # # This program is free software: you can redistribute it and/or modify # # it under the terms of the GNU General Public License as published by # # the Free Software Foundation, either version 3 of the License, or # @@ -61,7 +61,6 @@ def main(): welcome() filenamefeed = 'commonssql.csv' # feed - #filenamefeed = 'a.csv' startdate = '' enddate = '' delta = datetime.timedelta(days=1) #chunks by day @@ -122,7 +121,7 @@ def main(): #wget file if original_name != img_name: #the image is an old version, download using /archive/ path in server - os.system('wget -c "http://upload.wikimedia.org/wikipedia/commons/archive/%s/%s/%s" -O "%s/%s"' % (md5hash[0], md5hash[0:2], img_name_quoted, savepath, img_saved_as_)) + os.system('wget -c "https://upload.wikimedia.org/wikipedia/commons/archive/%s/%s/%s" -O "%s/%s"' % (md5hash[0], md5hash[0:2], img_name_quoted, savepath, img_saved_as_)) try: if not os.path.getsize('%s/%s' % (savepath, img_saved_as_)): #empty file?... #probably false 20101005024534! begining like this http://commons.wikimedia.org/wiki/File:20041028210012!Pilar.jpg @@ -131,16 +130,16 @@ def main(): original_name_ = re.sub(r'"', r'\"', re.sub(r' ', r'_', original_name.encode('utf-8'))) md5hash = md5(re.sub(' ', '_', original_name.encode("utf-8"))).hexdigest() #redownload, now without /archive/ subpath - os.system('wget -c "http://upload.wikimedia.org/wikipedia/commons/%s/%s/%s" -O "%s/%s"' % (md5hash[0], md5hash[0:2], img_name_quoted, savepath, img_saved_as_)) + os.system('wget -c "https://upload.wikimedia.org/wikipedia/commons/%s/%s/%s" -O "%s/%s"' % (md5hash[0], md5hash[0:2], img_name_quoted, savepath, img_saved_as_)) except OSError: pass else: # Issue #66 : try your.org first os.system('wget -c "http://ftpmirror.your.org/pub/wikimedia/images/wikipedia/commons/%s/%s/%s" -O "%s/%s"' % (md5hash[0], md5hash[0:2], img_name_quoted, savepath, img_saved_as_)) - os.system('wget -c "http://upload.wikimedia.org/wikipedia/commons/%s/%s/%s" -O "%s/%s"' % (md5hash[0], md5hash[0:2], img_name_quoted, savepath, img_saved_as_)) + os.system('wget -c "https://upload.wikimedia.org/wikipedia/commons/%s/%s/%s" -O "%s/%s"' % (md5hash[0], md5hash[0:2], img_name_quoted, savepath, img_saved_as_)) #curl .xml description page with full history - os.system('curl -d "&pages=File:%s&history=1&action=submit" http://commons.wikimedia.org/w/index.php?title=Special:Export -o "%s/%s.xml"' % (original_name_, savepath, img_saved_as_)) + os.system('curl -d "&pages=File:%s&history=1&action=submit" https://commons.wikimedia.org/w/index.php?title=Special:Export -o "%s/%s.xml"' % (original_name_, savepath, img_saved_as_)) #save csv info g = csv.writer(open(filenamecsv, 'a'), delimiter='|', quotechar='"', quoting=csv.QUOTE_MINIMAL)