mirror of
https://github.com/WikiTeam/wikiteam
synced 2024-11-16 21:27:46 +00:00
updater for IA items
This commit is contained in:
parent
93f7ecf155
commit
2cc35237cd
65
wikimediacommons/commons-update-status.py
Normal file
65
wikimediacommons/commons-update-status.py
Normal file
@ -0,0 +1,65 @@
|
||||
#!/usr/bin/env python3
|
||||
# -*- coding: utf8 -*-
|
||||
|
||||
# Copyright (C) 2012-2016 WikiTeam developers
|
||||
# This program is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
# the Free Software Foundation, either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
import json
|
||||
import urllib
|
||||
|
||||
def _date_key(date):
    """Normalize the date suffix of an item identifier.

    Accepted inputs are YYYY, YYYYMM, YYYYMMDD, YYYY-MM and YYYY-MM-DD
    (only the lengths of the parts are checked, as in the identifiers
    themselves). Returns the date as 'YYYY', 'YYYY-MM' or 'YYYY-MM-DD',
    or None when the layout is not one of the accepted ones.
    """
    parts = date.split('-')
    if len(parts) == 1:
        compact = parts[0]
        if len(compact) == 4:  # YYYY
            return compact
        if len(compact) == 6:  # YYYYMM
            return '%s-%s' % (compact[:4], compact[4:6])
        if len(compact) == 8:  # YYYYMMDD
            return '%s-%s-%s' % (compact[:4], compact[4:6], compact[6:8])
        return None
    # Already dash-separated: YYYY-MM or YYYY-MM-DD can be used verbatim.
    if [len(part) for part in parts] in ([4, 2], [4, 2, 2]):
        return date
    return None


def _collect_identifiers(docs):
    """Map normalized dates to item identifiers.

    docs is the list of search result dicts, each carrying an 'identifier'
    key. Only identifiers containing 'wikimediacommons-20' are considered.
    Identifiers whose date cannot be understood are reported with an ERROR
    line on stdout and skipped. When two identifiers normalize to the same
    date, the one seen last wins.
    """
    identifiers = {}
    for item in docs:
        identifier = item['identifier']
        if 'wikimediacommons-20' not in identifier:
            continue
        date = identifier.split('wikimediacommons-')[1]
        key = _date_key(date)
        if key is None:
            print('ERROR, dont understand date format in %s' % (identifier))
            continue
        identifiers[key] = identifier
    return identifiers


def _render_table(identifiers):
    """Return the wikitable markup for a {date: identifier} mapping.

    Rows are sorted by date. The Files and Size columns are emitted as
    '???' placeholders.
    """
    rows = [
        "|-\n| %s || [https://archive.org/details/%s %s] || ??? || ???" % (date, identifier, identifier)
        for date, identifier in sorted(identifiers.items())
    ]
    # Joined explicitly (rather than via an indented triple-quoted string)
    # so the emitted markup always starts at column 0.
    return '\n'.join([
        '',
        '{| class="wikitable sortable"',
        '! Date !! Identifier !! Files !! Size (GB)',
        '\n'.join(rows),
        '|}',
    ])


def main():
    """Print a wikitable listing the Wikimedia Commons items on archive.org.

    Queries the archive.org advanced search for items with the subjects
    "WikiTeam" and "Wikimedia Commons" (first page, up to 500 rows), keeps
    the identifiers that look like 'wikimediacommons-<date>' and prints one
    table row per date to stdout.

    Raises ValueError (json.JSONDecodeError) if the response is not the
    expected 'callback(...)' JSONP wrapper around a JSON document.
    """
    # Imported here because a bare "import urllib" does not load the
    # urllib.request submodule on Python 3.
    import urllib.request

    queryurl = 'https://archive.org/advancedsearch.php?q=subject%3A%22WikiTeam%22+AND+subject%3A%22Wikimedia+Commons%22&fl[]=identifier&sort[]=&sort[]=&sort[]=&rows=500&page=1&output=json&callback=callback'
    # The context manager closes the connection; read() returns bytes on
    # Python 3, so decode before doing any text processing.
    with urllib.request.urlopen(queryurl) as response:
        raw = response.read().decode('utf-8')

    # Unwrap the JSONP envelope: keep what lies between 'callback(' and the
    # last ')' so trailing characters such as ';' or a newline are harmless.
    payload = raw.partition('callback(')[2].rpartition(')')[0]
    result = json.loads(payload)['response']['docs']

    print(_render_table(_collect_identifiers(result)))
|
||||
|
||||
# Run the updater only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|
Loading…
Reference in New Issue
Block a user