mirror of https://github.com/WikiTeam/wikiteam
own directory for Wikimedia Commons, fixing commonssql.py db query
parent
e1647a83c6
commit
93f7ecf155
@ -1,61 +0,0 @@
|
||||
#!/usr/bin/env python2
|
||||
# -*- coding: utf8 -*-
|
||||
|
||||
# Copyright (C) 2012 WikiTeam
|
||||
# This program is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
# the Free Software Foundation, either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
import csv
|
||||
import MySQLdb
|
||||
import re
|
||||
import sys
|
||||
|
||||
# Dump one year of Wikimedia Commons image metadata to a pipe-delimited CSV.
#
# Usage: commonssql.py YEAR
#
# Rows come from the `image` table (current file versions) and from the
# `oldimage` table (archived versions that are not deleted), restricted to
# timestamps inside YEAR, and are appended to `commonssql.csv` after a header.

# Rows are fetched and written in chunks of this size; the running row count
# of the current query is printed each time it reaches a multiple of it.
BATCH_SIZE = 10000

# Parse the argument first so a bad invocation fails before any file is made.
year = int(sys.argv[1])
filename = 'commonssql.csv'

#http://www.mediawiki.org/wiki/Manual:Image_table
#http://www.mediawiki.org/wiki/Manual:Oldimage_table
queries = [
    "SELECT /* commonssql.py SLOW_OK */ img_name, img_timestamp, img_user, img_user_text, img_size, img_width, img_height FROM image WHERE img_timestamp>=%d0101000000 AND img_timestamp<=%d1231235959 ORDER BY img_timestamp ASC" % (year, year),
    "SELECT /* commonssql.py SLOW_OK */ oi_archive_name AS img_name, oi_timestamp AS img_timestamp, oi_user AS img_user, oi_user_text AS img_user_text, oi_size AS img_size, oi_width AS img_width, oi_height AS img_height FROM oldimage WHERE oi_deleted=0 AND oi_timestamp>=%d0101000000 AND oi_timestamp<=%d1231235959 ORDER BY oi_timestamp ASC" % (year, year), #do not get unavailable images
]


def _csv_row(record):
    """Turn one result dict into the list of CSV fields, in header order.

    Spaces in the file name and in the user name are replaced by
    underscores. Both values are decoded from UTF-8 and re-encoded, so a
    value that is not valid UTF-8 raises UnicodeDecodeError.
    """
    img_name = unicode(record['img_name'], 'utf-8').replace(u' ', u'_')
    img_user_text = unicode(record['img_user_text'], 'utf-8').replace(u' ', u'_')
    return [
        img_name.encode('utf-8'),
        record['img_timestamp'],
        record['img_user'],
        img_user_text.encode('utf-8'),
        record['img_size'],
        record['img_width'],
        record['img_height'],
    ]


# A single `with` block guarantees the output is flushed and closed even if a
# query fails half-way; the header keeps its plain '\n' terminator.
with open(filename, 'w') as out:
    out.write('img_name|img_timestamp|img_user|img_user_text|img_size|img_width|img_height\n')
    writer = csv.writer(out, delimiter='|', quotechar='"', quoting=csv.QUOTE_MINIMAL)

    conn = MySQLdb.connect(host='commonswiki.labsdb', db='commonswiki_p', read_default_file='~/.my.cnf')
    try:
        for query in queries:
            conn.query(query)
            result = conn.store_result()
            c = 0  # rows written so far for this query
            # how=1 asks for each row as a dict keyed by column name.
            batch = result.fetch_row(maxrows=BATCH_SIZE, how=1)
            while batch:  # an empty tuple means the result set is exhausted
                writer.writerows([_csv_row(record) for record in batch])
                c += len(batch)
                if c % BATCH_SIZE == 0:
                    print(c)
                batch = result.fetch_row(maxrows=BATCH_SIZE, how=1)
    finally:
        conn.close()
|
@ -0,0 +1,66 @@
|
||||
#!/usr/bin/env python2
|
||||
# -*- coding: utf8 -*-
|
||||
|
||||
# Copyright (C) 2012-2016 WikiTeam developers
|
||||
# This program is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
# the Free Software Foundation, either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
import csv
|
||||
import MySQLdb
|
||||
import re
|
||||
import sys
|
||||
|
||||
# Rows are fetched and written in chunks of this size; the running row count
# of the current query is printed each time it reaches a multiple of it.
_BATCH_SIZE = 10000


def _csv_row(record):
    """Turn one result dict into the list of CSV fields, in header order.

    Spaces in the file name and in the user name are replaced by underscores;
    the remaining columns are passed through unchanged.
    """
    return [
        record['img_name'].replace(' ', '_'),
        record['img_timestamp'],
        record['img_user'],
        record['img_user_text'].replace(' ', '_'),
        record['img_size'],
        record['img_width'],
        record['img_height'],
    ]


def main():
    """Dump one year of Wikimedia Commons image metadata to a CSV file.

    The year is read from ``sys.argv[1]``. The output file is
    ``commonssql-<year>.csv``: a header line followed by pipe-delimited rows
    taken from the ``image`` table and from the non-deleted rows of the
    ``oldimage`` table whose timestamp falls inside that year, each ordered
    by timestamp.

    Raises:
        IndexError: if no year argument was given.
        ValueError: if the argument is not an integer.
    """
    year = int(sys.argv[1])
    filename = 'commonssql-%s.csv' % (year)

    #http://www.mediawiki.org/wiki/Manual:Image_table
    #http://www.mediawiki.org/wiki/Manual:Oldimage_table

    # `year` is an int, so interpolating it into the SQL text is safe.
    queries = [
        "SELECT /* commonssql.py SLOW_OK */ img_name, img_timestamp, img_user, img_user_text, img_size, img_width, img_height FROM image WHERE img_timestamp>=%d0101000000 AND img_timestamp<=%d1231235959 ORDER BY img_timestamp ASC" % (year, year),
        "SELECT /* commonssql.py SLOW_OK */ oi_archive_name AS img_name, oi_timestamp AS img_timestamp, oi_user AS img_user, oi_user_text AS img_user_text, oi_size AS img_size, oi_width AS img_width, oi_height AS img_height FROM oldimage WHERE oi_deleted=0 AND oi_timestamp>=%d0101000000 AND oi_timestamp<=%d1231235959 ORDER BY oi_timestamp ASC" % (year, year), #do not get unavailable images
    ]

    # A single `with` block guarantees the output is flushed and closed even
    # if a query fails half-way; the header keeps its plain '\n' terminator.
    with open(filename, 'w') as out:
        out.write('img_name|img_timestamp|img_user|img_user_text|img_size|img_width|img_height\n')
        writer = csv.writer(out, delimiter='|', quotechar='"', quoting=csv.QUOTE_MINIMAL)

        # NOTE(review): with use_unicode=True the driver may return unicode
        # objects for text columns, and the Python 2 csv module cannot write
        # non-ASCII unicode. Confirm these columns come back as byte strings.
        conn = MySQLdb.connect(host='s4.labsdb', db='commonswiki_p', read_default_file='~/replica.my.cnf', use_unicode=True)
        try:
            for query in queries:
                conn.query(query)
                result = conn.store_result()
                c = 0  # rows written so far for this query
                # how=1 asks for each row as a dict keyed by column name.
                batch = result.fetch_row(maxrows=_BATCH_SIZE, how=1)
                while batch:  # an empty tuple means the result set is exhausted
                    writer.writerows([_csv_row(record) for record in batch])
                    c += len(batch)
                    if c % _BATCH_SIZE == 0:
                        print(c)
                    batch = result.fetch_row(maxrows=_BATCH_SIZE, how=1)
        finally:
            conn.close()


if __name__ == '__main__':
    main()
|
Loading…
Reference in New Issue