2
0
mirror of https://github.com/WikiTeam/wikiteam synced 2024-11-15 00:15:00 +00:00
wikiteam/gui.py
2014-02-26 23:22:53 +00:00

474 lines
22 KiB
Python

#!/usr/bin/env python2
# -*- coding: utf-8 -*-
# Copyright (C) 2011-2012 WikiTeam
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
import os
import platform
import random
import re
from Tkinter import *
import ttk
import tkMessageBox
import thread
import time
import urllib
import webbrowser
import dumpgenerator
"""
TODO:
* basic: GUI to download just a wiki
* advanced: batch downloads, upload to Internet Archive or anywhere
"""
wikifarms = {
'gentoo_wikicom': 'Gentoo Wiki',
'opensuseorg': 'OpenSuSE',
'referatacom': 'Referata',
'shoutwikicom': 'ShoutWiki',
'Unknown': 'Unknown',
'wikanda': 'Wikanda',
'wikifur': 'WikiFur',
'wikimedia': 'Wikimedia',
'wikitravelorg': 'WikiTravel',
'wikkii': 'Wikkii',
}
NAME = 'WikiTeam tools'
VERSION = '0.1'
HOMEPAGE = 'https://code.google.com/p/wikiteam/'
LINUX = platform.system().lower() == 'linux'
PATH = os.path.dirname(__file__)
if PATH: os.chdir(PATH)
class App:
def __init__(self, master):
self.master = master
self.dumps = []
self.downloadpath = 'downloads'
self.block = False
# interface elements
#progressbar
#self.value = 0
#self.progressbar = ttk.Progressbar(self.master, orient=HORIZONTAL, value=self.value, mode='determinate')
#self.progressbar.grid(row=0, column=0, columnspan=1, sticky=W+E)
#self.run()
#description
self.desc = Label(self.master, text="", anchor=W, font=("Arial", 10))
self.desc.grid(row=0, column=0, columnspan=1)
#self.footer = Label(self.master, text="%s (version %s). This program is free software (GPL v3 or higher)" % (NAME, VERSION), anchor=W, justify=LEFT, font=("Arial", 10))
#self.footer.grid(row=2, column=0, columnspan=1)
#begin tabs
self.notebook = ttk.Notebook(self.master)
self.notebook.grid(row=1, column=0, columnspan=1, sticky=W+E+N+S)
self.frame1 = ttk.Frame(self.master)
self.notebook.add(self.frame1, text='Dump generator')
self.frame2 = ttk.Frame(self.master)
self.notebook.add(self.frame2, text='Downloader')
self.frame3 = ttk.Frame(self.master)
self.notebook.add(self.frame3, text='Uploader')
#dump generator tab (1)
self.labelframe11 = LabelFrame(self.frame1, text="Single download")
self.labelframe11.grid(row=0, column=0)
self.labelframe12 = LabelFrame(self.frame1, text="Batch download")
self.labelframe12.grid(row=1, column=0)
#single download labelframe
self.label11 = Label(self.labelframe11, text="Wiki URL:")
self.label11.grid(row=0, column=0)
self.entry11 = Entry(self.labelframe11, width=40)
self.entry11.grid(row=0, column=1)
self.entry11.bind('<Return>', (lambda event: self.checkURL()))
self.optionmenu11var = StringVar(self.labelframe11)
self.optionmenu11var.set("api.php")
self.optionmenu11 = OptionMenu(self.labelframe11, self.optionmenu11var, self.optionmenu11var.get(), "index.php")
self.optionmenu11.grid(row=0, column=2)
self.button11 = Button(self.labelframe11, text="Check", command=lambda: thread.start_new_thread(self.checkURL, ()), width=5)
self.button11.grid(row=0, column=3)
#batch download labelframe
self.label12 = Label(self.labelframe12, text="Wiki URLs:")
self.label12.grid(row=0, column=0)
self.text11 = Text(self.labelframe12, width=70, height=20)
self.text11.grid(row=0, column=1)
#downloader tab (2)
self.label25var = StringVar(self.frame2)
self.label25var.set("Available dumps: 0 (0.0 MB)")
self.label25 = Label(self.frame2, textvariable=self.label25var, width=27, anchor=W)
self.label25.grid(row=0, column=0, columnspan=2)
self.label26var = StringVar(self.frame2)
self.label26var.set("Downloaded: 0 (0.0 MB)")
self.label26 = Label(self.frame2, textvariable=self.label26var, background='lightgreen', width=27, anchor=W)
self.label26.grid(row=0, column=2, columnspan=2)
self.label27var = StringVar(self.frame2)
self.label27var.set("Not downloaded: 0 (0.0 MB)")
self.label27 = Label(self.frame2, textvariable=self.label27var, background='white', width=27, anchor=W)
self.label27.grid(row=0, column=4, columnspan=2)
self.label21 = Label(self.frame2, text="Filter by wikifarm:", width=15, anchor=W)
self.label21.grid(row=1, column=0)
self.optionmenu21var = StringVar(self.frame2)
self.optionmenu21var.set("all")
self.optionmenu21 = OptionMenu(self.frame2, self.optionmenu21var, self.optionmenu21var.get(), "Gentoo Wiki", "OpenSuSE", "Referata", "ShoutWiki", "Unknown", "Wikanda", "WikiFur", "Wikimedia", "WikiTravel", "Wikkii")
self.optionmenu21.grid(row=1, column=1)
self.label22 = Label(self.frame2, text="Filter by size:", width=15, anchor=W)
self.label22.grid(row=1, column=2)
self.optionmenu22var = StringVar(self.frame2)
self.optionmenu22var.set("all")
self.optionmenu22 = OptionMenu(self.frame2, self.optionmenu22var, self.optionmenu22var.get(), "KB", "MB", "GB", "TB")
self.optionmenu22.grid(row=1, column=3)
self.label23 = Label(self.frame2, text="Filter by date:", width=15, anchor=W)
self.label23.grid(row=1, column=4)
self.optionmenu23var = StringVar(self.frame2)
self.optionmenu23var.set("all")
self.optionmenu23 = OptionMenu(self.frame2, self.optionmenu23var, self.optionmenu23var.get(), "2011", "2012")
self.optionmenu23.grid(row=1, column=5)
self.label24 = Label(self.frame2, text="Filter by mirror:")
self.label24.grid(row=1, column=6)
self.optionmenu24var = StringVar(self.frame2)
self.optionmenu24var.set("all")
self.optionmenu24 = OptionMenu(self.frame2, self.optionmenu24var, self.optionmenu24var.get(), "Google Code", "Internet Archive", "ScottDB")
self.optionmenu24.grid(row=1, column=7)
self.button23 = Button(self.frame2, text="Filter!", command=self.filterAvailableDumps, width=7)
self.button23.grid(row=1, column=8)
self.treescrollbar = Scrollbar(self.frame2)
self.treescrollbar.grid(row=2, column=9, sticky=W+E+N+S)
columns = ('dump', 'wikifarm', 'size', 'date', 'mirror', 'status')
self.tree = ttk.Treeview(self.frame2, height=20, columns=columns, show='headings', yscrollcommand=self.treescrollbar.set)
self.treescrollbar.config(command=self.tree.yview)
self.tree.column('dump', width=495, minwidth=200, anchor='center')
self.tree.heading('dump', text='Dump')
self.tree.column('wikifarm', width=100, minwidth=100, anchor='center')
self.tree.heading('wikifarm', text='Wikifarm')
self.tree.column('size', width=100, minwidth=100, anchor='center')
self.tree.heading('size', text='Size')
self.tree.column('date', width=100, minwidth=100, anchor='center')
self.tree.heading('date', text='Date')
self.tree.column('mirror', width=120, minwidth=120, anchor='center')
self.tree.heading('mirror', text='Mirror')
self.tree.column('status', width=120, minwidth=120, anchor='center')
self.tree.heading('status', text='Status')
self.tree.grid(row=2, column=0, columnspan=9, sticky=W+E+N+S)
[self.tree.heading(column, text=column, command=lambda: self.treeSortColumn(column=column, reverse=False)) for column in columns]
#self.tree.bind("<Double-1>", (lambda: thread.start_new_thread(self.downloadDump, ())))
self.tree.tag_configure('downloaded', background='lightgreen')
self.tree.tag_configure('nodownloaded', background='white')
self.button21 = Button(self.frame2, text="Load available dumps", command=lambda: thread.start_new_thread(self.loadAvailableDumps, ()), width=15)
self.button21.grid(row=3, column=0)
self.button23 = Button(self.frame2, text="Download selection", command=lambda: thread.start_new_thread(self.downloadDump, ()), width=15)
self.button23.grid(row=3, column=4)
self.button22 = Button(self.frame2, text="Clear list", command=self.deleteAvailableDumps, width=10)
self.button22.grid(row=3, column=8, columnspan=2)
#uploader tab (3)
self.label31 = Label(self.frame3, text="todo...")
self.label31.grid(row=0, column=0)
#end tabs
#statusbar
self.status = Label(self.master, text="Welcome to WikiTeam tools. What wiki do you want to preserve today?", bd=1, background='grey', justify=LEFT, relief=SUNKEN)
self.status.grid(row=4, column=0, columnspan=10, sticky=W+E)
#begin menu
menu = Menu(self.master)
master.config(menu=menu)
#file menu
filemenu = Menu(menu)
menu.add_cascade(label="File", menu=filemenu)
filemenu.add_command(label="Preferences", command=self.callback)
filemenu.add_separator()
filemenu.add_command(label="Exit", command=askclose)
#help menu
helpmenu = Menu(menu)
menu.add_cascade(label="Help", menu=helpmenu)
helpmenu.add_command(label="About", command=self.callback)
helpmenu.add_command(label="Help index", command=self.callback)
helpmenu.add_command(label="WikiTeam homepage", command=lambda: webbrowser.open_new_tab(HOMEPAGE))
#end menu
def blocked(self):
tkMessageBox.showerror("Error", "There is a task in progress. Please, wait.")
def checkURL(self):
if re.search(ur"(?im)^https?://[^/]+\.[^/]+/", self.entry11.get()): #well-constructed URL?, one dot at least, aaaaa.com, but bb.aaaaa.com is allowed too
if self.optionmenu11var.get() == 'api.php':
self.msg('Please wait... Checking api.php...')
if dumpgenerator.checkAPI(self.entry11.get()):
self.entry11.config(background='lightgreen')
self.msg('api.php is correct!', level='ok')
else:
self.entry11.config(background='red')
self.msg('api.php is incorrect!', level='error')
elif self.optionmenu11var.get() == 'index.php':
self.msg('Please wait... Checking index.php...')
if dumpgenerator.checkIndexphp(self.entry11.get()):
self.entry11.config(background='lightgreen')
self.msg('index.php is OK!', level='ok')
else:
self.entry11.config(background='red')
self.msg('index.php is incorrect!', level='error')
else:
tkMessageBox.showerror("Error", "You have to write a correct api.php or index.php URL.")
def sumSizes(self, sizes):
total = 0
for size in sizes:
if size.endswith('KB'):
total += float(size.split(' ')[0])
elif size.endswith('MB'):
total += float(size.split(' ')[0])*1024
elif size.endswith('GB'):
total += float(size.split(' ')[0])*1024*1024
elif size.endswith('TB'):
total += float(size.split(' ')[0])*1024*1024*1024
elif not size or size.lower() == 'unknown':
pass
else:
total += size
return total/1024 #MB
def run(self):
for i in range(10):
time.sleep(0.1)
self.value += 10
"""
#get parameters selected
params = ['--api=http://www.archiveteam.org/api.php', '--xml']
#launch dump
dumpgenerator.main(params=params)
#check dump
"""
def msg(self, msg='', level=''):
levels = { 'ok': 'lightgreen', 'warning': 'yellow', 'error': 'red' }
if levels.has_key(level.lower()):
print '%s: %s' % (level.upper(), msg)
self.status.config(text='%s: %s' % (level.upper(), msg), background=levels[level.lower()])
else:
print msg
self.status.config(text=msg, background='grey')
def treeSortColumn(self, column, reverse=False):
l = [(self.tree.set(i, column), i) for i in self.tree.get_children('')]
l.sort(reverse=reverse)
for index, (val, i) in enumerate(l):
self.tree.move(i, '', index)
self.tree.heading(column, command=lambda: self.treeSortColumn(column=column, reverse=not reverse))
def downloadProgress(self, block_count, block_size, total_size):
try:
total_mb = total_size/1024/1024.0
downloaded = block_count *(block_size/1024/1024.0)
percent = downloaded/(total_mb/100.0)
if not random.randint(0,10):
msg = "%.1f MB of %.1f MB downloaded (%.1f%%)" % (downloaded, total_mb, percent <= 100 and percent or 100)
self.msg(msg, level='ok')
#sys.stdout.write("%.1f MB of %.1f MB downloaded (%.2f%%)" %(downloaded, total_mb, percent))
#sys.stdout.flush()
except:
pass
def downloadDump(self, event=None):
if self.block:
self.blocked()
return
else:
self.block = True
items = self.tree.selection()
if items:
if not os.path.exists(self.downloadpath):
os.makedirs(self.downloadpath)
c = 0
d = 0
for item in items:
filepath = self.downloadpath and self.downloadpath + '/' + self.dumps[int(item)][0] or self.dumps[int(item)][0]
if os.path.exists(filepath):
self.msg('That dump was downloaded before', level='ok')
d += 1
else:
self.msg("[%d of %d] Downloading %s from %s" % (c+1, len(items), self.tree.item(item,"text"), self.dumps[int(item)][5]))
f = urllib.urlretrieve(self.dumps[int(item)][5], filepath, reporthook=self.downloadProgress)
msg='%s size is %s bytes large. Download successful!' % (self.dumps[int(item)][0], os.path.getsize(filepath))
self.msg(msg=msg, level='ok')
c += 1
self.dumps[int(item)] = self.dumps[int(item)][:6] + ['True']
if c + d == len(items):
self.msg('Downloaded %d of %d%s.' % (c, len(items), d and ' (and %d were previously downloaded)' % (d) or ''), level='ok')
else:
self.msg('Problems in %d dumps. Downloaded %d of %d (and %d were previously downloaded).' % (len(items)-(c+d), c, len(items), d), level='error')
else:
tkMessageBox.showerror("Error", "You have to select some dumps to download.")
self.clearAvailableDumps()
self.showAvailableDumps()
self.filterAvailableDumps()
self.block = False
def deleteAvailableDumps(self):
#really delete dump list and clear tree
self.clearAvailableDumps()
self.dumps = [] #reset list
def clearAvailableDumps(self):
#clear tree
for i in range(len(self.dumps)):
self.tree.delete(str(i))
def showAvailableDumps(self):
c = 0
for filename, wikifarm, size, date, mirror, url, downloaded in self.dumps:
self.tree.insert('', 'end', str(c), text=filename, values=(filename, wikifarm, size, date, mirror, downloaded and 'Downloaded' or 'Not downloaded'), tags=(downloaded and 'downloaded' or 'nodownloaded',))
c += 1
def filterAvailableDumps(self):
self.clearAvailableDumps()
self.showAvailableDumps()
sizes = []
downloadedsizes = []
nodownloadedsizes = []
for i in range(len(self.dumps)):
if (self.optionmenu21var.get() == 'all' and self.optionmenu22var.get() == 'all' and self.optionmenu23var.get() == 'all' and self.optionmenu24var.get() == 'all'):
sizes.append(self.dumps[i][2])
if self.dumps[i][6]:
downloadedsizes.append(self.dumps[i][2])
else:
nodownloadedsizes.append(self.dumps[i][2])
elif (self.optionmenu21var.get() != 'all' and not self.optionmenu21var.get() == self.dumps[i][1]) or \
(self.optionmenu22var.get() != 'all' and not self.optionmenu22var.get() in self.dumps[i][2]) or \
(self.optionmenu23var.get() != 'all' and not self.optionmenu23var.get() in self.dumps[i][3]) or \
(self.optionmenu24var.get() != 'all' and not self.optionmenu24var.get() in self.dumps[i][4]):
self.tree.detach(str(i)) #hide this item
sizes.append(self.dumps[i][2])
if self.dumps[i][6]:
downloadedsizes.append(self.dumps[i][2])
else:
nodownloadedsizes.append(self.dumps[i][2])
self.label25var.set("Available dumps: %d (%.1f MB)" % (len(sizes), self.sumSizes(sizes)))
self.label26var.set("Downloaded: %d (%.1f MB)" % (len(downloadedsizes), self.sumSizes(downloadedsizes)))
self.label27var.set("Not downloaded: %d (%.1f MB)" % (len(nodownloadedsizes), self.sumSizes(nodownloadedsizes)))
def isDumpDownloaded(self, filename):
#improve, size check or md5sum?
if filename:
filepath = self.downloadpath and self.downloadpath + '/' + filename or filename
if os.path.exists(filepath):
return True
"""estsize = os.path.getsize(filepath)
c = 0
while int(estsize) >= 1024:
estsize = estsize/1024.0
c += 1
estsize = '%.1f %s' % (estsize, ['', 'KB', 'MB', 'GB', 'TB'][c])"""
return False
def loadAvailableDumps(self):
if self.block:
self.blocked()
return
else:
self.block = True
if self.dumps:
self.deleteAvailableDumps()
iaregexp = ur'/download/[^/]+/(?P<filename>[^>]+\.7z)">\s*(?P<size>[\d\.]+ (?:KB|MB|GB|TB))\s*</a>'
self.urls = [
['Google Code', 'https://code.google.com/p/wikiteam/downloads/list?num=5000&start=0', ur'(?im)detail\?name=(?P<filename>[^&]+\.7z)&amp;can=2&amp;q=" style="white-space:nowrap">\s*(?P<size>[\d\.]+ (?:KB|MB|GB|TB))\s*</a></td>'],
['Internet Archive', 'http://www.archive.org/details/referata.com-20111204', iaregexp],
['Internet Archive', 'http://www.archive.org/details/WikiTeamMirror', iaregexp],
['ScottDB', 'http://mirrors.sdboyd56.com/WikiTeam/', ur'<a href="(?P<filename>[^>]+\.7z)">(?P<size>[\d\.]+ (?:KB|MB|GB|TB))</a>'],
['Wikimedia', 'http://dumps.wikimedia.org/backup-index.html', ur'(?P<size>)<a href="(?P<filename>[^>]+)">[^>]+</a>: <span class=\'done\'>Dump complete</span></li>']
]
wikifarms_r = re.compile(ur"(%s)" % ('|'.join(wikifarms.keys())))
c = 0
for mirror, url, regexp in self.urls:
print 'Loading data from', mirror, url
self.msg(msg='Please wait... Loading data from %s %s' % (mirror, url))
f = urllib.urlopen(url)
m = re.compile(regexp).finditer(f.read())
for i in m:
filename = i.group('filename')
if mirror == 'Wikimedia':
filename = '%s-pages-meta-history.xml.7z' % (re.sub('/', '-', filename))
wikifarm = 'Unknown'
if re.search(wikifarms_r, filename):
wikifarm = re.findall(wikifarms_r, filename)[0]
wikifarm = wikifarms[wikifarm]
size = i.group('size')
if not size:
size = 'Unknown'
date = 'Unknown'
if re.search(ur"\-(\d{8})[\.-]", filename):
date = re.findall(ur"\-(\d{4})(\d{2})(\d{2})[\.-]", filename)[0]
date = '%s-%s-%s' % (date[0], date[1], date[2])
elif re.search(ur"\-(\d{4}\-\d{2}\-\d{2})[\.-]", filename):
date = re.findall(ur"\-(\d{4}\-\d{2}\-\d{2})[\.-]", filename)[0]
downloadurl = ''
if mirror == 'Google Code':
downloadurl = 'https://wikiteam.googlecode.com/files/' + filename
elif mirror == 'Internet Archive':
downloadurl = re.sub(ur'/details/', ur'/download/', url) + '/' + filename
elif mirror == 'ScottDB':
downloadurl = url + '/' + filename
elif mirror == 'Wikimedia':
downloadurl = 'http://dumps.wikimedia.org/' + filename.split('-')[0] + '/' + re.sub('-', '', date) + '/' + filename
downloaded = self.isDumpDownloaded(filename)
self.dumps.append([filename, wikifarm, size, date, mirror, downloadurl, downloaded])
self.dumps.sort()
self.showAvailableDumps()
self.filterAvailableDumps()
self.msg(msg='Loaded %d available dumps!' % (len(self.dumps)), level='ok')
self.block = False
def callback(self):
self.msg("Feature not implemented for the moment. Contributions are welcome.", level='warning')
def askclose():
if tkMessageBox.askokcancel("Quit", "Do you really wish to exit?"):
root.destroy()
if __name__ == "__main__":
root = Tk()
width = 1050
height = 560
# calculate position x, y
x = (root.winfo_screenwidth()/2) - (width/2)
y = (root.winfo_screenheight()/2) - (height/2)
root.geometry('%dx%d+%d+%d' % (width, height, x, y))
root.title('%s (version %s)' % (NAME, VERSION))
root.protocol("WM_DELETE_WINDOW", askclose)
#logo
#imagelogo = PhotoImage(file = 'logo.gif')
#labellogo = Label(root, image=imagelogo)
#labellogo.grid(row=0, column=0, rowspan=3, sticky=W)
app = App(root)
root.mainloop()