mirror of
https://github.com/WikiTeam/wikiteam
synced 2024-11-15 00:15:00 +00:00
8763bdf045
git-svn-id: https://wikiteam.googlecode.com/svn/trunk@571 31edc4fc-5e31-b4c4-d58b-c8bc928bcb95
78 lines
3.0 KiB
Python
78 lines
3.0 KiB
Python
#!/usr/bin/python
# -*- coding: utf-8 -*-

# Copyright (C) 2011-2012 WikiTeam
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

import os
import re
import subprocess
import sys
import time

import dumpgenerator

def parse_wiki_list(text):
    """Return the wiki API URLs listed in *text*, one per line.

    Each entry is stripped and lowercased.  Blank lines are dropped: an
    empty entry would otherwise produce an empty prefix, and every file
    name starts with the empty string.
    """
    return [line.strip().lower() for line in text.splitlines() if line.strip()]


def last_match(names, prefix, suffix=''):
    """Return the last entry of *names* that starts with *prefix* and ends
    with *suffix*, or '' when nothing matches.

    When several names match, the last one in listing order wins.
    """
    found = ''
    for name in names:
        if name.startswith(prefix) and name.endswith(suffix):
            found = name
    return found


def list_top_level():
    """Return (dirnames, filenames) for the current directory only.

    Uses os.listdir instead of os.walk so that the (potentially huge)
    dump directories are not traversed just to look at the top level.
    """
    dirnames = []
    filenames = []
    for name in os.listdir('.'):
        if os.path.isdir(name):
            dirnames.append(name)
        else:
            filenames.append(name)
    return dirnames, filenames


def dump_command(wiki, wikidir=''):
    """Build the dumpgenerator.py argument list for *wiki*.

    When *wikidir* is non-empty the command resumes the download stored in
    that directory; otherwise it starts a download from scratch.
    """
    command = ['python', 'dumpgenerator.py', '--api=%s' % wiki, '--xml', '--images']
    if wikidir:
        command += ['--resume', '--path=%s' % wikidir]
    return command


def archive_commands(dump_prefix):
    """Build the two 7z argument lists run from inside a dump directory.

    The first creates ../<dump_prefix>-wikidump.7z (history, titles, image
    list, images and the auxiliary files); the second creates
    ../<dump_prefix>-history.xml.7z (the same without image list/images).
    """
    history = '%s-history.xml' % dump_prefix
    titles = '%s-titles.txt' % dump_prefix
    images_list = '%s-images.txt' % dump_prefix
    auxiliary = ['index.html', 'Special:Version.html', 'errors.log']
    full_dump = ['7z', 'a', '../%s-wikidump.7z' % dump_prefix, history, titles, images_list]
    full_dump += auxiliary + ['images/']
    history_only = ['7z', 'a', '../%s-history.xml.7z' % dump_prefix, history, titles]
    history_only += auxiliary
    return [full_dump, history_only]


def run(argv):
    """Run *argv* without a shell and return its exit status.

    Passing an argument list keeps URLs and file names containing shell
    metacharacters (&, ;, ?, spaces) intact.  Returns None, after printing
    a message, when the program cannot be started at all, so one missing
    tool does not abort the whole batch.
    """
    try:
        return subprocess.call(argv)
    except OSError as error:
        print('Could not run %s: %s' % (argv[0], error))
        return None


def compress(wikidir, dump_prefix):
    """Print tag counts for the XML dump in *wikidir* and build its archives.

    The working directory is changed to *wikidir* for the duration of the
    call and always restored to where it started, even on error.
    """
    origin = os.getcwd()
    time.sleep(1)
    os.chdir(wikidir)
    try:
        print('Changed directory to %s' % os.getcwd())
        # Constant command line; the shell is needed to expand *.xml.
        # Opening and closing tag counts are printed for a manual check.
        os.system('grep "<title>" *.xml -c;grep "<page>" *.xml -c;grep "</page>" *.xml -c;grep "<revision>" *.xml -c;grep "</revision>" *.xml -c')
        for argv in archive_commands(dump_prefix):
            run(argv)
    finally:
        # Restore the saved directory rather than '..', which is wrong
        # when wikidir is a symlink.
        os.chdir(origin)
    print('Changed directory to %s' % os.getcwd())
    time.sleep(1)


def process_wiki(wiki):
    """Download (or resume) and compress the dump of a single wiki.

    A wiki is skipped when the current directory already holds a .7z file
    whose name starts with the wiki's prefix.
    """
    prefix = dumpgenerator.domain2prefix(config={'api': wiki})
    if not prefix:
        # An empty prefix would match every file and directory below.
        print('Skipping... Could not derive a prefix for %s' % wiki)
        return

    dirnames, filenames = list_top_level()

    # Check if compressed; in that case it is finished.
    zipfilename = last_match(filenames, prefix, '.7z')
    if zipfilename:
        print('Skipping... This wiki was downloaded and compressed before in %s' % zipfilename)
        return

    # Download: resume if a directory for this wiki already exists.
    wikidir = last_match(dirnames, prefix)
    if wikidir:
        print('Resuming download, using directory %s' % wikidir)
        run(dump_command(wiki, wikidir))
    else:
        run(dump_command(wiki))
        # The dump directory only exists now; look it up.
        wikidir = last_match(list_top_level()[0], prefix)

    # Compress.  The archive names use everything before '-wikidump' in the
    # directory name (presumably '<prefix>-<date>'; confirm against
    # dumpgenerator's naming).
    dump_prefix = wikidir.split('-wikidump')[0]
    if wikidir and dump_prefix:
        compress(wikidir, dump_prefix)


def main():
    """Process every wiki listed in the file named by the first argument."""
    if len(sys.argv) < 2:
        sys.stderr.write('Usage: python %s <file with one wiki API URL per line>\n' % sys.argv[0])
        sys.exit(1)
    with open(sys.argv[1], 'r') as listfile:
        wikis = parse_wiki_list(listfile.read())
    for wiki in wikis:
        process_wiki(wiki)


if __name__ == '__main__':
    main()
|