mirror of
https://github.com/WikiTeam/wikiteam
synced 2024-11-16 21:27:46 +00:00
resume and create dump functions, depends on wikiengine
This commit is contained in:
parent
05e9486e5f
commit
2783e1cecb
@ -19,10 +19,10 @@
|
||||
# Documentation for developers: http://wikiteam.readthedocs.com
|
||||
|
||||
import argparse
|
||||
import cPickle # what is in python3?
|
||||
import datetime
|
||||
import random
|
||||
import re
|
||||
import requests # seriously needed?
|
||||
import sys
|
||||
import urllib
|
||||
|
||||
@ -31,8 +31,6 @@ if sys.version_info < (3, 0):
|
||||
else:
|
||||
import http.cookiejar as cookielib
|
||||
|
||||
import mediawiki
|
||||
|
||||
__version__ = "0.3.0"
|
||||
|
||||
def avoidWikimediaProjects(config={}):
|
||||
@ -56,6 +54,46 @@ If this is a public wiki, please consider publishing this dump. Do it yourself a
|
||||
Good luck! Bye!"""
|
||||
print(message)
|
||||
|
||||
def createNewDump(config={}):
    """ Start a new dump with the backend selected by config['wikiengine'].

    'mediawiki' and 'wikispaces' are dispatched to their backend modules,
    which are imported lazily so only the selected engine's module has to be
    importable. For any other engine a "not supported" message is printed
    and the function returns without dumping anything.
    """

    wikiengine = config['wikiengine']
    if wikiengine == 'mediawiki':
        import mediawiki
        # The call must be qualified: "import mediawiki" binds only the
        # module name, so a bare mwCreateNewDump() raises NameError.
        mediawiki.mwCreateNewDump(config=config)
    elif wikiengine == 'wikispaces':
        import wikispaces
        wikispaces.wsCreateNewDump(config=config)
    else:
        # NOTE(review): the message says "Exiting." but control simply
        # returns to the caller; confirm whether a real exit is wanted.
        print("Wikiengine %s not supported. Exiting." % (config['wikiengine']))
|
||||
|
||||
def createDumpPath(config={}):
    """ Choose the dump directory, or switch to resume mode.

    While resume has not been requested and config['path'] already exists as
    a directory, the user is asked whether to resume the dump found there:

    * yes: config['other']['resume'] is set to True and the path is kept.
      If the directory has no config file the program aborts.
    * no: the next candidate "<original path>-2", "-3", ... is tried.

    config is modified in place and also returned.
    """

    # Imported here because no top-level "import os" is visible in the
    # reviewed part of this file; this keeps the function self-contained.
    import os

    # raw_input only exists on Python 2; input is its Python 3 equivalent.
    try:
        ask = raw_input
    except NameError:
        ask = input

    # Suffixes are always appended to the original path, to avoid
    # concatenations such as blabla-2, blabla-2-3, and so on.
    originalpath = config['path']
    c = 2
    # Not entered at all if resume was requested from the beginning.
    while not config['other']['resume'] and os.path.isdir(config['path']):
        print('\nWarning!: "%s" path exists' % (config['path']))
        reply = ''
        while reply.lower() not in ['yes', 'y', 'no', 'n']:
            reply = ask(
                'There is a dump in "%s", probably incomplete.\nIf you choose resume, to avoid conflicts, the parameters you have chosen in the current session will be ignored\nand the parameters available in "%s/%s" will be loaded.\nDo you want to resume ([yes, y], [no, n])? ' %
                (config['path'],
                 config['path'],
                 config['other']['configfilename']))
        if reply.lower() in ['yes', 'y']:
            configfile = os.path.join(
                config['path'], config['other']['configfilename'])
            if not os.path.isfile(configfile):
                print('No config file found. I can\'t resume. Aborting.')
                # Non-zero status, matching the other error exits in this file.
                sys.exit(1)
            print('You have selected: YES')
            config['other']['resume'] = True
            break
        else:
            # The inner loop only ends on a yes/no answer, so this is "no".
            print('You have selected: NO')
            config['other']['resume'] = False
            config['path'] = '%s-%d' % (originalpath, c)
            print('Trying to use path "%s"...' % (config['path']))
            c += 1

    return config
|
||||
|
||||
def domain2prefix(config={}):
|
||||
""" Convert domain name to a valid prefix filename. """
|
||||
|
||||
@ -212,18 +250,26 @@ def getParameters(params=[]):
|
||||
print(getWikiEngine(url=args.wiki))
|
||||
sys.exit()
|
||||
|
||||
# Create session
|
||||
# Load cookies
|
||||
cj = cookielib.MozillaCookieJar()
|
||||
if args.cookies:
|
||||
cj.load(args.cookies)
|
||||
print('Using cookies from %s' % args.cookies)
|
||||
|
||||
session = requests.Session()
|
||||
session.cookies = cj
|
||||
session.headers.update({'User-Agent': getUserAgent()})
|
||||
# check user and pass (one requires both)
|
||||
if (args.user and not args.password) or (args.password and not args.user):
|
||||
print('ERROR: Both --user and --pass are required for authentication.')
|
||||
parser.print_help()
|
||||
sys.exit(1)
|
||||
|
||||
session = None
|
||||
if args.user and args.password:
|
||||
import requests
|
||||
session = requests.Session()
|
||||
session.cookies = cj
|
||||
session.headers.update({'User-Agent': getUserAgent()})
|
||||
session.auth = (args.user, args.password)
|
||||
# session.mount(args.mw_api.split('/api.php')[0], HTTPAdapter(max_retries=max_ret))
|
||||
#session.mount(args.mw_api.split('/api.php')[0], HTTPAdapter(max_retries=max_ret)) Mediawiki-centric, be careful
|
||||
|
||||
# check URLs
|
||||
for url in [args.mw_api, args.mw_index, args.wiki]:
|
||||
@ -235,8 +281,10 @@ def getParameters(params=[]):
|
||||
|
||||
wikiengine = getWikiEngine(args.wiki)
|
||||
if wikiengine == 'wikispaces':
|
||||
import wikispaces
|
||||
pass
|
||||
else: # presume is a mediawiki
|
||||
import mediawiki
|
||||
if not args.mw_api:
|
||||
api = mediawiki.mwGetAPI(url=args.wiki)
|
||||
if not api:
|
||||
@ -245,12 +293,6 @@ def getParameters(params=[]):
|
||||
index = mediawiki.mwGetIndex(url=args.wiki)
|
||||
if not index:
|
||||
print('ERROR: Provide a URL to Index.php')
|
||||
|
||||
# check user and pass (one requires both)
|
||||
if (args.user and not args.password) or (args.password and not args.user):
|
||||
print('ERROR: Both --user and --pass are required for authentication.')
|
||||
parser.print_help()
|
||||
sys.exit(1)
|
||||
|
||||
namespaces = ['all']
|
||||
exnamespaces = []
|
||||
@ -290,6 +332,7 @@ def getParameters(params=[]):
|
||||
|
||||
config = {
|
||||
'wiki': args.wiki,
|
||||
'wikicanonical': '',
|
||||
'wikiengine': wikiengine,
|
||||
'curonly': args.curonly,
|
||||
'date': datetime.datetime.now().strftime('%Y%m%d'),
|
||||
@ -304,6 +347,7 @@ def getParameters(params=[]):
|
||||
'delay': args.delay,
|
||||
'retries': int(args.retries),
|
||||
'other': {
|
||||
'configfilename': 'config.txt',
|
||||
'resume': args.resume,
|
||||
'filenamelimit': 100, # do not change
|
||||
'force': args.force,
|
||||
@ -343,12 +387,7 @@ def getWikiEngine(url=''):
|
||||
|
||||
wikiengine = 'unknown'
|
||||
if url:
|
||||
session = requests.Session()
|
||||
session.headers.update({'User-Agent': getUserAgent()})
|
||||
r = session.post(url=url)
|
||||
if r.status_code == 405 or r.text == '':
|
||||
r = session.get(url=url)
|
||||
result = r.text
|
||||
html = getURL(url=url)
|
||||
else:
|
||||
return wikiengine.lower()
|
||||
|
||||
@ -423,6 +462,26 @@ def getWikiEngine(url=''):
|
||||
|
||||
return wikiengine.lower()
|
||||
|
||||
def resumePreviousDump(config={}):
    """ Resume a dump with the backend selected by config['wikiengine'].

    'mediawiki' and 'wikispaces' are dispatched to their backend modules,
    which are imported lazily so only the selected engine's module has to be
    importable. For any other engine a "not supported" message is printed
    and the function returns without resuming anything.
    """

    wikiengine = config['wikiengine']
    if wikiengine == 'mediawiki':
        import mediawiki
        # The call must be qualified: "import mediawiki" binds only the
        # module name, so a bare mwResumePreviousDump() raises NameError.
        mediawiki.mwResumePreviousDump(config=config)
    elif wikiengine == 'wikispaces':
        import wikispaces
        wikispaces.wsResumePreviousDump(config=config)
    else:
        # NOTE(review): the message says "Exiting." but control simply
        # returns to the caller; confirm whether a real exit is wanted.
        print("Wikiengine %s not supported. Exiting." % (config['wikiengine']))
|
||||
|
||||
def saveConfig(config={}):
    """ Save config file

    Pickles a copy of config, with its 'other' entry emptied, to
    "<config['path']>/<config['other']['configfilename']>". The caller's
    config dict is left untouched.
    """

    # cPickle is Python 2 only; plain pickle is the Python 3 equivalent.
    try:
        import cPickle as pickle
    except ImportError:
        import pickle

    # Resolve the target before 'other' is blanked in the copy below.
    configpath = '%s/%s' % (config['path'], config['other']['configfilename'])

    # Do not save config['other'] as it has session info and other stuff.
    # A shallow copy is required: emptying 'other' through a plain alias
    # would also wipe it from the dict the caller keeps using.
    config2 = dict(config)
    config2['other'] = {}
    # Binary mode: pickle streams are bytes (mandatory on Python 3).
    with open(configpath, 'wb') as outfile:
        print('Saving config file...')
        pickle.dump(config2, outfile)
|
||||
|
||||
def welcome():
|
||||
""" Print opening message """
|
||||
|
||||
@ -450,6 +509,19 @@ def welcome():
|
||||
""" % (getVersion())
|
||||
print(message)
|
||||
|
||||
def loadConfig(config={}):
    """ Load config file

    Unpickles "<config['path']>/<config['other']['configfilename']>" and
    returns the stored configuration. If the file cannot be opened or
    unpickled, an error message is printed and the program exits with
    status 1.

    When the stored configuration has no (or an empty) 'other' entry, the
    'other' dict of the config passed in is carried over, so runtime keys
    such as 'resume' and 'configfilename' stay available to the caller.
    """

    # cPickle is Python 2 only; plain pickle is the Python 3 equivalent.
    try:
        import cPickle as pickle
    except ImportError:
        import pickle

    configpath = '%s/%s' % (config['path'], config['other']['configfilename'])
    try:
        # Binary mode: pickle streams are bytes (mandatory on Python 3).
        with open(configpath, 'rb') as infile:
            print('Loading config file...')
            # SECURITY: unpickling can execute arbitrary code; only resume
            # from dump directories that you created yourself.
            loaded = pickle.load(infile)
    except Exception:
        # Unpickling may raise many exception types; Exception (rather than
        # a bare except) still lets KeyboardInterrupt/SystemExit propagate.
        print('ERROR: There is no config file. we can\'t resume. Start a new dump.')
        # Non-zero status, matching the other error exits in this file.
        sys.exit(1)

    if not loaded.get('other'):
        loaded['other'] = config['other']
    return loaded
|
||||
|
||||
def main(params=[]):
|
||||
""" Main function """
|
||||
|
||||
@ -458,7 +530,23 @@ def main(params=[]):
|
||||
|
||||
welcome()
|
||||
avoidWikimediaProjects(config=config)
|
||||
config = createDumpPath(config=config)
|
||||
|
||||
if config['other']['resume']:
|
||||
config = loadConfig(config=config)
|
||||
else:
|
||||
os.mkdir(config['path'])
|
||||
saveConfig(config=config)
|
||||
|
||||
if config['other']['resume']:
|
||||
resumePreviousDump(config=config)
|
||||
else:
|
||||
createNewDump(config=config)
|
||||
|
||||
"""move to mw
|
||||
saveIndexPHP(config=config, session=other['session'])
|
||||
saveSpecialVersion(config=config, session=other['session'])
|
||||
saveSiteInfo(config=config, session=other['session'])"""
|
||||
bye()
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
Loading…
Reference in New Issue
Block a user