mirror of
https://github.com/WikiTeam/wikiteam
synced 2024-11-12 07:12:41 +00:00
uploader included
This commit is contained in:
parent
254486af06
commit
2fe1c0b6b2
@ -19,12 +19,18 @@
|
||||
# Documentation for developers: http://wikiteam.readthedocs.com
|
||||
|
||||
import csv
|
||||
import datetime
|
||||
import os
|
||||
import re
|
||||
import subprocess
|
||||
import sys
|
||||
import time
|
||||
import urllib.request
|
||||
|
||||
# Requirements:
|
||||
# zip command (apt-get install zip)
|
||||
# ia command (pip install internetarchive)
|
||||
|
||||
def saveURL(wikidomain='', url='', filename='', path=''):
|
||||
filename2 = '%s/%s' % (wikidomain, filename)
|
||||
if path:
|
||||
@ -142,12 +148,29 @@ def downloadPagesAndFiles(wikidomain='', wikiurl=''):
|
||||
print('Downloaded %d files' % (filesc))
|
||||
|
||||
def downloadSitemap(wikidomain='', wikiurl=''):
    """Save the resource at `wikiurl` as sitemap.xml for `wikidomain`.

    wikidomain: wiki domain, forwarded to saveURL as the dump location.
    wikiurl: URL to fetch; the caller is expected to pass the sitemap URL.
    """
    print('Downloading sitemap.xml')
    sitemapname = 'sitemap.xml'
    saveURL(
        wikidomain=wikidomain,
        url=wikiurl,
        filename=sitemapname,
        path='',
    )
|
||||
|
||||
def downloadMainPage(wikidomain='', wikiurl=''):
    """Save the page at `wikiurl` as index.html for `wikidomain`.

    wikidomain: wiki domain, forwarded to saveURL as the dump location.
    wikiurl: URL of the wiki main page to fetch.
    """
    print('Downloading index.html')
    indexname = 'index.html'
    saveURL(
        wikidomain=wikidomain,
        url=wikiurl,
        filename=indexname,
        path='',
    )
|
||||
|
||||
def downloadLogo(wikidomain='', wikiurl=''):
    """Download the wiki logo referenced by the saved main page.

    Reads `<wikidomain>/index.html`, takes the first image found inside a
    `WikiLogo WikiElement` container and hands its URL to saveURL, using the
    last path segment of that URL as the filename.

    wikidomain: directory of the dump; index.html is read from it.
    wikiurl: not used by this function; kept so the signature matches the
        other download helpers.

    Returns the logo filename, or '' when index.html does not exist or no
    logo markup is found in it.
    """
    index = '%s/index.html' % (wikidomain)
    if not os.path.exists(index):
        return ''

    # Decode explicitly instead of relying on the platform default encoding,
    # and replace undecodable bytes: a single bad byte in the saved page must
    # not abort the whole run with UnicodeDecodeError.
    with open(index, 'r', encoding='utf-8', errors='replace') as f:
        html = f.read()

    m = re.search(r'class="WikiLogo WikiElement"><img src="([^<> "]+?)"', html)
    if not m:
        return ''

    logourl = m.group(1)
    logofilename = logourl.split('/')[-1]
    print('Downloading logo')
    saveURL(wikidomain=wikidomain, url=logourl, filename=logofilename, path='')
    return logofilename
|
||||
|
||||
def main():
|
||||
upload = False
|
||||
isadmin = False
|
||||
if len(sys.argv) < 2:
|
||||
print('Please, introduce a wikispaces wiki url or filename.\nExample: https://yourwiki.wikispaces.com or mylistofwikis.txt')
|
||||
sys.exit()
|
||||
@ -155,6 +178,11 @@ def main():
|
||||
if not param:
|
||||
print('Please, introduce a wikispaces wiki url or filename.\nExample: https://yourwiki.wikispaces.com or mylistofwikis.txt')
|
||||
sys.exit()
|
||||
if len(sys.argv) > 2:
|
||||
if '--upload' in sys.argv:
|
||||
upload = True
|
||||
if '--admin' in sys.argv:
|
||||
isadmin = True
|
||||
|
||||
wikilist = []
|
||||
if '://' in param:
|
||||
@ -169,17 +197,65 @@ def main():
|
||||
|
||||
for wikiurl in wikilist:
|
||||
wikidomain = wikiurl.split('://')[1].split('/')[0]
|
||||
print('#'*40,'\n Analyzing:', wikiurl)
|
||||
print('#'*40,'\n Downloading:', wikiurl)
|
||||
print('#'*40,'\n')
|
||||
print('Creating directories for %s' % (wikidomain))
|
||||
if not os.path.exists('%s/files' % (wikidomain)):
|
||||
os.makedirs('%s/files' % (wikidomain))
|
||||
if not os.path.exists('%s/pages' % (wikidomain)):
|
||||
os.makedirs('%s/pages' % (wikidomain))
|
||||
dirfiles = '%s/files' % (wikidomain)
|
||||
if not os.path.exists(dirfiles):
|
||||
print('Creating directory %s' % (dirfiles))
|
||||
os.makedirs(dirfiles)
|
||||
dirpages = '%s/pages' % (wikidomain)
|
||||
if not os.path.exists(dirpages):
|
||||
print('Creating directory %s' % (dirpages))
|
||||
os.makedirs(dirpages)
|
||||
downloadPagesAndFiles(wikidomain=wikidomain, wikiurl=wikiurl)
|
||||
sitemapurl = 'https://%s/sitemap.xml' % (wikidomain)
|
||||
downloadSitemap(wikidomain=wikidomain, wikiurl=sitemapurl)
|
||||
downloadMainPage(wikidomain=wikidomain, wikiurl=wikiurl)
|
||||
logofilename = downloadLogo(wikidomain=wikidomain, wikiurl=wikiurl)
|
||||
|
||||
if upload:
|
||||
print('\nCompressing dump...')
|
||||
wikidir = wikidomain
|
||||
os.chdir(wikidir)
|
||||
print('Changed directory to', os.getcwd())
|
||||
wikizip = '%s.zip' % (wikidomain)
|
||||
subprocess.call('zip' + ' -r ../%s files/ pages/ index.html pages-and-files.csv sitemap.xml %s' % (wikizip, logofilename), shell=True)
|
||||
os.chdir('..')
|
||||
print('Changed directory to', os.getcwd())
|
||||
|
||||
print('\nUploading to Internet Archive...')
|
||||
indexfilename = '%s/index.html' % (wikidir)
|
||||
if not os.path.exists(indexfilename):
|
||||
print('\nError dump incomplete, skipping upload\n')
|
||||
continue
|
||||
f = open(indexfilename, 'r')
|
||||
indexhtml = f.read()
|
||||
f.close()
|
||||
itemid = 'wiki-%s' % (wikidomain)
|
||||
wikititle = ''
|
||||
try:
|
||||
wikititle = indexhtml.split('wiki: {')[1].split('}')[0].split("text: '")[1].split("',")[0].strip()
|
||||
except:
|
||||
wikititle = wikidomain
|
||||
if not wikititle:
|
||||
wikititle = wikidomain
|
||||
itemtitle = 'Wiki - %s' % wikititle
|
||||
itemdesc = '<a href=\"%s\">%s</a> dumped with <a href=\"https://github.com/WikiTeam/wikiteam\" rel=\"nofollow\">WikiTeam</a> tools.' % (wikiurl, wikititle)
|
||||
itemtags = ['wiki', 'wikiteam', 'wikispaces', wikititle, wikidomain.split('.wikispaces.com')[0], wikidomain]
|
||||
itemoriginalurl = wikiurl
|
||||
itemlicenseurl = ''
|
||||
m = re.findall(r'<a rel="license" href="([^<>]+?)">', indexhtml.split('<div class="WikiLicense')[1].split('</div>')[0])
|
||||
if m:
|
||||
itemlicenseurl = m[0]
|
||||
if not itemlicenseurl:
|
||||
itemtags.append('unknowncopyright')
|
||||
itemtags_ = ' '.join(["--metadata='subject:%s'" % (tag) for tag in itemtags])
|
||||
itemcollection = isadmin and 'wikiteam' or 'opensource'
|
||||
itemlang = 'Unknown'
|
||||
itemdate = datetime.datetime.now().strftime("%Y-%m-%d")
|
||||
itemlogo = logofilename and '%s/%s' % (wikidir, logofilename) or ''
|
||||
subprocess.call('ia' + ' upload %s %s %s --metadata="mediatype:web" --metadata="collection:%s" --metadata="title:%s" --metadata="description:%s" --metadata="language:%s" --metadata="last-updated-date:%s" %s %s' % (itemid, wikizip, itemlogo and itemlogo or '', itemcollection, itemtitle, itemdesc, itemlang, itemdate, itemlicenseurl and '--metadata="licenseurl:%s"' % (itemlicenseurl) or '', itemtags_), shell=True)
|
||||
print('You can find it in https://archive.org/details/%s' % (itemid))
|
||||
|
||||
# Run the downloader only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
|
||||
|
Loading…
Reference in New Issue
Block a user