mirror of
https://github.com/WikiTeam/wikiteam
synced 2024-11-12 07:12:41 +00:00
bug wikitext
This commit is contained in:
parent
ffff6cf568
commit
e01b2fb0c3
@ -71,9 +71,7 @@ def undoHTMLEntities(text=''):
|
||||
|
||||
return text
|
||||
|
||||
def convertHTML2Wikitext(wikidomain='', filename='', path='', overwrite=False):
|
||||
if not overwrite:
|
||||
return
|
||||
def convertHTML2Wikitext(wikidomain='', filename='', path=''):
|
||||
wikitext = ''
|
||||
wikitextfile = '%s/%s/%s' % (wikidomain, path, filename)
|
||||
if not os.path.exists(wikitextfile):
|
||||
@ -88,11 +86,7 @@ def convertHTML2Wikitext(wikidomain='', filename='', path='', overwrite=False):
|
||||
wikitext = wikitext.split(m[0])[1].split('</pre>')[0].strip()
|
||||
wikitext = undoHTMLEntities(text=wikitext)
|
||||
except:
|
||||
wikitext = ''
|
||||
print('Error extracting wikitext.')
|
||||
else:
|
||||
wikitext = ''
|
||||
print('Error extracting wikitext.')
|
||||
pass
|
||||
f.write(wikitext)
|
||||
|
||||
def downloadPage(wikidomain='', wikiurl='', pagename='', overwrite=False):
|
||||
@ -108,7 +102,7 @@ def downloadPage(wikidomain='', wikiurl='', pagename='', overwrite=False):
|
||||
filename2 = '%s.wikitext' % (pagenameplus)
|
||||
print('Downloading page: %s' % (filename2))
|
||||
saveURL(wikidomain=wikidomain, url=pageurl2, filename=filename2, path='pages', overwrite=overwrite)
|
||||
convertHTML2Wikitext(wikidomain=wikidomain, filename=filename2, path='pages', overwrite=overwrite)
|
||||
convertHTML2Wikitext(wikidomain=wikidomain, filename=filename2, path='pages')
|
||||
|
||||
#csv with page history
|
||||
csvurl = '%s/page/history/%s?utable=WikiTablePageHistoryList&ut_csv=1' % (wikiurl, pagename_)
|
||||
@ -245,6 +239,18 @@ def main():
|
||||
print('\n')
|
||||
print('#'*40,'\n Downloading:', wikiurl)
|
||||
print('#'*40,'\n')
|
||||
|
||||
if upload and not overwriteia:
|
||||
itemid = 'wiki-%s' % (wikidomain)
|
||||
try:
|
||||
iahtml = urllib.request.urlopen('https://archive.org/details/%s' % (itemid)).read().decode('utf-8')
|
||||
if not re.findall(r'Item cannot be found', iahtml):
|
||||
if not overwriteia:
|
||||
print('Warning: item exists on Internet Archive. Skipping wiki. Force with parameter --overwrite-ia')
|
||||
continue
|
||||
except:
|
||||
pass
|
||||
|
||||
dirfiles = '%s/files' % (wikidomain)
|
||||
if not os.path.exists(dirfiles):
|
||||
print('Creating directory %s' % (dirfiles))
|
||||
@ -261,14 +267,6 @@ def main():
|
||||
|
||||
if upload:
|
||||
itemid = 'wiki-%s' % (wikidomain)
|
||||
try:
|
||||
iahtml = urllib.request.urlopen('https://archive.org/details/%s' % (itemid)).read().decode('utf-8')
|
||||
if not re.findall(r'Item cannot be found', iahtml):
|
||||
if not overwriteia:
|
||||
print('Warning: item exists on Internet Archive. Skipping upload. Force upload with parameter --overwrite-ia')
|
||||
continue
|
||||
except:
|
||||
pass
|
||||
print('\nCompressing dump...')
|
||||
wikidir = wikidomain
|
||||
os.chdir(wikidir)
|
||||
|
Loading…
Reference in New Issue
Block a user