diff --git a/wikispaces.py b/wikispaces.py index 5ecb45f..287ba62 100644 --- a/wikispaces.py +++ b/wikispaces.py @@ -71,9 +71,7 @@ def undoHTMLEntities(text=''): return text -def convertHTML2Wikitext(wikidomain='', filename='', path='', overwrite=False): - if not overwrite: - return +def convertHTML2Wikitext(wikidomain='', filename='', path=''): wikitext = '' wikitextfile = '%s/%s/%s' % (wikidomain, path, filename) if not os.path.exists(wikitextfile): @@ -88,11 +86,7 @@ def convertHTML2Wikitext(wikidomain='', filename='', path='', overwrite=False): wikitext = wikitext.split(m[0])[1].split('')[0].strip() wikitext = undoHTMLEntities(text=wikitext) except: - wikitext = '' - print('Error extracting wikitext.') - else: - wikitext = '' - print('Error extracting wikitext.') + pass f.write(wikitext) def downloadPage(wikidomain='', wikiurl='', pagename='', overwrite=False): @@ -108,7 +102,7 @@ def downloadPage(wikidomain='', wikiurl='', pagename='', overwrite=False): filename2 = '%s.wikitext' % (pagenameplus) print('Downloading page: %s' % (filename2)) saveURL(wikidomain=wikidomain, url=pageurl2, filename=filename2, path='pages', overwrite=overwrite) - convertHTML2Wikitext(wikidomain=wikidomain, filename=filename2, path='pages', overwrite=overwrite) + convertHTML2Wikitext(wikidomain=wikidomain, filename=filename2, path='pages') #csv with page history csvurl = '%s/page/history/%s?utable=WikiTablePageHistoryList&ut_csv=1' % (wikiurl, pagename_) @@ -245,6 +239,18 @@ def main(): print('\n') print('#'*40,'\n Downloading:', wikiurl) print('#'*40,'\n') + + if upload and not overwriteia: + itemid = 'wiki-%s' % (wikidomain) + try: + iahtml = urllib.request.urlopen('https://archive.org/details/%s' % (itemid)).read().decode('utf-8') + if not re.findall(r'Item cannot be found', iahtml): + if not overwriteia: + print('Warning: item exists on Internet Archive. Skipping wiki. Force with parameter --overwrite-ia') + continue + except: + pass + dirfiles = '%s/files' % (wikidomain) if not os.path.exists(dirfiles): print('Creating directory %s' % (dirfiles)) @@ -261,14 +267,6 @@ def main(): if upload: itemid = 'wiki-%s' % (wikidomain) - try: - iahtml = urllib.request.urlopen('https://archive.org/details/%s' % (itemid)).read().decode('utf-8') - if not re.findall(r'Item cannot be found', iahtml): - if not overwriteia: - print('Warning: item exists on Internet Archive. Skipping upload. Force upload with parameter --overwrite-ia') - continue - except: - pass print('\nCompressing dump...') wikidir = wikidomain os.chdir(wikidir)