From 9fb8d4be0e384724fed0fd3ee2febdb7e985b12f Mon Sep 17 00:00:00 2001
From: emijrp
Date: Thu, 10 May 2018 09:04:08 +0200
Subject: [PATCH] file check

---
 wikispaces.py | 15 +++++++++++----
 1 file changed, 11 insertions(+), 4 deletions(-)

diff --git a/wikispaces.py b/wikispaces.py
index b3cf8de..6750031 100644
--- a/wikispaces.py
+++ b/wikispaces.py
@@ -59,8 +59,9 @@ def saveURL(wikidomain='', url='', filename='', path='', overwrite=False, iterat
         print('Download failed')
 
     #sometimes wikispaces returns invalid data, redownload in that cases
-    if os.path.exists(filename2) and \
-        filename2.split('.')[-1].lower() in ['csv', 'html', 'wikitext', 'xml']:
+    #only 'pages'. 'files' binaries are a pain to open and check
+    if (os.path.exists(filename2) and 'pages' in path) or \
+        (os.path.exists(filename2) and path == '' and filename2.split('.')[-1] in ['xml', 'html', 'csv']):
         sleep2 = 60 * iteration
         raw = ''
         with open(filename2, 'r') as f:
@@ -255,11 +256,17 @@ def main():
         if upload and not overwriteia:
             itemid = 'wiki-%s' % (wikidomain)
             try:
-                iahtml = urllib.request.urlopen('https://archive.org/details/%s' % (itemid)).read().decode('utf-8')
-                if not re.findall(r'Item cannot be found', iahtml):
+                iahtml = ''
+                try:
+                    iahtml = urllib.request.urlopen('https://archive.org/details/%s' % (itemid)).read().decode('utf-8')
+                except:
+                    time.sleep(10)
+                    iahtml = urllib.request.urlopen('https://archive.org/details/%s' % (itemid)).read().decode('utf-8')
+                if iahtml and not re.findall(r'Item cannot be found', iahtml):
                     if not overwriteia:
                         print('Warning: item exists on Internet Archive. Skipping wiki. Force with parameter --overwrite-ia')
                         print('You can find it in https://archive.org/details/%s' % (itemid))
+                        time.sleep(1)
                         continue
             except:
                 pass