file check

2024-11-12 07:12:41 +00:00 · 2018-05-10 09:04:08 +02:00 · 2018-05-10 09:04:08 +02:00 · 9fb8d4be0e
commit 9fb8d4be0e
parent 8c30b3a2b9
1 changed file with 11 additions and 4 deletions
--- a/wikispaces.py
+++ b/wikispaces.py
@@ -59,8 +59,9 @@ def saveURL(wikidomain='', url='', filename='', path='', overwrite=False, iterat
        print('Download failed')
    
    #sometimes wikispaces returns invalid data, redownload in that cases
-    if os.path.exists(filename2) and \
-        filename2.split('.')[-1].lower() in ['csv', 'html', 'wikitext', 'xml']:
+    #only 'pages'. 'files' binaries are a pain to open and check
+    if (os.path.exists(filename2) and 'pages' in path) or \
+        (os.path.exists(filename2) and path == '' and filename2.split('.')[-1] in ['xml', 'html', 'csv']):
        sleep2 = 60 * iteration
        raw = ''
        with open(filename2, 'r') as f:
@@ -255,11 +256,17 @@ def main():
        if upload and not overwriteia:
            itemid = 'wiki-%s' % (wikidomain)
            try:
-                iahtml = urllib.request.urlopen('https://archive.org/details/%s' % (itemid)).read().decode('utf-8')
-                if not re.findall(r'Item cannot be found', iahtml):
+                iahtml = ''
+                try:
+                    iahtml = urllib.request.urlopen('https://archive.org/details/%s' % (itemid)).read().decode('utf-8')
+                except:
+                    time.sleep(10)
+                    iahtml = urllib.request.urlopen('https://archive.org/details/%s' % (itemid)).read().decode('utf-8')
+                if iahtml and not re.findall(r'Item cannot be found', iahtml):
                    if not overwriteia:
                        print('Warning: item exists on Internet Archive. Skipping wiki. Force with parameter --overwrite-ia')
                        print('You can find it in https://archive.org/details/%s' % (itemid))
+                        time.sleep(1)
                        continue
            except:
                pass