utf8 latin1

2024-11-12 07:12:41 +00:00 · 2018-05-20 20:36:08 +02:00 · 2018-05-20 20:36:08 +02:00 · 3b0d4fef5e
commit 3b0d4fef5e
parent 3a56037279
1 changed file with 23 additions and 12 deletions
--- a/wikispaces.py
+++ b/wikispaces.py
@@ -183,14 +183,20 @@ def downloadMainPage(wikidomain='', wikiurl='', overwrite=False):
 def downloadLogo(wikidomain='', wikiurl='', overwrite=False):
    index = '%s/index.html' % (wikidomain)
    if os.path.exists(index):
-        with open(index, 'r') as f:
-            m = re.findall(r'class="WikiLogo WikiElement"><img src="([^<> "]+?)"', f.read())
-            if m:
-                logourl = m[0]
-                logofilename = logourl.split('/')[-1]
-                print('Downloading logo')
-                saveURL(wikidomain=wikidomain, url=logourl, filename=logofilename, path='', overwrite=overwrite)
-                return logofilename
+        raw = ''
+        try:
+            with open(index, 'r', encoding='utf-8') as f:
+                raw = f.read()
+        except:
+            with open(index, 'r', encoding='latin-1') as f:
+                raw = f.read()
+        m = re.findall(r'class="WikiLogo WikiElement"><img src="([^<> "]+?)"', raw)
+        if m:
+            logourl = m[0]
+            logofilename = logourl.split('/')[-1]
+            print('Downloading logo')
+            saveURL(wikidomain=wikidomain, url=logourl, filename=logofilename, path='', overwrite=overwrite)
+            return logofilename
    return ''

 def printhelp():
@@ -266,7 +272,7 @@ def main():
                except:
                    time.sleep(10)
                    iahtml = urllib.request.urlopen('https://archive.org/details/%s' % (itemid)).read().decode('utf-8')
-                if iahtml and not re.findall(r'Item cannot be found', iahtml):
+                if iahtml and not re.findall(r'(?im)Item cannot be found', iahtml):
                    if not overwriteia:
                        print('Warning: item exists on Internet Archive. Skipping wiki. Force with parameter --overwrite-ia')
                        print('You can find it in https://archive.org/details/%s' % (itemid))
@@ -305,9 +311,14 @@ def main():
            if not os.path.exists(indexfilename):
                print('\nError dump incomplete, skipping upload\n')
                continue
-            f = open(indexfilename, 'r')
-            indexhtml = f.read()
-            f.close()
+            indexhtml = ''
+            try:
+                with open(indexfilename, 'r', encoding='utf-8') as f:
+                    indexhtml = f.read()
+            except:
+                with open(indexfilename, 'r', encoding='latin-1') as f:
+                    indexhtml = f.read()
+            
            wikititle = ''
            try:
                wikititle = indexhtml.split('wiki: {')[1].split('}')[0].split("text: '")[1].split("',")[0].strip()