mirror of
https://github.com/WikiTeam/wikiteam
synced 2024-11-12 07:12:41 +00:00
utf8 latin1
This commit is contained in:
parent
3a56037279
commit
3b0d4fef5e
@ -183,14 +183,20 @@ def downloadMainPage(wikidomain='', wikiurl='', overwrite=False):
|
||||
def downloadLogo(wikidomain='', wikiurl='', overwrite=False):
    """Download the wiki logo referenced by the saved main page.

    Reads ``<wikidomain>/index.html``, takes the first image URL found in a
    ``class="WikiLogo WikiElement"`` element and hands it to ``saveURL``.

    Args:
        wikidomain: Directory that holds ``index.html``; also forwarded to
            ``saveURL``.
        wikiurl: Not used by this function.
        overwrite: Forwarded unchanged to ``saveURL``.

    Returns:
        The logo file name (the last ``/``-separated segment of the logo
        URL), or ``''`` when ``index.html`` does not exist or contains no
        logo element.
    """
    index = '%s/index.html' % (wikidomain)
    if not os.path.exists(index):
        return ''

    # Try UTF-8 first and fall back to latin-1 only on a decoding failure.
    # latin-1 maps every byte to a character, so the fallback cannot raise a
    # decoding error itself. Any other error (permissions, interrupts, ...)
    # propagates instead of being masked by a blanket ``except``.
    try:
        with open(index, 'r', encoding='utf-8') as f:
            raw = f.read()
    except UnicodeDecodeError:
        with open(index, 'r', encoding='latin-1') as f:
            raw = f.read()

    m = re.search(r'class="WikiLogo WikiElement"><img src="([^<> "]+?)"', raw)
    if not m:
        return ''

    logourl = m.group(1)
    logofilename = logourl.split('/')[-1]
    print('Downloading logo')
    saveURL(wikidomain=wikidomain, url=logourl, filename=logofilename, path='', overwrite=overwrite)
    return logofilename
|
||||
|
||||
def printhelp():
|
||||
@ -266,7 +272,7 @@ def main():
|
||||
except:
|
||||
time.sleep(10)
|
||||
iahtml = urllib.request.urlopen('https://archive.org/details/%s' % (itemid)).read().decode('utf-8')
|
||||
if iahtml and not re.findall(r'Item cannot be found', iahtml):
|
||||
if iahtml and not re.findall(r'(?im)Item cannot be found', iahtml):
|
||||
if not overwriteia:
|
||||
print('Warning: item exists on Internet Archive. Skipping wiki. Force with parameter --overwrite-ia')
|
||||
print('You can find it in https://archive.org/details/%s' % (itemid))
|
||||
@ -305,9 +311,14 @@ def main():
|
||||
if not os.path.exists(indexfilename):
|
||||
print('\nError dump incomplete, skipping upload\n')
|
||||
continue
|
||||
f = open(indexfilename, 'r')
|
||||
indexhtml = f.read()
|
||||
f.close()
|
||||
indexhtml = ''
|
||||
try:
|
||||
with open(indexfilename, 'r', encoding='utf-8') as f:
|
||||
indexhtml = f.read()
|
||||
except:
|
||||
with open(indexfilename, 'r', encoding='latin-1') as f:
|
||||
indexhtml = f.read()
|
||||
|
||||
wikititle = ''
|
||||
try:
|
||||
wikititle = indexhtml.split('wiki: {')[1].split('}')[0].split("text: '")[1].split("',")[0].strip()
|
||||
|
Loading…
Reference in New Issue
Block a user