mirror of
https://github.com/WikiTeam/wikiteam
synced 2024-11-12 07:12:41 +00:00
file check
This commit is contained in:
parent
8c30b3a2b9
commit
9fb8d4be0e
@ -59,8 +59,9 @@ def saveURL(wikidomain='', url='', filename='', path='', overwrite=False, iterat
|
||||
print('Download failed')
|
||||
|
||||
#sometimes wikispaces returns invalid data, redownload in those cases
|
||||
if os.path.exists(filename2) and \
|
||||
filename2.split('.')[-1].lower() in ['csv', 'html', 'wikitext', 'xml']:
|
||||
#only 'pages'. 'files' binaries are a pain to open and check
|
||||
if (os.path.exists(filename2) and 'pages' in path) or \
|
||||
(os.path.exists(filename2) and path == '' and filename2.split('.')[-1] in ['xml', 'html', 'csv']):
|
||||
sleep2 = 60 * iteration
|
||||
raw = ''
|
||||
with open(filename2, 'r') as f:
|
||||
@ -255,11 +256,17 @@ def main():
|
||||
if upload and not overwriteia:
|
||||
itemid = 'wiki-%s' % (wikidomain)
|
||||
try:
|
||||
iahtml = urllib.request.urlopen('https://archive.org/details/%s' % (itemid)).read().decode('utf-8')
|
||||
if not re.findall(r'Item cannot be found', iahtml):
|
||||
iahtml = ''
|
||||
try:
|
||||
iahtml = urllib.request.urlopen('https://archive.org/details/%s' % (itemid)).read().decode('utf-8')
|
||||
except:
|
||||
time.sleep(10)
|
||||
iahtml = urllib.request.urlopen('https://archive.org/details/%s' % (itemid)).read().decode('utf-8')
|
||||
if iahtml and not re.findall(r'Item cannot be found', iahtml):
|
||||
if not overwriteia:
|
||||
print('Warning: item exists on Internet Archive. Skipping wiki. Force with parameter --overwrite-ia')
|
||||
print('You can find it in https://archive.org/details/%s' % (itemid))
|
||||
time.sleep(1)
|
||||
continue
|
||||
except:
|
||||
pass
|
||||
|
Loading…
Reference in New Issue
Block a user