From 3b74173e0fc42a138505fdf00a24359ccde634bc Mon Sep 17 00:00:00 2001 From: Federico Leva Date: Tue, 22 May 2018 21:44:18 +0300 Subject: [PATCH] launcher.py style and minor changes --- launcher.py | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/launcher.py b/launcher.py index cf09fcb..4f5e3f7 100644 --- a/launcher.py +++ b/launcher.py @@ -6,12 +6,12 @@ # it under the terms of the GNU General Public License as published by # the Free Software Foundation, either version 3 of the License, or # (at your option) any later version. -# +# # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. -# +# # You should have received a copy of the GNU General Public License # along with this program. If not, see <http://www.gnu.org/licenses/>. @@ -30,11 +30,11 @@ def main(): if len(sys.argv) < 2: print 'python script.py file-with-apis.txt' sys.exit() - + print 'Reading list of APIs from', sys.argv[1] wikis = open(sys.argv[1], 'r').read().splitlines() print '%d APIs found' % (len(wikis)) - + for wiki in wikis: print "#"*73 print "# Downloading", wiki @@ -42,7 +42,7 @@ def main(): wiki = wiki.lower() # Make the prefix in standard way; api and index must be defined, not important which is which prefix = dumpgenerator.domain2prefix(config={'api': wiki, 'index': wiki}) - + #check if compressed, in that case dump was finished previously compressed = False for f in os.listdir('.'): @@ -50,7 +50,7 @@ def main(): compressed = True zipfilename = f break #stop searching, dot not explore subdirectories - + if compressed: print 'Skipping... This wiki was downloaded and compressed before in', zipfilename # Get the archive's file list. @@ -65,17 +65,17 @@ def main(): print "WARNING: Content of the archive not checked, we need python 2.7+ or 3.1+." 
# TODO: Find a way like grep -q below without doing a 7z l multiple times? continue - + #download started = False #was this wiki download started before? then resume wikidir = '' for f in os.listdir('.'): # Does not find numbered wikidumps not verify directories - if d.startswith(prefix) and d.endswith('wikidump'): - wikidir = d + if f.startswith(prefix) and f.endswith('wikidump'): + wikidir = f started = True break #stop searching, dot not explore subdirectories - + # time.sleep(60) # Uncomment what above and add --delay=60 in the dumpgenerator.py calls below for broken wiki farms # such as editthis.info, wiki-site.com, wikkii (adjust the value as needed; @@ -89,12 +89,12 @@ def main(): #save wikidir now for f in os.listdir('.'): # Does not find numbered wikidumps not verify directories - if d.startswith(prefix) and d.endswith('wikidump'): - wikidir = d + if f.startswith(prefix) and f.endswith('wikidump'): + wikidir = f break #stop searching, dot not explore subdirectories - + prefix = wikidir.split('-wikidump')[0] - + finished = False if started and wikidir and prefix: if (subprocess.call (['tail -n 1 %s/%s-history.xml | grep -q "</mediawiki>"' % (wikidir, prefix)], shell=True) ): @@ -103,7 +103,7 @@ def main(): finished = True # You can also issue this on your working directory to find all incomplete dumps: # tail -n 1 */*-history.xml | grep -Ev -B 1 "</page>|</mediawiki>|==|^$" - + #compress if finished: time.sleep(1)