From 47d6d583ef1909b09ed42e67f0b0082289936466 Mon Sep 17 00:00:00 2001 From: deadc0de6 Date: Wed, 19 Sep 2018 20:10:06 +0200 Subject: [PATCH] improve re-indexing --- catcli/catcli.py | 8 +++++--- catcli/noder.py | 7 +++---- catcli/walker.py | 47 ++++++++++++++++++++++++++++++----------------- 3 files changed, 38 insertions(+), 24 deletions(-) diff --git a/catcli/catcli.py b/catcli/catcli.py index 3ad6c24..43f28fd 100755 --- a/catcli/catcli.py +++ b/catcli/catcli.py @@ -83,12 +83,13 @@ def cmd_index(args, noder, catalog, top, debug=False): walker = Walker(noder, nohash=nohash, debug=debug) attr = noder.format_storage_attr(args['--meta']) root = noder.storage_node(name, path, parent=top, attr=attr) - _, cnt = walker.index(path, name, root) + _, cnt = walker.index(path, root, name) if subsize: noder.rec_size(root) stop = datetime.datetime.now() Logger.info('Indexed {} file(s) in {}'.format(cnt, stop - start)) - catalog.save(top) + if cnt > 0: + catalog.save(top) def cmd_update(args, noder, catalog, top, debug=False): @@ -110,7 +111,8 @@ def cmd_update(args, noder, catalog, top, debug=False): noder.rec_size(root) stop = datetime.datetime.now() Logger.info('updated {} file(s) in {}'.format(cnt, stop - start)) - catalog.save(top) + if cnt > 0: + catalog.save(top) def cmd_ls(args, noder, top): diff --git a/catcli/noder.py b/catcli/noder.py index c62bccc..2776e1f 100644 --- a/catcli/noder.py +++ b/catcli/noder.py @@ -67,7 +67,7 @@ class Noder: Logger.err('No node at path \"{}\"'.format(path)) return None - def get_node_if_newer(self, top, path): + def get_node_if_newer(self, top, path, maccess): '''return the node (if any) and if path is newer''' treepath = path.lstrip(os.sep) node = self.get_node(top, treepath, quiet=True) @@ -77,9 +77,8 @@ class Noder: if not node.maccess: # force re-indexing if no maccess return node, True - maccess = node.maccess - cur_maccess = os.path.getmtime(path) - if float(cur_maccess) > maccess: + old_maccess = node.maccess + if float(maccess) > float(old_maccess): return node, True return node, False diff --git a/catcli/walker.py b/catcli/walker.py index 64f2fc8..8364ccb 100644 --- a/catcli/walker.py +++ b/catcli/walker.py @@ -22,7 +22,7 @@ class Walker: self.noder.set_hashing(not nohash) self.debug = debug - def index(self, path, name, parent): + def index(self, path, parent, name): '''index a directory and store in tree''' if not parent: parent = noder.dir_node(name, path, parent) @@ -32,14 +32,17 @@ class Walker: for f in files: sub = os.path.join(root, f) self._log(f) + self._debug('index file {}'.format(sub)) self.noder.file_node(os.path.basename(f), sub, parent, path) cnt += 1 for d in dirs: base = os.path.basename(d) sub = os.path.join(root, d) + self._debug('index directory {}'.format(sub)) dummy = self.noder.dir_node(base, sub, parent, path) - _, cnt2 = self.index(sub, base, dummy) + cnt += 1 + _, cnt2 = self.index(sub, dummy, base) cnt += cnt2 break self._log(None) @@ -50,44 +53,54 @@ class Walker: cnt = 0 for (root, dirs, files) in os.walk(path): for f in files: + self._debug('found file {}'.format(f)) sub = os.path.join(root, f) - if not self._need_reindex(top, sub): - self._debug('ignore {}'.format(sub)) + maccess = os.path.getmtime(sub) + reindex, _ = self._need_reindex(parent, f, maccess) + if not reindex: + self._debug('\tignore file {}'.format(sub)) continue - self._debug('re-index {}'.format(sub)) + self._debug('\tre-index file {}'.format(sub)) self._log(f) self.noder.file_node(os.path.basename(f), sub, parent, path) cnt += 1 for d in dirs: + self._debug('found dir {}'.format(d)) base = os.path.basename(d) sub = os.path.join(root, d) - if not self._need_reindex(top, sub): - self._debug('ignore {}'.format(sub)) - continue - self._debug('re-index {}'.format(sub)) - dummy = self.noder.dir_node(base, sub, parent, path) - cnt2 = self.reindex(sub, dummy, top) - cnt += cnt2 + maccess = os.path.getmtime(sub) + reindex, dummy = self._need_reindex(parent, base, maccess) + if reindex: + self._debug('\tre-index directory {}'.format(sub)) + dummy = self.noder.dir_node(base, sub, parent, path) + cnt2 = self.reindex(sub, dummy, top) + cnt += cnt2 break self._log(None) return cnt - def _need_reindex(self, top, path): + def _need_reindex(self, top, path, maccess): '''test if node needs re-indexing''' - cnode, newer = self.noder.get_node_if_newer(top, path) + cnode, newer = self.noder.get_node_if_newer(top, path, maccess) + if not cnode: + self._debug('\tdoes not exist') + return True, cnode if cnode and not newer: # ignore this node - return False + self._debug('\tis not newer') + return False, cnode if cnode and newer: # remove this node and re-add + self._debug('\tis newer') cnode.parent = None - return True + self._debug('\tis to be re-indexed') + return True, cnode def _debug(self, string): if not self.debug: return - Logger.info(string) + Logger.log(string) def _log(self, string): if self.debug: