improve re-indexing

pull/6/head
deadc0de6 6 years ago
parent 4cce16ff64
commit 47d6d583ef

@ -83,12 +83,13 @@ def cmd_index(args, noder, catalog, top, debug=False):
walker = Walker(noder, nohash=nohash, debug=debug) walker = Walker(noder, nohash=nohash, debug=debug)
attr = noder.format_storage_attr(args['--meta']) attr = noder.format_storage_attr(args['--meta'])
root = noder.storage_node(name, path, parent=top, attr=attr) root = noder.storage_node(name, path, parent=top, attr=attr)
_, cnt = walker.index(path, name, root) _, cnt = walker.index(path, root, name)
if subsize: if subsize:
noder.rec_size(root) noder.rec_size(root)
stop = datetime.datetime.now() stop = datetime.datetime.now()
Logger.info('Indexed {} file(s) in {}'.format(cnt, stop - start)) Logger.info('Indexed {} file(s) in {}'.format(cnt, stop - start))
catalog.save(top) if cnt > 0:
catalog.save(top)
def cmd_update(args, noder, catalog, top, debug=False): def cmd_update(args, noder, catalog, top, debug=False):
@ -110,7 +111,8 @@ def cmd_update(args, noder, catalog, top, debug=False):
noder.rec_size(root) noder.rec_size(root)
stop = datetime.datetime.now() stop = datetime.datetime.now()
Logger.info('updated {} file(s) in {}'.format(cnt, stop - start)) Logger.info('updated {} file(s) in {}'.format(cnt, stop - start))
catalog.save(top) if cnt > 0:
catalog.save(top)
def cmd_ls(args, noder, top): def cmd_ls(args, noder, top):

@ -67,7 +67,7 @@ class Noder:
Logger.err('No node at path \"{}\"'.format(path)) Logger.err('No node at path \"{}\"'.format(path))
return None return None
def get_node_if_newer(self, top, path): def get_node_if_newer(self, top, path, maccess):
'''return the node (if any) and if path is newer''' '''return the node (if any) and if path is newer'''
treepath = path.lstrip(os.sep) treepath = path.lstrip(os.sep)
node = self.get_node(top, treepath, quiet=True) node = self.get_node(top, treepath, quiet=True)
@ -77,9 +77,8 @@ class Noder:
if not node.maccess: if not node.maccess:
# force re-indexing if no maccess # force re-indexing if no maccess
return node, True return node, True
maccess = node.maccess old_maccess = node.maccess
cur_maccess = os.path.getmtime(path) if float(maccess) > float(old_maccess):
if float(cur_maccess) > maccess:
return node, True return node, True
return node, False return node, False

@ -22,7 +22,7 @@ class Walker:
self.noder.set_hashing(not nohash) self.noder.set_hashing(not nohash)
self.debug = debug self.debug = debug
def index(self, path, name, parent): def index(self, path, parent, name):
'''index a directory and store in tree''' '''index a directory and store in tree'''
if not parent: if not parent:
parent = noder.dir_node(name, path, parent) parent = noder.dir_node(name, path, parent)
@ -32,14 +32,17 @@ class Walker:
for f in files: for f in files:
sub = os.path.join(root, f) sub = os.path.join(root, f)
self._log(f) self._log(f)
self._debug('index file {}'.format(sub))
self.noder.file_node(os.path.basename(f), sub, self.noder.file_node(os.path.basename(f), sub,
parent, path) parent, path)
cnt += 1 cnt += 1
for d in dirs: for d in dirs:
base = os.path.basename(d) base = os.path.basename(d)
sub = os.path.join(root, d) sub = os.path.join(root, d)
self._debug('index directory {}'.format(sub))
dummy = self.noder.dir_node(base, sub, parent, path) dummy = self.noder.dir_node(base, sub, parent, path)
_, cnt2 = self.index(sub, base, dummy) cnt += 1
_, cnt2 = self.index(sub, dummy, base)
cnt += cnt2 cnt += cnt2
break break
self._log(None) self._log(None)
@ -50,44 +53,54 @@ class Walker:
cnt = 0 cnt = 0
for (root, dirs, files) in os.walk(path): for (root, dirs, files) in os.walk(path):
for f in files: for f in files:
self._debug('found file {}'.format(f))
sub = os.path.join(root, f) sub = os.path.join(root, f)
if not self._need_reindex(top, sub): maccess = os.path.getmtime(sub)
self._debug('ignore {}'.format(sub)) reindex, _ = self._need_reindex(parent, f, maccess)
if not reindex:
self._debug('\tignore file {}'.format(sub))
continue continue
self._debug('re-index {}'.format(sub)) self._debug('\tre-index file {}'.format(sub))
self._log(f) self._log(f)
self.noder.file_node(os.path.basename(f), sub, self.noder.file_node(os.path.basename(f), sub,
parent, path) parent, path)
cnt += 1 cnt += 1
for d in dirs: for d in dirs:
self._debug('found dir {}'.format(d))
base = os.path.basename(d) base = os.path.basename(d)
sub = os.path.join(root, d) sub = os.path.join(root, d)
if not self._need_reindex(top, sub): maccess = os.path.getmtime(sub)
self._debug('ignore {}'.format(sub)) reindex, dummy = self._need_reindex(parent, base, maccess)
continue if reindex:
self._debug('re-index {}'.format(sub)) self._debug('\tre-index directory {}'.format(sub))
dummy = self.noder.dir_node(base, sub, parent, path) dummy = self.noder.dir_node(base, sub, parent, path)
cnt2 = self.reindex(sub, dummy, top) cnt2 = self.reindex(sub, dummy, top)
cnt += cnt2 cnt += cnt2
break break
self._log(None) self._log(None)
return cnt return cnt
def _need_reindex(self, top, path): def _need_reindex(self, top, path, maccess):
'''test if node needs re-indexing''' '''test if node needs re-indexing'''
cnode, newer = self.noder.get_node_if_newer(top, path) cnode, newer = self.noder.get_node_if_newer(top, path, maccess)
if not cnode:
self._debug('\tdoes not exist')
return True, cnode
if cnode and not newer: if cnode and not newer:
# ignore this node # ignore this node
return False self._debug('\tis not newer')
return False, cnode
if cnode and newer: if cnode and newer:
# remove this node and re-add # remove this node and re-add
self._debug('\tis newer')
cnode.parent = None cnode.parent = None
return True self._debug('\tis to be re-indexed')
return True, cnode
def _debug(self, string): def _debug(self, string):
if not self.debug: if not self.debug:
return return
Logger.info(string) Logger.log(string)
def _log(self, string): def _log(self, string):
if self.debug: if self.debug:

Loading…
Cancel
Save