adding ability to re-index a storage

pull/6/head
deadc0de6 6 years ago
parent 4e7a2b3c2e
commit 531541fea4

@ -37,6 +37,7 @@ USAGE = """
Usage:
{1} index [--catalog=<path>] [--meta=<meta>...] [-acfuV] <name> <path>
{1} update [--catalog=<path>] [-acfuV] <name> <path>
{1} ls [--catalog=<path>] [-arVS] [<path>]
{1} find [--catalog=<path>] [-abV] <term>
{1} rm [--catalog=<path>] [-fV] <storage>
@ -51,7 +52,7 @@ Usage:
Options:
--catalog=<path> Path to the catalog [default: {2}].
--meta=<meta> Additional attribute to store [default: ].
-u --subsize Store size of folders [default: False].
-u --subsize Store size of directories [default: False].
-a --archive Handle archive file [default: False].
-f --force Do not ask when updating the catalog [default: False].
-b --script Output script to manage found file(s) [default: False].
@ -64,7 +65,7 @@ Options:
""".format(BANNER, NAME, CATALOGPATH)
def cmd_index(args, noder, catalog, top):
def cmd_index(args, noder, catalog, top, debug=False):
path = args['<path>']
name = args['<name>']
nohash = not args['--hash']
@ -79,10 +80,10 @@ def cmd_index(args, noder, catalog, top):
node = noder.get_storage_node(top, name)
node.parent = None
start = datetime.datetime.now()
walker = Walker(noder, nohash=nohash)
walker = Walker(noder, nohash=nohash, debug=debug)
attr = noder.format_storage_attr(args['--meta'])
root = noder.storage_node(name, path, parent=top, attr=attr)
_, cnt = walker.index(path, name, parent=root, parentpath=path)
_, cnt = walker.index(path, name, root)
if subsize:
noder.rec_size(root)
stop = datetime.datetime.now()
@ -90,6 +91,28 @@ def cmd_index(args, noder, catalog, top):
catalog.save(top)
def cmd_update(args, noder, catalog, top, debug=False):
    '''
    re-index an existing storage and save the catalog

    args: parsed command line, read through the keys
          '<path>', '<name>', '--hash' and '--subsize'
    noder: object providing get_storage_node() and rec_size()
    catalog: object providing save()
    top: top node, searched for the storage and saved at the end
    debug: forwarded as-is to the Walker

    An error is logged and nothing is saved when "path" does not
    exist on the file system or when no storage is named "name".
    '''
    path = args['<path>']
    name = args['<name>']
    skip_hash = not args['--hash']
    with_dirsize = args['--subsize']

    if not os.path.exists(path):
        Logger.err('\"{}\" does not exist'.format(path))
        return

    storage = noder.get_storage_node(top, name)
    if not storage:
        Logger.err('storage named \"{}\" does not exist'.format(name))
        return

    began = datetime.datetime.now()
    indexer = Walker(noder, nohash=skip_hash, debug=debug)
    count = indexer.reindex(path, storage, top)
    if with_dirsize:
        # directory sizes are computed inside the timed section
        noder.rec_size(storage)
    elapsed = datetime.datetime.now() - began
    Logger.info('updated {} file(s) in {}'.format(count, elapsed))
    catalog.save(top)
def cmd_ls(args, noder, top):
path = args['<path>']
if not path:
@ -203,7 +226,9 @@ def main():
# parse command
if args['index']:
cmd_index(args, noder, catalog, top)
cmd_index(args, noder, catalog, top, debug=args['--verbose'])
if args['update']:
cmd_update(args, noder, catalog, top, debug=args['--verbose'])
elif args['find']:
cmd_find(args, noder, top)
elif args['tree']:

@ -55,16 +55,34 @@ class Noder:
continue
if n.name == name:
return n
return None
def get_node(self, top, path):
def get_node(self, top, path, quiet=False):
'''get the node by internal tree path'''
r = anytree.resolver.Resolver('name')
try:
return r.get(top, path)
except anytree.resolver.ChildResolverError:
Logger.err('No node at path \"{}\"'.format(path))
if not quiet:
Logger.err('No node at path \"{}\"'.format(path))
return None
def get_node_if_newer(self, top, path):
    '''
    return the node (if any) and if path is newer

    top: node the lookup starts from
    path: file system path; with its leading separators stripped it
          is also used as the path of the node below "top"

    returns a tuple (node, newer):
      (None, True)  no node could be resolved for "path"
      (node, True)  the node has no usable "maccess" value, or the
                    modification time of "path" is more recent
      (node, False) the node is at least as recent as "path"
    '''
    treepath = path.lstrip(os.sep)
    node = self.get_node(top, treepath, quiet=True)
    if not node:
        # node does not exist
        return None, True
    # getattr() so that a node which never got a "maccess" attribute
    # forces a re-index instead of raising AttributeError
    maccess = getattr(node, 'maccess', None)
    if not maccess:
        return node, True
    # os.path.getmtime() already returns a float
    return node, os.path.getmtime(path) > maccess
def get_meta_node(self, top):
'''return the meta node if any'''
try:
@ -76,7 +94,7 @@ class Noder:
def rec_size(self, node):
'''recursively traverse tree and store dir size'''
if self.verbose:
Logger.info('getting folder size recursively')
Logger.info('getting directory size recursively')
if node.type == self.TYPE_FILE:
return node.size
size = 0

@ -17,12 +17,44 @@ class Walker:
MAXLINE = 80 - 15
def __init__(self, noder, nohash=False):
def __init__(self, noder, nohash=False, debug=False):
self.noder = noder
self.noder.set_hashing(not nohash)
self.debug = debug
def index(self, path, name, parentpath=None, parent=None, isdir=False):
'''index a folder and store in tree'''
def index(self, path, name, parent):
return self._index(path, name, parent)
def reindex(self, path, parent, top):
    '''
    reindex a directory and store in tree

    path: directory on the file system to scan
    parent: node receiving the entries that get (re)created
    top: node handed to the catalog lookups

    returns the number of files that were re-indexed

    Only the direct content of "path" is listed here; each
    sub-directory is handled by a recursive call so that it gets
    its own parent node.
    '''
    cnt = 0
    # first os.walk() entry only; the default covers a path that
    # yields nothing (os.walk ignores listing errors by default)
    root, dirs, files = next(os.walk(path), (path, [], []))
    for f in files:
        sub = os.path.join(root, f)
        if not self._need_reindex(top, sub):
            self._debug('ignore {}'.format(sub))
            continue
        self._debug('re-index {}'.format(sub))
        self._log(f)
        self.noder.file_node(f, sub, parent, path)
        cnt += 1
    for d in dirs:
        sub = os.path.join(root, d)
        cnode, newer = self.noder.get_node_if_newer(top, sub)
        if cnode and not newer:
            # The directory entry itself is up to date, but editing a
            # file in place does not change the mtime of its parent
            # directory: keep the existing node and still descend,
            # otherwise modified files deeper down are never seen.
            self._debug('ignore {}'.format(sub))
            cnt += self.reindex(sub, cnode, top)
            continue
        if cnode:
            # outdated node: detach it (and its subtree) and re-add
            cnode.parent = None
        self._debug('re-index {}'.format(sub))
        dummy = self.noder.dir_node(d, sub, parent, path)
        cnt += self.reindex(sub, dummy, top)
    self._log(None)
    return cnt
def _index(self, path, name, parent):
'''index a directory and store in tree'''
if not parent:
parent = noder.dir_node(name, path, parent)
@ -30,22 +62,43 @@ class Walker:
for (root, dirs, files) in os.walk(path):
for f in files:
sub = os.path.join(root, f)
n = f
if len(n) > self.MAXLINE:
n = f[:self.MAXLINE] + '...'
Logger.progr('indexing: {:80}'.format(n))
self._log(f)
self.noder.file_node(os.path.basename(f), sub,
parent, parentpath)
parent, path)
cnt += 1
for d in dirs:
base = os.path.basename(d)
sub = os.path.join(root, d)
dummy = self.noder.dir_node(base, sub, parent, parentpath)
_, cnt2 = self.index(sub, base,
parent=dummy, parentpath=parentpath)
dummy = self.noder.dir_node(base, sub, parent, path)
_, cnt2 = self._index(sub, base, dummy)
cnt += cnt2
break
# clean line
Logger.progr('{:80}'.format(' '))
self._log(None)
return parent, cnt
def _need_reindex(self, top, path):
'''test if node needs re-indexing'''
cnode, newer = self.noder.get_node_if_newer(top, path)
if cnode and not newer:
# ignore this node
return False
if cnode and newer:
# remove this node and re-add
cnode.parent = None
return True
def _debug(self, string):
if not self.debug:
return
Logger.info(string)
def _log(self, string):
if self.debug:
return
if not string:
# clean
Logger.progr('{:80}'.format(' '))
return
if len(string) > self.MAXLINE:
string = string[:self.MAXLINE] + '...'
Logger.progr('indexing: {:80}'.format(string))

Loading…
Cancel
Save