From 23314564bf2710af098845455415f90a7920fe8a Mon Sep 17 00:00:00 2001 From: deadc0de6 Date: Fri, 14 Sep 2018 09:00:21 +0200 Subject: [PATCH 01/24] rephrase the -f --force option for #3 --- catcli/catcli.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/catcli/catcli.py b/catcli/catcli.py index 3612461..6dcfb94 100755 --- a/catcli/catcli.py +++ b/catcli/catcli.py @@ -53,7 +53,7 @@ Options: --meta= Additional attribute to store [default: ]. -u --subsize Store size of folders [default: False]. -a --archive Handle archive file [default: False]. - -f --force Force overwrite [default: False]. + -f --force Do not ask confirmation when updating the catalog [default: False]. -b --script Output script to manage found file(s) [default: False]. -S --sortsize Sort by size, largest first [default: False]. -c --hash Calculate md5 hash [default: False]. From a9d576f6e82046bed5cc0c654e229e19f1991670 Mon Sep 17 00:00:00 2001 From: deadc0de6 Date: Fri, 14 Sep 2018 09:03:45 +0200 Subject: [PATCH 02/24] pep8 --- catcli/catcli.py | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/catcli/catcli.py b/catcli/catcli.py index 6dcfb94..6e76d84 100755 --- a/catcli/catcli.py +++ b/catcli/catcli.py @@ -49,18 +49,18 @@ Usage: {1} --version Options: - --catalog= Path to the catalog [default: {2}]. - --meta= Additional attribute to store [default: ]. - -u --subsize Store size of folders [default: False]. - -a --archive Handle archive file [default: False]. - -f --force Do not ask confirmation when updating the catalog [default: False]. - -b --script Output script to manage found file(s) [default: False]. - -S --sortsize Sort by size, largest first [default: False]. - -c --hash Calculate md5 hash [default: False]. - -r --recursive Recursive [default: False]. - -V --verbose Be verbose [default: False]. - -v --version Show version. - -h --help Show this screen. + --catalog= Path to the catalog [default: {2}]. + --meta= Additional attribute to store [default: ]. + -u --subsize Store size of folders [default: False]. + -a --archive Handle archive file [default: False]. + -f --force Do not ask when updating the catalog [default: False]. + -b --script Output script to manage found file(s) [default: False]. + -S --sortsize Sort by size, largest first [default: False]. + -c --hash Calculate md5 hash [default: False]. + -r --recursive Recursive [default: False]. + -V --verbose Be verbose [default: False]. + -v --version Show version. + -h --help Show this screen. """.format(BANNER, NAME, CATALOGPATH) From 504a6d4e9aacbbc626a723f6dfeaefe32fa11caf Mon Sep 17 00:00:00 2001 From: deadc0de6 Date: Fri, 14 Sep 2018 09:13:35 +0200 Subject: [PATCH 03/24] refactoring --- catcli/catalog.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/catcli/catalog.py b/catcli/catalog.py index 3eb2b07..8a09042 100644 --- a/catcli/catalog.py +++ b/catcli/catalog.py @@ -49,7 +49,7 @@ class Catalog: if d and not os.path.exists(d): os.makedirs(d) elif os.path.exists(self.path) and not self.force: - if not utils.ask('Overwrite \"{}\"'.format(self.path)): + if not utils.ask('Update catalog \"{}\"'.format(self.path)): Logger.info('Catalog not saved') return False if d and not os.path.exists(d): From 927d5e2773fe7f102bc727c4967f6f55bcc397c0 Mon Sep 17 00:00:00 2001 From: deadc0de6 Date: Fri, 14 Sep 2018 09:53:56 +0200 Subject: [PATCH 04/24] add ability to overwrite existing storage for #3 --- catcli/catcli.py | 20 +++++++++++--------- catcli/noder.py | 12 ++++++++++-- 2 files changed, 21 insertions(+), 11 deletions(-) diff --git a/catcli/catcli.py b/catcli/catcli.py index 6e76d84..b9e6a71 100755 --- a/catcli/catcli.py +++ b/catcli/catcli.py @@ -71,10 +71,13 @@ def cmd_index(args, noder, catalog, top): subsize = args['--subsize'] if not os.path.exists(path): Logger.err('\"{}\" does not exist'.format(path)) - return False + return if name in noder.get_storage_names(top): - Logger.err('storage named \"{}\" already exist'.format(name)) - return False + if not ask('Overwrite storage \"{}\"'.format(name)): + Logger.err('storage named \"{}\" already exist'.format(name)) + return + node = noder.get_storage_node(top, name) + node.parent = None start = datetime.datetime.now() walker = Walker(noder, nohash=nohash) attr = noder.clean_storage_attr(args['--meta']) @@ -104,15 +107,14 @@ def cmd_ls(args, noder, top): def cmd_rm(args, noder, catalog, top): - what = args[''] - storages = list(x.name for x in top.children) - if what in storages: - node = next(filter(lambda x: x.name == what, top.children)) + name = args[''] + node = noder.get_storage_node(top, name) + if node: node.parent = None if catalog.save(top): - Logger.info('Storage \"{}\" removed'.format(what)) + Logger.info('Storage \"{}\" removed'.format(name)) else: - Logger.err('Storage named \"{}\" does not exist'.format(what)) + Logger.err('Storage named \"{}\" does not exist'.format(name)) return top diff --git a/catcli/noder.py b/catcli/noder.py index 2f0a457..07539ce 100644 --- a/catcli/noder.py +++ b/catcli/noder.py @@ -29,7 +29,7 @@ class Noder: TOPNAME = 'top' METANAME = 'meta' - TYPE_TOP = 'top' # tip top ;-) + TYPE_TOP = 'top' TYPE_FILE = 'file' TYPE_DIR = 'dir' TYPE_ARC = 'arc' @@ -51,13 +51,21 @@ class Noder: ''' return a list of all storage names ''' return [x.name for x in list(top.children)] + def get_storage_node(self, top, name): + ''' return the storage node ''' + for n in top.children: + if n.type != self.TYPE_STORAGE: + continue + if n.name == name: + return n + def clean_storage_attr(self, attr): if not attr: return '' return ', '.join(attr) def get_node(self, top, path): - ''' get the node at path ''' + ''' get the node by internal tree path ''' r = anytree.resolver.Resolver('name') try: return r.get(top, path) From 39efd9b8fec2aad39de33707c697eeb3485970be Mon Sep 17 00:00:00 2001 From: deadc0de6 Date: Fri, 14 Sep 2018 10:07:38 +0200 Subject: [PATCH 05/24] refactoring --- catcli/catalog.py | 29 ++++++----- catcli/catcli.py | 4 +- catcli/decomp.py | 5 +- catcli/logger.py | 17 ++++--- catcli/noder.py | 124 ++++++++++++++++++++++++---------------------- catcli/utils.py | 8 +-- catcli/walker.py | 2 +- 7 files changed, 99 insertions(+), 90 deletions(-) diff --git a/catcli/catalog.py b/catcli/catalog.py index 8a09042..9eea7c5 100644 --- a/catcli/catalog.py +++ b/catcli/catalog.py @@ -22,16 +22,15 @@ class Catalog: self.verbose = verbose # verbosity self.force = force # force overwrite if exists self.metanode = None - # prefer json for git versioning self.pickle = pickle def set_metanode(self, metanode): - ''' remove the metanode until tree is re-written ''' + '''remove the metanode until tree is re-written''' self.metanode = metanode self.metanode.parent = None def restore(self): - ''' restore the catalog ''' + '''restore the catalog''' if not self.path: return None if not os.path.exists(self.path): @@ -41,7 +40,7 @@ class Catalog: return self._restore_json(open(self.path, 'r').read()) def save(self, node): - ''' save the catalog ''' + '''save the catalog''' if not self.path: Logger.err('Path not defined') return False @@ -62,14 +61,22 @@ class Catalog: return self._save_json(node) def _save_pickle(self, node): - ''' pickle the catalog''' + '''pickle the catalog''' pickle.dump(node, open(self.path, 'wb')) if self.verbose: Logger.info('Catalog saved to pickle \"{}\"'.format(self.path)) return True + def _restore_pickle(self): + '''restore the pickled tree''' + root = pickle.load(open(self.path, 'rb')) + if self.verbose: + m = 'Catalog imported from pickle \"{}\"'.format(self.path) + Logger.info(m) + return root + def _save_json(self, node): - ''' export the catalog in json ''' + '''export the catalog in json''' exp = JsonExporter(indent=2, sort_keys=True) with open(self.path, 'w') as f: exp.write(node, f) @@ -77,16 +84,8 @@ class Catalog: Logger.info('Catalog saved to json \"{}\"'.format(self.path)) return True - def _restore_pickle(self): - ''' restore the pickled tree ''' - root = pickle.load(open(self.path, 'rb')) - if self.verbose: - m = 'Catalog imported from pickle \"{}\"'.format(self.path) - Logger.info(m) - return root - def _restore_json(self, string): - ''' restore the tree from json ''' + '''restore the tree from json''' imp = JsonImporter() root = imp.import_(string) if self.verbose: diff --git a/catcli/catcli.py b/catcli/catcli.py index b9e6a71..5ee8575 100755 --- a/catcli/catcli.py +++ b/catcli/catcli.py @@ -80,7 +80,7 @@ def cmd_index(args, noder, catalog, top): node.parent = None start = datetime.datetime.now() walker = Walker(noder, nohash=nohash) - attr = noder.clean_storage_attr(args['--meta']) + attr = noder.format_storage_attr(args['--meta']) root = noder.storage_node(name, path, parent=top, attr=attr) _, cnt = walker.index(path, name, parent=root, parentpath=path) if subsize: @@ -223,7 +223,7 @@ def main(): if __name__ == '__main__': - ''' entry point ''' + '''entry point''' if main(): sys.exit(0) sys.exit(1) diff --git a/catcli/decomp.py b/catcli/decomp.py index fc69ac5..9963888 100644 --- a/catcli/decomp.py +++ b/catcli/decomp.py @@ -29,22 +29,25 @@ class Decomp: 'zip': self._zip} def get_format(self): + '''return list of supported extensions''' return list(self.ext.keys()) def get_names(self, path): - ''' get tree of compressed archive ''' + '''get tree of compressed archive''' ext = os.path.splitext(path)[1][1:] if ext in list(self.ext.keys()): return self.ext[ext](path) return None def _tar(self, path): + '''return list of file names in tar''' if not tarfile.is_tarfile(path): return None tar = tarfile.open(path, "r") return tar.getnames() def _zip(self, path): + '''return list of file names in zip''' if not zipfile.is_zipfile(path): return None z = zipfile.ZipFile(path) diff --git a/catcli/logger.py b/catcli/logger.py index 819acd5..5500a59 100644 --- a/catcli/logger.py +++ b/catcli/logger.py @@ -29,7 +29,7 @@ class Logger: # node specific output ###################################################################### def storage(pre, name, attr): - ''' print a storage node ''' + '''print a storage node''' end = '' if attr: end = ' {}({}){}'.format(Logger.GRAY, attr, Logger.RESET) @@ -38,13 +38,13 @@ class Logger: sys.stdout.write('{}\n'.format(s)) def file(pre, name, attr): - ''' print a file node ''' + '''print a file node''' s = '{}{}'.format(pre, name) s += ' {}[{}]{}'.format(Logger.GRAY, attr, Logger.RESET) sys.stdout.write('{}\n'.format(s)) def dir(pre, name, depth='', attr=None): - ''' print a directory node ''' + '''print a directory node''' end = [] if depth != '': end.append('nbfiles:{}'.format(depth)) @@ -65,27 +65,28 @@ class Logger: # generic output ###################################################################### def out(string): - ''' to stdout ''' + '''to stdout''' sys.stdout.write('{}\n'.format(string)) def log(string): - ''' to stderr ''' + '''to stderr''' sys.stderr.write('{}\n'.format(string)) def info(string): - ''' to stderr in color ''' + '''to stderr in color''' s = '{}{}{}'.format(Logger.MAGENTA, string, Logger.RESET) sys.stderr.write('{}\n'.format(s)) def err(string): - ''' to stderr in RED ''' + '''to stderr in RED''' s = '{}{}{}'.format(Logger.RED, string, Logger.RESET) sys.stderr.write('{}\n'.format(s)) def progr(string): - ''' print progress ''' + '''print progress''' sys.stderr.write('{}\r'.format(string)) sys.stderr.flush() def bold(string): + '''make it bold''' return '{}{}{}'.format(Logger.BOLD, string, Logger.RESET) diff --git a/catcli/noder.py b/catcli/noder.py index 07539ce..9dfd9a6 100644 --- a/catcli/noder.py +++ b/catcli/noder.py @@ -44,28 +44,20 @@ class Noder: if self.arc: self.decomp = Decomp() - def set_hashing(self, val): - self.hash = val - def get_storage_names(self, top): - ''' return a list of all storage names ''' + '''return a list of all storage names''' return [x.name for x in list(top.children)] def get_storage_node(self, top, name): - ''' return the storage node ''' + '''return the storage node if any''' for n in top.children: if n.type != self.TYPE_STORAGE: continue if n.name == name: return n - def clean_storage_attr(self, attr): - if not attr: - return '' - return ', '.join(attr) - def get_node(self, top, path): - ''' get the node by internal tree path ''' + '''get the node by internal tree path''' r = anytree.resolver.Resolver('name') try: return r.get(top, path) @@ -73,15 +65,53 @@ class Noder: Logger.err('No node at path \"{}\"'.format(path)) return None + def get_meta_node(self, top): + '''return the meta node if any''' + try: + return next(filter(lambda x: x.type == self.TYPE_META, + top.children)) + except StopIteration: + return None + + def rec_size(self, node): + '''recursively traverse tree and store dir size''' + if self.verbose: + Logger.info('getting folder size recursively') + if node.type == self.TYPE_FILE: + return node.size + size = 0 + for i in node.children: + if node.type == self.TYPE_DIR: + size += self.rec_size(i) + if node.type == self.TYPE_STORAGE: + self.rec_size(i) + else: + continue + node.size = size + return size + + ############################################################### + # public helpers + ############################################################### + def format_storage_attr(self, attr): + '''format the storage attr for saving''' + if not attr: + return '' + return ', '.join(attr) + + def set_hashing(self, val): + '''hash files when indexing''' + self.hash = val + ############################################################### # node creationg ############################################################### def new_top_node(self): - ''' create a new top node''' + '''create a new top node''' return anytree.AnyNode(name=self.TOPNAME, type=self.TYPE_TOP) def update_metanode(self, meta): - ''' create or update meta node information ''' + '''create or update meta node information''' epoch = int(time.time()) if not meta: attr = {} @@ -94,7 +124,7 @@ class Noder: return meta def file_node(self, name, path, parent, storagepath): - ''' create a new node representing a file ''' + '''create a new node representing a file''' if not os.path.exists(path): Logger.err('File \"{}\" does not exist'.format(path)) return None @@ -120,13 +150,13 @@ class Noder: return n def dir_node(self, name, path, parent, storagepath): - ''' create a new node representing a directory ''' + '''create a new node representing a directory''' path = os.path.abspath(path) relpath = os.path.relpath(path, start=storagepath) return self._node(name, self.TYPE_DIR, relpath, parent) def storage_node(self, name, path, parent, attr=None): - ''' create a new node representing a storage ''' + '''create a new node representing a storage''' path = os.path.abspath(path) free = psutil.disk_usage(path).free total = psutil.disk_usage(path).total @@ -135,12 +165,13 @@ class Noder: total=total, parent=parent, attr=attr, ts=epoch) def archive_node(self, name, path, parent, archive): + '''crete a new node for archive data''' return anytree.AnyNode(name=name, type=self.TYPE_ARC, relpath=path, parent=parent, size=0, md5=None, archive=archive) def _node(self, name, type, relpath, parent, size=None, md5=None): - ''' generic node creation ''' + '''generic node creation''' return anytree.AnyNode(name=name, type=type, relpath=relpath, parent=parent, size=size, md5=md5) @@ -149,7 +180,7 @@ class Noder: ############################################################### def _print_node(self, node, pre='', withpath=False, withdepth=False, withstorage=False): - ''' print a node ''' + '''print a node''' if node.type == self.TYPE_TOP: Logger.out('{}{}'.format(pre, node.name)) elif node.type == self.TYPE_FILE: @@ -193,16 +224,22 @@ class Noder: # Logger.out('{}{}'.format(pre, node.name)) def print_tree(self, node, style=anytree.ContRoundStyle()): - ''' print the tree similar to unix tool "tree" ''' + '''print the tree similar to unix tool "tree"''' rend = anytree.RenderTree(node, childiter=self._sort_tree) for pre, fill, node in rend: self._print_node(node, pre=pre, withdepth=True) + def to_dot(self, node, path='tree.dot'): + '''export to dot for graphing''' + anytree.exporter.DotExporter(node).to_dotfile(path) + Logger.info('dot file created under \"{}\"'.format(path)) + return 'dot {} -T png -o /tmp/tree.png'.format(path) + ############################################################### # searching ############################################################### def find_name(self, root, key, script=False): - ''' find files based on their names ''' + '''find files based on their names''' if self.verbose: Logger.info('searching for \"{}\"'.format(key)) self.term = key @@ -222,7 +259,7 @@ class Noder: return found def _find_name(self, node): - ''' callback for finding files ''' + '''callback for finding files''' if self.term.lower() in node.name.lower(): return True return False @@ -231,7 +268,7 @@ class Noder: # climbing ############################################################### def walk(self, root, path, rec=False): - ''' walk the tree for ls based on names ''' + '''walk the tree for ls based on names''' if self.verbose: Logger.info('walking path: \"{}\"'.format(path)) r = anytree.resolver.Resolver('name') @@ -256,7 +293,7 @@ class Noder: # tree creationg ############################################################### def _add_entry(self, name, top, resolv): - ''' add an entry to the tree ''' + '''add an entry to the tree''' entries = name.rstrip(os.sep).split(os.sep) if len(entries) == 1: self.archive_node(name, name, top, top.name) @@ -270,7 +307,7 @@ class Noder: self.archive_node(f, name, top, top.name) def list_to_tree(self, parent, names): - ''' convert list of files to a tree ''' + '''convert list of files to a tree''' if not names: return r = anytree.resolver.Resolver('name') @@ -282,7 +319,7 @@ class Noder: # diverse ############################################################### def _sort_tree(self, items): - ''' sorting a list of items ''' + '''sorting a list of items''' return sorted(items, key=self._sort, reverse=self.sortsize) def _sort(self, x): @@ -291,11 +328,11 @@ class Noder: return self._sort_fs(x) def _sort_fs(self, n): - ''' sorting nodes dir first and alpha ''' + '''sorting nodes dir first and alpha''' return (n.type, n.name.lstrip('\.').lower()) def _sort_size(self, n): - ''' sorting nodes by size ''' + '''sorting nodes by size''' try: if not n.size: return 0 @@ -303,37 +340,6 @@ class Noder: except AttributeError: return 0 - def to_dot(self, node, path='tree.dot'): - ''' export to dot for graphing ''' - anytree.exporter.DotExporter(node).to_dotfile(path) - Logger.info('dot file created under \"{}\"'.format(path)) - return 'dot {} -T png -o /tmp/tree.png'.format(path) - def _get_storage(self, node): - ''' recursively traverse up to find storage ''' + '''recursively traverse up to find storage''' return node.ancestors[1] - - def get_meta_node(self, top): - ''' return the meta node if any ''' - try: - return next(filter(lambda x: x.type == self.TYPE_META, - top.children)) - except StopIteration: - return None - - def rec_size(self, node): - ''' recursively traverse tree and store dir size ''' - if self.verbose: - Logger.info('getting folder size recursively') - if node.type == self.TYPE_FILE: - return node.size - size = 0 - for i in node.children: - if node.type == self.TYPE_DIR: - size += self.rec_size(i) - if node.type == self.TYPE_STORAGE: - self.rec_size(i) - else: - continue - node.size = size - return size diff --git a/catcli/utils.py b/catcli/utils.py index 2c1e23e..1392fae 100644 --- a/catcli/utils.py +++ b/catcli/utils.py @@ -16,7 +16,7 @@ from catcli.logger import Logger def md5sum(path): - ''' calculate md5 sum of a file ''' + '''calculate md5 sum of a file''' p = os.path.realpath(path) if not os.path.exists(p): Logger.err('\nunable to get md5sum on {}'.format(path)) @@ -36,7 +36,7 @@ def md5sum(path): def human(size): - ''' human readable size ''' + '''human readable size''' div = 1024. suf = ['B', 'K', 'M', 'G', 'T', 'P'] if size < div: @@ -49,13 +49,13 @@ def human(size): def ask(question): - ''' ask the user what to do ''' + '''ask the user what to do''' resp = input('{} [y|N] ? '.format(question)) return resp.lower() == 'y' def edit(string): - ''' edit the information with the default EDITOR ''' + '''edit the information with the default EDITOR''' string = string.encode('utf-8') EDITOR = os.environ.get('EDITOR', 'vim') with tempfile.NamedTemporaryFile(prefix='catcli', suffix='.tmp') as f: diff --git a/catcli/walker.py b/catcli/walker.py index 6dd880b..56faa97 100644 --- a/catcli/walker.py +++ b/catcli/walker.py @@ -22,7 +22,7 @@ class Walker: self.noder.set_hashing(not nohash) def index(self, path, name, parentpath=None, parent=None, isdir=False): - ''' index a folder and store in tree ''' + '''index a folder and store in tree''' if not parent: parent = noder.dir_node(name, path, parent) From 303fe41805b56181a2ff8ad7bdae9db2de5495ad Mon Sep 17 00:00:00 2001 From: deadc0de6 Date: Fri, 14 Sep 2018 10:09:05 +0200 Subject: [PATCH 06/24] bump version --- catcli/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/catcli/__init__.py b/catcli/__init__.py index f63e2b6..30495c2 100644 --- a/catcli/__init__.py +++ b/catcli/__init__.py @@ -5,7 +5,7 @@ Copyright (c) 2017, deadc0de6 import sys -__version__ = '0.4.6' +__version__ = '0.5.0' def main(): From 4e7a2b3c2e4d4f2e6e21a308488261a7e39f7b46 Mon Sep 17 00:00:00 2001 From: deadc0de6 Date: Wed, 19 Sep 2018 18:28:55 +0200 Subject: [PATCH 07/24] adding access time in node --- catcli/noder.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/catcli/noder.py b/catcli/noder.py index 9dfd9a6..8071902 100644 --- a/catcli/noder.py +++ b/catcli/noder.py @@ -140,8 +140,9 @@ class Noder: relpath = os.path.join(os.path.basename(storagepath), os.path.relpath(path, start=storagepath)) + maccess = os.path.getmtime(path) n = self._node(name, self.TYPE_FILE, relpath, parent, - size=st.st_size, md5=md5) + size=st.st_size, md5=md5, maccess=maccess) if self.arc: ext = os.path.splitext(path)[1][1:] if ext in self.decomp.get_format(): @@ -153,7 +154,9 @@ class Noder: '''create a new node representing a directory''' path = os.path.abspath(path) relpath = os.path.relpath(path, start=storagepath) - return self._node(name, self.TYPE_DIR, relpath, parent) + maccess = os.path.getmtime(path) + return self._node(name, self.TYPE_DIR, relpath, + parent, maccess=maccess) def storage_node(self, name, path, parent, attr=None): '''create a new node representing a storage''' @@ -170,10 +173,12 @@ class Noder: parent=parent, size=0, md5=None, archive=archive) - def _node(self, name, type, relpath, parent, size=None, md5=None): + def _node(self, name, type, relpath, parent, + size=None, md5=None, maccess=None): '''generic node creation''' return anytree.AnyNode(name=name, type=type, relpath=relpath, - parent=parent, size=size, md5=md5) + parent=parent, size=size, + md5=md5, maccess=maccess) ############################################################### # printing From 531541fea4fd259897892c763773b84c49386e12 Mon Sep 17 00:00:00 2001 From: deadc0de6 Date: Wed, 19 Sep 2018 19:30:44 +0200 Subject: [PATCH 08/24] adding ability to re-index a storage --- catcli/catcli.py | 35 ++++++++++++++++++--- catcli/noder.py | 24 ++++++++++++-- catcli/walker.py | 81 +++++++++++++++++++++++++++++++++++++++--------- 3 files changed, 118 insertions(+), 22 deletions(-) diff --git a/catcli/catcli.py b/catcli/catcli.py index 5ee8575..3ad6c24 100755 --- a/catcli/catcli.py +++ b/catcli/catcli.py @@ -37,6 +37,7 @@ USAGE = """ Usage: {1} index [--catalog=] [--meta=...] [-acfuV] + {1} update [--catalog=] [-acfuV] {1} ls [--catalog=] [-arVS] [] {1} find [--catalog=] [-abV] {1} rm [--catalog=] [-fV] @@ -51,7 +52,7 @@ Usage: Options: --catalog= Path to the catalog [default: {2}]. --meta= Additional attribute to store [default: ]. - -u --subsize Store size of folders [default: False]. + -u --subsize Store size of directories [default: False]. -a --archive Handle archive file [default: False]. -f --force Do not ask when updating the catalog [default: False]. -b --script Output script to manage found file(s) [default: False]. @@ -64,7 +65,7 @@ Options: """.format(BANNER, NAME, CATALOGPATH) -def cmd_index(args, noder, catalog, top): +def cmd_index(args, noder, catalog, top, debug=False): path = args[''] name = args[''] nohash = not args['--hash'] @@ -79,10 +80,10 @@ def cmd_index(args, noder, catalog, top): node = noder.get_storage_node(top, name) node.parent = None start = datetime.datetime.now() - walker = Walker(noder, nohash=nohash) + walker = Walker(noder, nohash=nohash, debug=debug) attr = noder.format_storage_attr(args['--meta']) root = noder.storage_node(name, path, parent=top, attr=attr) - _, cnt = walker.index(path, name, parent=root, parentpath=path) + _, cnt = walker.index(path, name, root) if subsize: noder.rec_size(root) stop = datetime.datetime.now() @@ -90,6 +91,28 @@ def cmd_index(args, noder, catalog, top): catalog.save(top) +def cmd_update(args, noder, catalog, top, debug=False): + path = args[''] + name = args[''] + nohash = not args['--hash'] + subsize = args['--subsize'] + if not os.path.exists(path): + Logger.err('\"{}\" does not exist'.format(path)) + return + root = noder.get_storage_node(top, name) + if not root: + Logger.err('storage named \"{}\" does not exist'.format(name)) + return + start = datetime.datetime.now() + walker = Walker(noder, nohash=nohash, debug=debug) + cnt = walker.reindex(path, root, top) + if subsize: + noder.rec_size(root) + stop = datetime.datetime.now() + Logger.info('updated {} file(s) in {}'.format(cnt, stop - start)) + catalog.save(top) + + def cmd_ls(args, noder, top): path = args[''] if not path: @@ -203,7 +226,9 @@ def main(): # parse command if args['index']: - cmd_index(args, noder, catalog, top) + cmd_index(args, noder, catalog, top, debug=args['--verbose']) + if args['update']: + cmd_update(args, noder, catalog, top, debug=args['--verbose']) elif args['find']: cmd_find(args, noder, top) elif args['tree']: diff --git a/catcli/noder.py b/catcli/noder.py index 8071902..c62bccc 100644 --- a/catcli/noder.py +++ b/catcli/noder.py @@ -55,16 +55,34 @@ class Noder: continue if n.name == name: return n + return None - def get_node(self, top, path): + def get_node(self, top, path, quiet=False): '''get the node by internal tree path''' r = anytree.resolver.Resolver('name') try: return r.get(top, path) except anytree.resolver.ChildResolverError: - Logger.err('No node at path \"{}\"'.format(path)) + if not quiet: + Logger.err('No node at path \"{}\"'.format(path)) return None + def get_node_if_newer(self, top, path): + '''return the node (if any) and if path is newer''' + treepath = path.lstrip(os.sep) + node = self.get_node(top, treepath, quiet=True) + if not node: + # node does not exist + return None, True + if not node.maccess: + # force re-indexing if no maccess + return node, True + maccess = node.maccess + cur_maccess = os.path.getmtime(path) + if float(cur_maccess) > maccess: + return node, True + return node, False + def get_meta_node(self, top): '''return the meta node if any''' try: @@ -76,7 +94,7 @@ class Noder: def rec_size(self, node): '''recursively traverse tree and store dir size''' if self.verbose: - Logger.info('getting folder size recursively') + Logger.info('getting directory size recursively') if node.type == self.TYPE_FILE: return node.size size = 0 diff --git a/catcli/walker.py b/catcli/walker.py index 56faa97..e908ec8 100644 --- a/catcli/walker.py +++ b/catcli/walker.py @@ -17,12 +17,44 @@ class Walker: MAXLINE = 80 - 15 - def __init__(self, noder, nohash=False): + def __init__(self, noder, nohash=False, debug=False): self.noder = noder self.noder.set_hashing(not nohash) + self.debug = debug - def index(self, path, name, parentpath=None, parent=None, isdir=False): - '''index a folder and store in tree''' + def index(self, path, name, parent): + return self._index(path, name, parent) + + def reindex(self, path, parent, top): + '''reindex a directory and store in tree''' + cnt = 0 + for (root, dirs, files) in os.walk(path): + for f in files: + sub = os.path.join(root, f) + if not self._need_reindex(top, sub): + self._debug('ignore {}'.format(sub)) + continue + self._debug('re-index {}'.format(sub)) + self._log(f) + self.noder.file_node(os.path.basename(f), sub, + parent, path) + cnt += 1 + for d in dirs: + base = os.path.basename(d) + sub = os.path.join(root, d) + if not self._need_reindex(top, sub): + self._debug('ignore {}'.format(sub)) + continue + self._debug('re-index {}'.format(sub)) + dummy = self.noder.dir_node(base, sub, parent, path) + cnt2 = self.reindex(sub, dummy, top) + cnt += cnt2 + break + self._log(None) + return cnt + + def _index(self, path, name, parent): + '''index a directory and store in tree''' if not parent: parent = noder.dir_node(name, path, parent) @@ -30,22 +62,43 @@ class Walker: for (root, dirs, files) in os.walk(path): for f in files: sub = os.path.join(root, f) - n = f - if len(n) > self.MAXLINE: - n = f[:self.MAXLINE] + '...' - Logger.progr('indexing: {:80}'.format(n)) + self._log(f) self.noder.file_node(os.path.basename(f), sub, - parent, parentpath) + parent, path) cnt += 1 for d in dirs: base = os.path.basename(d) sub = os.path.join(root, d) - dummy = self.noder.dir_node(base, sub, parent, parentpath) - _, cnt2 = self.index(sub, base, - parent=dummy, parentpath=parentpath) + dummy = self.noder.dir_node(base, sub, parent, path) + _, cnt2 = self._index(sub, base, dummy) cnt += cnt2 break - # clean line - Logger.progr('{:80}'.format(' ')) - + self._log(None) return parent, cnt + + def _need_reindex(self, top, path): + '''test if node needs re-indexing''' + cnode, newer = self.noder.get_node_if_newer(top, path) + if cnode and not newer: + # ignore this node + return False + if cnode and newer: + # remove this node and re-add + cnode.parent = None + return True + + def _debug(self, string): + if not self.debug: + return + Logger.info(string) + + def _log(self, string): + if self.debug: + return + if not string: + # clean + Logger.progr('{:80}'.format(' ')) + return + if len(string) > self.MAXLINE: + string = string[:self.MAXLINE] + '...' + Logger.progr('indexing: {:80}'.format(string)) From 4cce16ff644a2a2003cda3d5a4ef2bcdd5531379 Mon Sep 17 00:00:00 2001 From: deadc0de6 Date: Wed, 19 Sep 2018 19:35:50 +0200 Subject: [PATCH 09/24] refactoring --- catcli/walker.py | 37 +++++++++++++++++-------------------- 1 file changed, 17 insertions(+), 20 deletions(-) diff --git a/catcli/walker.py b/catcli/walker.py index e908ec8..64f2fc8 100644 --- a/catcli/walker.py +++ b/catcli/walker.py @@ -23,18 +23,14 @@ class Walker: self.debug = debug def index(self, path, name, parent): - return self._index(path, name, parent) + '''index a directory and store in tree''' + if not parent: + parent = noder.dir_node(name, path, parent) - def reindex(self, path, parent, top): - '''reindex a directory and store in tree''' cnt = 0 for (root, dirs, files) in os.walk(path): for f in files: sub = os.path.join(root, f) - if not self._need_reindex(top, sub): - self._debug('ignore {}'.format(sub)) - continue - self._debug('re-index {}'.format(sub)) self._log(f) self.noder.file_node(os.path.basename(f), sub, parent, path) @@ -42,26 +38,23 @@ class Walker: for d in dirs: base = os.path.basename(d) sub = os.path.join(root, d) - if not self._need_reindex(top, sub): - self._debug('ignore {}'.format(sub)) - continue - self._debug('re-index {}'.format(sub)) dummy = self.noder.dir_node(base, sub, parent, path) - cnt2 = self.reindex(sub, dummy, top) + _, cnt2 = self.index(sub, base, dummy) cnt += cnt2 break self._log(None) - return cnt - - def _index(self, path, name, parent): - '''index a directory and store in tree''' - if not parent: - parent = noder.dir_node(name, path, parent) + return parent, cnt + def reindex(self, path, parent, top): + '''reindex a directory and store in tree''' cnt = 0 for (root, dirs, files) in os.walk(path): for f in files: sub = os.path.join(root, f) + if not self._need_reindex(top, sub): + self._debug('ignore {}'.format(sub)) + continue + self._debug('re-index {}'.format(sub)) self._log(f) self.noder.file_node(os.path.basename(f), sub, parent, path) @@ -69,12 +62,16 @@ class Walker: for d in dirs: base = os.path.basename(d) sub = os.path.join(root, d) + if not self._need_reindex(top, sub): + self._debug('ignore {}'.format(sub)) + continue + self._debug('re-index {}'.format(sub)) dummy = self.noder.dir_node(base, sub, parent, path) - _, cnt2 = self._index(sub, base, dummy) + cnt2 = self.reindex(sub, dummy, top) cnt += cnt2 break self._log(None) - return parent, cnt + return cnt def _need_reindex(self, top, path): '''test if node needs re-indexing''' From 47d6d583ef1909b09ed42e67f0b0082289936466 Mon Sep 17 00:00:00 2001 From: deadc0de6 Date: Wed, 19 Sep 2018 20:10:06 +0200 Subject: [PATCH 10/24] improve re-indexing --- catcli/catcli.py | 8 +++++--- catcli/noder.py | 7 +++---- catcli/walker.py | 47 ++++++++++++++++++++++++++++++----------------- 3 files changed, 38 insertions(+), 24 deletions(-) diff --git a/catcli/catcli.py b/catcli/catcli.py index 3ad6c24..43f28fd 100755 --- a/catcli/catcli.py +++ b/catcli/catcli.py @@ -83,12 +83,13 @@ def cmd_index(args, noder, catalog, top, debug=False): walker = Walker(noder, nohash=nohash, debug=debug) attr = noder.format_storage_attr(args['--meta']) root = noder.storage_node(name, path, parent=top, attr=attr) - _, cnt = walker.index(path, name, root) + _, cnt = walker.index(path, root, name) if subsize: noder.rec_size(root) stop = datetime.datetime.now() Logger.info('Indexed {} file(s) in {}'.format(cnt, stop - start)) - catalog.save(top) + if cnt > 0: + catalog.save(top) def cmd_update(args, noder, catalog, top, debug=False): @@ -110,7 +111,8 @@ def cmd_update(args, noder, catalog, top, debug=False): noder.rec_size(root) stop = datetime.datetime.now() Logger.info('updated {} file(s) in {}'.format(cnt, stop - start)) - catalog.save(top) + if cnt > 0: + catalog.save(top) def cmd_ls(args, noder, top): diff --git a/catcli/noder.py b/catcli/noder.py index c62bccc..2776e1f 100644 --- a/catcli/noder.py +++ b/catcli/noder.py @@ -67,7 +67,7 @@ class Noder: Logger.err('No node at path \"{}\"'.format(path)) return None - def get_node_if_newer(self, top, path): + def get_node_if_newer(self, top, path, maccess): '''return the node (if any) and if path is newer''' treepath = path.lstrip(os.sep) node = self.get_node(top, treepath, quiet=True) @@ -77,9 +77,8 @@ class Noder: if not node.maccess: # force re-indexing if no maccess return node, True - maccess = node.maccess - cur_maccess = os.path.getmtime(path) - if float(cur_maccess) > maccess: + old_maccess = node.maccess + if float(maccess) > float(old_maccess): return node, True return node, False diff --git a/catcli/walker.py b/catcli/walker.py index 64f2fc8..8364ccb 100644 --- a/catcli/walker.py +++ b/catcli/walker.py @@ -22,7 +22,7 @@ class Walker: self.noder.set_hashing(not nohash) self.debug = debug - def index(self, path, name, parent): + def index(self, path, parent, name): '''index a directory and store in tree''' if not parent: parent = noder.dir_node(name, path, parent) @@ -32,14 +32,17 @@ class Walker: for f in files: sub = os.path.join(root, f) self._log(f) + self._debug('index file {}'.format(sub)) self.noder.file_node(os.path.basename(f), sub, parent, path) cnt += 1 for d in dirs: base = os.path.basename(d) sub = os.path.join(root, d) + self._debug('index directory {}'.format(sub)) dummy = self.noder.dir_node(base, sub, parent, path) - _, cnt2 = self.index(sub, base, dummy) + cnt += 1 + _, cnt2 = self.index(sub, dummy, base) cnt += cnt2 break self._log(None) @@ -50,44 +53,54 @@ class Walker: cnt = 0 for (root, dirs, files) in os.walk(path): for f in files: + self._debug('found file {}'.format(f)) sub = os.path.join(root, f) - if not self._need_reindex(top, sub): - self._debug('ignore {}'.format(sub)) + maccess = os.path.getmtime(sub) + reindex, _ = self._need_reindex(parent, f, maccess) + if not reindex: + self._debug('\tignore file {}'.format(sub)) continue - self._debug('re-index {}'.format(sub)) + self._debug('\tre-index file {}'.format(sub)) self._log(f) self.noder.file_node(os.path.basename(f), sub, parent, path) cnt += 1 for d in dirs: + self._debug('found dir {}'.format(d)) base = os.path.basename(d) sub = os.path.join(root, d) - if not self._need_reindex(top, sub): - self._debug('ignore {}'.format(sub)) - continue - self._debug('re-index {}'.format(sub)) - dummy = self.noder.dir_node(base, sub, parent, path) - cnt2 = self.reindex(sub, dummy, top) - cnt += cnt2 + maccess = os.path.getmtime(sub) + reindex, dummy = self._need_reindex(parent, base, maccess) + if reindex: + self._debug('\tre-index directory {}'.format(sub)) + dummy = self.noder.dir_node(base, sub, parent, path) + cnt2 = self.reindex(sub, dummy, top) + cnt += cnt2 break self._log(None) return cnt - def _need_reindex(self, top, path): + def _need_reindex(self, top, path, maccess): '''test if node needs re-indexing''' - cnode, newer = self.noder.get_node_if_newer(top, path) + cnode, newer = self.noder.get_node_if_newer(top, path, maccess) + if not cnode: + self._debug('\tdoes not exist') + return True, cnode if cnode and not newer: # ignore this node - return False + self._debug('\tis not newer') + return False, cnode if cnode and newer: # remove this node and re-add + self._debug('\tis newer') cnode.parent = None - return True + self._debug('\tis to be re-indexed') + return True, cnode def _debug(self, string): if not self.debug: return - Logger.info(string) + Logger.log(string) def _log(self, string): if self.debug: From 8eed881f5d3d4fe645420085c7a5dfc113a759c0 Mon Sep 17 00:00:00 2001 From: deadc0de6 Date: Thu, 20 Sep 2018 09:00:30 +0200 Subject: [PATCH 11/24] fix bug with meta --- catcli/catcli.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/catcli/catcli.py b/catcli/catcli.py index 43f28fd..d8acfc0 100755 --- a/catcli/catcli.py +++ b/catcli/catcli.py @@ -51,7 +51,7 @@ Usage: Options: --catalog= Path to the catalog [default: {2}]. - --meta= Additional attribute to store [default: ]. + --meta= Additional attribute to store sep by a comma [default: ]. -u --subsize Store size of directories [default: False]. -a --archive Handle archive file [default: False]. -f --force Do not ask when updating the catalog [default: False]. @@ -81,7 +81,7 @@ def cmd_index(args, noder, catalog, top, debug=False): node.parent = None start = datetime.datetime.now() walker = Walker(noder, nohash=nohash, debug=debug) - attr = noder.format_storage_attr(args['--meta']) + attr = noder.format_storage_attr(args['--meta'].split(',')) root = noder.storage_node(name, path, parent=top, attr=attr) _, cnt = walker.index(path, root, name) if subsize: From d7ea943c5fadc376bad4c40adc7ef83d73caf0ae Mon Sep 17 00:00:00 2001 From: deadc0de6 Date: Thu, 20 Sep 2018 09:34:26 +0200 Subject: [PATCH 12/24] improve re-indexing and add more tests --- catcli/walker.py | 13 +++-- tests/helpers.py | 32 ++++++++++++- tests/test_update.py | 112 +++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 151 insertions(+), 6 deletions(-) create mode 100644 tests/test_update.py diff --git a/catcli/walker.py b/catcli/walker.py index 8364ccb..0d9b968 100644 --- a/catcli/walker.py +++ b/catcli/walker.py @@ -24,12 +24,14 @@ class Walker: def index(self, path, parent, name): '''index a directory and store in tree''' + self._debug('indexing starting at {}'.format(path)) if not parent: parent = noder.dir_node(name, path, parent) cnt = 0 for (root, dirs, files) in os.walk(path): for f in files: + self._debug('found file {} under {}'.format(f, path)) sub = os.path.join(root, f) self._log(f) self._debug('index file {}'.format(sub)) @@ -37,6 +39,7 @@ class Walker: parent, path) cnt += 1 for d in dirs: + self._debug('found dir {} under {}'.format(f, path)) base = os.path.basename(d) sub = os.path.join(root, d) self._debug('index directory {}'.format(sub)) @@ -50,10 +53,11 @@ class Walker: def reindex(self, path, parent, top): '''reindex a directory and store in tree''' + self._debug('reindexing starting at {}'.format(path)) cnt = 0 for (root, dirs, files) in os.walk(path): for f in files: - self._debug('found file {}'.format(f)) + self._debug('found file {} under {}'.format(f, path)) sub = os.path.join(root, f) maccess = os.path.getmtime(sub) reindex, _ = self._need_reindex(parent, f, maccess) @@ -66,7 +70,7 @@ class Walker: parent, path) cnt += 1 for d in dirs: - self._debug('found dir {}'.format(d)) + self._debug('found dir {} under {}'.format(d, path)) base = os.path.basename(d) sub = os.path.join(root, d) maccess = os.path.getmtime(sub) @@ -74,8 +78,9 @@ class Walker: if reindex: self._debug('\tre-index directory {}'.format(sub)) dummy = self.noder.dir_node(base, sub, parent, path) - cnt2 = self.reindex(sub, dummy, top) - cnt += cnt2 + self._debug('reindexing deeper under {}'.format(sub)) + cnt2 = self.reindex(sub, dummy, top) + cnt += cnt2 break self._log(None) return cnt diff --git a/tests/helpers.py b/tests/helpers.py index 45f0943..e58c43e 100644 --- a/tests/helpers.py +++ b/tests/helpers.py @@ -10,6 +10,7 @@ import string import random import tempfile import shutil +import subprocess TMPSUFFIX = '.catcli' @@ -35,6 +36,21 @@ def clean(path): else: os.remove(path) + +def edit_file(path, newcontent): + if not os.path.exists(path): + write_to_file(path, newcontent) + else: + write_to_file(path, newcontent) + + +def unix_tree(path): + if not os.path.exists(path): + return + cmd = ['tree', path] + subprocess.call(cmd) + + ############################################################ # catcli specific ############################################################ @@ -82,9 +98,21 @@ def create_rnd_file(path, filename, content=None): if not content: content = get_rnd_string(100) fpath = os.path.join(path, filename) - with open(fpath, 'w') as f: + return write_to_file(fpath, content) + + +def write_to_file(path, content): + with open(path, 'w') as f: f.write(content) - return fpath + return path + + +def read_from_file(path): + if not os.path.exists(path): + return '' + with open(path, 'r') as f: + content = f.read() + return content ############################################################ diff --git a/tests/test_update.py b/tests/test_update.py new file mode 100644 index 0000000..5c5d55d --- /dev/null +++ b/tests/test_update.py @@ -0,0 +1,112 @@ +""" +author: deadc0de6 (https://github.com/deadc0de6) +Copyright (c) 2017, deadc0de6 + +Basic unittest for updating an index +""" + +import unittest + +from catcli.catcli import * +from catcli.noder import Noder +from catcli.walker import Walker +from catcli.catalog import Catalog +from tests.helpers import * + + +class TestIndexing(unittest.TestCase): + + def test_index(self): + # init + workingdir = get_tempdir() + catalogpath = create_rnd_file(workingdir, 'catalog.json', content='') + self.addCleanup(clean, workingdir) + + dirpath = get_tempdir() + self.addCleanup(clean, dirpath) + + # create 3 files + f1 = create_rnd_file(dirpath, 'file1') + f2 = create_rnd_file(dirpath, 'file2') + f3 = create_rnd_file(dirpath, 'file3') + + # create 2 directories + d1 = create_dir(dirpath, 'dir1') + d2 = create_dir(dirpath, 'dir2') + + # fill directories with files + d1f1 = create_rnd_file(d1, 'dir1file1') + d1f2 = create_rnd_file(d1, 'dir1file2') + d2f1 = create_rnd_file(d2, 'dir2file1') + + noder = Noder() + top = noder.new_top_node() + walker = Walker(noder) + catalog = Catalog(catalogpath, force=True, verbose=False) + + # create fake args + tmpdirname = 'tmpdir' + args = {'': dirpath, '': tmpdirname, + '--hash': True, '--meta': 'some meta', + '--subsize': True, '--verbose': True} + + # index the directory + unix_tree(dirpath) + cmd_index(args, noder, catalog, top, debug=True) + self.assertTrue(os.stat(catalogpath).st_size != 0) + + # print catalog + noder.print_tree(top) + + # add some files and directories + new1 = create_rnd_file(d1, 'newf1') + new2 = create_rnd_file(dirpath, 'newf2') + new3 = create_dir(dirpath, 'newd3') + new4 = create_dir(d2, 'newd4') + new5 = create_rnd_file(new4, 'newf5') + unix_tree(dirpath) + + # modify files + EDIT = 'edited' + edit_file(d1f1, EDIT) + + # update storage + cmd_update(args, noder, catalog, top, debug=True) + + # print catalog + # print(read_from_file(catalogpath)) + noder.print_tree(top) + + # explore the top node to find all nodes + self.assertTrue(len(top.children) == 1) + storage = top.children[0] + self.assertTrue(len(storage.children) == 7) + + # ensures files and directories are in + names = [x.name for x in storage.children] + self.assertTrue(os.path.basename(f1) in names) + self.assertTrue(os.path.basename(f2) in names) + self.assertTrue(os.path.basename(f3) in names) + self.assertTrue(os.path.basename(d1) in names) + self.assertTrue(os.path.basename(d2) in names) + self.assertTrue(os.path.basename(new3) in names) + self.assertTrue(os.path.basename(new2) in names) + + for node in storage.children: + if node.name == os.path.basename(d1): + self.assertTrue(len(node.children) == 3) + elif node.name == os.path.basename(d2): + self.assertTrue(len(node.children) == 2) + elif node.name == os.path.basename(new3): + self.assertTrue(len(node.children) == 0) + elif node.name == os.path.basename(new4): + self.assertTrue(len(node.children) == 1) + self.assertTrue(read_from_file(d1f1) == EDIT) + + +def main(): + unittest.main() + + +if __name__ == '__main__': + main() From 2a175d2f7bb7953978b2e142a9c9032e834c21df Mon Sep 17 00:00:00 2001 From: deadc0de6 Date: Thu, 20 Sep 2018 09:40:14 +0200 Subject: [PATCH 13/24] travis does not have tree --- tests/helpers.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/helpers.py b/tests/helpers.py index e58c43e..ae5624b 100644 --- a/tests/helpers.py +++ b/tests/helpers.py @@ -47,7 +47,8 @@ def edit_file(path, newcontent): def unix_tree(path): if not os.path.exists(path): return - cmd = ['tree', path] + # cmd = ['tree', path] + cmd = ['ls', '-R', path] subprocess.call(cmd) From a78eab6efd509b7edeacee198e393719457a9400 Mon Sep 17 00:00:00 2001 From: deadc0de6 Date: Thu, 20 Sep 2018 09:42:33 +0200 Subject: [PATCH 14/24] bump version --- catcli/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/catcli/__init__.py b/catcli/__init__.py index 30495c2..3bef315 100644 --- a/catcli/__init__.py +++ b/catcli/__init__.py @@ -5,7 +5,7 @@ Copyright (c) 2017, deadc0de6 import sys -__version__ = '0.5.0' +__version__ = '0.5.1' def main(): From c4ebc1473457bc70dabee43166bb309f962d178e Mon Sep 17 00:00:00 2001 From: deadc0de6 Date: Mon, 24 Sep 2018 12:51:40 +0200 Subject: [PATCH 15/24] fix meta option parsing #4 --- catcli/catcli.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/catcli/catcli.py b/catcli/catcli.py index d8acfc0..43f28fd 100755 --- a/catcli/catcli.py +++ b/catcli/catcli.py @@ -51,7 +51,7 @@ Usage: Options: --catalog= Path to the catalog [default: {2}]. - --meta= Additional attribute to store sep by a comma [default: ]. + --meta= Additional attribute to store [default: ]. -u --subsize Store size of directories [default: False]. -a --archive Handle archive file [default: False]. -f --force Do not ask when updating the catalog [default: False]. @@ -81,7 +81,7 @@ def cmd_index(args, noder, catalog, top, debug=False): node.parent = None start = datetime.datetime.now() walker = Walker(noder, nohash=nohash, debug=debug) - attr = noder.format_storage_attr(args['--meta'].split(',')) + attr = noder.format_storage_attr(args['--meta']) root = noder.storage_node(name, path, parent=top, attr=attr) _, cnt = walker.index(path, root, name) if subsize: From 53a78b9b35da8b187b1054d3f16d4bbd63a891d4 Mon Sep 17 00:00:00 2001 From: deadc0de6 Date: Mon, 24 Sep 2018 12:51:49 +0200 Subject: [PATCH 16/24] fix debug bug --- catcli/walker.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/catcli/walker.py b/catcli/walker.py index 0d9b968..018307d 100644 --- a/catcli/walker.py +++ b/catcli/walker.py @@ -39,7 +39,7 @@ class Walker: parent, path) cnt += 1 for d in dirs: - self._debug('found dir {} under {}'.format(f, path)) + self._debug('found dir {} under {}'.format(d, path)) base = os.path.basename(d) sub = os.path.join(root, d) self._debug('index directory {}'.format(sub)) From 016c5c24ad68b9b863cc9fa89058d4c3e16b44ec Mon Sep 17 00:00:00 2001 From: deadc0de6 Date: Mon, 24 Sep 2018 12:52:28 +0200 Subject: [PATCH 17/24] bump version --- catcli/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/catcli/__init__.py b/catcli/__init__.py index 3bef315..458cc32 100644 --- a/catcli/__init__.py +++ b/catcli/__init__.py @@ -5,7 +5,7 @@ Copyright (c) 2017, deadc0de6 import sys -__version__ = '0.5.1' +__version__ = '0.5.2' def main(): From f4e6ea59ba53d0ebe7d2f74701683d54292f92bf Mon Sep 17 00:00:00 2001 From: deadc0de6 Date: Wed, 26 Sep 2018 09:00:45 +0200 Subject: [PATCH 18/24] improve tests --- tests/test_index.py | 2 +- tests/test_update.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_index.py b/tests/test_index.py index 973d0ef..187dd00 100644 --- a/tests/test_index.py +++ b/tests/test_index.py @@ -47,7 +47,7 @@ class TestIndexing(unittest.TestCase): # create fake args tmpdirname = 'tmpdir' args = {'': dirpath, '': tmpdirname, - '--hash': True, '--meta': 'some meta', + '--hash': True, '--meta': ['some meta'], '--subsize': True, '--verbose': True} # index the directory diff --git a/tests/test_update.py b/tests/test_update.py index 5c5d55d..3638df8 100644 --- a/tests/test_update.py +++ b/tests/test_update.py @@ -47,7 +47,7 @@ class TestIndexing(unittest.TestCase): # create fake args tmpdirname = 'tmpdir' args = {'': dirpath, '': tmpdirname, - '--hash': True, '--meta': 'some meta', + '--hash': True, '--meta': ['some meta'], '--subsize': True, '--verbose': True} # index the directory From 40ad2399149f9038ffe4491f47515ef70a7fb359 Mon Sep 17 00:00:00 2001 From: deadc0de6 Date: Wed, 26 Sep 2018 09:01:36 +0200 Subject: [PATCH 19/24] add indexing date/time for #5 --- catcli/noder.py | 6 +++++- catcli/utils.py | 8 ++++++++ 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/catcli/noder.py b/catcli/noder.py index 2776e1f..9bc295a 100644 --- a/catcli/noder.py +++ b/catcli/noder.py @@ -236,7 +236,11 @@ class Noder: elif node.type == self.TYPE_STORAGE: hf = utils.human(node.free) ht = utils.human(node.total) - name = '{} (free:{}, total:{})'.format(node.name, hf, ht) + dt = '' + if node.ts: + dt = ', date:' + dt += utils.epoch_to_str(node.ts) + name = '{} (free:{}, total:{}{})'.format(node.name, hf, ht, dt) Logger.storage(pre, name, node.attr) elif node.type == self.TYPE_ARC: if self.arc: diff --git a/catcli/utils.py b/catcli/utils.py index 1392fae..73f08dc 100644 --- a/catcli/utils.py +++ b/catcli/utils.py @@ -10,6 +10,7 @@ import hashlib import sys import tempfile import subprocess +import datetime # local imports from catcli.logger import Logger @@ -48,6 +49,13 @@ def human(size): return '{:.1f}{}'.format(size, suf[-1]) +def epoch_to_str(epoch): + '''convert epoch to string''' + fmt = '%Y-%m-%d %H:%M:%S' + t = datetime.datetime.fromtimestamp(float(epoch)) + return t.strftime(fmt) + + def ask(question): '''ask the user what to do''' resp = input('{} [y|N] ? '.format(question)) From 90028aff929e0021ae811de167f59c0abb76abfa Mon Sep 17 00:00:00 2001 From: deadc0de6 Date: Wed, 26 Sep 2018 09:15:55 +0200 Subject: [PATCH 20/24] bump version --- catcli/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/catcli/__init__.py b/catcli/__init__.py index 458cc32..cf29899 100644 --- a/catcli/__init__.py +++ b/catcli/__init__.py @@ -5,7 +5,7 @@ Copyright (c) 2017, deadc0de6 import sys -__version__ = '0.5.2' +__version__ = '0.5.3' def main(): From 88617b8a466f2eb3c9d9ba6727929966ba779cc7 Mon Sep 17 00:00:00 2001 From: "rolf.dergham" Date: Thu, 27 Sep 2018 09:05:13 +0300 Subject: [PATCH 21/24] Compatibility: run tests on Windows --- tests/test_graph.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/test_graph.py b/tests/test_graph.py index 23d147c..6b3a418 100644 --- a/tests/test_graph.py +++ b/tests/test_graph.py @@ -6,6 +6,8 @@ Basic unittest for graph """ import unittest +import tempfile +import os from catcli.catcli import * from catcli.noder import Noder @@ -19,7 +21,7 @@ class TestGraph(unittest.TestCase): def test_graph(self): # init path = 'fake' - gpath = '/tmp/graph.dot' + gpath = tempfile.gettempdir() + os.sep + 'graph.dot' self.addCleanup(clean, path) self.addCleanup(clean, gpath) catalog = Catalog(path, force=True, verbose=False) From 7e0447265da1c50f4ce9fd35267ed4856201a867 Mon Sep 17 00:00:00 2001 From: deadc0de6 Date: Thu, 27 Sep 2018 09:16:31 +0200 Subject: [PATCH 22/24] fix removing files when update #3 --- catcli/noder.py | 27 ++++++++++++++++++++++++++- catcli/walker.py | 19 +++++++++++++++---- tests/helpers.py | 12 ++++++++++++ tests/test_update.py | 43 +++++++++++++++++++++++++++++++++++++++++-- 4 files changed, 94 insertions(+), 7 deletions(-) diff --git a/catcli/noder.py b/catcli/noder.py index 9bc295a..2a85d69 100644 --- a/catcli/noder.py +++ b/catcli/noder.py @@ -175,6 +175,28 @@ class Noder: return self._node(name, self.TYPE_DIR, relpath, parent, maccess=maccess) + def clean_not_flagged(self, top): + '''remove any node not flagged and clean flags''' + cnt = 0 + for node in anytree.PreOrderIter(top): + if node.type != self.TYPE_FILE and node.type != self.TYPE_DIR: + continue + if self._clean(node): + cnt += 1 + return cnt + + def flag(self, node): + node.flag = True + + def _clean(self, node): + '''remove node if not flagged''' + if not self._has_attr(node, 'flag') or \ + not node.flag: + node.parent = None + return True + del node.flag + return False + def storage_node(self, name, path, parent, attr=None): '''create a new node representing a storage''' path = os.path.abspath(path) @@ -237,7 +259,7 @@ class Noder: hf = utils.human(node.free) ht = utils.human(node.total) dt = '' - if node.ts: + if self._has_attr(node, 'ts'): dt = ', date:' dt += utils.epoch_to_str(node.ts) name = '{} (free:{}, total:{}{})'.format(node.name, hf, ht, dt) @@ -369,3 +391,6 @@ class Noder: def _get_storage(self, node): '''recursively traverse up to find storage''' return node.ancestors[1] + + def _has_attr(self, node, attr): + return attr in node.__dict__.keys() diff --git a/catcli/walker.py b/catcli/walker.py index 018307d..19e84f7 100644 --- a/catcli/walker.py +++ b/catcli/walker.py @@ -52,6 +52,12 @@ class Walker: return parent, cnt def reindex(self, path, parent, top): + '''reindex a directory and store in tree''' + cnt = self._reindex(path, parent, top) + cnt += self.noder.clean_not_flagged(top) + return cnt + + def _reindex(self, path, parent, top): '''reindex a directory and store in tree''' self._debug('reindexing starting at {}'.format(path)) cnt = 0 @@ -60,14 +66,16 @@ class Walker: self._debug('found file {} under {}'.format(f, path)) sub = os.path.join(root, f) maccess = os.path.getmtime(sub) - reindex, _ = self._need_reindex(parent, f, maccess) + reindex, n = self._need_reindex(parent, f, maccess) if not reindex: self._debug('\tignore file {}'.format(sub)) + self.noder.flag(n) continue self._debug('\tre-index file {}'.format(sub)) self._log(f) - self.noder.file_node(os.path.basename(f), sub, - parent, path) + n = self.noder.file_node(os.path.basename(f), sub, + parent, path) + self.noder.flag(n) cnt += 1 for d in dirs: self._debug('found dir {} under {}'.format(d, path)) @@ -78,8 +86,10 @@ class Walker: if reindex: self._debug('\tre-index directory {}'.format(sub)) dummy = self.noder.dir_node(base, sub, parent, path) + cnt += 1 + self.noder.flag(dummy) self._debug('reindexing deeper under {}'.format(sub)) - cnt2 = self.reindex(sub, dummy, top) + cnt2 = self._reindex(sub, dummy, top) cnt += cnt2 break self._log(None) @@ -98,6 +108,7 @@ class Walker: if cnode and newer: # remove this node and re-add self._debug('\tis newer') + self._debug('\tremoving node {}'.format(cnode)) cnode.parent = None self._debug('\tis to be re-indexed') return True, cnode diff --git a/tests/helpers.py b/tests/helpers.py index ae5624b..591a282 100644 --- a/tests/helpers.py +++ b/tests/helpers.py @@ -102,6 +102,18 @@ def create_rnd_file(path, filename, content=None): return write_to_file(fpath, content) +def remove(path): + '''Delete file or directory.''' + if not os.path.exists(path): + return + if os.path.islink(path): + os.remove(path) + elif os.path.isdir(path): + shutil.rmtree(path) + else: + os.remove(path) + + def write_to_file(path, content): with open(path, 'w') as f: f.write(content) diff --git a/tests/test_update.py b/tests/test_update.py index 3638df8..85e6894 100644 --- a/tests/test_update.py +++ b/tests/test_update.py @@ -12,6 +12,7 @@ from catcli.noder import Noder from catcli.walker import Walker from catcli.catalog import Catalog from tests.helpers import * +import anytree class TestIndexing(unittest.TestCase): @@ -83,14 +84,21 @@ class TestIndexing(unittest.TestCase): self.assertTrue(len(storage.children) == 7) # ensures files and directories are in - names = [x.name for x in storage.children] + names = [node.name for node in anytree.PreOrderIter(storage)] + print(names) self.assertTrue(os.path.basename(f1) in names) self.assertTrue(os.path.basename(f2) in names) self.assertTrue(os.path.basename(f3) in names) self.assertTrue(os.path.basename(d1) in names) + self.assertTrue(os.path.basename(d1f1) in names) + self.assertTrue(os.path.basename(d1f2) in names) self.assertTrue(os.path.basename(d2) in names) - self.assertTrue(os.path.basename(new3) in names) + self.assertTrue(os.path.basename(d2f1) in names) + self.assertTrue(os.path.basename(new1) in names) self.assertTrue(os.path.basename(new2) in names) + self.assertTrue(os.path.basename(new3) in names) + self.assertTrue(os.path.basename(new4) in names) + self.assertTrue(os.path.basename(new5) in names) for node in storage.children: if node.name == os.path.basename(d1): @@ -103,6 +111,37 @@ class TestIndexing(unittest.TestCase): self.assertTrue(len(node.children) == 1) self.assertTrue(read_from_file(d1f1) == EDIT) + # remove some files + clean(d1f1) + clean(d2) + clean(new2) + clean(new4) + + # update storage + cmd_update(args, noder, catalog, top, debug=True) + + # ensures files and directories are (not) in + names = [node.name for node in anytree.PreOrderIter(storage)] + print(names) + self.assertTrue(os.path.basename(f1) in names) + self.assertTrue(os.path.basename(f2) in names) + self.assertTrue(os.path.basename(f3) in names) + self.assertTrue(os.path.basename(d1) in names) + self.assertTrue(os.path.basename(d1f1) not in names) + self.assertTrue(os.path.basename(d1f2) in names) + self.assertTrue(os.path.basename(d2) not in names) + self.assertTrue(os.path.basename(d2f1) not in names) + self.assertTrue(os.path.basename(new1) in names) + self.assertTrue(os.path.basename(new2) not in names) + self.assertTrue(os.path.basename(new3) in names) + self.assertTrue(os.path.basename(new4) not in names) + self.assertTrue(os.path.basename(new5) not in names) + for node in storage.children: + if node.name == os.path.basename(d1): + self.assertTrue(len(node.children) == 2) + elif node.name == os.path.basename(new3): + self.assertTrue(len(node.children) == 0) + def main(): unittest.main() From ed931cc8223c60d4f24610818e10182f3868227a Mon Sep 17 00:00:00 2001 From: deadc0de6 Date: Thu, 27 Sep 2018 09:29:37 +0200 Subject: [PATCH 23/24] refactoring --- tests/helpers.py | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/tests/helpers.py b/tests/helpers.py index 591a282..ae5624b 100644 --- a/tests/helpers.py +++ b/tests/helpers.py @@ -102,18 +102,6 @@ def create_rnd_file(path, filename, content=None): return write_to_file(fpath, content) -def remove(path): - '''Delete file or directory.''' - if not os.path.exists(path): - return - if os.path.islink(path): - os.remove(path) - elif os.path.isdir(path): - shutil.rmtree(path) - else: - os.remove(path) - - def write_to_file(path, content): with open(path, 'w') as f: f.write(content) From c69cd671a3c043ca888421c29cb0530f7de6c7c4 Mon Sep 17 00:00:00 2001 From: deadc0de6 Date: Thu, 27 Sep 2018 18:14:24 +0200 Subject: [PATCH 24/24] bump version --- catcli/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/catcli/__init__.py b/catcli/__init__.py index cf29899..2f1d317 100644 --- a/catcli/__init__.py +++ b/catcli/__init__.py @@ -5,7 +5,7 @@ Copyright (c) 2017, deadc0de6 import sys -__version__ = '0.5.3' +__version__ = '0.5.4' def main():