diff --git a/catcli/__init__.py b/catcli/__init__.py index f63e2b6..2f1d317 100644 --- a/catcli/__init__.py +++ b/catcli/__init__.py @@ -5,7 +5,7 @@ Copyright (c) 2017, deadc0de6 import sys -__version__ = '0.4.6' +__version__ = '0.5.4' def main(): diff --git a/catcli/catalog.py b/catcli/catalog.py index 3eb2b07..9eea7c5 100644 --- a/catcli/catalog.py +++ b/catcli/catalog.py @@ -22,16 +22,15 @@ class Catalog: self.verbose = verbose # verbosity self.force = force # force overwrite if exists self.metanode = None - # prefer json for git versioning self.pickle = pickle def set_metanode(self, metanode): - ''' remove the metanode until tree is re-written ''' + '''remove the metanode until tree is re-written''' self.metanode = metanode self.metanode.parent = None def restore(self): - ''' restore the catalog ''' + '''restore the catalog''' if not self.path: return None if not os.path.exists(self.path): @@ -41,7 +40,7 @@ class Catalog: return self._restore_json(open(self.path, 'r').read()) def save(self, node): - ''' save the catalog ''' + '''save the catalog''' if not self.path: Logger.err('Path not defined') return False @@ -49,7 +48,7 @@ class Catalog: if d and not os.path.exists(d): os.makedirs(d) elif os.path.exists(self.path) and not self.force: - if not utils.ask('Overwrite \"{}\"'.format(self.path)): + if not utils.ask('Update catalog \"{}\"'.format(self.path)): Logger.info('Catalog not saved') return False if d and not os.path.exists(d): @@ -62,14 +61,22 @@ class Catalog: return self._save_json(node) def _save_pickle(self, node): - ''' pickle the catalog''' + '''pickle the catalog''' pickle.dump(node, open(self.path, 'wb')) if self.verbose: Logger.info('Catalog saved to pickle \"{}\"'.format(self.path)) return True + def _restore_pickle(self): + '''restore the pickled tree''' + root = pickle.load(open(self.path, 'rb')) + if self.verbose: + m = 'Catalog imported from pickle \"{}\"'.format(self.path) + Logger.info(m) + return root + def _save_json(self, node): - ''' export the catalog in json ''' + '''export the catalog in json''' exp = JsonExporter(indent=2, sort_keys=True) with open(self.path, 'w') as f: exp.write(node, f) @@ -77,16 +84,8 @@ class Catalog: Logger.info('Catalog saved to json \"{}\"'.format(self.path)) return True - def _restore_pickle(self): - ''' restore the pickled tree ''' - root = pickle.load(open(self.path, 'rb')) - if self.verbose: - m = 'Catalog imported from pickle \"{}\"'.format(self.path) - Logger.info(m) - return root - def _restore_json(self, string): - ''' restore the tree from json ''' + '''restore the tree from json''' imp = JsonImporter() root = imp.import_(string) if self.verbose: diff --git a/catcli/catcli.py b/catcli/catcli.py index 3612461..43f28fd 100755 --- a/catcli/catcli.py +++ b/catcli/catcli.py @@ -37,6 +37,7 @@ USAGE = """ Usage: {1} index [--catalog=] [--meta=...] [-acfuV] + {1} update [--catalog=] [-acfuV] {1} ls [--catalog=] [-arVS] [] {1} find [--catalog=] [-abV] {1} rm [--catalog=] [-fV] @@ -49,42 +50,69 @@ Usage: {1} --version Options: - --catalog= Path to the catalog [default: {2}]. - --meta= Additional attribute to store [default: ]. - -u --subsize Store size of folders [default: False]. - -a --archive Handle archive file [default: False]. - -f --force Force overwrite [default: False]. - -b --script Output script to manage found file(s) [default: False]. - -S --sortsize Sort by size, largest first [default: False]. - -c --hash Calculate md5 hash [default: False]. - -r --recursive Recursive [default: False]. - -V --verbose Be verbose [default: False]. - -v --version Show version. - -h --help Show this screen. + --catalog= Path to the catalog [default: {2}]. + --meta= Additional attribute to store [default: ]. + -u --subsize Store size of directories [default: False]. + -a --archive Handle archive file [default: False]. + -f --force Do not ask when updating the catalog [default: False]. + -b --script Output script to manage found file(s) [default: False]. + -S --sortsize Sort by size, largest first [default: False]. + -c --hash Calculate md5 hash [default: False]. + -r --recursive Recursive [default: False]. + -V --verbose Be verbose [default: False]. + -v --version Show version. + -h --help Show this screen. """.format(BANNER, NAME, CATALOGPATH) -def cmd_index(args, noder, catalog, top): +def cmd_index(args, noder, catalog, top, debug=False): path = args[''] name = args[''] nohash = not args['--hash'] subsize = args['--subsize'] if not os.path.exists(path): Logger.err('\"{}\" does not exist'.format(path)) - return False + return if name in noder.get_storage_names(top): - Logger.err('storage named \"{}\" already exist'.format(name)) - return False + if not ask('Overwrite storage \"{}\"'.format(name)): + Logger.err('storage named \"{}\" already exist'.format(name)) + return + node = noder.get_storage_node(top, name) + node.parent = None start = datetime.datetime.now() - walker = Walker(noder, nohash=nohash) - attr = noder.clean_storage_attr(args['--meta']) + walker = Walker(noder, nohash=nohash, debug=debug) + attr = noder.format_storage_attr(args['--meta']) root = noder.storage_node(name, path, parent=top, attr=attr) - _, cnt = walker.index(path, name, parent=root, parentpath=path) + _, cnt = walker.index(path, root, name) if subsize: noder.rec_size(root) stop = datetime.datetime.now() Logger.info('Indexed {} file(s) in {}'.format(cnt, stop - start)) - catalog.save(top) + if cnt > 0: + catalog.save(top) + + +def cmd_update(args, noder, catalog, top, debug=False): + path = args[''] + name = args[''] + nohash = not args['--hash'] + subsize = args['--subsize'] + if not os.path.exists(path): + Logger.err('\"{}\" does not exist'.format(path)) + return + root = noder.get_storage_node(top, name) + if not root: + Logger.err('storage named \"{}\" does not exist'.format(name)) + return + start = datetime.datetime.now() + walker = Walker(noder, nohash=nohash, debug=debug) + cnt = walker.reindex(path, root, top) + if subsize: + noder.rec_size(root) + stop = datetime.datetime.now() + Logger.info('updated {} file(s) in {}'.format(cnt, stop - start)) + if cnt > 0: + catalog.save(top) def cmd_ls(args, noder, top): @@ -104,15 +132,14 @@ def cmd_ls(args, noder, top): def cmd_rm(args, noder, catalog, top): - what = args[''] - storages = list(x.name for x in top.children) - if what in storages: - node = next(filter(lambda x: x.name == what, top.children)) + name = args[''] + node = noder.get_storage_node(top, name) + if node: node.parent = None if catalog.save(top): - Logger.info('Storage \"{}\" removed'.format(what)) + Logger.info('Storage \"{}\" removed'.format(name)) else: - Logger.err('Storage named \"{}\" does not exist'.format(what)) + Logger.err('Storage named \"{}\" does not exist'.format(name)) return top @@ -201,7 +228,9 @@ def main(): # parse command if args['index']: - cmd_index(args, noder, catalog, top) + cmd_index(args, noder, catalog, top, debug=args['--verbose']) + if args['update']: + cmd_update(args, noder, catalog, top, debug=args['--verbose']) elif args['find']: cmd_find(args, noder, top) elif args['tree']: @@ -221,7 +250,7 @@ def main(): if __name__ == '__main__': - ''' entry point ''' + '''entry point''' if main(): sys.exit(0) sys.exit(1) diff --git a/catcli/decomp.py b/catcli/decomp.py index fc69ac5..9963888 100644 --- a/catcli/decomp.py +++ b/catcli/decomp.py @@ -29,22 +29,25 @@ class Decomp: 'zip': self._zip} def get_format(self): + '''return list of supported extensions''' return list(self.ext.keys()) def get_names(self, path): - ''' get tree of compressed archive ''' + '''get tree of compressed archive''' ext = os.path.splitext(path)[1][1:] if ext in list(self.ext.keys()): return self.ext[ext](path) return None def _tar(self, path): + '''return list of file names in tar''' if not tarfile.is_tarfile(path): return None tar = tarfile.open(path, "r") return tar.getnames() def _zip(self, path): + '''return list of file names in zip''' if not zipfile.is_zipfile(path): return None z = zipfile.ZipFile(path) diff --git a/catcli/logger.py b/catcli/logger.py index 819acd5..5500a59 100644 --- a/catcli/logger.py +++ b/catcli/logger.py @@ -29,7 +29,7 @@ class Logger: # node specific output ###################################################################### def storage(pre, name, attr): - ''' print a storage node ''' + '''print a storage node''' end = '' if attr: end = ' {}({}){}'.format(Logger.GRAY, attr, Logger.RESET) @@ -38,13 +38,13 @@ class Logger: sys.stdout.write('{}\n'.format(s)) def file(pre, name, attr): - ''' print a file node ''' + '''print a file node''' s = '{}{}'.format(pre, name) s += ' {}[{}]{}'.format(Logger.GRAY, attr, Logger.RESET) sys.stdout.write('{}\n'.format(s)) def dir(pre, name, depth='', attr=None): - ''' print a directory node ''' + '''print a directory node''' end = [] if depth != '': end.append('nbfiles:{}'.format(depth)) @@ -65,27 +65,28 @@ class Logger: # generic output ###################################################################### def out(string): - ''' to stdout ''' + '''to stdout''' sys.stdout.write('{}\n'.format(string)) def log(string): - ''' to stderr ''' + '''to stderr''' sys.stderr.write('{}\n'.format(string)) def info(string): - ''' to stderr in color ''' + '''to stderr in color''' s = '{}{}{}'.format(Logger.MAGENTA, string, Logger.RESET) sys.stderr.write('{}\n'.format(s)) def err(string): - ''' to stderr in RED ''' + '''to stderr in RED''' s = '{}{}{}'.format(Logger.RED, string, Logger.RESET) sys.stderr.write('{}\n'.format(s)) def progr(string): - ''' print progress ''' + '''print progress''' sys.stderr.write('{}\r'.format(string)) sys.stderr.flush() def bold(string): + '''make it bold''' return '{}{}{}'.format(Logger.BOLD, string, Logger.RESET) diff --git a/catcli/noder.py b/catcli/noder.py index 2f0a457..2a85d69 100644 --- a/catcli/noder.py +++ b/catcli/noder.py @@ -29,7 +29,7 @@ class Noder: TOPNAME = 'top' METANAME = 'meta' - TYPE_TOP = 'top' # tip top ;-) + TYPE_TOP = 'top' TYPE_FILE = 'file' TYPE_DIR = 'dir' TYPE_ARC = 'arc' @@ -44,36 +44,91 @@ class Noder: if self.arc: self.decomp = Decomp() - def set_hashing(self, val): - self.hash = val - def get_storage_names(self, top): - ''' return a list of all storage names ''' + '''return a list of all storage names''' return [x.name for x in list(top.children)] - def clean_storage_attr(self, attr): - if not attr: - return '' - return ', '.join(attr) + def get_storage_node(self, top, name): + '''return the storage node if any''' + for n in top.children: + if n.type != self.TYPE_STORAGE: + continue + if n.name == name: + return n + return None - def get_node(self, top, path): - ''' get the node at path ''' + def get_node(self, top, path, quiet=False): + '''get the node by internal tree path''' r = anytree.resolver.Resolver('name') try: return r.get(top, path) except anytree.resolver.ChildResolverError: - Logger.err('No node at path \"{}\"'.format(path)) + if not quiet: + Logger.err('No node at path \"{}\"'.format(path)) return None + def get_node_if_newer(self, top, path, maccess): + '''return the node (if any) and if path is newer''' + treepath = path.lstrip(os.sep) + node = self.get_node(top, treepath, quiet=True) + if not node: + # node does not exist + return None, True + if not node.maccess: + # force re-indexing if no maccess + return node, True + old_maccess = node.maccess + if float(maccess) > float(old_maccess): + return node, True + return node, False + + def get_meta_node(self, top): + '''return the meta node if any''' + try: + return next(filter(lambda x: x.type == self.TYPE_META, + top.children)) + except StopIteration: + return None + + def rec_size(self, node): + '''recursively traverse tree and store dir size''' + if self.verbose: + Logger.info('getting directory size recursively') + if node.type == self.TYPE_FILE: + return node.size + size = 0 + for i in node.children: + if node.type == self.TYPE_DIR: + size += self.rec_size(i) + if node.type == self.TYPE_STORAGE: + self.rec_size(i) + else: + continue + node.size = size + return size + + ############################################################### + # public helpers + ############################################################### + def format_storage_attr(self, attr): + '''format the storage attr for saving''' + if not attr: + return '' + return ', '.join(attr) + + def set_hashing(self, val): + '''hash files when indexing''' + self.hash = val + ############################################################### # node creationg ############################################################### def new_top_node(self): - ''' create a new top node''' + '''create a new top node''' return anytree.AnyNode(name=self.TOPNAME, type=self.TYPE_TOP) def update_metanode(self, meta): - ''' create or update meta node information ''' + '''create or update meta node information''' epoch = int(time.time()) if not meta: attr = {} @@ -86,7 +141,7 @@ class Noder: return meta def file_node(self, name, path, parent, storagepath): - ''' create a new node representing a file ''' + '''create a new node representing a file''' if not os.path.exists(path): Logger.err('File \"{}\" does not exist'.format(path)) return None @@ -102,8 +157,9 @@ class Noder: relpath = os.path.join(os.path.basename(storagepath), os.path.relpath(path, start=storagepath)) + maccess = os.path.getmtime(path) n = self._node(name, self.TYPE_FILE, relpath, parent, - size=st.st_size, md5=md5) + size=st.st_size, md5=md5, maccess=maccess) if self.arc: ext = os.path.splitext(path)[1][1:] if ext in self.decomp.get_format(): @@ -112,13 +168,37 @@ class Noder: return n def dir_node(self, name, path, parent, storagepath): - ''' create a new node representing a directory ''' + '''create a new node representing a directory''' path = os.path.abspath(path) relpath = os.path.relpath(path, start=storagepath) - return self._node(name, self.TYPE_DIR, relpath, parent) + maccess = os.path.getmtime(path) + return self._node(name, self.TYPE_DIR, relpath, + parent, maccess=maccess) + + def clean_not_flagged(self, top): + '''remove any node not flagged and clean flags''' + cnt = 0 + for node in anytree.PreOrderIter(top): + if node.type != self.TYPE_FILE and node.type != self.TYPE_DIR: + continue + if self._clean(node): + cnt += 1 + return cnt + + def flag(self, node): + node.flag = True + + def _clean(self, node): + '''remove node if not flagged''' + if not self._has_attr(node, 'flag') or \ + not node.flag: + node.parent = None + return True + del node.flag + return False def storage_node(self, name, path, parent, attr=None): - ''' create a new node representing a storage ''' + '''create a new node representing a storage''' path = os.path.abspath(path) free = psutil.disk_usage(path).free total = psutil.disk_usage(path).total @@ -127,21 +207,24 @@ class Noder: total=total, parent=parent, attr=attr, ts=epoch) def archive_node(self, name, path, parent, archive): + '''crete a new node for archive data''' return anytree.AnyNode(name=name, type=self.TYPE_ARC, relpath=path, parent=parent, size=0, md5=None, archive=archive) - def _node(self, name, type, relpath, parent, size=None, md5=None): - ''' generic node creation ''' + def _node(self, name, type, relpath, parent, + size=None, md5=None, maccess=None): + '''generic node creation''' return anytree.AnyNode(name=name, type=type, relpath=relpath, - parent=parent, size=size, md5=md5) + parent=parent, size=size, + md5=md5, maccess=maccess) ############################################################### # printing ############################################################### def _print_node(self, node, pre='', withpath=False, withdepth=False, withstorage=False): - ''' print a node ''' + '''print a node''' if node.type == self.TYPE_TOP: Logger.out('{}{}'.format(pre, node.name)) elif node.type == self.TYPE_FILE: @@ -175,7 +258,11 @@ class Noder: elif node.type == self.TYPE_STORAGE: hf = utils.human(node.free) ht = utils.human(node.total) - name = '{} (free:{}, total:{})'.format(node.name, hf, ht) + dt = '' + if self._has_attr(node, 'ts'): + dt = ', date:' + dt += utils.epoch_to_str(node.ts) + name = '{} (free:{}, total:{}{})'.format(node.name, hf, ht, dt) Logger.storage(pre, name, node.attr) elif node.type == self.TYPE_ARC: if self.arc: @@ -185,16 +272,22 @@ class Noder: # Logger.out('{}{}'.format(pre, node.name)) def print_tree(self, node, style=anytree.ContRoundStyle()): - ''' print the tree similar to unix tool "tree" ''' + '''print the tree similar to unix tool "tree"''' rend = anytree.RenderTree(node, childiter=self._sort_tree) for pre, fill, node in rend: self._print_node(node, pre=pre, withdepth=True) + def to_dot(self, node, path='tree.dot'): + '''export to dot for graphing''' + anytree.exporter.DotExporter(node).to_dotfile(path) + Logger.info('dot file created under \"{}\"'.format(path)) + return 'dot {} -T png -o /tmp/tree.png'.format(path) + ############################################################### # searching ############################################################### def find_name(self, root, key, script=False): - ''' find files based on their names ''' + '''find files based on their names''' if self.verbose: Logger.info('searching for \"{}\"'.format(key)) self.term = key @@ -214,7 +307,7 @@ class Noder: return found def _find_name(self, node): - ''' callback for finding files ''' + '''callback for finding files''' if self.term.lower() in node.name.lower(): return True return False @@ -223,7 +316,7 @@ class Noder: # climbing ############################################################### def walk(self, root, path, rec=False): - ''' walk the tree for ls based on names ''' + '''walk the tree for ls based on names''' if self.verbose: Logger.info('walking path: \"{}\"'.format(path)) r = anytree.resolver.Resolver('name') @@ -248,7 +341,7 @@ class Noder: # tree creationg ############################################################### def _add_entry(self, name, top, resolv): - ''' add an entry to the tree ''' + '''add an entry to the tree''' entries = name.rstrip(os.sep).split(os.sep) if len(entries) == 1: self.archive_node(name, name, top, top.name) @@ -262,7 +355,7 @@ class Noder: self.archive_node(f, name, top, top.name) def list_to_tree(self, parent, names): - ''' convert list of files to a tree ''' + '''convert list of files to a tree''' if not names: return r = anytree.resolver.Resolver('name') @@ -274,7 +367,7 @@ class Noder: # diverse ############################################################### def _sort_tree(self, items): - ''' sorting a list of items ''' + '''sorting a list of items''' return sorted(items, key=self._sort, reverse=self.sortsize) def _sort(self, x): @@ -283,11 +376,11 @@ class Noder: return self._sort_fs(x) def _sort_fs(self, n): - ''' sorting nodes dir first and alpha ''' + '''sorting nodes dir first and alpha''' return (n.type, n.name.lstrip('\.').lower()) def _sort_size(self, n): - ''' sorting nodes by size ''' + '''sorting nodes by size''' try: if not n.size: return 0 @@ -295,37 +388,9 @@ class Noder: except AttributeError: return 0 - def to_dot(self, node, path='tree.dot'): - ''' export to dot for graphing ''' - anytree.exporter.DotExporter(node).to_dotfile(path) - Logger.info('dot file created under \"{}\"'.format(path)) - return 'dot {} -T png -o /tmp/tree.png'.format(path) - def _get_storage(self, node): - ''' recursively traverse up to find storage ''' + '''recursively traverse up to find storage''' return node.ancestors[1] - def get_meta_node(self, top): - ''' return the meta node if any ''' - try: - return next(filter(lambda x: x.type == self.TYPE_META, - top.children)) - except StopIteration: - return None - - def rec_size(self, node): - ''' recursively traverse tree and store dir size ''' - if self.verbose: - Logger.info('getting folder size recursively') - if node.type == self.TYPE_FILE: - return node.size - size = 0 - for i in node.children: - if node.type == self.TYPE_DIR: - size += self.rec_size(i) - if node.type == self.TYPE_STORAGE: - self.rec_size(i) - else: - continue - node.size = size - return size + def _has_attr(self, node, attr): + return attr in node.__dict__.keys() diff --git a/catcli/utils.py b/catcli/utils.py index 2c1e23e..73f08dc 100644 --- a/catcli/utils.py +++ b/catcli/utils.py @@ -10,13 +10,14 @@ import hashlib import sys import tempfile import subprocess +import datetime # local imports from catcli.logger import Logger def md5sum(path): - ''' calculate md5 sum of a file ''' + '''calculate md5 sum of a file''' p = os.path.realpath(path) if not os.path.exists(p): Logger.err('\nunable to get md5sum on {}'.format(path)) @@ -36,7 +37,7 @@ def md5sum(path): def human(size): - ''' human readable size ''' + '''human readable size''' div = 1024. suf = ['B', 'K', 'M', 'G', 'T', 'P'] if size < div: @@ -48,14 +49,21 @@ def human(size): return '{:.1f}{}'.format(size, suf[-1]) +def epoch_to_str(epoch): + '''convert epoch to string''' + fmt = '%Y-%m-%d %H:%M:%S' + t = datetime.datetime.fromtimestamp(float(epoch)) + return t.strftime(fmt) + + def ask(question): - ''' ask the user what to do ''' + '''ask the user what to do''' resp = input('{} [y|N] ? '.format(question)) return resp.lower() == 'y' def edit(string): - ''' edit the information with the default EDITOR ''' + '''edit the information with the default EDITOR''' string = string.encode('utf-8') EDITOR = os.environ.get('EDITOR', 'vim') with tempfile.NamedTemporaryFile(prefix='catcli', suffix='.tmp') as f: diff --git a/catcli/walker.py b/catcli/walker.py index 6dd880b..19e84f7 100644 --- a/catcli/walker.py +++ b/catcli/walker.py @@ -17,35 +17,114 @@ class Walker: MAXLINE = 80 - 15 - def __init__(self, noder, nohash=False): + def __init__(self, noder, nohash=False, debug=False): self.noder = noder self.noder.set_hashing(not nohash) + self.debug = debug - def index(self, path, name, parentpath=None, parent=None, isdir=False): - ''' index a folder and store in tree ''' + def index(self, path, parent, name): + '''index a directory and store in tree''' + self._debug('indexing starting at {}'.format(path)) if not parent: parent = noder.dir_node(name, path, parent) cnt = 0 for (root, dirs, files) in os.walk(path): for f in files: + self._debug('found file {} under {}'.format(f, path)) sub = os.path.join(root, f) - n = f - if len(n) > self.MAXLINE: - n = f[:self.MAXLINE] + '...' - Logger.progr('indexing: {:80}'.format(n)) + self._log(f) + self._debug('index file {}'.format(sub)) self.noder.file_node(os.path.basename(f), sub, - parent, parentpath) + parent, path) cnt += 1 for d in dirs: + self._debug('found dir {} under {}'.format(d, path)) base = os.path.basename(d) sub = os.path.join(root, d) - dummy = self.noder.dir_node(base, sub, parent, parentpath) - _, cnt2 = self.index(sub, base, - parent=dummy, parentpath=parentpath) + self._debug('index directory {}'.format(sub)) + dummy = self.noder.dir_node(base, sub, parent, path) + cnt += 1 + _, cnt2 = self.index(sub, dummy, base) cnt += cnt2 break - # clean line - Logger.progr('{:80}'.format(' ')) - + self._log(None) return parent, cnt + + def reindex(self, path, parent, top): + '''reindex a directory and store in tree''' + cnt = self._reindex(path, parent, top) + cnt += self.noder.clean_not_flagged(top) + return cnt + + def _reindex(self, path, parent, top): + '''reindex a directory and store in tree''' + self._debug('reindexing starting at {}'.format(path)) + cnt = 0 + for (root, dirs, files) in os.walk(path): + for f in files: + self._debug('found file {} under {}'.format(f, path)) + sub = os.path.join(root, f) + maccess = os.path.getmtime(sub) + reindex, n = self._need_reindex(parent, f, maccess) + if not reindex: + self._debug('\tignore file {}'.format(sub)) + self.noder.flag(n) + continue + self._debug('\tre-index file {}'.format(sub)) + self._log(f) + n = self.noder.file_node(os.path.basename(f), sub, + parent, path) + self.noder.flag(n) + cnt += 1 + for d in dirs: + self._debug('found dir {} under {}'.format(d, path)) + base = os.path.basename(d) + sub = os.path.join(root, d) + maccess = os.path.getmtime(sub) + reindex, dummy = self._need_reindex(parent, base, maccess) + if reindex: + self._debug('\tre-index directory {}'.format(sub)) + dummy = self.noder.dir_node(base, sub, parent, path) + cnt += 1 + self.noder.flag(dummy) + self._debug('reindexing deeper under {}'.format(sub)) + cnt2 = self._reindex(sub, dummy, top) + cnt += cnt2 + break + self._log(None) + return cnt + + def _need_reindex(self, top, path, maccess): + '''test if node needs re-indexing''' + cnode, newer = self.noder.get_node_if_newer(top, path, maccess) + if not cnode: + self._debug('\tdoes not exist') + return True, cnode + if cnode and not newer: + # ignore this node + self._debug('\tis not newer') + return False, cnode + if cnode and newer: + # remove this node and re-add + self._debug('\tis newer') + self._debug('\tremoving node {}'.format(cnode)) + cnode.parent = None + self._debug('\tis to be re-indexed') + return True, cnode + + def _debug(self, string): + if not self.debug: + return + Logger.log(string) + + def _log(self, string): + if self.debug: + return + if not string: + # clean + Logger.progr('{:80}'.format(' ')) + return + if len(string) > self.MAXLINE: + string = string[:self.MAXLINE] + '...' + Logger.progr('indexing: {:80}'.format(string)) diff --git a/tests/helpers.py b/tests/helpers.py index 45f0943..ae5624b 100644 --- a/tests/helpers.py +++ b/tests/helpers.py @@ -10,6 +10,7 @@ import string import random import tempfile import shutil +import subprocess TMPSUFFIX = '.catcli' @@ -35,6 +36,22 @@ def clean(path): else: os.remove(path) + +def edit_file(path, newcontent): + if not os.path.exists(path): + write_to_file(path, newcontent) + else: + write_to_file(path, newcontent) + + +def unix_tree(path): + if not os.path.exists(path): + return + # cmd = ['tree', path] + cmd = ['ls', '-R', path] + subprocess.call(cmd) + + ############################################################ # catcli specific ############################################################ @@ -82,9 +99,21 @@ def create_rnd_file(path, filename, content=None): if not content: content = get_rnd_string(100) fpath = os.path.join(path, filename) - with open(fpath, 'w') as f: + return write_to_file(fpath, content) + + +def write_to_file(path, content): + with open(path, 'w') as f: f.write(content) - return fpath + return path + + +def read_from_file(path): + if not os.path.exists(path): + return '' + with open(path, 'r') as f: + content = f.read() + return content ############################################################ diff --git a/tests/test_graph.py b/tests/test_graph.py index 23d147c..6b3a418 100644 --- a/tests/test_graph.py +++ b/tests/test_graph.py @@ -6,6 +6,8 @@ Basic unittest for graph """ import unittest +import tempfile +import os from catcli.catcli import * from catcli.noder import Noder @@ -19,7 +21,7 @@ class TestGraph(unittest.TestCase): def test_graph(self): # init path = 'fake' - gpath = '/tmp/graph.dot' + gpath = tempfile.gettempdir() + os.sep + 'graph.dot' self.addCleanup(clean, path) self.addCleanup(clean, gpath) catalog = Catalog(path, force=True, verbose=False) diff --git a/tests/test_index.py b/tests/test_index.py index 973d0ef..187dd00 100644 --- a/tests/test_index.py +++ b/tests/test_index.py @@ -47,7 +47,7 @@ class TestIndexing(unittest.TestCase): # create fake args tmpdirname = 'tmpdir' args = {'': dirpath, '': tmpdirname, - '--hash': True, '--meta': 'some meta', + '--hash': True, '--meta': ['some meta'], '--subsize': True, '--verbose': True} # index the directory diff --git a/tests/test_update.py b/tests/test_update.py new file mode 100644 index 0000000..85e6894 --- /dev/null +++ b/tests/test_update.py @@ -0,0 +1,151 @@ +""" +author: deadc0de6 (https://github.com/deadc0de6) +Copyright (c) 2017, deadc0de6 + +Basic unittest for updating an index +""" + +import unittest + +from catcli.catcli import * +from catcli.noder import Noder +from catcli.walker import Walker +from catcli.catalog import Catalog +from tests.helpers import * +import anytree + + +class TestIndexing(unittest.TestCase): + + def test_index(self): + # init + workingdir = get_tempdir() + catalogpath = create_rnd_file(workingdir, 'catalog.json', content='') + self.addCleanup(clean, workingdir) + + dirpath = get_tempdir() + self.addCleanup(clean, dirpath) + + # create 3 files + f1 = create_rnd_file(dirpath, 'file1') + f2 = create_rnd_file(dirpath, 'file2') + f3 = create_rnd_file(dirpath, 'file3') + + # create 2 directories + d1 = create_dir(dirpath, 'dir1') + d2 = create_dir(dirpath, 'dir2') + + # fill directories with files + d1f1 = create_rnd_file(d1, 'dir1file1') + d1f2 = create_rnd_file(d1, 'dir1file2') + d2f1 = create_rnd_file(d2, 'dir2file1') + + noder = Noder() + top = noder.new_top_node() + walker = Walker(noder) + catalog = Catalog(catalogpath, force=True, verbose=False) + + # create fake args + tmpdirname = 'tmpdir' + args = {'': dirpath, '': tmpdirname, + '--hash': True, '--meta': ['some meta'], + '--subsize': True, '--verbose': True} + + # index the directory + unix_tree(dirpath) + cmd_index(args, noder, catalog, top, debug=True) + self.assertTrue(os.stat(catalogpath).st_size != 0) + + # print catalog + noder.print_tree(top) + + # add some files and directories + new1 = create_rnd_file(d1, 'newf1') + new2 = create_rnd_file(dirpath, 'newf2') + new3 = create_dir(dirpath, 'newd3') + new4 = create_dir(d2, 'newd4') + new5 = create_rnd_file(new4, 'newf5') + unix_tree(dirpath) + + # modify files + EDIT = 'edited' + edit_file(d1f1, EDIT) + + # update storage + cmd_update(args, noder, catalog, top, debug=True) + + # print catalog + # print(read_from_file(catalogpath)) + noder.print_tree(top) + + # explore the top node to find all nodes + self.assertTrue(len(top.children) == 1) + storage = top.children[0] + self.assertTrue(len(storage.children) == 7) + + # ensures files and directories are in + names = [node.name for node in anytree.PreOrderIter(storage)] + print(names) + self.assertTrue(os.path.basename(f1) in names) + self.assertTrue(os.path.basename(f2) in names) + self.assertTrue(os.path.basename(f3) in names) + self.assertTrue(os.path.basename(d1) in names) + self.assertTrue(os.path.basename(d1f1) in names) + self.assertTrue(os.path.basename(d1f2) in names) + self.assertTrue(os.path.basename(d2) in names) + self.assertTrue(os.path.basename(d2f1) in names) + self.assertTrue(os.path.basename(new1) in names) + self.assertTrue(os.path.basename(new2) in names) + self.assertTrue(os.path.basename(new3) in names) + self.assertTrue(os.path.basename(new4) in names) + self.assertTrue(os.path.basename(new5) in names) + + for node in storage.children: + if node.name == os.path.basename(d1): + self.assertTrue(len(node.children) == 3) + elif node.name == os.path.basename(d2): + self.assertTrue(len(node.children) == 2) + elif node.name == os.path.basename(new3): + self.assertTrue(len(node.children) == 0) + elif node.name == os.path.basename(new4): + self.assertTrue(len(node.children) == 1) + self.assertTrue(read_from_file(d1f1) == EDIT) + + # remove some files + clean(d1f1) + clean(d2) + clean(new2) + clean(new4) + + # update storage + cmd_update(args, noder, catalog, top, debug=True) + + # ensures files and directories are (not) in + names = [node.name for node in anytree.PreOrderIter(storage)] + print(names) + self.assertTrue(os.path.basename(f1) in names) + self.assertTrue(os.path.basename(f2) in names) + self.assertTrue(os.path.basename(f3) in names) + self.assertTrue(os.path.basename(d1) in names) + self.assertTrue(os.path.basename(d1f1) not in names) + self.assertTrue(os.path.basename(d1f2) in names) + self.assertTrue(os.path.basename(d2) not in names) + self.assertTrue(os.path.basename(d2f1) not in names) + self.assertTrue(os.path.basename(new1) in names) + self.assertTrue(os.path.basename(new2) not in names) + self.assertTrue(os.path.basename(new3) in names) + self.assertTrue(os.path.basename(new4) not in names) + self.assertTrue(os.path.basename(new5) not in names) + for node in storage.children: + if node.name == os.path.basename(d1): + self.assertTrue(len(node.children) == 2) + elif node.name == os.path.basename(new3): + self.assertTrue(len(node.children) == 0) + + +def main(): + unittest.main() + + +if __name__ == '__main__': + main()