diff --git a/README.md b/README.md index ea6c0a4..7e4e452 100644 --- a/README.md +++ b/README.md @@ -200,8 +200,9 @@ Storage entry can be edited with following catcli commands: ## Update catalog The catalog can be updated with the `update` command. -Updates are based on the access time of each of the files. If using -`-c --hash`, only new files are re-hashed. +Updates are based on the access time of each of the files and on the +hash checksum if present (catalog was indexed with `-c --hash` and +`update` is called with the switch `-c --hash`). # Examples diff --git a/catcli/catalog.py b/catcli/catalog.py index 9eea7c5..5c7542b 100644 --- a/catcli/catalog.py +++ b/catcli/catalog.py @@ -17,10 +17,16 @@ from catcli.logger import Logger class Catalog: - def __init__(self, path, pickle=False, verbose=False, force=False): - self.path = path # catalog path - self.verbose = verbose # verbosity - self.force = force # force overwrite if exists + def __init__(self, path, pickle=False, debug=False, force=False): + ''' + @path: catalog path + @pickle: use pickle + @debug: debug mode + @force: force overwrite if exists + ''' + self.path = path + self.debug = debug + self.force = force self.metanode = None self.pickle = pickle @@ -60,19 +66,22 @@ class Catalog: return self._save_pickle(node) return self._save_json(node) + def _debug(self, text): + if not self.debug: + return + Logger.debug(text) + def _save_pickle(self, node): '''pickle the catalog''' pickle.dump(node, open(self.path, 'wb')) - if self.verbose: - Logger.info('Catalog saved to pickle \"{}\"'.format(self.path)) + self._debug('Catalog saved to pickle \"{}\"'.format(self.path)) return True def _restore_pickle(self): '''restore the pickled tree''' root = pickle.load(open(self.path, 'rb')) - if self.verbose: - m = 'Catalog imported from pickle \"{}\"'.format(self.path) - Logger.info(m) + m = 'Catalog imported from pickle \"{}\"'.format(self.path) + self._debug(m) return root def _save_json(self, node): @@ -80,14 +89,12 @@ class Catalog: exp = JsonExporter(indent=2, sort_keys=True) with open(self.path, 'w') as f: exp.write(node, f) - if self.verbose: - Logger.info('Catalog saved to json \"{}\"'.format(self.path)) + self._debug('Catalog saved to json \"{}\"'.format(self.path)) return True def _restore_json(self, string): '''restore the tree from json''' imp = JsonImporter() root = imp.import_(string) - if self.verbose: - Logger.info('Catalog imported from json \"{}\"'.format(self.path)) + self._debug('Catalog imported from json \"{}\"'.format(self.path)) return root diff --git a/catcli/catcli.py b/catcli/catcli.py index af01244..401a610 100755 --- a/catcli/catcli.py +++ b/catcli/catcli.py @@ -37,7 +37,7 @@ USAGE = """ Usage: {1} index [--catalog=] [--meta=...] [-acfnV] - {1} update [--catalog=] [-acfnV] + {1} update [--catalog=] [-acfnV] [--lpath=] {1} ls [--catalog=] [-arVS] [] {1} find [--catalog=] [-abdVP] [--path=] {1} rm [--catalog=] [-fV] @@ -50,28 +50,30 @@ Usage: {1} --version Options: - --catalog= Path to the catalog [default: {2}]. - --meta= Additional attribute to store [default: ]. - -p --path= Start path. - -n --no-subsize Do not store size of directories [default: False]. - -a --archive Handle archive file [default: False]. - -f --force Do not ask when updating the catalog [default: False]. - -d --directory Only directory (default: False). - -b --script Output script to manage found file(s) [default: False]. - -S --sortsize Sort by size, largest first [default: False]. - -c --hash Calculate md5 hash [default: False]. - -r --recursive Recursive [default: False]. - -P --parent Ignore stored relpath [default: True]. - -V --verbose Be verbose [default: False]. - -v --version Show version. - -h --help Show this screen. + --catalog= Path to the catalog [default: {2}]. + --meta= Additional attribute to store [default: ]. + -p --path= Start path. + -l --lpath= Path where changes are logged [default: ] + -n --no-subsize Do not store size of directories [default: False]. + -a --archive Handle archive file [default: False]. + -f --force Do not ask when updating the catalog [default: False]. + -d --directory Only directory (default: False). + -b --script Output script to manage found file(s) [default: False]. + -S --sortsize Sort by size, largest first [default: False]. + -c --hash Calculate md5 hash [default: False]. + -r --recursive Recursive [default: False]. + -P --parent Ignore stored relpath [default: True]. + -V --verbose Be verbose [default: False]. + -v --version Show version. + -h --help Show this screen. """.format(BANNER, NAME, CATALOGPATH) -def cmd_index(args, noder, catalog, top, debug=False): +def cmd_index(args, noder, catalog, top): path = args[''] name = args[''] - nohash = not args['--hash'] + hash = args['--hash'] + debug = args['--verbose'] subsize = not args['--no-subsize'] if not os.path.exists(path): Logger.err('\"{}\" does not exist'.format(path)) @@ -87,7 +89,7 @@ def cmd_index(args, noder, catalog, top, debug=False): node = noder.get_storage_node(top, name) node.parent = None start = datetime.datetime.now() - walker = Walker(noder, nohash=nohash, debug=debug) + walker = Walker(noder, hash=hash, debug=debug) attr = noder.format_storage_attr(args['--meta']) root = noder.storage_node(name, path, parent=top, attr=attr) _, cnt = walker.index(path, root, name) @@ -99,10 +101,12 @@ def cmd_index(args, noder, catalog, top, debug=False): catalog.save(top) -def cmd_update(args, noder, catalog, top, debug=False): +def cmd_update(args, noder, catalog, top): path = args[''] name = args[''] - nohash = not args['--hash'] + hash = args['--hash'] + logpath = args['--lpath'] + debug = args['--verbose'] subsize = not args['--no-subsize'] if not os.path.exists(path): Logger.err('\"{}\" does not exist'.format(path)) @@ -112,7 +116,8 @@ def cmd_update(args, noder, catalog, top, debug=False): Logger.err('storage named \"{}\" does not exist'.format(name)) return start = datetime.datetime.now() - walker = Walker(noder, nohash=nohash, debug=debug) + walker = Walker(noder, hash=hash, debug=debug, + logpath=logpath) cnt = walker.reindex(path, root, top) if subsize: noder.rec_size(root) @@ -212,8 +217,8 @@ def cmd_edit(args, noder, catalog, top): def banner(): - Logger.log(BANNER) - Logger.log("") + Logger.out(BANNER) + Logger.out("") def main(): @@ -230,10 +235,10 @@ def main(): banner() # init noder - noder = Noder(verbose=args['--verbose'], sortsize=args['--sortsize'], + noder = Noder(debug=args['--verbose'], sortsize=args['--sortsize'], arc=args['--archive']) # init catalog - catalog = Catalog(args['--catalog'], verbose=args['--verbose'], + catalog = Catalog(args['--catalog'], debug=args['--verbose'], force=args['--force']) # init top node top = catalog.restore() @@ -241,14 +246,14 @@ def main(): top = noder.new_top_node() # handle the meta node - meta = noder.update_metanode(noder.get_meta_node(top)) + meta = noder.update_metanode(top) catalog.set_metanode(meta) # parse command if args['index']: - cmd_index(args, noder, catalog, top, debug=args['--verbose']) + cmd_index(args, noder, catalog, top) if args['update']: - cmd_update(args, noder, catalog, top, debug=args['--verbose']) + cmd_update(args, noder, catalog, top) elif args['find']: cmd_find(args, noder, top) elif args['tree']: diff --git a/catcli/logger.py b/catcli/logger.py index 66a0134..64ded23 100644 --- a/catcli/logger.py +++ b/catcli/logger.py @@ -71,17 +71,17 @@ class Logger: # generic output ###################################################################### def out(string): - '''to stdout''' + '''to stdout no color''' sys.stdout.write('{}\n'.format(string)) - def log(string): - '''to stderr''' - sys.stderr.write('{}\n'.format(string)) + def debug(string): + '''to stderr no color''' + sys.stderr.write('[DBG] {}\n'.format(string)) def info(string): - '''to stderr in color''' + '''to stdout in color''' s = '{}{}{}'.format(Logger.MAGENTA, string, Logger.RESET) - sys.stderr.write('{}\n'.format(s)) + sys.stdout.write('{}\n'.format(s)) def err(string): '''to stderr in RED''' @@ -96,3 +96,10 @@ class Logger: def bold(string): '''make it bold''' return '{}{}{}'.format(Logger.BOLD, string, Logger.RESET) + + def flog(path, string, append=True): + mode = 'w' + if append: + mode = 'a' + with open(path, mode) as f: + f.write(string) diff --git a/catcli/noder.py b/catcli/noder.py index ba92479..1043b1d 100644 --- a/catcli/noder.py +++ b/catcli/noder.py @@ -36,9 +36,14 @@ class Noder: TYPE_STORAGE = 'storage' TYPE_META = 'meta' - def __init__(self, verbose=False, sortsize=False, arc=False): + def __init__(self, debug=False, sortsize=False, arc=False): + ''' + @debug: debug mode + @sortsize: sort nodes by size + @arch: handle archive + ''' self.hash = True - self.verbose = verbose + self.debug = debug self.sortsize = sortsize self.arc = arc if self.arc: @@ -61,45 +66,59 @@ class Noder: '''get the node by internal tree path''' r = anytree.resolver.Resolver('name') try: - return r.get(top, path) + p = os.path.basename(path) + return r.get(top, p) except anytree.resolver.ChildResolverError: if not quiet: - Logger.err('No node at path \"{}\"'.format(path)) + Logger.err('No node at path \"{}\"'.format(p)) return None - def get_node_if_newer(self, top, path, maccess): - '''return the node (if any) and if path is newer''' - treepath = path.lstrip(os.sep) + def get_node_if_changed(self, top, path, treepath): + ''' + return the node (if any) and if it has changed + @top: top node (storage) + @path: abs path to file + @treepath: rel path from indexed directory + ''' + treepath = treepath.lstrip(os.sep) node = self.get_node(top, treepath, quiet=True) + # node does not exist if not node: - # node does not exist + self._debug('\tchange: node does not exist') return None, True + if os.path.isdir(path): + return node, False + # force re-indexing if no maccess + maccess = os.path.getmtime(path) if not self._has_attr(node, 'maccess') or \ not node.maccess: - # force re-indexing if no maccess + self._debug('\tchange: no maccess found') return node, True + # maccess changed old_maccess = node.maccess - if float(maccess) > float(old_maccess): + if float(maccess) != float(old_maccess): + self._debug('\tchange: maccess changed for \"{}\"'.format(path)) return node, True + # test hash + if self.hash and node.md5: + md5 = self._get_hash(path) + if md5 != node.md5: + m = '\tchange: checksum changed for \"{}\"'.format(path) + self._debug(m) + return node, True + self._debug('\tchange: no change for \"{}\"'.format(path)) return node, False - def get_meta_node(self, top): - '''return the meta node if any''' - try: - return next(filter(lambda x: x.type == self.TYPE_META, - top.children)) - except StopIteration: - return None - def _rec_size(self, node, store=True): ''' recursively traverse tree and return size @store: store the size in the node ''' - if self.verbose: - Logger.info('getting node size recursively') if node.type == self.TYPE_FILE: + self._debug('getting node size for \"{}\"'.format(node.name)) return node.size + m = 'getting node size recursively for \"{}\"'.format(node.name) + self._debug(m) size = 0 for i in node.children: if node.type == self.TYPE_DIR: @@ -142,8 +161,9 @@ class Noder: '''create a new top node''' return anytree.AnyNode(name=self.TOPNAME, type=self.TYPE_TOP) - def update_metanode(self, meta): + def update_metanode(self, top): '''create or update meta node information''' + meta = self._get_meta_node(top) epoch = int(time.time()) if not meta: attr = {} @@ -155,6 +175,14 @@ class Noder: meta.attr['access_version'] = VERSION return meta + def _get_meta_node(self, top): + '''return the meta node if any''' + try: + return next(filter(lambda x: x.type == self.TYPE_META, + top.children)) + except StopIteration: + return None + def file_node(self, name, path, parent, storagepath): '''create a new node representing a file''' if not os.path.exists(path): @@ -168,7 +196,7 @@ class Noder: return None md5 = None if self.hash: - md5 = utils.md5sum(path) + md5 = self._get_hash(path) relpath = os.sep.join([storagepath, name]) maccess = os.path.getmtime(path) @@ -200,6 +228,7 @@ class Noder: return cnt def flag(self, node): + '''flag a node''' node.flag = True def _clean(self, node): @@ -337,8 +366,7 @@ class Noder: script=False, directory=False, startpath=None, parentfromtree=False): '''find files based on their names''' - if self.verbose: - Logger.info('searching for \"{}\"'.format(key)) + self._debug('searching for \"{}\"'.format(key)) start = root if startpath: start = self.get_node(root, startpath) @@ -375,8 +403,7 @@ class Noder: ############################################################### def walk(self, root, path, rec=False): '''walk the tree for ls based on names''' - if self.verbose: - Logger.info('walking path: \"{}\"'.format(path)) + self._debug('walking path: \"{}\"'.format(path)) r = anytree.resolver.Resolver('name') found = [] try: @@ -396,7 +423,7 @@ class Noder: return found ############################################################### - # tree creationg + # tree creation ############################################################### def _add_entry(self, name, top, resolv): '''add an entry to the tree''' @@ -429,6 +456,7 @@ class Noder: return sorted(items, key=self._sort, reverse=self.sortsize) def _sort(self, x): + '''sort a list''' if self.sortsize: return self._sort_size(x) return self._sort_fs(x) @@ -461,3 +489,13 @@ class Noder: if parent: return os.sep.join([parent, node.name]) return node.name + + def _get_hash(self, path): + """return md5 hash of node""" + return utils.md5sum(path) + + def _debug(self, string): + '''print debug''' + if not self.debug: + return + Logger.debug(string) diff --git a/catcli/utils.py b/catcli/utils.py index 5267737..798e784 100644 --- a/catcli/utils.py +++ b/catcli/utils.py @@ -19,7 +19,7 @@ def md5sum(path): '''calculate md5 sum of a file''' p = os.path.realpath(path) if not os.path.exists(p): - Logger.err('\nunable to get md5sum on {}'.format(path)) + Logger.err('\nmd5sum - file does not exist: {}'.format(p)) return None try: with open(p, mode='rb') as f: @@ -32,6 +32,8 @@ def md5sum(path): return d.hexdigest() except PermissionError: pass + except OSError as e: + Logger.err('md5sum error: {}'.format(e)) return None diff --git a/catcli/walker.py b/catcli/walker.py index 37b24fb..1dcd65c 100644 --- a/catcli/walker.py +++ b/catcli/walker.py @@ -15,10 +15,19 @@ class Walker: MAXLINE = 80 - 15 - def __init__(self, noder, nohash=False, debug=False): + def __init__(self, noder, hash=True, debug=False, + logpath=None): + ''' + @noder: the noder to use + @hash: calculate hash of nodes + @debug: debug mode + @logpath: path where to log catalog changes on reindex + ''' self.noder = noder - self.noder.set_hashing(not nohash) + self.hash = hash + self.noder.set_hashing(self.hash) self.debug = debug + self.lpath = logpath def index(self, path, parent, name, storagepath=''): ''' @@ -44,7 +53,7 @@ class Walker: sub = os.path.join(root, f) if not os.path.exists(sub): continue - self._log(f) + self._progress(f) self._debug('index file {}'.format(sub)) n = self.noder.file_node(os.path.basename(f), sub, parent, storagepath) @@ -67,43 +76,47 @@ class Walker: _, cnt2 = self.index(sub, dummy, base, nstoragepath) cnt += cnt2 break - self._log(None) + self._progress(None) return parent, cnt def reindex(self, path, parent, top): '''reindex a directory and store in tree''' - cnt = self._reindex(path, parent, top, '') + cnt = self._reindex(path, parent, top) cnt += self.noder.clean_not_flagged(parent) return cnt - def _reindex(self, path, parent, top, storagepath): - '''reindex a directory and store in tree''' + def _reindex(self, path, parent, top, storagepath=''): + ''' + reindex a directory and store in tree + @path: directory path to re-index + @top: top node (storage) + @storagepath: rel path relative to indexed directory + ''' self._debug('reindexing starting at {}'.format(path)) cnt = 0 for (root, dirs, files) in os.walk(path): for f in files: - self._debug('found file {} under {}'.format(f, path)) + self._debug('found file \"{}\" under {}'.format(f, path)) sub = os.path.join(root, f) - maccess = os.path.getmtime(sub) - need_reindex, n = self._need_reindex(parent, f, maccess) - if not need_reindex: - self._debug('\tignore file {}'.format(sub)) + treepath = os.path.join(storagepath, f) + reindex, n = self._need_reindex(parent, sub, treepath) + if not reindex: + self._debug('\tskip file {}'.format(sub)) self.noder.flag(n) continue - self._debug('\tre-index file {}'.format(sub)) - self._log(f) + self._log2file('update catalog for \"{}\"'.format(sub)) n = self.noder.file_node(os.path.basename(f), sub, parent, storagepath) self.noder.flag(n) cnt += 1 for d in dirs: - self._debug('found dir {} under {}'.format(d, path)) + self._debug('found dir \"{}\" under {}'.format(d, path)) base = os.path.basename(d) sub = os.path.join(root, d) - maccess = os.path.getmtime(sub) - need_reindex, dummy = self._need_reindex(parent, base, maccess) - if need_reindex: - self._debug('\tre-index directory {}'.format(sub)) + treepath = os.path.join(storagepath, d) + reindex, dummy = self._need_reindex(parent, sub, treepath) + if reindex: + self._log2file('update catalog for \"{}\"'.format(sub)) dummy = self.noder.dir_node(base, sub, parent, storagepath) cnt += 1 self.noder.flag(dummy) @@ -114,33 +127,38 @@ class Walker: cnt2 = self._reindex(sub, dummy, top, nstoragepath) cnt += cnt2 break - self._log(None) return cnt - def _need_reindex(self, top, path, maccess): - '''test if node needs re-indexing''' - cnode, newer = self.noder.get_node_if_newer(top, path, maccess) + def _need_reindex(self, top, path, treepath): + ''' + test if node needs re-indexing + @top: top node (storage) + @path: abs path to file + @treepath: rel path from indexed directory + ''' + cnode, changed = self.noder.get_node_if_changed(top, path, treepath) if not cnode: - self._debug('\tdoes not exist') + self._debug('\t{} does not exist'.format(path)) return True, cnode - if cnode and not newer: + if cnode and not changed: # ignore this node - self._debug('\tis not newer') + self._debug('\t{} has not changed'.format(path)) return False, cnode - if cnode and newer: + if cnode and changed: # remove this node and re-add - self._debug('\tis newer') - self._debug('\tremoving node {}'.format(cnode)) + self._debug('\t{} has changed'.format(path)) + self._debug('\tremoving node {} for {}'.format(cnode.name, path)) cnode.parent = None - self._debug('\tis to be re-indexed') return True, cnode def _debug(self, string): + '''print to debug''' if not self.debug: return - Logger.log(string) + Logger.debug(string) - def _log(self, string): + def _progress(self, string): + '''print progress''' if self.debug: return if not string: @@ -150,3 +168,10 @@ class Walker: if len(string) > self.MAXLINE: string = string[:self.MAXLINE] + '...' Logger.progr('indexing: {:80}'.format(string)) + + def _log2file(self, string): + '''log to file''' + if not self.lpath: + return + line = '{}\n'.format(string) + Logger.flog(self.lpath, line, append=True) diff --git a/tests/helpers.py b/tests/helpers.py index ae5624b..2d4c6d6 100644 --- a/tests/helpers.py +++ b/tests/helpers.py @@ -11,6 +11,7 @@ import random import tempfile import shutil import subprocess +import hashlib TMPSUFFIX = '.catcli' @@ -25,6 +26,25 @@ def get_rnd_string(length): return ''.join(random.choice(alpha) for _ in range(length)) +def md5sum(path): + '''calculate md5 sum of a file''' + p = os.path.realpath(path) + if not os.path.exists(p): + return None + try: + with open(p, mode='rb') as f: + d = hashlib.md5() + while True: + buf = f.read(4096) + if not buf: + break + d.update(buf) + return d.hexdigest() + except PermissionError: + pass + return None + + def clean(path): '''Delete file or folder.''' if not os.path.exists(path): @@ -38,10 +58,7 @@ def clean(path): def edit_file(path, newcontent): - if not os.path.exists(path): - write_to_file(path, newcontent) - else: - write_to_file(path, newcontent) + return write_to_file(path, newcontent) def unix_tree(path): diff --git a/tests/test_find.py b/tests/test_find.py index 6bbef99..cb5a584 100644 --- a/tests/test_find.py +++ b/tests/test_find.py @@ -17,7 +17,7 @@ class TestFind(unittest.TestCase): def test_find(self): # init - catalog = Catalog('fake', force=True, verbose=False) + catalog = Catalog('fake', force=True, debug=False) top = catalog._restore_json(get_fakecatalog()) noder = Noder() diff --git a/tests/test_graph.py b/tests/test_graph.py index 68b3750..4d68dbc 100644 --- a/tests/test_graph.py +++ b/tests/test_graph.py @@ -23,7 +23,7 @@ class TestGraph(unittest.TestCase): gpath = tempfile.gettempdir() + os.sep + 'graph.dot' self.addCleanup(clean, path) self.addCleanup(clean, gpath) - catalog = Catalog(path, force=True, verbose=False) + catalog = Catalog(path, force=True, debug=False) top = catalog._restore_json(get_fakecatalog()) noder = Noder() diff --git a/tests/test_index.py b/tests/test_index.py index c5396ff..966acc0 100644 --- a/tests/test_index.py +++ b/tests/test_index.py @@ -42,7 +42,7 @@ class TestIndexing(unittest.TestCase): noder = Noder() top = noder.new_top_node() - catalog = Catalog(catalogpath, force=True, verbose=False) + catalog = Catalog(catalogpath, force=True, debug=False) # create fake args tmpdirname = 'tmpdir' diff --git a/tests/test_ls.py b/tests/test_ls.py index 95d19eb..7b2061a 100644 --- a/tests/test_ls.py +++ b/tests/test_ls.py @@ -19,7 +19,7 @@ class TestWalking(unittest.TestCase): # init path = 'fake' self.addCleanup(clean, path) - catalog = Catalog(path, force=True, verbose=False) + catalog = Catalog(path, force=True, debug=False) top = catalog._restore_json(get_fakecatalog()) noder = Noder() diff --git a/tests/test_rm.py b/tests/test_rm.py index d81d5cd..81f96ef 100644 --- a/tests/test_rm.py +++ b/tests/test_rm.py @@ -19,7 +19,7 @@ class TestRm(unittest.TestCase): # init path = 'fake' self.addCleanup(clean, path) - catalog = Catalog(path, force=True, verbose=False) + catalog = Catalog(path, force=True, debug=False) top = catalog._restore_json(get_fakecatalog()) noder = Noder() diff --git a/tests/test_tree.py b/tests/test_tree.py index 238243f..66bb618 100644 --- a/tests/test_tree.py +++ b/tests/test_tree.py @@ -19,7 +19,7 @@ class TestTree(unittest.TestCase): # init path = 'fake' self.addCleanup(clean, path) - catalog = Catalog(path, force=True, verbose=False) + catalog = Catalog(path, force=True, debug=False) top = catalog._restore_json(get_fakecatalog()) noder = Noder() diff --git a/tests/test_update.py b/tests/test_update.py index 39eb02c..5d4d4de 100644 --- a/tests/test_update.py +++ b/tests/test_update.py @@ -12,7 +12,7 @@ from catcli.catcli import cmd_index, cmd_update from catcli.noder import Noder from catcli.catalog import Catalog from tests.helpers import create_dir, create_rnd_file, get_tempdir, \ - clean, unix_tree, edit_file, read_from_file + clean, unix_tree, edit_file, read_from_file, md5sum import anytree @@ -31,6 +31,7 @@ class TestIndexing(unittest.TestCase): f1 = create_rnd_file(dirpath, 'file1') f2 = create_rnd_file(dirpath, 'file2') f3 = create_rnd_file(dirpath, 'file3') + f4 = create_rnd_file(dirpath, 'file4') # create 2 directories d1 = create_dir(dirpath, 'dir1') @@ -40,22 +41,40 @@ class TestIndexing(unittest.TestCase): d1f1 = create_rnd_file(d1, 'dir1file1') d1f2 = create_rnd_file(d1, 'dir1file2') d2f1 = create_rnd_file(d2, 'dir2file1') + d2f2 = create_rnd_file(d2, 'dir2file2') - noder = Noder() + noder = Noder(debug=True) + noder.set_hashing(True) top = noder.new_top_node() - catalog = Catalog(catalogpath, force=True, verbose=False) + catalog = Catalog(catalogpath, force=True, debug=False) + + # get checksums + f4_md5 = md5sum(f4) + self.assertTrue(f4_md5) + d1f1_md5 = md5sum(d1f1) + self.assertTrue(d1f1_md5) + d2f2_md5 = md5sum(d2f2) + self.assertTrue(d2f2_md5) # create fake args tmpdirname = 'tmpdir' args = {'': dirpath, '': tmpdirname, '--hash': True, '--meta': ['some meta'], - '--no-subsize': False, '--verbose': True} + '--no-subsize': False, '--verbose': True, + '--lpath': None} # index the directory unix_tree(dirpath) - cmd_index(args, noder, catalog, top, debug=True) + cmd_index(args, noder, catalog, top) self.assertTrue(os.stat(catalogpath).st_size != 0) + # ensure md5 sum are in + nods = noder.find_name(top, os.path.basename(f4)) + self.assertTrue(len(nods) == 1) + nod = nods[0] + self.assertTrue(nod) + self.assertTrue(nod.md5 == f4_md5) + # print catalog noder.print_tree(top) @@ -70,9 +89,32 @@ class TestIndexing(unittest.TestCase): # modify files EDIT = 'edited' edit_file(d1f1, EDIT) + d1f1_md5_new = md5sum(d1f1) + self.assertTrue(d1f1_md5_new) + self.assertTrue(d1f1_md5_new != d1f1_md5) + + # change file without mtime + maccess = os.path.getmtime(f4) + EDIT = 'edited' + edit_file(f4, EDIT) + # reset edit time + os.utime(f4, (maccess, maccess)) + f4_md5_new = md5sum(d1f1) + self.assertTrue(f4_md5_new) + self.assertTrue(f4_md5_new != f4_md5) + + # change file without mtime + maccess = os.path.getmtime(d2f2) + EDIT = 'edited' + edit_file(d2f2, EDIT) + # reset edit time + os.utime(d2f2, (maccess, maccess)) + d2f2_md5_new = md5sum(d2f2) + self.assertTrue(d2f2_md5_new) + self.assertTrue(d2f2_md5_new != d2f2_md5) # update storage - cmd_update(args, noder, catalog, top, debug=True) + cmd_update(args, noder, catalog, top) # print catalog # print(read_from_file(catalogpath)) @@ -81,7 +123,31 @@ class TestIndexing(unittest.TestCase): # explore the top node to find all nodes self.assertTrue(len(top.children) == 1) storage = top.children[0] - self.assertTrue(len(storage.children) == 7) + self.assertTrue(len(storage.children) == 8) + + # ensure d1f1 md5 sum has changed in catalog + nods = noder.find_name(top, os.path.basename(d1f1)) + self.assertTrue(len(nods) == 1) + nod = nods[0] + self.assertTrue(nod) + self.assertTrue(nod.md5 != d1f1_md5) + self.assertTrue(nod.md5 == d1f1_md5_new) + + # ensure f4 md5 sum has changed in catalog + nods = noder.find_name(top, os.path.basename(f4)) + self.assertTrue(len(nods) == 1) + nod = nods[0] + self.assertTrue(nod) + self.assertTrue(nod.md5 != f4_md5) + self.assertTrue(nod.md5 == f4_md5_new) + + # ensure d2f2 md5 sum has changed in catalog + nods = noder.find_name(top, os.path.basename(d2f2)) + self.assertTrue(len(nods) == 1) + nod = nods[0] + self.assertTrue(nod) + self.assertTrue(nod.md5 != d2f2_md5) + self.assertTrue(nod.md5 == d2f2_md5_new) # ensures files and directories are in names = [node.name for node in anytree.PreOrderIter(storage)] @@ -89,6 +155,7 @@ class TestIndexing(unittest.TestCase): self.assertTrue(os.path.basename(f1) in names) self.assertTrue(os.path.basename(f2) in names) self.assertTrue(os.path.basename(f3) in names) + self.assertTrue(os.path.basename(f4) in names) self.assertTrue(os.path.basename(d1) in names) self.assertTrue(os.path.basename(d1f1) in names) self.assertTrue(os.path.basename(d1f2) in names) @@ -104,7 +171,7 @@ class TestIndexing(unittest.TestCase): if node.name == os.path.basename(d1): self.assertTrue(len(node.children) == 3) elif node.name == os.path.basename(d2): - self.assertTrue(len(node.children) == 2) + self.assertTrue(len(node.children) == 3) elif node.name == os.path.basename(new3): self.assertTrue(len(node.children) == 0) elif node.name == os.path.basename(new4): @@ -118,7 +185,7 @@ class TestIndexing(unittest.TestCase): clean(new4) # update storage - cmd_update(args, noder, catalog, top, debug=True) + cmd_update(args, noder, catalog, top) # ensures files and directories are (not) in names = [node.name for node in anytree.PreOrderIter(storage)] @@ -126,11 +193,13 @@ class TestIndexing(unittest.TestCase): self.assertTrue(os.path.basename(f1) in names) self.assertTrue(os.path.basename(f2) in names) self.assertTrue(os.path.basename(f3) in names) + self.assertTrue(os.path.basename(f4) in names) self.assertTrue(os.path.basename(d1) in names) self.assertTrue(os.path.basename(d1f1) not in names) self.assertTrue(os.path.basename(d1f2) in names) self.assertTrue(os.path.basename(d2) not in names) self.assertTrue(os.path.basename(d2f1) not in names) + self.assertTrue(os.path.basename(d2f1) not in names) self.assertTrue(os.path.basename(new1) in names) self.assertTrue(os.path.basename(new2) not in names) self.assertTrue(os.path.basename(new3) in names)