""" author: deadc0de6 (https://github.com/deadc0de6) Copyright (c) 2017, deadc0de6 Class that represents a node in the catalog tree """ import os import anytree import shutil import time # local imports from . import __version__ as VERSION import catcli.utils as utils from catcli.logger import Logger from catcli.decomp import Decomp ''' There are 4 types of node: * "top" node representing the top node (generic node) * "storage" node representing a storage * "dir" node representing a directory * "file" node representing a file ''' class Noder: TOPNAME = 'top' METANAME = 'meta' TYPE_TOP = 'top' TYPE_FILE = 'file' TYPE_DIR = 'dir' TYPE_ARC = 'arc' TYPE_STORAGE = 'storage' TYPE_META = 'meta' CSV_HEADER = ('name,type,path,size,indexed_at,' 'maccess,md5,nbfiles,free_space,' 'total_space,meta') def __init__(self, debug=False, sortsize=False, arc=False): ''' @debug: debug mode @sortsize: sort nodes by size @arch: handle archive ''' self.hash = True self.debug = debug self.sortsize = sortsize self.arc = arc if self.arc: self.decomp = Decomp() def get_storage_names(self, top): '''return a list of all storage names''' return [x.name for x in list(top.children)] def get_storage_node(self, top, name, path=None): ''' return the storage node if any if path is submitted, it will update the media info ''' found = None for n in top.children: if n.type != self.TYPE_STORAGE: continue if n.name == name: found = n break if found and path and os.path.exists(path): found.free = shutil.disk_usage(path).free found.total = shutil.disk_usage(path).total found.ts = int(time.time()) return found def get_node(self, top, path, quiet=False): '''get the node by internal tree path''' r = anytree.resolver.Resolver('name') try: p = os.path.basename(path) return r.get(top, p) except anytree.resolver.ChildResolverError: if not quiet: Logger.err('No node at path \"{}\"'.format(p)) return None def get_node_if_changed(self, top, path, treepath): ''' return the node (if any) and if it has changed @top: top node (storage) @path: abs path to file @treepath: rel path from indexed directory ''' treepath = treepath.lstrip(os.sep) node = self.get_node(top, treepath, quiet=True) # node does not exist if not node: self._debug('\tchange: node does not exist') return None, True if os.path.isdir(path): return node, False # force re-indexing if no maccess maccess = os.path.getmtime(path) if not self._has_attr(node, 'maccess') or \ not node.maccess: self._debug('\tchange: no maccess found') return node, True # maccess changed old_maccess = node.maccess if float(maccess) != float(old_maccess): self._debug('\tchange: maccess changed for \"{}\"'.format(path)) return node, True # test hash if self.hash and node.md5: md5 = self._get_hash(path) if md5 != node.md5: m = '\tchange: checksum changed for \"{}\"'.format(path) self._debug(m) return node, True self._debug('\tchange: no change for \"{}\"'.format(path)) return node, False def _rec_size(self, node, store=True): ''' recursively traverse tree and return size @store: store the size in the node ''' if node.type == self.TYPE_FILE: self._debug('getting node size for \"{}\"'.format(node.name)) return node.size m = 'getting node size recursively for \"{}\"'.format(node.name) self._debug(m) size = 0 for i in node.children: if node.type == self.TYPE_DIR: sz = self._rec_size(i, store=store) if store: i.size = sz size += sz if node.type == self.TYPE_STORAGE: sz = self._rec_size(i, store=store) if store: i.size = sz size += sz else: continue if store: node.size = size return size def rec_size(self, node): '''recursively traverse tree and store dir size''' return self._rec_size(node, store=True) ############################################################### # public helpers ############################################################### def format_storage_attr(self, attr): '''format the storage attr for saving''' if not attr: return '' if type(attr) is list: return ', '.join(attr) attr = attr.rstrip() return attr def set_hashing(self, val): '''hash files when indexing''' self.hash = val ############################################################### # node creationg ############################################################### def new_top_node(self): '''create a new top node''' return anytree.AnyNode(name=self.TOPNAME, type=self.TYPE_TOP) def update_metanode(self, top): '''create or update meta node information''' meta = self._get_meta_node(top) epoch = int(time.time()) if not meta: attr = {} attr['created'] = epoch attr['created_version'] = VERSION meta = anytree.AnyNode(name=self.METANAME, type=self.TYPE_META, attr=attr) meta.attr['access'] = epoch meta.attr['access_version'] = VERSION return meta def _get_meta_node(self, top): '''return the meta node if any''' try: return next(filter(lambda x: x.type == self.TYPE_META, top.children)) except StopIteration: return None def file_node(self, name, path, parent, storagepath): '''create a new node representing a file''' if not os.path.exists(path): Logger.err('File \"{}\" does not exist'.format(path)) return None path = os.path.abspath(path) try: st = os.lstat(path) except OSError as e: Logger.err('OSError: {}'.format(e)) return None md5 = None if self.hash: md5 = self._get_hash(path) relpath = os.sep.join([storagepath, name]) maccess = os.path.getmtime(path) n = self._node(name, self.TYPE_FILE, relpath, parent, size=st.st_size, md5=md5, maccess=maccess) if self.arc: ext = os.path.splitext(path)[1][1:] if ext.lower() in self.decomp.get_formats(): self._debug('{} is an archive'.format(path)) names = self.decomp.get_names(path) self.list_to_tree(n, names) else: self._debug('{} is NOT an archive'.format(path)) return n def dir_node(self, name, path, parent, storagepath): '''create a new node representing a directory''' path = os.path.abspath(path) relpath = os.sep.join([storagepath, name]) maccess = os.path.getmtime(path) return self._node(name, self.TYPE_DIR, relpath, parent, maccess=maccess) def clean_not_flagged(self, top): '''remove any node not flagged and clean flags''' cnt = 0 for node in anytree.PreOrderIter(top): if node.type != self.TYPE_FILE and node.type != self.TYPE_DIR: continue if self._clean(node): cnt += 1 return cnt def flag(self, node): '''flag a node''' node.flag = True def _clean(self, node): '''remove node if not flagged''' if not self._has_attr(node, 'flag') or \ not node.flag: node.parent = None return True del node.flag return False def storage_node(self, name, path, parent, attr=None): '''create a new node representing a storage''' path = os.path.abspath(path) free = shutil.disk_usage(path).free total = shutil.disk_usage(path).total epoch = int(time.time()) return anytree.AnyNode(name=name, type=self.TYPE_STORAGE, free=free, total=total, parent=parent, attr=attr, ts=epoch) def archive_node(self, name, path, parent, archive): '''crete a new node for archive data''' return anytree.AnyNode(name=name, type=self.TYPE_ARC, relpath=path, parent=parent, size=0, md5=None, archive=archive) def _node(self, name, type, relpath, parent, size=None, md5=None, maccess=None): '''generic node creation''' return anytree.AnyNode(name=name, type=type, relpath=relpath, parent=parent, size=size, md5=md5, maccess=maccess) ############################################################### # printing ############################################################### def _node_to_csv(self, node, sep=',', raw=False): ''' print a node to csv @node: the node to consider @sep: CSV separator character @raw: print raw size rather than human readable ''' if not node: return '' if node.type == self.TYPE_TOP: return '' out = [] if node.type == self.TYPE_STORAGE: # handle storage out.append(node.name) # name out.append(node.type) # type out.append('') # fake full path sz = self._rec_size(node, store=False) out.append(utils.size_to_str(sz, raw=raw)) # size out.append(utils.epoch_to_str(node.ts)) # indexed_at out.append('') # fake maccess out.append('') # fake md5 out.append(str(len(node.children))) # nbfiles # fake free_space out.append(utils.size_to_str(node.free, raw=raw)) # fake total_space out.append(utils.size_to_str(node.total, raw=raw)) out.append(node.attr) # meta else: # handle other nodes out.append(node.name.replace('"', '""')) # name out.append(node.type) # type parents = self._get_parents(node) storage = self._get_storage(node) fullpath = os.path.join(storage.name, parents) out.append(fullpath.replace('"', '""')) # full path out.append(utils.size_to_str(node.size, raw=raw)) # size out.append(utils.epoch_to_str(storage.ts)) # indexed_at if self._has_attr(node, 'maccess'): out.append(utils.epoch_to_str(node.maccess)) # maccess else: out.append('') # fake maccess if node.md5: out.append(node.md5) # md5 else: out.append('') # fake md5 if node.type == self.TYPE_DIR: out.append(str(len(node.children))) # nbfiles else: out.append('') # fake nbfiles out.append('') # fake free_space out.append('') # fake total_space out.append('') # fake meta line = sep.join(['"' + o + '"' for o in out]) if len(line) > 0: Logger.out(line) def _print_node(self, node, pre='', withpath=False, withdepth=False, withstorage=False, recalcparent=False, raw=False): ''' print a node @node: the node to print @pre: string to print before node @withpath: print the node path @withdepth: print the node depth info @withstorage: print the node storage it belongs to @recalcparent: get relpath from tree instead of relpath field @raw: print raw size rather than human readable ''' if node.type == self.TYPE_TOP: # top node Logger.out('{}{}'.format(pre, node.name)) elif node.type == self.TYPE_FILE: # node of type file name = node.name if withpath: if recalcparent: name = os.sep.join([self._get_parents(node.parent), name]) else: name = node.relpath name = name.lstrip(os.sep) if withstorage: storage = self._get_storage(node) attr = '' if node.md5: attr = ', md5:{}'.format(node.md5) sz = utils.size_to_str(node.size, raw=raw) compl = 'size:{}{}'.format(sz, attr) if withstorage: compl += ', storage:{}'.format(Logger.bold(storage.name)) Logger.file(pre, name, compl) elif node.type == self.TYPE_DIR: # node of type directory name = node.name if withpath: if recalcparent: name = os.sep.join([self._get_parents(node.parent), name]) else: name = node.relpath name = name.lstrip(os.sep) depth = '' if withdepth: depth = len(node.children) if withstorage: storage = self._get_storage(node) attr = [] if node.size: attr.append(['totsize', utils.size_to_str(node.size, raw=raw)]) if withstorage: attr.append(['storage', Logger.bold(storage.name)]) Logger.dir(pre, name, depth=depth, attr=attr) elif node.type == self.TYPE_STORAGE: # node of type storage hf = utils.size_to_str(node.free, raw=raw) ht = utils.size_to_str(node.total, raw=raw) nbchildren = len(node.children) freepercent = '{:.1f}%'.format( node.free * 100 / node.total ) # get the date dt = '' if self._has_attr(node, 'ts'): dt = 'date:' dt += '{}'.format(utils.epoch_to_str(node.ts)) ds = '' # the children size sz = self._rec_size(node, store=False) sz = utils.size_to_str(sz, raw=raw) ds = 'totsize:' + '{}'.format(sz) # format the output name = '{}'.format(node.name) args = [ 'nbfiles:' + '{}'.format(nbchildren), ds, 'free:{}'.format(freepercent), 'du:' + '{}/{}'.format(hf, ht), dt] Logger.storage(pre, name, '{}'.format(' | '.join(args)), node.attr) elif node.type == self.TYPE_ARC: # archive node if self.arc: Logger.arc(pre, node.name, node.archive) else: Logger.err('bad node encountered: {}'.format(node)) def print_tree(self, node, style=anytree.ContRoundStyle(), fmt='native', header=False, raw=False): ''' print the tree similar to unix tool "tree" @node: start node @style: when fmt=native, defines the tree style @fmt: output format @header: when fmt=csv, print the header @raw: print the raw size rather than human readable ''' if fmt == 'native': rend = anytree.RenderTree(node, childiter=self._sort_tree) for pre, fill, node in rend: self._print_node(node, pre=pre, withdepth=True, raw=raw) elif fmt == 'csv': self._to_csv(node, with_header=header, raw=raw) def _to_csv(self, node, with_header=False, raw=False): '''print the tree to csv''' rend = anytree.RenderTree(node, childiter=self._sort_tree) if with_header: Logger.out(self.CSV_HEADER) for _, _, node in rend: self._node_to_csv(node, raw=raw) def to_dot(self, node, path='tree.dot'): '''export to dot for graphing''' anytree.exporter.DotExporter(node).to_dotfile(path) Logger.info('dot file created under \"{}\"'.format(path)) return 'dot {} -T png -o /tmp/tree.png'.format(path) ############################################################### # searching ############################################################### def find_name(self, root, key, script=False, directory=False, startpath=None, parentfromtree=False, fmt='native', raw=False): ''' find files based on their names @script: output script @directory: only search for directories @startpath: node to start with @parentfromtree: get path from parent instead of stored relpath @fmt: output format ''' self._debug('searching for \"{}\"'.format(key)) start = root if startpath: start = self.get_node(root, startpath) self.term = key found = anytree.findall(start, filter_=self._find_name) paths = [] for f in found: if f.type == self.TYPE_STORAGE: # ignore storage nodes continue if directory and f.type != self.TYPE_DIR: # ignore non directory continue # print the node if fmt == 'native': self._print_node(f, withpath=True, withdepth=True, withstorage=True, recalcparent=parentfromtree, raw=raw) elif fmt == 'csv': self._node_to_csv(f, raw=raw) if parentfromtree: paths.append(self._get_parents(f)) else: paths.append(f.relpath) if script: tmp = ['${source}/' + x for x in paths] cmd = 'op=file; source=/media/mnt; $op {}'.format(' '.join(tmp)) Logger.info(cmd) return found def _find_name(self, node): '''callback for finding files''' if self.term.lower() in node.name.lower(): return True return False ############################################################### # climbing ############################################################### def walk(self, root, path, rec=False, fmt='native', raw=False): ''' walk the tree for ls based on names @root: start node @rec: recursive walk @fmt: output format ''' self._debug('walking path: \"{}\"'.format(path)) r = anytree.resolver.Resolver('name') found = [] try: found = r.glob(root, path) if len(found) < 1: # nothing found return [] if rec: # print the entire tree self.print_tree(found[0].parent, fmt=fmt, raw=raw) return found # sort found nodes found = sorted(found, key=self._sort, reverse=self.sortsize) # print the parent if fmt == 'native': self._print_node(found[0].parent, withpath=False, withdepth=True, raw=raw) elif fmt == 'csv': self._node_to_csv(found[0].parent, raw=raw) # print all found nodes for f in found: if fmt == 'native': self._print_node(f, withpath=False, pre='- ', withdepth=True, raw=raw) elif fmt == 'csv': self._node_to_csv(f, raw=raw) except anytree.resolver.ChildResolverError: pass return found ############################################################### # tree creation ############################################################### def _add_entry(self, name, top, resolv): '''add an entry to the tree''' entries = name.rstrip(os.sep).split(os.sep) if len(entries) == 1: self.archive_node(name, name, top, top.name) return sub = os.sep.join(entries[:-1]) f = entries[-1] try: parent = resolv.get(top, sub) parent = self.archive_node(f, name, parent, top.name) except anytree.resolver.ChildResolverError: self.archive_node(f, name, top, top.name) def list_to_tree(self, parent, names): '''convert list of files to a tree''' if not names: return r = anytree.resolver.Resolver('name') for name in names: name = name.rstrip(os.sep) self._add_entry(name, parent, r) ############################################################### # diverse ############################################################### def _sort_tree(self, items): '''sorting a list of items''' return sorted(items, key=self._sort, reverse=self.sortsize) def _sort(self, x): '''sort a list''' if self.sortsize: return self._sort_size(x) return self._sort_fs(x) def _sort_fs(self, n): '''sorting nodes dir first and alpha''' return (n.type, n.name.lstrip('\.').lower()) def _sort_size(self, n): '''sorting nodes by size''' try: if not n.size: return 0 return n.size except AttributeError: return 0 def _get_storage(self, node): '''recursively traverse up to find storage''' if node.type == self.TYPE_STORAGE: return node return node.ancestors[1] def _has_attr(self, node, attr): return attr in node.__dict__.keys() def _get_parents(self, node): '''get all parents recursively''' if node.type == self.TYPE_STORAGE: return '' parent = self._get_parents(node.parent) if parent: return os.sep.join([parent, node.name]) return node.name def _get_hash(self, path): """return md5 hash of node""" return utils.md5sum(path) def _debug(self, string): '''print debug''' if not self.debug: return Logger.debug(string)