From 340ab62d77671696d836b7532e43dfc55fa99010 Mon Sep 17 00:00:00 2001 From: deadc0de6 Date: Wed, 10 Jan 2024 22:18:43 +0100 Subject: [PATCH] du --- README.md | 8 +++ catcli/catcli.py | 62 ++++++++++++++---- catcli/noder.py | 91 +++++++++++++++++++-------- catcli/nodes.py | 132 +++++++++++++++++++-------------------- catcli/printer_csv.py | 5 +- catcli/printer_native.py | 22 +++++-- catcli/utils.py | 14 ----- 7 files changed, 209 insertions(+), 125 deletions(-) diff --git a/README.md b/README.md index 9e7c844..2916b4b 100644 --- a/README.md +++ b/README.md @@ -50,6 +50,8 @@ catcli ls -r catcli ls log # find files/directories named '*log*' catcli find log +# show directories sizes +catcli du log ``` see [usage](#usage) for specific info @@ -76,6 +78,7 @@ See the [examples](#examples) for an overview of the available features. * [Find files](#find-files) * [Mount catalog](#mount-catalog) * [Display entire hierarchy](#display-entire-hierarchy) + * [Disk usage](#disk-usage) * [Catalog graph](#catalog-graph) * [Edit storage](#edit-storage) * [Update catalog](#update-catalog) @@ -212,6 +215,11 @@ Resulting files can be sorted by size using the `-S --sortsize` switch. See the [examples](#examples) for more. +## Disk usage + +You can get the disk usage with the `du` command. +Resulting files can be sorted by size using the `-S --sortsize` switch. + ## Catalog graph The catalog can be exported in a dot file that can be used to diff --git a/catcli/catcli.py b/catcli/catcli.py index e9e62d3..805de23 100755 --- a/catcli/catcli.py +++ b/catcli/catcli.py @@ -40,19 +40,21 @@ USAGE = f""" {BANNER} Usage: - {NAME} ls [--catalog=] [--format=] [-aBCrVSs] [] - {NAME} tree [--catalog=] [-aBCVSs] [] - {NAME} find [--catalog=] [--format=] - [-aBCbdVs] [--path=] [] - {NAME} index [--catalog=] [--meta=...] - [-aBCcfV] - {NAME} update [--catalog=] [-aBCcfV] - [--lpath=] - {NAME} mount [--catalog=] [-V] - {NAME} rm [--catalog=] [-BCfV] - {NAME} rename [--catalog=] [-BCfV] - {NAME} edit [--catalog=] [-BCfV] - {NAME} graph [--catalog=] [-BCV] [] + {NAME} ls [--catalog=] [--format=] [-aBCrVSs] [] + {NAME} tree [--catalog=] [-aBCVSs] [] + {NAME} find [--catalog=] [--format=] + [-aBCbdVs] [--path=] [] + {NAME} index [--catalog=] [--meta=...] + [-aBCcfV] + {NAME} update [--catalog=] [-aBCcfV] + [--lpath=] + {NAME} mount [--catalog=] [-V] + {NAME} du [--catalog=] [-BCVSs] [] + {NAME} rm [--catalog=] [-BCfV] + {NAME} rename [--catalog=] [-BCfV] + {NAME} edit [--catalog=] [-BCfV] + {NAME} graph [--catalog=] [-BCV] [] + {NAME} fixsizes [--catalog=] {NAME} print_supported_formats {NAME} help {NAME} --help @@ -163,6 +165,19 @@ def cmd_update(args: Dict[str, Any], catalog.save(top) +def cmd_du(args: Dict[str, Any], + noder: Noder, + top: NodeTop) -> List[NodeAny]: + """du action""" + path = path_to_search_all(args['']) + found = noder.du(top, + path, + raw=args['--raw-size']) + if not found: + path = args[''] + Logger.err(f'\"{path}\": nothing found') + return found + def cmd_ls(args: Dict[str, Any], noder: Noder, top: NodeTop) -> List[NodeAny]: @@ -230,6 +245,17 @@ def cmd_graph(args: Dict[str, Any], Logger.info(f'create graph with \"{cmd}\" (you need graphviz)') +def cmd_fixsizes(top: NodeTop, + noder: Noder, + catalog: Catalog) -> None: + """ + fix each node size by re-calculating + recursively their size + """ + noder.fixsizes(top) + Logger.info('sizes fixed') + + def cmd_rename(args: Dict[str, Any], catalog: Catalog, top: NodeTop) -> None: @@ -379,6 +405,16 @@ def main() -> bool: Logger.err(f'no such catalog: {catalog_path}') return False cmd_edit(args, noder, catalog, top) + elif args['du']: + if not catalog.exists(): + Logger.err(f'no such catalog: {catalog_path}') + return False + cmd_du(args, noder, top) + elif args['fixsizes']: + if not catalog.exists(): + Logger.err(f'no such catalog: {catalog_path}') + return False + cmd_fixsizes(top, noder, catalog) except CatcliException as exc: Logger.stderr_nocolor('ERROR ' + str(exc)) return False diff --git a/catcli/noder.py b/catcli/noder.py index fbc0acc..f735a3f 100644 --- a/catcli/noder.py +++ b/catcli/noder.py @@ -18,8 +18,7 @@ from catcli import nodes from catcli.nodes import NodeAny, NodeStorage, \ NodeTop, NodeFile, NodeArchived, NodeDir, NodeMeta, \ typcast_node -from catcli.utils import md5sum, fix_badchars, has_attr, \ - get_node_fullpath +from catcli.utils import md5sum, fix_badchars, has_attr from catcli.logger import Logger from catcli.printer_native import NativePrinter from catcli.printer_csv import CsvPrinter @@ -307,25 +306,16 @@ class Noder: sep=sep, raw=raw) - def node_has_subs(self, node: Any) -> bool: + def _print_node_du(self, node: NodeAny, + raw: bool = False) -> None: """ - node may have children - we explicitely handle all case - for clarity + print node du style """ - if not node: - return False - if node.type == nodes.TYPE_TOP: - return True - if node.type == nodes.TYPE_FILE: - return False - if node.type == nodes.TYPE_DIR: - return True - if node.type == nodes.TYPE_STORAGE: - return True - if node.type == nodes.TYPE_ARCHIVED: - return True - return False + typcast_node(node) + thenodes = self._get_entire_tree(node, + dironly=True) + for thenode in thenodes: + self.native_printer.print_du(thenode, raw=raw) def _print_node_native(self, node: NodeAny, pre: str = '', @@ -427,7 +417,7 @@ class Noder: for _, _, rend in rendered: if not rend: continue - parents = rend.get_parent_hierarchy() + parents = rend.get_fullpath() storage = rend.get_storage_node() fullpath = os.path.join(storage.name, parents) the_nodes[fullpath] = rend @@ -487,7 +477,7 @@ class Noder: for item in found: typcast_node(item) item.name = fix_badchars(item.name) - key = get_node_fullpath(item) + key = item.get_fullpath() paths[key] = item # handle fzf mode @@ -527,7 +517,7 @@ class Noder: def _callback_find_name(self, term: str, only_dir: bool) -> Any: """callback for finding files""" def find_name(node: NodeAny) -> bool: - path = get_node_fullpath(node) + path = node.get_fullpath() if node.type == nodes.TYPE_STORAGE: # ignore storage nodes return False @@ -555,6 +545,16 @@ class Noder: return False return find_name + ############################################################### + # fixsizes + ############################################################### + def fixsizes(self, top: NodeTop) -> None: + typcast_node(top) + rend = anytree.RenderTree(top) + for _, _, thenode in rend: + typcast_node(thenode) + thenode.nodesize = thenode.get_rec_size() + ############################################################### # ls ############################################################### @@ -571,8 +571,7 @@ class Noder: @fmt: output format @raw: print raw size """ - self._debug(f'walking path: \"{path}\" from \"{top.name}\"') - + self._debug(f'ls walking path: \"{path}\" from \"{top.name}\"') resolv = anytree.resolver.Resolver('name') found = [] try: @@ -584,7 +583,8 @@ class Noder: # we have a canonical path self._debug('get ls...') found = resolv.get(top, path) - if found and self.node_has_subs(found): + typcast_node(found) + if found and found.may_have_children(): # let's find its children as well modpath = os.path.join(path, '*') found = resolv.glob(top, modpath) @@ -622,6 +622,30 @@ class Noder: pass return found + ############################################################### + # du + ############################################################### + def du(self, top: NodeTop, + path: str, + raw: bool = False) -> List[NodeAny]: + self._debug(f'du walking path: \"{path}\" from \"{top.name}\"') + resolv = anytree.resolver.Resolver('name') + found = [] + try: + # we have a canonical path + self._debug('get du...') + found = resolv.get(top, path) + if not found: + # nothing found + self._debug('nothing found') + return [] + + self._debug(f'du found: {found}') + self._print_node_du(found, raw=raw) + except anytree.resolver.ChildResolverError: + pass + return found + ############################################################### # tree creation ############################################################### @@ -653,6 +677,23 @@ class Noder: ############################################################### # diverse ############################################################### + def _get_entire_tree(self, start: NodeAny, + dironly: bool = False) -> List[NodeAny]: + """ + get entire tree and sort it + """ + typcast_node(start) + rend = anytree.RenderTree(start) + thenodes = [] + if dironly: + for _, _, thenode in rend: + typcast_node(thenode) + if thenode.type == nodes.TYPE_DIR: + thenodes.append(thenode) + else: + [thenodes.append(x) for _, _, x in rend] + return sorted(thenodes, key=os_sort_keygen(self._sort)) + def _sort_tree(self, items: List[NodeAny]) -> List[NodeAny]: """sorting a list of items""" diff --git a/catcli/nodes.py b/catcli/nodes.py index e1524d9..987e478 100644 --- a/catcli/nodes.py +++ b/catcli/nodes.py @@ -54,6 +54,10 @@ class NodeAny(NodeMixin): # type: ignore if children: self.children = children + def may_have_children(self) -> bool: + """can node contains sub""" + raise NotImplementedError + def _to_str(self) -> str: ret = str(self.__class__) + ": " + str(self.__dict__) if self.children: @@ -65,18 +69,27 @@ class NodeAny(NodeMixin): # type: ignore def __str__(self) -> str: return self._to_str() - def get_parent_hierarchy(self) -> str: - """get all parents recursively""" - raise NotImplementedError + def get_fullpath(self) -> str: + """return full path to this node""" + path = self.name + if self.parent: + typcast_node(self.parent) + ppath = self.parent.get_fullpath() + path = os.path.join(ppath, path) + return path + + def get_rec_size(self) -> int: + """recursively traverse tree and return size""" + totsize: int = self.nodesize + for node in self.children: + typcast_node(node) + totsize += node.get_rec_size() + return totsize def get_storage_node(self) -> NodeMixin: """recursively traverse up to find storage""" return None - def get_rec_size(self) -> int: - """recursively traverse tree and return size""" - raise NotImplementedError - def flagged(self) -> bool: """is flagged""" if not hasattr(self, '_flagged'): @@ -107,13 +120,22 @@ class NodeTop(NodeAny): if children: self.children = children - def get_parent_hierarchy(self) -> str: - """get all parents recursively""" + def get_fullpath(self) -> str: + """return full path to this node""" return '' + def may_have_children(self) -> bool: + """can node contains sub""" + return True + def get_rec_size(self) -> int: - """recursively traverse tree and return size""" - return 0 + """ + recursively traverse tree and return size + also ensure to update the size on the way + """ + size = super().get_rec_size() + self.nodesize = size + return size def __str__(self) -> str: return self._to_str() @@ -140,22 +162,14 @@ class NodeFile(NodeAny): if children: self.children = children - def get_parent_hierarchy(self) -> str: - """get all parents recursively""" - typcast_node(self.parent) - path = self.parent.get_parent_hierarchy() - if path: - return os.sep.join([path, self.name]) - return '' + def may_have_children(self) -> bool: + """can node contains sub""" + return False def get_storage_node(self) -> NodeAny: """recursively traverse up to find storage""" return cast(NodeStorage, self.ancestors[1]) - def get_rec_size(self) -> int: - """recursively traverse tree and return size""" - return self.nodesize - def __str__(self) -> str: return self._to_str() @@ -179,26 +193,23 @@ class NodeDir(NodeAny): if children: self.children = children - def get_parent_hierarchy(self) -> str: - """get all parents recursively""" - typcast_node(self.parent) - path = self.parent.get_parent_hierarchy() - if path: - return os.sep.join([path, self.name]) - return '' + def may_have_children(self) -> bool: + """can node contains sub""" + return True + + def get_rec_size(self) -> int: + """ + recursively traverse tree and return size + also ensure to update the size on the way + """ + size = super().get_rec_size() + self.nodesize = size + return size def get_storage_node(self) -> NodeAny: """recursively traverse up to find storage""" return cast(NodeStorage, self.ancestors[1]) - def get_rec_size(self) -> int: - """recursively traverse tree and return size""" - totsize: int = 0 - for node in self.children: - typcast_node(node) - totsize += node.get_rec_size() - return totsize - def __str__(self) -> str: return self._to_str() @@ -224,22 +235,14 @@ class NodeArchived(NodeAny): if children: self.children = children - def get_parent_hierarchy(self) -> str: - """get all parents recursively""" - typcast_node(self.parent) - path = self.parent.get_parent_hierarchy() - if path: - return os.sep.join([path, self.name]) - return '' + def may_have_children(self) -> bool: + """can node contains sub""" + return False def get_storage_node(self) -> NodeAny: """recursively traverse up to find storage""" return cast(NodeStorage, self.ancestors[1]) - def get_rec_size(self) -> int: - """recursively traverse tree and return size""" - return self.nodesize - def __str__(self) -> str: return self._to_str() @@ -269,22 +272,23 @@ class NodeStorage(NodeAny): if children: self.children = children - def get_parent_hierarchy(self) -> str: - """get all parents recursively""" - return '' + def may_have_children(self) -> bool: + """can node contains sub""" + return True + + def get_rec_size(self) -> int: + """ + recursively traverse tree and return size + also ensure to update the size on the way + """ + size = super().get_rec_size() + self.nodesize = size + return size def get_storage_node(self) -> NodeAny: """recursively traverse up to find storage""" return self - def get_rec_size(self) -> int: - """recursively traverse tree and return size""" - totsize: int = 0 - for node in self.children: - typcast_node(node) - totsize += node.get_rec_size() - return totsize - def __str__(self) -> str: return self._to_str() @@ -306,13 +310,9 @@ class NodeMeta(NodeAny): if children: self.children = children - def get_parent_hierarchy(self) -> str: - """get all parents recursively""" - typcast_node(self.parent) - path = self.parent.get_parent_hierarchy() - if path: - return os.sep.join([path, self.name]) - return '' + def may_have_children(self) -> bool: + """can node contains sub""" + return False def get_rec_size(self) -> int: """recursively traverse tree and return size""" diff --git a/catcli/printer_csv.py b/catcli/printer_csv.py index 3b7a453..2f62a3c 100644 --- a/catcli/printer_csv.py +++ b/catcli/printer_csv.py @@ -59,12 +59,11 @@ class CsvPrinter: out = [] out.append(node.name.replace('"', '""')) # name out.append(node.type) # type - parents = node.get_parent_hierarchy() - storage = node.get_storage_node() - fullpath = os.path.join(storage.name, parents) + fullpath = node.get_fullpath() out.append(fullpath.replace('"', '""')) # full path out.append(size_to_str(node.nodesize, raw=raw)) # size + storage = node.get_storage_node() out.append(epoch_to_str(storage.ts)) # indexed_at if has_attr(node, 'maccess'): out.append(epoch_to_str(node.maccess)) # maccess diff --git a/catcli/printer_native.py b/catcli/printer_native.py index 38e4fdf..dc3d6bb 100644 --- a/catcli/printer_native.py +++ b/catcli/printer_native.py @@ -7,11 +7,12 @@ Class for printing nodes in native format import sys -from catcli.nodes import NodeFile, NodeDir, NodeStorage +from catcli.nodes import NodeFile, NodeDir, \ + NodeStorage, NodeAny, typcast_node from catcli.colors import Colors from catcli.logger import Logger from catcli.utils import fix_badchars, size_to_str, \ - has_attr, epoch_to_str, get_node_fullpath + has_attr, epoch_to_str COLOR_STORAGE = Colors.YELLOW @@ -31,6 +32,19 @@ class NativePrinter: ARCHIVE = 'archive' NBFILES = 'nbfiles' + def print_du(self, node: NodeAny, + raw: bool = False) -> None: + """print du style""" + typcast_node(node) + name = node.get_fullpath() + size = node.nodesize + + line = size_to_str(size, raw=raw).ljust(10, ' ') + out = f'{COLOR_SIZE}{line}{Colors.RESET}' + out += ' ' + out += f'{COLOR_FILE}{name}{Colors.RESET}' + sys.stdout.write(f'{out}\n') + def print_top(self, pre: str, name: str) -> None: """print top node""" sys.stdout.write(f'{pre}{name}\n') @@ -80,7 +94,7 @@ class NativePrinter: name = node.name storage = node.get_storage_node() if withpath: - name = get_node_fullpath(node) + name = node.get_fullpath() # construct attributes attrs = [] if node.md5: @@ -117,7 +131,7 @@ class NativePrinter: name = node.name storage = node.get_storage_node() if withpath: - name = get_node_fullpath(node) + name = node.get_fullpath() # construct attrs attrs = [] if withnbchildren: diff --git a/catcli/utils.py b/catcli/utils.py index 8ae5e30..7fef081 100644 --- a/catcli/utils.py +++ b/catcli/utils.py @@ -122,17 +122,3 @@ def fix_badchars(string: str) -> str: def has_attr(node: nodes.NodeAny, attr: str) -> bool: """return True if node has attr as attribute""" return attr in node.__dict__.keys() - - -def get_node_fullpath(node: nodes.NodeAny) -> str: - """get node full path""" - nodes.typcast_node(node) - path = node.name - parents = node.get_parent_hierarchy() - if parents: - path = os.sep.join([parents, path]) - storage = node.get_storage_node() - if storage: - path = os.sep.join([storage.name, path]) - path = fix_badchars(path) - return str(path)