From 17bf2bc294af29ca300acf39cb76a1dbb3d7c837 Mon Sep 17 00:00:00 2001 From: Dave Vasilevsky Date: Thu, 14 Oct 2010 02:11:46 -0400 Subject: [PATCH] Doc update --- README | 30 ++++++++++++++++++++++++++---- TODO | 10 +--------- common.c | 10 ++++++++-- list.c | 16 ++++++++-------- pixz.h | 2 +- 5 files changed, 44 insertions(+), 24 deletions(-) diff --git a/README b/README index 037d7e3..262195b 100644 --- a/README +++ b/README @@ -7,14 +7,36 @@ The existing XZ Utils ( http://tukaani.org/xz/ ) provide great compression in th * The .xz files they produce are just one big block of compressed data, rather than a collection of smaller blocks. This makes random access to the original data impossible. -With pixz, both these problems can eventually be solved. Currently these pixz tools are available: +With pixz, both these problems are solved. The most useful commands: -* write INPUT.tar OUTPUT.tpxz: Compresses an uncompressed tarball. The compression uses two cores. An index of all the files in the tarball is stored within the file, yet it remains compatible with standard xz and tar. +$ pixz foo.tar foo.tpxz # Compress and index a tarball, multi-core +$ pixz -l foo.tpxz # Very quickly list the contents of the compressed tarball +$ pixz -x dir/file < foo.tpxz | tar x # Very quickly extract a file, multi-core. + # Also verifies that contents match index. -* read INPUT.tpxz PATH: Efficiently extracts a single file from a tarball compressed by 'write'. +$ pixz bar bar.xz # Compress a non-tarball, multi-core +$ pixz -d bar.xz bar # Decompress it, multi-core -* list [-t] INPUT.xz: Lists the xz blocks present within any .xz file. Optionally also lists a file index as stored by 'write'. +Specifying input and output: + +$ pixz < foo.tar > foo.tpxz # Same as 'pixz foo.tar foo.tpxz' +$ pixz -i foo.tar -o foo.tpxz # Ditto. These both work for -x, -d and -l too, e.g.: + +$ pixz -x -i foo.tpxz -o foo.tar file1 file2 ... 
# Extract the files from foo.tpxz into foo.tar + +$ pixz foo.tar # Compress it to foo.tpxz, removing the original +$ pixz -d foo.tpxz # Extract it to foo.tar, removing the original + + +Other flags: + +$ pixz -1 foo.tar # Faster, worse compression +$ pixz -9 foo.tar # Better, slower compression + +$ pixz -t foo.tar # Compress but don't treat it as a tarball (don't index it) +$ pixz -d -t foo.tpxz # Decompress foo, don't check that contents match index +$ pixz -l -t foo.tpxz # List the xz blocks instead of files Compare to: diff --git a/TODO b/TODO index 2656d1c..65c846f 100644 --- a/TODO +++ b/TODO @@ -3,26 +3,19 @@ CLEANUP * error handling * signal handling * globals - * non-tar parallel compression - * multi-threaded decoding - * command-line options * autoconf - * portability: byte-swapping * optimized settings * memory limit * cpu number * block size, for max threads on small files BUGS - * fast input or slow output -> blocks pile up, huge memory usage - * performance lags under IO? + * performance lags under IO? * slow input -> CPUs idle while waiting for input * safe extraction - * validate file headers against index * abort if block size exceeded EFFICIENCY - * should use ordered list in collator * more efficient indexing: ranges? sorted? mtree? * circular buffer > linked list? @@ -36,4 +29,3 @@ FEATURES * other archive formats: cpio? * lzma-like API * recovery tool (already is, kinda) - \ No newline at end of file diff --git a/common.c b/common.c index e0011ed..1bf00ab 100644 --- a/common.c +++ b/common.c @@ -87,9 +87,15 @@ static void read_file_index_make_space(void); static void read_file_index_data(void); -void dump_file_index(FILE *out) { +void dump_file_index(FILE *out, bool verbose) { for (file_index_t *f = gFileIndex; f != NULL; f = f->next) { - fprintf(out, "%10"PRIuMAX" %s\n", (uintmax_t)f->offset, f->name ? f->name : ""); + if (verbose) { + fprintf(out, "%10"PRIuMAX" %s\n", (uintmax_t)f->offset, + f->name ? 
f->name : ""); + } else { + if (f->name) + fprintf(out, "%s\n", f->name); + } } } diff --git a/list.c b/list.c index d284ea7..4ae709b 100644 --- a/list.c +++ b/list.c @@ -8,16 +8,16 @@ void pixz_list(bool tar) { decode_index(); lzma_index_iter iter; lzma_index_iter_init(&iter, gIndex); - while (!lzma_index_iter_next(&iter, LZMA_INDEX_ITER_BLOCK)) { - printf("%9"PRIuMAX" / %9"PRIuMAX"\n", - (uintmax_t)iter.block.unpadded_size, - (uintmax_t)iter.block.uncompressed_size); - } - + if (tar && read_file_index(0)) { - printf("\n"); - dump_file_index(stdout); + dump_file_index(stdout, false); free_file_index(); + } else { + while (!lzma_index_iter_next(&iter, LZMA_INDEX_ITER_BLOCK)) { + printf("%9"PRIuMAX" / %9"PRIuMAX"\n", + (uintmax_t)iter.block.unpadded_size, + (uintmax_t)iter.block.uncompressed_size); + } } lzma_index_end(gIndex, NULL); diff --git a/pixz.h b/pixz.h index 8e2474e..3b57c89 100644 --- a/pixz.h +++ b/pixz.h @@ -72,7 +72,7 @@ void decode_index(void); lzma_vli find_file_index(void **bdatap); lzma_vli read_file_index(lzma_vli offset); -void dump_file_index(FILE *out); +void dump_file_index(FILE *out, bool verbose); void free_file_index(void);