2
0
mirror of https://github.com/vasi/pixz synced 2024-11-18 15:26:46 +00:00

Doc update

This commit is contained in:
Dave Vasilevsky 2010-10-14 02:11:46 -04:00
parent 4713b32cf1
commit 17bf2bc294
5 changed files with 44 additions and 24 deletions

30
README
View File

@ -7,14 +7,36 @@ The existing XZ Utils ( http://tukaani.org/xz/ ) provide great compression in th
* The .xz files they produce are just one big block of compressed data, rather than a collection of smaller blocks. This makes random access to the original data impossible. * The .xz files they produce are just one big block of compressed data, rather than a collection of smaller blocks. This makes random access to the original data impossible.
With pixz, both these problems can eventually be solved. Currently these pixz tools are available: With pixz, both these problems are solved. The most useful commands:
* write INPUT.tar OUTPUT.tpxz: Compresses an uncompressed tarball. The compression uses two cores. An index of all the files in the tarball is stored within the file, yet it remains compatible with standard xz and tar. $ pixz foo.tar foo.tpxz # Compress and index a tarball, multi-core
$ pixz -l foo.tpxz # Very quickly list the contents of the compressed tarball
$ pixz -x dir/file < foo.tpxz | tar x # Very quickly extract a file, multi-core.
# Also verifies that contents match index.
* read INPUT.tpxz PATH: Efficiently extracts a single file from a tarball compressed by 'write'. $ pixz bar bar.xz # Compress a non-tarball, multi-core
$ pixz -d bar.xz bar # Decompress it, multi-core
* list [-t] INPUT.xz: Lists the xz blocks present within any .xz file. Optionally also lists a file index as stored by 'write'.
Specifying input and output:
$ pixz < foo.tar > foo.tpxz # Same as 'pixz foo.tar foo.tpxz'
$ pixz -i foo.tar -o foo.tpxz # Ditto. These both work for -x, -d and -l too, e.g.:
$ pixz -x -i foo.tpxz -o foo.tar file1 file2 ... # Extract the files from foo.tpxz into foo.tar
$ pixz foo.tar # Compress it to foo.tpxz, removing the original
$ pixz -d foo.tpxz # Extract it to foo.tar, removing the original
Other flags:
$ pixz -1 foo.tar # Faster, worse compression
$ pixz -9 foo.tar # Better, slower compression
$ pixz -t foo.tar # Compress but don't treat it as a tarball (don't index it)
$ pixz -d -t foo.tpxz # Decompress foo.tpxz, but don't check that contents match index
$ pixz -l -t foo.tpxz # List the xz blocks instead of files
Compare to: Compare to:

10
TODO
View File

@ -3,26 +3,19 @@ CLEANUP
* error handling * error handling
* signal handling * signal handling
* globals * globals
* non-tar parallel compression
* multi-threaded decoding
* command-line options
* autoconf * autoconf
* portability: byte-swapping
* optimized settings * optimized settings
* memory limit * memory limit
* cpu number * cpu number
* block size, for max threads on small files * block size, for max threads on small files
BUGS BUGS
* fast input or slow output -> blocks pile up, huge memory usage * performance lags under IO?
* performance lags under IO?
* slow input -> CPUs idle while waiting for input * slow input -> CPUs idle while waiting for input
* safe extraction * safe extraction
* validate file headers against index
* abort if block size exceeded * abort if block size exceeded
EFFICIENCY EFFICIENCY
* should use ordered list in collator
* more efficient indexing: ranges? sorted? mtree? * more efficient indexing: ranges? sorted? mtree?
* circular buffer > linked list? * circular buffer > linked list?
@ -36,4 +29,3 @@ FEATURES
* other archive formats: cpio? * other archive formats: cpio?
* lzma-like API * lzma-like API
* recovery tool (already is, kinda) * recovery tool (already is, kinda)

View File

@ -87,9 +87,15 @@ static void read_file_index_make_space(void);
static void read_file_index_data(void); static void read_file_index_data(void);
void dump_file_index(FILE *out) { void dump_file_index(FILE *out, bool verbose) {
for (file_index_t *f = gFileIndex; f != NULL; f = f->next) { for (file_index_t *f = gFileIndex; f != NULL; f = f->next) {
fprintf(out, "%10"PRIuMAX" %s\n", (uintmax_t)f->offset, f->name ? f->name : ""); if (verbose) {
fprintf(out, "%10"PRIuMAX" %s\n", (uintmax_t)f->offset,
f->name ? f->name : "");
} else {
if (f->name)
fprintf(out, "%s\n", f->name);
}
} }
} }

14
list.c
View File

@ -8,16 +8,16 @@ void pixz_list(bool tar) {
decode_index(); decode_index();
lzma_index_iter iter; lzma_index_iter iter;
lzma_index_iter_init(&iter, gIndex); lzma_index_iter_init(&iter, gIndex);
while (!lzma_index_iter_next(&iter, LZMA_INDEX_ITER_BLOCK)) {
printf("%9"PRIuMAX" / %9"PRIuMAX"\n",
(uintmax_t)iter.block.unpadded_size,
(uintmax_t)iter.block.uncompressed_size);
}
if (tar && read_file_index(0)) { if (tar && read_file_index(0)) {
printf("\n"); dump_file_index(stdout, false);
dump_file_index(stdout);
free_file_index(); free_file_index();
} else {
while (!lzma_index_iter_next(&iter, LZMA_INDEX_ITER_BLOCK)) {
printf("%9"PRIuMAX" / %9"PRIuMAX"\n",
(uintmax_t)iter.block.unpadded_size,
(uintmax_t)iter.block.uncompressed_size);
}
} }
lzma_index_end(gIndex, NULL); lzma_index_end(gIndex, NULL);

2
pixz.h
View File

@ -72,7 +72,7 @@ void decode_index(void);
lzma_vli find_file_index(void **bdatap); lzma_vli find_file_index(void **bdatap);
lzma_vli read_file_index(lzma_vli offset); lzma_vli read_file_index(lzma_vli offset);
void dump_file_index(FILE *out); void dump_file_index(FILE *out, bool verbose);
void free_file_index(void); void free_file_index(void);