2
0
mirror of https://github.com/vasi/pixz synced 2024-11-03 09:40:24 +00:00

encode and decode index

This commit is contained in:
Dave Vasilevsky 2010-01-10 01:49:21 -05:00
parent ae4dab3f31
commit 53be4cd65d
4 changed files with 120 additions and 28 deletions

123
index.c
View File

@ -9,6 +9,8 @@ static bool pixz_index_is_prefix(const char *name);
static void pixz_index_add_record(pixz_index *i, size_t offset, const char *name); static void pixz_index_add_record(pixz_index *i, size_t offset, const char *name);
static uint8_t *pixz_index_write_buf(pixz_index_record **rec, size_t *outsize); static uint8_t *pixz_index_write_buf(pixz_index_record **rec, size_t *outsize);
static size_t pixz_index_read_buf(pixz_index *i, uint8_t **outbuf,
uint8_t * last, size_t *outsize);
pixz_index *pixz_index_new(void) { pixz_index *pixz_index_new(void) {
pixz_index *i = malloc(sizeof(pixz_index)); pixz_index *i = malloc(sizeof(pixz_index));
@ -19,8 +21,10 @@ pixz_index *pixz_index_new(void) {
} }
void pixz_index_free(pixz_index *i) { void pixz_index_free(pixz_index *i) {
for (pixz_index_record *rec = i->first; rec; rec = rec->next) { pixz_index_record *nextrec;
for (pixz_index_record *rec = i->first; rec; rec = nextrec) {
free(rec->name); free(rec->name);
nextrec = rec->next;
free(rec); free(rec);
} }
free(i); free(i);
@ -82,7 +86,7 @@ static uint8_t *pixz_index_write_buf(pixz_index_record **rec, size_t *outsize) {
do { do {
if (end->name) if (end->name)
space += strlen(end->name); space += strlen(end->name);
space += 2 + sizeof(uint64_t); // offset and two nulls space += 1 + sizeof(uint64_t); // nul and offset
end = end->next; end = end->next;
} while (end && space < CHUNKSIZE); } while (end && space < CHUNKSIZE);
@ -92,15 +96,13 @@ static uint8_t *pixz_index_write_buf(pixz_index_record **rec, size_t *outsize) {
for (; *rec != end; *rec = (*rec)->next) { for (; *rec != end; *rec = (*rec)->next) {
const char *name = (*rec)->name; const char *name = (*rec)->name;
if (!name) if (!name)
name = ""; name = ""; // Empty string signifies finish
printf("%s\n", name);
size_t len = strlen(name); size_t len = strlen(name);
strncpy((char*)pos, name, len + 1); strncpy((char*)pos, name, len + 1);
pos += len + 1; pos += len + 1;
pixz_offset_write((*rec)->offset, pos); pixz_offset_write((*rec)->offset, pos);
pos += sizeof(uint64_t); pos += sizeof(uint64_t);
*pos++ = '\0';
} }
*outsize = space; *outsize = space;
@ -117,39 +119,120 @@ fixme_err pixz_index_write(pixz_index *i, FILE *out, pixz_encode_options *opts)
pixz_die("Error writing file index header\n"); pixz_die("Error writing file index header\n");
lzma_stream stream = LZMA_STREAM_INIT; lzma_stream stream = LZMA_STREAM_INIT;
lzma_ret err = lzma_block_encoder(&stream, &block); if (lzma_block_encoder(&stream, &block) != LZMA_OK)
if (err != LZMA_OK) pixz_die("Error creating file index block encoder.\n");
pixz_die("Error #%d creating file index block encoder.\n", err);
uint8_t *inbuf = NULL; uint8_t *inbuf = NULL;
stream.avail_in = 0;
pixz_index_record *rec = i->first; pixz_index_record *rec = i->first;
while (rec) { stream.avail_in = 0;
if (stream.avail_in == 0) { lzma_ret err = LZMA_OK;
lzma_action action = LZMA_RUN;
while (err != LZMA_STREAM_END) {
if (action != LZMA_FINISH && stream.avail_in == 0) {
free(inbuf); free(inbuf);
stream.next_in = inbuf = pixz_index_write_buf(&rec, &stream.avail_in); stream.next_in = inbuf = pixz_index_write_buf(&rec, &stream.avail_in);
if (!inbuf) { action = rec ? LZMA_RUN : LZMA_FINISH;
if (lzma_code(&stream, LZMA_FINISH) != LZMA_STREAM_END)
pixz_die("Error finishing file index\n");
break;
}
} }
stream.next_out = buf; stream.next_out = buf;
stream.avail_out = CHUNKSIZE; stream.avail_out = CHUNKSIZE;
if (lzma_code(&stream, LZMA_RUN) != LZMA_OK) err = lzma_code(&stream, action);
pixz_die("Error encoding file index\n"); if (err != LZMA_OK && err != LZMA_STREAM_END)
pixz_die("Error #%d encoding file index\n", err);
size_t wr = stream.next_out - buf; size_t wr = stream.next_out - buf;
if (wr) { if (wr) {
if (fwrite(buf, wr, 1, out) != 1) if (fwrite(buf, wr, 1, out) != 1)
pixz_die("Error writing file index\n"); pixz_die("Error writing file index\n");
} }
} }
free(inbuf);
lzma_end(&stream); lzma_end(&stream);
return 31337; return 31337;
} }
fixme_err pixz_index_read_in_place(pixz_index **i, FILE *in) { // return number of bytes at beginning to keep
/*
 * Parse every complete index record currently held in the decode buffer.
 *
 * A record is a NUL-terminated name immediately followed by a
 * sizeof(uint64_t)-byte offset. A record with an empty name marks the end of
 * the index: its offset is passed to pixz_index_finish() and parsing stops.
 *
 *   i       - index that receives the parsed records
 *   outbuf  - in/out: start of the decode buffer; replaced if the buffer grows
 *   last    - one past the final decoded byte in *outbuf
 *   outsize - in/out: allocated size of *outbuf; doubled if the buffer grows
 *
 * Returns the number of bytes at the beginning of *outbuf that hold data not
 * yet parsed and must be kept (the caller continues decoding right after
 * them), or 0 once the end-of-index record has been consumed.
 */
static size_t pixz_index_read_buf(pixz_index *i, uint8_t **outbuf,
        uint8_t *last, size_t *outsize) {
    uint8_t *pos = *outbuf;

    // A record needs its terminating NUL plus a whole offset, so more than
    // sizeof(uint64_t) bytes must remain. Working with lengths instead of
    // computing `last - sizeof(uint64_t)` avoids forming a pointer that lies
    // before the buffer when only a few bytes have been decoded.
    while ((size_t)(last - pos) > sizeof(uint64_t)) {
        // Only accept a NUL that still leaves room for the offset behind it.
        size_t searchable = (size_t)(last - pos) - sizeof(uint64_t);
        uint8_t *strend = memchr(pos, '\0', searchable);
        if (!strend)
            break; // the record at pos is not completely decoded yet

        uint64_t offset = pixz_offset_read(strend + 1);
        if (!*pos) {
            // Empty name: this record terminates the index.
            pixz_index_finish(i, offset);
            return 0;
        }
        pixz_index_add_record(i, offset, (char*)pos);
        pos = strend + 1 + sizeof(uint64_t);
    }

    size_t pending = (size_t)(last - pos);
    if (pos != *outbuf) {
        // Slide the incomplete tail to the front so decoding can append to it.
        memmove(*outbuf, pos, pending);
        return pending;
    }

    // Not a single record was parsed. If the buffer still has free space the
    // decoder can simply keep appending; report what is actually present.
    if (pending < *outsize)
        return pending;

    // The buffer is full and still does not hold one whole record: grow it.
    // Keep the old pointer until realloc succeeds so nothing is leaked or
    // dereferenced as NULL on failure.
    if (*outsize > SIZE_MAX / 2) {
        fprintf(stderr, "File index record too large\n");
        exit(1);
    }
    size_t grown_size = *outsize * 2;
    uint8_t *grown = realloc(*outbuf, grown_size);
    if (!grown) {
        fprintf(stderr, "Can't grow file index buffer\n");
        exit(1);
    }
    *outbuf = grown;
    *outsize = grown_size;
    return pending;
}
/*
 * Decode a compressed file index from the block that starts at the current
 * position of `in`, storing a newly created index in *i.
 *
 *   i     - out: receives the index built with pixz_index_new()
 *   in    - stream positioned at the first byte of the block header
 *   check - integrity check type recorded in the block being decoded
 *
 * Any failure is reported through pixz_die(). The return value is the
 * placeholder 31337 used by the other fixme_err functions in this file.
 */
fixme_err pixz_index_read_in_place(pixz_index **i, FILE *in, lzma_check check) {
    // The first header byte encodes the header size; EOF or a zero byte means
    // no block starts here.
    int c = fgetc(in);
    if (c == EOF || c == 0)
        pixz_die("There's no block here\n");

    lzma_block block = { .check = check };
    block.header_size = lzma_block_header_size_decode(c);
    uint8_t header[block.header_size];
    header[0] = c;
    if (fread(header + 1, block.header_size - 1, 1, in) != 1)
        pixz_die("Can't read block header\n");

    // Zero-filled so that every options pointer is safe to free() later, no
    // matter how many entries the header decoder fills in.
    block.filters = calloc(LZMA_FILTERS_MAX + 1, sizeof(lzma_filter));
    if (!block.filters)
        pixz_die("Can't allocate block filters\n");
    if (lzma_block_header_decode(&block, NULL, header) != LZMA_OK)
        pixz_die("Can't decode header\n");

    lzma_stream stream = LZMA_STREAM_INIT;
    if (lzma_block_decoder(&stream, &block) != LZMA_OK)
        pixz_die("Can't setup block decoder\n");

    size_t outsize = CHUNKSIZE;
    uint8_t inbuf[CHUNKSIZE], *outbuf = malloc(outsize);
    if (!outbuf)
        pixz_die("Can't allocate file index buffer\n");

    *i = pixz_index_new();

    stream.next_out = outbuf;
    stream.avail_out = outsize;
    stream.avail_in = 0;
    lzma_ret err = LZMA_OK;
    lzma_action action = LZMA_RUN;
    while (err != LZMA_STREAM_END) {
        // Refill the input only when the decoder has used it all up; once the
        // file yields no more bytes, tell the decoder to finish.
        if (action != LZMA_FINISH && stream.avail_in == 0) {
            stream.avail_in = fread(inbuf, 1, CHUNKSIZE, in);
            stream.next_in = inbuf;
            action = stream.avail_in == 0 ? LZMA_FINISH : LZMA_RUN;
        }

        err = lzma_code(&stream, action);
        if (err != LZMA_OK && err != LZMA_STREAM_END)
            pixz_die("Error #%d decoding file index\n", err);

        // Parse records whenever the output buffer fills up or the stream
        // ends. Unparsed bytes are kept at the front of the buffer (which may
        // have been reallocated), and decoding resumes right after them.
        if (stream.avail_out == 0 || err == LZMA_STREAM_END) {
            size_t keep = pixz_index_read_buf(*i, &outbuf, stream.next_out, &outsize);
            stream.next_out = outbuf + keep;
            stream.avail_out = outsize - keep;
        }
    }

    free(outbuf);
    lzma_end(&stream);

    // The header decoder was given a NULL allocator, so the options it
    // allocated belong to us; release them before the array itself.
    for (size_t f = 0; f <= LZMA_FILTERS_MAX; f++)
        free(block.filters[f].options);
    free(block.filters);

    return 31337;
}

7
pixz.h
View File

@ -8,8 +8,9 @@
typedef int fixme_err; typedef int fixme_err;
void pixz_die(const char *fmt, ...); void pixz_die(const char *fmt, ...);
void pixz_offset_write(uint64_t n, uint8_t *buf);
void pixz_offset_write(uint64_t n, uint8_t *buf);
uint64_t pixz_offset_read(uint8_t *buf);
/***** BLOCK *****/ /***** BLOCK *****/
@ -92,6 +93,6 @@ void pixz_index_finish(pixz_index *i, size_t offset);
void pixz_index_dump(pixz_index *i, FILE *out); void pixz_index_dump(pixz_index *i, FILE *out);
fixme_err pixz_index_write(pixz_index *i, FILE *out, pixz_encode_options *opts); fixme_err pixz_index_write(pixz_index *i, FILE *out, pixz_encode_options *opts);
fixme_err pixz_index_read_in_place(pixz_index **i, FILE *in); fixme_err pixz_index_read_in_place(pixz_index **i, FILE *in, lzma_check check);
fixme_err pixz_index_read(pixz_index **i, FILE *in); fixme_err pixz_index_read(pixz_index **i, FILE *in, lzma_check check);

14
tar.c
View File

@ -5,8 +5,7 @@
#include <sys/errno.h> #include <sys/errno.h>
// Tar uses records of 512 bytes #define CHUNKSIZE 4096
#define CHUNKSIZE 512
#define INDEXFILE "index.xz" #define INDEXFILE "index.xz"
typedef struct { typedef struct {
@ -18,6 +17,7 @@ static int pixz_tar_open(struct archive *a, void *refp);
static int pixz_tar_close(struct archive *a, void *refp); static int pixz_tar_close(struct archive *a, void *refp);
static ssize_t pixz_tar_read(struct archive *a, void *refp, const void **buf); static ssize_t pixz_tar_read(struct archive *a, void *refp, const void **buf);
#include <string.h>
int main(void) { int main(void) {
pixz_index *index = pixz_index_new(); pixz_index *index = pixz_index_new();
@ -38,8 +38,11 @@ int main(void) {
if (aerr == ARCHIVE_EOF) { if (aerr == ARCHIVE_EOF) {
pixz_index_finish(index, ftello(stdin)); pixz_index_finish(index, ftello(stdin));
break; break;
} else if (aerr != ARCHIVE_OK) } else if (aerr != ARCHIVE_OK && aerr != ARCHIVE_WARN) {
pixz_die("Error reading header\n"); // libarchive warns for silly things like failure to convert
// names into multibyte strings
pixz_die("Error reading header: %s\n", archive_error_string(a));
}
const char *name = archive_entry_pathname(entry); const char *name = archive_entry_pathname(entry);
size_t offset = archive_read_header_position(a); size_t offset = archive_read_header_position(a);
@ -58,11 +61,12 @@ int main(void) {
pixz_encode_options_default(opts); pixz_encode_options_default(opts);
pixz_index_write(index, ifile, opts); pixz_index_write(index, ifile, opts);
pixz_index_free(index); pixz_index_free(index);
lzma_check check = opts->check;
pixz_encode_options_free(opts); pixz_encode_options_free(opts);
fseek(ifile, 0, SEEK_SET); fseek(ifile, 0, SEEK_SET);
pixz_index *i2; pixz_index *i2;
pixz_index_read_in_place(&i2, ifile); pixz_index_read_in_place(&i2, ifile, check);
fclose(ifile); fclose(ifile);
pixz_index_dump(i2, stdout); pixz_index_dump(i2, stdout);

4
util.c
View File

@ -17,3 +17,7 @@ void pixz_die(const char *fmt, ...) {
void pixz_offset_write(uint64_t n, uint8_t *buf) { void pixz_offset_write(uint64_t n, uint8_t *buf) {
OSWriteLittleInt64(buf, 0, n); OSWriteLittleInt64(buf, 0, n);
} }
/*
 * Decode the 64-bit little-endian offset stored in the first
 * sizeof(uint64_t) bytes at buf.
 *
 * The value is assembled one byte at a time, so the result does not depend on
 * the host's byte order or on buf's alignment, and no platform-specific
 * byte-order helper is needed.
 */
uint64_t pixz_offset_read(uint8_t *buf) {
    uint64_t n = 0;
    for (unsigned b = 0; b < sizeof n; b++)
        n |= (uint64_t)buf[b] << (8 * b); // byte b carries bits 8b..8b+7
    return n;
}