diff --git a/index.c b/index.c
index 8412cae..1e0c097 100644
--- a/index.c
+++ b/index.c
@@ -9,6 +9,8 @@
 static bool pixz_index_is_prefix(const char *name);
 static void pixz_index_add_record(pixz_index *i, size_t offset, const char *name);
 static uint8_t *pixz_index_write_buf(pixz_index_record **rec, size_t *outsize);
+static size_t pixz_index_read_buf(pixz_index *i, uint8_t **outbuf,
+        uint8_t * last, size_t *outsize);
 
 pixz_index *pixz_index_new(void) {
     pixz_index *i = malloc(sizeof(pixz_index));
@@ -19,8 +21,10 @@ pixz_index *pixz_index_new(void) {
 }
 
 void pixz_index_free(pixz_index *i) {
-    for (pixz_index_record *rec = i->first; rec; rec = rec->next) {
+    pixz_index_record *nextrec;
+    for (pixz_index_record *rec = i->first; rec; rec = nextrec) {
         free(rec->name);
+        nextrec = rec->next;
         free(rec);
     }
     free(i);
@@ -82,7 +86,7 @@ static uint8_t *pixz_index_write_buf(pixz_index_record **rec, size_t *outsize) {
     do {
         if (end->name)
             space += strlen(end->name);
-        space += 2 + sizeof(uint64_t); // offset and two nulls
+        space += 1 + sizeof(uint64_t); // nul and offset
         end = end->next;
     } while (end && space < CHUNKSIZE);
 
@@ -92,15 +96,13 @@ static uint8_t *pixz_index_write_buf(pixz_index_record **rec, size_t *outsize) {
     for (; *rec != end; *rec = (*rec)->next) {
         const char *name = (*rec)->name;
         if (!name)
-            name = "";
-        printf("%s\n", name);
+            name = ""; // Empty string signifies finish
         size_t len = strlen(name);
         strncpy((char*)pos, name, len + 1);
         pos += len + 1;
 
         pixz_offset_write((*rec)->offset, pos);
         pos += sizeof(uint64_t);
-        *pos++ = '\0';
     }
 
     *outsize = space;
@@ -117,39 +119,120 @@ fixme_err pixz_index_write(pixz_index *i, FILE *out, pixz_encode_options *opts)
         pixz_die("Error writing file index header\n");
 
     lzma_stream stream = LZMA_STREAM_INIT;
-    lzma_ret err = lzma_block_encoder(&stream, &block);
-    if (err != LZMA_OK)
-        pixz_die("Error #%d creating file index block encoder.\n", err);
+    if (lzma_block_encoder(&stream, &block) != LZMA_OK)
+        pixz_die("Error creating file index block encoder.\n");
 
     uint8_t *inbuf = NULL;
-    stream.avail_in = 0;
     pixz_index_record *rec = i->first;
-    while (rec) {
-        if (stream.avail_in == 0) {
+    stream.avail_in = 0;
+    lzma_ret err = LZMA_OK;
+    lzma_action action = LZMA_RUN;
+    while (err != LZMA_STREAM_END) {
+        if (action != LZMA_FINISH && stream.avail_in == 0) {
             free(inbuf);
             stream.next_in = inbuf = pixz_index_write_buf(&rec, &stream.avail_in);
-            if (!inbuf) {
-                if (lzma_code(&stream, LZMA_FINISH) != LZMA_STREAM_END)
-                    pixz_die("Error finishing file index\n");
-                break;
-            }
+            action = rec ? LZMA_RUN : LZMA_FINISH;
         }
 
         stream.next_out = buf;
-        stream.avail_out = CHUNKSIZE;
-        if (lzma_code(&stream, LZMA_RUN) != LZMA_OK)
-            pixz_die("Error encoding file index\n");
+        stream.avail_out = CHUNKSIZE;
+        err = lzma_code(&stream, action);
+        if (err != LZMA_OK && err != LZMA_STREAM_END)
+            pixz_die("Error #%d encoding file index\n", err);
+
         size_t wr = stream.next_out - buf;
         if (wr) {
             if (fwrite(buf, wr, 1, out) != 1)
                 pixz_die("Error writing file index\n");
         }
     }
+    free(inbuf);
 
     lzma_end(&stream);
 
     return 31337;
 }
 
-fixme_err pixz_index_read_in_place(pixz_index **i, FILE *in) {
+// return number of bytes at beginning to keep
+static size_t pixz_index_read_buf(pixz_index *i, uint8_t **outbuf,
+        uint8_t *last, size_t *outsize) {
+    uint8_t *pos = *outbuf, *lastpos = last - sizeof(uint64_t);
+    while (pos < lastpos) {
+        uint8_t *strend = memchr(pos, '\0', lastpos - pos);
+        if (!strend)
+            break;
+
+        uint64_t offset = pixz_offset_read(strend + 1);
+        if (*pos) {
+            pixz_index_add_record(i, offset, (char*)pos);
+        } else {
+            pixz_index_finish(i, offset);
+            return 0;
+        }
+        pos = strend + 1 + sizeof(uint64_t);
+    }
+
+    if (pos == *outbuf) {
+        // found nothing at all, need a bigger buffer
+        size_t oldsize = *outsize;
+        *outsize *= 2;
+        *outbuf = realloc(*outbuf, *outsize);
+        return oldsize;
+    } else {
+        size_t keep = last - pos;
+        memmove(*outbuf, pos, keep);
+        return keep;
+    }
+}
+
+fixme_err pixz_index_read_in_place(pixz_index **i, FILE *in, lzma_check check) {
+    int c = fgetc(in);
+    if (c == EOF || c == 0)
+        pixz_die("There's no block here\n");
+
+    lzma_block block = { .check = check };
+    block.header_size = lzma_block_header_size_decode(c);
+    uint8_t header[block.header_size];
+    header[0] = c;
+    if (fread(header + 1, block.header_size - 1, 1, in) != 1)
+        pixz_die("Can't read block header\n");
+
+    block.filters = malloc((LZMA_FILTERS_MAX + 1) * sizeof(lzma_filter));
+    if (lzma_block_header_decode(&block, NULL, header) != LZMA_OK)
+        pixz_die("Can't decode header\n");
+
+    lzma_stream stream = LZMA_STREAM_INIT;
+    if (lzma_block_decoder(&stream, &block) != LZMA_OK)
+        pixz_die("Can't setup block decoder\n");
+
+    size_t outsize = CHUNKSIZE;
+    uint8_t inbuf[CHUNKSIZE], *outbuf = malloc(outsize);
+
+    *i = pixz_index_new();
+    stream.next_out = outbuf;
+    stream.avail_out = outsize;
+    stream.avail_in = 0;
+    lzma_ret err = LZMA_OK;
+    lzma_action action = LZMA_RUN;
+    while (err != LZMA_STREAM_END) {
+        if (action != LZMA_FINISH && stream.avail_in == 0) {
+            stream.avail_in = fread(inbuf, 1, CHUNKSIZE, in);
+            stream.next_in = inbuf;
+            action = stream.avail_in == 0 ? LZMA_FINISH : LZMA_RUN;
+        }
+
+        err = lzma_code(&stream, action);
+        if (err != LZMA_OK && err != LZMA_STREAM_END)
+            pixz_die("Error #%d decoding file index\n", err);
+
+        if (stream.avail_out == 0 || err == LZMA_STREAM_END) {
+            size_t keep = pixz_index_read_buf(*i, &outbuf, stream.next_out, &outsize);
+            stream.next_out = outbuf + keep;
+            stream.avail_out = outsize - keep;
+        }
+    }
+    free(block.filters);
+    free(outbuf);
+    lzma_end(&stream);
+    return 31337;
 }
diff --git a/pixz.h b/pixz.h
index fc4b1a5..9a188e6 100644
--- a/pixz.h
+++ b/pixz.h
@@ -8,8 +8,9 @@
 typedef int fixme_err;
 
 void pixz_die(const char *fmt, ...);
-void pixz_offset_write(uint64_t n, uint8_t *buf);
 
+void pixz_offset_write(uint64_t n, uint8_t *buf);
+uint64_t pixz_offset_read(uint8_t *buf);
 
 
 /***** BLOCK *****/
@@ -92,6 +93,6 @@ void pixz_index_finish(pixz_index *i, size_t offset);
 void pixz_index_dump(pixz_index *i, FILE *out);
 
 fixme_err pixz_index_write(pixz_index *i, FILE *out, pixz_encode_options *opts);
-fixme_err pixz_index_read_in_place(pixz_index **i, FILE *in);
-fixme_err pixz_index_read(pixz_index **i, FILE *in);
+fixme_err pixz_index_read_in_place(pixz_index **i, FILE *in, lzma_check check);
+fixme_err pixz_index_read(pixz_index **i, FILE *in, lzma_check check);
 
diff --git a/tar.c b/tar.c
index 14dacca..a528040 100644
--- a/tar.c
+++ b/tar.c
@@ -5,8 +5,7 @@
 
 #include
 
-// Tar uses records of 512 bytes
-#define CHUNKSIZE 512
+#define CHUNKSIZE 4096
 #define INDEXFILE "index.xz"
 
 typedef struct {
@@ -18,6 +17,7 @@ static int pixz_tar_open(struct archive *a, void *refp);
 static int pixz_tar_close(struct archive *a, void *refp);
 static ssize_t pixz_tar_read(struct archive *a, void *refp, const void **buf);
 
+#include
 
 int main(void) {
     pixz_index *index = pixz_index_new();
@@ -38,8 +38,11 @@ int main(void) {
         if (aerr == ARCHIVE_EOF) {
             pixz_index_finish(index, ftello(stdin));
             break;
-        } else if (aerr != ARCHIVE_OK)
-            pixz_die("Error reading header\n");
+        } else if (aerr != ARCHIVE_OK && aerr != ARCHIVE_WARN) {
+            // libarchive warns for silly things like failure to convert
+            // names into multibyte strings
+            pixz_die("Error reading header: %s\n", archive_error_string(a));
+        }
 
         const char *name = archive_entry_pathname(entry);
         size_t offset = archive_read_header_position(a);
@@ -58,11 +61,12 @@ int main(void) {
     pixz_encode_options_default(opts);
     pixz_index_write(index, ifile, opts);
     pixz_index_free(index);
+    lzma_check check = opts->check;
     pixz_encode_options_free(opts);
 
     fseek(ifile, 0, SEEK_SET);
     pixz_index *i2;
-    pixz_index_read_in_place(&i2, ifile);
+    pixz_index_read_in_place(&i2, ifile, check);
     fclose(ifile);
 
     pixz_index_dump(i2, stdout);
diff --git a/util.c b/util.c
index 92562ed..f41b1d5 100644
--- a/util.c
+++ b/util.c
@@ -17,3 +17,7 @@ void pixz_die(const char *fmt, ...) {
 void pixz_offset_write(uint64_t n, uint8_t *buf) {
     OSWriteLittleInt64(buf, 0, n);
 }
+
+uint64_t pixz_offset_read(uint8_t *buf) {
+    return OSReadLittleInt64(buf, 0);
+}