diff --git a/Makefile b/Makefile
index efc3f8a..0b08884 100644
--- a/Makefile
+++ b/Makefile
@@ -16,11 +16,10 @@ pixz: pixz.o encode.o block.o util.o
 pixzlist: pixzlist.o
 	$(LD) $@ $^ -llzma
 
-pixztar: tar.o util.o
+pixztar: tar.o util.o index.o
 	$(LD) $@ $^ -larchive
 
 
-
 run: pixz
 	time ./$< < test.in > test.out
 	@md5sum test.in
diff --git a/index.c b/index.c
new file mode 100644
index 0000000..b89b6ce
--- /dev/null
+++ b/index.c
@@ -0,0 +1,97 @@
+#include "pixz.h"
+
+#include <string.h>
+
+static bool pixz_index_is_prefix(const char *name);
+static void pixz_index_add_record(pixz_index *i, size_t offset, const char *name);
+
+// Allocate an empty index; dies if memory is exhausted.
+// Release the result with pixz_index_free().
+pixz_index *pixz_index_new(void) {
+    pixz_index *i = malloc(sizeof *i);
+    if (!i)
+        pixz_die("Out of memory\n");
+    i->first = NULL;
+    i->last = NULL;
+    i->last_offset = 0;
+    i->have_last_offset = false;
+    return i;
+}
+
+// Release an index together with every record and name it owns.
+void pixz_index_free(pixz_index *i) {
+    pixz_index_record *rec = i->first;
+    while (rec) {
+        // Fetch the successor before freeing: rec is dead after free()
+        pixz_index_record *next = rec->next;
+        free(rec->name);
+        free(rec);
+        rec = next;
+    }
+    free(i);
+}
+
+// Append a record holding a private copy of name (NULL for the final
+// end-of-archive record). Dies if memory is exhausted.
+static void pixz_index_add_record(pixz_index *i, size_t offset, const char *name) {
+    pixz_index_record *rec = malloc(sizeof *rec);
+    if (!rec)
+        pixz_die("Out of memory\n");
+    rec->next = NULL;
+    rec->name = name ? strdup(name) : NULL;
+    if (name && !rec->name)
+        pixz_die("Out of memory\n");
+    rec->offset = offset;
+
+    if (!i->first)
+        i->first = rec;
+    if (i->last)
+        i->last->next = rec;
+    i->last = rec;
+}
+
+// Record an archive entry. Entries whose basename starts with "._" are not
+// listed themselves; the offset of the first one in a run is remembered and
+// given to the next ordinary entry, so that entry's record starts there.
+void pixz_index_add(pixz_index *i, size_t offset, const char *name) {
+    if (pixz_index_is_prefix(name)) {
+        if (!i->have_last_offset)
+            i->last_offset = offset;
+        i->have_last_offset = true;
+        return;
+    }
+
+    pixz_index_add_record(i, i->have_last_offset ? i->last_offset : offset, name);
+    i->have_last_offset = false;
+}
+
+// True if the last path component of name starts with "._".
+static bool pixz_index_is_prefix(const char *name) {
+    // Unfortunately, this is the only way I can think of to identify
+    // copyfile data.
+
+    // basename(3) is not thread-safe
+    size_t i = strlen(name);
+    while (i != 0 && name[i - 1] != '/')
+        --i;
+
+    return strncmp(name + i, "._", 2) == 0;
+}
+
+// Terminate the index with a nameless record carrying the final offset.
+void pixz_index_finish(pixz_index *i, size_t offset) {
+    pixz_index_add_record(i, offset, NULL);
+}
+
+// Print one "offset name" line per named record, then the total taken from
+// the terminating record if pixz_index_finish() has been called.
+void pixz_index_dump(pixz_index *i, FILE *out) {
+    pixz_index_record *rec;
+    for (rec = i->first; rec && rec->name; rec = rec->next) {
+        fprintf(out, "%12zu %s\n", rec->offset, rec->name);
+    }
+    // The loop ends on NULL when the index was never finished
+    if (rec)
+        fprintf(out, "Total: %zu\n", rec->offset);
+}
+
diff --git a/pixz.h b/pixz.h
index 45e497e..7a1947a 100644
--- a/pixz.h
+++ b/pixz.h
@@ -2,6 +2,7 @@
 
 #include
 #include
+#include <stdbool.h>
 
 typedef int fixme_err;
 
@@ -58,3 +59,29 @@ fixme_err pixz_encode_stream_footer(FILE *outfile, pixz_encode_options *opts,
 fixme_err pixz_encode_index(FILE *outfile, pixz_encode_options *opts,
         lzma_index *index);
 fixme_err pixz_encode_file(FILE *infile, FILE *outfile, pixz_encode_options *opts);
+
+
+/***** INDEX *****/
+
+typedef struct pixz_index_record pixz_index_record;
+struct pixz_index_record {
+    size_t offset;
+    char *name;
+    pixz_index_record *next;
+};
+
+typedef struct {
+    pixz_index_record *first;
+    pixz_index_record *last;
+
+    size_t last_offset;
+    bool have_last_offset;
+} pixz_index;
+
+pixz_index *pixz_index_new(void);
+void pixz_index_free(pixz_index *i);
+
+void pixz_index_add(pixz_index *i, size_t offset, const char *name);
+void pixz_index_finish(pixz_index *i, size_t offset);
+
+void pixz_index_dump(pixz_index *i, FILE *out);
diff --git a/tar.c b/tar.c
index 22d02db..348d19c 100644
--- a/tar.c
+++ b/tar.c
@@ -4,100 +4,70 @@
 #include
 #include
 
-#include
-#include
-
 // Tar uses records of 512 bytes
 #define CHUNKSIZE 512
-
-typedef struct pixz_tar_index_record pixz_tar_index_record;
-struct pixz_tar_index_record {
-    size_t offset;
-    char *name;
-    pixz_tar_index_record *next;
-};
-
-typedef struct {
-    pixz_tar_index_record *first;
-    pixz_tar_index_record *last;
-} pixz_tar_index;
-
-static pixz_tar_index *pixz_tar_index_new(void);
-static void pixz_tar_index_add(pixz_tar_index *i, size_t offset, const char *name);
-static void pixz_tar_index_dump(pixz_tar_index *i, FILE *out);
-static void pixz_tar_index_free(pixz_tar_index *i);
-static int pixz_tar_index_is_metadata(struct archive_entry *entry);
-
-
 
 typedef struct {
     FILE *file;
     uint8_t buf[CHUNKSIZE];
-} pixz_tar_input;
+} pixz_tar;
 
-static int pixz_tar_input_open(struct archive *a, void *refp);
-static int pixz_tar_input_close(struct archive *a, void *refp);
-static ssize_t pixz_tar_input_read(struct archive *a, void *refp, const void **buf);
+static int pixz_tar_open(struct archive *a, void *refp);
+static int pixz_tar_close(struct archive *a, void *refp);
+static ssize_t pixz_tar_read(struct archive *a, void *refp, const void **buf);
 
 
 int main(void) {
-    pixz_tar_index *index = pixz_tar_index_new();
+    pixz_index *index = pixz_index_new();
 
     struct archive *a = archive_read_new();
     archive_read_support_compression_none(a);
     archive_read_support_format_tar(a);
 
-    pixz_tar_input input = { .file = stdin };
-    if (archive_read_open(a, &input, pixz_tar_input_open, pixz_tar_input_read,
-            pixz_tar_input_close) != ARCHIVE_OK)
+    FILE *infile = stdin;
+    pixz_tar input = { .file = infile };
+    if (archive_read_open(a, &input, pixz_tar_open, pixz_tar_read,
+            pixz_tar_close) != ARCHIVE_OK)
         pixz_die("Can't open archive\n");
 
-    int want_offset = 0;
-    size_t offset = 0;
     while (1) {
         struct archive_entry *entry;
         int aerr = archive_read_next_header(a, &entry);
-        if (aerr == ARCHIVE_EOF)
+        if (aerr == ARCHIVE_EOF) {
+            pixz_index_finish(index, ftello(infile));
             break;
-        else if (aerr != ARCHIVE_OK)
+        } else if (aerr != ARCHIVE_OK)
             pixz_die("Error reading header\n");
 
-        if (!pixz_tar_index_is_metadata(entry)) {
-            const char *name = archive_entry_pathname(entry);
-            pixz_tar_index_add(index, offset, name);
-            want_offset = 1;
-        }
+        const char *name = archive_entry_pathname(entry);
+        size_t offset = archive_read_header_position(a);
+        pixz_index_add(index, offset, name);
 
         if (archive_read_data_skip(a) != ARCHIVE_OK)
-            pixz_die("Error skipping data\n");
-
-        if (want_offset) {
-            offset = ftell(input.file);
-            want_offset = 0;
-        }
+            pixz_die("Error skipping data\n");
     }
 
     if (archive_read_finish(a) != ARCHIVE_OK)
         pixz_die("Error finishing read\n");
 
-    pixz_tar_index_dump(index, stdout);
-    pixz_tar_index_free(index);
+    pixz_index_dump(index, stdout);
+    pixz_index_free(index);
 
     return 0;
 }
 
 
-static int pixz_tar_input_open(struct archive *a, void *refp) {
+static int pixz_tar_open(struct archive *a, void *refp) {
     return ARCHIVE_OK;
 }
 
-static int pixz_tar_input_close(struct archive *a, void *refp) {
-    fclose(((pixz_tar_input*)refp)->file);
+static int pixz_tar_close(struct archive *a, void *refp) {
+    fclose(((pixz_tar*)refp)->file);
     return ARCHIVE_OK;
 }
 
-static ssize_t pixz_tar_input_read(struct archive *a, void *refp, const void **buf) {
-    pixz_tar_input *input = (pixz_tar_input*)refp;
+static ssize_t pixz_tar_read(struct archive *a, void *refp, const void **buf) {
+    pixz_tar *input = (pixz_tar*)refp;
     size_t rd = fread(input->buf, 1, CHUNKSIZE, input->file);
     if (ferror(input->file)) {
         archive_set_error(a, errno, "Read error");
@@ -106,49 +76,3 @@ static ssize_t pixz_tar_input_read(struct archive *a, void *refp, const void **b
     *buf = input->buf;
     return rd;
 }
-
-
-static pixz_tar_index *pixz_tar_index_new(void) {
-    pixz_tar_index *i = malloc(sizeof(pixz_tar_index));
-    i->first = NULL;
-    i->last = NULL;
-    return i;
-}
-
-static void pixz_tar_index_add(pixz_tar_index *i, size_t offset, const char *name) {
-    pixz_tar_index_record *rec = malloc(sizeof(pixz_tar_index_record));
-    rec->next = NULL;
-    rec->name = strdup(name);
-    rec->offset = offset;
-
-    if (!i->first)
-        i->first = rec;
-    if (i->last)
-        i->last->next = rec;
-    i->last = rec;
-}
-
-static void pixz_tar_index_dump(pixz_tar_index *i, FILE *out) {
-    for (pixz_tar_index_record *rec = i->first; rec; rec = rec->next) {
-        fprintf(out, "%12zu %s\n", rec->offset, rec->name);
-    }
-}
-
-static void pixz_tar_index_free(pixz_tar_index *i) {
-    for (pixz_tar_index_record *rec = i->first; rec; rec = rec->next) {
-        free(rec->name);
-        free(rec);
-    }
-    free(i);
-}
-
-static int pixz_tar_index_is_metadata(struct archive_entry *entry) {
-    // FIXME: better copyfile detection?
-
-    const char *name = archive_entry_pathname(entry);
-    size_t i = strlen(name);
-    while (i != 0 && name[i - 1] != '/')
-        --i;
-
-    return strncmp(name + i, "._", 2) == 0;
-}