mirror of
https://github.com/vasi/pixz
synced 2024-11-03 09:40:24 +00:00
factor out indexing
This commit is contained in:
parent
69dc231f72
commit
64ee987eb8
3
Makefile
3
Makefile
@ -16,11 +16,10 @@ pixz: pixz.o encode.o block.o util.o
|
||||
pixzlist: pixzlist.o
|
||||
$(LD) $@ $^ -llzma
|
||||
|
||||
pixztar: tar.o util.o
|
||||
pixztar: tar.o util.o index.o
|
||||
$(LD) $@ $^ -larchive
|
||||
|
||||
|
||||
|
||||
run: pixz
|
||||
time ./$< < test.in > test.out
|
||||
@md5sum test.in
|
||||
|
72
index.c
Normal file
72
index.c
Normal file
@ -0,0 +1,72 @@
|
||||
#include "pixz.h"
|
||||
|
||||
#include <string.h>
|
||||
|
||||
static bool pixz_index_is_prefix(const char *name);
|
||||
static void pixz_index_add_record(pixz_index *i, size_t offset, const char *name);
|
||||
|
||||
pixz_index *pixz_index_new(void) {
|
||||
pixz_index *i = malloc(sizeof(pixz_index));
|
||||
i->first = NULL;
|
||||
i->last = NULL;
|
||||
i->have_last_offset = false;
|
||||
return i;
|
||||
}
|
||||
|
||||
// Release an index together with every record and record name it owns.
//
// Passing NULL is a no-op, mirroring free().
void pixz_index_free(pixz_index *i) {
    if (!i) {
        return;
    }

    pixz_index_record *rec = i->first;
    while (rec) {
        // Fetch the successor before freeing: reading rec->next after
        // free(rec) would be a use-after-free.
        pixz_index_record *next = rec->next;
        free(rec->name);
        free(rec);
        rec = next;
    }
    free(i);
}
|
||||
|
||||
// Append a record to the end of the index's linked list.
//
// i:      index to extend.
// offset: offset stored in the new record.
// name:   entry name, copied with strdup(); NULL creates a nameless record,
//         which pixz_index_dump() treats as the end-of-index terminator.
//
// Exits the process with a diagnostic if memory cannot be allocated.
static void pixz_index_add_record(pixz_index *i, size_t offset, const char *name) {
    pixz_index_record *rec = malloc(sizeof *rec);
    if (!rec) {
        fputs("pixz: out of memory allocating index record\n", stderr);
        exit(EXIT_FAILURE);
    }

    rec->next = NULL;
    rec->offset = offset;
    rec->name = NULL;
    if (name) {
        rec->name = strdup(name);
        // A failed copy must not be stored as NULL: a NULL name marks the
        // terminator record and would silently truncate the dump.
        if (!rec->name) {
            free(rec);
            fputs("pixz: out of memory copying index entry name\n", stderr);
            exit(EXIT_FAILURE);
        }
    }

    if (!i->first) {
        i->first = rec;
    }
    if (i->last) {
        i->last->next = rec;
    }
    i->last = rec;
}
|
||||
|
||||
// Register an archive entry with the index.
//
// Entries identified by pixz_index_is_prefix() are not recorded themselves.
// Instead, the offset of the first one in a run is remembered and becomes
// the offset of the next ordinary entry, so the prefix data stays grouped
// with the entry that follows it.
void pixz_index_add(pixz_index *i, size_t offset, const char *name) {
    if (!pixz_index_is_prefix(name)) {
        // Ordinary entry: start it at the pending prefix offset, if any.
        size_t start = offset;
        if (i->have_last_offset) {
            start = i->last_offset;
        }
        pixz_index_add_record(i, start, name);
        i->have_last_offset = false;
        return;
    }

    // Prefix entry: only the first offset of a consecutive run is kept.
    if (!i->have_last_offset) {
        i->last_offset = offset;
        i->have_last_offset = true;
    }
}
|
||||
|
||||
// Report whether the last path component of `name` starts with "._".
//
// Unfortunately, this is the only way I can think of to identify
// copyfile data.
//
// Returns false for a NULL name rather than passing it to the string
// functions, which would be undefined behaviour.
static bool pixz_index_is_prefix(const char *name) {
    if (!name) {
        return false;
    }

    // basename(3) is not thread-safe, so locate the final component by hand.
    const char *slash = strrchr(name, '/');
    const char *base = slash ? slash + 1 : name;

    return strncmp(base, "._", 2) == 0;
}
|
||||
|
||||
// Close the index by appending its terminating record.
//
// The terminator has no name and stores `offset`; pixz_index_dump() prints
// that value on its "Total:" line.
void pixz_index_finish(pixz_index *i, size_t offset) {
    const char *no_name = NULL;
    pixz_index_add_record(i, offset, no_name);
}
|
||||
|
||||
// Write a human-readable listing of the index to `out`.
//
// Each named record is printed as its offset followed by its name. The
// first nameless record (the terminator added by pixz_index_finish()) ends
// the listing and supplies the offset for the final "Total:" line.
//
// If the index has no terminator (it is empty or was never finished), the
// "Total:" line is omitted instead of dereferencing a NULL record.
void pixz_index_dump(pixz_index *i, FILE *out) {
    pixz_index_record *rec = i->first;
    while (rec && rec->name) {
        fprintf(out, "%12zu %s\n", rec->offset, rec->name);
        rec = rec->next;
    }

    if (rec) {
        fprintf(out, "Total: %zu\n", rec->offset);
    }
}
|
||||
|
27
pixz.h
27
pixz.h
@ -2,6 +2,7 @@
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdbool.h>
|
||||
|
||||
|
||||
typedef int fixme_err;
|
||||
@ -58,3 +59,29 @@ fixme_err pixz_encode_stream_footer(FILE *outfile, pixz_encode_options *opts,
|
||||
fixme_err pixz_encode_index(FILE *outfile, pixz_encode_options *opts, lzma_index *index);
|
||||
|
||||
fixme_err pixz_encode_file(FILE *infile, FILE *outfile, pixz_encode_options *opts);
|
||||
|
||||
|
||||
/***** INDEX *****/
|
||||
|
||||
// One node of the index's singly linked list of records.
typedef struct pixz_index_record {
    // Offset associated with this record, as supplied by the caller.
    size_t offset;
    // Heap-allocated copy of the entry name; NULL for the terminating record.
    char *name;
    // Following record in the list, or NULL if this is the last one.
    struct pixz_index_record *next;
} pixz_index_record;
|
||||
|
||||
typedef struct {
|
||||
pixz_index_record *first;
|
||||
pixz_index_record *last;
|
||||
|
||||
size_t last_offset;
|
||||
bool have_last_offset;
|
||||
} pixz_index;
|
||||
|
||||
// Allocate an empty index (no records, no pending prefix offset).
pixz_index *pixz_index_new(void);
|
||||
// Free the index along with its records and their names.
void pixz_index_free(pixz_index *i);
|
||||
|
||||
// Register an entry at `offset`. A name whose last path component starts
// with "._" is not recorded itself; the offset of the first such entry in a
// run is used as the offset of the next recorded entry.
void pixz_index_add(pixz_index *i, size_t offset, const char *name);
|
||||
// Append the terminating record, which has no name and stores `offset`.
void pixz_index_finish(pixz_index *i, size_t offset);
|
||||
|
||||
// Print every named record as "offset name", then a "Total:" line holding
// the offset of the terminating record.
void pixz_index_dump(pixz_index *i, FILE *out);
|
||||
|
122
tar.c
122
tar.c
@ -4,100 +4,70 @@
|
||||
#include <archive_entry.h>
|
||||
|
||||
#include <sys/errno.h>
|
||||
#include <string.h>
|
||||
#include <libgen.h>
|
||||
|
||||
|
||||
// Tar uses records of 512 bytes
|
||||
#define CHUNKSIZE 512
|
||||
|
||||
|
||||
typedef struct pixz_tar_index_record pixz_tar_index_record;
|
||||
struct pixz_tar_index_record {
|
||||
size_t offset;
|
||||
char *name;
|
||||
pixz_tar_index_record *next;
|
||||
};
|
||||
|
||||
typedef struct {
|
||||
pixz_tar_index_record *first;
|
||||
pixz_tar_index_record *last;
|
||||
} pixz_tar_index;
|
||||
|
||||
static pixz_tar_index *pixz_tar_index_new(void);
|
||||
static void pixz_tar_index_add(pixz_tar_index *i, size_t offset, const char *name);
|
||||
static void pixz_tar_index_dump(pixz_tar_index *i, FILE *out);
|
||||
static void pixz_tar_index_free(pixz_tar_index *i);
|
||||
static int pixz_tar_index_is_metadata(struct archive_entry *entry);
|
||||
|
||||
|
||||
typedef struct {
|
||||
FILE *file;
|
||||
uint8_t buf[CHUNKSIZE];
|
||||
} pixz_tar_input;
|
||||
} pixz_tar;
|
||||
|
||||
static int pixz_tar_input_open(struct archive *a, void *refp);
|
||||
static int pixz_tar_input_close(struct archive *a, void *refp);
|
||||
static ssize_t pixz_tar_input_read(struct archive *a, void *refp, const void **buf);
|
||||
static int pixz_tar_open(struct archive *a, void *refp);
|
||||
static int pixz_tar_close(struct archive *a, void *refp);
|
||||
static ssize_t pixz_tar_read(struct archive *a, void *refp, const void **buf);
|
||||
|
||||
|
||||
int main(void) {
|
||||
pixz_tar_index *index = pixz_tar_index_new();
|
||||
pixz_index *index = pixz_index_new();
|
||||
|
||||
struct archive *a = archive_read_new();
|
||||
archive_read_support_compression_none(a);
|
||||
archive_read_support_format_tar(a);
|
||||
|
||||
pixz_tar_input input = { .file = stdin };
|
||||
if (archive_read_open(a, &input, pixz_tar_input_open, pixz_tar_input_read,
|
||||
pixz_tar_input_close) != ARCHIVE_OK)
|
||||
FILE *infile = stdin;
|
||||
pixz_tar input = { .file = infile };
|
||||
if (archive_read_open(a, &input, pixz_tar_open, pixz_tar_read,
|
||||
pixz_tar_close) != ARCHIVE_OK)
|
||||
pixz_die("Can't open archive\n");
|
||||
|
||||
int want_offset = 0;
|
||||
size_t offset = 0;
|
||||
while (1) {
|
||||
struct archive_entry *entry;
|
||||
int aerr = archive_read_next_header(a, &entry);
|
||||
if (aerr == ARCHIVE_EOF)
|
||||
if (aerr == ARCHIVE_EOF) {
|
||||
pixz_index_finish(index, ftello(stdin));
|
||||
break;
|
||||
else if (aerr != ARCHIVE_OK)
|
||||
} else if (aerr != ARCHIVE_OK)
|
||||
pixz_die("Error reading header\n");
|
||||
|
||||
if (!pixz_tar_index_is_metadata(entry)) {
|
||||
const char *name = archive_entry_pathname(entry);
|
||||
pixz_tar_index_add(index, offset, name);
|
||||
want_offset = 1;
|
||||
}
|
||||
const char *name = archive_entry_pathname(entry);
|
||||
size_t offset = archive_read_header_position(a);
|
||||
pixz_index_add(index, offset, name);
|
||||
|
||||
if (archive_read_data_skip(a) != ARCHIVE_OK)
|
||||
pixz_die("Error skipping data\n");
|
||||
|
||||
if (want_offset) {
|
||||
offset = ftell(input.file);
|
||||
want_offset = 0;
|
||||
}
|
||||
pixz_die("Error skipping data\n");
|
||||
}
|
||||
if (archive_read_finish(a) != ARCHIVE_OK)
|
||||
pixz_die("Error finishing read\n");
|
||||
|
||||
pixz_tar_index_dump(index, stdout);
|
||||
pixz_tar_index_free(index);
|
||||
pixz_index_dump(index, stdout);
|
||||
pixz_index_free(index);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
static int pixz_tar_input_open(struct archive *a, void *refp) {
|
||||
static int pixz_tar_open(struct archive *a, void *refp) {
|
||||
return ARCHIVE_OK;
|
||||
}
|
||||
|
||||
static int pixz_tar_input_close(struct archive *a, void *refp) {
|
||||
fclose(((pixz_tar_input*)refp)->file);
|
||||
static int pixz_tar_close(struct archive *a, void *refp) {
|
||||
fclose(((pixz_tar*)refp)->file);
|
||||
return ARCHIVE_OK;
|
||||
}
|
||||
|
||||
static ssize_t pixz_tar_input_read(struct archive *a, void *refp, const void **buf) {
|
||||
pixz_tar_input *input = (pixz_tar_input*)refp;
|
||||
static ssize_t pixz_tar_read(struct archive *a, void *refp, const void **buf) {
|
||||
pixz_tar *input = (pixz_tar*)refp;
|
||||
size_t rd = fread(input->buf, 1, CHUNKSIZE, input->file);
|
||||
if (ferror(input->file)) {
|
||||
archive_set_error(a, errno, "Read error");
|
||||
@ -106,49 +76,3 @@ static ssize_t pixz_tar_input_read(struct archive *a, void *refp, const void **b
|
||||
*buf = input->buf;
|
||||
return rd;
|
||||
}
|
||||
|
||||
|
||||
static pixz_tar_index *pixz_tar_index_new(void) {
|
||||
pixz_tar_index *i = malloc(sizeof(pixz_tar_index));
|
||||
i->first = NULL;
|
||||
i->last = NULL;
|
||||
return i;
|
||||
}
|
||||
|
||||
static void pixz_tar_index_add(pixz_tar_index *i, size_t offset, const char *name) {
|
||||
pixz_tar_index_record *rec = malloc(sizeof(pixz_tar_index_record));
|
||||
rec->next = NULL;
|
||||
rec->name = strdup(name);
|
||||
rec->offset = offset;
|
||||
|
||||
if (!i->first)
|
||||
i->first = rec;
|
||||
if (i->last)
|
||||
i->last->next = rec;
|
||||
i->last = rec;
|
||||
}
|
||||
|
||||
static void pixz_tar_index_dump(pixz_tar_index *i, FILE *out) {
|
||||
for (pixz_tar_index_record *rec = i->first; rec; rec = rec->next) {
|
||||
fprintf(out, "%12zu %s\n", rec->offset, rec->name);
|
||||
}
|
||||
}
|
||||
|
||||
static void pixz_tar_index_free(pixz_tar_index *i) {
|
||||
for (pixz_tar_index_record *rec = i->first; rec; rec = rec->next) {
|
||||
free(rec->name);
|
||||
free(rec);
|
||||
}
|
||||
free(i);
|
||||
}
|
||||
|
||||
static int pixz_tar_index_is_metadata(struct archive_entry *entry) {
|
||||
// FIXME: better copyfile detection?
|
||||
|
||||
const char *name = archive_entry_pathname(entry);
|
||||
size_t i = strlen(name);
|
||||
while (i != 0 && name[i - 1] != '/')
|
||||
--i;
|
||||
|
||||
return strncmp(name + i, "._", 2) == 0;
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user