mirror of https://github.com/vasi/pixz
factor out common code
parent
c060addccb
commit
bc649b9877
@ -0,0 +1,211 @@
|
||||
#include "pixz.h"
|
||||
|
||||
#include <stdarg.h>
|
||||
|
||||
#include <libkern/OSByteOrder.h>
|
||||
|
||||
|
||||
#pragma mark TYPES
|
||||
|
||||
typedef struct {
|
||||
lzma_block block;
|
||||
lzma_filter filters[LZMA_FILTERS_MAX + 1];
|
||||
} block_wrapper_t;
|
||||
|
||||
|
||||
#pragma mark GLOBALS
|
||||
|
||||
FILE *gInFile = NULL;
|
||||
lzma_stream gStream = LZMA_STREAM_INIT;
|
||||
|
||||
lzma_index *gIndex = NULL;
|
||||
file_index_t *gFileIndex = NULL, *gLastFile = NULL;
|
||||
|
||||
|
||||
static lzma_check gCheck = LZMA_CHECK_NONE;
|
||||
|
||||
static uint8_t *gFileIndexBuf = NULL;
|
||||
static size_t gFIBSize = CHUNKSIZE, gFIBPos = 0;
|
||||
static lzma_ret gFIBErr = LZMA_OK;
|
||||
static uint8_t gFIBInputBuf[CHUNKSIZE];
|
||||
static size_t gMoved = 0;
|
||||
|
||||
|
||||
#pragma mark FUNCTION DECLARATIONS
|
||||
|
||||
static char *read_file_index_name(void);
|
||||
static void read_file_index_make_space(void);
|
||||
static void read_file_index_data(void);
|
||||
|
||||
|
||||
#pragma mark FUNCTION DEFINITIONS
|
||||
|
||||
// Report a fatal error and terminate.
//
// Formats `fmt` and its arguments printf-style onto stderr, appends a
// newline, flushes stderr and exits the process with status 1.
// This function does not return.
void die(const char *fmt, ...) {
    va_list ap;

    va_start(ap, fmt);
    vfprintf(stderr, fmt, ap);
    va_end(ap);

    fputs("\n", stderr);
    fflush(stderr);
    exit(1);
}
|
||||
|
||||
void dump_file_index(void) {
|
||||
for (file_index_t *f = gFileIndex; f != NULL; f = f->next) {
|
||||
fprintf(stderr, "%10llx %s\n", f->offset, f->name ? f->name : "");
|
||||
}
|
||||
}
|
||||
|
||||
void free_file_index(void) {
|
||||
for (file_index_t *f = gFileIndex; f != NULL; ) {
|
||||
file_index_t *next = f->next;
|
||||
free(f->name);
|
||||
free(f);
|
||||
f = next;
|
||||
}
|
||||
gFileIndex = gLastFile = NULL;
|
||||
}
|
||||
|
||||
void read_file_index(void) {
|
||||
// find the last block
|
||||
lzma_vli loc = lzma_index_uncompressed_size(gIndex) - 1;
|
||||
lzma_index_record rec;
|
||||
if (lzma_index_locate(gIndex, &rec, loc))
|
||||
die("Can't locate file index block");
|
||||
void *bdata = decode_block_start(rec.stream_offset);
|
||||
|
||||
gFileIndexBuf = malloc(gFIBSize);
|
||||
gStream.avail_out = gFIBSize;
|
||||
gStream.avail_in = 0;
|
||||
while (true) {
|
||||
char *name = read_file_index_name();
|
||||
if (!name)
|
||||
break;
|
||||
|
||||
file_index_t *f = malloc(sizeof(file_index_t));
|
||||
f->name = strlen(name) ? strdup(name) : NULL;
|
||||
f->offset = OSReadLittleInt64(gFileIndexBuf, gFIBPos);
|
||||
gFIBPos += sizeof(uint64_t);
|
||||
|
||||
if (gLastFile) {
|
||||
gLastFile->next = f;
|
||||
} else {
|
||||
gFileIndex = f;
|
||||
}
|
||||
gLastFile = f;
|
||||
}
|
||||
free(gFileIndexBuf);
|
||||
lzma_end(&gStream);
|
||||
free(bdata);
|
||||
}
|
||||
|
||||
static char *read_file_index_name(void) {
|
||||
while (true) {
|
||||
// find a nul that ends a name
|
||||
uint8_t *eos, *haystack = gFileIndexBuf + gFIBPos;
|
||||
ssize_t len = gFIBSize - gStream.avail_out - gFIBPos - sizeof(uint64_t);
|
||||
if (len > 0 && (eos = memchr(haystack, '\0', len))) { // found it
|
||||
gFIBPos += eos - haystack + 1;
|
||||
return (char*)haystack;
|
||||
} else if (gFIBErr == LZMA_STREAM_END) { // nothing left
|
||||
return NULL;
|
||||
} else { // need more data
|
||||
if (gStream.avail_out == 0)
|
||||
read_file_index_make_space();
|
||||
read_file_index_data();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void read_file_index_make_space(void) {
|
||||
bool expand = (gFIBPos == 0);
|
||||
if (gFIBPos != 0) { // clear more space
|
||||
size_t move = gFIBSize - gStream.avail_out - gFIBPos;
|
||||
memmove(gFileIndexBuf, gFileIndexBuf + gFIBPos, move);
|
||||
gMoved += move;
|
||||
gStream.avail_out += gFIBPos;
|
||||
gFIBPos = 0;
|
||||
}
|
||||
// Try to reduce number of moves by expanding proactively
|
||||
if (expand || gMoved >= gFIBSize) { // malloc more space
|
||||
gStream.avail_out += gFIBSize;
|
||||
gFIBSize *= 2;
|
||||
gFileIndexBuf = realloc(gFileIndexBuf, gFIBSize);
|
||||
}
|
||||
}
|
||||
|
||||
static void read_file_index_data(void) {
|
||||
gStream.next_out = gFileIndexBuf + gFIBSize - gStream.avail_out;
|
||||
while (gFIBErr != LZMA_STREAM_END && gStream.avail_out) {
|
||||
if (gStream.avail_in == 0) {
|
||||
// It's ok to read past the end of the block, we'll still
|
||||
// get LZMA_STREAM_END at the right place
|
||||
gStream.avail_in = fread(gFIBInputBuf, 1, CHUNKSIZE, gInFile);
|
||||
if (ferror(gInFile))
|
||||
die("Error reading file index data");
|
||||
gStream.next_in = gFIBInputBuf;
|
||||
}
|
||||
|
||||
gFIBErr = lzma_code(&gStream, LZMA_RUN);
|
||||
if (gFIBErr != LZMA_OK && gFIBErr != LZMA_STREAM_END)
|
||||
die("Error decoding file index data");
|
||||
}
|
||||
}
|
||||
|
||||
void decode_index(void) {
|
||||
if (fseek(gInFile, -LZMA_STREAM_HEADER_SIZE, SEEK_END) == -1)
|
||||
die("Error seeking to stream footer");
|
||||
uint8_t hdrbuf[LZMA_STREAM_HEADER_SIZE];
|
||||
if (fread(hdrbuf, LZMA_STREAM_HEADER_SIZE, 1, gInFile) != 1)
|
||||
die("Error reading stream footer");
|
||||
lzma_stream_flags flags;
|
||||
if (lzma_stream_footer_decode(&flags, hdrbuf) != LZMA_OK)
|
||||
die("Error decoding stream footer");
|
||||
|
||||
gCheck = flags.check;
|
||||
size_t index_seek = -LZMA_STREAM_HEADER_SIZE - flags.backward_size;
|
||||
if (fseek(gInFile, index_seek, SEEK_CUR) == -1)
|
||||
die("Error seeking to index");
|
||||
if (lzma_index_decoder(&gStream, &gIndex, MEMLIMIT) != LZMA_OK)
|
||||
die("Error creating index decoder");
|
||||
|
||||
uint8_t ibuf[CHUNKSIZE];
|
||||
gStream.avail_in = 0;
|
||||
lzma_ret err = LZMA_OK;
|
||||
while (err != LZMA_STREAM_END) {
|
||||
if (gStream.avail_in == 0) {
|
||||
gStream.avail_in = fread(ibuf, 1, CHUNKSIZE, gInFile);
|
||||
if (ferror(gInFile))
|
||||
die("Error reading index");
|
||||
gStream.next_in = ibuf;
|
||||
}
|
||||
|
||||
err = lzma_code(&gStream, LZMA_RUN);
|
||||
if (err != LZMA_OK && err != LZMA_STREAM_END)
|
||||
die("Error decoding index");
|
||||
}
|
||||
}
|
||||
|
||||
void *decode_block_start(off_t block_seek) {
|
||||
if (fseeko(gInFile, block_seek, SEEK_SET) == -1)
|
||||
die("Error seeking to block");
|
||||
|
||||
block_wrapper_t *bw = malloc(sizeof(block_wrapper_t));
|
||||
bw->block = (lzma_block){ .check = gCheck, .filters = bw->filters };
|
||||
|
||||
int b = fgetc(gInFile);
|
||||
if (b == EOF || b == 0)
|
||||
die("Error reading block size");
|
||||
bw->block.header_size = lzma_block_header_size_decode(b);
|
||||
uint8_t hdrbuf[bw->block.header_size];
|
||||
hdrbuf[0] = (uint8_t)b;
|
||||
if (fread(hdrbuf + 1, bw->block.header_size - 1, 1, gInFile) != 1)
|
||||
die("Error reading block header");
|
||||
if (lzma_block_header_decode(&bw->block, NULL, hdrbuf) != LZMA_OK)
|
||||
die("Error decoding file index block header");
|
||||
|
||||
if (lzma_block_decoder(&gStream, &bw->block) != LZMA_OK)
|
||||
die("Error initializing file index stream");
|
||||
|
||||
return bw;
|
||||
}
|
@ -1,120 +1,47 @@
|
||||
#include <lzma.h>
|
||||
#include "pixz.h"
|
||||
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <sys/errno.h>
|
||||
#include <unistd.h>
|
||||
|
||||
#define CHUNKSIZE 4096
|
||||
#define MEMLIMIT (32 * 1024 * 1204)
|
||||
|
||||
void pixzlist_listfile(char *fname, FILE *f);
|
||||
lzma_index *pixzlist_index(char *fname, FILE *f);
|
||||
#pragma mark FUNCTION DEFINITIONS
|
||||
|
||||
int main(int argc, char **argv) {
|
||||
for (int i = 1; i < argc; ++i) {
|
||||
char *fname = argv[i];
|
||||
FILE *f = fopen(fname, "r");
|
||||
if (f == NULL) {
|
||||
fprintf(stderr, "Can't open file '%s': %s.\n", fname, strerror(errno));
|
||||
continue;
|
||||
char *progname = argv[0];
|
||||
int ch;
|
||||
bool tar = false;
|
||||
while ((ch = getopt(argc, argv, "t")) != -1) {
|
||||
switch (ch) {
|
||||
case 't':
|
||||
tar = true;
|
||||
break;
|
||||
default:
|
||||
die("Unknown option");
|
||||
}
|
||||
pixzlist_listfile(fname, f);
|
||||
fclose(f);
|
||||
if (i != argc - 1)
|
||||
printf("\n");
|
||||
}
|
||||
argc -= optind - 1;
|
||||
argv += optind - 1;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
void pixzlist_listfile(char *fname, FILE *f) {
|
||||
lzma_index *index = pixzlist_index(fname, f);
|
||||
if (!index)
|
||||
return;
|
||||
|
||||
printf("%s:\n", fname);
|
||||
if (argc != 2)
|
||||
die("Usage: %s [-t] file", progname);
|
||||
if (!(gInFile = fopen(argv[1], "r")))
|
||||
die("Can't open input file");
|
||||
|
||||
decode_index();
|
||||
lzma_index_record rec;
|
||||
while (!lzma_index_read(index, &rec)) {
|
||||
printf("%llu / %llu\n", rec.unpadded_size, rec.uncompressed_size);
|
||||
}
|
||||
|
||||
lzma_index_end(index, NULL);
|
||||
}
|
||||
|
||||
lzma_index *pixzlist_index(char *fname, FILE *f) {
|
||||
// Seek to footer
|
||||
if (fseek(f, -LZMA_STREAM_HEADER_SIZE, SEEK_END) == -1) {
|
||||
fprintf(stderr, "Can't seek to footer in '%s': %s.\n",
|
||||
fname, strerror(errno));
|
||||
return NULL;
|
||||
while (!lzma_index_read(gIndex, &rec)) {
|
||||
fprintf(stderr, "%9llu / %9llu\n", rec.unpadded_size,
|
||||
rec.uncompressed_size);
|
||||
}
|
||||
|
||||
// Read footer
|
||||
uint8_t header[LZMA_STREAM_HEADER_SIZE];
|
||||
if (fread(header, LZMA_STREAM_HEADER_SIZE, 1, f) != 1) {
|
||||
fprintf(stderr, "Can't read footer from '%s': %s.\n",
|
||||
fname, strerror(errno));
|
||||
return NULL;
|
||||
if (tar) {
|
||||
fprintf(stderr, "\n");
|
||||
read_file_index();
|
||||
dump_file_index();
|
||||
free_file_index();
|
||||
}
|
||||
|
||||
// Decode footer
|
||||
lzma_stream_flags flags;
|
||||
lzma_ret lerr = lzma_stream_footer_decode(&flags, header);
|
||||
if (lerr != LZMA_OK) {
|
||||
if (lerr == LZMA_FORMAT_ERROR)
|
||||
fprintf(stderr, "'%s' isn't an LZMA file.\n", fname);
|
||||
else if (lerr == LZMA_DATA_ERROR)
|
||||
fprintf(stderr, "CRC mismatch in '%s' footer.\n", fname);
|
||||
else
|
||||
fprintf(stderr, "Error #%d decoding footer of '%s'.\n", lerr, fname);
|
||||
return NULL;
|
||||
}
|
||||
lzma_index_end(gIndex, NULL);
|
||||
lzma_end(&gStream);
|
||||
|
||||
// Seek to index
|
||||
if (fseek(f, -LZMA_STREAM_HEADER_SIZE - flags.backward_size, SEEK_END) == -1) {
|
||||
fprintf(stderr, "Can't seek to index in '%s': %s.\n",
|
||||
fname, strerror(errno));
|
||||
return NULL;
|
||||
}
|
||||
|
||||
// Create index decoder
|
||||
uint8_t chunk[CHUNKSIZE];
|
||||
lzma_stream stream = LZMA_STREAM_INIT;
|
||||
lzma_index *index = NULL;
|
||||
lerr = lzma_index_decoder(&stream, &index, MEMLIMIT);
|
||||
if (lerr != LZMA_OK) {
|
||||
fprintf(stderr, "Error #%d starting decoding index of '%s'.\n",
|
||||
lerr, fname);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
// Decode index
|
||||
while (lerr != LZMA_STREAM_END) {
|
||||
size_t rd = fread(chunk, 1, CHUNKSIZE, f);
|
||||
if (rd == 0) {
|
||||
fprintf(stderr, "Error reading index from '%s': %s.\n",
|
||||
fname, strerror(errno));
|
||||
goto index_err;
|
||||
}
|
||||
stream.next_in = chunk;
|
||||
stream.avail_in = rd;
|
||||
|
||||
while (stream.avail_in != 0 && lerr != LZMA_STREAM_END) {
|
||||
lerr = lzma_code(&stream, LZMA_RUN);
|
||||
if (lerr != LZMA_OK && lerr != LZMA_STREAM_END) {
|
||||
fprintf(stderr, "Error #%d starting decoding index of '%s'.\n",
|
||||
lerr, fname);
|
||||
goto index_err;
|
||||
}
|
||||
}
|
||||
}
|
||||
lzma_end(&stream);
|
||||
return index;
|
||||
|
||||
index_err:
|
||||
lzma_end(&stream);
|
||||
lzma_index_end(index, NULL);
|
||||
|
||||
return NULL;
|
||||
return 0;
|
||||
}
|
||||
|
@ -1,98 +1,46 @@
|
||||
#include <lzma.h>
|
||||
|
||||
#include <stdbool.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdbool.h>
|
||||
|
||||
|
||||
typedef int fixme_err;
|
||||
|
||||
void pixz_die(const char *fmt, ...);
|
||||
|
||||
void pixz_offset_write(uint64_t n, uint8_t *buf);
|
||||
uint64_t pixz_offset_read(uint8_t *buf);
|
||||
|
||||
/***** BLOCK *****/
|
||||
|
||||
typedef struct {
|
||||
uint8_t *ibuf, *obuf;
|
||||
size_t isize;
|
||||
|
||||
lzma_block block;
|
||||
lzma_stream stream;
|
||||
} pixz_block;
|
||||
|
||||
#include <string.h>
|
||||
|
||||
pixz_block *pixz_block_new(size_t size, lzma_check check, lzma_filter *filters);
|
||||
void pixz_block_free(pixz_block *b);
|
||||
|
||||
bool pixz_block_full(pixz_block *b);
|
||||
size_t pixz_block_new_input_avail(pixz_block *b);
|
||||
uint8_t *pixz_block_new_input_next(pixz_block *b);
|
||||
void pixz_block_new_input(pixz_block *b, size_t bytes);
|
||||
#pragma mark DEFINES
|
||||
|
||||
uint8_t *pixz_block_coded_data(pixz_block *b);
|
||||
size_t pixz_block_coded_size(pixz_block *b);
|
||||
#define CHECK LZMA_CHECK_CRC32
|
||||
#define MEMLIMIT (64L * 1024 * 1024 * 1024) // crazy high
|
||||
|
||||
fixme_err pixz_block_encode(pixz_block *b, size_t bytes);
|
||||
fixme_err pixz_block_encode_all(pixz_block *b);
|
||||
#define CHUNKSIZE 4096
|
||||
#define BLOCKSIZE (1024 * 1024)
|
||||
|
||||
fixme_err pixz_block_index_append(pixz_block *b, lzma_index *index);
|
||||
|
||||
#pragma mark TYPES
|
||||
|
||||
/***** ENCODE *****/
|
||||
|
||||
typedef struct {
|
||||
size_t chunksize; // read quantum
|
||||
size_t blocksize; // encode quantum
|
||||
lzma_check check;
|
||||
lzma_filter *filters;
|
||||
} pixz_encode_options;
|
||||
|
||||
pixz_encode_options *pixz_encode_options_new();
|
||||
fixme_err pixz_encode_options_default(pixz_encode_options *opts);
|
||||
void pixz_encode_options_free(pixz_encode_options *opts);
|
||||
|
||||
void pixz_encode_initialize_block(lzma_block *b, lzma_check check,
|
||||
lzma_filter *filters);
|
||||
|
||||
fixme_err pixz_encode_block_header(lzma_block *b, uint8_t *buf, size_t avail);
|
||||
fixme_err pixz_encode_block(FILE *infile, FILE *outfile, pixz_encode_options *opts,
|
||||
lzma_index *index);
|
||||
fixme_err pixz_encode_stream_header(FILE *outfile, pixz_encode_options *opts);
|
||||
fixme_err pixz_encode_stream_footer(FILE *outfile, pixz_encode_options *opts,
|
||||
lzma_index *index);
|
||||
fixme_err pixz_encode_index(FILE *outfile, pixz_encode_options *opts, lzma_index *index);
|
||||
|
||||
fixme_err pixz_encode_file(FILE *infile, FILE *outfile, pixz_encode_options *opts);
|
||||
struct file_index_t {
|
||||
char *name;
|
||||
off_t offset;
|
||||
struct file_index_t *next;
|
||||
};
|
||||
typedef struct file_index_t file_index_t;
|
||||
|
||||
|
||||
/***** INDEX *****/
|
||||
#pragma mark GLOBALS
|
||||
|
||||
typedef struct pixz_index_record pixz_index_record;
|
||||
struct pixz_index_record {
|
||||
size_t offset;
|
||||
char *name;
|
||||
pixz_index_record *next;
|
||||
};
|
||||
FILE *gInFile;
|
||||
lzma_stream gStream;
|
||||
|
||||
typedef struct {
|
||||
pixz_index_record *first;
|
||||
pixz_index_record *last;
|
||||
|
||||
size_t last_offset;
|
||||
bool have_last_offset;
|
||||
} pixz_index;
|
||||
extern lzma_index *gIndex;
|
||||
extern file_index_t *gFileIndex, *gLastFile;
|
||||
|
||||
pixz_index *pixz_index_new(void);
|
||||
void pixz_index_free(pixz_index *i);
|
||||
|
||||
void pixz_index_add(pixz_index *i, size_t offset, const char *name);
|
||||
void pixz_index_finish(pixz_index *i, size_t offset);
|
||||
#pragma mark FUNCTION DECLARATIONS
|
||||
|
||||
void pixz_index_dump(pixz_index *i, FILE *out);
|
||||
void die(const char *fmt, ...);
|
||||
|
||||
fixme_err pixz_index_write(pixz_index *i, FILE *out, pixz_encode_options *opts);
|
||||
fixme_err pixz_index_read_in_place(pixz_index **i, FILE *in, lzma_check check);
|
||||
fixme_err pixz_index_read(pixz_index **i, FILE *in, lzma_check check);
|
||||
void decode_index(void);
|
||||
void *decode_block_start(off_t block_seek);
|
||||
|
||||
void read_file_index(void);
|
||||
void dump_file_index(void);
|
||||
void free_file_index(void);
|
||||
|
Loading…
Reference in New Issue