mirror of
https://github.com/vasi/pixz
synced 2024-11-16 21:26:04 +00:00
Fix issue 10: Uncompressible data causes overrun
lzma_code() doesn't automatically handle the case where data doesn't compress well. Unfortunately, the only way to do this with the liblzma API is to use the single-call block encoder, but that wouldn't let us re-use the lzma_stream. So we roll our own uncompressed-block output, based on the LZMA spec. It's ugly, but it works.
This commit is contained in:
parent
5f36662909
commit
870ce29ce5
90
write.c
90
write.c
@ -16,6 +16,8 @@ struct io_block_t {
|
||||
|
||||
#pragma mark GLOBALS
|
||||
|
||||
// Largest payload carried by one uncompressed chunk. The chunk header
// written by encode_uncompressible() stores (size - 1) in 16 bits, so
// 1 << 16 bytes is the most a single chunk can describe.
#define LZMA_CHUNK_MAX (1 << 16)
|
||||
|
||||
double gBlockFraction = 2.0;
|
||||
|
||||
static bool gTar = true;
|
||||
@ -39,7 +41,11 @@ static size_t gFileIndexBufPos = 0;
|
||||
#pragma mark FUNCTION DECLARATIONS
|
||||
|
||||
static void read_thread();
|
||||
|
||||
static void encode_thread(size_t thnum);
|
||||
static void encode_uncompressible(io_block_t *ib);
|
||||
static size_t size_uncompressible(size_t insize);
|
||||
|
||||
static void *block_create();
|
||||
static void block_free(void *data);
|
||||
|
||||
@ -274,6 +280,67 @@ static void block_dealloc(io_block_t *ib, block_parts parts) {
|
||||
|
||||
#pragma mark ENCODING
|
||||
|
||||
static size_t size_uncompressible(size_t insize) {
|
||||
size_t chunks = insize / LZMA_CHUNK_MAX;
|
||||
if (insize % LZMA_CHUNK_MAX)
|
||||
++chunks;
|
||||
// Per chunk (control code + 2-byte size), one byte for EOF
|
||||
size_t data_size = insize + chunks * 3 + 1;
|
||||
if (data_size % 4)
|
||||
data_size += 4 - data_size % 4; // Padding
|
||||
return data_size;
|
||||
}
|
||||
|
||||
/*
 * Write ib's input into ib->output as a sequence of uncompressed chunks,
 * starting right after the space reserved for the block header
 * (ib->block.header_size bytes).
 *
 * Output layout, in order:
 *   - for every chunk of up to LZMA_CHUNK_MAX bytes: a control byte (1),
 *     the chunk length minus one as 16-bit big endian, then the raw bytes;
 *   - a terminating control byte (0);
 *   - zero padding to a multiple of 4 bytes;
 *   - the CRC-32 of the whole input, little endian.
 *
 * Also records the resulting sizes in ib->block: compressed_size counts the
 * chunks and the terminator only (not padding or checksum), and
 * uncompressed_size is set to ib->insize.
 *
 * The caller must have sized ib->output to hold all of the above; no bounds
 * are checked here.
 */
static void encode_uncompressible(io_block_t *ib) {
	// See http://en.wikipedia.org/wiki/Lzma#LZMA2_format
	enum { CTRL_END = 0, CTRL_UNCOMPRESSED = 1, CHUNK_HEADER_LEN = 3 };

	uint8_t *const base = ib->output + ib->block.header_size;
	uint8_t *dst = base;
	const uint8_t *src = ib->input;

	for (size_t left = ib->insize; left != 0; ) {
		const size_t len = (left > LZMA_CHUNK_MAX) ? LZMA_CHUNK_MAX : left;

		// Chunk header: control byte, then (len - 1) as 16-bit big endian
		const uint16_t len_field = len - 1;
		dst[0] = CTRL_UNCOMPRESSED;
		dst[1] = (len_field >> 8);
		dst[2] = (len_field & 0xFF);

		// Chunk payload, copied verbatim
		memcpy(dst + CHUNK_HEADER_LEN, src, len);

		dst += CHUNK_HEADER_LEN + len;
		src += len;
		left -= len;
	}

	// End-of-data marker
	*dst++ = CTRL_END;

	// Sizes are recorded before padding is appended, so padding is excluded
	ib->block.compressed_size = dst - base;
	ib->block.uncompressed_size = ib->insize;

	// Zero-fill up to the next 4-byte boundary
	while ((dst - base) % 4 != 0)
		*dst++ = 0;

	// Only CRC-32 is produced here; any other check type is reported via die()
	if (ib->block.check != LZMA_CHECK_CRC32)
		die("pixz only supports CRC-32 checksums");

	// Checksum over the original input, emitted least significant byte first
	const uint32_t crc = lzma_crc32(ib->input, ib->insize, 0);
	for (unsigned shift = 0; shift < 32; shift += 8)
		*dst++ = (crc >> shift) & 0xFF;
}
|
||||
|
||||
static void encode_thread(size_t thnum) {
|
||||
lzma_stream stream = LZMA_STREAM_INIT;
|
||||
while (true) {
|
||||
@ -288,24 +355,31 @@ static void encode_thread(size_t thnum) {
|
||||
block_alloc(ib, BLOCK_OUT);
|
||||
block_init(&ib->block, ib->insize);
|
||||
size_t header_size = ib->block.header_size;
|
||||
ib->block.uncompressed_size = LZMA_VLI_UNKNOWN;
|
||||
ib->outsize = header_size;
|
||||
size_t uncompressible_size = size_uncompressible(ib->insize) +
|
||||
lzma_check_size(ib->block.check);
|
||||
|
||||
if (lzma_block_encoder(&stream, &ib->block) != LZMA_OK)
|
||||
die("Error creating block encoder");
|
||||
stream.next_in = ib->input;
|
||||
stream.avail_in = ib->insize;
|
||||
stream.next_out = ib->output + ib->outsize;
|
||||
stream.avail_out = gBlockOutSize - ib->outsize;
|
||||
stream.next_out = ib->output + header_size;
|
||||
stream.avail_out = uncompressible_size;
|
||||
|
||||
ib->block.uncompressed_size = LZMA_VLI_UNKNOWN; // for encoder to change
|
||||
lzma_ret err = LZMA_OK;
|
||||
while (err != LZMA_STREAM_END) {
|
||||
while (err == LZMA_OK) {
|
||||
err = lzma_code(&stream, LZMA_FINISH);
|
||||
if (err != LZMA_OK && err != LZMA_STREAM_END)
|
||||
}
|
||||
if (err == LZMA_BUF_ERROR) {
|
||||
debug("encoder: uncompressible %zu", pi->seq);
|
||||
encode_uncompressible(ib);
|
||||
ib->outsize = header_size + uncompressible_size;
|
||||
} else if (err == LZMA_STREAM_END) {
|
||||
ib->outsize = stream.next_out - ib->output;
|
||||
} else {
|
||||
die("Error encoding block");
|
||||
}
|
||||
block_dealloc(ib, BLOCK_IN);
|
||||
ib->outsize = stream.next_out - ib->output;
|
||||
|
||||
if (lzma_block_header_encode(&ib->block, ib->output) != LZMA_OK)
|
||||
die("Error encoding block header");
|
||||
@ -325,7 +399,7 @@ static void block_init(lzma_block *block, size_t insize) {
|
||||
block->check = CHECK;
|
||||
block->filters = gFilters;
|
||||
block->uncompressed_size = insize ? insize : LZMA_VLI_UNKNOWN;
|
||||
block->compressed_size = insize? gBlockOutSize : LZMA_VLI_UNKNOWN;
|
||||
block->compressed_size = insize ? gBlockOutSize : LZMA_VLI_UNKNOWN;
|
||||
|
||||
if (lzma_block_header_size(block) != LZMA_OK)
|
||||
die("Error getting block header size");
|
||||
|
Loading…
Reference in New Issue
Block a user