From dedbc6d23fc448553214796b6471d9985deec0a9 Mon Sep 17 00:00:00 2001
From: Dave Vasilevsky <dave@vasilevsky.ca>
Date: Sun, 14 Oct 2012 03:01:32 -0400
Subject: [PATCH 01/22] Use dynamic block sizes

---
 read.c | 43 ++++++++++++++++++-------------------------
 1 file changed, 18 insertions(+), 25 deletions(-)

diff --git a/read.c b/read.c
index 4d89f48..bb2647c 100644
--- a/read.c
+++ b/read.c
@@ -25,6 +25,7 @@ static void wanted_free(wanted_t *w);
 
 typedef struct {
     uint8_t *input, *output;
+	size_t incap, outcap;
     size_t insize, outsize;
     off_t uoffset; // uncompressed offset
 } io_block_t;
@@ -52,9 +53,8 @@ static void tar_write_last(void);
 #pragma mark DECLARE UTILS
 
 static lzma_vli gFileIndexOffset = 0;
-static size_t gBlockInSize = 0, gBlockOutSize = 0;
 
-static void set_block_sizes(void);
+static void check_capacity(io_block_t *ib, size_t incap, size_t outcap);
 
 
 #pragma mark MAIN
@@ -64,7 +64,6 @@ void pixz_read(bool verify, size_t nspecs, char **specs) {
     if (verify)
         gFileIndexOffset = read_file_index(0);
     wanted_files(nspecs, specs);
-    set_block_sizes();
 
 #if DEBUG
     for (wanted_t *w = gWantedFiles; w; w = w->next)
@@ -135,8 +134,8 @@ void pixz_read(bool verify, size_t nspecs, char **specs) {
 
 static void *block_create(void) {
     io_block_t *ib = malloc(sizeof(io_block_t));
-    ib->input = malloc(gBlockInSize);
-    ib->output = malloc(gBlockOutSize);
+	ib->incap = ib->outcap = 0;
+	ib->input = ib->output = NULL;
     return ib;
 }
 
@@ -150,25 +149,6 @@ static void block_free(void* data) {
 
 #pragma mark SETUP
 
-static void set_block_sizes() {
-    lzma_index_iter iter;
-    lzma_index_iter_init(&iter, gIndex);
-    while (!lzma_index_iter_next(&iter, LZMA_INDEX_ITER_BLOCK)) {
-        // exclude the file index block
-        lzma_vli off = iter.block.compressed_file_offset;
-        if (gFileIndexOffset && off == gFileIndexOffset)
-            continue;
-        
-        size_t in = iter.block.total_size,
-            out = iter.block.uncompressed_size;
-        if (out > gBlockOutSize)
-            gBlockOutSize = out;
-        if (in > gBlockInSize)
-            gBlockInSize = in;
-    }
-}
-
-
 static void wanted_free(wanted_t *w) {
     for (wanted_t *w = gWantedFiles; w; ) {
         wanted_t *tmp = w->next;
@@ -244,6 +224,17 @@ static void wanted_files(size_t count, char **specs) {
 
 #pragma mark THREADS
 
+static void check_capacity(io_block_t *ib, size_t incap, size_t outcap) {
+	if (incap > ib->incap) {
+		ib->incap = incap;
+		ib->input = malloc(incap);
+	}
+	if (outcap > ib->outcap) {
+		ib->outcap = outcap;
+		ib->output = malloc(outcap);
+	}
+}
+
 static void read_thread(void) {
     off_t offset = ftello(gInFile);
     wanted_t *w = gWantedFiles;
@@ -273,6 +264,8 @@ static void read_thread(void) {
         pipeline_item_t *pi;
         queue_pop(gPipelineStartQ, (void**)&pi);
         io_block_t *ib = (io_block_t*)(pi->data);
+		check_capacity(ib, iter.block.unpadded_size,
+			iter.block.uncompressed_size);
         
         // Seek if needed, and get the data
         if (offset != boffset) {
@@ -310,7 +303,7 @@ static void decode_thread(size_t thnum) {
         
         stream.avail_in = ib->insize - block.header_size;
         stream.next_in = ib->input + block.header_size;
-        stream.avail_out = gBlockOutSize;
+        stream.avail_out = ib->outcap;
         stream.next_out = ib->output;
         
         lzma_ret err = LZMA_OK;

From cad2ee95ebaf31a25bbe86ef9b01645c60202f1f Mon Sep 17 00:00:00 2001
From: Dave Vasilevsky <dave@vasilevsky.ca>
Date: Sun, 14 Oct 2012 06:13:23 -0400
Subject: [PATCH 02/22] We never use the argument to read_file_index

---
 common.c | 5 ++---
 list.c   | 2 +-
 pixz.h   | 2 +-
 read.c   | 2 +-
 4 files changed, 5 insertions(+), 6 deletions(-)

diff --git a/common.c b/common.c
index b637d2b..ad291d2 100644
--- a/common.c
+++ b/common.c
@@ -146,10 +146,9 @@ lzma_vli find_file_index(void **bdatap) {
     return ret; 
 }  
 
-lzma_vli read_file_index(lzma_vli offset) {
+lzma_vli read_file_index() {
     void *bdata = NULL;
-    if (!offset)
-        offset = find_file_index(&bdata);
+	lzma_vli offset = find_file_index(&bdata);
     if (!offset)
         return 0;
     
diff --git a/list.c b/list.c
index e601ce2..9798d29 100644
--- a/list.c
+++ b/list.c
@@ -7,7 +7,7 @@ void pixz_list(bool tar) {
     lzma_index_iter iter;
     lzma_index_iter_init(&iter, gIndex);
 
-    if (tar && read_file_index(0)) {
+    if (tar && read_file_index()) {
         dump_file_index(stdout, false);
         free_file_index();
     } else {
diff --git a/pixz.h b/pixz.h
index daacd97..d1e9239 100644
--- a/pixz.h
+++ b/pixz.h
@@ -71,7 +71,7 @@ bool is_multi_header(const char *name);
 void decode_index(void);
 
 lzma_vli find_file_index(void **bdatap);
-lzma_vli read_file_index(lzma_vli offset);
+lzma_vli read_file_index(void);
 void dump_file_index(FILE *out, bool verbose);
 void free_file_index(void);
 
diff --git a/read.c b/read.c
index bb2647c..134a50d 100644
--- a/read.c
+++ b/read.c
@@ -62,7 +62,7 @@ static void check_capacity(io_block_t *ib, size_t incap, size_t outcap);
 void pixz_read(bool verify, size_t nspecs, char **specs) {
     decode_index();
     if (verify)
-        gFileIndexOffset = read_file_index(0);
+        gFileIndexOffset = read_file_index();
     wanted_files(nspecs, specs);
 
 #if DEBUG

From b13ae91698316ba1e3646b85ebe13cb8004b2df9 Mon Sep 17 00:00:00 2001
From: Dave Vasilevsky <dave@vasilevsky.ca>
Date: Sun, 14 Oct 2012 07:33:33 -0400
Subject: [PATCH 03/22] It's ok to decompress a text file to a TTY

---
 pixz.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/pixz.c b/pixz.c
index 22bf3ff..7b318dc 100644
--- a/pixz.c
+++ b/pixz.c
@@ -110,11 +110,13 @@ int main(int argc, char **argv) {
         die("Can't open input file");
     if (opath && !(gOutFile = fopen(opath, "w")))
         die("Can't open output file");
-    if (op != OP_LIST && isatty(fileno(gOutFile)) == 1)
-		usage("Refusing to output to a TTY");
 	
     switch (op) {
-        case OP_WRITE: pixz_write(tar, level); break;
+        case OP_WRITE:
+			if (isatty(fileno(gOutFile)) == 1)
+				usage("Refusing to output to a TTY");
+			pixz_write(tar, level);
+			break;
         case OP_READ: pixz_read(tar, 0, NULL); break;
         case OP_EXTRACT: pixz_read(tar, argc, argv); break;
         case OP_LIST: pixz_list(tar);

From ea64c94c2182bb3347ca37d5425ea9d14c1f753f Mon Sep 17 00:00:00 2001
From: Dave Vasilevsky <dave@vasilevsky.ca>
Date: Sun, 14 Oct 2012 07:38:42 -0400
Subject: [PATCH 04/22] We handle EOF fine now

---
 write.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/write.c b/write.c
index e8186de..958dc1a 100644
--- a/write.c
+++ b/write.c
@@ -126,7 +126,6 @@ static void read_thread() {
 	    while (true) {
 	        int aerr = archive_read_next_header(ar, &entry);
 	        if (aerr == ARCHIVE_EOF) {
-	            // TODO
 	            break;
 	        } else if (aerr != ARCHIVE_OK && aerr != ARCHIVE_WARN) {
 	            // Some charset translations warn spuriously

From 91f044e569a9bf5f2ebf574bdaefcb02f185858f Mon Sep 17 00:00:00 2001
From: Dave Vasilevsky <dave@vasilevsky.ca>
Date: Sun, 14 Oct 2012 07:39:10 -0400
Subject: [PATCH 05/22] Start adding non-indexed reading

---
 common.c |  6 +++--
 pixz.h   |  2 +-
 read.c   | 74 +++++++++++++++++++++++++++++++++++++++++++++++++++-----
 3 files changed, 73 insertions(+), 9 deletions(-)

diff --git a/common.c b/common.c
index ad291d2..a0e0b30 100644
--- a/common.c
+++ b/common.c
@@ -229,9 +229,10 @@ static void read_file_index_data(void) {
     }
 }
 
-void decode_index(void) {
+bool decode_index(void) {
     if (fseek(gInFile, -LZMA_STREAM_HEADER_SIZE, SEEK_END) == -1)
-        die("Error seeking to stream footer");
+		return false; // not seekable
+	
     uint8_t hdrbuf[LZMA_STREAM_HEADER_SIZE];
     if (fread(hdrbuf, LZMA_STREAM_HEADER_SIZE, 1, gInFile) != 1)
         die("Error reading stream footer");
@@ -261,6 +262,7 @@ void decode_index(void) {
         if (err != LZMA_OK && err != LZMA_STREAM_END)
             die("Error decoding index");
     }
+	return true;
 }
 
 
diff --git a/pixz.h b/pixz.h
index d1e9239..ee45a2e 100644
--- a/pixz.h
+++ b/pixz.h
@@ -68,7 +68,7 @@ extern file_index_t *gFileIndex, *gLastFile;
 extern lzma_check gCheck;
 
 bool is_multi_header(const char *name);
-void decode_index(void);
+bool decode_index(void); // true on success
 
 lzma_vli find_file_index(void **bdatap);
 lzma_vli read_file_index(void);
diff --git a/read.c b/read.c
index 134a50d..08d7acd 100644
--- a/read.c
+++ b/read.c
@@ -33,6 +33,7 @@ typedef struct {
 static void *block_create(void);
 static void block_free(void *data);
 static void read_thread(void);
+static void read_thread_noindex(void);
 static void decode_thread(size_t thnum);
 
 
@@ -60,17 +61,18 @@ static void check_capacity(io_block_t *ib, size_t incap, size_t outcap);
 #pragma mark MAIN
 
 void pixz_read(bool verify, size_t nspecs, char **specs) {
-    decode_index();
-    if (verify)
-        gFileIndexOffset = read_file_index();
-    wanted_files(nspecs, specs);
+    if (0 && decode_index()) { // FIXME
+	    if (verify)
+	        gFileIndexOffset = read_file_index();
+	    wanted_files(nspecs, specs);    	
+    }
 
 #if DEBUG
     for (wanted_t *w = gWantedFiles; w; w = w->next)
         debug("want: %s", w->name);
 #endif
     
-    pipeline_create(block_create, block_free, read_thread, decode_thread);
+    pipeline_create(block_create, block_free, read_thread_noindex, decode_thread);
     if (verify && gFileIndexOffset) {
         gArWanted = gWantedFiles;
         wanted_t *w = gWantedFiles, *wlast = NULL;
@@ -227,7 +229,7 @@ static void wanted_files(size_t count, char **specs) {
 static void check_capacity(io_block_t *ib, size_t incap, size_t outcap) {
 	if (incap > ib->incap) {
 		ib->incap = incap;
-		ib->input = malloc(incap);
+		ib->input = realloc(ib->input, incap);
 	}
 	if (outcap > ib->outcap) {
 		ib->outcap = outcap;
@@ -235,6 +237,66 @@ static void check_capacity(io_block_t *ib, size_t incap, size_t outcap) {
 	}
 }
 
+static void read_thread_noindex(void) {
+	size_t bytes;
+	lzma_ret err;
+	
+	// Read the header
+	uint8_t stream_header[LZMA_STREAM_HEADER_SIZE];
+	bytes = fread(stream_header, 1, LZMA_STREAM_HEADER_SIZE, gInFile);
+	if (bytes != LZMA_STREAM_HEADER_SIZE)
+		die("Error reading stream header");
+	lzma_stream_flags stream_flags;
+	err = lzma_stream_header_decode(&stream_flags, stream_header);
+	if (err == LZMA_FORMAT_ERROR)
+		die("Not an XZ file");
+	else if (err != LZMA_OK)
+		die("Error decoding XZ header");
+	gCheck = stream_flags.check;
+	
+    lzma_filter filters[LZMA_FILTERS_MAX + 1];
+    lzma_block block = { .filters = filters, .check = gCheck, .version = 0 };
+	while (true) {
+		// Get pipeline item
+        pipeline_item_t *pi;
+        queue_pop(gPipelineStartQ, (void**)&pi);
+        io_block_t *ib = (io_block_t*)(pi->data);
+		check_capacity(ib, LZMA_BLOCK_HEADER_SIZE_MAX, 0);
+		
+		// Check for index
+		if (fread(ib->input, 1, 1, gInFile) != 1)
+			die("Error reading block header size");
+		if (ib->input[0] == 0)
+			break; // Found the index
+		
+		// Decode header
+		block.header_size = lzma_block_header_size_decode(ib->input[0]);
+		if (block.header_size > LZMA_BLOCK_HEADER_SIZE_MAX)
+			die("Block header size too large");
+		size_t rest = block.header_size - 1;
+		if (fread(ib->input + 1, 1, rest, gInFile) != rest)
+			die("Error reading block header");
+		if (lzma_block_header_decode(&block, NULL, ib->input) != LZMA_OK)
+			die("Error decoding block header");
+		
+		lzma_vli comp = block.compressed_size;
+		ib->insize = lzma_block_total_size(&block);
+		ib->outsize = block.uncompressed_size;
+		if (comp == LZMA_VLI_UNKNOWN || ib->outsize == LZMA_VLI_UNKNOWN)
+			die("No sizes in header!!!"); // FIXME: streaming; file index
+		check_capacity(ib, ib->insize, ib->outsize);
+		
+		rest = ib->insize - block.header_size;
+		bytes = fread(ib->input + block.header_size, 1, rest, gInFile);
+		if (bytes != rest)
+			die("Error reading block contents");
+		pipeline_split(pi);
+	}
+	
+	pipeline_stop();
+	// FIXME: don't output the pixz file index! heuristic?
+}
+
 static void read_thread(void) {
     off_t offset = ftello(gInFile);
     wanted_t *w = gWantedFiles;

From 7820ec52b9be080a9740659a7ed45031554b7479 Mon Sep 17 00:00:00 2001
From: Dave Vasilevsky <dave@vasilevsky.ca>
Date: Sun, 14 Oct 2012 09:15:42 -0400
Subject: [PATCH 06/22] cleanup

---
 write.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/write.c b/write.c
index 958dc1a..2c291e6 100644
--- a/write.c
+++ b/write.c
@@ -93,10 +93,9 @@ void pixz_write(bool tar, uint32_t level) {
     }
     
     // file index
-    if (gTar) {
+    if (gTar)
         write_file_index();
-        free_file_index();
-    }
+    free_file_index();
     
     // post-block cleanup: index, footer
     encode_index();

From a6d82ed79edffbc4f2d23e2546c99f71992db685 Mon Sep 17 00:00:00 2001
From: Dave Vasilevsky <dave@vasilevsky.ca>
Date: Sun, 14 Oct 2012 09:25:25 -0400
Subject: [PATCH 07/22] Add FIXMEs

---
 read.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/read.c b/read.c
index 08d7acd..697d726 100644
--- a/read.c
+++ b/read.c
@@ -74,6 +74,8 @@ void pixz_read(bool verify, size_t nspecs, char **specs) {
     
     pipeline_create(block_create, block_free, read_thread_noindex, decode_thread);
     if (verify && gFileIndexOffset) {
+		// FIXME: verify this works with noindex/streamed reading
+		// FIXME: don't stop on End Of Archive
         gArWanted = gWantedFiles;
         wanted_t *w = gWantedFiles, *wlast = NULL;
         bool lastmulti = false;

From 8e1efb824eebf04a4a10b46d020df3ba44698faa Mon Sep 17 00:00:00 2001
From: Dave Vasilevsky <dave@vasilevsky.ca>
Date: Sun, 14 Oct 2012 09:48:48 -0400
Subject: [PATCH 08/22] Fix pragma marks

---
 read.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/read.c b/read.c
index 697d726..9eb1384 100644
--- a/read.c
+++ b/read.c
@@ -226,7 +226,7 @@ static void wanted_files(size_t count, char **specs) {
 }
 
 
-#pragma mark THREADS
+#pragma mark READ
 
 static void check_capacity(io_block_t *ib, size_t incap, size_t outcap) {
 	if (incap > ib->incap) {
@@ -266,7 +266,7 @@ static void read_thread_noindex(void) {
 		check_capacity(ib, LZMA_BLOCK_HEADER_SIZE_MAX, 0);
 		
 		// Check for index
-		if (fread(ib->input, 1, 1, gInFile) != 1)
+		if (ib->insize < 1 && fread(ib->input, 1, 1, gInFile) != 1)
 			die("Error reading block header size");
 		if (ib->input[0] == 0)
 			break; // Found the index
@@ -348,6 +348,8 @@ static void read_thread(void) {
     pipeline_stop();
 }
 
+#pragma mark DECODE
+
 static void decode_thread(size_t thnum) {
     lzma_stream stream = LZMA_STREAM_INIT;
     lzma_filter filters[LZMA_FILTERS_MAX + 1];

From 9694d22dcd2ada24d67e0a6fe696c2b7cc8a0be5 Mon Sep 17 00:00:00 2001
From: Dave Vasilevsky <dave@vasilevsky.ca>
Date: Sat, 20 Oct 2012 21:07:49 -0400
Subject: [PATCH 09/22] Declare start of read buffering infrastructure

---
 read.c | 23 +++++++++++++++++------
 1 file changed, 17 insertions(+), 6 deletions(-)

diff --git a/read.c b/read.c
index 9eb1384..dd143df 100644
--- a/read.c
+++ b/read.c
@@ -51,12 +51,23 @@ static bool tar_next_block(void);
 static void tar_write_last(void);
 
 
+#pragma mark DECLARE READ BUFFER
+
+static pipeline_item_t *gRbufPI = NULL;
+static io_block_t *gRbuf = NULL;
+
+static void block_capacity(io_block_t *ib, size_t incap, size_t outcap);
+static void stream_write(pipeline_item_t *pi);
+
+static ssize_t rbuf_read(size_t bytes);
+static void rbuf_consume(size_t bytes);
+static void rbuf_dispatch();
+
+
 #pragma mark DECLARE UTILS
 
 static lzma_vli gFileIndexOffset = 0;
 
-static void check_capacity(io_block_t *ib, size_t incap, size_t outcap);
-
 
 #pragma mark MAIN
 
@@ -228,7 +239,7 @@ static void wanted_files(size_t count, char **specs) {
 
 #pragma mark READ
 
-static void check_capacity(io_block_t *ib, size_t incap, size_t outcap) {
+static void block_capacity(io_block_t *ib, size_t incap, size_t outcap) {
 	if (incap > ib->incap) {
 		ib->incap = incap;
 		ib->input = realloc(ib->input, incap);
@@ -263,7 +274,7 @@ static void read_thread_noindex(void) {
         pipeline_item_t *pi;
         queue_pop(gPipelineStartQ, (void**)&pi);
         io_block_t *ib = (io_block_t*)(pi->data);
-		check_capacity(ib, LZMA_BLOCK_HEADER_SIZE_MAX, 0);
+		block_capacity(ib, LZMA_BLOCK_HEADER_SIZE_MAX, 0);
 		
 		// Check for index
 		if (ib->insize < 1 && fread(ib->input, 1, 1, gInFile) != 1)
@@ -286,7 +297,7 @@ static void read_thread_noindex(void) {
 		ib->outsize = block.uncompressed_size;
 		if (comp == LZMA_VLI_UNKNOWN || ib->outsize == LZMA_VLI_UNKNOWN)
 			die("No sizes in header!!!"); // FIXME: streaming; file index
-		check_capacity(ib, ib->insize, ib->outsize);
+		block_capacity(ib, ib->insize, ib->outsize);
 		
 		rest = ib->insize - block.header_size;
 		bytes = fread(ib->input + block.header_size, 1, rest, gInFile);
@@ -328,7 +339,7 @@ static void read_thread(void) {
         pipeline_item_t *pi;
         queue_pop(gPipelineStartQ, (void**)&pi);
         io_block_t *ib = (io_block_t*)(pi->data);
-		check_capacity(ib, iter.block.unpadded_size,
+		block_capacity(ib, iter.block.unpadded_size,
 			iter.block.uncompressed_size);
         
         // Seek if needed, and get the data

From 70a3c58520c0254eb58511bc09d76e32c3e53dc1 Mon Sep 17 00:00:00 2001
From: Dave Vasilevsky <dave@vasilevsky.ca>
Date: Sat, 20 Oct 2012 21:54:17 -0400
Subject: [PATCH 10/22] Use the read buffer

---
 common.c |  8 ++++--
 pixz.h   |  1 +
 read.c   | 79 +++++++++++++++++++++++++++++++++++++-------------------
 3 files changed, 60 insertions(+), 28 deletions(-)

diff --git a/common.c b/common.c
index a0e0b30..2f0f051 100644
--- a/common.c
+++ b/common.c
@@ -437,10 +437,14 @@ void pipeline_destroy(void) {
     free(gPLProcessThreads);
 }
 
-void pipeline_split(pipeline_item_t *item) {
+void pipeline_dispatch(pipeline_item_t *item, queue_t *q) {
     item->seq = gPLSplitSeq++;
     item->next = NULL;
-    queue_push(gPipelineSplitQ, PIPELINE_ITEM, item);
+    queue_push(q, PIPELINE_ITEM, item);
+}
+
+void pipeline_split(pipeline_item_t *item) {
+	pipeline_dispatch(item, gPipelineSplitQ);
 }
 
 pipeline_item_t *pipeline_merged() {
diff --git a/pixz.h b/pixz.h
index ee45a2e..7e7b5fc 100644
--- a/pixz.h
+++ b/pixz.h
@@ -135,5 +135,6 @@ void pipeline_create(
 void pipeline_stop(void);
 void pipeline_destroy(void);
 
+void pipeline_dispatch(pipeline_item_t *item, queue_t *q);
 void pipeline_split(pipeline_item_t *item);
 pipeline_item_t *pipeline_merged();
diff --git a/read.c b/read.c
index dd143df..9d71005 100644
--- a/read.c
+++ b/read.c
@@ -57,11 +57,14 @@ static pipeline_item_t *gRbufPI = NULL;
 static io_block_t *gRbuf = NULL;
 
 static void block_capacity(io_block_t *ib, size_t incap, size_t outcap);
-static void stream_write(pipeline_item_t *pi);
 
-static ssize_t rbuf_read(size_t bytes);
+typedef enum {
+	RBUF_ERR, RBUF_EOF, RBUF_PART, RBUF_FULL
+} rbuf_read_status;
+
+static rbuf_read_status rbuf_read(size_t bytes);
 static void rbuf_consume(size_t bytes);
-static void rbuf_dispatch();
+static void rbuf_dispatch(void);
 
 
 #pragma mark DECLARE UTILS
@@ -250,6 +253,40 @@ static void block_capacity(io_block_t *ib, size_t incap, size_t outcap) {
 	}
 }
 
+// Ensure at least this many bytes available
+// Returns RBUF_FULL if satisfied, RBUF_PART on a short read, RBUF_EOF at end of input, RBUF_ERR on error
+static rbuf_read_status rbuf_read(size_t bytes) {
+	if (!gRbufPI) {
+        queue_pop(gPipelineStartQ, (void**)&gRbufPI);
+		gRbuf = (io_block_t*)(gRbufPI->data);
+		gRbuf->insize = gRbuf->outsize = 0;
+	}
+	
+	if (gRbuf->insize >= bytes)
+		return RBUF_FULL;
+	
+	block_capacity(gRbuf, bytes, 0);
+	size_t r = fread(gRbuf->input + gRbuf->insize, 1, bytes - gRbuf->insize,
+		gInFile);
+	gRbuf->insize += r;
+	
+	if (r)
+		return (gRbuf->insize == bytes) ? RBUF_FULL : RBUF_PART;
+	return feof(gInFile) ? RBUF_EOF : RBUF_ERR;
+}
+
+static void rbuf_consume(size_t bytes) {
+	if (bytes < gRbuf->insize)
+		memmove(gRbuf->input, gRbuf->input + bytes, gRbuf->insize - bytes);
+	gRbuf->insize -= bytes;
+}
+
+static void rbuf_dispatch(void) {
+	pipeline_split(gRbufPI);
+	gRbufPI = NULL;
+	gRbuf = NULL;
+}
+
 static void read_thread_noindex(void) {
 	size_t bytes;
 	lzma_ret err;
@@ -270,40 +307,30 @@ static void read_thread_noindex(void) {
     lzma_filter filters[LZMA_FILTERS_MAX + 1];
     lzma_block block = { .filters = filters, .check = gCheck, .version = 0 };
 	while (true) {
-		// Get pipeline item
-        pipeline_item_t *pi;
-        queue_pop(gPipelineStartQ, (void**)&pi);
-        io_block_t *ib = (io_block_t*)(pi->data);
-		block_capacity(ib, LZMA_BLOCK_HEADER_SIZE_MAX, 0);
-		
 		// Check for index
-		if (ib->insize < 1 && fread(ib->input, 1, 1, gInFile) != 1)
+		if (rbuf_read(1) != RBUF_FULL)
 			die("Error reading block header size");
-		if (ib->input[0] == 0)
-			break; // Found the index
-		
+		if (gRbuf->input[0] == 0)
+			break; // Found the index. FIXME: multi-stream?
+				
 		// Decode header
-		block.header_size = lzma_block_header_size_decode(ib->input[0]);
+		block.header_size = lzma_block_header_size_decode(gRbuf->input[0]);
 		if (block.header_size > LZMA_BLOCK_HEADER_SIZE_MAX)
 			die("Block header size too large");
-		size_t rest = block.header_size - 1;
-		if (fread(ib->input + 1, 1, rest, gInFile) != rest)
+		if (rbuf_read(block.header_size) != RBUF_FULL)
 			die("Error reading block header");
-		if (lzma_block_header_decode(&block, NULL, ib->input) != LZMA_OK)
+		if (lzma_block_header_decode(&block, NULL, gRbuf->input) != LZMA_OK)
 			die("Error decoding block header");
 		
-		lzma_vli comp = block.compressed_size;
-		ib->insize = lzma_block_total_size(&block);
-		ib->outsize = block.uncompressed_size;
-		if (comp == LZMA_VLI_UNKNOWN || ib->outsize == LZMA_VLI_UNKNOWN)
+		size_t comp = block.compressed_size, outsize = block.uncompressed_size;
+		if (comp == LZMA_VLI_UNKNOWN || outsize == LZMA_VLI_UNKNOWN)
 			die("No sizes in header!!!"); // FIXME: streaming; file index
-		block_capacity(ib, ib->insize, ib->outsize);
+		block_capacity(gRbuf, 0, outsize);
+		gRbuf->outsize = outsize;
 		
-		rest = ib->insize - block.header_size;
-		bytes = fread(ib->input + block.header_size, 1, rest, gInFile);
-		if (bytes != rest)
+		if (rbuf_read(lzma_block_total_size(&block)) != RBUF_FULL)
 			die("Error reading block contents");
-		pipeline_split(pi);
+		rbuf_dispatch();
 	}
 	
 	pipeline_stop();

From 4474af5419fdf588131a13c70422e8543edb2749 Mon Sep 17 00:00:00 2001
From: Dave Vasilevsky <dave@vasilevsky.ca>
Date: Sat, 20 Oct 2012 22:32:20 -0400
Subject: [PATCH 11/22] Streaming read works

---
 read.c | 70 +++++++++++++++++++++++++++++++++++++++++++++++++---------
 1 file changed, 60 insertions(+), 10 deletions(-)

diff --git a/read.c b/read.c
index 9d71005..b17a5fc 100644
--- a/read.c
+++ b/read.c
@@ -53,6 +53,8 @@ static void tar_write_last(void);
 
 #pragma mark DECLARE READ BUFFER
 
+#define STREAMSIZE (1024 * 1024)
+
 static pipeline_item_t *gRbufPI = NULL;
 static io_block_t *gRbuf = NULL;
 
@@ -66,6 +68,8 @@ static rbuf_read_status rbuf_read(size_t bytes);
 static void rbuf_consume(size_t bytes);
 static void rbuf_dispatch(void);
 
+static void read_streaming(lzma_block *block);
+
 
 #pragma mark DECLARE UTILS
 
@@ -287,11 +291,57 @@ static void rbuf_dispatch(void) {
 	gRbuf = NULL;
 }
 
+static void read_streaming(lzma_block *block) {
+    lzma_stream stream = LZMA_STREAM_INIT;
+    if (lzma_block_decoder(&stream, block) != LZMA_OK)
+		die("Error initializing streaming block decode");
+	stream.next_in = gRbuf->input + block->header_size;
+	stream.avail_in = gRbuf->insize - block->header_size;
+	stream.avail_out = 0;
+	
+    pipeline_item_t *pi = NULL;
+    io_block_t *ib = NULL;
+    
+	lzma_ret err = LZMA_OK;
+	while (err != LZMA_STREAM_END) {
+		if (err != LZMA_OK)
+			die("Error decoding streaming block");
+		
+		if (stream.avail_out == 0) {
+			if (ib) {
+				ib->outsize = ib->outcap;
+				pipeline_dispatch(pi, gPipelineMergeQ);
+			}
+			queue_pop(gPipelineStartQ, (void**)&pi);
+			ib = (io_block_t*)pi->data;
+			block_capacity(ib, 0, STREAMSIZE);
+			stream.next_out = ib->output;
+			stream.avail_out = ib->outcap;
+		}
+		if (stream.avail_in == 0) {
+			rbuf_consume(gRbuf->insize);
+			if (rbuf_read(CHUNKSIZE) < RBUF_PART)
+				die("Error reading streaming block contents");
+			stream.next_in = gRbuf->input;
+			stream.avail_in = gRbuf->insize;
+		}
+		
+		err = lzma_code(&stream, LZMA_RUN);
+	}
+	
+	if (ib && stream.avail_out != ib->outcap) {
+		ib->outsize = ib->outcap - stream.avail_out;
+		pipeline_dispatch(pi, gPipelineMergeQ);
+	}
+	rbuf_consume(gRbuf->insize - stream.avail_in);
+	lzma_end(&stream);
+}
+
 static void read_thread_noindex(void) {
 	size_t bytes;
 	lzma_ret err;
 	
-	// Read the header
+	// Stream header
 	uint8_t stream_header[LZMA_STREAM_HEADER_SIZE];
 	bytes = fread(stream_header, 1, LZMA_STREAM_HEADER_SIZE, gInFile);
 	if (bytes != LZMA_STREAM_HEADER_SIZE)
@@ -307,13 +357,11 @@ static void read_thread_noindex(void) {
     lzma_filter filters[LZMA_FILTERS_MAX + 1];
     lzma_block block = { .filters = filters, .check = gCheck, .version = 0 };
 	while (true) {
-		// Check for index
 		if (rbuf_read(1) != RBUF_FULL)
 			die("Error reading block header size");
 		if (gRbuf->input[0] == 0)
 			break; // Found the index. FIXME: multi-stream?
 				
-		// Decode header
 		block.header_size = lzma_block_header_size_decode(gRbuf->input[0]);
 		if (block.header_size > LZMA_BLOCK_HEADER_SIZE_MAX)
 			die("Block header size too large");
@@ -323,14 +371,16 @@ static void read_thread_noindex(void) {
 			die("Error decoding block header");
 		
 		size_t comp = block.compressed_size, outsize = block.uncompressed_size;
-		if (comp == LZMA_VLI_UNKNOWN || outsize == LZMA_VLI_UNKNOWN)
-			die("No sizes in header!!!"); // FIXME: streaming; file index
-		block_capacity(gRbuf, 0, outsize);
-		gRbuf->outsize = outsize;
+		if (comp == LZMA_VLI_UNKNOWN || outsize == LZMA_VLI_UNKNOWN) {
+			read_streaming(&block);
+		} else {
+			block_capacity(gRbuf, 0, outsize);
+			gRbuf->outsize = outsize;
 		
-		if (rbuf_read(lzma_block_total_size(&block)) != RBUF_FULL)
-			die("Error reading block contents");
-		rbuf_dispatch();
+			if (rbuf_read(lzma_block_total_size(&block)) != RBUF_FULL)
+				die("Error reading block contents");
+			rbuf_dispatch();
+		}
 	}
 	
 	pipeline_stop();

From aa79e8795655890e0311868aa2e2454896462e44 Mon Sep 17 00:00:00 2001
From: Dave Vasilevsky <dave@vasilevsky.ca>
Date: Sat, 20 Oct 2012 23:30:07 -0400
Subject: [PATCH 12/22] Multiple streams are supported

---
 read.c | 169 ++++++++++++++++++++++++++++++++++++++++-----------------
 1 file changed, 118 insertions(+), 51 deletions(-)

diff --git a/read.c b/read.c
index b17a5fc..592fc66 100644
--- a/read.c
+++ b/read.c
@@ -65,10 +65,15 @@ typedef enum {
 } rbuf_read_status;
 
 static rbuf_read_status rbuf_read(size_t bytes);
+static bool rbuf_cycle(lzma_stream *stream, bool start, size_t skip);
 static void rbuf_consume(size_t bytes);
 static void rbuf_dispatch(void);
 
+static bool read_header(void);
+static bool read_block(void);
 static void read_streaming(lzma_block *block);
+static void read_index(void);
+static void read_footer(void);
 
 
 #pragma mark DECLARE UTILS
@@ -179,6 +184,7 @@ static void wanted_free(wanted_t *w) {
     }
 }
 
+
 static bool spec_match(char *spec, char *name) {
     bool match = true;
     for (; *spec; ++spec, ++name) {
@@ -279,6 +285,17 @@ static rbuf_read_status rbuf_read(size_t bytes) {
 	return feof(gInFile) ? RBUF_EOF : RBUF_ERR;
 }
 
+static bool rbuf_cycle(lzma_stream *stream, bool start, size_t skip) {
+	if (!start) {
+		rbuf_consume(gRbuf->insize);
+		if (rbuf_read(CHUNKSIZE) < RBUF_PART)
+			return false;
+	}
+	stream->next_in = gRbuf->input + skip;
+	stream->avail_in = gRbuf->insize - skip;
+	return true;
+}
+
 static void rbuf_consume(size_t bytes) {
 	if (bytes < gRbuf->insize)
 		memmove(gRbuf->input, gRbuf->input + bytes, gRbuf->insize - bytes);
@@ -291,12 +308,60 @@ static void rbuf_dispatch(void) {
 	gRbuf = NULL;
 }
 
+
+static bool read_header(void) {
+	lzma_stream_flags stream_flags;
+	rbuf_read_status st = rbuf_read(LZMA_STREAM_HEADER_SIZE);
+	if (st == RBUF_EOF)
+		return false;
+	else if (st != RBUF_FULL)
+		die("Error reading stream header");
+	lzma_ret err = lzma_stream_header_decode(&stream_flags, gRbuf->input);
+	if (err == LZMA_FORMAT_ERROR)
+		die("Not an XZ file");
+	else if (err != LZMA_OK)
+		die("Error decoding XZ header");
+	gCheck = stream_flags.check;
+	rbuf_consume(LZMA_STREAM_HEADER_SIZE);
+	return true;
+}
+
+static bool read_block(void) {
+    lzma_filter filters[LZMA_FILTERS_MAX + 1];
+    lzma_block block = { .filters = filters, .check = gCheck, .version = 0 };
+	
+	if (rbuf_read(1) != RBUF_FULL)
+		die("Error reading block header size");
+	if (gRbuf->input[0] == 0)
+		return false;
+	
+	block.header_size = lzma_block_header_size_decode(gRbuf->input[0]);
+	if (block.header_size > LZMA_BLOCK_HEADER_SIZE_MAX)
+		die("Block header size too large");
+	if (rbuf_read(block.header_size) != RBUF_FULL)
+		die("Error reading block header");
+	if (lzma_block_header_decode(&block, NULL, gRbuf->input) != LZMA_OK)
+		die("Error decoding block header");
+		
+	size_t comp = block.compressed_size, outsize = block.uncompressed_size;
+	if (comp == LZMA_VLI_UNKNOWN || outsize == LZMA_VLI_UNKNOWN) {
+		read_streaming(&block);
+	} else {
+		block_capacity(gRbuf, 0, outsize);
+		gRbuf->outsize = outsize;
+		
+		if (rbuf_read(lzma_block_total_size(&block)) != RBUF_FULL)
+			die("Error reading block contents");
+		rbuf_dispatch();
+	}
+	return true;
+}
+
 static void read_streaming(lzma_block *block) {
     lzma_stream stream = LZMA_STREAM_INIT;
     if (lzma_block_decoder(&stream, block) != LZMA_OK)
 		die("Error initializing streaming block decode");
-	stream.next_in = gRbuf->input + block->header_size;
-	stream.avail_in = gRbuf->insize - block->header_size;
+	rbuf_cycle(&stream, true, block->header_size);
 	stream.avail_out = 0;
 	
     pipeline_item_t *pi = NULL;
@@ -318,13 +383,8 @@ static void read_streaming(lzma_block *block) {
 			stream.next_out = ib->output;
 			stream.avail_out = ib->outcap;
 		}
-		if (stream.avail_in == 0) {
-			rbuf_consume(gRbuf->insize);
-			if (rbuf_read(CHUNKSIZE) < RBUF_PART)
-				die("Error reading streaming block contents");
-			stream.next_in = gRbuf->input;
-			stream.avail_in = gRbuf->insize;
-		}
+		if (stream.avail_in == 0 && !rbuf_cycle(&stream, false, 0))
+			die("Error reading streaming block");
 		
 		err = lzma_code(&stream, LZMA_RUN);
 	}
@@ -337,54 +397,61 @@ static void read_streaming(lzma_block *block) {
 	lzma_end(&stream);
 }
 
-static void read_thread_noindex(void) {
-	size_t bytes;
-	lzma_ret err;
+static void read_index(void) {
+	// FIXME: verify it matches the blocks?
+    lzma_stream stream = LZMA_STREAM_INIT;
+	lzma_index *index;
+	if (lzma_index_decoder(&stream, &index, MEMLIMIT) != LZMA_OK)
+		die("Error initializing index decoder");
+	rbuf_cycle(&stream, true, 0);
 	
-	// Stream header
-	uint8_t stream_header[LZMA_STREAM_HEADER_SIZE];
-	bytes = fread(stream_header, 1, LZMA_STREAM_HEADER_SIZE, gInFile);
-	if (bytes != LZMA_STREAM_HEADER_SIZE)
-		die("Error reading stream header");
+	lzma_ret err = LZMA_OK;
+	while (err != LZMA_STREAM_END) {
+		if (err != LZMA_OK)
+			die("Error decoding index");
+		if (stream.avail_in == 0 && !rbuf_cycle(&stream, false, 0))
+			die("Error reading index");
+		err = lzma_code(&stream, LZMA_RUN);
+	}
+	rbuf_consume(gRbuf->insize - stream.avail_in);
+	lzma_end(&stream);
+}
+
+static void read_footer(void) {
+	// FIXME: compare with header?
 	lzma_stream_flags stream_flags;
-	err = lzma_stream_header_decode(&stream_flags, stream_header);
-	if (err == LZMA_FORMAT_ERROR)
-		die("Not an XZ file");
-	else if (err != LZMA_OK)
-		die("Error decoding XZ header");
-	gCheck = stream_flags.check;
+	if (rbuf_read(LZMA_STREAM_HEADER_SIZE) != RBUF_FULL)
+		die("Error reading stream footer");
+	if (lzma_stream_footer_decode(&stream_flags, gRbuf->input) != LZMA_OK)
+		die("Error decoding XZ footer");
+	rbuf_consume(LZMA_STREAM_HEADER_SIZE);
 	
-    lzma_filter filters[LZMA_FILTERS_MAX + 1];
-    lzma_block block = { .filters = filters, .check = gCheck, .version = 0 };
+	char zeros[4] = "\0\0\0\0";
 	while (true) {
-		if (rbuf_read(1) != RBUF_FULL)
-			die("Error reading block header size");
-		if (gRbuf->input[0] == 0)
-			break; // Found the index. FIXME: multi-stream?
-				
-		block.header_size = lzma_block_header_size_decode(gRbuf->input[0]);
-		if (block.header_size > LZMA_BLOCK_HEADER_SIZE_MAX)
-			die("Block header size too large");
-		if (rbuf_read(block.header_size) != RBUF_FULL)
-			die("Error reading block header");
-		if (lzma_block_header_decode(&block, NULL, gRbuf->input) != LZMA_OK)
-			die("Error decoding block header");
-		
-		size_t comp = block.compressed_size, outsize = block.uncompressed_size;
-		if (comp == LZMA_VLI_UNKNOWN || outsize == LZMA_VLI_UNKNOWN) {
-			read_streaming(&block);
-		} else {
-			block_capacity(gRbuf, 0, outsize);
-			gRbuf->outsize = outsize;
-		
-			if (rbuf_read(lzma_block_total_size(&block)) != RBUF_FULL)
-				die("Error reading block contents");
-			rbuf_dispatch();
-		}
+		rbuf_read_status st = rbuf_read(4);
+		if (st == RBUF_EOF)
+			return;
+		if (st != RBUF_FULL)
+			die("Footer must be multiple of four bytes");
+		if (memcmp(zeros, gRbuf->input, 4) != 0)
+			return;
+		rbuf_consume(4);
 	}
-	
+}
+
+static void read_thread_noindex(void) {
+	bool empty = true;
+	while (read_header()) {
+		empty = false;
+		while (read_block())
+			; // pass
+		read_index();
+		read_footer();
+		// FIXME: don't output the pixz file index! heuristic?
+	}
+	if (empty)
+		die("Empty input");
 	pipeline_stop();
-	// FIXME: don't output the pixz file index! heuristic?
 }
 
 static void read_thread(void) {

From 28e0515d75b1aafd0c8a07e774fadf63ba514484 Mon Sep 17 00:00:00 2001
From: Dave Vasilevsky <dave@vasilevsky.ca>
Date: Sun, 4 Nov 2012 19:48:28 -0500
Subject: [PATCH 13/22] Start factoring out index decoding

---
 common.c | 109 ++++++++++++++++++++++++++++++++++++++++++++++---------
 1 file changed, 91 insertions(+), 18 deletions(-)

diff --git a/common.c b/common.c
index 2f0f051..a6da1f6 100644
--- a/common.c
+++ b/common.c
@@ -229,39 +229,112 @@ static void read_file_index_data(void) {
     }
 }
 
-bool decode_index(void) {
-    if (fseek(gInFile, -LZMA_STREAM_HEADER_SIZE, SEEK_END) == -1)
-		return false; // not seekable
+
+#define BWCHUNK 512
+
+typedef struct {
+	uint8_t buf[BWCHUNK];
+	off_t pos;
+	size_t size;
+} bw;
+
+static uint32_t *bw_read(bw *b) {
+	size_t sz = sizeof(uint32_t);
+	if (b->size < sz) {
+		if (b->pos < sz)
+			return NULL; // EOF
+		b->size = (b->pos > BWCHUNK) ? BWCHUNK : b->pos;
+		b->pos -= b->size;
+		if (fseeko(gInFile, b->pos, SEEK_SET) == -1)
+			return NULL;
+		if (fread(b->buf, b->size, 1, gInFile) != 1)
+			return NULL;
+	}
+	
+	b->size -= sz;
+	return &((uint32_t*)b->buf)[b->size / sz];
+}
+
+static off_t stream_padding(bw *b, off_t pos) {
+	b->pos = pos;
+	b->size = 0;
+	
+	for (off_t pad = 0; true; ++pad) {
+		uint32_t *i = bw_read(b);
+		if (!i)
+			die("Error reading stream padding");
+		if (*i != 0) {
+			b->size += sizeof(uint32_t);
+			return pad;
+		}
+	}
+}
+
+static void stream_footer(bw *b, lzma_stream_flags *flags) {
+	uint8_t ftr[LZMA_STREAM_HEADER_SIZE];
+	for (int i = sizeof(ftr) / sizeof(uint32_t) - 1; i >= 0; --i) {
+		uint32_t *p = bw_read(b);
+		if (!p)
+			die("Error reading stream footer");
+		*((uint32_t*)ftr + i) = *p;
+	}
 	
-    uint8_t hdrbuf[LZMA_STREAM_HEADER_SIZE];
-    if (fread(hdrbuf, LZMA_STREAM_HEADER_SIZE, 1, gInFile) != 1)
-        die("Error reading stream footer");
-    lzma_stream_flags flags;
-    if (lzma_stream_footer_decode(&flags, hdrbuf) != LZMA_OK)
+    if (lzma_stream_footer_decode(flags, ftr) != LZMA_OK)
         die("Error decoding stream footer");
-    
-    gCheck = flags.check;
-    size_t index_seek = -LZMA_STREAM_HEADER_SIZE - flags.backward_size;
-    if (fseek(gInFile, index_seek, SEEK_CUR) == -1)
+	gCheck = flags->check; // FIXME: multiple streams
+}
+
+static lzma_index *next_index(off_t *pos) {
+	bw b;
+	off_t pad = stream_padding(&b, *pos);
+	off_t eos = *pos - pad;
+	
+	lzma_stream_flags flags;
+	stream_footer(&b, &flags);
+	*pos = eos - LZMA_STREAM_HEADER_SIZE - flags.backward_size;
+    if (fseeko(gInFile, *pos, SEEK_SET) == -1)
         die("Error seeking to index");
-    if (lzma_index_decoder(&gStream, &gIndex, MEMLIMIT) != LZMA_OK)
+	
+	lzma_stream strm = LZMA_STREAM_INIT;
+	lzma_index *index;
+    if (lzma_index_decoder(&strm, &index, MEMLIMIT) != LZMA_OK)
         die("Error creating index decoder");
     
     uint8_t ibuf[CHUNKSIZE];
-    gStream.avail_in = 0;
+    strm.avail_in = 0;
     lzma_ret err = LZMA_OK;
     while (err != LZMA_STREAM_END) {
-        if (gStream.avail_in == 0) {
-            gStream.avail_in = fread(ibuf, 1, CHUNKSIZE, gInFile);
+        if (strm.avail_in == 0) {
+            strm.avail_in = fread(ibuf, 1, CHUNKSIZE, gInFile);
             if (ferror(gInFile))
                 die("Error reading index");
-            gStream.next_in = ibuf;
+            strm.next_in = ibuf;
         }
         
-        err = lzma_code(&gStream, LZMA_RUN);
+        err = lzma_code(&strm, LZMA_RUN);
         if (err != LZMA_OK && err != LZMA_STREAM_END)
             die("Error decoding index");
     }
+	
+	*pos = eos - lzma_index_stream_size(index);
+	if (fseeko(gInFile, *pos, SEEK_SET) == -1)
+		die("Error seeking to beginning of stream");
+	
+	
+	if (lzma_index_stream_flags(index, &flags) != LZMA_OK)
+		die("Error setting stream flags");
+	if (lzma_index_stream_padding(index, pad) != LZMA_OK)
+		die("Error setting stream padding");
+	return index;
+}
+
+bool decode_index(void) {
+    if (fseeko(gInFile, 0, SEEK_END) == -1)
+		return false; // not seekable
+	off_t pos = ftello(gInFile);
+	
+	gIndex = next_index(&pos);
+	
 	return true;
 }
 

From 4ef79b67d6b71b1a5c68048edc5155a888da2604 Mon Sep 17 00:00:00 2001
From: Dave Vasilevsky <dave@vasilevsky.ca>
Date: Sun, 4 Nov 2012 20:11:49 -0500
Subject: [PATCH 14/22] Reading multiple indices ok!

---
 common.c | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/common.c b/common.c
index a6da1f6..f5e3073 100644
--- a/common.c
+++ b/common.c
@@ -259,7 +259,7 @@ static off_t stream_padding(bw *b, off_t pos) {
 	b->pos = pos;
 	b->size = 0;
 	
-	for (off_t pad = 0; true; ++pad) {
+	for (off_t pad = 0; true; pad += sizeof(uint32_t)) {
 		uint32_t *i = bw_read(b);
 		if (!i)
 			die("Error reading stream padding");
@@ -333,7 +333,13 @@ bool decode_index(void) {
 		return false; // not seekable
 	off_t pos = ftello(gInFile);
 	
-	gIndex = next_index(&pos);
+	gIndex = NULL;
+	while (pos > 0) {
+		lzma_index *index = next_index(&pos);
+		if (gIndex && lzma_index_cat(index, gIndex, NULL) != LZMA_OK)
+			die("Error concatenating indices");
+		gIndex = index;
+	}
 	
 	return true;
 }

From dd86134d64d7fd044319fdec033ac540fdc85878 Mon Sep 17 00:00:00 2001
From: Dave Vasilevsky <dave@vasilevsky.ca>
Date: Sun, 4 Nov 2012 21:15:55 -0500
Subject: [PATCH 15/22] Remove global gCheck

---
 common.c | 73 ++++++++++++++++++++++++++++----------------------------
 pixz.h   |  6 -----
 read.c   | 26 ++++++++++++--------
 3 files changed, 53 insertions(+), 52 deletions(-)

diff --git a/common.c b/common.c
index f5e3073..334dc0b 100644
--- a/common.c
+++ b/common.c
@@ -5,16 +5,9 @@
 
 #pragma mark UTILS
 
-typedef struct {
-    lzma_block block;
-    lzma_filter filters[LZMA_FILTERS_MAX + 1];
-} block_wrapper_t;
-
 FILE *gInFile = NULL;
 lzma_stream gStream = LZMA_STREAM_INIT;
 
-lzma_check gCheck = LZMA_CHECK_NONE;
-
 
 void die(const char *fmt, ...) {
     va_list args;
@@ -36,32 +29,6 @@ char *xstrdup(const char *s) {
     return memcpy(r, s, len + 1); 
 }
 
-void *decode_block_start(off_t block_seek) {
-    if (fseeko(gInFile, block_seek, SEEK_SET) == -1)
-        die("Error seeking to block");
-    
-    // Some memory in which to keep the discovered filters safe
-    block_wrapper_t *bw = malloc(sizeof(block_wrapper_t));
-    bw->block = (lzma_block){ .check = gCheck, .filters = bw->filters,
-	 	.version = 0 };
-    
-    int b = fgetc(gInFile);
-    if (b == EOF || b == 0)
-        die("Error reading block size");
-    bw->block.header_size = lzma_block_header_size_decode(b);
-    uint8_t hdrbuf[bw->block.header_size];
-    hdrbuf[0] = (uint8_t)b;
-    if (fread(hdrbuf + 1, bw->block.header_size - 1, 1, gInFile) != 1)
-        die("Error reading block header");
-    if (lzma_block_header_decode(&bw->block, NULL, hdrbuf) != LZMA_OK)
-        die("Error decoding file index block header");
-    
-    if (lzma_block_decoder(&gStream, &bw->block) != LZMA_OK)
-        die("Error initializing file index stream");
-    
-    return bw;
-}
-
 bool is_multi_header(const char *name) {
     size_t i = strlen(name);
     while (i != 0 && name[i - 1] != '/')
@@ -82,6 +49,9 @@ static lzma_ret gFIBErr = LZMA_OK;
 static uint8_t gFIBInputBuf[CHUNKSIZE];
 static size_t gMoved = 0;
 
+static void *decode_file_index_start(off_t block_seek, lzma_check check);
+static lzma_vli find_file_index(void **bdatap);
+
 static char *read_file_index_name(void);
 static void read_file_index_make_space(void);
 static void read_file_index_data(void);
@@ -109,7 +79,38 @@ void free_file_index(void) {
     gFileIndex = gLastFile = NULL;
 }
 
-lzma_vli find_file_index(void **bdatap) {
+typedef struct {
+    lzma_block block;
+    lzma_filter filters[LZMA_FILTERS_MAX + 1];
+} block_wrapper_t;
+
+static void *decode_file_index_start(off_t block_seek, lzma_check check) {
+    if (fseeko(gInFile, block_seek, SEEK_SET) == -1)
+        die("Error seeking to block");
+    
+    // Some memory in which to keep the discovered filters safe
+    block_wrapper_t *bw = malloc(sizeof(block_wrapper_t));
+    bw->block = (lzma_block){ .check = check, .filters = bw->filters,
+	 	.version = 0 };
+    
+    int b = fgetc(gInFile);
+    if (b == EOF || b == 0)
+        die("Error reading block size");
+    bw->block.header_size = lzma_block_header_size_decode(b);
+    uint8_t hdrbuf[bw->block.header_size];
+    hdrbuf[0] = (uint8_t)b;
+    if (fread(hdrbuf + 1, bw->block.header_size - 1, 1, gInFile) != 1)
+        die("Error reading block header");
+    if (lzma_block_header_decode(&bw->block, NULL, hdrbuf) != LZMA_OK)
+        die("Error decoding file index block header");
+    
+    if (lzma_block_decoder(&gStream, &bw->block) != LZMA_OK)
+        die("Error initializing file index stream");
+    
+    return bw;
+}
+
+static lzma_vli find_file_index(void **bdatap) {
     if (!gIndex)
         decode_index();
         
@@ -119,7 +120,8 @@ lzma_vli find_file_index(void **bdatap) {
     lzma_vli loc = lzma_index_uncompressed_size(gIndex) - 1;
     if (lzma_index_iter_locate(&iter, loc))
         die("Can't locate file index block");
-    void *bdata = decode_block_start(iter.block.compressed_file_offset);
+    void *bdata = decode_file_index_start(iter.block.compressed_file_offset,
+		iter.stream.flags->check);
     
     gFileIndexBuf = malloc(gFIBSize);
     gStream.avail_out = gFIBSize;
@@ -281,7 +283,6 @@ static void stream_footer(bw *b, lzma_stream_flags *flags) {
 	
     if (lzma_stream_footer_decode(flags, ftr) != LZMA_OK)
         die("Error decoding stream footer");
-	gCheck = flags->check; // FIXME: multiple streams
 }
 
 static lzma_index *next_index(off_t *pos) {
diff --git a/pixz.h b/pixz.h
index 7e7b5fc..b7c7d61 100644
--- a/pixz.h
+++ b/pixz.h
@@ -50,8 +50,6 @@ uint64_t xle64dec(const uint8_t *d);
 void xle64enc(uint8_t *d, uint64_t n);
 size_t num_threads(void);
 
-void *decode_block_start(off_t block_seek);
-
 
 #pragma mark INDEX
 
@@ -64,13 +62,9 @@ struct file_index_t {
 
 extern file_index_t *gFileIndex, *gLastFile;
 
-// As discovered from footer
-extern lzma_check gCheck;
-
 bool is_multi_header(const char *name);
 bool decode_index(void); // true on success
 
-lzma_vli find_file_index(void **bdatap);
 lzma_vli read_file_index(void);
 void dump_file_index(FILE *out, bool verbose);
 void free_file_index(void);
diff --git a/read.c b/read.c
index 592fc66..eb25bfd 100644
--- a/read.c
+++ b/read.c
@@ -28,6 +28,7 @@ typedef struct {
 	size_t incap, outcap;
     size_t insize, outsize;
     off_t uoffset; // uncompressed offset
+	lzma_check check;
 } io_block_t;
 
 static void *block_create(void);
@@ -69,8 +70,8 @@ static bool rbuf_cycle(lzma_stream *stream, bool start, size_t skip);
 static void rbuf_consume(size_t bytes);
 static void rbuf_dispatch(void);
 
-static bool read_header(void);
-static bool read_block(void);
+static bool read_header(lzma_check *check);
+static bool read_block(lzma_check check);
 static void read_streaming(lzma_block *block);
 static void read_index(void);
 static void read_footer(void);
@@ -309,7 +310,7 @@ static void rbuf_dispatch(void) {
 }
 
 
-static bool read_header(void) {
+static bool read_header(lzma_check *check) {
 	lzma_stream_flags stream_flags;
 	rbuf_read_status st = rbuf_read(LZMA_STREAM_HEADER_SIZE);
 	if (st == RBUF_EOF)
@@ -321,14 +322,14 @@ static bool read_header(void) {
 		die("Not an XZ file");
 	else if (err != LZMA_OK)
 		die("Error decoding XZ header");
-	gCheck = stream_flags.check;
+	*check = stream_flags.check;
 	rbuf_consume(LZMA_STREAM_HEADER_SIZE);
 	return true;
 }
 
-static bool read_block(void) {
+static bool read_block(lzma_check check) {
     lzma_filter filters[LZMA_FILTERS_MAX + 1];
-    lzma_block block = { .filters = filters, .check = gCheck, .version = 0 };
+    lzma_block block = { .filters = filters, .check = check, .version = 0 };
 	
 	if (rbuf_read(1) != RBUF_FULL)
 		die("Error reading block header size");
@@ -349,6 +350,7 @@ static bool read_block(void) {
 	} else {
 		block_capacity(gRbuf, 0, outsize);
 		gRbuf->outsize = outsize;
+		gRbuf->check = check;
 		
 		if (rbuf_read(lzma_block_total_size(&block)) != RBUF_FULL)
 			die("Error reading block contents");
@@ -441,9 +443,10 @@ static void read_footer(void) {
 
 static void read_thread_noindex(void) {
 	bool empty = true;
-	while (read_header()) {
+	lzma_check check = LZMA_CHECK_NONE;
+	while (read_header(&check)) {
 		empty = false;
-		while (read_block())
+		while (read_block(check))
 			; // pass
 		read_index();
 		read_footer();
@@ -496,6 +499,7 @@ static void read_thread(void) {
             die("Error reading block contents");
         offset += bsize;
         ib->uoffset = iter.block.uncompressed_file_offset;
+		ib->check = iter.stream.flags->check;
         
         pipeline_split(pi);
     }
@@ -508,7 +512,8 @@ static void read_thread(void) {
 static void decode_thread(size_t thnum) {
     lzma_stream stream = LZMA_STREAM_INIT;
     lzma_filter filters[LZMA_FILTERS_MAX + 1];
-    lzma_block block = { .filters = filters, .check = gCheck, .version = 0 };
+    lzma_block block = { .filters = filters, .check = LZMA_CHECK_NONE,
+		.version = 0 };
     
     pipeline_item_t *pi;
     io_block_t *ib;
@@ -517,7 +522,8 @@ static void decode_thread(size_t thnum) {
         ib = (io_block_t*)(pi->data);
         
         block.header_size = lzma_block_header_size_decode(*(ib->input));
-        if (lzma_block_header_decode(&block, NULL, ib->input) != LZMA_OK)
+        block.check = ib->check;
+		if (lzma_block_header_decode(&block, NULL, ib->input) != LZMA_OK)
             die("Error decoding block header");
         if (lzma_block_decoder(&stream, &block) != LZMA_OK)
             die("Error initializing block decode");

From dd5f6d01e3dad7d82a0478fff0325c34b5535c2b Mon Sep 17 00:00:00 2001
From: Dave Vasilevsky <dave@vasilevsky.ca>
Date: Sun, 4 Nov 2012 21:23:18 -0500
Subject: [PATCH 16/22] Enable both seekable and non-seekable modes

---
 list.c | 4 +++-
 read.c | 5 +++--
 2 files changed, 6 insertions(+), 3 deletions(-)

diff --git a/list.c b/list.c
index 9798d29..3dbf614 100644
--- a/list.c
+++ b/list.c
@@ -3,7 +3,9 @@
 #pragma mark FUNCTION DEFINITIONS
 
 void pixz_list(bool tar) {
-    decode_index();
+    if (!decode_index())
+		die("Can't list non-seekable input");
+	
     lzma_index_iter iter;
     lzma_index_iter_init(&iter, gIndex);
 
diff --git a/read.c b/read.c
index eb25bfd..5058b16 100644
--- a/read.c
+++ b/read.c
@@ -85,7 +85,7 @@ static lzma_vli gFileIndexOffset = 0;
 #pragma mark MAIN
 
 void pixz_read(bool verify, size_t nspecs, char **specs) {
-    if (0 && decode_index()) { // FIXME
+    if (decode_index()) { // FIXME
 	    if (verify)
 	        gFileIndexOffset = read_file_index();
 	    wanted_files(nspecs, specs);    	
@@ -96,7 +96,8 @@ void pixz_read(bool verify, size_t nspecs, char **specs) {
         debug("want: %s", w->name);
 #endif
     
-    pipeline_create(block_create, block_free, read_thread_noindex, decode_thread);
+    pipeline_create(block_create, block_free,
+		gIndex ? read_thread : read_thread_noindex, decode_thread);
     if (verify && gFileIndexOffset) {
 		// FIXME: verify this works with noindex/streamed reading
 		// FIXME: don't stop on End Of Archive

From 3d5d1f1d399df15557098167da9f2feafea66a4a Mon Sep 17 00:00:00 2001
From: Dave Vasilevsky <dave@vasilevsky.ca>
Date: Sun, 4 Nov 2012 23:27:59 -0500
Subject: [PATCH 17/22] Handle oversized blocks

---
 read.c | 34 ++++++++++++++++++++++------------
 1 file changed, 22 insertions(+), 12 deletions(-)

diff --git a/read.c b/read.c
index 5058b16..b979783 100644
--- a/read.c
+++ b/read.c
@@ -55,6 +55,7 @@ static void tar_write_last(void);
 #pragma mark DECLARE READ BUFFER
 
 #define STREAMSIZE (1024 * 1024)
+#define MAXSPLITSIZE (64 * 1024 * 1024) // xz -9 blocksize
 
 static pipeline_item_t *gRbufPI = NULL;
 static io_block_t *gRbuf = NULL;
@@ -71,7 +72,7 @@ static void rbuf_consume(size_t bytes);
 static void rbuf_dispatch(void);
 
 static bool read_header(lzma_check *check);
-static bool read_block(lzma_check check);
+static bool read_block(bool force_stream, lzma_check check);
 static void read_streaming(lzma_block *block);
 static void read_index(void);
 static void read_footer(void);
@@ -328,7 +329,7 @@ static bool read_header(lzma_check *check) {
 	return true;
 }
 
-static bool read_block(lzma_check check) {
+static bool read_block(bool force_stream, lzma_check check) {
     lzma_filter filters[LZMA_FILTERS_MAX + 1];
     lzma_block block = { .filters = filters, .check = check, .version = 0 };
 	
@@ -346,7 +347,9 @@ static bool read_block(lzma_check check) {
 		die("Error decoding block header");
 		
 	size_t comp = block.compressed_size, outsize = block.uncompressed_size;
-	if (comp == LZMA_VLI_UNKNOWN || outsize == LZMA_VLI_UNKNOWN) {
+	if (force_stream || comp == LZMA_VLI_UNKNOWN
+			|| outsize == LZMA_VLI_UNKNOWN
+			|| outsize > MAXSPLITSIZE) {
 		read_streaming(&block);
 	} else {
 		block_capacity(gRbuf, 0, outsize);
@@ -447,7 +450,7 @@ static void read_thread_noindex(void) {
 	lzma_check check = LZMA_CHECK_NONE;
 	while (read_header(&check)) {
 		empty = false;
-		while (read_block(check))
+		while (read_block(false, check))
 			; // pass
 		read_index();
 		read_footer();
@@ -494,15 +497,22 @@ static void read_thread(void) {
         if (offset != boffset) {
             fseeko(gInFile, boffset, SEEK_SET);
             offset = boffset;
-        }        
-        ib->insize = fread(ib->input, 1, bsize, gInFile);
-        if (ib->insize < bsize)
-            die("Error reading block contents");
-        offset += bsize;
-        ib->uoffset = iter.block.uncompressed_file_offset;
-		ib->check = iter.stream.flags->check;
+        }
+		
+		if (iter.block.uncompressed_size > MAXSPLITSIZE) { // must stream
+			if (gRbuf)
+				rbuf_consume(gRbuf->insize); // clear
+			read_block(true, iter.stream.flags->check);
+		} else {
+	        ib->insize = fread(ib->input, 1, bsize, gInFile);
+	        if (ib->insize < bsize)
+	            die("Error reading block contents");
+	        offset += bsize;
+	        ib->uoffset = iter.block.uncompressed_file_offset;
+			ib->check = iter.stream.flags->check;
         
-        pipeline_split(pi);
+	        pipeline_split(pi);
+		}
     }
     
     pipeline_stop();

From 0aa5ae7d1a2c3b65a948692845be72d700a531af Mon Sep 17 00:00:00 2001
From: Dave Vasilevsky <dave@vasilevsky.ca>
Date: Mon, 5 Nov 2012 01:10:32 -0500
Subject: [PATCH 18/22] Output post-tar data (if it won't interfere with
 wanted-file filtering)

---
 common.c |  2 +-
 read.c   | 16 +++++++++-------
 2 files changed, 10 insertions(+), 8 deletions(-)

diff --git a/common.c b/common.c
index 334dc0b..8077541 100644
--- a/common.c
+++ b/common.c
@@ -342,7 +342,7 @@ bool decode_index(void) {
 		gIndex = index;
 	}
 	
-	return true;
+	return (gIndex != NULL);
 }
 
 
diff --git a/read.c b/read.c
index b979783..a30e41c 100644
--- a/read.c
+++ b/read.c
@@ -45,6 +45,7 @@ static off_t gArLastOffset;
 static size_t gArLastSize;
 static wanted_t *gArWanted = NULL;
 static bool gArNextItem = false;
+static bool gExplicitFiles = false;
 
 static int tar_ok(struct archive *ar, void *ref);
 static ssize_t tar_read(struct archive *ar, void *ref, const void **bufp);
@@ -86,10 +87,11 @@ static lzma_vli gFileIndexOffset = 0;
 #pragma mark MAIN
 
 void pixz_read(bool verify, size_t nspecs, char **specs) {
-    if (decode_index()) { // FIXME
+    if (decode_index()) {
 	    if (verify)
 	        gFileIndexOffset = read_file_index();
-	    wanted_files(nspecs, specs);    	
+	    wanted_files(nspecs, specs);
+		gExplicitFiles = nspecs;
     }
 
 #if DEBUG
@@ -100,7 +102,6 @@ void pixz_read(bool verify, size_t nspecs, char **specs) {
     pipeline_create(block_create, block_free,
 		gIndex ? read_thread : read_thread_noindex, decode_thread);
     if (verify && gFileIndexOffset) {
-		// FIXME: verify this works with noindex/streamed reading
 		// FIXME: don't stop on End Of Archive
         gArWanted = gWantedFiles;
         wanted_t *w = gWantedFiles, *wlast = NULL;
@@ -146,7 +147,8 @@ void pixz_read(bool verify, size_t nspecs, char **specs) {
         if (w && w->name)
             die("File %s missing in archive", w->name);
         tar_write_last(); // write whatever's left
-    } else {
+    }
+	if (!gExplicitFiles) {
         pipeline_item_t *pi;
         while ((pi = pipeline_merged())) {
             io_block_t *ib = (io_block_t*)(pi->data);
@@ -475,7 +477,7 @@ static void read_thread(void) {
             continue;
         
         // Do we need this block?
-        if (gWantedFiles) {
+        if (gWantedFiles && gExplicitFiles) {
             off_t uend = iter.block.uncompressed_file_offset +
                 iter.block.uncompressed_size;
             if (!w || w->start >= uend) {
@@ -565,7 +567,7 @@ static int tar_ok(struct archive *ar, void *ref) {
 }
 
 static bool tar_next_block(void) {
-    if (gArItem && !gArNextItem && gArWanted) {
+    if (gArItem && !gArNextItem && gArWanted && gExplicitFiles) {
         io_block_t *ib = (io_block_t*)(gArItem->data);
         if (gArWanted->start < ib->uoffset + ib->outsize)
             return true; // No need
@@ -598,7 +600,7 @@ static ssize_t tar_read(struct archive *ar, void *ref, const void **bufp) {
     off_t off;
     size_t size;
     io_block_t *ib = (io_block_t*)(gArItem->data);
-    if (gWantedFiles) {
+    if (gWantedFiles && gExplicitFiles) {
         debug("tar want: %s", gArWanted->name);
         off = gArWanted->start - ib->uoffset;
         size = gArWanted->size;

From e20c330366cf7e9466296371b25f844af1c42b3f Mon Sep 17 00:00:00 2001
From: Dave Vasilevsky <dave@vasilevsky.ca>
Date: Mon, 5 Nov 2012 01:14:59 -0500
Subject: [PATCH 19/22] Makefile cleanup

---
 Makefile | 19 ++++++++++---------
 pixz.h   |  4 +++-
 2 files changed, 13 insertions(+), 10 deletions(-)

diff --git a/Makefile b/Makefile
index 4901162..c09e45a 100644
--- a/Makefile
+++ b/Makefile
@@ -2,22 +2,23 @@ ifneq ($(shell gcc -v 2>&1 | grep 'Apple Inc'),)
 	APPLE=1
 endif
 
-ifdef APPLE
-ifeq ($(CC),gcc)
-	LDFLAGS += -search_paths_first
-endif
-endif
 OPT = -g -O0
-CFLAGS = $(patsubst %,-I%/include,$(LIBPREFIX)) $(OPT) -std=gnu99 \
+MYCFLAGS = $(patsubst %,-I%/include,$(LIBPREFIX)) $(OPT) -std=gnu99 \
 	-Wall -Wno-unknown-pragmas
-LDFLAGS = $(patsubst %,-L%/lib,$(LIBPREFIX)) $(OPT) -Wall
+MYLDFLAGS = $(patsubst %,-L%/lib,$(LIBPREFIX)) $(OPT) -Wall
 
 THREADS = -lpthread
 LIBADD = $(THREADS) -llzma -larchive
 
 CC = gcc
-COMPILE = $(CC) $(CFLAGS) -c -o
-LD = $(CC) $(LDFLAGS) -o
+COMPILE = $(CC) $(MYCFLAGS) $(CFLAGS) -c -o
+LD = $(CC) $(MYLDFLAGS) $(LDFLAGS) -o
+
+ifdef APPLE
+ifeq ($(CC),gcc)
+	MYLDFLAGS += -search_paths_first
+endif
+endif
 
 PROGS = pixz
 COMMON = common.o endian.o cpu.o read.o write.o list.o
diff --git a/pixz.h b/pixz.h
index b7c7d61..df9a398 100644
--- a/pixz.h
+++ b/pixz.h
@@ -20,7 +20,9 @@
 
 #define CHUNKSIZE 4096
 
-#define DEBUG 0
+#ifndef DEBUG
+	#define DEBUG 0
+#endif
 #if DEBUG
     #define debug(str, ...) fprintf(stderr, str "\n", ##__VA_ARGS__)
 #else

From 09c60316cf1b7b0aaa7d5f2aee411ad1d8768dbd Mon Sep 17 00:00:00 2001
From: Dave Vasilevsky <dave@vasilevsky.ca>
Date: Mon, 5 Nov 2012 02:39:20 -0500
Subject: [PATCH 20/22] Use heuristic to omit file index

---
 read.c | 58 +++++++++++++++++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 55 insertions(+), 3 deletions(-)

diff --git a/read.c b/read.c
index a30e41c..b9470b1 100644
--- a/read.c
+++ b/read.c
@@ -23,12 +23,16 @@ static void wanted_free(wanted_t *w);
 
 #pragma mark DECLARE PIPELINE
 
+typedef enum { BLOCK_SIZED, BLOCK_UNSIZED, BLOCK_CONTINUATION } block_type;
+
 typedef struct {
     uint8_t *input, *output;
 	size_t incap, outcap;
     size_t insize, outsize;
     off_t uoffset; // uncompressed offset
 	lzma_check check;
+	
+	block_type btype;
 } io_block_t;
 
 static void *block_create(void);
@@ -83,6 +87,9 @@ static void read_footer(void);
 
 static lzma_vli gFileIndexOffset = 0;
 
+static bool taste_tar(io_block_t *ib);
+static bool taste_file_index(io_block_t *ib);
+
 
 #pragma mark MAIN
 
@@ -99,6 +106,7 @@ void pixz_read(bool verify, size_t nspecs, char **specs) {
         debug("want: %s", w->name);
 #endif
     
+	bool first = true;
     pipeline_create(block_create, block_free,
 		gIndex ? read_thread : read_thread_noindex, decode_thread);
     if (verify && gFileIndexOffset) {
@@ -144,15 +152,36 @@ void pixz_read(bool verify, size_t nspecs, char **specs) {
             wlast = w;
             w = w->next;
         }
+		archive_read_finish(ar);
         if (w && w->name)
             die("File %s missing in archive", w->name);
         tar_write_last(); // write whatever's left
+		first = false;
     }
 	if (!gExplicitFiles) {
-        pipeline_item_t *pi;
+		bool tar = false;
+		bool all_sized = true;
+        bool skipping = false;
+		
+		pipeline_item_t *pi;
         while ((pi = pipeline_merged())) {
             io_block_t *ib = (io_block_t*)(pi->data);
-            fwrite(ib->output, ib->outsize, 1, gOutFile);
+			if (first) {
+				tar = taste_tar(ib);
+				first = false;
+			}
+			if (skipping && ib->btype != BLOCK_CONTINUATION) {
+				die("File index heuristic failed, retry with -t flag");
+				skipping = false;
+			}
+			if (verify && !skipping && !first && tar && all_sized
+					&& ib->btype == BLOCK_UNSIZED && taste_file_index(ib))
+				skipping = true;
+			if (ib->btype != BLOCK_SIZED)
+				all_sized = false;
+			
+			if (!skipping)
+				fwrite(ib->output, ib->outsize, 1, gOutFile);
             queue_push(gPipelineStartQ, PIPELINE_ITEM, pi);
         }
     }
@@ -357,6 +386,7 @@ static bool read_block(bool force_stream, lzma_check check) {
 		block_capacity(gRbuf, 0, outsize);
 		gRbuf->outsize = outsize;
 		gRbuf->check = check;
+		gRbuf->btype = BLOCK_SIZED;
 		
 		if (rbuf_read(lzma_block_total_size(&block)) != RBUF_FULL)
 			die("Error reading block contents");
@@ -372,6 +402,7 @@ static void read_streaming(lzma_block *block) {
 	rbuf_cycle(&stream, true, block->header_size);
 	stream.avail_out = 0;
 	
+	bool first = true;
     pipeline_item_t *pi = NULL;
     io_block_t *ib = NULL;
     
@@ -384,9 +415,11 @@ static void read_streaming(lzma_block *block) {
 			if (ib) {
 				ib->outsize = ib->outcap;
 				pipeline_dispatch(pi, gPipelineMergeQ);
+				first = false;
 			}
 			queue_pop(gPipelineStartQ, (void**)&pi);
 			ib = (io_block_t*)pi->data;
+			ib->btype = (first ? BLOCK_UNSIZED : BLOCK_CONTINUATION);
 			block_capacity(ib, 0, STREAMSIZE);
 			stream.next_out = ib->output;
 			stream.avail_out = ib->outcap;
@@ -512,7 +545,8 @@ static void read_thread(void) {
 	        offset += bsize;
 	        ib->uoffset = iter.block.uncompressed_file_offset;
 			ib->check = iter.stream.flags->check;
-        
+			ib->btype = BLOCK_SIZED;
+			
 	        pipeline_split(pi);
 		}
     }
@@ -626,3 +660,21 @@ static ssize_t tar_read(struct archive *ar, void *ref, const void **bufp) {
         *bufp = ib->output + off;
     return size;
 }
+
+
+#pragma mark UTILS
+
+static bool taste_tar(io_block_t *ib) {
+    struct archive *ar = archive_read_new();
+    archive_read_support_compression_none(ar);
+    archive_read_support_format_tar(ar);
+    archive_read_open_memory(ar, ib->output, ib->outsize);
+    struct archive_entry *entry;
+    bool ok = (archive_read_next_header(ar, &entry) == ARCHIVE_OK);
+	archive_read_finish(ar);
+	return ok;
+}
+
+static bool taste_file_index(io_block_t *ib) {
+	return xle64dec(ib->output) == PIXZ_INDEX_MAGIC;
+}

From 14f5644bd8c1846e268329c3c59d53d3568950e1 Mon Sep 17 00:00:00 2001
From: Dave Vasilevsky <dave@vasilevsky.ca>
Date: Mon, 5 Nov 2012 02:45:03 -0500
Subject: [PATCH 21/22] Fixup docs

---
 TODO   | 8 +++++++-
 read.c | 4 ----
 2 files changed, 7 insertions(+), 5 deletions(-)

diff --git a/TODO b/TODO
index 64d4d3b..40ca5b1 100644
--- a/TODO
+++ b/TODO
@@ -15,7 +15,13 @@ BUGS
 	* performance lags under IO?
 	* slow input -> CPUs idle while waiting for input
 	* safe extraction
-		* abort if block size exceeded
+	* sanity checks, from spec:
+		- CRCs are already tested, I think?
+		- backward size should match file
+		- reserved flags must be zero
+		- header vs footer flags
+		- uncompressed size field vs actual uncompressed size
+		- index vs actual blocks
 
 EFFICIENCY
 	* more efficient indexing: ranges? sorted? mtree?
diff --git a/read.c b/read.c
index b9470b1..1085e71 100644
--- a/read.c
+++ b/read.c
@@ -110,7 +110,6 @@ void pixz_read(bool verify, size_t nspecs, char **specs) {
     pipeline_create(block_create, block_free,
 		gIndex ? read_thread : read_thread_noindex, decode_thread);
     if (verify && gFileIndexOffset) {
-		// FIXME: don't stop on End Of Archive
         gArWanted = gWantedFiles;
         wanted_t *w = gWantedFiles, *wlast = NULL;
         bool lastmulti = false;
@@ -439,7 +438,6 @@ static void read_streaming(lzma_block *block) {
 }
 
 static void read_index(void) {
-	// FIXME: verify it matches the blocks?
     lzma_stream stream = LZMA_STREAM_INIT;
 	lzma_index *index;
 	if (lzma_index_decoder(&stream, &index, MEMLIMIT) != LZMA_OK)
@@ -459,7 +457,6 @@ static void read_index(void) {
 }
 
 static void read_footer(void) {
-	// FIXME: compare with header?
 	lzma_stream_flags stream_flags;
 	if (rbuf_read(LZMA_STREAM_HEADER_SIZE) != RBUF_FULL)
 		die("Error reading stream footer");
@@ -489,7 +486,6 @@ static void read_thread_noindex(void) {
 			; // pass
 		read_index();
 		read_footer();
-		// FIXME: don't output the pixz file index! heuristic?
 	}
 	if (empty)
 		die("Empty input");

From 267e538c5eedd99a436a542d45d3a9e0754298c8 Mon Sep 17 00:00:00 2001
From: Dave Vasilevsky <dave@vasilevsky.ca>
Date: Sat, 10 Nov 2012 00:00:35 -0500
Subject: [PATCH 22/22] More file-index skipping fixes

Don't detect the file index on multi-stream files; that usage is not supported.

Handle multi-stream files correctly when using heuristic skipping.
---
 common.c |  3 +++
 read.c   | 26 ++++++++++++++------------
 2 files changed, 17 insertions(+), 12 deletions(-)

diff --git a/common.c b/common.c
index 8077541..0740d30 100644
--- a/common.c
+++ b/common.c
@@ -120,6 +120,9 @@ static lzma_vli find_file_index(void **bdatap) {
     lzma_vli loc = lzma_index_uncompressed_size(gIndex) - 1;
     if (lzma_index_iter_locate(&iter, loc))
         die("Can't locate file index block");
+	if (iter.stream.number != 1)
+		return 0; // Too many streams for one file index
+	
     void *bdata = decode_file_index_start(iter.block.compressed_file_offset,
 		iter.stream.flags->check);
     
diff --git a/read.c b/read.c
index 1085e71..a496148 100644
--- a/read.c
+++ b/read.c
@@ -106,7 +106,6 @@ void pixz_read(bool verify, size_t nspecs, char **specs) {
         debug("want: %s", w->name);
 #endif
     
-	bool first = true;
     pipeline_create(block_create, block_free,
 		gIndex ? read_thread : read_thread_noindex, decode_thread);
     if (verify && gFileIndexOffset) {
@@ -155,27 +154,30 @@ void pixz_read(bool verify, size_t nspecs, char **specs) {
         if (w && w->name)
             die("File %s missing in archive", w->name);
         tar_write_last(); // write whatever's left
-		first = false;
     }
 	if (!gExplicitFiles) {
-		bool tar = false;
-		bool all_sized = true;
-        bool skipping = false;
+		/* Heuristics for detecting pixz file index:
+		 *    - Input must be streaming (otherwise read_thread does this) 
+		 *    - Data must look tar-like
+		 *    - Must have all sized blocks, followed by unsized file index */
+		bool start = !gIndex && verify,
+			 tar = false, all_sized = true, skipping = false;
 		
 		pipeline_item_t *pi;
         while ((pi = pipeline_merged())) {
             io_block_t *ib = (io_block_t*)(pi->data);
-			if (first) {
-				tar = taste_tar(ib);
-				first = false;
-			}
 			if (skipping && ib->btype != BLOCK_CONTINUATION) {
-				die("File index heuristic failed, retry with -t flag");
+				fprintf(stderr,
+					"Warning: File index heuristic failed, use -t flag.\n");
 				skipping = false;
 			}
-			if (verify && !skipping && !first && tar && all_sized
+			if (!skipping && tar && !start && all_sized
 					&& ib->btype == BLOCK_UNSIZED && taste_file_index(ib))
 				skipping = true;
+			if (start) {
+				tar = taste_tar(ib);
+				start = false;
+			}
 			if (ib->btype != BLOCK_SIZED)
 				all_sized = false;
 			
@@ -541,7 +543,7 @@ static void read_thread(void) {
 	        offset += bsize;
 	        ib->uoffset = iter.block.uncompressed_file_offset;
 			ib->check = iter.stream.flags->check;
-			ib->btype = BLOCK_SIZED;
+			ib->btype = BLOCK_SIZED; // Indexed blocks always sized
 			
 	        pipeline_split(pi);
 		}