Update bzip2.js to latest version by @gbarco

openid
Marcin Kulik 12 years ago
parent 8f51defdaa
commit 1410fd390f

@ -13,213 +13,122 @@
var ArchUtils = (function(){ var ArchUtils = (function(){
'use strict'; 'use strict';
// python functions // python functions eliminated, Gonzalo
/**
 * Python-style ord(): numeric code of the first character of `c`.
 * @param {*} c value to inspect; it is converted to a string first
 * @returns {number} UTF-16 code unit at index 0, or NaN for an empty string
 */
function ord(c) {
  var text = String(c);
  return text.charCodeAt(0);
}
/**
 * Python-style chr(): one-character string for a UTF-16 code unit.
 * @param {number} n character code
 * @returns {string} single-character string built by String.fromCharCode
 */
function chr(n) {
  var character = String.fromCharCode(n);
  return character;
}
/**
 * Python-style sum(): folds the elements of `l` together with `+`,
 * starting from 0.
 * @param {Array} l elements to add up
 * @returns {*} 0 for an empty array, otherwise the accumulated `+` result
 */
function sum(l) {
  var total = 0;
  // forEach skips holes in sparse arrays, as a reduce-based fold does
  l.forEach(function (item) {
    total = total + item;
  });
  return total;
}
// NOTE: JavaScript's for...in loop iterates over keys (array indices), not values,
// so `for (x in range(...))` does not behave like Python's `for x in range(...)`.
/**
 * Python-style range(): builds an array of numbers.
 *
 *   range()                  -> []
 *   range(stop)              -> 0, 1, ... up to but excluding stop
 *   range(start, stop)       -> start, start+1, ... excluding stop
 *   range(start, stop, step) -> start, start+step, ... excluding stop
 *
 * @param {number} [start] first value (or the stop value when it is the only argument)
 * @param {number} [stop] exclusive end of the sequence
 * @param {number} [step] increment, may be negative; defaults to 1
 * @returns {number[]} the generated sequence; empty when start equals stop
 *   or when step points away from stop
 * @throws {RangeError} when step is 0 and start differs from stop, because
 *   such a sequence would never reach stop
 */
function range(start, stop, step) {
  // The number of arguments actually passed selects the call form.
  switch (arguments.length) {
    case 0: return [];
    case 1: stop = start; start = 0; step = 1; break;
    case 2: step = 1; break;
  }
  var a = [];
  // Nothing to produce: empty interval, or step heading away from stop.
  if (start === stop || (stop - start) * step < 0) return a;
  // A zero step would loop forever; fail loudly instead.
  if (step === 0) throw new RangeError("range() step must not be zero");
  var ascending = start < stop;
  for (var i = start; ascending ? i < stop : i > stop; i += step) {
    a.push(i);
  }
  return a;
}
/** /**
* bwt_reverse code from wikipedia (slightly modified) * bwt_reverse code from wikipedia (slightly modified)
* @url http://en.wikipedia.org/wiki/Burrows%E2%80%93Wheeler_transform * @url http://en.wikipedia.org/wiki/Burrows%E2%80%93Wheeler_transform
* @license: CC-SA 3.0 * @license: CC-SA 3.0
*/ */
function bwt_reverse(src, primary) { function bwt_reverse(src, primary) {
var len = src.length; var len = src.length;
if (primary >= len) throw RangeError("Out of bound"); if (primary >= len) throw RangeError("Out of bound");
if (primary < 0) throw RangeError("Out of bound"); if (primary < 0) throw RangeError("Out of bound");
if (typeof src == 'string') { //only used on arrays, optimized, Gonzalo
var A = src.split(''); var A = src;
} else { src = src.join('');
var A = src;
src = src.join('');
}
A.sort(); A.sort();
var start = {}; var start = {};
for (var i = len-1; i >= 0; i--) start[A[i]] = i; for (var i = len-1; i >= 0; i--) start[A[i]] = i;
var links = []; var links = [];
for (i = 0; i < len; i++) links.push(start[src[i]]++); for (i = 0; i < len; i++) links.push(start[src[i]]++);
var i, first = A[i = primary], ret = []; var i, first = A[i = primary], ret = [];
//while (i != primary) {
for (var j = 1; j < len; j++) { for (var j = 1; j < len; j++) {
ret.push(A[i = links[i]]); ret.push(A[i = links[i]]);
} }
return first + ret.reverse().join(''); return first + ret.reverse().join('');
} }
function move_to_front(a, c) { //move_to_front is always used to store result in array, optimized, Gonzalo
function move_to_front_and_store(a, c, buff) {
var v = a[c]; var v = a[c];
for (var i = c; i > 0; a[i] = a[--i]); for (var i = c; i > 0; a[i] = a[--i]);
a[0] = v; buff.push(a[0] = v);
}
/**
* @class BitfieldBase
* base class for bit-precision reading from stream
*/
var BitfieldBase = function() {
// init
this.init = function(x) {
this._masks = [];
for (var i = 0; i < 31; i++) this._masks[i] = (1 << i) - 1;
this._masks[31] = -0x80000000;
if (x instanceof BitfieldBase) {
this.f = x.f;
this.bits = x.bits;
this.bitfield = x.bitfield;
this.count = x.count;
} else {
this.f = x;
this.bits = 0;
this.bitfield = 0x0;
this.count = 0;
}
}
// FIXME: this will throw an Exception when one tries to read zero-length string
this._read = function(n) {
var s = this.f.substr(this.count, n);
if (!s) throw RangeError("Length Error");
this.count += s.length;
return s;
}
this._readByte = function _readByte() {
return this.f.charCodeAt(this.count++);
}
this.needbits = function(n) {
do { this._more() } while (this.bits < n);
}
this.toskip = function() {
return this.bits & 0x7;
}
this.align = function() {
this.readbits(this.toskip());
}
this.dropbits = function(n) {
if (typeof n == 'undefined') n = 8;
while (n >= this.bits && n > 7) {
n -= this.bits;
this.bits = 0;
n -= (this.f._read(n >> 3)).length << 3;
}
if (n) this.readbits(n);
}
this.dropbytes = function(n) {
if (typeof n == 'undefined') n = 1;
this.dropbits(n << 3);
}
// some function for debugging
this.tell = function() {
return [this.count - ((this.bits+7) >> 3), 7 - ((this.bits-1) & 0x7)];
}
} }
// not used after all // BitfieldBase never used directly, optimized, Gonzalo
/* var Bitfield = function() {
this._more = function() {
this.bitfield += this._readByte() << this.bits;
this.bits += 8;
}
this.readbits = function(n) {
if (typeof n == 'undefined') n = 8;
if (n >= 32) {
var n2 = n >> 1;
return this.readbits(n2) * (1 << n2) + this.readbits(n - n2);
}
if (n > this.bits)
this.needbits(n);
var r = this.bitfield & this._masks[n];
this.bits -= n;
this.bitfield >>= n;
return r;
}
}
Bitfield.prototype = new BitfieldBase();*/
/** /**
* @class BitfieldBase * @class RBitfield
* right-sided bitfield for reading bits in byte from right to left * right-sided bitfield for reading bits in byte from right to left
*/ */
var RBitfield = function() { var RBitfield = function() {
this._more = function() { this.init = function(x) {
this.bitfield = (this.bitfield << 8) + this._readByte(); this.masks = [];
this.bits += 8; for (var i = 0; i < 31; i++) this.masks[i] = (1 << i) - 1;
this.masks[31] = -0x80000000;
//eliminated support for RBitfield.init( RBitfield ), never used, Gonzalo
this.f = x;
this.bits = 0;
this.bitfield = 0x0;
this.count = 0;
} }
//_read not used, optimized, Gonzalo
//readByte inlined, Gonzalo
//needbits inlined, Gonzalo
//align inlined, Gonzalo
//toskip inlined, Gonzalo
// this.dropbytes not used, eliminated, Gonzalo
// this.tell not used, eliminated, Gonzalo
// since js truncate args to int32 with bit operators // since js truncate args to int32 with bit operators
// we need to specific processing for n >= 32 bits reading // we need to specific processing for n >= 32 bits reading
// separate function is created for optimization purposes // separate function is created for optimization purposes
//readbits2 always called with constants >=32, check removed, Gonzalo
this.readbits2 = function readbits2(n) { this.readbits2 = function readbits2(n) {
if (n >= 32) { //only for n>=32!!!, check removed
var n2 = n >> 1; var n2 = n >> 1;
return this.readbits(n2) * (1 << n2) + this.readbits(n - n2); return this.readbits(n2) * (1 << n2) + this.readbits(n - n2);
} else {
return this.readbits(n);
}
} }
this.readbits = function readbits(n) { this.readbits = function readbits(n) {
//if (n > this.bits) this.needbits(n); //if (n > this.bits) this.needbits(n);
// INLINED: needbits // INLINED: needbits, readByte
while (this.bits < n) { while (this.bits < n) {
this.bitfield = (this.bitfield << 8) + this._readByte(); this.bitfield = (this.bitfield << 8) + this.f.charCodeAt(this.count++);
this.bits += 8; this.bits += 8;
} }
var m = this._masks[n]; var m = this.masks[n];
var r = (this.bitfield >> (this.bits - n)) & m; var r = (this.bitfield >> (this.bits - n)) & m;
this.bits -= n; this.bits -= n;
this.bitfield &= ~(m << this.bits); this.bitfield &= ~(m << this.bits);
return r; return r;
} }
} }
RBitfield.prototype = new BitfieldBase();
/** /**
* @class HuffmanLength * @class HuffmanLength
* utility class, used for comparison of huffman codes * utility class, used for comparison of huffman codes
*/ */
var HuffmanLength = function(code, bits) { var HuffmanLength = function(code, bits) {
if (typeof bits == "undefined") bits = 0;
this.code = code; this.code = code;
this.bits = bits; this.bits = bits;
this.symbol = undefined; this.symbol = undefined;
} //cropped unused functions and needless checks, Gonzalo
this.toString = function() {
return [this.code, this.bits, this.symbol/*, this.reverse_symbol*/]; //class HuffmanTable never used directly..., optimized, Gonzalo
}
this.valueOf = function() {
return this.bits * 1000 + this.code;
}
}
/** /**
* @class HuffmanLength * @class OrderedHuffmanTable
* utility class for working with huffman table * utility class for working with huffman table
*/ */
var HuffmanTable = function() { var OrderedHuffmanTable = function() {
this.init = function initHuffmanTable(bootstrap) { this.process = function(lengths) {
var len = lengths.length;
var z = [];
for (var i = 0; i < len; i++) {
z.push([i, lengths[i]]);
}
z.push([len, -1]);
var l = []; var l = [];
var b = bootstrap[0]; var b = z[0];
var start = b[0], bits = b[1]; var start = b[0], bits = b[1];
for (var p = 1; p < bootstrap.length; p++) { for (var p = 1; p < z.length; p++) {
var finish = bootstrap[p][0], endbits = bootstrap[p][1]; var finish = z[p][0], endbits = z[p][1];
if (bits) if (bits)
for (var code = start; code < finish; code++) for (var code = start; code < finish; code++)
l.push(new HuffmanLength(code, bits)); l.push(new HuffmanLength(code, bits));
@ -227,14 +136,13 @@ var ArchUtils = (function(){
bits = endbits; bits = endbits;
if (endbits == -1) break; if (endbits == -1) break;
} }
l.sort(function cmpHuffmanTable(a, b){ l.sort(function (a, b) { //function cmpHuffmanTable(a, b), can be anonymous, optimized, Gonzalo
return (a.bits - b.bits) || (a.code - b.code); return (a.bits - b.bits) || (a.code - b.code);
}); });
this.table = l; this.table = l;
}
//inlined populate_huffman_symbols, Gonzalo
this.populate_huffman_symbols = function() { var temp_bits = 0;
var bits = 0;
var symbol = -1; var symbol = -1;
// faht = Fast Access Huffman Table // faht = Fast Access Huffman Table
this.faht = []; this.faht = [];
@ -242,15 +150,15 @@ var ArchUtils = (function(){
for (var i = 0; i < this.table.length; i++) { for (var i = 0; i < this.table.length; i++) {
var x = this.table[i]; var x = this.table[i];
symbol += 1; symbol += 1;
if (x.bits != bits) { if (x.bits != temp_bits ) {
symbol <<= x.bits - bits; symbol <<= x.bits - temp_bits ;
cb = this.faht[bits = x.bits] = {}; cb = this.faht[temp_bits = x.bits] = {};
} }
cb[x.symbol = symbol] = x; cb[x.symbol = symbol] = x;
} }
}
//inlined min_max_bits
this.min_max_bits = function() {
this.min_bits = 16; this.min_bits = 16;
this.max_bits = -1; this.max_bits = -1;
this.table.forEach(function(x){ this.table.forEach(function(x){
@ -258,39 +166,22 @@ var ArchUtils = (function(){
if (x.bits > this.max_bits) this.max_bits = x.bits; if (x.bits > this.max_bits) this.max_bits = x.bits;
}, this); }, this);
} }
} }
var OrderedHuffmanTable = function() { return ({ bz2: { decode: function(input) { //eliminated unused unpackSize, Gonzalo
this.init = function(lengths) {
var l = lengths.length;
var z = [];
for (var i = 0; i < l; i++) {
z.push([i, lengths[i]]);
}
z.push([l, -1]);
OrderedHuffmanTable.prototype.init.call(this, z);
}
}
OrderedHuffmanTable.prototype = new HuffmanTable();
// unpackedSize is ignored here but added for uniformity
// this param simplifies Java (applet) implementation of bzip decoder
return ({ bz2: { decode: function(input, unpackedSize) {
var b = new RBitfield(); var b = new RBitfield();
b.init(input); b.init(input);
b.readbits(16); b.readbits(16);
var method = b.readbits(8); var method = b.readbits(8);
if (method != ord('h')) { if (method != 104) { //char 'h'
throw "Unknown (not type 'h'uffman Bzip2) compression method"; throw "Unknown (not type 'h'uffman Bzip2) compression method";
} }
var blocksize = b.readbits(8); var blocksize = b.readbits(8);
if (ord('1') <= blocksize if ( 49 <= blocksize && blocksize <= 57) { //char '1' && char '9'
&& blocksize <= ord('9')) { blocksize -= 48; //char 0
blocksize -= ord('0');
} else { } else {
throw "Unknown (not size '0'-'9') Bzip2 blocksize"; throw "Unknown (not size '1'-'9') Bzip2 blocksize";
} }
function getUsedCharTable(b) { function getUsedCharTable(b) {
@ -310,8 +201,7 @@ var ArchUtils = (function(){
} }
var out = []; var out = [];
// TODO: hopefully execution can be split into chunks
// that are processed in the background
function main_loop() { while (true) { function main_loop() { while (true) {
var blocktype = b.readbits2(48); var blocktype = b.readbits2(48);
var crc = b.readbits2(32); var crc = b.readbits2(32);
@ -319,11 +209,11 @@ var ArchUtils = (function(){
if (b.readbits(1)) throw "Bzip2 randomised support not implemented"; if (b.readbits(1)) throw "Bzip2 randomised support not implemented";
var pointer = b.readbits(24); var pointer = b.readbits(24);
var used = getUsedCharTable(b); var used = getUsedCharTable(b);
var huffman_groups = b.readbits(3); var huffman_groups = b.readbits(3);
if (2 > huffman_groups || huffman_groups > 6) if (2 > huffman_groups || huffman_groups > 6)
throw RangeError("Bzip2: Number of Huffman groups not in range 2..6"); throw RangeError("Bzip2: Number of Huffman groups not in range 2..6");
var mtf = range(huffman_groups); var mtf = [0,1,2,3,4,5,6].slice(0,huffman_groups); //eliminate use of range, Gonzalo
var selectors_list = []; var selectors_list = [];
for (var i = 0, selectors_used = b.readbits(15); i < selectors_used; i++) { for (var i = 0, selectors_used = b.readbits(15); i < selectors_used; i++) {
// zero-terminated bit runs (0..62) of MTF'ed huffman table // zero-terminated bit runs (0..62) of MTF'ed huffman table
@ -332,11 +222,13 @@ var ArchUtils = (function(){
if (c++ >= huffman_groups) if (c++ >= huffman_groups)
throw RangeError("More than max ("+huffman_groups+") groups"); throw RangeError("More than max ("+huffman_groups+") groups");
} }
move_to_front(mtf, c); move_to_front_and_store(mtf, c, selectors_list); //optimized to single function, Gonzalo
selectors_list.push(mtf[0]);
} }
var groups_lengths = []; var groups_lengths = [];
var symbols_in_use = sum(used) + 2 // remember RUN[AB] RLE symbols
// INLINE: sum used only once, Gonzalo
var symbols_in_use = used.reduce( function(a, b) {return a + b}, 0 ) + 2; //sum(used) + 2 // remember RUN[AB] RLE symbols
for (var j = 0; j < huffman_groups; j++) { for (var j = 0; j < huffman_groups; j++) {
var length = b.readbits(5); var length = b.readbits(5);
var lengths = []; var lengths = [];
@ -351,32 +243,30 @@ var ArchUtils = (function(){
var tables = []; var tables = [];
for (var g = 0; g < groups_lengths.length; g++) { for (var g = 0; g < groups_lengths.length; g++) {
var codes = new OrderedHuffmanTable(); var codes = new OrderedHuffmanTable();
codes.init(groups_lengths[g]); codes.process(groups_lengths[g]); //consolidated function calls
codes.populate_huffman_symbols();
codes.min_max_bits();
tables.push(codes); tables.push(codes);
} }
var favourites = []; var favourites = [];
for (var c = used.length - 1; c >= 0; c--) { for (var c = used.length - 1; c >= 0; c--) {
if (used[c]) favourites.push(chr(c)); if (used[c]) favourites.push(String.fromCharCode(c)); //inlined chr, used once, Gonzalo
} }
favourites.reverse(); favourites.reverse();
var selector_pointer = 0; var selector_pointer = 0;
var decoded = 0; var decoded = 0;
var t; var t;
// Main Huffman loop // Main Huffman loop
var repeat = 0; var repeat = 0;
var repeat_power = 0; var repeat_power = 0;
var buffer = [], r; var buffer = [], r;
while (true) { while (true) {
if (--decoded <= 0) { if (--decoded <= 0) {
decoded = 50; decoded = 50;
if (selector_pointer <= selectors_list.length) if (selector_pointer <= selectors_list.length)
t = tables[selectors_list[selector_pointer++]]; t = tables[selectors_list[selector_pointer++]];
} }
// INLINED: find_next_symbol // INLINED: find_next_symbol
for (var bb in t.faht) { for (var bb in t.faht) {
if (b.bits < bb) { if (b.bits < bb) {
@ -384,12 +274,12 @@ var ArchUtils = (function(){
b.bits += 8; b.bits += 8;
} }
if (r = t.faht[bb][ b.bitfield >> (b.bits - bb) ]) { if (r = t.faht[bb][ b.bitfield >> (b.bits - bb) ]) {
b.bitfield &= b._masks[b.bits -= bb]; b.bitfield &= b.masks[b.bits -= bb];
r = r.code; r = r.code;
break; break;
} }
} }
if (0 <= r && r <= 1) { if (0 <= r && r <= 1) {
if (repeat == 0) repeat_power = 1; if (repeat == 0) repeat_power = 1;
repeat += repeat_power << r; repeat += repeat_power << r;
@ -402,12 +292,9 @@ var ArchUtils = (function(){
if (r == symbols_in_use - 1) { // eof symbol if (r == symbols_in_use - 1) { // eof symbol
break; break;
} else { } else {
// INLINED: move_to_front move_to_front_and_store(favourites,r-1,buffer); //Uninlined, size efficiency, Gonzalo
var v = favourites[r-1];
for (var i = r-1; i > 0; favourites[i] = favourites[--i]);
buffer.push(favourites[0] = v);
} }
} }
var nt = bwt_reverse(buffer, pointer); var nt = bwt_reverse(buffer, pointer);
var done = []; var done = [];
var i = 0; var i = 0;
@ -429,7 +316,7 @@ var ArchUtils = (function(){
} }
out.push(done.join('')); out.push(done.join(''));
} else if (blocktype == 0x177245385090) { // sqrt(pi) } else if (blocktype == 0x177245385090) { // sqrt(pi)
b.align(); b.readbits(b.bits & 0x7); //align
break; break;
} else { } else {
throw "Illegal Bzip2 blocktype = 0x" + blocktype.toString(16); throw "Illegal Bzip2 blocktype = 0x" + blocktype.toString(16);
@ -438,4 +325,4 @@ var ArchUtils = (function(){
main_loop(); main_loop();
return out.join(''); return out.join('');
} } }); } } });
})(); })();
Loading…
Cancel
Save