/*
MiniLZ4: Minimal LZ4 block decoding and encoding.
Based on node-lz4, https://github.com/pierrec/node-lz4
====
Copyright (c) 2012 Pierre Curto
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
====
changes have the same license
*/
var MiniLZ4 = (function() {
var exports = {};
/**
 * Decode a block. Assumptions: input contains all sequences of a
 * chunk, and output is large enough to receive the decoded data.
 * Note: with typed arrays, writes past the end of an undersized output
 * are silently dropped (no error is thrown).
 * If the returned value is negative, an error occurred at the returned offset.
 *
 * @param input {Uint8Array} input data
 * @param output {Uint8Array} output data
 * @param sIdx {Number} optional start index into input (defaults to 0)
 * @param eIdx {Number} optional end index into input
 * @return {Number} number of decoded bytes
 * @private
 */
exports.uncompress = function (input, output, sIdx, eIdx) {
sIdx = sIdx || 0
eIdx = eIdx || (input.length - sIdx)
// Process each sequence in the incoming data
for (var i = sIdx, n = eIdx, j = 0; i < n;) {
var token = input[i++]
// Literals
var literals_length = (token >> 4)
if (literals_length > 0) {
// length of literals
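// l === 255 iff literals_length === 15 (0xF); the length then
// continues in extension bytes, each contributing up to 255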
var l = literals_length + 240
while (l === 255) {
l = input[i++]
literals_length += l
}
// Copy the literals
var end = i + literals_length
while (i < end) output[j++] = input[i++]
// End of buffer?
if (i === n) return j
}
// Match copy
// 2 bytes offset (little endian)
var offset = input[i++] | (input[i++] << 8)
// XXX 0 is an invalid offset value
if (offset === 0) return j
if (offset > j) return -(i-2)
// length of match copy
var match_length = (token & 0xf)
var l = match_length + 240
while (l === 255) {
l = input[i++]
match_length += l
}
// Copy the match
var pos = j - offset // position of the match copy in the current output
var end = j + match_length + 4 // minmatch = 4
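// copy byte by byte rather than in bulk: the match may overlap the
// bytes being written (offset < length), which repeats recently
// emitted output RLE-style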
while (j < end) output[j++] = output[pos++]
}
return j
}
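// Example usage (a sketch, not part of the library): decode one block
// whose uncompressed size is known up front. `compressedBlock` and
// `rawSize` are hypothetical inputs.
//
//   var out = new Uint8Array(rawSize);
//   var written = exports.uncompress(compressedBlock, out);
//   // written === rawSize on success; a negative value reports the
//   // offset of the malformed sequence in the input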
var
maxInputSize = 0x7E000000
, minMatch = 4
// uint32() optimization
, hashLog = 16
, hashShift = (minMatch * 8) - hashLog
, hashSize = 1 << hashLog
, copyLength = 8
, lastLiterals = 5
, mfLimit = copyLength + minMatch
, skipStrength = 6
, mlBits = 4
, mlMask = (1 << mlBits) - 1
, runBits = 8 - mlBits
, runMask = (1 << runBits) - 1
, hasher = /* XXX uint32( */ 2654435761 /* ) */
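// 2654435761 is LZ4's Fibonacci-style hash multiplier, a prime close to
// 2^32 / golden ratio; Math.imul below stands in for node-lz4's uint32
// helper to get a correct 32-bit multiply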
assert(hashShift === 16);
var hashTable = new Int16Array(1<<16);
var empty = new Int16Array(hashTable.length);
// compressBound returns the maximum length of an LZ4 block, given its uncompressed length
exports.compressBound = function (isize) {
return isize > maxInputSize
? 0
: (isize + (isize/255) + 16) | 0
}
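// e.g. compressBound(2048) === 2072: incompressible data may grow by
// roughly 1/255 in run-length extension bytes plus constant overhead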
exports.compress = function (src, dst, sIdx, eIdx) {
hashTable.set(empty);
return compressBlock(src, dst, 0, sIdx || 0, eIdx || dst.length)
}
function compressBlock (src, dst, pos, sIdx, eIdx) {
// XXX var Hash = uint32() // Reusable unsigned 32 bits integer
var dpos = sIdx
var dlen = eIdx - sIdx
var anchor = 0
if (src.length >= maxInputSize) throw new Error("input too large")
// minimum number of input bytes for compression (LZ4 spec)
if (src.length > mfLimit) {
var n = exports.compressBound(src.length)
if ( dlen < n ) throw Error("output too small: " + dlen + " < " + n)
var
step = 1
, findMatchAttempts = (1 << skipStrength) + 3
// Keep last few bytes incompressible (LZ4 specs):
// last 5 bytes must be literals
, srcLength = src.length - mfLimit
while (pos + minMatch < srcLength) {
// Find a match
// min match of 4 bytes aka sequence
var sequenceLowBits = src[pos+1]<<8 | src[pos]
var sequenceHighBits = src[pos+3]<<8 | src[pos+2]
// compute hash for the current sequence
var hash = Math.imul(sequenceLowBits | (sequenceHighBits << 16), hasher) >>> hashShift;
/* XXX Hash.fromBits(sequenceLowBits, sequenceHighBits)
.multiply(hasher)
.shiftr(hashShift)
.toNumber() */
// get the position of the sequence matching the hash
// NB. since 2 different sequences may have the same hash
// it is double-checked below
// do -1 to distinguish between initialized and uninitialized values
var ref = hashTable[hash] - 1
// save position of current sequence in hash table
hashTable[hash] = pos + 1
// no match if: first occurrence of this hash, offset beyond the 64K window, or hash collision (the stored sequence differs from the current one)
if ( ref < 0 ||
((pos - ref) >>> 16) > 0 ||
(
((src[ref+3]<<8 | src[ref+2]) != sequenceHighBits) ||
((src[ref+1]<<8 | src[ref]) != sequenceLowBits )
)
) {
// increase step if nothing found within limit
step = findMatchAttempts++ >> skipStrength
pos += step
continue
}
findMatchAttempts = (1 << skipStrength) + 3
// got a match
var literals_length = pos - anchor
var offset = pos - ref
// minMatch already verified
pos += minMatch
ref += minMatch
// move to the end of the match (>=minMatch)
var match_length = pos
while (pos < srcLength && src[pos] == src[ref]) {
pos++
ref++
}
// match length
match_length = pos - match_length
// token
var token = match_length < mlMask ? match_length : mlMask
// encode literals length
if (literals_length >= runMask) {
// add match length to the token
dst[dpos++] = (runMask << mlBits) + token
for (var len = literals_length - runMask; len > 254; len -= 255) {
dst[dpos++] = 255
}
dst[dpos++] = len
} else {
// add match length to the token
dst[dpos++] = (literals_length << mlBits) + token
}
// write literals
for (var i = 0; i < literals_length; i++) {
dst[dpos++] = src[anchor+i]
}
// encode offset
dst[dpos++] = offset
dst[dpos++] = (offset >> 8)
// encode match length
if (match_length >= mlMask) {
match_length -= mlMask
while (match_length >= 255) {
match_length -= 255
dst[dpos++] = 255
}
dst[dpos++] = match_length
}
anchor = pos
}
}
// cannot compress input
if (anchor == 0) return 0
// Write last literals
// encode literals length
literals_length = src.length - anchor
if (literals_length >= runMask) {
// add match length to the token
dst[dpos++] = (runMask << mlBits)
for (var ln = literals_length - runMask; ln > 254; ln -= 255) {
dst[dpos++] = 255
}
dst[dpos++] = ln
} else {
// add match length to the token
dst[dpos++] = (literals_length << mlBits)
}
// write literals
pos = anchor
while (pos < src.length) {
dst[dpos++] = src[pos++]
}
return dpos
}
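// Example round trip (a sketch; `assert` is assumed to exist in the
// enclosing scope, as elsewhere in this file):
//
//   var src = new Uint8Array(2048);
//   for (var k = 0; k < src.length; k++) src[k] = k % 7;
//   var dst = new Uint8Array(exports.compressBound(src.length));
//   var n = exports.compress(src, dst); // 0 means incompressible
//   var out = new Uint8Array(src.length);
//   if (n > 0) assert(exports.uncompress(dst.subarray(0, n), out) === src.length);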
exports.CHUNK_SIZE = 2048; // musl libc does readaheads of 1024 bytes, so a multiple of that is a good idea
exports.compressPackage = function(data, verify) {
if (verify) {
var temp = new Uint8Array(exports.CHUNK_SIZE);
}
// compress the data in chunks
assert(data instanceof ArrayBuffer);
data = new Uint8Array(data);
console.log('compressing package of size ' + data.length);
var compressedChunks = [];
var successes = [];
var offset = 0;
var total = 0;
while (offset < data.length) {
var chunk = data.subarray(offset, offset + exports.CHUNK_SIZE);
//console.log('compress a chunk ' + [offset, total, data.length]);
offset += exports.CHUNK_SIZE;
var bound = exports.compressBound(chunk.length);
var compressed = new Uint8Array(bound);
var compressedSize = exports.compress(chunk, compressed);
if (compressedSize > 0) {
assert(compressedSize <= bound);
compressed = compressed.subarray(0, compressedSize);
compressedChunks.push(compressed);
total += compressedSize;
successes.push(1);
if (verify) {
var back = exports.uncompress(compressed, temp);
assert(back === chunk.length, [back, chunk.length]);
for (var i = 0; i < chunk.length; i++) {
assert(chunk[i] === temp[i]);
}
}
} else {
assert(compressedSize === 0);
// failure to compress :(
compressedChunks.push(chunk);
total += chunk.length; // last chunk may not be the full exports.CHUNK_SIZE size
successes.push(0);
}
}
data = null; // XXX null out pack['data'] too?
var compressedData = {
data: new Uint8Array(total + exports.CHUNK_SIZE*2), // store all the compressed data, plus room for two cached decompressed chunks, in one fast array
cachedOffset: total,
cachedIndexes: [-1, -1], // cache last two blocks, so that reading 1,2,3 + preloading another block won't trigger decompress thrashing
cachedChunks: [null, null],
offsets: [], // chunk# => start in compressed data
sizes: [],
successes: successes, // 1 if chunk is compressed
};
offset = 0;
for (var i = 0; i < compressedChunks.length; i++) {
compressedData.data.set(compressedChunks[i], offset);
compressedData.offsets[i] = offset;
compressedData.sizes[i] = compressedChunks[i].length;
offset += compressedChunks[i].length;
}
console.log('compressed package into ' + [compressedData.data.length]);
assert(offset === total);
return compressedData;
};
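// Example (a sketch): packaging an ArrayBuffer for later chunked reads.
// `fileBuffer` is a hypothetical ArrayBuffer, e.g. fetched over XHR.
//
//   var pack = MiniLZ4.compressPackage(fileBuffer, /*verify=*/true);
//   // pack.data holds all compressed chunks back to back;
//   // pack.offsets[i] and pack.sizes[i] locate chunk i, and
//   // pack.successes[i] records whether chunk i actually compressed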
assert(exports.CHUNK_SIZE < (1 << 15)); // the hash table stores pos+1 in signed 16-bit ints, so the chunk size must stay below 2^15
return exports;
})();