src/runtime_strings.js - external/github.com/emscripten-core/emscripten.git - Git at Google

 /**
  * @license
  * Copyright 2019 The Emscripten Authors
  * SPDX-License-Identifier: MIT
  */

 // runtime_strings.js: Strings related runtime functions that are part of both MINIMAL_RUNTIME and regular runtime.

 // Given a pointer 'ptr' to a null-terminated UTF8-encoded string in the given array that contains uint8 values, returns
 // a copy of that string as a Javascript String object.

 // Shared TextDecoder instance used by the UTF-8 decoding helpers in this file.
 // Which declaration (if any) survives is chosen by the build-time TEXTDECODER setting:
 //   TEXTDECODER == 2:          the decoder is constructed unconditionally, with no feature test.
 //   any other truthy value:    the decoder is constructed only if a global TextDecoder exists, else it is undefined.
 //   falsy TEXTDECODER:         no UTF8Decoder variable is declared.
 // NOTE(review): this assumes the build preprocessor evaluates '#if' conditions the usual C-like way -- confirm.
 #if TEXTDECODER == 2
 var UTF8Decoder = new TextDecoder('utf8');
 #else // TEXTDECODER == 2
 #if TEXTDECODER
 var UTF8Decoder = typeof TextDecoder !== 'undefined' ? new TextDecoder('utf8') : undefined;
 #endif // TEXTDECODER
 #endif // TEXTDECODER == 2

 /**
  * @param {number} idx
  * @param {number=} maxBytesToRead
  * @return {string}
  */
 function UTF8ArrayToString(heap, idx, maxBytesToRead) {
   // Decodes the UTF-8 bytes stored in 'heap' starting at index 'idx' and returns them as a JS string.
   // Decoding stops at the first zero byte, or once maxBytesToRead bytes have been scanned when that
   // argument is passed. The TEXTDECODER build setting selects which of the code paths below are emitted.
 #if CAN_ADDRESS_2GB
   // Reinterpret idx as an unsigned 32-bit integer.
   idx >>>= 0;
 #endif
   // When maxBytesToRead is omitted this is NaN, so every '>= endIdx' comparison below is false (no upper bound).
   var endIdx = idx + maxBytesToRead;
 #if TEXTDECODER
   var endPtr = idx;
   // TextDecoder needs to know the byte length in advance, it doesn't stop on null terminator by itself.
   // Also, use the length info to avoid running tiny strings through TextDecoder, since .subarray() allocates garbage.
   // (As a tiny code save trick, compare endPtr against endIdx using a negation, so that undefined means Infinity)
   while (heap[endPtr] && !(endPtr >= endIdx)) ++endPtr;
 #endif // TEXTDECODER

 #if TEXTDECODER == 2
   // Always decode through UTF8Decoder. A 'heap' that has no .subarray() is copied with .slice()
   // and wrapped in a Uint8Array first.
   return UTF8Decoder.decode(
     heap.subarray ? heap.subarray(idx, endPtr) : new Uint8Array(heap.slice(idx, endPtr))
   );
 #else // TEXTDECODER == 2
 #if TEXTDECODER
   // Only strings longer than 16 bytes go through UTF8Decoder, and only when both .subarray() and the
   // decoder are available; everything else uses the manual decoder in the else branch.
   if (endPtr - idx > 16 && heap.subarray && UTF8Decoder) {
     return UTF8Decoder.decode(heap.subarray(idx, endPtr));
   } else {
 #endif // TEXTDECODER
     var str = '';
 #if TEXTDECODER
     // If building with TextDecoder, we have already computed the string length above, so test loop end condition against that
     while (idx < endPtr) {
 #else
     while (!(idx >= endIdx)) {
 #endif
       // For UTF8 byte structure, see:
       // http://en.wikipedia.org/wiki/UTF-8#Description
       // https://www.ietf.org/rfc/rfc2279.txt
       // https://tools.ietf.org/html/rfc3629
       var u0 = heap[idx++];
 #if !TEXTDECODER
       // If not building with TextDecoder enabled, we don't know the string length, so scan for \0 byte.
       // If building with TextDecoder, we know exactly at what byte index the string ends, so checking for nulls here would be redundant.
       if (!u0) return str;
 #endif
       // High bit clear: a single-byte (ASCII) character.
       if (!(u0 & 0x80)) { str += String.fromCharCode(u0); continue; }
       // Following bytes contribute their low 6 bits; their top bits are masked off without being validated.
       var u1 = heap[idx++] & 63;
       // Leading byte 110xxxxx: two-byte sequence.
       if ((u0 & 0xE0) == 0xC0) { str += String.fromCharCode(((u0 & 31) << 6) | u1); continue; }
       var u2 = heap[idx++] & 63;
       if ((u0 & 0xF0) == 0xE0) {
         // Leading byte 1110xxxx: three-byte sequence.
         u0 = ((u0 & 15) << 12) | (u1 << 6) | u2;
       } else {
         // Anything else is decoded as a four-byte sequence; assertion builds warn when the
         // leading byte is not of the form 11110xxx.
 #if ASSERTIONS
         if ((u0 & 0xF8) != 0xF0) warnOnce('Invalid UTF-8 leading byte 0x' + u0.toString(16) + ' encountered when deserializing a UTF-8 string in wasm memory to a JS string!');
 #endif
         u0 = ((u0 & 7) << 18) | (u1 << 12) | (u2 << 6) | (heap[idx++] & 63);
       }

       // Code points below 0x10000 fit in one UTF-16 code unit; larger ones are emitted as a surrogate pair.
       if (u0 < 0x10000) {
         str += String.fromCharCode(u0);
       } else {
         var ch = u0 - 0x10000;
         str += String.fromCharCode(0xD800 | (ch >> 10), 0xDC00 | (ch & 0x3FF));
       }
     }
 #if TEXTDECODER
   }
 #endif // TEXTDECODER
   return str;
 #endif // TEXTDECODER == 2
 }

 // Given a pointer 'ptr' to a null-terminated UTF8-encoded string in the emscripten HEAP, returns a
 // copy of that string as a Javascript String object.
 // maxBytesToRead: an optional length that specifies the maximum number of bytes to read. You can omit
 //                 this parameter to scan the string until the first \0 byte. If maxBytesToRead is
 //                 passed, and the string at [ptr, ptr+maxBytesToRead[ contains a null byte in the
 //                 middle, then the string will cut short at that byte index (i.e. maxBytesToRead will
 //                 not produce a string of exact length [ptr, ptr+maxBytesToRead[)
 //                 N.B. mixing frequent uses of UTF8ToString() with and without maxBytesToRead may
 //                 throw JS JIT optimizations off, so it is worth to consider consistently using one
 //                 style or the other.
 /**
  * @param {number} ptr
  * @param {number=} maxBytesToRead
  * @return {string}
  */
 function UTF8ToString(ptr, maxBytesToRead) {
 #if CAN_ADDRESS_2GB
   ptr >>>= 0;
 #endif
 #if TEXTDECODER == 2
   if (!ptr) return '';
   var maxPtr = ptr + maxBytesToRead;
   for (var end = ptr; !(end >= maxPtr) && HEAPU8[end];) ++end;
   return UTF8Decoder.decode(HEAPU8.subarray(ptr, end));
 #else
   return ptr ? UTF8ArrayToString(HEAPU8, ptr, maxBytesToRead) : '';
 #endif
 }

 // Copies the given Javascript String object 'str' to the given byte array at address 'outIdx',
 // encoded in UTF8 form and null-terminated. The copy will require at most str.length*4+1 bytes of space in the HEAP.
 // Use the function lengthBytesUTF8 to compute the exact number of bytes (excluding null terminator) that this function will write.
 // Parameters:
 //   str: the Javascript string to copy.
 //   heap: the array to copy to. Each index in this array is assumed to be one 8-bit element.
 //   outIdx: The starting offset in the array to begin the copying.
 //   maxBytesToWrite: The maximum number of bytes this function can write to the array.
 //                    This count should include the null terminator,
 //                    i.e. if maxBytesToWrite=1, only the null terminator will be written and nothing else.
 //                    maxBytesToWrite=0 does not write any bytes to the output, not even the null terminator.
 // Returns the number of bytes written, EXCLUDING the null terminator.

 function stringToUTF8Array(str, heap, outIdx, maxBytesToWrite) {
 #if CAN_ADDRESS_2GB
   outIdx >>>= 0;
 #endif
   if (!(maxBytesToWrite > 0)) // Parameter maxBytesToWrite is not optional. Negative values, 0, null, undefined and false each don't write out any bytes.
     return 0;

   var startIdx = outIdx;
   var endIdx = outIdx + maxBytesToWrite - 1; // -1 for string null terminator.
   for (var i = 0; i < str.length; ++i) {
     // Gotcha: charCodeAt returns a 16-bit word that is a UTF-16 encoded code unit, not a Unicode code point of the character! So decode UTF16->UTF32->UTF8.
     // See http://unicode.org/faq/utf_bom.html#utf16-3
     // For UTF8 byte structure, see http://en.wikipedia.org/wiki/UTF-8#Description and https://www.ietf.org/rfc/rfc2279.txt and https://tools.ietf.org/html/rfc3629
     var u = str.charCodeAt(i); // possibly a lead surrogate
     if (u >= 0xD800 && u <= 0xDFFF) {
       var u1 = str.charCodeAt(++i);
       u = 0x10000 + ((u & 0x3FF) << 10) | (u1 & 0x3FF);
     }
     if (u <= 0x7F) {
       if (outIdx >= endIdx) break;
       heap[outIdx++] = u;
     } else if (u <= 0x7FF) {
       if (outIdx + 1 >= endIdx) break;
       heap[outIdx++] = 0xC0 | (u >> 6);
       heap[outIdx++] = 0x80 | (u & 63);
     } else if (u <= 0xFFFF) {
       if (outIdx + 2 >= endIdx) break;
       heap[outIdx++] = 0xE0 | (u >> 12);
       heap[outIdx++] = 0x80 | ((u >> 6) & 63);
       heap[outIdx++] = 0x80 | (u & 63);
     } else {
       if (outIdx + 3 >= endIdx) break;
 #if ASSERTIONS
       if (u >= 0x200000) warnOnce('Invalid Unicode code point 0x' + u.toString(16) + ' encountered when serializing a JS string to a UTF-8 string in wasm memory! (Valid unicode code points should be in range 0-0x1FFFFF).');
 #endif
       heap[outIdx++] = 0xF0 | (u >> 18);
       heap[outIdx++] = 0x80 | ((u >> 12) & 63);
       heap[outIdx++] = 0x80 | ((u >> 6) & 63);
       heap[outIdx++] = 0x80 | (u & 63);
     }
   }
   // Null-terminate the pointer to the buffer.
   heap[outIdx] = 0;
   return outIdx - startIdx;
 }

 // Copies the given Javascript String object 'str' to the emscripten HEAP at address 'outPtr',
 // null-terminated and encoded in UTF8 form. The copy will require at most str.length*4+1 bytes of space in the HEAP.
 // Use the function lengthBytesUTF8 to compute the exact number of bytes (excluding null terminator) that this function will write.
 // Returns the number of bytes written, EXCLUDING the null terminator.

 function stringToUTF8(str, outPtr, maxBytesToWrite) {
   // Thin wrapper: forwards to stringToUTF8Array() and returns its result unchanged.
 #if ASSERTIONS
   // maxBytesToWrite is mandatory; assertion builds fail loudly when it is not a number.
   assert(typeof maxBytesToWrite == 'number', 'stringToUTF8(str, outPtr, maxBytesToWrite) is missing the third parameter that specifies the length of the output buffer!');
 #endif
   // NOTE(review): {{{ heapAndOffset(...) }}} is expanded at build time; presumably it produces the
   // 'heap, outIdx' argument pair for HEAPU8 and outPtr -- confirm against the macro definition.
   return stringToUTF8Array(str, {{{ heapAndOffset('HEAPU8', 'outPtr') }}}, maxBytesToWrite);
 }

 // Returns the number of bytes the given Javascript string takes if encoded as a UTF8 byte array, EXCLUDING the null terminator byte.
 function lengthBytesUTF8(str) {
   // Counts the bytes 'str' occupies when encoded as UTF-8, EXCLUDING any null terminator.
   // A well-formed surrogate pair counts as one 4-byte character. An unpaired surrogate counts as
   // 3 bytes (the size of U+FFFD, which TextEncoder substitutes) and does not consume the code
   // unit that follows it.
   var len = 0;
   for (var i = 0; i < str.length; ++i) {
     // Gotcha: charCodeAt returns a 16-bit word that is a UTF-16 encoded code unit, not a Unicode code point of the character!
     // See http://unicode.org/faq/utf_bom.html#utf16-3
     var u = str.charCodeAt(i); // possibly a surrogate
     if (u <= 0x7F) {
       ++len;
     } else if (u <= 0x7FF) {
       len += 2;
     } else if (u >= 0xD800 && u <= 0xDBFF) {
       // Lead surrogate: only a directly following trail surrogate completes a 4-byte character.
       var next = str.charCodeAt(i + 1); // NaN past the end of the string, which fails the range test.
       if (next >= 0xDC00 && next <= 0xDFFF) {
         len += 4;
         ++i;
       } else {
         len += 3;
       }
     } else {
       // Remaining BMP code units, including unpaired trail surrogates.
       len += 3;
     }
   }
   return len;
 }
	/**
	* @license
	* Copyright 2019 The Emscripten Authors
	* SPDX-License-Identifier: MIT
	*/

	// runtime_strings.js: Strings related runtime functions that are part of both MINIMAL_RUNTIME and regular runtime.

	// Given a pointer 'ptr' to a null-terminated UTF8-encoded string in the given array that contains uint8 values, returns
	// a copy of that string as a Javascript String object.

	// Shared TextDecoder instance used by the UTF-8 decoding helpers in this file.
	// Which declaration (if any) survives is chosen by the build-time TEXTDECODER setting:
	//   TEXTDECODER == 2:          the decoder is constructed unconditionally, with no feature test.
	//   any other truthy value:    the decoder is constructed only if a global TextDecoder exists, else it is undefined.
	//   falsy TEXTDECODER:         no UTF8Decoder variable is declared.
	// NOTE(review): this assumes the build preprocessor evaluates '#if' conditions the usual C-like way -- confirm.
	#if TEXTDECODER == 2
	var UTF8Decoder = new TextDecoder('utf8');
	#else // TEXTDECODER == 2
	#if TEXTDECODER
	var UTF8Decoder = typeof TextDecoder !== 'undefined' ? new TextDecoder('utf8') : undefined;
	#endif // TEXTDECODER
	#endif // TEXTDECODER == 2

	/**
	* @param {number} idx
	* @param {number=} maxBytesToRead
	* @return {string}
	*/
	function UTF8ArrayToString(heap, idx, maxBytesToRead) {
	#if CAN_ADDRESS_2GB
	idx >>>= 0;
	#endif
	var endIdx = idx + maxBytesToRead;
	#if TEXTDECODER
	var endPtr = idx;
	// TextDecoder needs to know the byte length in advance, it doesn't stop on null terminator by itself.
	// Also, use the length info to avoid running tiny strings through TextDecoder, since .subarray() allocates garbage.
	// (As a tiny code save trick, compare endPtr against endIdx using a negation, so that undefined means Infinity)
	while (heap[endPtr] && !(endPtr >= endIdx)) ++endPtr;
	#endif // TEXTDECODER

	#if TEXTDECODER == 2
	return UTF8Decoder.decode(
	heap.subarray ? heap.subarray(idx, endPtr) : new Uint8Array(heap.slice(idx, endPtr))
	);
	#else // TEXTDECODER == 2
	#if TEXTDECODER
	if (endPtr - idx > 16 && heap.subarray && UTF8Decoder) {
	return UTF8Decoder.decode(heap.subarray(idx, endPtr));
	} else {
	#endif // TEXTDECODER
	var str = '';
	#if TEXTDECODER
	// If building with TextDecoder, we have already computed the string length above, so test loop end condition against that
	while (idx < endPtr) {
	#else
	while (!(idx >= endIdx)) {
	#endif
	// For UTF8 byte structure, see:
	// http://en.wikipedia.org/wiki/UTF-8#Description
	// https://www.ietf.org/rfc/rfc2279.txt
	// https://tools.ietf.org/html/rfc3629
	var u0 = heap[idx++];
	#if !TEXTDECODER
	// If not building with TextDecoder enabled, we don't know the string length, so scan for \0 byte.
	// If building with TextDecoder, we know exactly at what byte index the string ends, so checking for nulls here would be redundant.
	if (!u0) return str;
	#endif
	if (!(u0 & 0x80)) { str += String.fromCharCode(u0); continue; }
	var u1 = heap[idx++] & 63;
	if ((u0 & 0xE0) == 0xC0) { str += String.fromCharCode(((u0 & 31) << 6) \| u1); continue; }
	var u2 = heap[idx++] & 63;
	if ((u0 & 0xF0) == 0xE0) {
	u0 = ((u0 & 15) << 12) \| (u1 << 6) \| u2;
	} else {
	#if ASSERTIONS
	if ((u0 & 0xF8) != 0xF0) warnOnce('Invalid UTF-8 leading byte 0x' + u0.toString(16) + ' encountered when deserializing a UTF-8 string in wasm memory to a JS string!');
	#endif
	u0 = ((u0 & 7) << 18) \| (u1 << 12) \| (u2 << 6) \| (heap[idx++] & 63);
	}

	if (u0 < 0x10000) {
	str += String.fromCharCode(u0);
	} else {
	var ch = u0 - 0x10000;
	str += String.fromCharCode(0xD800 \| (ch >> 10), 0xDC00 \| (ch & 0x3FF));
	}
	}
	#if TEXTDECODER
	}
	#endif // TEXTDECODER
	return str;
	#endif // TEXTDECODER == 2
	}

	// Given a pointer 'ptr' to a null-terminated UTF8-encoded string in the emscripten HEAP, returns a
	// copy of that string as a Javascript String object.
	// maxBytesToRead: an optional length that specifies the maximum number of bytes to read. You can omit
	// this parameter to scan the string until the first \0 byte. If maxBytesToRead is
	// passed, and the string at [ptr, ptr+maxBytesToRead[ contains a null byte in the
	// middle, then the string will cut short at that byte index (i.e. maxBytesToRead will
	// not produce a string of exact length [ptr, ptr+maxBytesToRead[)
	// N.B. mixing frequent uses of UTF8ToString() with and without maxBytesToRead may
	// throw JS JIT optimizations off, so it is worth to consider consistently using one
	// style or the other.
	/**
	* @param {number} ptr
	* @param {number=} maxBytesToRead
	* @return {string}
	*/
	function UTF8ToString(ptr, maxBytesToRead) {
	#if CAN_ADDRESS_2GB
	ptr >>>= 0;
	#endif
	#if TEXTDECODER == 2
	if (!ptr) return '';
	var maxPtr = ptr + maxBytesToRead;
	for (var end = ptr; !(end >= maxPtr) && HEAPU8[end];) ++end;
	return UTF8Decoder.decode(HEAPU8.subarray(ptr, end));
	#else
	return ptr ? UTF8ArrayToString(HEAPU8, ptr, maxBytesToRead) : '';
	#endif
	}

	// Copies the given Javascript String object 'str' to the given byte array at address 'outIdx',
	// encoded in UTF8 form and null-terminated. The copy will require at most str.length*4+1 bytes of space in the HEAP.
	// Use the function lengthBytesUTF8 to compute the exact number of bytes (excluding null terminator) that this function will write.
	// Parameters:
	// str: the Javascript string to copy.
	// heap: the array to copy to. Each index in this array is assumed to be one 8-bit element.
	// outIdx: The starting offset in the array to begin the copying.
	// maxBytesToWrite: The maximum number of bytes this function can write to the array.
	// This count should include the null terminator,
	// i.e. if maxBytesToWrite=1, only the null terminator will be written and nothing else.
	// maxBytesToWrite=0 does not write any bytes to the output, not even the null terminator.
	// Returns the number of bytes written, EXCLUDING the null terminator.

	function stringToUTF8Array(str, heap, outIdx, maxBytesToWrite) {
	#if CAN_ADDRESS_2GB
	outIdx >>>= 0;
	#endif
	if (!(maxBytesToWrite > 0)) // Parameter maxBytesToWrite is not optional. Negative values, 0, null, undefined and false each don't write out any bytes.
	return 0;

	var startIdx = outIdx;
	var endIdx = outIdx + maxBytesToWrite - 1; // -1 for string null terminator.
	for (var i = 0; i < str.length; ++i) {
	// Gotcha: charCodeAt returns a 16-bit word that is a UTF-16 encoded code unit, not a Unicode code point of the character! So decode UTF16->UTF32->UTF8.
	// See http://unicode.org/faq/utf_bom.html#utf16-3
	// For UTF8 byte structure, see http://en.wikipedia.org/wiki/UTF-8#Description and https://www.ietf.org/rfc/rfc2279.txt and https://tools.ietf.org/html/rfc3629
	var u = str.charCodeAt(i); // possibly a lead surrogate
	if (u >= 0xD800 && u <= 0xDFFF) {
	var u1 = str.charCodeAt(++i);
	u = 0x10000 + ((u & 0x3FF) << 10) \| (u1 & 0x3FF);
	}
	if (u <= 0x7F) {
	if (outIdx >= endIdx) break;
	heap[outIdx++] = u;
	} else if (u <= 0x7FF) {
	if (outIdx + 1 >= endIdx) break;
	heap[outIdx++] = 0xC0 \| (u >> 6);
	heap[outIdx++] = 0x80 \| (u & 63);
	} else if (u <= 0xFFFF) {
	if (outIdx + 2 >= endIdx) break;
	heap[outIdx++] = 0xE0 \| (u >> 12);
	heap[outIdx++] = 0x80 \| ((u >> 6) & 63);
	heap[outIdx++] = 0x80 \| (u & 63);
	} else {
	if (outIdx + 3 >= endIdx) break;
	#if ASSERTIONS
	if (u >= 0x200000) warnOnce('Invalid Unicode code point 0x' + u.toString(16) + ' encountered when serializing a JS string to a UTF-8 string in wasm memory! (Valid unicode code points should be in range 0-0x1FFFFF).');
	#endif
	heap[outIdx++] = 0xF0 \| (u >> 18);
	heap[outIdx++] = 0x80 \| ((u >> 12) & 63);
	heap[outIdx++] = 0x80 \| ((u >> 6) & 63);
	heap[outIdx++] = 0x80 \| (u & 63);
	}
	}
	// Null-terminate the pointer to the buffer.
	heap[outIdx] = 0;
	return outIdx - startIdx;
	}

	// Copies the given Javascript String object 'str' to the emscripten HEAP at address 'outPtr',
	// null-terminated and encoded in UTF8 form. The copy will require at most str.length*4+1 bytes of space in the HEAP.
	// Use the function lengthBytesUTF8 to compute the exact number of bytes (excluding null terminator) that this function will write.
	// Returns the number of bytes written, EXCLUDING the null terminator.

	function stringToUTF8(str, outPtr, maxBytesToWrite) {
	// Thin wrapper: forwards to stringToUTF8Array() and returns its result unchanged.
	#if ASSERTIONS
	// maxBytesToWrite is mandatory; assertion builds fail loudly when it is not a number.
	assert(typeof maxBytesToWrite == 'number', 'stringToUTF8(str, outPtr, maxBytesToWrite) is missing the third parameter that specifies the length of the output buffer!');
	#endif
	// NOTE(review): {{{ heapAndOffset(...) }}} is expanded at build time; presumably it produces the
	// 'heap, outIdx' argument pair for HEAPU8 and outPtr -- confirm against the macro definition.
	return stringToUTF8Array(str, {{{ heapAndOffset('HEAPU8', 'outPtr') }}}, maxBytesToWrite);
	}

	// Returns the number of bytes the given Javascript string takes if encoded as a UTF8 byte array, EXCLUDING the null terminator byte.
	function lengthBytesUTF8(str) {
	  // Counts the bytes 'str' occupies when encoded as UTF-8, EXCLUDING any null terminator.
	  // A well-formed surrogate pair counts as one 4-byte character. An unpaired surrogate counts as
	  // 3 bytes (the size of U+FFFD, which TextEncoder substitutes) and does not consume the code
	  // unit that follows it.
	  var len = 0;
	  for (var i = 0; i < str.length; ++i) {
	    // Gotcha: charCodeAt returns a 16-bit word that is a UTF-16 encoded code unit, not a Unicode code point of the character!
	    // See http://unicode.org/faq/utf_bom.html#utf16-3
	    var u = str.charCodeAt(i); // possibly a surrogate
	    if (u <= 0x7F) {
	      ++len;
	    } else if (u <= 0x7FF) {
	      len += 2;
	    } else if (u >= 0xD800 && u <= 0xDBFF) {
	      // Lead surrogate: only a directly following trail surrogate completes a 4-byte character.
	      var next = str.charCodeAt(i + 1); // NaN past the end of the string, which fails the range test.
	      if (next >= 0xDC00 && next <= 0xDFFF) {
	        len += 4;
	        ++i;
	      } else {
	        len += 3;
	      }
	    } else {
	      // Remaining BMP code units, including unpaired trail surrogates.
	      len += 3;
	    }
	  }
	  return len;
	}