bin/NativeTests/CodexTests.cpp - external/github.com/Microsoft/ChakraCore - Git at Google

 //-------------------------------------------------------------------------------------------------------
 // Copyright (C) Microsoft. All rights reserved.
 // Licensed under the MIT license. See LICENSE.txt file in the project root for full license information.
 //-------------------------------------------------------------------------------------------------------
 #include "stdafx.h"
 #include "catch.hpp"
 #include <process.h>
 #include "Codex\Utf8Codex.h"

 #pragma warning(disable:4100) // unreferenced formal parameter
 #pragma warning(disable:6387) // suppressing preFAST which raises warning for passing null to the JsRT APIs
 #pragma warning(disable:6262) // CATCH is using stack variables to report errors, suppressing the preFAST warning.

 namespace CodexTest
 {
     ///
     /// The following test verifies that for invalid characters, we replace them
     /// with the unicode replacement character
     ///

     // Verifies that the first three bytes of encodedBuffer are EF BF BD, i.e. the
     // UTF-8 encoding of U+FFFD (the Unicode replacement character).
     //
     // encodedBuffer: must point to at least three readable bytes.
     //
     // Each byte has its own CHECK so a mismatch is reported for the exact byte.
     void CheckIsUnicodeReplacementChar(const utf8char_t* encodedBuffer)
     {
         CHECK(encodedBuffer[0] == 0xEF);
         CHECK(encodedBuffer[1] == 0xBF);
         CHECK(encodedBuffer[2] == 0xBD);
     }

     //
     // Following test cases are based on the Utf-8 decoder tests
     // suggested by Markus Kuhn at https://www.cl.cam.ac.uk/~mgk25/ucs/examples/UTF-8-test.txt
     //
     TEST_CASE("CodexTest_EncodeTrueUtf8_SingleSurrogates", "[CodexTest]")
     {
         // Lone (unpaired) surrogate code units: each one is expected to be
         // encoded as the 3-byte replacement character.
         char16 testValues[] = { 0xD800, 0xDB7F, 0xDB80, 0xDBFF, 0xDC00, 0xDF80, 0xDFFF };

         // Every input is exactly one UTF-16 code unit.
         const charcount_t charCount = 1;

         // Up to 3 bytes per code unit; +1 unit since the output is null-terminated.
         utf8char_t encodedBuffer[(charCount + 1) * 3];

         for (const char16 &loneSurrogate : testValues)
         {
             size_t numEncodedBytes = utf8::EncodeTrueUtf8IntoAndNullTerminate(encodedBuffer, &loneSurrogate, charCount);
             CHECK(numEncodedBytes == 3);
             CheckIsUnicodeReplacementChar(encodedBuffer);
         }
     }

     //
     // Test encoding of given utf16-encoded strings into another encoding
     //
     // In the expected encoded string, extra bytes are represented as 0
     //

     // Runs every case in testCases through func and compares the produced bytes
     // with the expected encoding.
     //
     // testCases: a C array (taken by reference so _countof can size it) of structs
     //            exposing a `surrogatePair` char16 array (the UTF-16 input) and a
     //            `utf8Encoding` utf8char_t array (the expected output, with any
     //            unused trailing bytes left as 0).
     // func:      callable invoked as func(outBuffer, inputUnits, unitCount); its
     //            return value is treated as the number of bytes written.
     template <typename TTestCase, typename TEncodingFunc>
     void RunUtf8EncodingTestCase(const TTestCase &testCases, const TEncodingFunc func)
     {
         const int numTestCases = _countof(testCases);
         const charcount_t charCount = _countof(testCases[0].surrogatePair);
         const charcount_t maxEncodedByteCount = _countof(testCases[0].utf8Encoding);
         utf8char_t encodedBuffer[maxEncodedByteCount + 1]; // +1 in case a null-terminating func is passed in

         for (int i = 0; i < numTestCases; i++)
         {
             size_t numEncodedBytes = func(encodedBuffer, testCases[i].surrogatePair, charCount);
             CHECK(numEncodedBytes <= maxEncodedByteCount);

             // CHECK does not abort the test on failure, so never trust the reported
             // count beyond the size of the expectation array: clamp it before
             // indexing to avoid reading past utf8Encoding / encodedBuffer.
             const size_t bytesToCompare = numEncodedBytes < maxEncodedByteCount ? numEncodedBytes : maxEncodedByteCount;
             for (size_t j = 0; j < bytesToCompare; j++)
             {
                 CHECK(encodedBuffer[j] == testCases[i].utf8Encoding[j]);
             }

             // Check and make sure there were no other bytes expected in the encoded string
             for (size_t j = bytesToCompare; j < maxEncodedByteCount; j++)
             {
                 CHECK(testCases[i].utf8Encoding[j] == 0);
             }
         }
     }

     TEST_CASE("CodexTest_EncodeCesu_PairedSurrogates", "[CodexTest]")
     {
         // One case = one surrogate pair and the 6 CESU bytes it should produce:
         // each half of the pair is encoded separately as its own 3-byte sequence.
         struct CesuPairCase
         {
             char16     surrogatePair[2]; // high surrogate, low surrogate
             utf8char_t utf8Encoding[6];  // 3 bytes per surrogate
         };

         CesuPairCase cesuCases[] = {
             { { 0xD800, 0xDC00 }, { 0xED, 0xA0, 0x80, 0xED, 0xB0, 0x80 } }, //  U+010000 LINEAR B SYLLABLE B008 A character
             { { 0xD800, 0xDFFF }, { 0xED, 0xA0, 0x80, 0xED, 0xBF, 0xBF } }, //  U+0103FF
             { { 0xDB7F, 0xDC00 }, { 0xED, 0xAD, 0xBF, 0xED, 0xB0, 0x80 } }, //  U+0EFC00
             { { 0xDB7F, 0xDFFF }, { 0xED, 0xAD, 0xBF, 0xED, 0xBF, 0xBF } }, //  U+0EFFFF
             { { 0xDB80, 0xDC00 }, { 0xED, 0xAE, 0x80, 0xED, 0xB0, 0x80 } }, //  U+0F0000 Plane 15 Private Use First
             { { 0xDB80, 0xDFFF }, { 0xED, 0xAE, 0x80, 0xED, 0xBF, 0xBF } }, //  U+0F03FF
             { { 0xDBFF, 0xDC00 }, { 0xED, 0xAF, 0xBF, 0xED, 0xB0, 0x80 } }, //  U+10FC00
             { { 0xDBFF, 0xDFFF }, { 0xED, 0xAF, 0xBF, 0xED, 0xBF, 0xBF } }  //  U+10FFFF
         };

         // The cast selects the utf8::EncodeInto variant with exactly this signature
         // (presumably it is overloaded or templated - confirm in Utf8Codex.h).
         typedef size_t (*EncodeFunc)(utf8char_t*, const char16*, charcount_t);
         RunUtf8EncodingTestCase(cesuCases, static_cast<EncodeFunc>(utf8::EncodeInto));
     }

     TEST_CASE("CodexTest_EncodeUtf8_PairedSurrogates", "[CodexTest]")
     {
         // One case = one surrogate pair and the 4 UTF-8 bytes it should produce:
         // the pair is combined back into its original codepoint, which is then
         // encoded as a single 4-byte sequence.
         struct SurrogatePairCase
         {
             char16     surrogatePair[2]; // high surrogate, low surrogate
             utf8char_t utf8Encoding[4];  // expected 4-byte UTF-8 sequence
         };

         SurrogatePairCase pairCases[] = {
             { { 0xD800, 0xDC00 }, { 0xF0, 0x90, 0x80, 0x80 } }, //  U+010000 LINEAR B SYLLABLE B008 A character
             { { 0xD800, 0xDFFF }, { 0xF0, 0x90, 0x8F, 0xBF } }, //  U+0103FF
             { { 0xDB7F, 0xDC00 }, { 0xF3, 0xAF, 0xB0, 0x80 } }, //  U+0EFC00
             { { 0xDB7F, 0xDFFF }, { 0xF3, 0xAF, 0xBF, 0xBF } }, //  U+0EFFFF
             { { 0xDB80, 0xDC00 }, { 0xF3, 0xB0, 0x80, 0x80 } }, //  U+0F0000 Plane 15 Private Use First
             { { 0xDB80, 0xDFFF }, { 0xF3, 0xB0, 0x8F, 0xBF } }, //  U+0F03FF
             { { 0xDBFF, 0xDC00 }, { 0xF4, 0x8F, 0xB0, 0x80 } }, //  U+10FC00
             { { 0xDBFF, 0xDFFF }, { 0xF4, 0x8F, 0xBF, 0xBF } }  //  U+10FFFF
         };

         RunUtf8EncodingTestCase(pairCases, utf8::EncodeTrueUtf8IntoAndNullTerminate);
     }

     TEST_CASE("CodexTest_EncodeUtf8_NonCharacters", "[CodexTest]")
     {
         // U+FFFE and U+FFFF do not represent characters; the expectation here is
         // that they are still encoded as ordinary 3-byte sequences.
         struct NonCharacterCase
         {
             char16     surrogatePair[1]; // single UTF-16 unit (field name is dictated by the shared helper)
             utf8char_t utf8Encoding[3];  // expected 3-byte UTF-8 sequence
         };

         NonCharacterCase nonCharacterCases[] = {
             { { 0xFFFE }, { 0xEF, 0xBF, 0xBE } }, //  U+FFFE
             { { 0xFFFF }, { 0xEF, 0xBF, 0xBF } }  //  U+FFFF
         };

         RunUtf8EncodingTestCase(nonCharacterCases, utf8::EncodeTrueUtf8IntoAndNullTerminate);
     }

     TEST_CASE("CodexTest_EncodeUtf8_BoundaryChars", "[CodexTest]")
     {
         // Boundary values: the code units on either side of the surrogate range
         // (U+D7FF, U+E000), the replacement character itself (U+FFFD), and the
         // highest codepoint expressible as a surrogate pair (U+10FFFF).

         // Inputs that fit in one UTF-16 unit and encode to 3 bytes.
         struct SingleChar16TestCase
         {
             char16     surrogatePair[1];
             utf8char_t utf8Encoding[3];
         };

         // Inputs that need a surrogate pair and encode to 4 bytes.
         struct TwoChar16TestCase
         {
             char16     surrogatePair[2];
             utf8char_t utf8Encoding[4];
         };

         SingleChar16TestCase singleUnitCases[] = {
             { { 0xD7FF }, { 0xED, 0x9F, 0xBF } }, //  U+D7FF
             { { 0xE000 }, { 0xEE, 0x80, 0x80 } }, //  U+E000
             { { 0xFFFD }, { 0xEF, 0xBF, 0xBD } }  //  U+FFFD
         };

         TwoChar16TestCase twoUnitCases[] = {
             { { 0xDBFF, 0xDFFF }, { 0xF4, 0x8F, 0xBF, 0xBF } } //  U+10FFFF
         };

         RunUtf8EncodingTestCase(singleUnitCases, utf8::EncodeTrueUtf8IntoAndNullTerminate);
         RunUtf8EncodingTestCase(twoUnitCases, utf8::EncodeTrueUtf8IntoAndNullTerminate);
     }

     TEST_CASE("CodexTest_EncodeUtf8_SimpleCharacters", "[CodexTest]")
     {
         // Ordinary characters, one for each of the 1-, 2- and 3-byte UTF-8 lengths.
         // Encodings shorter than 3 bytes leave the rest of utf8Encoding
         // zero-initialized, which the shared helper treats as "no byte expected".
         struct SimpleCharCase
         {
             char16     surrogatePair[1]; // single UTF-16 unit (field name is dictated by the shared helper)
             utf8char_t utf8Encoding[3];  // expected bytes, zero-padded
         };

         SimpleCharCase simpleCases[] = {
             { { 0x0024 }, { 0x24 } },              //  U+0024 - Dollar Symbol
             { { 0x00A2 }, { 0xC2, 0xA2 } },        //  U+00A2 - Cent symbol
             { { 0x20AC }, { 0xE2, 0x82, 0xAC } }   //  U+20AC - Euro symbol
         };

         RunUtf8EncodingTestCase(simpleCases, utf8::EncodeTrueUtf8IntoAndNullTerminate);
     }

     TEST_CASE("CodexTest_EncodeTrueUtf8_SimpleString", "[CodexTest]")
     {
         // ASCII-only input: every UTF-16 unit is expected to come out as a single
         // byte with the same value.
         const char16* sourceBuffer = L"abc";
         const charcount_t charCount = 3;

         // Up to 3 bytes per unit; +1 unit since the buffer will be null terminated.
         utf8char_t encodedBuffer[(charCount + 1) * 3];

         size_t numEncodedBytes = utf8::EncodeTrueUtf8IntoAndNullTerminate(encodedBuffer, sourceBuffer, charCount);
         CHECK(numEncodedBytes == charCount);

         for (charcount_t i = 0; i != charCount; ++i)
         {
             CHECK(sourceBuffer[i] == (const char16)encodedBuffer[i]);
         }
     }

     template <typename TTestCase, typename TDecodeFunc>
     void RunUtf8DecodeTestCase(const TTestCase &testCases, const TDecodeFunc func)
     {
         const int numTestCases = _countof(testCases);
         const charcount_t charCount = _countof(testCases[0].result);
         char16 decodedBuffer[charCount + 1]; // +1 in case a null-terminating func is passed in

         for (int i = 0; i < numTestCases; i++)
         {
             bool chunkEndsInTruncatedSequence = false;
             size_t decodedCount = func(decodedBuffer, testCases[i].utf8Encoding, testCases[i].utf8Encoding + testCases[i].bytesToDecode, utf8::DecodeOptions::doChunkedEncoding, &chunkEndsInTruncatedSequence);
             CHECK(decodedCount == testCases[i].expectedDecodedChars);

             for (size_t j = 0; j < decodedCount; j++)
             {
                 CHECK(decodedBuffer[j] == testCases[i].result[j]);
             }

             CHECK(testCases[i].shouldEndInTruncation == chunkEndsInTruncatedSequence);
         }
     }

     TEST_CASE("CodexTest_DecodeUnitsInto_ChunkEndsInTruncatedSequence", "[CodexTest]")
     {
         // Each case decodes the first `bytesToDecode` bytes of `utf8Encoding` as one
         // chunk. When the chunk stops in the middle of a multi-byte sequence, the
         // expectation is that nothing is emitted for it and truncation is reported.
         struct TruncationCase
         {
             int bytesToDecode;            // length of the chunk handed to the decoder
             size_t expectedDecodedChars;  // expected return value of the decoder
             bool shouldEndInTruncation;   // expected value of the truncation out-flag
             char16 result[8];             // expected decoded units (only the first expectedDecodedChars matter)
             utf8char_t utf8Encoding[8];   // input bytes
         };

         TruncationCase truncationCases[] = {
             { 2, 1, false, { 0xc1 }, { 0xc3, 0x81 } }, // Valid 2-byte sequence
             { 1, 0, true, { 0x0 }, { 0xc3, 0x81 } }, // Valid 2-byte sequence truncated at the end of the chunk
             { 2, 2, false, { 0xfffd, 0x79 }, { 0xc3, 0x79 } }, // Invalid 2-byte sequence
             { 1, 0, true, { 0x0 }, { 0xc3, 0x79 } }, // Invalid 2-byte sequence truncated at the end of the chunk
             { 3, 1, false, { 0x3042 }, { 0xe3, 0x81, 0x82 } }, // Valid 3-byte sequence
             { 1, 0, true, { 0x0 }, { 0xe3, 0x81, 0x82 } }, // Valid 3-byte sequence truncated at the end of the chunk
             { 2, 0, true, { 0x0 }, { 0xe3, 0x81, 0x82 } }, // Valid 3-byte sequence truncated at the end of the chunk
             { 3, 3, false, { 0xfffd, 0x79, 0xfffd }, { 0xe3, 0x79, 0x82 } }, // Invalid 3-byte sequence
             { 1, 0, true, { 0x0 }, { 0xe3, 0x79, 0x82 } }, // Invalid 3-byte sequence truncated at the end of the chunk
             { 2, 0, true, { 0x0 }, { 0xe3, 0x79, 0x82 } }, // Invalid 3-byte sequence truncated at the end of the chunk
             { 3, 3, false, { 0xfffd, 0xfffd, 0x79 }, { 0xe3, 0x81, 0x79 } }, // Invalid 3-byte sequence
             { 1, 0, true, { 0x0 }, { 0xe3, 0x81, 0x79 } }, // Invalid 3-byte sequence truncated at the end of the chunk
             { 2, 0, true, { 0x0 }, { 0xe3, 0x81, 0x79 } }, // Invalid 3-byte sequence truncated at the end of the chunk
             { 4, 2, false, { 0xd9c4, 0xdc83 }, { 0xf2, 0x81, 0x82, 0x83 } }, // Valid 4-byte sequence
             { 1, 0, true, { 0x0 }, { 0xf2, 0x81, 0x82, 0x83 } }, // Valid 4-byte sequence truncated at the end of the chunk
             { 2, 0, true, { 0x0 }, { 0xf2, 0x81, 0x82, 0x83 } }, // Valid 4-byte sequence truncated at the end of the chunk
             { 3, 0, true, { 0x0 }, { 0xf2, 0x81, 0x82, 0x83 } }, // Valid 4-byte sequence truncated at the end of the chunk
             { 4, 4, false, { 0xfffd, 0x79, 0xfffd, 0xfffd }, { 0xf2, 0x79, 0x82, 0x83 } }, // Invalid 4-byte sequence
             { 1, 0, true, { 0x0 }, { 0xf2, 0x79, 0x82, 0x83 } }, // Invalid 4-byte sequence truncated at the end of the chunk
             { 2, 0, true, { 0x0 }, { 0xf2, 0x79, 0x82, 0x83 } }, // Invalid 4-byte sequence truncated at the end of the chunk
             { 3, 0, true, { 0x0 }, { 0xf2, 0x79, 0x82, 0x83 } }, // Invalid 4-byte sequence truncated at the end of the chunk
             { 4, 4, false, { 0xfffd, 0xfffd, 0x79, 0xfffd }, { 0xf2, 0x81, 0x79, 0x83 } }, // Invalid 4-byte sequence
             { 1, 0, true, { 0x0 }, { 0xf2, 0x81, 0x79, 0x83 } }, // Invalid 4-byte sequence truncated at the end of the chunk
             { 2, 0, true, { 0x0 }, { 0xf2, 0x81, 0x79, 0x83 } }, // Invalid 4-byte sequence truncated at the end of the chunk
             { 3, 0, true, { 0x0 }, { 0xf2, 0x81, 0x79, 0x83 } }, // Invalid 4-byte sequence truncated at the end of the chunk
             { 4, 4, false, { 0xfffd, 0xfffd, 0xfffd, 0x79 }, { 0xf2, 0x81, 0x82, 0x79 } }, // Invalid 4-byte sequence
             { 1, 0, true, { 0x0 }, { 0xf2, 0x81, 0x82, 0x79 } }, // Invalid 4-byte sequence truncated at the end of the chunk
             { 2, 0, true, { 0x0 }, { 0xf2, 0x81, 0x82, 0x79 } }, // Invalid 4-byte sequence truncated at the end of the chunk
             { 3, 0, true, { 0x0 }, { 0xf2, 0x81, 0x82, 0x79 } }, // Invalid 4-byte sequence truncated at the end of the chunk
         };

         RunUtf8DecodeTestCase(truncationCases, utf8::DecodeUnitsIntoAndNullTerminateNoAdvance);
     }
 };
	//-------------------------------------------------------------------------------------------------------
	// Copyright (C) Microsoft. All rights reserved.
	// Licensed under the MIT license. See LICENSE.txt file in the project root for full license information.
	//-------------------------------------------------------------------------------------------------------
	#include "stdafx.h"
	#include "catch.hpp"
	#include <process.h>
	#include "Codex\Utf8Codex.h"

	#pragma warning(disable:4100) // unreferenced formal parameter
	#pragma warning(disable:6387) // suppressing preFAST which raises warning for passing null to the JsRT APIs
	#pragma warning(disable:6262) // CATCH is using stack variables to report errors, suppressing the preFAST warning.

	namespace CodexTest
	{
	///
	/// The following test verifies that for invalid characters, we replace them
	/// with the unicode replacement character
	///

	// Verifies that the first three bytes of encodedBuffer are EF BF BD, i.e. the
	// UTF-8 encoding of U+FFFD (the Unicode replacement character).
	//
	// encodedBuffer: must point to at least three readable bytes.
	void CheckIsUnicodeReplacementChar(const utf8char_t* encodedBuffer)
	{
	    // One CHECK per byte so a mismatch is reported for the exact byte.
	    CHECK(encodedBuffer[0] == 0xEF);
	    CHECK(encodedBuffer[1] == 0xBF);
	    CHECK(encodedBuffer[2] == 0xBD);
	}

	//
	// Following test cases are based on the Utf-8 decoder tests
	// suggested by Markus Kuhn at https://www.cl.cam.ac.uk/~mgk25/ucs/examples/UTF-8-test.txt
	//
	TEST_CASE("CodexTest_EncodeTrueUtf8_SingleSurrogates", "[CodexTest]")
	{
	    // Lone (unpaired) surrogate code units: each one is expected to be
	    // encoded as the 3-byte replacement character.
	    char16 testValues[] = { 0xD800, 0xDB7F, 0xDB80, 0xDBFF, 0xDC00, 0xDF80, 0xDFFF };

	    // Every input is exactly one UTF-16 code unit.
	    const charcount_t charCount = 1;

	    // Up to 3 bytes per code unit; +1 unit since the output is null-terminated.
	    utf8char_t encodedBuffer[(charCount + 1) * 3];

	    for (const char16 &loneSurrogate : testValues)
	    {
	        size_t numEncodedBytes = utf8::EncodeTrueUtf8IntoAndNullTerminate(encodedBuffer, &loneSurrogate, charCount);
	        CHECK(numEncodedBytes == 3);
	        CheckIsUnicodeReplacementChar(encodedBuffer);
	    }
	}

	//
	// Test encoding of given utf16-encoded strings into another encoding
	//
	// In the expected encoded string, extra bytes are represented as 0
	//

	// Runs every case in testCases through func and compares the produced bytes
	// with the expected encoding.
	//
	// testCases: a C array (taken by reference so _countof can size it) of structs
	//            exposing a `surrogatePair` char16 array (the UTF-16 input) and a
	//            `utf8Encoding` utf8char_t array (the expected output, with any
	//            unused trailing bytes left as 0).
	// func:      callable invoked as func(outBuffer, inputUnits, unitCount); its
	//            return value is treated as the number of bytes written.
	template <typename TTestCase, typename TEncodingFunc>
	void RunUtf8EncodingTestCase(const TTestCase &testCases, const TEncodingFunc func)
	{
	    const int numTestCases = _countof(testCases);
	    const charcount_t charCount = _countof(testCases[0].surrogatePair);
	    const charcount_t maxEncodedByteCount = _countof(testCases[0].utf8Encoding);
	    utf8char_t encodedBuffer[maxEncodedByteCount + 1]; // +1 in case a null-terminating func is passed in

	    for (int i = 0; i < numTestCases; i++)
	    {
	        size_t numEncodedBytes = func(encodedBuffer, testCases[i].surrogatePair, charCount);
	        CHECK(numEncodedBytes <= maxEncodedByteCount);

	        // CHECK does not abort the test on failure, so never trust the reported
	        // count beyond the size of the expectation array: clamp it before
	        // indexing to avoid reading past utf8Encoding / encodedBuffer.
	        const size_t bytesToCompare = numEncodedBytes < maxEncodedByteCount ? numEncodedBytes : maxEncodedByteCount;
	        for (size_t j = 0; j < bytesToCompare; j++)
	        {
	            CHECK(encodedBuffer[j] == testCases[i].utf8Encoding[j]);
	        }

	        // Check and make sure there were no other bytes expected in the encoded string
	        for (size_t j = bytesToCompare; j < maxEncodedByteCount; j++)
	        {
	            CHECK(testCases[i].utf8Encoding[j] == 0);
	        }
	    }
	}

	TEST_CASE("CodexTest_EncodeCesu_PairedSurrogates", "[CodexTest]")
	{
	    // Each of these test cases verifies the encoding
	    // of a single surrogate pair into a 6 byte CESU string
	    // Each surrogate-pair unit is encoded separately into utf8
	    struct TestCase
	    {
	        char16     surrogatePair[2]; // high surrogate, low surrogate
	        utf8char_t utf8Encoding[6];  // 3 bytes per surrogate
	    };

	    TestCase testCases[] = {
	        { { 0xD800, 0xDC00 }, { 0xED, 0xA0, 0x80, 0xED, 0xB0, 0x80 } }, // U+010000 LINEAR B SYLLABLE B008 A character
	        { { 0xD800, 0xDFFF }, { 0xED, 0xA0, 0x80, 0xED, 0xBF, 0xBF } }, // U+0103FF
	        { { 0xDB7F, 0xDC00 }, { 0xED, 0xAD, 0xBF, 0xED, 0xB0, 0x80 } }, // U+0EFC00
	        { { 0xDB7F, 0xDFFF }, { 0xED, 0xAD, 0xBF, 0xED, 0xBF, 0xBF } }, // U+0EFFFF
	        { { 0xDB80, 0xDC00 }, { 0xED, 0xAE, 0x80, 0xED, 0xB0, 0x80 } }, // U+0F0000 Plane 15 Private Use First
	        { { 0xDB80, 0xDFFF }, { 0xED, 0xAE, 0x80, 0xED, 0xBF, 0xBF } }, // U+0F03FF
	        { { 0xDBFF, 0xDC00 }, { 0xED, 0xAF, 0xBF, 0xED, 0xB0, 0x80 } }, // U+10FC00
	        { { 0xDBFF, 0xDFFF }, { 0xED, 0xAF, 0xBF, 0xED, 0xBF, 0xBF } }  // U+10FFFF
	    };

	    // The cast must name a pointer-to-function type whose parameters match how
	    // the helper calls it: (output byte buffer, input char16 units, unit count).
	    // It selects the utf8::EncodeInto variant with exactly this signature
	    // (presumably it is overloaded or templated - confirm in Utf8Codex.h).
	    RunUtf8EncodingTestCase(testCases, static_cast<size_t (*)(utf8char_t*, const char16*, charcount_t)>(utf8::EncodeInto));
	}

	TEST_CASE("CodexTest_EncodeUtf8_PairedSurrogates", "[CodexTest]")
	{
	    // One case = one surrogate pair and the 4 UTF-8 bytes it should produce:
	    // the pair is combined back into its original codepoint, which is then
	    // encoded as a single 4-byte sequence.
	    struct SurrogatePairCase
	    {
	        char16     surrogatePair[2]; // high surrogate, low surrogate
	        utf8char_t utf8Encoding[4];  // expected 4-byte UTF-8 sequence
	    };

	    SurrogatePairCase pairCases[] = {
	        { { 0xD800, 0xDC00 }, { 0xF0, 0x90, 0x80, 0x80 } }, // U+010000 LINEAR B SYLLABLE B008 A character
	        { { 0xD800, 0xDFFF }, { 0xF0, 0x90, 0x8F, 0xBF } }, // U+0103FF
	        { { 0xDB7F, 0xDC00 }, { 0xF3, 0xAF, 0xB0, 0x80 } }, // U+0EFC00
	        { { 0xDB7F, 0xDFFF }, { 0xF3, 0xAF, 0xBF, 0xBF } }, // U+0EFFFF
	        { { 0xDB80, 0xDC00 }, { 0xF3, 0xB0, 0x80, 0x80 } }, // U+0F0000 Plane 15 Private Use First
	        { { 0xDB80, 0xDFFF }, { 0xF3, 0xB0, 0x8F, 0xBF } }, // U+0F03FF
	        { { 0xDBFF, 0xDC00 }, { 0xF4, 0x8F, 0xB0, 0x80 } }, // U+10FC00
	        { { 0xDBFF, 0xDFFF }, { 0xF4, 0x8F, 0xBF, 0xBF } }  // U+10FFFF
	    };

	    RunUtf8EncodingTestCase(pairCases, utf8::EncodeTrueUtf8IntoAndNullTerminate);
	}

	TEST_CASE("CodexTest_EncodeUtf8_NonCharacters", "[CodexTest]")
	{
	    // U+FFFE and U+FFFF do not represent characters; the expectation here is
	    // that they are still encoded as ordinary 3-byte sequences.
	    struct NonCharacterCase
	    {
	        char16     surrogatePair[1]; // single UTF-16 unit (field name is dictated by the shared helper)
	        utf8char_t utf8Encoding[3];  // expected 3-byte UTF-8 sequence
	    };

	    NonCharacterCase nonCharacterCases[] = {
	        { { 0xFFFE }, { 0xEF, 0xBF, 0xBE } }, // U+FFFE
	        { { 0xFFFF }, { 0xEF, 0xBF, 0xBF } }  // U+FFFF
	    };

	    RunUtf8EncodingTestCase(nonCharacterCases, utf8::EncodeTrueUtf8IntoAndNullTerminate);
	}

	TEST_CASE("CodexTest_EncodeUtf8_BoundaryChars", "[CodexTest]")
	{
	    // Boundary values: the code units on either side of the surrogate range
	    // (U+D7FF, U+E000), the replacement character itself (U+FFFD), and the
	    // highest codepoint expressible as a surrogate pair (U+10FFFF).

	    // Inputs that fit in one UTF-16 unit and encode to 3 bytes.
	    struct SingleChar16TestCase
	    {
	        char16     surrogatePair[1];
	        utf8char_t utf8Encoding[3];
	    };

	    // Inputs that need a surrogate pair and encode to 4 bytes.
	    struct TwoChar16TestCase
	    {
	        char16     surrogatePair[2];
	        utf8char_t utf8Encoding[4];
	    };

	    SingleChar16TestCase singleUnitCases[] = {
	        { { 0xD7FF }, { 0xED, 0x9F, 0xBF } }, // U+D7FF
	        { { 0xE000 }, { 0xEE, 0x80, 0x80 } }, // U+E000
	        { { 0xFFFD }, { 0xEF, 0xBF, 0xBD } }  // U+FFFD
	    };

	    TwoChar16TestCase twoUnitCases[] = {
	        { { 0xDBFF, 0xDFFF }, { 0xF4, 0x8F, 0xBF, 0xBF } } // U+10FFFF
	    };

	    RunUtf8EncodingTestCase(singleUnitCases, utf8::EncodeTrueUtf8IntoAndNullTerminate);
	    RunUtf8EncodingTestCase(twoUnitCases, utf8::EncodeTrueUtf8IntoAndNullTerminate);
	}

	TEST_CASE("CodexTest_EncodeUtf8_SimpleCharacters", "[CodexTest]")
	{
	    // Ordinary characters, one for each of the 1-, 2- and 3-byte UTF-8 lengths.
	    // Encodings shorter than 3 bytes leave the rest of utf8Encoding
	    // zero-initialized, which the shared helper treats as "no byte expected".
	    struct SimpleCharCase
	    {
	        char16     surrogatePair[1]; // single UTF-16 unit (field name is dictated by the shared helper)
	        utf8char_t utf8Encoding[3];  // expected bytes, zero-padded
	    };

	    SimpleCharCase simpleCases[] = {
	        { { 0x0024 }, { 0x24 } },             // U+0024 - Dollar Symbol
	        { { 0x00A2 }, { 0xC2, 0xA2 } },       // U+00A2 - Cent symbol
	        { { 0x20AC }, { 0xE2, 0x82, 0xAC } }  // U+20AC - Euro symbol
	    };

	    RunUtf8EncodingTestCase(simpleCases, utf8::EncodeTrueUtf8IntoAndNullTerminate);
	}

	TEST_CASE("CodexTest_EncodeTrueUtf8_SimpleString", "[CodexTest]")
	{
	    // ASCII-only input: every UTF-16 unit is expected to come out as a single
	    // byte with the same value.
	    const char16* sourceBuffer = L"abc";
	    const charcount_t charCount = 3;

	    // Up to 3 bytes per unit; +1 unit since the buffer will be null terminated.
	    utf8char_t encodedBuffer[(charCount + 1) * 3];

	    size_t numEncodedBytes = utf8::EncodeTrueUtf8IntoAndNullTerminate(encodedBuffer, sourceBuffer, charCount);
	    CHECK(numEncodedBytes == charCount);

	    for (charcount_t i = 0; i != charCount; ++i)
	    {
	        CHECK(sourceBuffer[i] == (const char16)encodedBuffer[i]);
	    }
	}

	template <typename TTestCase, typename TDecodeFunc>
	void RunUtf8DecodeTestCase(const TTestCase &testCases, const TDecodeFunc func)
	{
	const int numTestCases = _countof(testCases);
	const charcount_t charCount = _countof(testCases[0].result);
	char16 decodedBuffer[charCount + 1]; // +1 in case a null-terminating func is passed in

	for (int i = 0; i < numTestCases; i++)
	{
	bool chunkEndsInTruncatedSequence = false;
	size_t decodedCount = func(decodedBuffer, testCases[i].utf8Encoding, testCases[i].utf8Encoding + testCases[i].bytesToDecode, utf8::DecodeOptions::doChunkedEncoding, &chunkEndsInTruncatedSequence);
	CHECK(decodedCount == testCases[i].expectedDecodedChars);

	for (size_t j = 0; j < decodedCount; j++)
	{
	CHECK(decodedBuffer[j] == testCases[i].result[j]);
	}

	CHECK(testCases[i].shouldEndInTruncation == chunkEndsInTruncatedSequence);
	}
	}

	TEST_CASE("CodexTest_DecodeUnitsInto_ChunkEndsInTruncatedSequence", "[CodexTest]")
	{
	    // Each case decodes the first `bytesToDecode` bytes of `utf8Encoding` as one
	    // chunk. When the chunk stops in the middle of a multi-byte sequence, the
	    // expectation is that nothing is emitted for it and truncation is reported.
	    struct TruncationCase
	    {
	        int bytesToDecode;            // length of the chunk handed to the decoder
	        size_t expectedDecodedChars;  // expected return value of the decoder
	        bool shouldEndInTruncation;   // expected value of the truncation out-flag
	        char16 result[8];             // expected decoded units (only the first expectedDecodedChars matter)
	        utf8char_t utf8Encoding[8];   // input bytes
	    };

	    TruncationCase truncationCases[] = {
	        { 2, 1, false, { 0xc1 }, { 0xc3, 0x81 } }, // Valid 2-byte sequence
	        { 1, 0, true, { 0x0 }, { 0xc3, 0x81 } }, // Valid 2-byte sequence truncated at the end of the chunk
	        { 2, 2, false, { 0xfffd, 0x79 }, { 0xc3, 0x79 } }, // Invalid 2-byte sequence
	        { 1, 0, true, { 0x0 }, { 0xc3, 0x79 } }, // Invalid 2-byte sequence truncated at the end of the chunk
	        { 3, 1, false, { 0x3042 }, { 0xe3, 0x81, 0x82 } }, // Valid 3-byte sequence
	        { 1, 0, true, { 0x0 }, { 0xe3, 0x81, 0x82 } }, // Valid 3-byte sequence truncated at the end of the chunk
	        { 2, 0, true, { 0x0 }, { 0xe3, 0x81, 0x82 } }, // Valid 3-byte sequence truncated at the end of the chunk
	        { 3, 3, false, { 0xfffd, 0x79, 0xfffd }, { 0xe3, 0x79, 0x82 } }, // Invalid 3-byte sequence
	        { 1, 0, true, { 0x0 }, { 0xe3, 0x79, 0x82 } }, // Invalid 3-byte sequence truncated at the end of the chunk
	        { 2, 0, true, { 0x0 }, { 0xe3, 0x79, 0x82 } }, // Invalid 3-byte sequence truncated at the end of the chunk
	        { 3, 3, false, { 0xfffd, 0xfffd, 0x79 }, { 0xe3, 0x81, 0x79 } }, // Invalid 3-byte sequence
	        { 1, 0, true, { 0x0 }, { 0xe3, 0x81, 0x79 } }, // Invalid 3-byte sequence truncated at the end of the chunk
	        { 2, 0, true, { 0x0 }, { 0xe3, 0x81, 0x79 } }, // Invalid 3-byte sequence truncated at the end of the chunk
	        { 4, 2, false, { 0xd9c4, 0xdc83 }, { 0xf2, 0x81, 0x82, 0x83 } }, // Valid 4-byte sequence
	        { 1, 0, true, { 0x0 }, { 0xf2, 0x81, 0x82, 0x83 } }, // Valid 4-byte sequence truncated at the end of the chunk
	        { 2, 0, true, { 0x0 }, { 0xf2, 0x81, 0x82, 0x83 } }, // Valid 4-byte sequence truncated at the end of the chunk
	        { 3, 0, true, { 0x0 }, { 0xf2, 0x81, 0x82, 0x83 } }, // Valid 4-byte sequence truncated at the end of the chunk
	        { 4, 4, false, { 0xfffd, 0x79, 0xfffd, 0xfffd }, { 0xf2, 0x79, 0x82, 0x83 } }, // Invalid 4-byte sequence
	        { 1, 0, true, { 0x0 }, { 0xf2, 0x79, 0x82, 0x83 } }, // Invalid 4-byte sequence truncated at the end of the chunk
	        { 2, 0, true, { 0x0 }, { 0xf2, 0x79, 0x82, 0x83 } }, // Invalid 4-byte sequence truncated at the end of the chunk
	        { 3, 0, true, { 0x0 }, { 0xf2, 0x79, 0x82, 0x83 } }, // Invalid 4-byte sequence truncated at the end of the chunk
	        { 4, 4, false, { 0xfffd, 0xfffd, 0x79, 0xfffd }, { 0xf2, 0x81, 0x79, 0x83 } }, // Invalid 4-byte sequence
	        { 1, 0, true, { 0x0 }, { 0xf2, 0x81, 0x79, 0x83 } }, // Invalid 4-byte sequence truncated at the end of the chunk
	        { 2, 0, true, { 0x0 }, { 0xf2, 0x81, 0x79, 0x83 } }, // Invalid 4-byte sequence truncated at the end of the chunk
	        { 3, 0, true, { 0x0 }, { 0xf2, 0x81, 0x79, 0x83 } }, // Invalid 4-byte sequence truncated at the end of the chunk
	        { 4, 4, false, { 0xfffd, 0xfffd, 0xfffd, 0x79 }, { 0xf2, 0x81, 0x82, 0x79 } }, // Invalid 4-byte sequence
	        { 1, 0, true, { 0x0 }, { 0xf2, 0x81, 0x82, 0x79 } }, // Invalid 4-byte sequence truncated at the end of the chunk
	        { 2, 0, true, { 0x0 }, { 0xf2, 0x81, 0x82, 0x79 } }, // Invalid 4-byte sequence truncated at the end of the chunk
	        { 3, 0, true, { 0x0 }, { 0xf2, 0x81, 0x82, 0x79 } }, // Invalid 4-byte sequence truncated at the end of the chunk
	    };

	    RunUtf8DecodeTestCase(truncationCases, utf8::DecodeUnitsIntoAndNullTerminateNoAdvance);
	}
	};