lib/Runtime/Library/UriHelper.cpp - external/github.com/Microsoft/ChakraCore - Git at Google

 //-------------------------------------------------------------------------------------------------------
 // Copyright (C) Microsoft. All rights reserved.
 // Licensed under the MIT license. See LICENSE.txt file in the project root for full license information.
 //-------------------------------------------------------------------------------------------------------
 #include "RuntimeLibraryPch.h"

 namespace Js
 {
     Var UriHelper::EncodeCoreURI(ScriptContext* scriptContext, Arguments& args, unsigned char flags )
     {
         AssertMsg(args.Info.Count > 0, "Should always have implicit 'this'");
         JavascriptString * strURI;
         //TODO make sure this string is pinned when the memory recycler is in
         if(args.Info.Count < 2)
         {
             strURI = scriptContext->GetLibrary()->GetUndefinedDisplayString();
         }
         else
         {

             if (JavascriptString::Is(args[1]))
             {
                 strURI = JavascriptString::FromVar(args[1]);
             }
             else
             {
                 strURI = JavascriptConversion::ToString(args[1], scriptContext);
             }
         }
         return Encode(strURI->GetSz(), strURI->GetLength(), flags, scriptContext);
     }

     unsigned char UriHelper::s_uriProps[128] =
     {
         //0x00  0x01  0x02  0x03  0x04  0x05  0x06  0x07  0x08  0x09  0x0a  0x0b  0x0c  0x0d  0x0e  0x0f
         0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
         //0x10  0x11  0x12  0x13  0x14  0x15  0x16  0x17  0x18  0x19  0x1a  0x1b  0x1c  0x1d  0x1e  0x1f
         0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
         //         !     "     #     $     %     &     '     (     )     *     +     ,     -     .     /
         0, 0x02,    0, 0x01, 0x01,    0, 0x01, 0x02, 0x02, 0x02, 0x02, 0x01, 0x01, 0x02, 0x02, 0x01,
         //   0     1     2     3     4     5     6     7     8     9     :     ;     <     =     >     ?
         0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x01, 0x01,    0, 0x01,    0, 0x01,
         //   @     A     B     C     D     E     F     G     H     I     J     K     L     M     N     O
         0x01, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
         //   P     Q     R     S     T     U     V     W     X     Y     Z     [     \     ]     ^     _
         0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,    0,    0,    0,    0, 0x02,
         //   `     a     b     c     d     e     f     g     h     i     j     k     l     m     n     o
         0, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
         //   p     q     r     s     t     u     v     w     x     y     z     {     |     }     ~  0x7f
         0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,    0,    0,    0, 0x02,    0,
     };

     // Convert 'uVal' to it's UTF-8 encoding in the array 'bUTF8'. Returns
     // the number of characters in the output array.
     // This routine assumes that it's input 'uVal' is a valid Unicode code-point value
     // and does no error checking.
     uint32 UriHelper::ToUTF8( uint32 uVal, BYTE bUTF8[MaxUTF8Len])
     {
         uint32 uRet;
         if( uVal <= 0x007F )
         {
             bUTF8[0] = (BYTE)uVal;
             uRet = 1;
         }
         else if( uVal <= 0x07FF )
         {
             uint32 z = uVal & 0x3F;
             uint32 y = uVal >> 6;
             bUTF8[0] = (BYTE) (0xC0 | y);
             bUTF8[1] = (BYTE) (0x80 | z);
             uRet = 2;
         }
         else if( uVal <= 0xFFFF )
         {
             Assert( uVal <= 0xD7FF || uVal >= 0xE000 );
             uint32 z = uVal & 0x3F;
             uint32 y = (uVal >> 6) & 0x3F;
             uint32 x = (uVal >> 12);
             bUTF8[0] = (BYTE) (0xE0 | x);
             bUTF8[1] = (BYTE) (0x80 | y);
             bUTF8[2] = (BYTE) (0x80 | z);
             uRet = 3;
         }
         else
         {
             uint32 z = uVal & 0x3F;
             uint32 y = (uVal >> 6) &0x3F;
             uint32 x = (uVal >> 12) &0x3F;
             uint32 w = (uVal >> 18);
             bUTF8[0] = (BYTE) (0xF0 | w);
             bUTF8[1] = (BYTE) (0x80 | x);
             bUTF8[2] = (BYTE) (0x80 | y);
             bUTF8[3] = (BYTE) (0x80 | z);
             uRet = 4;
         }

         return uRet;
     }

     // Return the Unicode code-point value of the UTF-8 encoding passed in as the
     // array 'bUTF8'. uLen is the number of characters in the UTF-8 encoding.
     // This routine assumes that a valid UTF-8 encoding of a character is passed in
     // and does no error checking.
     uint32 UriHelper::FromUTF8( BYTE bUTF8[MaxUTF8Len], uint32 uLen )
     {
         Assert( 1 <= uLen && uLen <= MaxUTF8Len );
         if( uLen == 1 )
         {
             return bUTF8[0];
         }
         else if( uLen == 2 )
         {
             return ((bUTF8[0] & 0x1F) << 6 ) | (bUTF8[1] & 0x3F);
         }
         else if( uLen == 3 )
         {
             return ((bUTF8[0] & 0x0F) << 12) | ((bUTF8[1] & 0x3F) << 6) | (bUTF8[2] & 0x3F);
         }
         else
         {
             Assert( uLen == 4 );
             return ((bUTF8[0] & 0x07) << 18) | ((bUTF8[1] & 0x3F) << 12) | ((bUTF8[2] & 0x3F) << 6 ) | (bUTF8[3] & 0x3F) ;
         }
     }

     // The Encode algorithm described in sec. 15.1.3 of the spec. The input string is
     // 'pSz' and the Unescaped set is described by the flags 'unescapedFlags'. The
     // output is a string var.
     Var UriHelper::Encode(__in_ecount(len) const  char16* pSz, uint32 len, unsigned char unescapedFlags, ScriptContext* scriptContext )
     {
         BYTE bUTF8[MaxUTF8Len];

         // pass 1 calculate output length and error check
         uint32 outputLen = 0;
         for( uint32 k = 0; k < len; k++ )
         {
             char16 c = pSz[k];
             uint32 uVal;
             if( InURISet(c, unescapedFlags) )
             {
                 outputLen = UInt32Math::Add(outputLen, 1);
             }
             else
             {
                 if( c >= 0xDC00 && c <= 0xDFFF )
                 {
                     JavascriptError::ThrowURIError(scriptContext, JSERR_URIEncodeError /* TODO-ERROR: _u("NEED MESSAGE") */);
                 }
                 else if( c < 0xD800 || c > 0xDBFF )
                 {
                     uVal = (uint32)c;
                 }
                 else
                 {
                     ++k;
                     if(k == len)
                     {
                         JavascriptError::ThrowURIError(scriptContext, JSERR_URIEncodeError /* TODO-ERROR: _u("NEED MESSAGE") */);
                     }
                     __analysis_assume(k < len); // because we throw exception if k==len
                     char16 c1 = pSz[k];
                     if( c1 < 0xDC00 || c1 > 0xDFFF )
                     {
                         JavascriptError::ThrowURIError(scriptContext, JSERR_URIEncodeError /* TODO-ERROR: _u("NEED MESSAGE") */);
                     }
                     uVal = (c - 0xD800) * 0x400 + (c1 - 0xDC00) + 0x10000;
                 }
                 uint32 utfLen = ToUTF8(uVal, bUTF8);
                 utfLen = UInt32Math::Mul(utfLen, 3);
                 outputLen = UInt32Math::Add(outputLen, utfLen);
             }
         }

         //pass 2 generate the encoded URI

         uint32 allocSize = UInt32Math::Add(outputLen, 1);
         char16* outURI = RecyclerNewArrayLeaf(scriptContext->GetRecycler(), char16, allocSize);
         char16* outCurrent = outURI;

         for( uint32 k = 0; k < len; k++ )
         {
             char16 c = pSz[k];
             uint32 uVal;
             if( InURISet(c, unescapedFlags) )
             {
                 __analysis_assume(outCurrent < outURI + allocSize);
                 *outCurrent++ = c;
             }
             else
             {
 #if DBG
                 if( c >= 0xDC00 && c <= 0xDFFF )
                 {
                     JavascriptError::ThrowURIError(scriptContext, VBSERR_InternalError /* TODO-ERROR: _u("NEED MESSAGE") */);
                 }
 #endif
                 if( c < 0xD800 || c > 0xDBFF )
                 {
                     uVal = (uint32)c;
                 }
                 else
                 {
                     ++k;
 #if DBG
                     if(k == len)
                     {
                         JavascriptError::ThrowURIError(scriptContext, VBSERR_InternalError /* TODO-ERROR: _u("NEED MESSAGE") */);
                     }
 #endif
                     __analysis_assume(k < len);// because we throw exception if k==len
                     char16 c1 = pSz[k];

 #if DBG
                     if( c1 < 0xDC00 || c1 > 0xDFFF )
                     {
                         JavascriptError::ThrowURIError(scriptContext, VBSERR_InternalError /* TODO-ERROR: _u("NEED MESSAGE") */);
                     }
 #endif
                     uVal = (c - 0xD800) * 0x400 + (c1 - 0xDC00) + 0x10000;
                 }

                 uint32 utfLen = ToUTF8(uVal, bUTF8);
                 for( uint32 j = 0; j < utfLen; j++ )
                 {
 #pragma prefast(suppress: 26014, "buffer length was calculated earlier");
                     swprintf_s(outCurrent, 4, _u("%%%02X"), (int)bUTF8[j] );
                     outCurrent +=3;
 #pragma prefast(default: 26014);
                 }
             }
         }
         AssertMsg(outURI + outputLen == outCurrent, " URI out buffer out of sync");
         __analysis_assume(outputLen + 1 == allocSize);
         outURI[outputLen] = _u('\0');

         return JavascriptString::NewCopyBuffer(outURI, outputLen, scriptContext);
     }

     Var UriHelper::DecodeCoreURI(ScriptContext* scriptContext, Arguments& args, unsigned char reservedFlags )
     {
         AssertMsg(args.Info.Count > 0, "Should always have implicit 'this'");
         JavascriptString * strURI;
         //TODO make sure this string is pinned when the memory recycler is in
         if(args.Info.Count < 2)
         {
             strURI = scriptContext->GetLibrary()->GetUndefinedDisplayString();
         }
         else
         {

             if (JavascriptString::Is(args[1]))
             {
                 strURI = JavascriptString::FromVar(args[1]);
             }
             else
             {
                 strURI = JavascriptConversion::ToString(args[1], scriptContext);
             }
         }
         return Decode(strURI->GetSz(), strURI->GetLength(), reservedFlags, scriptContext);
     }

     // The Decode algorithm described in sec. 15.1.3 of the spec. The input string is
     // 'pSZ' and the Reserved set is described by the flags 'reservedFlags'. The
     // output is a string var.
     Var UriHelper::Decode(__in_ecount(len) const char16* pSz, uint32 len, unsigned char reservedFlags, ScriptContext* scriptContext)
     {
         char16 c1;
         char16 c;
         // pass 1 calculate output length and error check
         uint32 outputLen = 0;
         for( uint32 k = 0; k < len; k++ )
         {
             c = pSz[k];

             if( c == '%')
             {
                 uint32 start = k;
                 if( k + 2 >= len )
                 {
                     JavascriptError::ThrowURIError(scriptContext, JSERR_URIDecodeError /* TODO-ERROR: _u("NEED MESSAGE") */);
                 }

                 // %-encoded components in a URI may only contain hexadecimal digits from the ASCII character set. 'swscanf_s'
                 // only supports those characters when decoding hexadecimal integers. 'iswxdigit' on the other hand, uses the
                 // current locale to see if the specified character maps to a hexadecimal digit, which causes it to consider some
                 // characters outside the ASCII character set to be hexadecimal digits, so we can't use that. 'swscanf_s' seems
                 // to be overkill for this, so using a simple function that parses two hex digits and produces their value.
                 BYTE b;
                 if(!DecodeByteFromHex(pSz[k + 1], pSz[k + 2], b))
                 {
                     JavascriptError::ThrowURIError(scriptContext, JSERR_URIDecodeError);
                 }

                 k += 2;

                 if( (b & 0x80) ==  0)
                 {
                     c1 = b;
                 }
                 else
                 {
                     int n;
                     for( n = 1; ((b << n) & 0x80) != 0; n++ )
                         ;

                     if( n == 1 || n > UriHelper::MaxUTF8Len )
                     {
                         JavascriptError::ThrowURIError(scriptContext, JSERR_URIDecodeError /* TODO-ERROR: _u("NEED MESSAGE") */);
                     }

                     BYTE bOctets[UriHelper::MaxUTF8Len];
                     bOctets[0] = b;

                     if( k + 3 * (n-1) >= len )
                     {
                         JavascriptError::ThrowURIError(scriptContext, JSERR_URIDecodeError /* TODO-ERROR: _u("NEED MESSAGE") */);
                     }

                     for( int j = 1; j < n; j++ )
                     {
                         if( pSz[++k] != '%' )
                         {
                             JavascriptError::ThrowURIError(scriptContext, JSERR_URIDecodeError /* TODO-ERROR: _u("NEED MESSAGE") */);
                         }

                         if(!DecodeByteFromHex(pSz[k + 1], pSz[k + 2], b))
                         {
                             JavascriptError::ThrowURIError(scriptContext, JSERR_URIDecodeError /* TODO-ERROR: _u("NEED MESSAGE") */);
                         }

                         // The two leading bits should be 10 for a valid UTF-8 encoding
                         if( (b & 0xC0) != 0x80)
                         {
                             JavascriptError::ThrowURIError(scriptContext, JSERR_URIDecodeError /* TODO-ERROR: _u("NEED MESSAGE") */);
                         }
                         k += 2;

                         bOctets[j] = b;
                     }

                     uint32 uVal = UriHelper::FromUTF8( bOctets, n );

                     if( uVal >= 0xD800 && uVal <= 0xDFFF)
                     {
                         JavascriptError::ThrowURIError(scriptContext, JSERR_URIDecodeError /* TODO-ERROR: _u("NEED MESSAGE") */);
                     }
                     if( uVal < 0x10000 )
                     {
                         c1 = (char16)uVal;
                     }
                     else if( uVal > 0x10ffff )
                     {
                         JavascriptError::ThrowURIError(scriptContext, JSERR_URIDecodeError /* TODO-ERROR: _u("NEED MESSAGE") */);
                     }
                     else
                     {
                         outputLen +=2;
                         continue;
                     }
                 }

                 if( ! UriHelper::InURISet( c1, reservedFlags ))
                 {
                     outputLen++;
                 }
                 else
                 {
                     outputLen += k - start + 1;
                 }
             }
             else // c is not '%'
             {
                 outputLen++;
             }
         }

         //pass 2 generate the decoded URI
         uint32 allocSize = UInt32Math::Add(outputLen, 1);
         char16* outURI = RecyclerNewArrayLeaf(scriptContext->GetRecycler(), char16, allocSize);
         char16* outCurrent = outURI;


         for( uint32 k = 0; k < len; k++ )
         {
             c = pSz[k];
             if( c == '%')
             {
                 uint32 start = k;
 #if DBG
                 Assert(!(k + 2 >= len));
                 if( k + 2 >= len )
                 {
                     JavascriptError::ThrowURIError(scriptContext, VBSERR_InternalError /* TODO-ERROR: _u("NEED MESSAGE") */);
                 }
 #endif
                 // Let OACR know some things about 'k' that we checked just above, to let it know that we are not going to
                 // overflow later. The same checks are done in the first pass in non-debug builds, and the conditions
                 // checked upon in the first and second pass are the same.
                 __analysis_assume(!(k + 2 >= len));

                 BYTE b;
                 if(!DecodeByteFromHex(pSz[k + 1], pSz[k + 2], b))
                 {
 #if DBG
                     AssertMsg(false, "!DecodeByteFromHex(pSz[k + 1], pSz[k + 2], b)");
                     JavascriptError::ThrowURIError(scriptContext, VBSERR_InternalError /* TODO-ERROR: _u("NEED MESSAGE") */);
 #endif
                 }

                 k += 2;

                 if( (b & 0x80) ==  0)
                 {
                     c1 = b;
                 }
                 else
                 {
                     int n;
                     for( n = 1; ((b << n) & 0x80) != 0; n++ )
                         ;

                     if( n == 1 || n > UriHelper::MaxUTF8Len )
                     {
                         JavascriptError::ThrowURIError(scriptContext, VBSERR_InternalError /* TODO-ERROR: _u("NEED MESSAGE") */);
                     }

                     BYTE bOctets[UriHelper::MaxUTF8Len];
                     bOctets[0] = b;

 #if DBG
                     Assert(!(k + 3 * (n-1) >= len));
                     if( k + 3 * (n-1) >= len )
                     {
                         JavascriptError::ThrowURIError(scriptContext, VBSERR_InternalError /* TODO-ERROR: _u("NEED MESSAGE") */);
                     }
 #endif
                     // Let OACR know some things about 'k' that we checked just above, to let it know that we are not going to
                     // overflow later. The same checks are done in the first pass in non-debug builds, and the conditions
                     // checked upon in the first and second pass are the same.
                     __analysis_assume(!(k + 3 * (n-1) >= len));

                     for( int j = 1; j < n; j++ )
                     {
                         ++k;

 #if DBG
                         Assert(!(pSz[k] != '%'));
                         if( pSz[k] != '%' )
                         {
                             JavascriptError::ThrowURIError(scriptContext, VBSERR_InternalError /* TODO-ERROR: _u("NEED MESSAGE") */);
                         }
 #endif

                         if(!DecodeByteFromHex(pSz[k + 1], pSz[k + 2], b))
                         {
 #if DBG
                             AssertMsg(false, "!DecodeByteFromHex(pSz[k + 1], pSz[k + 2], b)");
                             JavascriptError::ThrowURIError(scriptContext, VBSERR_InternalError /* TODO-ERROR: _u("NEED MESSAGE") */);
 #endif
                         }

 #if DBG
                         // The two leading bits should be 10 for a valid UTF-8 encoding
                         Assert(!((b & 0xC0) != 0x80));
                         if( (b & 0xC0) != 0x80)
                         {
                             JavascriptError::ThrowURIError(scriptContext, VBSERR_InternalError /* TODO-ERROR: _u("NEED MESSAGE") */);
                         }
 #endif

                         k += 2;

                         bOctets[j] = b;
                     }

                     uint32 uVal = UriHelper::FromUTF8( bOctets, n );

 #if DBG
                     Assert(!(uVal >= 0xD800 && uVal <= 0xDFFF));
                     if( uVal >= 0xD800 && uVal <= 0xDFFF)
                     {
                         JavascriptError::ThrowURIError(scriptContext, VBSERR_InternalError /* TODO-ERROR: _u("NEED MESSAGE") */);
                     }
 #endif

                     if( uVal < 0x10000 )
                     {
                         c1 = (char16)uVal;
                     }

 #if DBG
                     else if( uVal > 0x10ffff )
                     {
                         AssertMsg(false, "uVal > 0x10ffff");
                         JavascriptError::ThrowURIError(scriptContext, VBSERR_InternalError /* TODO-ERROR: _u("NEED MESSAGE") */);
                     }
 #endif
                     else
                     {
                         uint32 l = (( uVal - 0x10000) & 0x3ff) + 0xdc00;
                         uint32 h = ((( uVal - 0x10000) >> 10) & 0x3ff) + 0xd800;

                         __analysis_assume(outCurrent + 2 <= outURI + allocSize);
                         *outCurrent++ = (char16)h;
                         *outCurrent++ = (char16)l;
                         continue;
                     }
                 }

                 if( !UriHelper::InURISet( c1, reservedFlags ))
                 {
                     __analysis_assume(outCurrent < outURI + allocSize);
                     *outCurrent++ = c1;
                 }
                 else
                 {
                     js_memcpy_s(outCurrent, (allocSize - (outCurrent - outURI)) * sizeof(char16), &pSz[start], (k - start + 1)*sizeof(char16));
                     outCurrent += k - start + 1;
                 }
             }
             else // c is not '%'
             {
                 __analysis_assume(outCurrent < outURI + allocSize);
                 *outCurrent++ = c;
             }
         }

         AssertMsg(outURI + outputLen == outCurrent, " URI out buffer out of sync");
         __analysis_assume(outputLen + 1 == allocSize);
         outURI[outputLen] = _u('\0');

         return JavascriptString::NewCopyBuffer(outURI, outputLen, scriptContext);
     }

     // Decodes a two-hexadecimal-digit wide character pair into the byte value it represents
     bool UriHelper::DecodeByteFromHex(const char16 digit1, const char16 digit2, unsigned char &value)
     {
         int x;
         if(!Js::NumberUtilities::FHexDigit(digit1, &x))
         {
             return false;
         }
         Assert(static_cast<unsigned int>(x) <= 0xfU);
         value = static_cast<unsigned char>(x) << 4;

         if(!Js::NumberUtilities::FHexDigit(digit2, &x))
         {
             return false;
         }
         Assert(static_cast<unsigned int>(x) <= 0xfU);
         value += static_cast<unsigned char>(x);

         return true;
     }
 }
	//-------------------------------------------------------------------------------------------------------
	// Copyright (C) Microsoft. All rights reserved.
	// Licensed under the MIT license. See LICENSE.txt file in the project root for full license information.
	//-------------------------------------------------------------------------------------------------------
	#include "RuntimeLibraryPch.h"

	namespace Js
	{
	Var UriHelper::EncodeCoreURI(ScriptContext* scriptContext, Arguments& args, unsigned char flags )
	{
	AssertMsg(args.Info.Count > 0, "Should always have implicit 'this'");
	JavascriptString * strURI;
	//TODO make sure this string is pinned when the memory recycler is in
	if(args.Info.Count < 2)
	{
	strURI = scriptContext->GetLibrary()->GetUndefinedDisplayString();
	}
	else
	{

	if (JavascriptString::Is(args[1]))
	{
	strURI = JavascriptString::FromVar(args[1]);
	}
	else
	{
	strURI = JavascriptConversion::ToString(args[1], scriptContext);
	}
	}
	return Encode(strURI->GetSz(), strURI->GetLength(), flags, scriptContext);
	}

	unsigned char UriHelper::s_uriProps[128] =
	{
	//0x00 0x01 0x02 0x03 0x04 0x05 0x06 0x07 0x08 0x09 0x0a 0x0b 0x0c 0x0d 0x0e 0x0f
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	//0x10 0x11 0x12 0x13 0x14 0x15 0x16 0x17 0x18 0x19 0x1a 0x1b 0x1c 0x1d 0x1e 0x1f
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	// ! " # $ % & ' ( ) * + , - . /
	0, 0x02, 0, 0x01, 0x01, 0, 0x01, 0x02, 0x02, 0x02, 0x02, 0x01, 0x01, 0x02, 0x02, 0x01,
	// 0 1 2 3 4 5 6 7 8 9 : ; < = > ?
	0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x01, 0x01, 0, 0x01, 0, 0x01,
	// @ A B C D E F G H I J K L M N O
	0x01, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
	// P Q R S T U V W X Y Z [ \ ] ^ _
	0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0, 0, 0, 0, 0x02,
	// ` a b c d e f g h i j k l m n o
	0, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
	// p q r s t u v w x y z { \| } ~ 0x7f
	0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0, 0, 0, 0x02, 0,
	};

	// Convert 'uVal' to it's UTF-8 encoding in the array 'bUTF8'. Returns
	// the number of characters in the output array.
	// This routine assumes that it's input 'uVal' is a valid Unicode code-point value
	// and does no error checking.
	uint32 UriHelper::ToUTF8( uint32 uVal, BYTE bUTF8[MaxUTF8Len])
	{
	uint32 uRet;
	if( uVal <= 0x007F )
	{
	bUTF8[0] = (BYTE)uVal;
	uRet = 1;
	}
	else if( uVal <= 0x07FF )
	{
	uint32 z = uVal & 0x3F;
	uint32 y = uVal >> 6;
	bUTF8[0] = (BYTE) (0xC0 \| y);
	bUTF8[1] = (BYTE) (0x80 \| z);
	uRet = 2;
	}
	else if( uVal <= 0xFFFF )
	{
	Assert( uVal <= 0xD7FF \|\| uVal >= 0xE000 );
	uint32 z = uVal & 0x3F;
	uint32 y = (uVal >> 6) & 0x3F;
	uint32 x = (uVal >> 12);
	bUTF8[0] = (BYTE) (0xE0 \| x);
	bUTF8[1] = (BYTE) (0x80 \| y);
	bUTF8[2] = (BYTE) (0x80 \| z);
	uRet = 3;
	}
	else
	{
	uint32 z = uVal & 0x3F;
	uint32 y = (uVal >> 6) &0x3F;
	uint32 x = (uVal >> 12) &0x3F;
	uint32 w = (uVal >> 18);
	bUTF8[0] = (BYTE) (0xF0 \| w);
	bUTF8[1] = (BYTE) (0x80 \| x);
	bUTF8[2] = (BYTE) (0x80 \| y);
	bUTF8[3] = (BYTE) (0x80 \| z);
	uRet = 4;
	}

	return uRet;
	}

	// Return the Unicode code-point value of the UTF-8 encoding passed in as the
	// array 'bUTF8'. uLen is the number of characters in the UTF-8 encoding.
	// This routine assumes that a valid UTF-8 encoding of a character is passed in
	// and does no error checking.
	uint32 UriHelper::FromUTF8( BYTE bUTF8[MaxUTF8Len], uint32 uLen )
	{
	Assert( 1 <= uLen && uLen <= MaxUTF8Len );
	if( uLen == 1 )
	{
	return bUTF8[0];
	}
	else if( uLen == 2 )
	{
	return ((bUTF8[0] & 0x1F) << 6 ) \| (bUTF8[1] & 0x3F);
	}
	else if( uLen == 3 )
	{
	return ((bUTF8[0] & 0x0F) << 12) \| ((bUTF8[1] & 0x3F) << 6) \| (bUTF8[2] & 0x3F);
	}
	else
	{
	Assert( uLen == 4 );
	return ((bUTF8[0] & 0x07) << 18) \| ((bUTF8[1] & 0x3F) << 12) \| ((bUTF8[2] & 0x3F) << 6 ) \| (bUTF8[3] & 0x3F) ;
	}
	}

	// The Encode algorithm described in sec. 15.1.3 of the spec. The input string is
	// 'pSz' and the Unescaped set is described by the flags 'unescapedFlags'. The
	// output is a string var.
	Var UriHelper::Encode(__in_ecount(len) const char16* pSz, uint32 len, unsigned char unescapedFlags, ScriptContext* scriptContext )
	{
	BYTE bUTF8[MaxUTF8Len];

	// pass 1 calculate output length and error check
	uint32 outputLen = 0;
	for( uint32 k = 0; k < len; k++ )
	{
	char16 c = pSz[k];
	uint32 uVal;
	if( InURISet(c, unescapedFlags) )
	{
	outputLen = UInt32Math::Add(outputLen, 1);
	}
	else
	{
	if( c >= 0xDC00 && c <= 0xDFFF )
	{
	JavascriptError::ThrowURIError(scriptContext, JSERR_URIEncodeError /* TODO-ERROR: _u("NEED MESSAGE") */);
	}
	else if( c < 0xD800 \|\| c > 0xDBFF )
	{
	uVal = (uint32)c;
	}
	else
	{
	++k;
	if(k == len)
	{
	JavascriptError::ThrowURIError(scriptContext, JSERR_URIEncodeError /* TODO-ERROR: _u("NEED MESSAGE") */);
	}
	__analysis_assume(k < len); // because we throw exception if k==len
	char16 c1 = pSz[k];
	if( c1 < 0xDC00 \|\| c1 > 0xDFFF )
	{
	JavascriptError::ThrowURIError(scriptContext, JSERR_URIEncodeError /* TODO-ERROR: _u("NEED MESSAGE") */);
	}
	uVal = (c - 0xD800) * 0x400 + (c1 - 0xDC00) + 0x10000;
	}
	uint32 utfLen = ToUTF8(uVal, bUTF8);
	utfLen = UInt32Math::Mul(utfLen, 3);
	outputLen = UInt32Math::Add(outputLen, utfLen);
	}
	}

	//pass 2 generate the encoded URI

	uint32 allocSize = UInt32Math::Add(outputLen, 1);
	char16* outURI = RecyclerNewArrayLeaf(scriptContext->GetRecycler(), char16, allocSize);
	char16* outCurrent = outURI;

	for( uint32 k = 0; k < len; k++ )
	{
	char16 c = pSz[k];
	uint32 uVal;
	if( InURISet(c, unescapedFlags) )
	{
	__analysis_assume(outCurrent < outURI + allocSize);
	*outCurrent++ = c;
	}
	else
	{
	#if DBG
	if( c >= 0xDC00 && c <= 0xDFFF )
	{
	JavascriptError::ThrowURIError(scriptContext, VBSERR_InternalError /* TODO-ERROR: _u("NEED MESSAGE") */);
	}
	#endif
	if( c < 0xD800 \|\| c > 0xDBFF )
	{
	uVal = (uint32)c;
	}
	else
	{
	++k;
	#if DBG
	if(k == len)
	{
	JavascriptError::ThrowURIError(scriptContext, VBSERR_InternalError /* TODO-ERROR: _u("NEED MESSAGE") */);
	}
	#endif
	__analysis_assume(k < len);// because we throw exception if k==len
	char16 c1 = pSz[k];

	#if DBG
	if( c1 < 0xDC00 \|\| c1 > 0xDFFF )
	{
	JavascriptError::ThrowURIError(scriptContext, VBSERR_InternalError /* TODO-ERROR: _u("NEED MESSAGE") */);
	}
	#endif
	uVal = (c - 0xD800) * 0x400 + (c1 - 0xDC00) + 0x10000;
	}

	uint32 utfLen = ToUTF8(uVal, bUTF8);
	for( uint32 j = 0; j < utfLen; j++ )
	{
	#pragma prefast(suppress: 26014, "buffer length was calculated earlier");
	swprintf_s(outCurrent, 4, _u("%%%02X"), (int)bUTF8[j] );
	outCurrent +=3;
	#pragma prefast(default: 26014);
	}
	}
	}
	AssertMsg(outURI + outputLen == outCurrent, " URI out buffer out of sync");
	__analysis_assume(outputLen + 1 == allocSize);
	outURI[outputLen] = _u('\0');

	return JavascriptString::NewCopyBuffer(outURI, outputLen, scriptContext);
	}

	Var UriHelper::DecodeCoreURI(ScriptContext* scriptContext, Arguments& args, unsigned char reservedFlags )
	{
	AssertMsg(args.Info.Count > 0, "Should always have implicit 'this'");
	JavascriptString * strURI;
	//TODO make sure this string is pinned when the memory recycler is in
	if(args.Info.Count < 2)
	{
	strURI = scriptContext->GetLibrary()->GetUndefinedDisplayString();
	}
	else
	{

	if (JavascriptString::Is(args[1]))
	{
	strURI = JavascriptString::FromVar(args[1]);
	}
	else
	{
	strURI = JavascriptConversion::ToString(args[1], scriptContext);
	}
	}
	return Decode(strURI->GetSz(), strURI->GetLength(), reservedFlags, scriptContext);
	}

	// The Decode algorithm described in sec. 15.1.3 of the spec. The input string is
	// 'pSZ' and the Reserved set is described by the flags 'reservedFlags'. The
	// output is a string var.
	Var UriHelper::Decode(__in_ecount(len) const char16* pSz, uint32 len, unsigned char reservedFlags, ScriptContext* scriptContext)
	{
	char16 c1;
	char16 c;
	// pass 1 calculate output length and error check
	uint32 outputLen = 0;
	for( uint32 k = 0; k < len; k++ )
	{
	c = pSz[k];

	if( c == '%')
	{
	uint32 start = k;
	if( k + 2 >= len )
	{
	JavascriptError::ThrowURIError(scriptContext, JSERR_URIDecodeError /* TODO-ERROR: _u("NEED MESSAGE") */);
	}

	// %-encoded components in a URI may only contain hexadecimal digits from the ASCII character set. 'swscanf_s'
	// only supports those characters when decoding hexadecimal integers. 'iswxdigit' on the other hand, uses the
	// current locale to see if the specified character maps to a hexadecimal digit, which causes it to consider some
	// characters outside the ASCII character set to be hexadecimal digits, so we can't use that. 'swscanf_s' seems
	// to be overkill for this, so using a simple function that parses two hex digits and produces their value.
	BYTE b;
	if(!DecodeByteFromHex(pSz[k + 1], pSz[k + 2], b))
	{
	JavascriptError::ThrowURIError(scriptContext, JSERR_URIDecodeError);
	}

	k += 2;

	if( (b & 0x80) == 0)
	{
	c1 = b;
	}
	else
	{
	int n;
	for( n = 1; ((b << n) & 0x80) != 0; n++ )
	;

	if( n == 1 \|\| n > UriHelper::MaxUTF8Len )
	{
	JavascriptError::ThrowURIError(scriptContext, JSERR_URIDecodeError /* TODO-ERROR: _u("NEED MESSAGE") */);
	}

	BYTE bOctets[UriHelper::MaxUTF8Len];
	bOctets[0] = b;

	if( k + 3 * (n-1) >= len )
	{
	JavascriptError::ThrowURIError(scriptContext, JSERR_URIDecodeError /* TODO-ERROR: _u("NEED MESSAGE") */);
	}

	for( int j = 1; j < n; j++ )
	{
	if( pSz[++k] != '%' )
	{
	JavascriptError::ThrowURIError(scriptContext, JSERR_URIDecodeError /* TODO-ERROR: _u("NEED MESSAGE") */);
	}

	if(!DecodeByteFromHex(pSz[k + 1], pSz[k + 2], b))
	{
	JavascriptError::ThrowURIError(scriptContext, JSERR_URIDecodeError /* TODO-ERROR: _u("NEED MESSAGE") */);
	}

	// The two leading bits should be 10 for a valid UTF-8 encoding
	if( (b & 0xC0) != 0x80)
	{
	JavascriptError::ThrowURIError(scriptContext, JSERR_URIDecodeError /* TODO-ERROR: _u("NEED MESSAGE") */);
	}
	k += 2;

	bOctets[j] = b;
	}

	uint32 uVal = UriHelper::FromUTF8( bOctets, n );

	if( uVal >= 0xD800 && uVal <= 0xDFFF)
	{
	JavascriptError::ThrowURIError(scriptContext, JSERR_URIDecodeError /* TODO-ERROR: _u("NEED MESSAGE") */);
	}
	if( uVal < 0x10000 )
	{
	c1 = (char16)uVal;
	}
	else if( uVal > 0x10ffff )
	{
	JavascriptError::ThrowURIError(scriptContext, JSERR_URIDecodeError /* TODO-ERROR: _u("NEED MESSAGE") */);
	}
	else
	{
	outputLen +=2;
	continue;
	}
	}

	if( ! UriHelper::InURISet( c1, reservedFlags ))
	{
	outputLen++;
	}
	else
	{
	outputLen += k - start + 1;
	}
	}
	else // c is not '%'
	{
	outputLen++;
	}
	}

	//pass 2 generate the decoded URI
	uint32 allocSize = UInt32Math::Add(outputLen, 1);
	char16* outURI = RecyclerNewArrayLeaf(scriptContext->GetRecycler(), char16, allocSize);
	char16* outCurrent = outURI;


	for( uint32 k = 0; k < len; k++ )
	{
	c = pSz[k];
	if( c == '%')
	{
	uint32 start = k;
	#if DBG
	Assert(!(k + 2 >= len));
	if( k + 2 >= len )
	{
	JavascriptError::ThrowURIError(scriptContext, VBSERR_InternalError /* TODO-ERROR: _u("NEED MESSAGE") */);
	}
	#endif
	// Let OACR know some things about 'k' that we checked just above, to let it know that we are not going to
	// overflow later. The same checks are done in the first pass in non-debug builds, and the conditions
	// checked upon in the first and second pass are the same.
	__analysis_assume(!(k + 2 >= len));

	BYTE b;
	if(!DecodeByteFromHex(pSz[k + 1], pSz[k + 2], b))
	{
	#if DBG
	AssertMsg(false, "!DecodeByteFromHex(pSz[k + 1], pSz[k + 2], b)");
	JavascriptError::ThrowURIError(scriptContext, VBSERR_InternalError /* TODO-ERROR: _u("NEED MESSAGE") */);
	#endif
	}

	k += 2;

	if( (b & 0x80) == 0)
	{
	c1 = b;
	}
	else
	{
	int n;
	for( n = 1; ((b << n) & 0x80) != 0; n++ )
	;

	if( n == 1 \|\| n > UriHelper::MaxUTF8Len )
	{
	JavascriptError::ThrowURIError(scriptContext, VBSERR_InternalError /* TODO-ERROR: _u("NEED MESSAGE") */);
	}

	BYTE bOctets[UriHelper::MaxUTF8Len];
	bOctets[0] = b;

	#if DBG
	Assert(!(k + 3 * (n-1) >= len));
	if( k + 3 * (n-1) >= len )
	{
	JavascriptError::ThrowURIError(scriptContext, VBSERR_InternalError /* TODO-ERROR: _u("NEED MESSAGE") */);
	}
	#endif
	// Let OACR know some things about 'k' that we checked just above, to let it know that we are not going to
	// overflow later. The same checks are done in the first pass in non-debug builds, and the conditions
	// checked upon in the first and second pass are the same.
	__analysis_assume(!(k + 3 * (n-1) >= len));

	for( int j = 1; j < n; j++ )
	{
	++k;

	#if DBG
	Assert(!(pSz[k] != '%'));
	if( pSz[k] != '%' )
	{
	JavascriptError::ThrowURIError(scriptContext, VBSERR_InternalError /* TODO-ERROR: _u("NEED MESSAGE") */);
	}
	#endif

	if(!DecodeByteFromHex(pSz[k + 1], pSz[k + 2], b))
	{
	#if DBG
	AssertMsg(false, "!DecodeByteFromHex(pSz[k + 1], pSz[k + 2], b)");
	JavascriptError::ThrowURIError(scriptContext, VBSERR_InternalError /* TODO-ERROR: _u("NEED MESSAGE") */);
	#endif
	}

	#if DBG
	// The two leading bits should be 10 for a valid UTF-8 encoding
	Assert(!((b & 0xC0) != 0x80));
	if( (b & 0xC0) != 0x80)
	{
	JavascriptError::ThrowURIError(scriptContext, VBSERR_InternalError /* TODO-ERROR: _u("NEED MESSAGE") */);
	}
	#endif

	k += 2;

	bOctets[j] = b;
	}

	uint32 uVal = UriHelper::FromUTF8( bOctets, n );

	#if DBG
	Assert(!(uVal >= 0xD800 && uVal <= 0xDFFF));
	if( uVal >= 0xD800 && uVal <= 0xDFFF)
	{
	JavascriptError::ThrowURIError(scriptContext, VBSERR_InternalError /* TODO-ERROR: _u("NEED MESSAGE") */);
	}
	#endif

	if( uVal < 0x10000 )
	{
	c1 = (char16)uVal;
	}

	#if DBG
	else if( uVal > 0x10ffff )
	{
	AssertMsg(false, "uVal > 0x10ffff");
	JavascriptError::ThrowURIError(scriptContext, VBSERR_InternalError /* TODO-ERROR: _u("NEED MESSAGE") */);
	}
	#endif
	else
	{
	uint32 l = (( uVal - 0x10000) & 0x3ff) + 0xdc00;
	uint32 h = ((( uVal - 0x10000) >> 10) & 0x3ff) + 0xd800;

	__analysis_assume(outCurrent + 2 <= outURI + allocSize);
	*outCurrent++ = (char16)h;
	*outCurrent++ = (char16)l;
	continue;
	}
	}

	if( !UriHelper::InURISet( c1, reservedFlags ))
	{
	__analysis_assume(outCurrent < outURI + allocSize);
	*outCurrent++ = c1;
	}
	else
	{
	js_memcpy_s(outCurrent, (allocSize - (outCurrent - outURI)) * sizeof(char16), &pSz[start], (k - start + 1)*sizeof(char16));
	outCurrent += k - start + 1;
	}
	}
	else // c is not '%'
	{
	__analysis_assume(outCurrent < outURI + allocSize);
	*outCurrent++ = c;
	}
	}

	AssertMsg(outURI + outputLen == outCurrent, " URI out buffer out of sync");
	__analysis_assume(outputLen + 1 == allocSize);
	outURI[outputLen] = _u('\0');

	return JavascriptString::NewCopyBuffer(outURI, outputLen, scriptContext);
	}

	// Decodes a two-hexadecimal-digit wide character pair into the byte value it represents
	bool UriHelper::DecodeByteFromHex(const char16 digit1, const char16 digit2, unsigned char &value)
	{
	int x;
	if(!Js::NumberUtilities::FHexDigit(digit1, &x))
	{
	return false;
	}
	Assert(static_cast<unsigned int>(x) <= 0xfU);
	value = static_cast<unsigned char>(x) << 4;

	if(!Js::NumberUtilities::FHexDigit(digit2, &x))
	{
	return false;
	}
	Assert(static_cast<unsigned int>(x) <= 0xfU);
	value += static_cast<unsigned char>(x);

	return true;
	}
	}