| //------------------------------------------------------------------------------------------------------- |
| // Copyright (C) Microsoft. All rights reserved. |
| // Licensed under the MIT license. See LICENSE.txt file in the project root for full license information. |
| //------------------------------------------------------------------------------------------------------- |
| #include "RuntimeLibraryPch.h" |
| |
| namespace Js |
| { |
| // Shared entry point for the URI-encoding built-ins. |
| // The value to encode is args[1]; when the call supplies no argument besides the implicit 'this', |
| // the library's "undefined" display string is encoded instead. A non-string argument is converted |
| // with JavascriptConversion::ToString. 'flags' is forwarded to Encode as its unescaped-set selector. |
| // Returns whatever Encode returns. |
| Var UriHelper::EncodeCoreURI(ScriptContext* scriptContext, Arguments& args, unsigned char flags ) |
| { |
|     AssertMsg(args.Info.Count > 0, "Should always have implicit 'this'"); |
| |
|     //TODO make sure this string is pinned when the memory recycler is in |
|     JavascriptString * input; |
|     if (args.Info.Count < 2) |
|     { |
|         // No explicit argument was passed. |
|         input = scriptContext->GetLibrary()->GetUndefinedDisplayString(); |
|     } |
|     else if (JavascriptString::Is(args[1])) |
|     { |
|         // Already a string: use it directly. |
|         input = JavascriptString::FromVar(args[1]); |
|     } |
|     else |
|     { |
|         input = JavascriptConversion::ToString(args[1], scriptContext); |
|     } |
| |
|     const char16* inputChars = input->GetSz(); |
|     uint32 inputLength = input->GetLength(); |
|     return Encode(inputChars, inputLength, flags, scriptContext); |
| } |
| |
| // Property flags for the 128 ASCII characters, indexed by character code. Each row below holds 16 |
| // consecutive entries and is preceded by a comment naming the characters it covers. |
| // Every entry is 0, 0x01 or 0x02: |
| //   0x01 - the characters # $ & + , / : ; = ? @ |
| //   0x02 - the ASCII letters and digits, and the marks ! ' ( ) * - . _ ~ |
| //   0    - everything else (control characters, space, " % < > [ \ ] ^ ` { | } and 0x7f) |
| // NOTE(review): 0x01 and 0x02 presumably are the "reserved" and "unescaped" flag bits that |
| // InURISet tests against the flags passed to Encode/Decode; the flag names are declared outside |
| // this file - confirm against the header. |
| unsigned char UriHelper::s_uriProps[128] = |
| { |
| //0x00 0x01 0x02 0x03 0x04 0x05 0x06 0x07 0x08 0x09 0x0a 0x0b 0x0c 0x0d 0x0e 0x0f |
| 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
| //0x10 0x11 0x12 0x13 0x14 0x15 0x16 0x17 0x18 0x19 0x1a 0x1b 0x1c 0x1d 0x1e 0x1f |
| 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
| // (space) ! " # $ % & ' ( ) * + , - . / |
| 0, 0x02, 0, 0x01, 0x01, 0, 0x01, 0x02, 0x02, 0x02, 0x02, 0x01, 0x01, 0x02, 0x02, 0x01, |
| // 0 1 2 3 4 5 6 7 8 9 : ; < = > ? |
| 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x01, 0x01, 0, 0x01, 0, 0x01, |
| // @ A B C D E F G H I J K L M N O |
| 0x01, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, |
| // P Q R S T U V W X Y Z [ \ ] ^ _ |
| 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0, 0, 0, 0, 0x02, |
| // ` a b c d e f g h i j k l m n o |
| 0, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, |
| // p q r s t u v w x y z { | } ~ 0x7f |
| 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0, 0, 0, 0x02, 0, |
| }; |
| |
| // Writes the UTF-8 encoding of the code point 'uVal' into 'bUTF8' and returns the number of |
| // octets written (1 to 4). |
| // The caller is expected to pass a valid Unicode code point: the only check performed is a debug |
| // Assert that a three-octet value does not lie in the surrogate range 0xD800-0xDFFF. |
| uint32 UriHelper::ToUTF8( uint32 uVal, BYTE bUTF8[MaxUTF8Len]) |
| { |
|     // One octet: 0xxxxxxx |
|     if( uVal <= 0x007F ) |
|     { |
|         bUTF8[0] = static_cast<BYTE>(uVal); |
|         return 1; |
|     } |
| |
|     // Two octets: 110yyyyy 10zzzzzz |
|     if( uVal <= 0x07FF ) |
|     { |
|         bUTF8[0] = static_cast<BYTE>(0xC0 | (uVal >> 6)); |
|         bUTF8[1] = static_cast<BYTE>(0x80 | (uVal & 0x3F)); |
|         return 2; |
|     } |
| |
|     // Three octets: 1110xxxx 10yyyyyy 10zzzzzz |
|     if( uVal <= 0xFFFF ) |
|     { |
|         Assert( uVal <= 0xD7FF || uVal >= 0xE000 ); |
|         bUTF8[0] = static_cast<BYTE>(0xE0 | (uVal >> 12)); |
|         bUTF8[1] = static_cast<BYTE>(0x80 | ((uVal >> 6) & 0x3F)); |
|         bUTF8[2] = static_cast<BYTE>(0x80 | (uVal & 0x3F)); |
|         return 3; |
|     } |
| |
|     // Four octets: 11110www 10xxxxxx 10yyyyyy 10zzzzzz |
|     bUTF8[0] = static_cast<BYTE>(0xF0 | (uVal >> 18)); |
|     bUTF8[1] = static_cast<BYTE>(0x80 | ((uVal >> 12) & 0x3F)); |
|     bUTF8[2] = static_cast<BYTE>(0x80 | ((uVal >> 6) & 0x3F)); |
|     bUTF8[3] = static_cast<BYTE>(0x80 | (uVal & 0x3F)); |
|     return 4; |
| } |
| |
| // Reassembles a code-point value from the 'uLen' octets (1 to MaxUTF8Len) stored in 'bUTF8'. |
| // Only the payload bits of each octet are used; the marker bits are masked off without being |
| // inspected, so the caller must have validated the sequence beforehand. No error checking is done |
| // here beyond debug Asserts on 'uLen'. |
| uint32 UriHelper::FromUTF8( BYTE bUTF8[MaxUTF8Len], uint32 uLen ) |
| { |
|     Assert( 1 <= uLen && uLen <= MaxUTF8Len ); |
| |
|     switch( uLen ) |
|     { |
|     case 1: |
|         return bUTF8[0]; |
| |
|     case 2: |
|         return ((bUTF8[0] & 0x1F) << 6 ) | (bUTF8[1] & 0x3F); |
| |
|     case 3: |
|         return ((bUTF8[0] & 0x0F) << 12) | ((bUTF8[1] & 0x3F) << 6) | (bUTF8[2] & 0x3F); |
| |
|     default: |
|         // Anything that is not 1, 2 or 3 is treated as a four-octet sequence. |
|         Assert( uLen == 4 ); |
|         return ((bUTF8[0] & 0x07) << 18) | ((bUTF8[1] & 0x3F) << 12) | ((bUTF8[2] & 0x3F) << 6 ) | (bUTF8[3] & 0x3F) ; |
|     } |
| } |
| |
| // The Encode algorithm described in sec. 15.1.3 of the spec. The input string is |
| // 'pSz' ('len' UTF-16 code units) and the Unescaped set is described by the flags |
| // 'unescapedFlags'. The output is a string var. |
| // |
| // Code units for which InURISet(c, unescapedFlags) holds are copied through unchanged. Every other |
| // code point is converted to UTF-8 and each octet is emitted as "%XX" using upper-case hex digits. |
| // A low surrogate that is not preceded by a high surrogate, or a high surrogate that is not |
| // followed by a low surrogate, raises a URIError (JSERR_URIEncodeError). |
| // |
| // The work is done in two passes: pass 1 validates the input and computes the exact output length |
| // (accumulated through UInt32Math::Add/Mul), pass 2 fills a buffer of exactly that size. |
| Var UriHelper::Encode(__in_ecount(len) const char16* pSz, uint32 len, unsigned char unescapedFlags, ScriptContext* scriptContext ) |
| { |
|     // Upper-case hexadecimal digits indexed by nibble value, used to build the "%XX" escapes. |
|     static const char16 hexDigits[] = _u("0123456789ABCDEF"); |
| |
|     BYTE bUTF8[MaxUTF8Len]; |
| |
|     // pass 1 calculate output length and error check |
|     uint32 outputLen = 0; |
|     for( uint32 k = 0; k < len; k++ ) |
|     { |
|         char16 c = pSz[k]; |
|         uint32 uVal; |
|         if( InURISet(c, unescapedFlags) ) |
|         { |
|             outputLen = UInt32Math::Add(outputLen, 1); |
|         } |
|         else |
|         { |
|             if( c >= 0xDC00 && c <= 0xDFFF ) |
|             { |
|                 // A low surrogate without a preceding high surrogate cannot be encoded. |
|                 JavascriptError::ThrowURIError(scriptContext, JSERR_URIEncodeError /* TODO-ERROR: _u("NEED MESSAGE") */); |
|             } |
|             else if( c < 0xD800 || c > 0xDBFF ) |
|             { |
|                 uVal = (uint32)c; |
|             } |
|             else |
|             { |
|                 // High surrogate: the next code unit must exist and must be a low surrogate. |
|                 ++k; |
|                 if(k == len) |
|                 { |
|                     JavascriptError::ThrowURIError(scriptContext, JSERR_URIEncodeError /* TODO-ERROR: _u("NEED MESSAGE") */); |
|                 } |
|                 __analysis_assume(k < len); // because we throw exception if k==len |
|                 char16 c1 = pSz[k]; |
|                 if( c1 < 0xDC00 || c1 > 0xDFFF ) |
|                 { |
|                     JavascriptError::ThrowURIError(scriptContext, JSERR_URIEncodeError /* TODO-ERROR: _u("NEED MESSAGE") */); |
|                 } |
|                 uVal = (c - 0xD800) * 0x400 + (c1 - 0xDC00) + 0x10000; |
|             } |
|             // Each UTF-8 octet becomes three output characters ("%XX"). |
|             uint32 utfLen = ToUTF8(uVal, bUTF8); |
|             utfLen = UInt32Math::Mul(utfLen, 3); |
|             outputLen = UInt32Math::Add(outputLen, utfLen); |
|         } |
|     } |
| |
|     //pass 2 generate the encoded URI |
| |
|     uint32 allocSize = UInt32Math::Add(outputLen, 1); |
|     char16* outURI = RecyclerNewArrayLeaf(scriptContext->GetRecycler(), char16, allocSize); |
|     char16* outCurrent = outURI; |
| |
|     for( uint32 k = 0; k < len; k++ ) |
|     { |
|         char16 c = pSz[k]; |
|         uint32 uVal; |
|         if( InURISet(c, unescapedFlags) ) |
|         { |
|             __analysis_assume(outCurrent < outURI + allocSize); |
|             *outCurrent++ = c; |
|         } |
|         else |
|         { |
|             // The error conditions below were already rejected in pass 1; they are re-checked in |
|             // debug builds only. |
| #if DBG |
|             if( c >= 0xDC00 && c <= 0xDFFF ) |
|             { |
|                 JavascriptError::ThrowURIError(scriptContext, VBSERR_InternalError /* TODO-ERROR: _u("NEED MESSAGE") */); |
|             } |
| #endif |
|             if( c < 0xD800 || c > 0xDBFF ) |
|             { |
|                 uVal = (uint32)c; |
|             } |
|             else |
|             { |
|                 ++k; |
| #if DBG |
|                 if(k == len) |
|                 { |
|                     JavascriptError::ThrowURIError(scriptContext, VBSERR_InternalError /* TODO-ERROR: _u("NEED MESSAGE") */); |
|                 } |
| #endif |
|                 __analysis_assume(k < len);// because we throw exception if k==len |
|                 char16 c1 = pSz[k]; |
| |
| #if DBG |
|                 if( c1 < 0xDC00 || c1 > 0xDFFF ) |
|                 { |
|                     JavascriptError::ThrowURIError(scriptContext, VBSERR_InternalError /* TODO-ERROR: _u("NEED MESSAGE") */); |
|                 } |
| #endif |
|                 uVal = (c - 0xD800) * 0x400 + (c1 - 0xDC00) + 0x10000; |
|             } |
| |
|             uint32 utfLen = ToUTF8(uVal, bUTF8); |
|             for( uint32 j = 0; j < utfLen; j++ ) |
|             { |
|                 // Emit "%XX" directly: two table lookups instead of a formatted-print call per |
|                 // octet. The room for these three characters was reserved in pass 1. |
|                 __analysis_assume(outCurrent + 3 <= outURI + allocSize); |
|                 const BYTE octet = bUTF8[j]; |
|                 *outCurrent++ = _u('%'); |
|                 *outCurrent++ = hexDigits[octet >> 4]; |
|                 *outCurrent++ = hexDigits[octet & 0x0F]; |
|             } |
|         } |
|     } |
|     AssertMsg(outURI + outputLen == outCurrent, " URI out buffer out of sync"); |
|     __analysis_assume(outputLen + 1 == allocSize); |
|     outURI[outputLen] = _u('\0'); |
| |
|     return JavascriptString::NewCopyBuffer(outURI, outputLen, scriptContext); |
| } |
| |
| // Shared entry point for the URI-decoding built-ins. |
| // The value to decode is args[1]; when the call supplies no argument besides the implicit 'this', |
| // the library's "undefined" display string is decoded instead. A non-string argument is converted |
| // with JavascriptConversion::ToString. 'reservedFlags' is forwarded to Decode as its reserved-set |
| // selector. Returns whatever Decode returns. |
| Var UriHelper::DecodeCoreURI(ScriptContext* scriptContext, Arguments& args, unsigned char reservedFlags ) |
| { |
|     AssertMsg(args.Info.Count > 0, "Should always have implicit 'this'"); |
| |
|     //TODO make sure this string is pinned when the memory recycler is in |
|     JavascriptString * input; |
|     if (args.Info.Count < 2) |
|     { |
|         // No explicit argument was passed. |
|         input = scriptContext->GetLibrary()->GetUndefinedDisplayString(); |
|     } |
|     else if (JavascriptString::Is(args[1])) |
|     { |
|         // Already a string: use it directly. |
|         input = JavascriptString::FromVar(args[1]); |
|     } |
|     else |
|     { |
|         input = JavascriptConversion::ToString(args[1], scriptContext); |
|     } |
| |
|     const char16* inputChars = input->GetSz(); |
|     uint32 inputLength = input->GetLength(); |
|     return Decode(inputChars, inputLength, reservedFlags, scriptContext); |
| } |
| |
| // The Decode algorithm described in sec. 15.1.3 of the spec. The input string is |
| // 'pSz' ('len' UTF-16 code units) and the Reserved set is described by the flags |
| // 'reservedFlags'. The output is a string var. |
| // |
| // Every "%XX" escape (or run of escapes forming one UTF-8 sequence) is decoded to a code point. |
| // If the decoded character is in the set selected by 'reservedFlags', the original escape text is |
| // copied to the output unchanged; otherwise the decoded character is written (as a surrogate pair |
| // for code points of 0x10000 and above). Characters other than '%' are copied through. |
| // |
| // A URIError (JSERR_URIDecodeError) is raised for a truncated escape, non-hex digits, a bad lead or |
| // continuation octet, a decoded surrogate, a value above 0x10FFFF, and - as required by the spec |
| // and RFC 3629 - an overlong UTF-8 form such as %C0%80. |
| // |
| // The work is done in two passes: pass 1 validates the input and computes the output length, |
| // pass 2 fills a buffer of that size. Pass 2 repeats the validation in debug builds only. |
| Var UriHelper::Decode(__in_ecount(len) const char16* pSz, uint32 len, unsigned char reservedFlags, ScriptContext* scriptContext) |
| { |
|     char16 c1; |
|     char16 c; |
|     // pass 1 calculate output length and error check |
|     uint32 outputLen = 0; |
|     for( uint32 k = 0; k < len; k++ ) |
|     { |
|         c = pSz[k]; |
| |
|         if( c == '%') |
|         { |
|             uint32 start = k; |
|             if( k + 2 >= len ) |
|             { |
|                 JavascriptError::ThrowURIError(scriptContext, JSERR_URIDecodeError /* TODO-ERROR: _u("NEED MESSAGE") */); |
|             } |
| |
|             // %-encoded components in a URI may only contain hexadecimal digits from the ASCII character set. 'swscanf_s' |
|             // only supports those characters when decoding hexadecimal integers. 'iswxdigit' on the other hand, uses the |
|             // current locale to see if the specified character maps to a hexadecimal digit, which causes it to consider some |
|             // characters outside the ASCII character set to be hexadecimal digits, so we can't use that. 'swscanf_s' seems |
|             // to be overkill for this, so using a simple function that parses two hex digits and produces their value. |
|             BYTE b; |
|             if(!DecodeByteFromHex(pSz[k + 1], pSz[k + 2], b)) |
|             { |
|                 JavascriptError::ThrowURIError(scriptContext, JSERR_URIDecodeError); |
|             } |
| |
|             k += 2; |
| |
|             if( (b & 0x80) == 0) |
|             { |
|                 c1 = b; |
|             } |
|             else |
|             { |
|                 // Count the leading 1 bits of the first octet: that is the length of the sequence. |
|                 int n; |
|                 for( n = 1; ((b << n) & 0x80) != 0; n++ ) |
|                     ; |
| |
|                 if( n == 1 || n > UriHelper::MaxUTF8Len ) |
|                 { |
|                     JavascriptError::ThrowURIError(scriptContext, JSERR_URIDecodeError /* TODO-ERROR: _u("NEED MESSAGE") */); |
|                 } |
| |
|                 BYTE bOctets[UriHelper::MaxUTF8Len]; |
|                 bOctets[0] = b; |
| |
|                 // The remaining n-1 octets each need three characters ("%XX"). |
|                 if( k + 3 * (n-1) >= len ) |
|                 { |
|                     JavascriptError::ThrowURIError(scriptContext, JSERR_URIDecodeError /* TODO-ERROR: _u("NEED MESSAGE") */); |
|                 } |
| |
|                 for( int j = 1; j < n; j++ ) |
|                 { |
|                     if( pSz[++k] != '%' ) |
|                     { |
|                         JavascriptError::ThrowURIError(scriptContext, JSERR_URIDecodeError /* TODO-ERROR: _u("NEED MESSAGE") */); |
|                     } |
| |
|                     if(!DecodeByteFromHex(pSz[k + 1], pSz[k + 2], b)) |
|                     { |
|                         JavascriptError::ThrowURIError(scriptContext, JSERR_URIDecodeError /* TODO-ERROR: _u("NEED MESSAGE") */); |
|                     } |
| |
|                     // The two leading bits should be 10 for a valid UTF-8 encoding |
|                     if( (b & 0xC0) != 0x80) |
|                     { |
|                         JavascriptError::ThrowURIError(scriptContext, JSERR_URIDecodeError /* TODO-ERROR: _u("NEED MESSAGE") */); |
|                     } |
|                     k += 2; |
| |
|                     bOctets[j] = b; |
|                 } |
| |
|                 uint32 uVal = UriHelper::FromUTF8( bOctets, n ); |
| |
|                 // Reject overlong forms: a sequence of n octets must encode a value that does not |
|                 // fit in n-1 octets. Without this check %C0%80 would decode to U+0000. |
|                 const uint32 minValueForLength = (n == 2) ? 0x80 : ((n == 3) ? 0x800 : 0x10000); |
|                 if( uVal < minValueForLength ) |
|                 { |
|                     JavascriptError::ThrowURIError(scriptContext, JSERR_URIDecodeError /* TODO-ERROR: _u("NEED MESSAGE") */); |
|                 } |
| |
|                 if( uVal >= 0xD800 && uVal <= 0xDFFF) |
|                 { |
|                     JavascriptError::ThrowURIError(scriptContext, JSERR_URIDecodeError /* TODO-ERROR: _u("NEED MESSAGE") */); |
|                 } |
|                 if( uVal < 0x10000 ) |
|                 { |
|                     c1 = (char16)uVal; |
|                 } |
|                 else if( uVal > 0x10ffff ) |
|                 { |
|                     JavascriptError::ThrowURIError(scriptContext, JSERR_URIDecodeError /* TODO-ERROR: _u("NEED MESSAGE") */); |
|                 } |
|                 else |
|                 { |
|                     // Supplementary code point: written as a surrogate pair in pass 2. |
|                     outputLen +=2; |
|                     continue; |
|                 } |
|             } |
| |
|             if( ! UriHelper::InURISet( c1, reservedFlags )) |
|             { |
|                 outputLen++; |
|             } |
|             else |
|             { |
|                 // Character is in the reserved set: the escape text itself is kept. |
|                 outputLen += k - start + 1; |
|             } |
|         } |
|         else // c is not '%' |
|         { |
|             outputLen++; |
|         } |
|     } |
| |
|     //pass 2 generate the decoded URI |
|     uint32 allocSize = UInt32Math::Add(outputLen, 1); |
|     char16* outURI = RecyclerNewArrayLeaf(scriptContext->GetRecycler(), char16, allocSize); |
|     char16* outCurrent = outURI; |
| |
| |
|     for( uint32 k = 0; k < len; k++ ) |
|     { |
|         c = pSz[k]; |
|         if( c == '%') |
|         { |
|             uint32 start = k; |
| #if DBG |
|             Assert(!(k + 2 >= len)); |
|             if( k + 2 >= len ) |
|             { |
|                 JavascriptError::ThrowURIError(scriptContext, VBSERR_InternalError /* TODO-ERROR: _u("NEED MESSAGE") */); |
|             } |
| #endif |
|             // Let OACR know some things about 'k' that we checked just above, to let it know that we are not going to |
|             // overflow later. The same checks are done in the first pass in non-debug builds, and the conditions |
|             // checked upon in the first and second pass are the same. |
|             __analysis_assume(!(k + 2 >= len)); |
| |
|             BYTE b; |
|             if(!DecodeByteFromHex(pSz[k + 1], pSz[k + 2], b)) |
|             { |
| #if DBG |
|                 AssertMsg(false, "!DecodeByteFromHex(pSz[k + 1], pSz[k + 2], b)"); |
|                 JavascriptError::ThrowURIError(scriptContext, VBSERR_InternalError /* TODO-ERROR: _u("NEED MESSAGE") */); |
| #endif |
|             } |
| |
|             k += 2; |
| |
|             if( (b & 0x80) == 0) |
|             { |
|                 c1 = b; |
|             } |
|             else |
|             { |
|                 int n; |
|                 for( n = 1; ((b << n) & 0x80) != 0; n++ ) |
|                     ; |
| |
|                 if( n == 1 || n > UriHelper::MaxUTF8Len ) |
|                 { |
|                     JavascriptError::ThrowURIError(scriptContext, VBSERR_InternalError /* TODO-ERROR: _u("NEED MESSAGE") */); |
|                 } |
| |
|                 BYTE bOctets[UriHelper::MaxUTF8Len]; |
|                 bOctets[0] = b; |
| |
| #if DBG |
|                 Assert(!(k + 3 * (n-1) >= len)); |
|                 if( k + 3 * (n-1) >= len ) |
|                 { |
|                     JavascriptError::ThrowURIError(scriptContext, VBSERR_InternalError /* TODO-ERROR: _u("NEED MESSAGE") */); |
|                 } |
| #endif |
|                 // Let OACR know some things about 'k' that we checked just above, to let it know that we are not going to |
|                 // overflow later. The same checks are done in the first pass in non-debug builds, and the conditions |
|                 // checked upon in the first and second pass are the same. |
|                 __analysis_assume(!(k + 3 * (n-1) >= len)); |
| |
|                 for( int j = 1; j < n; j++ ) |
|                 { |
|                     ++k; |
| |
| #if DBG |
|                     Assert(!(pSz[k] != '%')); |
|                     if( pSz[k] != '%' ) |
|                     { |
|                         JavascriptError::ThrowURIError(scriptContext, VBSERR_InternalError /* TODO-ERROR: _u("NEED MESSAGE") */); |
|                     } |
| #endif |
| |
|                     if(!DecodeByteFromHex(pSz[k + 1], pSz[k + 2], b)) |
|                     { |
| #if DBG |
|                         AssertMsg(false, "!DecodeByteFromHex(pSz[k + 1], pSz[k + 2], b)"); |
|                         JavascriptError::ThrowURIError(scriptContext, VBSERR_InternalError /* TODO-ERROR: _u("NEED MESSAGE") */); |
| #endif |
|                     } |
| |
| #if DBG |
|                     // The two leading bits should be 10 for a valid UTF-8 encoding |
|                     Assert(!((b & 0xC0) != 0x80)); |
|                     if( (b & 0xC0) != 0x80) |
|                     { |
|                         JavascriptError::ThrowURIError(scriptContext, VBSERR_InternalError /* TODO-ERROR: _u("NEED MESSAGE") */); |
|                     } |
| #endif |
| |
|                     k += 2; |
| |
|                     bOctets[j] = b; |
|                 } |
| |
|                 uint32 uVal = UriHelper::FromUTF8( bOctets, n ); |
| |
| #if DBG |
|                 // Overlong forms were rejected in pass 1. |
|                 Assert(!(n == 2 && uVal < 0x80) && !(n == 3 && uVal < 0x800) && !(n == 4 && uVal < 0x10000)); |
|                 Assert(!(uVal >= 0xD800 && uVal <= 0xDFFF)); |
|                 if( uVal >= 0xD800 && uVal <= 0xDFFF) |
|                 { |
|                     JavascriptError::ThrowURIError(scriptContext, VBSERR_InternalError /* TODO-ERROR: _u("NEED MESSAGE") */); |
|                 } |
| #endif |
| |
|                 if( uVal < 0x10000 ) |
|                 { |
|                     c1 = (char16)uVal; |
|                 } |
| |
| #if DBG |
|                 else if( uVal > 0x10ffff ) |
|                 { |
|                     AssertMsg(false, "uVal > 0x10ffff"); |
|                     JavascriptError::ThrowURIError(scriptContext, VBSERR_InternalError /* TODO-ERROR: _u("NEED MESSAGE") */); |
|                 } |
| #endif |
|                 else |
|                 { |
|                     // Split the supplementary code point into a high/low surrogate pair. |
|                     uint32 l = (( uVal - 0x10000) & 0x3ff) + 0xdc00; |
|                     uint32 h = ((( uVal - 0x10000) >> 10) & 0x3ff) + 0xd800; |
| |
|                     __analysis_assume(outCurrent + 2 <= outURI + allocSize); |
|                     *outCurrent++ = (char16)h; |
|                     *outCurrent++ = (char16)l; |
|                     continue; |
|                 } |
|             } |
| |
|             if( !UriHelper::InURISet( c1, reservedFlags )) |
|             { |
|                 __analysis_assume(outCurrent < outURI + allocSize); |
|                 *outCurrent++ = c1; |
|             } |
|             else |
|             { |
|                 // Reserved character: copy the original escape text (pSz[start..k]) verbatim. |
|                 js_memcpy_s(outCurrent, (allocSize - (outCurrent - outURI)) * sizeof(char16), &pSz[start], (k - start + 1)*sizeof(char16)); |
|                 outCurrent += k - start + 1; |
|             } |
|         } |
|         else // c is not '%' |
|         { |
|             __analysis_assume(outCurrent < outURI + allocSize); |
|             *outCurrent++ = c; |
|         } |
|     } |
| |
|     AssertMsg(outURI + outputLen == outCurrent, " URI out buffer out of sync"); |
|     __analysis_assume(outputLen + 1 == allocSize); |
|     outURI[outputLen] = _u('\0'); |
| |
|     return JavascriptString::NewCopyBuffer(outURI, outputLen, scriptContext); |
| } |
| |
| // Combines two hexadecimal digit characters into the byte they spell: 'digit1' supplies the high |
| // nibble and 'digit2' the low nibble. |
| // Returns true and stores the byte in 'value' when both characters are accepted by |
| // NumberUtilities::FHexDigit. Returns false otherwise; in that case 'value' is untouched if |
| // 'digit1' was rejected, and holds only the shifted high nibble if 'digit2' was rejected. |
| bool UriHelper::DecodeByteFromHex(const char16 digit1, const char16 digit2, unsigned char &value) |
| { |
|     int nibble; |
| |
|     if(Js::NumberUtilities::FHexDigit(digit1, &nibble)) |
|     { |
|         Assert(static_cast<unsigned int>(nibble) <= 0xfU); |
|         value = static_cast<unsigned char>(nibble) << 4; |
| |
|         if(Js::NumberUtilities::FHexDigit(digit2, &nibble)) |
|         { |
|             Assert(static_cast<unsigned int>(nibble) <= 0xfU); |
|             value += static_cast<unsigned char>(nibble); |
|             return true; |
|         } |
|     } |
| |
|     return false; |
| } |
| } |