| // |
| // Copyright (c) Microsoft. All rights reserved. |
| // Licensed under the MIT license. See LICENSE file in the project root for full license information. |
| // |
| |
| /*++ |
| |
| |
| |
| Module Name: |
| |
| unicode.cpp |
| |
| Abstract: |
| |
| Implementation of all functions related to Unicode support |
| |
| Revision History: |
| |
| |
| |
| --*/ |
| |
| #include "pal/thread.hpp" |
| |
| #include "pal/palinternal.h" |
| #include "pal/unicode_data.h" |
| #include "pal/dbgmsg.h" |
| #include "pal/file.h" |
| #include "pal/utf8.h" |
| #include "pal/locale.h" |
| #include "pal/cruntime.h" |
| #include "pal/stackstring.hpp" |
| |
| #if !(HAVE_PTHREAD_RWLOCK_T || HAVE_COREFOUNDATION) |
| #error Either pthread rwlocks or Core Foundation are required for Unicode support |
| #endif /* !(HAVE_PTHREAD_RWLOCK_T || HAVE_COREFOUNDATION) */ |
| |
| #include <pthread.h> |
| #include <locale.h> |
| #if !defined(__APPLE__) && !defined(__ANDROID__) |
| #include <libintl.h> |
| #endif // __APPLE__ |
| #include <errno.h> |
| #if HAVE_COREFOUNDATION |
| #include <CoreFoundation/CoreFoundation.h> |
| #endif // HAVE_COREFOUNDATION |
| |
| #include <debugmacrosext.h> |
| |
| using namespace CorUnix; |
| |
| SET_DEFAULT_DEBUG_CHANNEL(UNICODE); |
| |
| #if HAVE_COREFOUNDATION |
| |
| static CP_MAPPING CP_TO_NATIVE_TABLE[] = { |
| { 65001, kCFStringEncodingUTF8, 4, { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, |
| { 1252, kCFStringEncodingWindowsLatin1, 1, { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, |
| { 1251, kCFStringEncodingWindowsCyrillic, 1, { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, |
| { 1253, kCFStringEncodingWindowsGreek, 1, { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, |
| { 1254, kCFStringEncodingWindowsLatin5, 1, { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, |
| { 1258, kCFStringEncodingWindowsVietnamese, 1, { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, |
| { 932, kCFStringEncodingDOSJapanese, 2, { 129, 159, 224, 252, 0, 0, 0, 0, 0, 0, 0, 0 } }, |
| { 949, kCFStringEncodingDOSKorean, 2, { 129, 254, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } }, |
| { 950, kCFStringEncodingDOSChineseTrad, 2, { 129, 254, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } } |
| }; |
| |
| #else // HAVE_COREFOUNDATION |
| |
| static const CP_MAPPING CP_TO_NATIVE_TABLE[] = { |
| { 65001, "utf8", 4, { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } } |
| }; |
| |
| #endif // HAVE_COREFOUNDATION |
| |
| // The system's default (ANSI) code page is hardcoded to UTF-8 (65001). |
| // There are several reasons for this: |
| // - On OSX, HFS+ file names are encoded as UTF-8. |
| // - On OSX, when writing strings to the console, Terminal.app interprets them as UTF-8. |
| // - We want Ansi marshalling to mean marshalling to UTF-8 on Mac and Linux. |
| static const UINT PAL_ACP = 65001; |
| |
| #if !HAVE_COREFOUNDATION |
| /*++ |
| Function: |
| UnicodeDataComp |
| This is the comparison function used by the bsearch function to search |
| for unicode characters in the UnicodeData array. |
| |
| Parameter: |
| pnKey |
| The unicode character value to search for. |
| elem |
| A pointer to a UnicodeDataRec. |
| |
| Return value: |
| <0 if pnKey < elem->nUnicodeValue |
| 0 if pnKey == elem->nUnicodeValue |
| >0 if pnKey > elem->nUnicodeValue |
| --*/ |
| static int UnicodeDataComp(const void *pnKey, const void *elem) |
| { |
| WCHAR uValue = ((UnicodeDataRec*)elem)->nUnicodeValue; |
| WORD rangeValue = ((UnicodeDataRec*)elem)->rangeValue; |
| |
| if (*((INT*)pnKey) < uValue) |
| { |
| return -1; |
| } |
| else |
| { |
| if (*((INT*)pnKey) > (uValue + rangeValue)) |
| { |
| return 1; |
| } |
| else |
| { |
| return 0; |
| } |
| } |
| } |
| |
| /*++ |
| Function: |
| GetUnicodeData |
| This function is used to get information about a Unicode character. |
| |
| Parameters: |
| nUnicodeValue |
| The numeric value of the Unicode character to get information about. |
| pDataRec |
| The UnicodeDataRec to fill in with the data for the Unicode character. |
| |
| Return value: |
| TRUE if the Unicode character was found. |
| |
| --*/ |
| BOOL GetUnicodeData(INT nUnicodeValue, UnicodeDataRec *pDataRec) |
| { |
| BOOL bRet; |
| if (nUnicodeValue <= UNICODE_DATA_DIRECT_ACCESS) |
| { |
| *pDataRec = UnicodeData[nUnicodeValue]; |
| bRet = TRUE; |
| } |
| else |
| { |
| UnicodeDataRec *dataRec; |
| INT nNumOfChars = UNICODE_DATA_SIZE; |
| dataRec = (UnicodeDataRec *) bsearch(&nUnicodeValue, UnicodeData, nNumOfChars, |
| sizeof(UnicodeDataRec), UnicodeDataComp); |
| if (dataRec == NULL) |
| { |
| bRet = FALSE; |
| } |
| else |
| { |
| bRet = TRUE; |
| *pDataRec = *dataRec; |
| } |
| } |
| return bRet; |
| } |
| #endif /* !HAVE_COREFOUNDATION */ |
| |
| /*++ |
| Function: |
| CODEPAGEGetData |
| |
| IN UINT CodePage - The code page the caller |
| is attempting to retrieve data on. |
| |
| Returns a pointer to structure, NULL otherwise. |
| --*/ |
| const CP_MAPPING * |
| CODEPAGEGetData( IN UINT CodePage ) |
| { |
| UINT nSize = sizeof( CP_TO_NATIVE_TABLE ) / sizeof( CP_TO_NATIVE_TABLE[ 0 ] ); |
| UINT nIndex = 0; |
| |
| if ( CP_ACP == CodePage ) |
| { |
| CodePage = PAL_ACP; |
| } |
| |
| /* checking if the CodePage is ACP and returning true if so */ |
| while (nIndex < nSize) |
| { |
| if ( ( CP_TO_NATIVE_TABLE[ nIndex ] ).nCodePage == CodePage ) |
| { |
| return &(CP_TO_NATIVE_TABLE[ nIndex ]); |
| } |
| nIndex++; |
| } |
| return NULL; |
| } |
| |
| #if HAVE_COREFOUNDATION |
| /*++ |
| Function : |
| |
| CODEPAGECPToCFStringEncoding - Gets the CFStringEncoding for |
| the given codepage. |
| |
| Returns the CFStringEncoding for the given codepage. |
| --*/ |
| CFStringEncoding CODEPAGECPToCFStringEncoding(UINT codepage) |
| { |
| const CP_MAPPING *cp_mapping = CODEPAGEGetData(codepage); |
| if (cp_mapping == NULL) |
| { |
| return kCFStringEncodingInvalidId; |
| } |
| else |
| { |
| return cp_mapping->nCFEncoding; |
| } |
| } |
| #endif // HAVE_COREFOUNDATION |
| |
| /*++ |
| Function: |
| CharNextA |
| |
| Parameters |
| |
| lpsz |
| [in] Pointer to a character in a null-terminated string. |
| |
| Return Values |
| |
| A pointer to the next character in the string, or to the terminating null character if at the end of the string, indicates success. |
| |
| If lpsz points to the terminating null character, the return value is equal to lpsz. |
| |
| See MSDN doc. |
| --*/ |
| LPSTR |
| PALAPI |
| CharNextA( |
| IN LPCSTR lpsz) |
| { |
| LPSTR pRet; |
| PERF_ENTRY(CharNextA); |
| ENTRY("CharNextA (lpsz=%p (%s))\n", lpsz?lpsz:NULL, lpsz?lpsz:NULL); |
| |
| pRet = CharNextExA(GetACP(), lpsz, 0); |
| |
| LOGEXIT ("CharNextA returns LPSTR %p\n", pRet); |
| PERF_EXIT(CharNextA); |
| return pRet; |
| } |
| |
| |
| /*++ |
| Function: |
| CharNextExA |
| |
| See MSDN doc. |
| --*/ |
| LPSTR |
| PALAPI |
| CharNextExA( |
| IN WORD CodePage, |
| IN LPCSTR lpCurrentChar, |
| IN DWORD dwFlags) |
| { |
| LPSTR pRet = (LPSTR) lpCurrentChar; |
| |
| PERF_ENTRY(CharNextExA); |
| ENTRY("CharNextExA (CodePage=%hu, lpCurrentChar=%p (%s), dwFlags=%#x)\n", |
| CodePage, lpCurrentChar?lpCurrentChar:"NULL", lpCurrentChar?lpCurrentChar:"NULL", dwFlags); |
| |
| if ((lpCurrentChar != NULL) && (*lpCurrentChar != 0)) |
| { |
| pRet += (*(lpCurrentChar+1) != 0) && |
| IsDBCSLeadByteEx(CodePage, *lpCurrentChar) ? 2 : 1; |
| } |
| |
| LOGEXIT("CharNextExA returns LPSTR:%p (%s)\n", pRet, pRet); |
| PERF_EXIT(CharNextExA); |
| return pRet; |
| } |
| |
| |
| |
| /*++ |
| Function: |
| AreFileApisANSI |
| |
| The AreFileApisANSI function determines whether the file I/O functions |
| are using the ANSI or OEM character set code page. This function is |
| useful for 8-bit console input and output operations. |
| |
| Return Values |
| |
| If the set of file I/O functions is using the ANSI code page, the return value is nonzero. |
| |
| If the set of file I/O functions is using the OEM code page, the return value is zero. |
| |
| In the ROTOR version we always return true since there is no concept |
| of OEM code pages. |
| |
| --*/ |
| BOOL |
| PALAPI |
| AreFileApisANSI( |
| VOID) |
| { |
| PERF_ENTRY(AreFileApisANSI); |
| ENTRY("AreFileApisANSI ()\n"); |
| |
| LOGEXIT("AreFileApisANSI returns BOOL TRUE\n"); |
| PERF_EXIT(AreFileApisANSI); |
| return TRUE; |
| } |
| |
| |
| /*++ |
| Function: |
| GetConsoleCP |
| |
| See MSDN doc. |
| --*/ |
| UINT |
| PALAPI |
| GetConsoleCP( |
| VOID) |
| { |
| UINT nRet = 0; |
| PERF_ENTRY(GetConsoleCP); |
| ENTRY("GetConsoleCP()\n"); |
| |
| nRet = GetACP(); |
| |
| LOGEXIT("GetConsoleCP returns UINT %d\n", nRet ); |
| PERF_EXIT(GetConsoleCP); |
| return nRet; |
| } |
| |
| /*++ |
| Function: |
| GetConsoleOutputCP |
| |
| See MSDN doc. |
| --*/ |
| UINT |
| PALAPI |
| GetConsoleOutputCP( |
| VOID) |
| { |
| UINT nRet = 0; |
| PERF_ENTRY(GetConsoleOutputCP); |
| ENTRY("GetConsoleOutputCP()\n"); |
| nRet = GetACP(); |
| LOGEXIT("GetConsoleOutputCP returns UINT %d \n", nRet ); |
| PERF_EXIT(GetConsoleOutputCP); |
| return nRet; |
| } |
| |
| |
| /*++ |
| Function: |
| IsValidCodePage |
| |
| See MSDN doc. |
| |
| Notes : |
| "pseudo code pages", like CP_ACP, aren't considered 'valid' in this context. |
| CP_UTF7 and CP_UTF8, however, *are* considered valid code pages, even though |
| MSDN fails to mention them in the IsValidCodePage entry. |
| Note : CP_UTF7 support isn't required for Rotor |
| --*/ |
| BOOL |
| PALAPI |
| IsValidCodePage( |
| IN UINT CodePage) |
| { |
| BOOL retval = FALSE; |
| |
| PERF_ENTRY(IsValidCodePage); |
| ENTRY("IsValidCodePage(%d)\n", CodePage ); |
| |
| switch(CodePage) |
| { |
| case CP_ACP : /* fall through */ |
| case CP_OEMCP : /* fall through */ |
| case CP_MACCP : /* fall through */ |
| case CP_THREAD_ACP: |
| /* 'pseudo code pages' : not valid */ |
| retval = FALSE; |
| break; |
| case CP_UTF7: |
| /* valid in Win32, but not supported in Rotor */ |
| retval = FALSE; |
| break; |
| case CP_UTF8: |
| /* valid, but not part of CODEPAGEGetData's tables */ |
| retval = TRUE; |
| break; |
| default: |
| retval = (NULL != CODEPAGEGetData( CodePage )); |
| break; |
| } |
| |
| LOGEXIT("IsValidCodePage returns BOOL %d\n",retval); |
| PERF_EXIT(IsValidCodePage); |
| return retval; |
| } |
| |
| #if ENABLE_DOWNLEVEL_FOR_NLS |
| /*++ |
| Function: |
| GetStringTypeEx |
| |
| See MSDN doc. |
| --*/ |
| BOOL |
| PALAPI |
| GetStringTypeExW( |
| IN LCID Locale, |
| IN DWORD dwInfoType, |
| IN LPCWSTR lpSrcStr, |
| IN int cchSrc, |
| OUT LPWORD lpCharType) |
| { |
| |
| |
| int i = 0; |
| #if !HAVE_COREFOUNDATION |
| UnicodeDataRec unicodeDataRec; |
| #endif /* !HAVE_COREFOUNDATION */ |
| BOOL bRet = TRUE; |
| char16_t wcstr ; |
| PERF_ENTRY(GetStringTypeExW); |
| ENTRY("GetStringTypeExW(Locale=%#x, dwInfoType=%#x, lpSrcStr=%p (%S), " |
| "cchSrc=%d, lpCharType=%p)\n", |
| Locale, dwInfoType, lpSrcStr?lpSrcStr:W16_NULLSTRING, lpSrcStr?lpSrcStr:W16_NULLSTRING, cchSrc, lpCharType); |
| |
| if((Locale != LOCALE_USER_DEFAULT)||(dwInfoType != CT_CTYPE1) |
| || (cchSrc != 1) || (lpSrcStr == (LPCWSTR)lpCharType)) |
| { |
| ASSERT("One of the input parameters is invalid\n"); |
| SetLastError(ERROR_INVALID_PARAMETER); |
| bRet = FALSE; |
| goto GetStringTypeExExit; |
| } |
| |
| |
| /* |
| * get length if needed... |
| */ |
| if(cchSrc == -1) |
| { |
| cchSrc = PAL_wcslen(lpSrcStr); |
| } |
| |
| /* |
| * Loop through each character of the source string and update |
| * lpCharType accordingly. |
| */ |
| for(i = 0; i < cchSrc; i++) |
| { |
| wcstr = lpSrcStr[i]; |
| #if HAVE_COREFOUNDATION |
| lpCharType[i] = 0; |
| if (PAL_iswlower(wcstr)) |
| { |
| lpCharType[i] |= C1_LOWER; |
| } |
| if (PAL_iswupper(wcstr)) |
| { |
| lpCharType[i] |= C1_UPPER; |
| } |
| if (PAL_iswalpha(wcstr)) |
| { |
| lpCharType[i] |= C1_ALPHA; |
| } |
| if (PAL_iswdigit(wcstr)) |
| { |
| lpCharType[i] |= C1_DIGIT; |
| } |
| if (PAL_iswspace(wcstr)) |
| { |
| lpCharType[i] |= C1_SPACE; |
| } |
| if (PAL_iswblank(wcstr)) |
| { |
| lpCharType[i] |= C1_BLANK; |
| } |
| if (PAL_iswcntrl(wcstr)) |
| { |
| lpCharType[i] |= C1_CNTRL; |
| } |
| if (PAL_iswpunct(wcstr)) |
| { |
| lpCharType[i] |= C1_PUNCT; |
| } |
| #else /* HAVE_COREFOUNDATION */ |
| /* |
| * Get the unicode data record for that character. |
| */ |
| if(GetUnicodeData(wcstr, &unicodeDataRec)) |
| { |
| lpCharType[i] = unicodeDataRec.C1_TYPE_FLAGS; |
| } |
| else |
| { |
| lpCharType[i] = 0; |
| } |
| #endif /* HAVE_COREFOUNDATION */ |
| } |
| |
| GetStringTypeExExit: |
| LOGEXIT("GetStringTypeEx returns BOOL %d\n", bRet); |
| PERF_EXIT(GetStringTypeExW); |
| return bRet; |
| } |
| #endif // ENABLE_DOWNLEVEL_FOR_NLS |
| |
| /*++ |
| Function: |
| GetCPInfo |
| |
| See MSDN doc. |
| --*/ |
| BOOL |
| PALAPI |
| GetCPInfo( |
| IN UINT CodePage, |
| OUT LPCPINFO lpCPInfo) |
| { |
| const CP_MAPPING * lpStruct = NULL; |
| BOOL bRet = FALSE; |
| |
| PERF_ENTRY(GetCPInfo); |
| ENTRY("GetCPInfo(CodePage=%hu, lpCPInfo=%p)\n", CodePage, lpCPInfo); |
| |
| /*check if the input code page is valid*/ |
| if( CP_ACP != CodePage && !IsValidCodePage( CodePage ) ) |
| { |
| /* error, invalid argument */ |
| ERROR("CodePage(%d) parameter is invalid\n",CodePage); |
| SetLastError( ERROR_INVALID_PARAMETER ); |
| goto done; |
| } |
| |
| /*check if the lpCPInfo parameter is valid. */ |
| if( !lpCPInfo ) |
| { |
| /* error, invalid argument */ |
| ERROR("lpCPInfo cannot be NULL\n" ); |
| SetLastError( ERROR_INVALID_PARAMETER ); |
| goto done; |
| } |
| |
| if ( NULL != ( lpStruct = CODEPAGEGetData( CodePage ) ) ) |
| { |
| lpCPInfo->MaxCharSize = lpStruct->nMaxByteSize;; |
| memcpy( lpCPInfo->LeadByte, lpStruct->LeadByte , MAX_LEADBYTES ); |
| |
| /* Don't need to be set, according to the spec. */ |
| memset( lpCPInfo->DefaultChar, '?', MAX_DEFAULTCHAR ); |
| |
| bRet = TRUE; |
| } |
| |
| done: |
| LOGEXIT("GetCPInfo returns BOOL %d \n",bRet); |
| PERF_EXIT(GetCPInfo); |
| return bRet; |
| } |
| |
| |
| /*++ |
| Function: |
| GetACP |
| |
| See MSDN doc. |
| --*/ |
| UINT |
| PALAPI |
| GetACP(VOID) |
| { |
| PERF_ENTRY(GetACP); |
| ENTRY("GetACP(VOID)\n"); |
| |
| LOGEXIT("GetACP returning UINT %d\n", PAL_ACP ); |
| PERF_EXIT(GetACP); |
| |
| return PAL_ACP; |
| } |
| |
| |
| /*++ |
| Function: |
| IsDBCSLeadByteEx |
| |
| See MSDN doc. |
| --*/ |
| BOOL |
| PALAPI |
| IsDBCSLeadByteEx( |
| IN UINT CodePage, |
| IN BYTE TestChar) |
| { |
| CPINFO cpinfo; |
| SIZE_T i; |
| BOOL bRet = FALSE; |
| |
| PERF_ENTRY(IsDBCSLeadByteEx); |
| ENTRY("IsDBCSLeadByteEx(CodePage=%#x, TestChar=%d)\n", CodePage, TestChar); |
| |
| /* Get the lead byte info with respect to the given codepage*/ |
| if( !GetCPInfo( CodePage, &cpinfo ) ) |
| { |
| ERROR("Error CodePage(%#x) parameter is invalid\n", CodePage ); |
| SetLastError( ERROR_INVALID_PARAMETER ); |
| goto done; |
| } |
| |
| for( i=0; i < sizeof(cpinfo.LeadByte)/sizeof(cpinfo.LeadByte[0]); i += 2 ) |
| { |
| if( 0 == cpinfo.LeadByte[ i ] ) |
| { |
| goto done; |
| } |
| |
| /*check if the given char is in one of the lead byte ranges*/ |
| if( cpinfo.LeadByte[i] <= TestChar && TestChar<= cpinfo.LeadByte[i+1] ) |
| { |
| bRet = TRUE; |
| goto done; |
| } |
| } |
| done: |
| LOGEXIT("IsDBCSLeadByteEx returns BOOL %d\n",bRet); |
| PERF_EXIT(IsDBCSLeadByteEx); |
| return bRet; |
| } |
| |
| /*++ |
| Function: |
| IsDBCSLeadByte |
| |
| See MSDN doc. |
| --*/ |
| BOOL |
| PALAPI |
| IsDBCSLeadByte( |
| IN BYTE TestChar) |
| { |
| // UNIXTODO: Implement this! |
| ERROR("Needs Implementation!!!"); |
| return FALSE; |
| } |
| |
| /*++ |
| Function: |
| MultiByteToWideChar |
| |
| See MSDN doc. |
| |
| --*/ |
| int |
| PALAPI |
| MultiByteToWideChar( |
| IN UINT CodePage, |
| IN DWORD dwFlags, |
| IN LPCSTR lpMultiByteStr, |
| IN int cbMultiByte, |
| OUT LPWSTR lpWideCharStr, |
| IN int cchWideChar) |
| { |
| INT retval =0; |
| #if HAVE_COREFOUNDATION |
| CFStringRef cfString = NULL; |
| CFStringEncoding cfEncoding; |
| int bytesToConvert; |
| #endif /* HAVE_COREFOUNDATION */ |
| |
| PERF_ENTRY(MultiByteToWideChar); |
| ENTRY("MultiByteToWideChar(CodePage=%u, dwFlags=%#x, lpMultiByteStr=%p (%s)," |
| " cbMultiByte=%d, lpWideCharStr=%p, cchWideChar=%d)\n", |
| CodePage, dwFlags, lpMultiByteStr?lpMultiByteStr:"NULL", lpMultiByteStr?lpMultiByteStr:"NULL", |
| cbMultiByte, lpWideCharStr, cchWideChar); |
| |
| if (dwFlags & ~(MB_ERR_INVALID_CHARS | MB_PRECOMPOSED)) |
| { |
| ASSERT("Error dwFlags(0x%x) parameter is invalid\n", dwFlags); |
| SetLastError(ERROR_INVALID_FLAGS); |
| goto EXIT; |
| } |
| |
| if ( (cbMultiByte == 0) || (cchWideChar < 0) || |
| (lpMultiByteStr == NULL) || |
| ((cchWideChar != 0) && |
| ((lpWideCharStr == NULL) || |
| (lpMultiByteStr == (LPSTR)lpWideCharStr))) ) |
| { |
| ERROR("Error lpMultiByteStr parameters are invalid\n"); |
| SetLastError(ERROR_INVALID_PARAMETER); |
| goto EXIT; |
| } |
| |
| // Use UTF8ToUnicode on all systems, since it replaces |
| // invalid characters and Core Foundation doesn't do that. |
| if (CodePage == CP_UTF8 || (CodePage == CP_ACP && GetACP() == CP_UTF8)) |
| { |
| if (cbMultiByte <= -1) |
| { |
| cbMultiByte = strlen(lpMultiByteStr) + 1; |
| } |
| |
| retval = UTF8ToUnicode(lpMultiByteStr, cbMultiByte, lpWideCharStr, cchWideChar, dwFlags); |
| goto EXIT; |
| } |
| |
| #if !HAVE_COREFOUNDATION |
| ERROR( "This code page is not in the system.\n" ); |
| SetLastError( ERROR_INVALID_PARAMETER ); |
| goto EXIT; |
| #else /* !HAVE_COREFOUNDATION */ |
| bytesToConvert = cbMultiByte; |
| if (bytesToConvert == -1) |
| { |
| /* Plus one for the trailing '\0', which will end up |
| * in the CFString. */ |
| bytesToConvert = strlen(lpMultiByteStr) + 1; |
| } |
| |
| cfEncoding = CODEPAGECPToCFStringEncoding(CodePage); |
| if (cfEncoding == kCFStringEncodingInvalidId) |
| { |
| ERROR( "This code page is not in the system.\n" ); |
| SetLastError( ERROR_INVALID_PARAMETER ); |
| goto EXIT; |
| } |
| |
| cfString = CFStringCreateWithBytes(kCFAllocatorDefault, (UInt8*)lpMultiByteStr, |
| bytesToConvert, cfEncoding, TRUE); |
| if (cfString == NULL) |
| { |
| ERROR( "Failed to convert the string to the specified encoding.\n" ); |
| SetLastError( ERROR_NO_UNICODE_TRANSLATION ); |
| goto EXIT; |
| } |
| |
| if (cchWideChar != 0) |
| { |
| /* Do the conversion. */ |
| CFIndex length = CFStringGetLength(cfString); |
| if (length > cchWideChar) |
| { |
| ERROR("Error insufficient buffer\n"); |
| SetLastError(ERROR_INSUFFICIENT_BUFFER); |
| retval = 0; |
| goto ReleaseString; |
| } |
| CFStringGetCharacters(cfString, CFRangeMake(0, length), |
| (UniChar*)lpWideCharStr); |
| retval = length; |
| } |
| else |
| { |
| /* Just return the number of wide characters needed. */ |
| retval = CFStringGetLength(cfString); |
| } |
| |
| ReleaseString: |
| if (cfString != NULL) |
| { |
| CFRelease(cfString); |
| } |
| #endif /* !HAVE_COREFOUNDATION */ |
| |
| EXIT: |
| |
| LOGEXIT("MultiByteToWideChar returns %d.\n",retval); |
| PERF_EXIT(MultiByteToWideChar); |
| return retval; |
| } |
| |
| |
| /*++ |
| Function: |
| WideCharToMultiByte |
| |
| See MSDN doc. |
| |
| --*/ |
| int |
| PALAPI |
| WideCharToMultiByte( |
| IN UINT CodePage, |
| IN DWORD dwFlags, |
| IN LPCWSTR lpWideCharStr, |
| IN int cchWideChar, |
| OUT LPSTR lpMultiByteStr, |
| IN int cbMultiByte, |
| IN LPCSTR lpDefaultChar, |
| OUT LPBOOL lpUsedDefaultChar) |
| { |
| INT retval =0; |
| char defaultChar = '?'; |
| BOOL usedDefaultChar = FALSE; |
| #if HAVE_COREFOUNDATION |
| CFStringRef cfString = NULL; |
| CFStringEncoding cfEncoding; |
| int charsToConvert; |
| CFIndex charsConverted; |
| CFIndex bytesConverted; |
| #endif /* !HAVE_COREFOUNDATION */ |
| |
| PERF_ENTRY(WideCharToMultiByte); |
| ENTRY("WideCharToMultiByte(CodePage=%u, dwFlags=%#x, lpWideCharStr=%p (%S), " |
| "cchWideChar=%d, lpMultiByteStr=%p, cbMultiByte=%d, " |
| "lpDefaultChar=%p, lpUsedDefaultChar=%p)\n", |
| CodePage, dwFlags, lpWideCharStr?lpWideCharStr:W16_NULLSTRING, lpWideCharStr?lpWideCharStr:W16_NULLSTRING, |
| cchWideChar, lpMultiByteStr, cbMultiByte, |
| lpDefaultChar, lpUsedDefaultChar); |
| |
| if (dwFlags & ~WC_NO_BEST_FIT_CHARS) |
| { |
| ERROR("dwFlags %d invalid\n", dwFlags); |
| SetLastError(ERROR_INVALID_FLAGS); |
| goto EXIT; |
| } |
| |
| // No special action is needed for WC_NO_BEST_FIT_CHARS. The default |
| // behavior of this API on Unix is not to find the best fit for a unicode |
| // character that does not map directly into a code point in the given |
| // code page. The best fit functionality is not available in wctomb on Unix |
| // and is better left unimplemented for security reasons anyway. |
| |
| if ((cchWideChar < -1) || (cbMultiByte < 0) || |
| (lpWideCharStr == NULL) || |
| ((cbMultiByte != 0) && |
| ((lpMultiByteStr == NULL) || |
| (lpWideCharStr == (LPWSTR)lpMultiByteStr))) ) |
| { |
| ERROR("Error lpWideCharStr parameters are invalid\n"); |
| SetLastError(ERROR_INVALID_PARAMETER); |
| goto EXIT; |
| } |
| |
| if (lpDefaultChar != NULL) |
| { |
| defaultChar = *lpDefaultChar; |
| } |
| |
| // Use UnicodeToUTF8 on all systems because we use |
| // UTF8ToUnicode in MultiByteToWideChar() on all systems. |
| if (CodePage == CP_UTF8 || (CodePage == CP_ACP && GetACP() == CP_UTF8)) |
| { |
| if (cchWideChar == -1) |
| { |
| cchWideChar = PAL_wcslen(lpWideCharStr) + 1; |
| } |
| retval = UnicodeToUTF8(lpWideCharStr, cchWideChar, lpMultiByteStr, cbMultiByte); |
| goto EXIT; |
| } |
| |
| #if HAVE_COREFOUNDATION |
| charsToConvert = cchWideChar; |
| if (charsToConvert == -1) |
| { |
| LPCWSTR ptr = lpWideCharStr; |
| |
| charsToConvert = 0; |
| while(*ptr++ != 0) |
| { |
| charsToConvert++; |
| } |
| charsToConvert++; /* For the terminating '\0' */ |
| } |
| |
| cfEncoding = CODEPAGECPToCFStringEncoding(CodePage); |
| if (cfEncoding == kCFStringEncodingInvalidId) |
| { |
| ERROR( "This code page is not in the system.\n" ); |
| SetLastError(ERROR_INVALID_PARAMETER); |
| goto EXIT; |
| } |
| |
| cfString = CFStringCreateWithCharacters(kCFAllocatorDefault, |
| (const UniChar*)lpWideCharStr, charsToConvert); |
| if (cfString == NULL) |
| { |
| ERROR("CFString creation failed.\n"); |
| SetLastError(ERROR_INVALID_PARAMETER); |
| goto EXIT; |
| } |
| |
| if (cbMultiByte == 0) |
| { |
| lpMultiByteStr = NULL; |
| } |
| charsConverted = CFStringGetBytes(cfString, |
| CFRangeMake(0, charsToConvert), |
| cfEncoding, '?', TRUE, (UInt8*)lpMultiByteStr, |
| cbMultiByte, &bytesConverted); |
| if (charsConverted != charsToConvert) |
| { |
| if (lpMultiByteStr != NULL) |
| { |
| // CFStringGetBytes can fail due to an insufficient buffer or for |
| // other reasons. We need to check if we're out of buffer space. |
| charsConverted = CFStringGetBytes(cfString, |
| CFRangeMake(0, charsToConvert), |
| cfEncoding, '?', TRUE, NULL, |
| 0, &bytesConverted); |
| if (cbMultiByte < bytesConverted) |
| { |
| ERROR("Insufficient buffer for CFStringGetBytes.\n"); |
| SetLastError(ERROR_INSUFFICIENT_BUFFER); |
| goto ReleaseString; |
| } |
| } |
| ERROR("Not all characters were converted.\n"); |
| SetLastError(ERROR_INVALID_PARAMETER); |
| goto ReleaseString; |
| } |
| retval = bytesConverted; |
| |
| ReleaseString: |
| if (cfString != NULL) |
| { |
| CFRelease(cfString); |
| } |
| #endif /* HAVE_COREFOUNDATION */ |
| |
| EXIT: |
| |
| if ( lpUsedDefaultChar != NULL ) |
| { |
| *lpUsedDefaultChar = usedDefaultChar; |
| } |
| |
| /* Flag the cases when WC_NO_BEST_FIT_CHARS was not specified |
| * but we found characters that had to be replaced with default |
| * characters. Note that Windows would have attempted to find |
| * best fit characters under these conditions and that could pose |
| * a security risk. |
| */ |
| _ASSERT_MSG((dwFlags & WC_NO_BEST_FIT_CHARS) || !usedDefaultChar, |
| "WideCharToMultiByte found a string which doesn't round trip: (%p)%S " |
| "and WC_NO_BEST_FIT_CHARS was not specified\n", |
| lpWideCharStr, lpWideCharStr); |
| |
| LOGEXIT("WideCharToMultiByte returns INT %d\n", retval); |
| PERF_EXIT(WideCharToMultiByte); |
| return retval; |
| } |
| |
| /*++ |
| Function : |
| |
| PAL_GetResourceString - get localized string for a specified resource. |
| The string that is passed in should be the English string, since it |
| will be returned if an appropriately localized version is not found. |
| |
| Returns number of characters retrieved, 0 if it failed. |
| --*/ |
| int |
| PALAPI |
| PAL_GetResourceString( |
| IN LPCSTR lpDomain, |
| IN LPCSTR lpResourceStr, |
| OUT LPWSTR lpWideCharStr, |
| IN int cchWideChar |
| ) |
| { |
| #if !defined(__APPLE__) && !defined(__ANDROID__) |
| // NOTE: dgettext returns the key if it fails to locate the appropriate |
| // resource. In our case, that will be the English string. |
| LPCSTR resourceString = dgettext(lpDomain, lpResourceStr); |
| #else // __APPLE__ |
| // UNIXTODO: Implement for OSX using the native localization API |
| |
| // This is a temporary solution until we add the real native resource support. |
| LPCSTR resourceString = lpResourceStr; |
| #endif // __APPLE__ |
| |
| int length = strlen(resourceString); |
| return UTF8ToUnicode(lpResourceStr, length + 1, lpWideCharStr, cchWideChar, 0); |
| } |