blob: 9fd22f99d5692c27f641a995cec83eee0bbab39a [file] [log] [blame] [edit]
///////////////////////////////////////////////////////////////////////////////
// //
// Unicode.cpp //
// Copyright (C) Microsoft Corporation. All rights reserved. //
// This file is distributed under the University of Illinois Open Source //
// License. See LICENSE.TXT for details. //
// //
// Provides utility functions to work with Unicode and other encodings. //
// //
///////////////////////////////////////////////////////////////////////////////
#include "dxc/Support/Global.h"
#include <specstrings.h>
#include "dxc/Support/Unicode.h"
#include <string>
#include "dxc/Support/WinIncludes.h"
namespace Unicode {
// Converts a null-terminated UTF-16 string to the multi-byte encoding of code
// page `cp` using WideCharToMultiByte, storing the bytes in *pValue.
//
//   text   - null-terminated UTF-16 input.
//   cp     - target code page.
//   flags  - conversion flags forwarded to WideCharToMultiByte.
//   pValue - receives the converted text; only meaningful when the function
//            returns true.
//   lossy  - optional; set to true when WideCharToMultiByte reports that a
//            default character was substituted. Always false for UTF-7 and
//            UTF-8 targets, where the API cannot report substitutions.
//
// Returns true on success; false when the conversion fails or the input is
// too long for the int-based Win32 length parameters.
_Success_(return != false)
bool UTF16ToEncodedString(_In_z_ const wchar_t* text, DWORD cp, DWORD flags, _Inout_ std::string* pValue, _Out_opt_ bool* lossy) {
  // WideCharToMultiByte requires lpUsedDefaultChar to be NULL for code pages
  // 65000 (UTF-7) and 65001 (UTF-8); otherwise it fails with
  // ERROR_INVALID_PARAMETER. Only request the flag where it is supported.
  const DWORD kCodePageUTF7 = 65000;
  const DWORD kCodePageUTF8 = 65001;
  const bool canReportDefaultChar = (cp != kCodePageUTF7) && (cp != kCodePageUTF8);

  BOOL usedDefaultChar = 0;
  LPBOOL pUsedDefaultChar =
      (lossy != nullptr && canReportDefaultChar) ? &usedDefaultChar : nullptr;
  const size_t cUTF16 = wcslen(text);
  if (lossy != nullptr) *lossy = false;

  // Handle zero-length as a special case; it's a special value to indicate errors in WideCharToMultiByte.
  if (cUTF16 == 0) {
    pValue->resize(0);
    DXASSERT(lossy == nullptr || *lossy == false, "otherwise earlier initialization in this function was updated");
    return true;
  }

  // The Win32 API takes the length as an int; reject lengths that do not
  // round-trip instead of silently converting a truncated prefix.
  const int cchText = static_cast<int>(cUTF16);
  if (cchText < 0 || static_cast<size_t>(cchText) != cUTF16)
    return false;

  // First pass: query the number of bytes required.
  int cbUTF8 = ::WideCharToMultiByte(cp, flags, text, cchText, nullptr, 0, nullptr, pUsedDefaultChar);
  if (cbUTF8 == 0)
    return false;

  // Second pass: convert directly into the string's storage.
  pValue->resize(cbUTF8);
  cbUTF8 = ::WideCharToMultiByte(cp, flags, text, cchText, &(*pValue)[0], cbUTF8, nullptr, pUsedDefaultChar);
  DXASSERT(cbUTF8 > 0, "otherwise contents have changed");
  if (cbUTF8 <= 0) {
    // Do not report success with an unfilled buffer in release builds.
    pValue->resize(0);
    return false;
  }
  DXASSERT((*pValue)[pValue->size()] == '\0', "otherwise string didn't null-terminate after resize() call");
  if (lossy != nullptr) *lossy = (usedDefaultChar != 0);
  return true;
}
// Converts a null-terminated UTF-8 string to UTF-16 by measuring it and
// forwarding to the length-taking overload. A null pUTF8 is treated as an
// empty string.
_Use_decl_annotations_
bool UTF8ToUTF16String(const char *pUTF8, std::wstring *pUTF16) {
  size_t cbUTF8 = 0;
  if (pUTF8 != nullptr) {
    cbUTF8 = strlen(pUTF8);
  }
  return UTF8ToUTF16String(pUTF8, cbUTF8, pUTF16);
}
// Converts cbUTF8 bytes of UTF-8 text to UTF-16, storing the result in
// *pUTF16.
//
//   pUTF8  - UTF-8 input; not dereferenced when cbUTF8 is zero.
//   cbUTF8 - number of bytes to convert.
//   pUTF16 - receives the converted text; only meaningful when the function
//            returns true.
//
// Returns true on success; false when the input contains invalid UTF-8
// (MB_ERR_INVALID_CHARS is used), when the conversion otherwise fails, or
// when cbUTF8 does not fit the int-based Win32 length parameter.
_Use_decl_annotations_
bool UTF8ToUTF16String(const char *pUTF8, size_t cbUTF8, std::wstring *pUTF16) {
  DXASSERT_NOMSG(pUTF16 != nullptr);

  // Handle zero-length as a special case; it's a special value to indicate
  // errors in MultiByteToWideChar.
  if (cbUTF8 == 0) {
    pUTF16->resize(0);
    return true;
  }

  // MultiByteToWideChar takes the byte count as an int; reject counts that do
  // not round-trip instead of silently converting a truncated prefix.
  const int cbInput = static_cast<int>(cbUTF8);
  if (cbInput < 0 || static_cast<size_t>(cbInput) != cbUTF8)
    return false;

  // First pass: query the number of UTF-16 code units required.
  int cUTF16 = ::MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, pUTF8,
                                     cbInput, nullptr, 0);
  if (cUTF16 == 0)
    return false;

  // Second pass: convert directly into the string's storage.
  pUTF16->resize(cUTF16);
  cUTF16 = ::MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, pUTF8, cbInput,
                                 &(*pUTF16)[0], cUTF16);
  DXASSERT(cUTF16 > 0, "otherwise contents changed");
  if (cUTF16 <= 0) {
    // Do not report success with an unfilled buffer in release builds.
    pUTF16->resize(0);
    return false;
  }
  DXASSERT((*pUTF16)[pUTF16->size()] == L'\0',
           "otherwise wstring didn't null-terminate after resize() call");
  return true;
}
// Converts a null-terminated UTF-8 string to UTF-16 and returns it by value.
// Throws hlsl::Exception(DXC_E_STRING_ENCODING_FAILED) when the conversion
// reports failure.
std::wstring UTF8ToUTF16StringOrThrow(_In_z_ const char *pUTF8) {
  std::wstring converted;
  const bool succeeded = UTF8ToUTF16String(pUTF8, &converted);
  if (succeeded) {
    return converted;
  }
  throw hlsl::Exception(DXC_E_STRING_ENCODING_FAILED);
}
// Converts a null-terminated UTF-8 string to the console output encoding by
// first widening it to UTF-16 and then delegating to UTF16ToConsoleString.
//
//   text   - UTF-8 input; must not be null.
//   pValue - receives the converted text; must not be null.
//   lossy  - optional; reset to false up front, then set by the UTF-16 to
//            console conversion step.
//
// Returns false if either conversion step fails.
_Use_decl_annotations_
bool UTF8ToConsoleString(_In_z_ const char* text, _Inout_ std::string* pValue, _Out_opt_ bool* lossy) {
  DXASSERT_NOMSG(text != nullptr);
  DXASSERT_NOMSG(pValue != nullptr);
  if (lossy != nullptr) {
    *lossy = false;
  }

  std::wstring wide;
  const bool widened = UTF8ToUTF16String(text, &wide);
  // Short-circuit: the console conversion only runs when widening succeeded.
  return widened && UTF16ToConsoleString(wide.c_str(), pValue, lossy);
}
// Converts a null-terminated UTF-16 string to the code page returned by
// GetConsoleOutputCP(), storing the bytes in *pValue. `lossy` is optional and
// is forwarded to UTF16ToEncodedString. Returns that function's result.
_Use_decl_annotations_
bool UTF16ToConsoleString(const wchar_t* text, std::string* pValue, bool* lossy) {
  DXASSERT_NOMSG(text != nullptr);
  DXASSERT_NOMSG(pValue != nullptr);
  const UINT consoleCodePage = GetConsoleOutputCP();
  const bool converted = UTF16ToEncodedString(text, consoleCodePage, 0, pValue, lossy);
  return converted;
}
// Converts a null-terminated UTF-16 string to UTF-8, storing the result in
// *pUTF8. Both pointers must be non-null. No loss flag is requested from the
// underlying conversion. Returns UTF16ToEncodedString's result.
_Use_decl_annotations_
bool UTF16ToUTF8String(const wchar_t *pUTF16, std::string *pUTF8) {
  DXASSERT_NOMSG(pUTF16 != nullptr);
  DXASSERT_NOMSG(pUTF8 != nullptr);
  const bool converted = UTF16ToEncodedString(pUTF16, CP_UTF8, 0, pUTF8, nullptr);
  return converted;
}
// Converts a null-terminated UTF-16 string to UTF-8 and returns it by value.
// Throws hlsl::Exception(DXC_E_STRING_ENCODING_FAILED) when the conversion
// reports failure.
std::string UTF16ToUTF8StringOrThrow(_In_z_ const wchar_t *pUTF16) {
  std::string converted;
  const bool succeeded = UTF16ToUTF8String(pUTF16, &converted);
  if (succeeded) {
    return converted;
  }
  throw hlsl::Exception(DXC_E_STRING_ENCODING_FAILED);
}
// Converts a null-terminated UTF-8 string to a newly allocated,
// null-terminated UTF-16 string held by a CComHeapPtr until it is detached
// into *ppUTF16.
//
//   pUTF8   - null-terminated UTF-8 input (converted with length -1, so the
//             terminator is included in the output).
//   ppUTF16 - receives the detached buffer on success; set to nullptr on
//             failure. NOTE(review): the caller presumably releases it with
//             the allocator matching CComHeapPtr (CoTaskMemFree) - confirm.
//
// Returns false on invalid UTF-8, allocation failure, or conversion failure.
_Use_decl_annotations_
bool UTF8BufferToUTF16ComHeap(const char *pUTF8, wchar_t **ppUTF16) throw() {
  *ppUTF16 = nullptr;

  // First pass: query the required size in UTF-16 code units.
  int c = ::MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, pUTF8, -1,
                                nullptr, 0);
  if (c == 0)
    return false;

  CComHeapPtr<wchar_t> p;
  if (!p.Allocate(c))
    return false;

  // Second pass: perform the conversion into the allocated buffer.
  const int converted = ::MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS,
                                              pUTF8, -1, p.m_pData, c);
  DXVERIFY_NOMSG(0 < converted);
  if (converted <= 0) {
    // Do not hand back an unfilled buffer in release builds; `p` frees the
    // allocation when it goes out of scope.
    return false;
  }

  *ppUTF16 = p.Detach();
  return true;
}
// Converts a UTF-8 buffer to a newly allocated, null-terminated UTF-16
// buffer.
//
//   pUTF8   - UTF-8 input.
//   cbUTF8  - number of bytes to convert, or -1 when pUTF8 is
//             null-terminated.
//   ppUTF16 - receives a buffer allocated with new[] on success (the caller
//             is expected to release it with delete[]); nullptr on failure.
//   pcUTF16 - receives the size of the buffer in wchar_t units, including
//             the terminating null; 0 on failure.
//
// Returns false on invalid UTF-8, allocation failure, or conversion failure.
_Use_decl_annotations_
bool UTF8BufferToUTF16Buffer(const char *pUTF8, int cbUTF8, wchar_t **ppUTF16, size_t *pcUTF16) throw() {
  *ppUTF16 = nullptr;
  *pcUTF16 = 0;

  // Empty input is handled up front because a zero result from
  // MultiByteToWideChar indicates an error.
  if (cbUTF8 == 0 || (cbUTF8 == -1 && *pUTF8 == '\0')) {
    *ppUTF16 = new (std::nothrow) wchar_t[1];
    if (*ppUTF16 == nullptr)
      return false;
    (*ppUTF16)[0] = L'\0';
    *pcUTF16 = 1;
    return true;
  }

  // First pass: query the required size in UTF-16 code units.
  int c = ::MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, pUTF8, cbUTF8, nullptr, 0);
  if (c == 0)
    return false;

  // add space for null-terminator if we're not accounting for it
  if (cbUTF8 != -1)
    c += 1;

  wchar_t *p = new (std::nothrow) wchar_t[c];
  if (p == nullptr)
    return false;

  // Second pass: perform the conversion into the allocated buffer.
  int converted = ::MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS,
                                        pUTF8, cbUTF8,
                                        p, c);
  DXASSERT(converted > 0, "otherwise contents have changed");
  if (converted <= 0) {
    // Do not return an uninitialized buffer as success in release builds.
    delete[] p;
    return false;
  }

  p[c - 1] = L'\0';
  *ppUTF16 = p;
  *pcUTF16 = c;
  return true;
}
// Converts a UTF-16 buffer to a newly allocated, null-terminated UTF-8
// buffer.
//
//   pUTF16 - UTF-16 input.
//   cUTF16 - number of wchar_t units to convert, or -1 when pUTF16 is
//            null-terminated.
//   ppUTF8 - receives a buffer allocated with new[] on success (the caller
//            is expected to release it with delete[]); nullptr on failure.
//   pcUTF8 - receives the size of the buffer in bytes, including the
//            terminating null; 0 on failure.
//
// Returns false on allocation failure or conversion failure.
_Use_decl_annotations_
bool UTF16BufferToUTF8Buffer(const wchar_t *pUTF16, int cUTF16, char **ppUTF8, size_t *pcUTF8) throw() {
  *ppUTF8 = nullptr;
  *pcUTF8 = 0;

  // Empty input is handled up front because a zero result from
  // WideCharToMultiByte indicates an error.
  if (cUTF16 == 0 || (cUTF16 == -1 && *pUTF16 == '\0')) {
    *ppUTF8 = new (std::nothrow) char[1];
    if (*ppUTF8 == nullptr)
      return false;
    (*ppUTF8)[0] = '\0';
    *pcUTF8 = 1;
    return true;
  }

  // First pass: query the required size in bytes.
  int c1 = ::WideCharToMultiByte(CP_UTF8, // code page
                                 0,       // flags
                                 pUTF16,  // string to convert
                                 cUTF16,  // size, in chars, of string to convert
                                 nullptr, // output buffer
                                 0,       // size of output buffer
                                 nullptr, nullptr);
  if (c1 == 0)
    return false;

  // add space for null-terminator if we're not accounting for it
  if (cUTF16 != -1)
    c1 += 1;

  char *p = new (std::nothrow) char[c1];
  if (p == nullptr)
    return false;

  // Second pass: perform the conversion into the allocated buffer.
  int converted = ::WideCharToMultiByte(CP_UTF8, 0,
                                        pUTF16, cUTF16,
                                        p, c1,
                                        nullptr, nullptr);
  DXASSERT(converted > 0, "otherwise contents have changed");
  if (converted <= 0) {
    // Do not return an uninitialized buffer as success in release builds.
    delete[] p;
    return false;
  }

  p[c1 - 1] = '\0';
  *ppUTF8 = p;
  *pcUTF8 = c1;
  return true;
}
// Matches pName against pMask, where a trailing `star` character in the mask
// turns it into a prefix match; otherwise the comparison is exact.
//
//   pMask, maskLen - mask characters and their count.
//   pName, nameLen - name characters and their count.
//   star           - the wildcard character for this character type.
//
// Two empty strings match. If exactly one side is empty the result is false,
// which means even a lone star mask does not match an empty name. A star
// anywhere other than the last mask position is compared literally.
template<typename TChar>
static
bool IsStarMatchT(const TChar *pMask, size_t maskLen, const TChar *pName, size_t nameLen, TChar star) {
  // With an empty operand, the only possible match is empty-vs-empty.
  if (maskLen == 0 || nameLen == 0) {
    return maskLen == nameLen;
  }

  const bool prefixMode = (pMask[maskLen - 1] == star);

  // Number of leading characters that must be identical in mask and name.
  size_t required = maskLen;
  if (prefixMode) {
    required = maskLen - 1;
    if (required == 0) {
      return true; // Mask is just the star: any non-empty name matches.
    }
    if (required > nameLen) {
      return false; // Name is too short to contain the prefix.
    }
  } else if (required != nameLen) {
    return false; // Exact match needs equal lengths.
  }

  return 0 == memcmp(pMask, pName, sizeof(TChar) * required);
}
// Star-match for char strings: forwards to IsStarMatchT with '*' as the
// wildcard character and returns its result.
_Use_decl_annotations_
bool IsStarMatchUTF8(const char *pMask, size_t maskLen, const char *pName, size_t nameLen) {
  const char wildcard = '*';
  return IsStarMatchT<char>(pMask, maskLen, pName, nameLen, wildcard);
}
// Star-match for wchar_t strings: forwards to IsStarMatchT with L'*' as the
// wildcard character and returns its result.
_Use_decl_annotations_
bool IsStarMatchUTF16(const wchar_t *pMask, size_t maskLen, const wchar_t *pName, size_t nameLen) {
  const wchar_t wildcard = L'*';
  return IsStarMatchT<wchar_t>(pMask, maskLen, pName, nameLen, wildcard);
}
} // namespace Unicode