blob: b2060fdd080159cd2aa75b82dac58cffa233d8d5 [file]
//-------------------------------------------------------------------------------------------------------
// Copyright (C) Microsoft. All rights reserved.
// Licensed under the MIT license. See LICENSE.txt file in the project root for full license information.
//-------------------------------------------------------------------------------------------------------
#pragma once
#include "Utf8Codex.h"
namespace utf8
{
///
/// Use the codex library to encode a UTF16 string to UTF8.
/// The caller is responsible for freeing the memory, which is allocated
/// using Allocator.
/// The returned string is null terminated.
///
/// allocator     - callable invoked once with the requested buffer size in bytes;
///                 a null result is reported as E_OUTOFMEMORY.
/// sourceString  - UTF16 input to encode.
/// sourceCount   - number of WCHARs to encode; must be below MAXUINT32.
/// destStringPtr - receives the allocated UTF8 buffer on success.
/// destCount     - receives the byte count returned by the encoder on success.
/// allocateCount - optional; receives the number of bytes requested from
///                 allocator, i.e. the size of the buffer that must be freed.
///
/// Returns S_OK on success and E_OUTOFMEMORY when the input is too long or the
/// allocation fails. No output parameter is written on failure.
///
template <typename AllocatorFunction>
HRESULT WideStringToNarrow(_In_ AllocatorFunction allocator, _In_ LPCWSTR sourceString, size_t sourceCount, _Out_ LPSTR* destStringPtr, _Out_ size_t* destCount, size_t* allocateCount = nullptr)
{
    static_assert(sizeof(utf8char_t) == sizeof(char), "Needs to be valid for cast");

    const size_t cchSourceString = sourceCount;
    if (cchSourceString >= MAXUINT32)
    {
        return E_OUTOFMEMORY;
    }

    // The buffer is sized at 3 bytes per source code unit, with one extra unit
    // reserved for the terminator. Validate the count BEFORE multiplying:
    // comparing the wrapped product against the count does not catch every
    // overflow when size_t is 32 bits wide (0x80000000 wraps to 0x80000003).
    const size_t maxSourceCount = (static_cast<size_t>(-1) / 3) - 1;
    if (cchSourceString > maxSourceCount)
    {
        return E_OUTOFMEMORY;
    }
    const size_t cbDestString = (cchSourceString + 1) * 3;

    utf8char_t* destString = (utf8char_t*)allocator(cbDestString);
    if (destString == nullptr)
    {
        return E_OUTOFMEMORY;
    }

    const size_t cbEncoded = utf8::EncodeTrueUtf8IntoAndNullTerminate(destString, sourceString, static_cast<charcount_t>(cchSourceString));
    Assert(cbEncoded <= cbDestString);

    *destStringPtr = (char*)destString;
    *destCount = cbEncoded;
    if (allocateCount != nullptr)
    {
        // Report the size that was actually allocated (not the encoded length)
        // so that a size-aware free receives the matching value.
        *allocateCount = cbDestString;
    }
    return S_OK;
}
///
/// Overload that takes the allocator as a type: the buffer is obtained through
/// the static Allocator::allocate function and every other argument is
/// forwarded unchanged to the allocator-function overload.
///
template <class Allocator>
HRESULT WideStringToNarrow(
    _In_ LPCWSTR sourceString,
    size_t sourceCount,
    _Out_ LPSTR* destStringPtr,
    _Out_ size_t* destCount,
    size_t* allocateCount = nullptr)
{
    const HRESULT hr = WideStringToNarrow(
        Allocator::allocate,
        sourceString,
        sourceCount,
        destStringPtr,
        destCount,
        allocateCount);
    return hr;
}
///
/// Use the codex library to encode a UTF8 string to UTF16.
/// The caller is responsible for freeing the memory, which is allocated
/// using Allocator.
/// The returned string is null terminated.
///
/// allocator     - callable invoked once with the requested buffer size in bytes;
///                 a null result is reported as E_OUTOFMEMORY.
/// sourceString  - UTF8 input to decode.
/// sourceCount   - number of bytes of sourceString to decode.
/// destStringPtr - receives the allocated UTF16 buffer on success.
/// destCount     - receives the number of WCHARs written, not counting the
///                 terminator.
/// allocateCount - optional; receives the number of bytes requested from
///                 allocator.
///
/// Returns S_OK on success and E_OUTOFMEMORY when the input is too long or the
/// allocation fails. No output parameter is written on failure.
///
template <typename AllocatorFunction>
HRESULT NarrowStringToWide(_In_ AllocatorFunction allocator,_In_ LPCSTR sourceString, size_t sourceCount, _Out_ LPWSTR* destStringPtr, _Out_ size_t* destCount, size_t* allocateCount = nullptr)
{
    static_assert(sizeof(utf8char_t) == sizeof(char), "Needs to be valid for cast");

    // The buffer holds one WCHAR per source byte plus the terminator. Validate
    // the count BEFORE multiplying: comparing the wrapped product against the
    // count lets some overflowing values through (for a 2-byte WCHAR,
    // sourceCount == SIZE_MAX - 1 wraps to exactly sourceCount).
    if (sourceCount >= static_cast<size_t>(-1) / sizeof(WCHAR))
    {
        return E_OUTOFMEMORY;
    }
    const size_t cbSourceString = sourceCount;
    const size_t cbDestString = (sourceCount + 1) * sizeof(WCHAR);

    WCHAR* destString = (WCHAR*)allocator(cbDestString);
    if (destString == nullptr)
    {
        return E_OUTOFMEMORY;
    }
    if (allocateCount != nullptr)
    {
        *allocateCount = cbDestString;
    }

    // Fast path: copy bytes in the range 0x01..0x7F straight across, stopping at
    // the first byte outside that range (this includes an embedded NUL).
    size_t sourceStart = 0;
    for (; sourceStart < sourceCount; sourceStart++)
    {
        const char ch = sourceString[sourceStart];
        if ( ! (ch > 0 && ch < 0x0080) )
        {
            // Step back over up to 3 bytes that were already copied so the
            // decoder restarts a little earlier; those bytes are decoded again.
            // (Original note: 3 + 1 -> fallback at least 1 unicode char)
            const size_t fallback = sourceStart > 3 ? 3 : sourceStart;
            sourceStart -= fallback;
            break;
        }
        destString[sourceStart] = (WCHAR) ch;
    }

    if (sourceStart == sourceCount)
    {
        // The loop consumed the whole input: terminate and return.
        destString[sourceCount] = WCHAR(0);
        *destStringPtr = destString;
        *destCount = sourceCount;
        return S_OK;
    }

    // Slow path: hand the remainder of the input to the codex decoder.
    LPCUTF8 remSourceString = (LPCUTF8)sourceString + sourceStart;
    WCHAR *remDestString = destString + sourceStart;

    charcount_t cchDestString = utf8::ByteIndexIntoCharacterIndex(remSourceString, cbSourceString - sourceStart);
    cchDestString += (charcount_t)sourceStart;
    Assert (cchDestString <= sourceCount);

    // Some node tests depend on the utf8 decoder not swallowing invalid unicode characters
    // instead of replacing them with the "replacement" character. Pass a flag to our
    // decoder to require such behavior
    utf8::DecodeUnitsIntoAndNullTerminateNoAdvance(remDestString, remSourceString, (LPCUTF8) sourceString + cbSourceString, DecodeOptions::doAllowInvalidWCHARs);
    Assert(destString[cchDestString] == 0);

    *destStringPtr = destString;
    *destCount = cchDestString;
    return S_OK;
}
///
/// Overload that takes the allocator as a type: the buffer is obtained through
/// the static Allocator::allocate function and every other argument is
/// forwarded unchanged to the allocator-function overload.
///
template <class Allocator>
HRESULT NarrowStringToWide(
    _In_ LPCSTR sourceString,
    size_t sourceCount,
    _Out_ LPWSTR* destStringPtr,
    _Out_ size_t* destCount,
    size_t* allocateCount = nullptr)
{
    const HRESULT hr = NarrowStringToWide(
        Allocator::allocate,
        sourceString,
        sourceCount,
        destStringPtr,
        destCount,
        allocateCount);
    return hr;
}
///
/// Allocator policy that forwards to the global ::malloc and ::free.
///
class malloc_allocator
{
public:
    /// Returns whatever ::malloc returns for a request of `size` bytes.
    static void* allocate(size_t size)
    {
        void* const block = ::malloc(size);
        return block;
    }

    /// Passes `ptr` to ::free. `count` is accepted but not used.
    static void free(void* ptr, size_t count)
    {
        (void)count;
        ::free(ptr);
    }
};
///
/// Converts a null-terminated UTF16 string to UTF8 using malloc_allocator.
/// The length is measured with wcslen; the encoded length is not returned.
///
inline HRESULT WideStringToNarrowDynamic(_In_ LPCWSTR sourceString, _Out_ LPSTR* destStringPtr)
{
    // Required by the callee as an output slot; the value is discarded.
    size_t ignoredCount;
    const size_t sourceLength = wcslen(sourceString);
    return WideStringToNarrow<malloc_allocator>(sourceString, sourceLength, destStringPtr, &ignoredCount);
}
///
/// Converts a null-terminated UTF8 string to UTF16 using malloc_allocator.
/// The length is measured with strlen; the decoded length is not returned.
///
inline HRESULT NarrowStringToWideDynamic(_In_ LPCSTR sourceString, _Out_ LPWSTR* destStringPtr)
{
    // Required by the callee as an output slot; the value is discarded.
    size_t ignoredCount;
    const size_t sourceLength = strlen(sourceString);
    return NarrowStringToWide<malloc_allocator>(sourceString, sourceLength, destStringPtr, &ignoredCount);
}
///
/// Converts a null-terminated UTF8 string to UTF16 using malloc_allocator and
/// stores the resulting length through destLength.
/// The source length is measured with strlen.
///
inline HRESULT NarrowStringToWideDynamicGetLength(_In_ LPCSTR sourceString, _Out_ LPWSTR* destStringPtr, _Out_ size_t* destLength)
{
    const size_t sourceLength = strlen(sourceString);
    return NarrowStringToWide<malloc_allocator>(sourceString, sourceLength, destStringPtr, destLength);
}
///
/// Conversion traits keyed on the source and destination string types.
/// This primary template only declares the interface.
/// NOTE(review): no definitions of these members are visible in this file;
/// presumably only the partial specializations are meant to be used - confirm.
///
template <class Allocator, class SrcType, class DstType>
class NarrowWideStringConverter
{
public:
    /// Length of src, used when the caller did not supply one.
    static size_t Length(const SrcType& src);

    /// Converts srcCount units of src into a newly allocated *dst.
    static HRESULT Convert(
        SrcType src,
        size_t srcCount,
        DstType* dst,
        size_t* dstCount,
        size_t* allocateCount = nullptr);
};
///
/// Specialization for converting a narrow (LPCSTR) source into a wide (LPWSTR)
/// destination.
///
template <class Allocator>
class NarrowWideStringConverter<Allocator, LPCSTR, LPWSTR>
{
public:
    // Note: Typically caller should pass in Utf8 string length. Following
    // is used as fallback. The scan is capped at INT_MAX bytes.
    static size_t Length(LPCSTR src)
    {
        const size_t byteCount = strnlen(src, INT_MAX);
        return byteCount;
    }

    /// Forwards to NarrowStringToWide<Allocator> with the arguments unchanged.
    static HRESULT Convert(
        LPCSTR sourceString,
        size_t sourceCount,
        LPWSTR* destStringPtr,
        size_t* destCount,
        size_t* allocateCount = nullptr)
    {
        const HRESULT hr = NarrowStringToWide<Allocator>(
            sourceString, sourceCount, destStringPtr, destCount, allocateCount);
        return hr;
    }
};
///
/// Specialization for converting a wide (LPCWSTR) source into a narrow (LPSTR)
/// destination.
///
template <class Allocator>
class NarrowWideStringConverter<Allocator, LPCWSTR, LPSTR>
{
public:
    // Note: Typically caller should pass in WCHAR string length. Following
    // is used as fallback.
    static size_t Length(LPCWSTR src)
    {
        const size_t charCount = wcslen(src);
        return charCount;
    }

    /// Forwards to WideStringToNarrow<Allocator> with the arguments unchanged.
    static HRESULT Convert(
        LPCWSTR sourceString,
        size_t sourceCount,
        LPSTR* destStringPtr,
        size_t* destCount,
        size_t* allocateCount = nullptr)
    {
        const HRESULT hr = WideStringToNarrow<Allocator>(
            sourceString, sourceCount, destStringPtr, destCount, allocateCount);
        return hr;
    }
};
///
/// Owns the result of a narrow<->wide string conversion and releases it with
/// Allocator::free when destroyed, unless ownership was taken with Detach().
///
/// A conversion that fails leaves the object empty: the conversion operator
/// yields a value-initialized DstType and Length() returns 0. (This relies on
/// the converter not writing its outputs on failure, which holds for the
/// conversion functions visible in this file.)
///
/// NOTE(review): no copy operations are declared, so the compiler-generated
/// ones would duplicate the owning pointer and both objects would free it.
/// Do not copy instances; consider deleting the copy operations once callers
/// have been audited.
///
template <class Allocator, class SrcType, class DstType>
class NarrowWideConverter
{
    typedef NarrowWideStringConverter<Allocator, SrcType, DstType>
        StringConverter;

private:
    DstType dst;            // owned buffer, or a value-initialized DstType when empty
    size_t dstCount;        // length reported by the last conversion
    size_t allocateCount;   // size reported by the last conversion, passed to Allocator::free

    // Frees the owned buffer, if any, and returns the object to the empty state.
    void ReleaseBuffer()
    {
        if (dst)
        {
            Allocator::free(dst, allocateCount);
        }
        dst = DstType();
        dstCount = 0;
        allocateCount = 0;
    }

public:
    /// Creates an empty converter; call Initialize to perform a conversion.
    NarrowWideConverter() : dst(), dstCount(0), allocateCount(0)
    {
        // do nothing
    }

    /// Creates a converter and immediately converts src (see Initialize).
    NarrowWideConverter(const SrcType& src, size_t srcCount = -1) : dst(), dstCount(0), allocateCount(0)
    {
        Initialize(src, srcCount);
    }

    /// Converts srcCount units of src. When srcCount is left at its default
    /// of (size_t)-1 the length is obtained from StringConverter::Length.
    /// Any buffer held from an earlier conversion is freed first.
    void Initialize(const SrcType& src, size_t srcCount = -1)
    {
        // Start from the empty state so that a repeated call does not leak the
        // previous buffer and a failed conversion leaves well-defined values.
        ReleaseBuffer();

        if (srcCount == static_cast<size_t>(-1))
        {
            srcCount = StringConverter::Length(src);
        }
        // The HRESULT is not surfaced; failure is observable as an empty result.
        StringConverter::Convert(src, srcCount, &dst, &dstCount, &allocateCount);
    }

    ~NarrowWideConverter()
    {
        if (dst)
        {
            Allocator::free(dst, allocateCount);
        }
    }

    /// Gives up ownership of the buffer and returns it; the destructor will no
    /// longer free it. Length() keeps reporting the last conversion's length.
    DstType Detach()
    {
        DstType result = dst;
        dst = DstType();
        return result;
    }

    /// Returns the owned buffer without transferring ownership.
    operator DstType()
    {
        return dst;
    }

    /// Returns the length reported by the last conversion, or 0 if there was
    /// no successful conversion.
    size_t Length() const
    {
        return dstCount;
    }
};
// Ready-made converters that allocate with malloc_allocator.
using NarrowToWide = NarrowWideConverter<malloc_allocator, LPCSTR, LPWSTR>;
using WideToNarrow = NarrowWideConverter<malloc_allocator, LPCWSTR, LPSTR>;
}