// third_party/unrar/src/unicode.cpp - chromium/src - Git at Google

 #include "rar.hpp"
 #define MBFUNCTIONS

 #if !defined(_WIN_ALL) && !defined(_APPLE) && defined(_UNIX) && defined(MBFUNCTIONS)

 // Forward declarations of the Unix-only mapping helpers defined later in
 // this file. They are declared here so that WideToChar and CharToWide,
 // which precede the definitions, can call them.
 static bool WideToCharMap(const wchar *Src,char *Dest,size_t DestSize,bool &Success);
 static void CharToWideMap(const char *Src,wchar *Dest,size_t DestSize,bool &Success);

 // In Unix we map high ASCII characters which cannot be converted to Unicode
 // to 0xE000 - 0xE0FF private use Unicode area.
 static const uint MapAreaStart=0xE000;

 // Mapped string marker. Initially we used 0xFFFF for this purpose,
 // but it causes MSVC2008 swprintf to fail (it treats 0xFFFF as error marker).
 // While we could workaround it, it is safer to use another character.
 static const uint MappedStringMark=0xFFFE;

 #endif

 // Convert the zero terminated wide string 'Src' to a multibyte string
 // stored in 'Dest'. 'DestSize' is the size of 'Dest' in bytes including
 // the trailing zero. Returns false if the platform conversion reported
 // a failure or if DestSize is 0. 'Dest' can still contain a partially
 // converted string when false is returned and it is always zero terminated
 // if DestSize is not 0.
 bool WideToChar(const wchar *Src,char *Dest,size_t DestSize)
 {
   // Zero sized buffer cannot hold even the trailing zero, so we must not
   // write to 'Dest' at all.
   if (DestSize==0)
     return false;

   bool RetCode=true;
   *Dest=0; // Set 'Dest' to zero just in case the conversion will fail.

 #ifdef _WIN_ALL
   if (WideCharToMultiByte(CP_ACP,0,Src,-1,Dest,(int)DestSize,NULL,NULL)==0)
     RetCode=false;

 // wcstombs is broken in Android NDK r9.
 #elif defined(_APPLE)
   WideToUtf(Src,Dest,DestSize);

 #elif defined(_UNIX) && defined(MBFUNCTIONS)
   // Strings carrying the mapped string mark are handled by WideToCharMap,
   // all other strings go through the standard library conversion.
   if (!WideToCharMap(Src,Dest,DestSize,RetCode))
   {
     mbstate_t ps; // Use thread safe external state based functions.
     memset (&ps, 0, sizeof(ps));
     const wchar *SrcParam=Src; // wcsrtombs can change the pointer.

     // Some implementations of wcsrtombs can cause memory analyzing tools
     // like valgrind to report uninitialized data access. It happens because
     // internally these implementations call SSE4 based wcslen function,
     // which reads 16 bytes at once including those beyond of trailing 0.
     size_t ResultingSize=wcsrtombs(Dest,&SrcParam,DestSize,&ps);

     if (ResultingSize==(size_t)-1 && errno==EILSEQ)
     {
       // Aborted on inconvertible character not zero terminating the result.
       // EILSEQ helps to distinguish it from small output buffer abort.
       // We want to convert as much as we can, so we clean the output buffer
       // and repeat conversion.
       memset (&ps, 0, sizeof(ps));
       SrcParam=Src; // wcsrtombs can change the pointer.
       memset(Dest,0,DestSize);
       ResultingSize=wcsrtombs(Dest,&SrcParam,DestSize,&ps);
     }

     if (ResultingSize==(size_t)-1)
       RetCode=false;
     if (ResultingSize==0 && *Src!=0)
       RetCode=false;
   }
 #else
   // No conversion functions are available, so just truncate every wide
   // character to a single byte. Index type matches DestSize to avoid
   // the signed to unsigned comparison.
   for (size_t I=0;I<DestSize;I++)
   {
     Dest[I]=(char)Src[I];
     if (Src[I]==0)
       break;
   }
 #endif
   // DestSize is not 0 here, so it is safe to force the trailing zero.
   Dest[DestSize-1]=0;

   // We tried to return the empty string if conversion is failed,
   // but it does not work well. WideCharToMultiByte returns 'failed' code
   // and partially converted string even if we wanted to convert only a part
   // of string and passed DestSize smaller than required for fully converted
   // string. Such call is the valid behavior in RAR code and we do not expect
   // the empty string in this case.

   return RetCode;
 }


 // Convert the zero terminated multibyte string 'Src' to a wide string
 // stored in 'Dest'. 'DestSize' is the size of 'Dest' in wide characters
 // including the trailing zero. Returns false if the platform conversion
 // reported a failure or if DestSize is 0. 'Dest' can still contain
 // a partially converted string when false is returned and it is always
 // zero terminated if DestSize is not 0.
 bool CharToWide(const char *Src,wchar *Dest,size_t DestSize)
 {
   // Zero sized buffer cannot hold even the trailing zero, so we must not
   // write to 'Dest' at all.
   if (DestSize==0)
     return false;

   bool RetCode=true;
   *Dest=0; // Set 'Dest' to zero just in case the conversion will fail.

 #ifdef _WIN_ALL
   if (MultiByteToWideChar(CP_ACP,0,Src,-1,Dest,(int)DestSize)==0)
     RetCode=false;

 // mbstowcs is broken in Android NDK r9.
 #elif defined(_APPLE)
   UtfToWide(Src,Dest,DestSize);

 #elif defined(_UNIX) && defined(MBFUNCTIONS)
   mbstate_t ps;
   memset (&ps, 0, sizeof(ps));
   const char *SrcParam=Src; // mbsrtowcs can change the pointer.
   size_t ResultingSize=mbsrtowcs(Dest,&SrcParam,DestSize,&ps);
   if (ResultingSize==(size_t)-1)
     RetCode=false;
   if (ResultingSize==0 && *Src!=0)
     RetCode=false;

   // Standard conversion failed, so retry with mapping of inconvertible
   // bytes to the private use area. It needs the space for at least
   // one character and the trailing zero.
   if (RetCode==false && DestSize>1)
     CharToWideMap(Src,Dest,DestSize,RetCode);
 #else
   // No conversion functions are available, so just widen every byte.
   // Index type matches DestSize to avoid the signed to unsigned comparison.
   for (size_t I=0;I<DestSize;I++)
   {
     Dest[I]=(wchar_t)Src[I];
     if (Src[I]==0)
       break;
   }
 #endif
   // DestSize is not 0 here, so it is safe to force the trailing zero.
   Dest[DestSize-1]=0;

   // We tried to return the empty string if conversion is failed,
   // but it does not work well. MultiByteToWideChar returns 'failed' code
   // even if we wanted to convert only a part of string and passed DestSize
   // smaller than required for fully converted string. Such call is the valid
   // behavior in RAR code and we do not expect the empty string in this case.

   return RetCode;
 }


 #if !defined(_WIN_ALL) && !defined(_APPLE) && defined(_UNIX) && defined(MBFUNCTIONS)
 // Convert and restore mapped inconvertible Unicode characters.
 // We use it for extended ASCII names in Unix.
 // Returns false without touching 'Dest' and 'Success' if 'Src' does not
 // contain the mapped string mark, so the caller must use the usual
 // conversion. Otherwise converts 'Src' to 'Dest', restoring characters
 // from the 0xE080 - 0xE0FF mapping area to original high ASCII bytes,
 // and returns true. 'Success' is set to false if some character could not
 // be converted and was replaced by '_' or if DestSize is 0.
 bool WideToCharMap(const wchar *Src,char *Dest,size_t DestSize,bool &Success)
 {
   // String with inconvertible characters mapped to private use Unicode area
   // must have the mark code somewhere.
   if (wcschr(Src,(wchar)MappedStringMark)==NULL)
     return false;

   // Nothing can be stored to zero sized buffer, not even the trailing zero.
   if (DestSize==0)
   {
     Success=false;
     return true;
   }

   Success=true;
   uint SrcPos=0,DestPos=0;
   // Keep MB_CUR_MAX bytes of free space for the next character.
   // We compare DestPos+MB_CUR_MAX with DestSize instead of subtracting
   // MB_CUR_MAX from DestSize, because the unsigned subtraction wraps
   // around for DestSize less than MB_CUR_MAX and disables the limit.
   while (Src[SrcPos]!=0 && DestPos+MB_CUR_MAX<DestSize)
   {
     if (uint(Src[SrcPos])==MappedStringMark)
     {
       SrcPos++;
       continue;
     }
     // For security reasons do not restore low ASCII codes, so mapping cannot
     // be used to hide control codes like path separators.
     if (uint(Src[SrcPos])>=MapAreaStart+0x80 && uint(Src[SrcPos])<MapAreaStart+0x100)
       Dest[DestPos++]=char(uint(Src[SrcPos++])-MapAreaStart);
     else
     {
       mbstate_t ps;
       memset(&ps,0,sizeof(ps));
       if (wcrtomb(Dest+DestPos,Src[SrcPos],&ps)==(size_t)-1)
       {
         Dest[DestPos]='_';
         Success=false;
       }
       SrcPos++;
       // Measure the length of character stored above. mbrlen error codes
       // become negative in 'int', so Max() advances by one byte then.
       memset(&ps,0,sizeof(ps));
       int Length=mbrlen(Dest+DestPos,MB_CUR_MAX,&ps);
       DestPos+=Max(Length,1);
     }
   }
   Dest[Min(DestPos,DestSize-1)]=0;
   return true;
 }
 #endif


 #if !defined(_WIN_ALL) && !defined(_APPLE) && defined(_UNIX) && defined(MBFUNCTIONS)
 // Convert and map inconvertible Unicode characters.
 // We use it for extended ASCII names in Unix.
 // Converts 'Src' to wide string 'Dest' of 'DestSize' wide characters.
 // Bytes 0x80 - 0xFF, which cannot be converted, are stored as
 // MapAreaStart + byte value. 'Success' is set to true only if the entire
 // 'Src' up to the trailing zero was processed.
 void CharToWideMap(const char *Src,wchar *Dest,size_t DestSize,bool &Success)
 {
   // Map inconvertible characters to private use Unicode area 0xE000.
   // Mark such string by placing special non-character code before
   // first inconvertible character.
   Success=false;

   // Nothing can be stored to zero sized buffer, not even the trailing zero.
   if (DestSize==0)
     return;

   bool MarkAdded=false;
   uint SrcPos=0,DestPos=0;
   while (DestPos<DestSize)
   {
     if (Src[SrcPos]==0)
     {
       Success=true;
       break;
     }
     mbstate_t ps;
     memset(&ps,0,sizeof(ps));
     size_t res=mbrtowc(Dest+DestPos,Src+SrcPos,MB_CUR_MAX,&ps);
     if (res==(size_t)-1 || res==(size_t)-2)
     {
       // For security reasons we do not want to map low ASCII characters,
       // so we do not have additional .. and path separator codes.
       if (byte(Src[SrcPos])>=0x80)
       {
         if (!MarkAdded)
         {
           Dest[DestPos++]=MappedStringMark;
           MarkAdded=true;
           if (DestPos>=DestSize)
             break;
         }
         Dest[DestPos++]=byte(Src[SrcPos++])+MapAreaStart;
       }
       else
         break;
     }
     else
     {
       // mbrtowc already returned the number of consumed source bytes,
       // so we do not need to parse the same character again with mbrlen.
       // Always advance by at least one byte to guarantee the progress.
       SrcPos+=res>0 ? (uint)res:1;
       DestPos++;
     }
   }
   Dest[Min(DestPos,DestSize-1)]=0;
 }
 #endif


 // SrcSize is in wide characters, not in bytes.
 // Store every wide character of 'Src' as two bytes in 'Dest', low byte
 // first. Stops after storing the zero character or after SrcSize
 // characters, whichever comes first. Returns 'Dest'.
 byte* WideToRaw(const wchar *Src,byte *Dest,size_t SrcSize)
 {
   byte *Out=Dest;
   for (size_t Pos=0;Pos<SrcSize;Pos++)
   {
     Out[0]=(byte)Src[Pos];
     Out[1]=(byte)(Src[Pos]>>8);
     if (Src[Pos]==0)
       break;
     Out+=2;
   }
   return Dest;
 }


 // Build wide characters from byte pairs in 'Src', treating the first byte
 // of each pair as the low one. Stops after storing the zero character
 // or after DestSize characters, whichever comes first. Returns 'Dest'.
 wchar* RawToWide(const byte *Src,wchar *Dest,size_t DestSize)
 {
   size_t Pos=0;
   while (Pos<DestSize)
   {
     const byte *Pair=Src+Pos*2;
     Dest[Pos]=Pair[0]+(Pair[1]<<8);
     if (Dest[Pos]==0)
       break;
     Pos++;
   }
   return Dest;
 }


 // Convert the zero terminated wide string 'Src' to UTF-8 in 'Dest'.
 // 'DestSize' is the size of 'Dest' in bytes including the trailing zero.
 // A character, which encoded form does not fit to remaining space,
 // is not stored and conversion stops. Nothing is written if DestSize is 0.
 void WideToUtf(const wchar *Src,char *Dest,size_t DestSize)
 {
   // Zero sized buffer cannot hold even the trailing zero.
   if (DestSize==0)
     return;

   // Number of bytes still available, trailing zero excluded. It is signed,
   // so it can go below zero when a multibyte sequence does not fit.
   long dsize=(long)DestSize;
   dsize--;
   while (*Src!=0 && --dsize>=0)
   {
     uint c=*(Src++);
     if (c<0x80)
       *(Dest++)=c;
     else
       if (c<0x800 && --dsize>=0)
       {
         *(Dest++)=(0xc0|(c>>6));
         *(Dest++)=(0x80|(c&0x3f));
       }
       else
       {
         if (c>=0xd800 && c<=0xdbff && *Src>=0xdc00 && *Src<=0xdfff) // Surrogate pair.
         {
           c=((c-0xd800)<<10)+(*Src-0xdc00)+0x10000;
           Src++;
         }
         if (c<0x10000 && (dsize-=2)>=0)
         {
           *(Dest++)=(0xe0|(c>>12));
           *(Dest++)=(0x80|((c>>6)&0x3f));
           *(Dest++)=(0x80|(c&0x3f));
         }
         else
           if (c < 0x200000 && (dsize-=3)>=0)
           {
             *(Dest++)=(0xf0|(c>>18));
             *(Dest++)=(0x80|((c>>12)&0x3f));
             *(Dest++)=(0x80|((c>>6)&0x3f));
             *(Dest++)=(0x80|(c&0x3f));
           }
       }
   }
   *Dest=0;
 }


 // Calculate the number of bytes, trailing zero included, which the UTF-8
 // form of zero terminated wide string 'Src' needs.
 size_t WideToUtfSize(const wchar *Src)
 {
   size_t Total=1; // Start from 1 to include terminating zero.
   while (*Src!=0)
   {
     if (*Src<0x80)
       Total++;
     else if (*Src<0x800)
       Total+=2;
     else if ((uint)*Src<0x10000)
     {
       const bool HighSurrogate=Src[0]>=0xd800 && Src[0]<=0xdbff;
       if (HighSurrogate && Src[1]>=0xdc00 && Src[1]<=0xdfff)
       {
         Total+=4; // 4 output bytes for Unicode surrogate pair.
         Src++;    // Skip the second half of the pair.
       }
       else
         Total+=3;
     }
     else if ((uint)*Src<0x200000)
       Total+=4;
     Src++;
   }
   return Total;
 }


 // Convert the zero terminated UTF-8 string 'Src' to wide string 'Dest'.
 // 'DestSize' is the size of 'Dest' in wide characters including
 // the trailing zero. Returns false if a malformed sequence or a code point
 // above 0x10ffff was found or if DestSize is 0. Conversion stops at the
 // first malformed sequence, while too large code points are skipped.
 // Running out of the destination space stops conversion, but it does not
 // change the returned value.
 bool UtfToWide(const char *Src,wchar *Dest,size_t DestSize)
 {
   // Zero sized buffer cannot hold even the trailing zero.
   if (DestSize==0)
     return false;

   bool Success=true;
   // Number of wide characters still available, trailing zero excluded.
   long dsize=(long)DestSize;
   dsize--;
   while (*Src!=0)
   {
     // 'c' is the leading byte, 'd' is the decoded code point.
     uint c=byte(*(Src++)),d;
     if (c<0x80)
       d=c;
     else
       if ((c>>5)==6) // 110xxxxx, two byte sequence.
       {
         if ((*Src&0xc0)!=0x80)
         {
           Success=false;
           break;
         }
         d=((c&0x1f)<<6)|(*Src&0x3f);
         Src++;
       }
       else
         if ((c>>4)==14) // 1110xxxx, three byte sequence.
         {
           if ((Src[0]&0xc0)!=0x80 || (Src[1]&0xc0)!=0x80)
           {
             Success=false;
             break;
           }
           d=((c&0xf)<<12)|((Src[0]&0x3f)<<6)|(Src[1]&0x3f);
           Src+=2;
         }
         else
           if ((c>>3)==30) // 11110xxx, four byte sequence.
           {
             if ((Src[0]&0xc0)!=0x80 || (Src[1]&0xc0)!=0x80 || (Src[2]&0xc0)!=0x80)
             {
               Success=false;
               break;
             }
             d=((c&7)<<18)|((Src[0]&0x3f)<<12)|((Src[1]&0x3f)<<6)|(Src[2]&0x3f);
             Src+=3;
           }
           else
           {
             Success=false;
             break;
           }
     if (--dsize<0)
       break;
     if (d>0xffff)
     {
       // Reserve the second destination character. It is done even if
       // wchar is wide enough to not need the surrogate pair.
       if (--dsize<0)
         break;
       if (d>0x10ffff) // UTF-8 must end at 0x10ffff according to RFC 3629.
       {
         Success=false;
         continue;
       }
       if (sizeof(*Dest)==2) // Use the surrogate pair.
       {
         *(Dest++)=((d-0x10000)>>10)+0xd800;
         *(Dest++)=(d&0x3ff)+0xdc00;
       }
       else
         *(Dest++)=d;
     }
     else
       *(Dest++)=d;
   }
   *Dest=0;
   return Success;
 }


 // For zero terminated strings.
 // Measure the zero terminated string and pass it to the sized version.
 bool IsTextUtf8(const byte *Src)
 {
   const size_t Length=strlen((const char *)Src);
   return IsTextUtf8(Src,Length);
 }


 // Source data can be both with and without UTF-8 BOM.
 bool IsTextUtf8(const byte *Src,size_t SrcSize)
 {
   while (SrcSize-- > 0)
   {
     byte C=*(Src++);
     int HighOne=0; // Number of leftmost '1' bits.
     for (byte Mask=0x80;Mask!=0 && (C & Mask)!=0;Mask>>=1)
       HighOne++;
     if (HighOne==1 || HighOne>6)
       return false;
     while (--HighOne > 0)
       if (SrcSize-- <= 0 || (*(Src++) & 0xc0)!=0x80)
         return false;
   }
   return true;
 }


 int wcsicomp(const wchar *s1,const wchar *s2)
 {
 #ifdef _WIN_ALL
   return CompareStringW(LOCALE_USER_DEFAULT,NORM_IGNORECASE|SORT_STRINGSORT,s1,-1,s2,-1)-2;
 #else
   while (true)
   {
     wchar u1 = towupper(*s1);
     wchar u2 = towupper(*s2);
     if (u1 != u2)
       return u1 < u2 ? -1 : 1;
     if (*s1==0)
       break;
     s1++;
     s2++;
   }
   return 0;
 #endif
 }


 int wcsnicomp(const wchar *s1,const wchar *s2,size_t n)
 {
 #ifdef _WIN_ALL
   // If we specify 'n' exceeding the actual string length, CompareString goes
   // beyond the trailing zero and compares garbage. So we need to limit 'n'
   // to real string length.
   size_t l1=Min(wcslen(s1)+1,n);
   size_t l2=Min(wcslen(s2)+1,n);
   return CompareStringW(LOCALE_USER_DEFAULT,NORM_IGNORECASE|SORT_STRINGSORT,s1,(int)l1,s2,(int)l2)-2;
 #else
   if (n==0)
     return 0;
   while (true)
   {
     wchar u1 = towupper(*s1);
     wchar u2 = towupper(*s2);
     if (u1 != u2)
       return u1 < u2 ? -1 : 1;
     if (*s1==0 || --n==0)
       break;
     s1++;
     s2++;
   }
   return 0;
 #endif
 }


 // Case insensitive search of 'search' in 'str'. Returns the pointer to
 // first match inside of 'str' or NULL if nothing is found. NULL is also
 // returned for empty 'str', even if 'search' is empty too.
 const wchar_t* wcscasestr(const wchar_t *str, const wchar_t *search)
 {
   for (const wchar_t *Start=str;*Start!=0;Start++)
   {
     const wchar_t *Text=Start;
     const wchar_t *Pattern=search;
     // Zero in 'Text' differs from non-zero pattern character,
     // so this loop cannot run beyond the end of 'str'.
     while (*Pattern!=0 && tolowerw(*Text)==tolowerw(*Pattern))
     {
       Text++;
       Pattern++;
     }
     if (*Pattern==0)
       return Start;
   }
   return NULL;
 }


 #ifndef SFX_MODULE
 // Convert the zero terminated wide string to lower case in place.
 // Returns the passed pointer.
 wchar* wcslower(wchar *s)
 {
 #ifdef _WIN_ALL
   CharLower(s);
 #else
   wchar *Cur=s;
   while (*Cur!=0)
   {
     *Cur=towlower(*Cur);
     Cur++;
   }
 #endif
   return s;
 }
 #endif


 #ifndef SFX_MODULE
 // Convert the zero terminated wide string to upper case in place.
 // Returns the passed pointer.
 wchar* wcsupper(wchar *s)
 {
 #ifdef _WIN_ALL
   CharUpper(s);
 #else
   wchar *Cur=s;
   while (*Cur!=0)
   {
     *Cur=towupper(*Cur);
     Cur++;
   }
 #endif
   return s;
 }
 #endif


 // Return the upper case version of a single wide character code.
 int toupperw(int ch)
 {
 #if defined(_WIN_ALL)
   // CharUpper is more reliable than towupper in Windows, which seems to be
   // C locale dependent even in Unicode version. For example, towupper failed
   // to convert lowercase Russian characters.
   return (int)(INT_PTR)CharUpper((wchar *)(INT_PTR)ch);
 #else
   const wint_t Upper=towupper(ch);
   return (int)Upper;
 #endif
 }


 // Return the lower case version of a single wide character code.
 int tolowerw(int ch)
 {
 #if defined(_WIN_ALL)
   // CharLower is more reliable than towlower in Windows.
   // See comment for towupper above.
   return (int)(INT_PTR)CharLower((wchar *)(INT_PTR)ch);
 #else
   const wint_t Lower=towlower(ch);
   return (int)Lower;
 #endif
 }


 int atoiw(const wchar *s)
 {
   return (int)atoilw(s);
 }


 // Parse the optional '-' followed by decimal digits at the beginning
 // of 's'. Parsing stops at first non-digit character. Returns 0 if there
 // are no digits.
 int64 atoilw(const wchar *s)
 {
   const bool Negative=*s=='-';
   if (Negative)
     s++;

   // Use unsigned type here, since long string can overflow the variable
   // and signed integer overflow is undefined behavior in C++.
   uint64 Value=0;
   for (;*s>='0' && *s<='9';s++)
     Value=Value*10+(*s-'0');

   // Negate only if the value is not negative after conversion to avoid
   // the signed overflow with undefined behavior when negating
   // 0x8000000000000000.
   const int64 Signed=int64(Value);
   if (Negative && Signed>=0)
     return -Signed;
   return Signed;
 }


 #ifdef DBCS_SUPPORTED
 SupportDBCS gdbcs;

 // Fill the DBCS mode flag and lead byte table when the object is created.
 SupportDBCS::SupportDBCS()
 {
   this->Init();
 }


 // Detect if ANSI code page uses multibyte characters and fill the table
 // of lead bytes for all possible byte values.
 void SupportDBCS::Init()
 {
   CPINFO CPInfo;
   // CPInfo is not filled if GetCPInfo fails, so check the result before
   // reading MaxCharSize and fall back to single byte mode in case of error.
   DBCSMode=GetCPInfo(CP_ACP,&CPInfo)!=0 && CPInfo.MaxCharSize > 1;
   for (uint I=0;I<ASIZE(IsLeadByte);I++)
     IsLeadByte[I]=IsDBCSLeadByte(I)!=0;
 }


 // Return the pointer to character following the one at 's'.
 char* SupportDBCS::charnext(const char *s)
 {
   // Zero cannot be the trail byte. So if next byte after the lead byte
   // is 0, the string is corrupt and we'll better return the pointer to 0,
   // to break string processing loops.
   const bool DoubleByte=IsLeadByte[(byte)*s] && s[1]!=0;
   return (char *)(s+(DoubleByte ? 2:1));
 }


 // Return the length of zero terminated string 's' in characters,
 // counting a lead byte with its trail byte as a single character.
 size_t SupportDBCS::strlend(const char *s)
 {
   size_t Length=0;
   while (*s!=0)
   {
     // Zero cannot be the trail byte. If the string ends right after
     // the lead byte, it is corrupt and we step by one byte only,
     // so we do not jump over the trailing zero. Same as in charnext.
     if (IsLeadByte[(byte)*s] && s[1]!=0)
       s+=2;
     else
       s++;
     Length++;
   }
   return(Length);
 }


 // Return the pointer to first single byte character equal to 'c'
 // in zero terminated string 's' or NULL if it is not found.
 // Lead bytes and their trail bytes are never matched.
 char* SupportDBCS::strchrd(const char *s, int c)
 {
   while (*s!=0)
     if (IsLeadByte[(byte)*s])
     {
       // Zero cannot be the trail byte. If the string ends right after
       // the lead byte, it is corrupt and we step by one byte only,
       // so we do not jump over the trailing zero. Same as in charnext.
       s+=s[1]!=0 ? 2:1;
     }
     else
       if (*s==c)
         return((char *)s);
       else
         s++;
   return(NULL);
 }


 void SupportDBCS::copychrd(char *dest,const char *src)
 {
   dest[0]=src[0];
   if (IsLeadByte[(byte)src[0]])
     dest[1]=src[1];
 }


 // Return the pointer to last single byte character equal to 'c'
 // in zero terminated string 's' or NULL if it is not found.
 // Lead bytes and their trail bytes are never matched.
 char* SupportDBCS::strrchrd(const char *s, int c)
 {
   const char *found=NULL;
   while (*s!=0)
     if (IsLeadByte[(byte)*s])
     {
       // Zero cannot be the trail byte. If the string ends right after
       // the lead byte, it is corrupt and we step by one byte only,
       // so we do not jump over the trailing zero. Same as in charnext.
       s+=s[1]!=0 ? 2:1;
     }
     else
     {
       if (*s==c)
         found=s;
       s++;
     }
   return((char *)found);
 }
 #endif
	#include "rar.hpp"
	#define MBFUNCTIONS

	#if !defined(_WIN_ALL) && !defined(_APPLE) && defined(_UNIX) && defined(MBFUNCTIONS)

	static bool WideToCharMap(const wchar Src,char Dest,size_t DestSize,bool &Success);
	static void CharToWideMap(const char Src,wchar Dest,size_t DestSize,bool &Success);

	// In Unix we map high ASCII characters which cannot be converted to Unicode
	// to 0xE000 - 0xE0FF private use Unicode area.
	static const uint MapAreaStart=0xE000;

	// Mapped string marker. Initially we used 0xFFFF for this purpose,
	// but it causes MSVC2008 swprintf to fail (it treats 0xFFFF as error marker).
	// While we could workaround it, it is safer to use another character.
	static const uint MappedStringMark=0xFFFE;

	#endif

	bool WideToChar(const wchar Src,char Dest,size_t DestSize)
	{
	bool RetCode=true;
	*Dest=0; // Set 'Dest' to zero just in case the conversion will fail.

	#ifdef _WIN_ALL
	if (WideCharToMultiByte(CP_ACP,0,Src,-1,Dest,(int)DestSize,NULL,NULL)==0)
	RetCode=false;

	// wcstombs is broken in Android NDK r9.
	#elif defined(_APPLE)
	WideToUtf(Src,Dest,DestSize);

	#elif defined(_UNIX) && defined(MBFUNCTIONS)
	if (!WideToCharMap(Src,Dest,DestSize,RetCode))
	{
	mbstate_t ps; // Use thread safe external state based functions.
	memset (&ps, 0, sizeof(ps));
	const wchar *SrcParam=Src; // wcsrtombs can change the pointer.

	// Some implementations of wcsrtombs can cause memory analyzing tools
	// like valgrind to report uninitialized data access. It happens because
	// internally these implementations call SSE4 based wcslen function,
	// which reads 16 bytes at once including those beyond of trailing 0.
	size_t ResultingSize=wcsrtombs(Dest,&SrcParam,DestSize,&ps);

	if (ResultingSize==(size_t)-1 && errno==EILSEQ)
	{
	// Aborted on inconvertible character not zero terminating the result.
	// EILSEQ helps to distinguish it from small output buffer abort.
	// We want to convert as much as we can, so we clean the output buffer
	// and repeat conversion.
	memset (&ps, 0, sizeof(ps));
	SrcParam=Src; // wcsrtombs can change the pointer.
	memset(Dest,0,DestSize);
	ResultingSize=wcsrtombs(Dest,&SrcParam,DestSize,&ps);
	}

	if (ResultingSize==(size_t)-1)
	RetCode=false;
	if (ResultingSize==0 && *Src!=0)
	RetCode=false;
	}
	#else
	for (int I=0;I<DestSize;I++)
	{
	Dest[I]=(char)Src[I];
	if (Src[I]==0)
	break;
	}
	#endif
	if (DestSize>0)
	Dest[DestSize-1]=0;

	// We tried to return the empty string if conversion is failed,
	// but it does not work well. WideCharToMultiByte returns 'failed' code
	// and partially converted string even if we wanted to convert only a part
	// of string and passed DestSize smaller than required for fully converted
	// string. Such call is the valid behavior in RAR code and we do not expect
	// the empty string in this case.

	return RetCode;
	}


	bool CharToWide(const char Src,wchar Dest,size_t DestSize)
	{
	bool RetCode=true;
	*Dest=0; // Set 'Dest' to zero just in case the conversion will fail.

	#ifdef _WIN_ALL
	if (MultiByteToWideChar(CP_ACP,0,Src,-1,Dest,(int)DestSize)==0)
	RetCode=false;

	// mbstowcs is broken in Android NDK r9.
	#elif defined(_APPLE)
	UtfToWide(Src,Dest,DestSize);

	#elif defined(_UNIX) && defined(MBFUNCTIONS)
	mbstate_t ps;
	memset (&ps, 0, sizeof(ps));
	const char *SrcParam=Src; // mbsrtowcs can change the pointer.
	size_t ResultingSize=mbsrtowcs(Dest,&SrcParam,DestSize,&ps);
	if (ResultingSize==(size_t)-1)
	RetCode=false;
	if (ResultingSize==0 && *Src!=0)
	RetCode=false;

	if (RetCode==false && DestSize>1)
	CharToWideMap(Src,Dest,DestSize,RetCode);
	#else
	for (int I=0;I<DestSize;I++)
	{
	Dest[I]=(wchar_t)Src[I];
	if (Src[I]==0)
	break;
	}
	#endif
	if (DestSize>0)
	Dest[DestSize-1]=0;

	// We tried to return the empty string if conversion is failed,
	// but it does not work well. MultiByteToWideChar returns 'failed' code
	// even if we wanted to convert only a part of string and passed DestSize
	// smaller than required for fully converted string. Such call is the valid
	// behavior in RAR code and we do not expect the empty string in this case.

	return RetCode;
	}


	#if !defined(_WIN_ALL) && !defined(_APPLE) && defined(_UNIX) && defined(MBFUNCTIONS)
	// Convert and restore mapped inconvertible Unicode characters.
	// We use it for extended ASCII names in Unix.
	bool WideToCharMap(const wchar Src,char Dest,size_t DestSize,bool &Success)
	{
	// String with inconvertible characters mapped to private use Unicode area
	// must have the mark code somewhere.
	if (wcschr(Src,(wchar)MappedStringMark)==NULL)
	return false;

	Success=true;
	uint SrcPos=0,DestPos=0;
	while (Src[SrcPos]!=0 && DestPos<DestSize-MB_CUR_MAX)
	{
	if (uint(Src[SrcPos])==MappedStringMark)
	{
	SrcPos++;
	continue;
	}
	// For security reasons do not restore low ASCII codes, so mapping cannot
	// be used to hide control codes like path separators.
	if (uint(Src[SrcPos])>=MapAreaStart+0x80 && uint(Src[SrcPos])<MapAreaStart+0x100)
	Dest[DestPos++]=char(uint(Src[SrcPos++])-MapAreaStart);
	else
	{
	mbstate_t ps;
	memset(&ps,0,sizeof(ps));
	if (wcrtomb(Dest+DestPos,Src[SrcPos],&ps)==(size_t)-1)
	{
	Dest[DestPos]='_';
	Success=false;
	}
	SrcPos++;
	memset(&ps,0,sizeof(ps));
	int Length=mbrlen(Dest+DestPos,MB_CUR_MAX,&ps);
	DestPos+=Max(Length,1);
	}
	}
	Dest[Min(DestPos,DestSize-1)]=0;
	return true;
	}
	#endif


	#if !defined(_WIN_ALL) && !defined(_APPLE) && defined(_UNIX) && defined(MBFUNCTIONS)
	// Convert and map inconvertible Unicode characters.
	// We use it for extended ASCII names in Unix.
	void CharToWideMap(const char Src,wchar Dest,size_t DestSize,bool &Success)
	{
	// Map inconvertible characters to private use Unicode area 0xE000.
	// Mark such string by placing special non-character code before
	// first inconvertible character.
	Success=false;
	bool MarkAdded=false;
	uint SrcPos=0,DestPos=0;
	while (DestPos<DestSize)
	{
	if (Src[SrcPos]==0)
	{
	Success=true;
	break;
	}
	mbstate_t ps;
	memset(&ps,0,sizeof(ps));
	size_t res=mbrtowc(Dest+DestPos,Src+SrcPos,MB_CUR_MAX,&ps);
	if (res==(size_t)-1 \|\| res==(size_t)-2)
	{
	// For security reasons we do not want to map low ASCII characters,
	// so we do not have additional .. and path separator codes.
	if (byte(Src[SrcPos])>=0x80)
	{
	if (!MarkAdded)
	{
	Dest[DestPos++]=MappedStringMark;
	MarkAdded=true;
	if (DestPos>=DestSize)
	break;
	}
	Dest[DestPos++]=byte(Src[SrcPos++])+MapAreaStart;
	}
	else
	break;
	}
	else
	{
	memset(&ps,0,sizeof(ps));
	int Length=mbrlen(Src+SrcPos,MB_CUR_MAX,&ps);
	SrcPos+=Max(Length,1);
	DestPos++;
	}
	}
	Dest[Min(DestPos,DestSize-1)]=0;
	}
	#endif


	// SrcSize is in wide characters, not in bytes.
	byte* WideToRaw(const wchar Src,byte Dest,size_t SrcSize)
	{
	for (size_t I=0;I<SrcSize;I++,Src++)
	{
	Dest[I2]=(byte)Src;
	Dest[I2+1]=(byte)(Src>>8);
	if (*Src==0)
	break;
	}
	return Dest;
	}


	wchar* RawToWide(const byte Src,wchar Dest,size_t DestSize)
	{
	for (size_t I=0;I<DestSize;I++)
	if ((Dest[I]=Src[I2]+(Src[I2+1]<<8))==0)
	break;
	return Dest;
	}


	void WideToUtf(const wchar Src,char Dest,size_t DestSize)
	{
	long dsize=(long)DestSize;
	dsize--;
	while (*Src!=0 && --dsize>=0)
	{
	uint c=*(Src++);
	if (c<0x80)
	*(Dest++)=c;
	else
	if (c<0x800 && --dsize>=0)
	{
	*(Dest++)=(0xc0\|(c>>6));
	*(Dest++)=(0x80\|(c&0x3f));
	}
	else
	{
	if (c>=0xd800 && c<=0xdbff && Src>=0xdc00 && Src<=0xdfff) // Surrogate pair.
	{
	c=((c-0xd800)<<10)+(*Src-0xdc00)+0x10000;
	Src++;
	}
	if (c<0x10000 && (dsize-=2)>=0)
	{
	*(Dest++)=(0xe0\|(c>>12));
	*(Dest++)=(0x80\|((c>>6)&0x3f));
	*(Dest++)=(0x80\|(c&0x3f));
	}
	else
	if (c < 0x200000 && (dsize-=3)>=0)
	{
	*(Dest++)=(0xf0\|(c>>18));
	*(Dest++)=(0x80\|((c>>12)&0x3f));
	*(Dest++)=(0x80\|((c>>6)&0x3f));
	*(Dest++)=(0x80\|(c&0x3f));
	}
	}
	}
	*Dest=0;
	}


	size_t WideToUtfSize(const wchar *Src)
	{
	size_t Size=0;
	for (;*Src!=0;Src++)
	if (*Src<0x80)
	Size++;
	else
	if (*Src<0x800)
	Size+=2;
	else
	if ((uint)*Src<0x10000)
	{
	if (Src[0]>=0xd800 && Src[0]<=0xdbff && Src[1]>=0xdc00 && Src[1]<=0xdfff)
	{
	Size+=4; // 4 output bytes for Unicode surrogate pair.
	Src++;
	}
	else
	Size+=3;
	}
	else
	if ((uint)*Src<0x200000)
	Size+=4;
	return Size+1; // Include terminating zero.
	}


	bool UtfToWide(const char Src,wchar Dest,size_t DestSize)
	{
	bool Success=true;
	long dsize=(long)DestSize;
	dsize--;
	while (*Src!=0)
	{
	uint c=byte(*(Src++)),d;
	if (c<0x80)
	d=c;
	else
	if ((c>>5)==6)
	{
	if ((*Src&0xc0)!=0x80)
	{
	Success=false;
	break;
	}
	d=((c&0x1f)<<6)\|(*Src&0x3f);
	Src++;
	}
	else
	if ((c>>4)==14)
	{
	if ((Src[0]&0xc0)!=0x80 \|\| (Src[1]&0xc0)!=0x80)
	{
	Success=false;
	break;
	}
	d=((c&0xf)<<12)\|((Src[0]&0x3f)<<6)\|(Src[1]&0x3f);
	Src+=2;
	}
	else
	if ((c>>3)==30)
	{
	if ((Src[0]&0xc0)!=0x80 \|\| (Src[1]&0xc0)!=0x80 \|\| (Src[2]&0xc0)!=0x80)
	{
	Success=false;
	break;
	}
	d=((c&7)<<18)\|((Src[0]&0x3f)<<12)\|((Src[1]&0x3f)<<6)\|(Src[2]&0x3f);
	Src+=3;
	}
	else
	{
	Success=false;
	break;
	}
	if (--dsize<0)
	break;
	if (d>0xffff)
	{
	if (--dsize<0)
	break;
	if (d>0x10ffff) // UTF-8 must end at 0x10ffff according to RFC 3629.
	{
	Success=false;
	continue;
	}
	if (sizeof(*Dest)==2) // Use the surrogate pair.
	{
	*(Dest++)=((d-0x10000)>>10)+0xd800;
	*(Dest++)=(d&0x3ff)+0xdc00;
	}
	else
	*(Dest++)=d;
	}
	else
	*(Dest++)=d;
	}
	*Dest=0;
	return Success;
	}


	// For zero terminated strings.
	bool IsTextUtf8(const byte *Src)
	{
	return IsTextUtf8(Src,strlen((const char *)Src));
	}


	// Source data can be both with and without UTF-8 BOM.
	bool IsTextUtf8(const byte *Src,size_t SrcSize)
	{
	while (SrcSize-- > 0)
	{
	byte C=*(Src++);
	int HighOne=0; // Number of leftmost '1' bits.
	for (byte Mask=0x80;Mask!=0 && (C & Mask)!=0;Mask>>=1)
	HighOne++;
	if (HighOne==1 \|\| HighOne>6)
	return false;
	while (--HighOne > 0)
	if (SrcSize-- <= 0 \|\| (*(Src++) & 0xc0)!=0x80)
	return false;
	}
	return true;
	}


	int wcsicomp(const wchar s1,const wchar s2)
	{
	#ifdef _WIN_ALL
	return CompareStringW(LOCALE_USER_DEFAULT,NORM_IGNORECASE\|SORT_STRINGSORT,s1,-1,s2,-1)-2;
	#else
	while (true)
	{
	wchar u1 = towupper(*s1);
	wchar u2 = towupper(*s2);
	if (u1 != u2)
	return u1 < u2 ? -1 : 1;
	if (*s1==0)
	break;
	s1++;
	s2++;
	}
	return 0;
	#endif
	}


	int wcsnicomp(const wchar s1,const wchar s2,size_t n)
	{
	#ifdef _WIN_ALL
	// If we specify 'n' exceeding the actual string length, CompareString goes
	// beyond the trailing zero and compares garbage. So we need to limit 'n'
	// to real string length.
	size_t l1=Min(wcslen(s1)+1,n);
	size_t l2=Min(wcslen(s2)+1,n);
	return CompareStringW(LOCALE_USER_DEFAULT,NORM_IGNORECASE\|SORT_STRINGSORT,s1,(int)l1,s2,(int)l2)-2;
	#else
	if (n==0)
	return 0;
	while (true)
	{
	wchar u1 = towupper(*s1);
	wchar u2 = towupper(*s2);
	if (u1 != u2)
	return u1 < u2 ? -1 : 1;
	if (*s1==0 \|\| --n==0)
	break;
	s1++;
	s2++;
	}
	return 0;
	#endif
	}


	const wchar_t* wcscasestr(const wchar_t str, const wchar_t search)
	{
	for (size_t i=0;str[i]!=0;i++)
	for (size_t j=0;;j++)
	{
	if (search[j]==0)
	return str+i;
	if (tolowerw(str[i+j])!=tolowerw(search[j]))
	break;
	}
	return NULL;
	}


	#ifndef SFX_MODULE
	wchar* wcslower(wchar *s)
	{
	#ifdef _WIN_ALL
	CharLower(s);
	#else
	for (wchar c=s;c!=0;c++)
	c=towlower(c);
	#endif
	return s;
	}
	#endif


	#ifndef SFX_MODULE
	wchar* wcsupper(wchar *s)
	{
	#ifdef _WIN_ALL
	CharUpper(s);
	#else
	for (wchar c=s;c!=0;c++)
	c=towupper(c);
	#endif
	return s;
	}
	#endif




	int toupperw(int ch)
	{
	#if defined(_WIN_ALL)
	// CharUpper is more reliable than towupper in Windows, which seems to be
	// C locale dependent even in Unicode version. For example, towupper failed
	// to convert lowercase Russian characters.
	return (int)(INT_PTR)CharUpper((wchar *)(INT_PTR)ch);
	#else
	return towupper(ch);
	#endif
	}


	int tolowerw(int ch)
	{
	#if defined(_WIN_ALL)
	// CharLower is more reliable than towlower in Windows.
	// See comment for towupper above.
	return (int)(INT_PTR)CharLower((wchar *)(INT_PTR)ch);
	#else
	return towlower(ch);
	#endif
	}


	int atoiw(const wchar *s)
	{
	return (int)atoilw(s);
	}


	int64 atoilw(const wchar *s)
	{
	bool sign=false;
	if (*s=='-')
	{
	s++;
	sign=true;
	}
	// Use unsigned type here, since long string can overflow the variable
	// and signed integer overflow is undefined behavior in C++.
	uint64 n=0;
	while (s>='0' && s<='9')
	{
	n=n10+(s-'0');
	s++;
	}
	// Check int64(n)>=0 to avoid the signed overflow with undefined behavior
	// when negating 0x8000000000000000.
	return sign && int64(n)>=0 ? -int64(n) : int64(n);
	}


	#ifdef DBCS_SUPPORTED
	SupportDBCS gdbcs;

	SupportDBCS::SupportDBCS()
	{
	Init();
	}


	void SupportDBCS::Init()
	{
	CPINFO CPInfo;
	GetCPInfo(CP_ACP,&CPInfo);
	DBCSMode=CPInfo.MaxCharSize > 1;
	for (uint I=0;I<ASIZE(IsLeadByte);I++)
	IsLeadByte[I]=IsDBCSLeadByte(I)!=0;
	}


	char* SupportDBCS::charnext(const char *s)
	{
	// Zero cannot be the trail byte. So if next byte after the lead byte
	// is 0, the string is corrupt and we'll better return the pointer to 0,
	// to break string processing loops.
	return (char )(IsLeadByte[(byte)s] && s[1]!=0 ? s+2:s+1);
	}


	size_t SupportDBCS::strlend(const char *s)
	{
	size_t Length=0;
	while (*s!=0)
	{
	if (IsLeadByte[(byte)*s])
	s+=2;
	else
	s++;
	Length++;
	}
	return(Length);
	}


	char* SupportDBCS::strchrd(const char *s, int c)
	{
	while (*s!=0)
	if (IsLeadByte[(byte)*s])
	s+=2;
	else
	if (*s==c)
	return((char *)s);
	else
	s++;
	return(NULL);
	}


	void SupportDBCS::copychrd(char dest,const char src)
	{
	dest[0]=src[0];
	if (IsLeadByte[(byte)src[0]])
	dest[1]=src[1];
	}


	char* SupportDBCS::strrchrd(const char *s, int c)
	{
	const char *found=NULL;
	while (*s!=0)
	if (IsLeadByte[(byte)*s])
	s+=2;
	else
	{
	if (*s==c)
	found=s;
	s++;
	}
	return((char *)found);
	}
	#endif