mirror of
https://github.com/reactos/reactos.git
synced 2025-02-22 08:25:03 +00:00
[KERNEL32] Improve MultiByteToWideChar (#1477)
Reduce MultiByteToWideChar failures. CORE-13349
This commit is contained in:
parent
3ce3b4c283
commit
c7bebe40cb
1 changed file with 102 additions and 11 deletions
|
@ -7,6 +7,7 @@
|
|||
* Hartmut Birr
|
||||
* Gunnar Andre Dalsnes
|
||||
* Thomas Weidenmueller
|
||||
* Katayama Hirofumi MZ
|
||||
* UPDATE HISTORY:
|
||||
* Created 24/08/2004
|
||||
*/
|
||||
|
@ -36,6 +37,10 @@ static const char UTF8Length[128] =
|
|||
/* Mask selecting the payload bits of the first byte, indexed by the number of trail bytes in the UTF-8 sequence. */
|
||||
static const unsigned char UTF8Mask[6] = {0x7f, 0x1f, 0x0f, 0x07, 0x03, 0x01};
|
||||
|
||||
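/* Lower bound of the decoded value, indexed by the number of trail bytes; a sequence that decodes below its bound is treated as invalid (overlong encoding). */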
|
||||
static const unsigned long UTF8LBound[] =
|
||||
{0, 0x80, 0x800, 0x10000, 0x200000, 0x2000000, 0xFFFFFFFF};
|
||||
|
||||
/* FIXME: Change to HASH table or linear array. */
|
||||
static LIST_ENTRY CodePageListHead;
|
||||
static CODEPAGE_ENTRY AnsiCodePage;
|
||||
|
@ -352,7 +357,6 @@ IntGetCodePageEntry(UINT CodePage)
|
|||
* Internal version of MultiByteToWideChar for UTF8.
|
||||
*
|
||||
* @see MultiByteToWideChar
|
||||
* @todo Add UTF8 validity checks.
|
||||
*/
|
||||
|
||||
static
|
||||
|
@ -364,10 +368,12 @@ IntMultiByteToWideCharUTF8(DWORD Flags,
|
|||
LPWSTR WideCharString,
|
||||
INT WideCharCount)
|
||||
{
|
||||
LPCSTR MbsEnd;
|
||||
UCHAR Char, Length;
|
||||
LPCSTR MbsEnd, MbsPtrSave;
|
||||
UCHAR Char, TrailLength;
|
||||
WCHAR WideChar;
|
||||
LONG Count;
|
||||
BOOL CharIsValid, StringIsValid = TRUE;
|
||||
const WCHAR InvalidChar = 0xFFFD;
|
||||
|
||||
if (Flags != 0 && Flags != MB_ERR_INVALID_CHARS)
|
||||
{
|
||||
|
@ -378,17 +384,61 @@ IntMultiByteToWideCharUTF8(DWORD Flags,
|
|||
/* Does caller query for output buffer size? */
|
||||
if (WideCharCount == 0)
|
||||
{
|
||||
/* validate and count the wide characters */
|
||||
MbsEnd = MultiByteString + MultiByteCount;
|
||||
for (; MultiByteString < MbsEnd; WideCharCount++)
|
||||
{
|
||||
Char = *MultiByteString++;
|
||||
if (Char < 0xC0)
|
||||
{
|
||||
TrailLength = 0;
|
||||
continue;
|
||||
MultiByteString += UTF8Length[Char - 0x80];
|
||||
}
|
||||
if (Char >= 0xF8 || (Char & 0xC0) == 0x80)
|
||||
{
|
||||
TrailLength = 0;
|
||||
StringIsValid = FALSE;
|
||||
continue;
|
||||
}
|
||||
|
||||
CharIsValid = TRUE;
|
||||
MbsPtrSave = MultiByteString;
|
||||
TrailLength = UTF8Length[Char - 0x80];
|
||||
WideChar = Char & UTF8Mask[TrailLength];
|
||||
|
||||
while (TrailLength && MultiByteString < MbsEnd)
|
||||
{
|
||||
if ((*MultiByteString & 0xC0) != 0x80)
|
||||
{
|
||||
CharIsValid = StringIsValid = FALSE;
|
||||
break;
|
||||
}
|
||||
|
||||
WideChar = (WideChar << 6) | (*MultiByteString++ & 0x7f);
|
||||
TrailLength--;
|
||||
}
|
||||
|
||||
if (!CharIsValid || WideChar < UTF8LBound[UTF8Length[Char - 0x80]])
|
||||
{
|
||||
MultiByteString = MbsPtrSave;
|
||||
}
|
||||
}
|
||||
|
||||
if (TrailLength)
|
||||
{
|
||||
WideCharCount++;
|
||||
}
|
||||
|
||||
if (Flags == MB_ERR_INVALID_CHARS && (!StringIsValid || TrailLength))
|
||||
{
|
||||
SetLastError(ERROR_NO_UNICODE_TRANSLATION);
|
||||
return 0;
|
||||
}
|
||||
|
||||
return WideCharCount;
|
||||
}
|
||||
|
||||
/* convert */
|
||||
MbsEnd = MultiByteString + MultiByteCount;
|
||||
for (Count = 0; Count < WideCharCount && MultiByteString < MbsEnd; Count++)
|
||||
{
|
||||
|
@ -396,20 +446,61 @@ IntMultiByteToWideCharUTF8(DWORD Flags,
|
|||
if (Char < 0x80)
|
||||
{
|
||||
*WideCharString++ = Char;
|
||||
TrailLength = 0;
|
||||
continue;
|
||||
}
|
||||
Length = UTF8Length[Char - 0x80];
|
||||
WideChar = Char & UTF8Mask[Length];
|
||||
while (Length && MultiByteString < MbsEnd)
|
||||
if (Char >= 0xF8 || Char == 0x80 || (Char & 0xC0) == 0x80)
|
||||
{
|
||||
WideChar = (WideChar << 6) | (*MultiByteString++ & 0x7f);
|
||||
Length--;
|
||||
*WideCharString++ = InvalidChar;
|
||||
TrailLength = 0;
|
||||
continue;
|
||||
}
|
||||
*WideCharString++ = WideChar;
|
||||
|
||||
CharIsValid = TRUE;
|
||||
MbsPtrSave = MultiByteString;
|
||||
TrailLength = UTF8Length[Char - 0x80];
|
||||
WideChar = Char & UTF8Mask[TrailLength];
|
||||
|
||||
while (TrailLength && MultiByteString < MbsEnd)
|
||||
{
|
||||
if ((*MultiByteString & 0xC0) != 0x80)
|
||||
{
|
||||
CharIsValid = StringIsValid = FALSE;
|
||||
break;
|
||||
}
|
||||
|
||||
WideChar = (WideChar << 6) | (*MultiByteString++ & 0x7f);
|
||||
TrailLength--;
|
||||
}
|
||||
|
||||
if (CharIsValid && UTF8LBound[UTF8Length[Char - 0x80]] <= WideChar)
|
||||
{
|
||||
*WideCharString++ = WideChar;
|
||||
}
|
||||
else
|
||||
{
|
||||
*WideCharString++ = InvalidChar;
|
||||
MultiByteString = MbsPtrSave;
|
||||
}
|
||||
}
|
||||
|
||||
if (TrailLength && Count < WideCharCount && MultiByteString < MbsEnd)
|
||||
{
|
||||
*WideCharString = InvalidChar;
|
||||
WideCharCount++;
|
||||
}
|
||||
|
||||
if (MultiByteString < MbsEnd)
|
||||
{
|
||||
SetLastError(ERROR_INSUFFICIENT_BUFFER);
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (Flags == MB_ERR_INVALID_CHARS && (!StringIsValid || TrailLength))
|
||||
{
|
||||
SetLastError(ERROR_NO_UNICODE_TRANSLATION);
|
||||
return 0;
|
||||
}
|
||||
|
||||
return Count;
|
||||
}
|
||||
|
@ -549,7 +640,7 @@ IntMultiByteToWideCharCP(UINT CodePage,
|
|||
|
||||
if (MultiByteString == MbsEnd)
|
||||
{
|
||||
*WideCharString++ = UNICODE_NULL;
|
||||
*WideCharString++ = MultiByteTable[Char];
|
||||
}
|
||||
else if (*MultiByteString == 0)
|
||||
{
|
||||
|
|
Loading…
Reference in a new issue