From 4e21017693a645608b66f812c8d2b88d60250e5f Mon Sep 17 00:00:00 2001 From: Thomas Faber Date: Mon, 26 Sep 2016 10:12:58 +0000 Subject: [PATCH] [KERNEL32] - Handle UTF-16 surrogate pairs in IntWideCharToMultiByteUTF8. CORE-12042 #resolve svn path=/trunk/; revision=72810 --- .../dll/win32/kernel32/winnls/string/nls.c | 45 +++++++++++++++++-- 1 file changed, 42 insertions(+), 3 deletions(-) diff --git a/reactos/dll/win32/kernel32/winnls/string/nls.c b/reactos/dll/win32/kernel32/winnls/string/nls.c index 0693c7ed238..193cda1ba42 100644 --- a/reactos/dll/win32/kernel32/winnls/string/nls.c +++ b/reactos/dll/win32/kernel32/winnls/string/nls.c @@ -95,7 +95,7 @@ NlsInit(VOID) RtlInitCodePageTable((PUSHORT)AnsiCodePage.SectionMapping, &AnsiCodePage.CodePageTable); AnsiCodePage.CodePage = AnsiCodePage.CodePageTable.CodePage; - + InsertTailList(&CodePageListHead, &AnsiCodePage.Entry); /* Setup OEM code page. */ @@ -515,7 +515,7 @@ IntMultiByteToWideCharCP(UINT CodePage, TempString++; } } - + /* Does caller query for output buffer size? */ if (WideCharCount == 0) { @@ -753,7 +753,7 @@ IntWideCharToMultiByteUTF8(UINT CodePage, LPBOOL UsedDefaultChar) { INT TempLength; - WCHAR Char; + DWORD Char; /* Does caller query for output buffer size? */ if (MultiByteCount == 0) @@ -766,7 +766,17 @@ IntWideCharToMultiByteUTF8(UINT CodePage, { TempLength++; if (*WideCharString >= 0x800) + { TempLength++; + if (*WideCharString >= 0xd800 && *WideCharString < 0xdc00 && + WideCharCount >= 1 && + WideCharString[1] >= 0xdc00 && WideCharString[1] <= 0xe000) + { + WideCharCount--; + WideCharString++; + TempLength++; + } + } } } return TempLength; @@ -801,6 +811,35 @@ IntWideCharToMultiByteUTF8(UINT CodePage, continue; } + /* surrogate pair 0x10000-0x10ffff: 4 bytes */ + if (Char >= 0xd800 && Char < 0xdc00 && + WideCharCount >= 1 && + WideCharString[1] >= 0xdc00 && WideCharString[1] < 0xe000) + { + WideCharCount--; + WideCharString++; + + if (TempLength < 4) + { + SetLastError(ERROR_INSUFFICIENT_BUFFER); + break; + } + + Char = (Char - 0xd800) << 10; + Char |= *WideCharString - 0xdc00; + ASSERT(Char <= 0xfffff); + Char += 0x10000; + ASSERT(Char <= 0x10ffff); + + MultiByteString[3] = 0x80 | (Char & 0x3f); Char >>= 6; + MultiByteString[2] = 0x80 | (Char & 0x3f); Char >>= 6; + MultiByteString[1] = 0x80 | (Char & 0x3f); Char >>= 6; + MultiByteString[0] = 0xf0 | Char; + MultiByteString += 4; + TempLength -= 4; + continue; + } + /* 0x800-0xffff: 3 bytes */ if (TempLength < 3) {