From f25ac715b0f56b3c6295a944255796fe46538e35 Mon Sep 17 00:00:00 2001 From: Colin Finck Date: Fri, 11 Jul 2008 19:11:06 +0000 Subject: [PATCH] - Port Wine's WideCharToMultiByte implementation for conversion to a codepage to ReactOS. (with comments :-)) It adds support for DefaultChar, UsedDefaultChar and the flag WC_NO_BEST_FIT_CHARS. WC_COMPOSITECHECK is also supported by the Wine implementation, but I don't have an idea how to port it to ReactOS, as we don't seem to have composition tables. I left FIXME's for this flag in the appropriate blocks, this is why some of the code might look badly structured/unoptimized at the moment. As we completely rely on the NLS tables for the conversion now, this commit might trigger some bugs there. I already found out that the CP950 table doesn't map Unicode 0 back to MultiByte 0 (but 254), using Windows' c_950.nls it works correctly. Other tables could be buggy as well, c_1252.nls worked flawlessly for me though. - Added comments to the CPTABLEINFO structure based on documentation from http://www.ping.uio.no/~ovehk/nls/ svn path=/trunk/; revision=34426 --- reactos/dll/win32/kernel32/misc/nls.c | 318 ++++++++++++++++++++------ reactos/include/ddk/ntnls.h | 16 +- 2 files changed, 256 insertions(+), 78 deletions(-) diff --git a/reactos/dll/win32/kernel32/misc/nls.c b/reactos/dll/win32/kernel32/misc/nls.c index f55cacda970..d348483a42d 100644 --- a/reactos/dll/win32/kernel32/misc/nls.c +++ b/reactos/dll/win32/kernel32/misc/nls.c @@ -687,15 +687,66 @@ IntWideCharToMultiByteUTF8(UINT CodePage, DWORD Flags, return MultiByteCount - TempLength; } +/** + * @name IntIsValidSBCSMapping + * + * Checks if ch (single-byte character) is a valid mapping for wch + * + * @see IntWideCharToMultiByteCP + */ +static inline BOOL +IntIsValidSBCSMapping(PCPTABLEINFO CodePageTable, DWORD Flags, WCHAR wch, UCHAR ch) +{ + /* If the WC_NO_BEST_FIT_CHARS flag has been specified, the characters need to match exactly. 
*/ + if(Flags & WC_NO_BEST_FIT_CHARS) + return (CodePageTable->MultiByteTable[ch] == wch); + + /* By default, all characters except TransDefaultChar apply as a valid mapping for ch (so also "nearest" characters) */ + if(ch != CodePageTable->TransDefaultChar) + return TRUE; + + /* The only possible left valid mapping is the default character itself */ + return (wch == CodePageTable->TransUniDefaultChar); +} + +/** + * @name IntIsValidDBCSMapping + * + * Checks if ch (double-byte character) is a valid mapping for wch + * + * @see IntWideCharToMultiByteCP + */ +static inline BOOL +IntIsValidDBCSMapping(PCPTABLEINFO CodePageTable, DWORD Flags, WCHAR wch, USHORT ch) +{ + /* If ch is the default character, but the wch is not, it can't be a valid mapping */ + if(ch == CodePageTable->TransDefaultChar && wch != CodePageTable->TransUniDefaultChar) + return FALSE; + + /* If the WC_NO_BEST_FIT_CHARS flag has been specified, the characters need to match exactly. */ + if(Flags & WC_NO_BEST_FIT_CHARS) + { + if(ch & 0xff00) + { + UCHAR uOffset = CodePageTable->DBCSOffsets[ch >> 8]; + return (CodePageTable->MultiByteTable[(uOffset << 8) + (ch & 0xff)] == wch); + } + + return (CodePageTable->MultiByteTable[ch] == wch); + } + + /* If we're still here, we have a valid mapping */ + return TRUE; +} + /** * @name IntWideCharToMultiByteCP * * Internal version of WideCharToMultiByte for code page tables. * * @see WideCharToMultiByte - * @todo Handle default characters and flags. + * @todo Handle WC_COMPOSITECHECK */ - static INT STDCALL IntWideCharToMultiByteCP(UINT CodePage, DWORD Flags, LPCWSTR WideCharString, INT WideCharCount, @@ -715,108 +766,233 @@ IntWideCharToMultiByteCP(UINT CodePage, DWORD Flags, } CodePageTable = &CodePageEntry->CodePageTable; + /* Different handling for DBCS code pages. 
*/ if (CodePageTable->MaximumCharacterSize > 1) { - /* FIXME */ + /* If Flags, DefaultChar or UsedDefaultChar were given, we have to do some more work */ + if(Flags || DefaultChar || UsedDefaultChar) + { + BOOL TempUsedDefaultChar; + USHORT DefChar; - USHORT WideChar; - USHORT MbChar; + /* If UsedDefaultChar is not set, set it to a temporary value, so we don't have to check on every character */ + if(!UsedDefaultChar) + UsedDefaultChar = &TempUsedDefaultChar; + + *UsedDefaultChar = FALSE; + + /* Use the CodePage's TransDefaultChar if none was given. Don't modify the DefaultChar pointer here. */ + if(DefaultChar) + DefChar = DefaultChar[1] ? ((DefaultChar[0] << 8) | DefaultChar[1]) : DefaultChar[0]; + else + DefChar = CodePageTable->TransDefaultChar; + + /* Does caller query for output buffer size? */ + if(!MultiByteCount) + { + for(TempLength = 0; WideCharCount; WideCharCount--, WideCharString++, TempLength++) + { + USHORT uChar; + + if((Flags & WC_COMPOSITECHECK) && WideCharCount > 1) + { + /* FIXME: Handle WC_COMPOSITECHECK */ + } + + uChar = ((PUSHORT)CodePageTable->WideCharTable)[*WideCharString]; + + /* Verify if the mapping is valid for handling DefaultChar and UsedDefaultChar */ + if(!IntIsValidDBCSMapping(CodePageTable, Flags, *WideCharString, uChar)) + { + uChar = DefChar; + *UsedDefaultChar = TRUE; + } + + /* Increment TempLength again if this is a double-byte character */ + if(uChar & 0xff00) + TempLength++; + } + + return TempLength; + } + + /* Convert the WideCharString to the MultiByteString and verify if the mapping is valid */ + for(TempLength = MultiByteCount; WideCharCount && TempLength; TempLength--, WideCharString++, WideCharCount--) + { + USHORT uChar; + + if((Flags & WC_COMPOSITECHECK) && WideCharCount > 1) + { + /* FIXME: Handle WC_COMPOSITECHECK */ + } + + uChar = ((PUSHORT)CodePageTable->WideCharTable)[*WideCharString]; + + /* Verify if the mapping is valid for handling DefaultChar and UsedDefaultChar */ + 
if(!IntIsValidDBCSMapping(CodePageTable, Flags, *WideCharString, uChar)) + { + uChar = DefChar; + *UsedDefaultChar = TRUE; + } + + /* Handle double-byte characters */ + if(uChar & 0xff00) + { + /* Don't output a partial character */ + if(TempLength == 1) + break; + + TempLength--; + *MultiByteString++ = uChar >> 8; + } + + *MultiByteString++ = (char)uChar; + } + + /* WideCharCount should be 0 if all characters were converted */ + if(WideCharCount) + { + SetLastError(ERROR_INSUFFICIENT_BUFFER); + return 0; + } + + return MultiByteCount - TempLength; + } /* Does caller query for output buffer size? */ - if (MultiByteCount == 0) + if(!MultiByteCount) { - for (TempLength = 0; WideCharCount; WideCharCount--, TempLength++) + for(TempLength = 0; WideCharCount; WideCharCount--, WideCharString++, TempLength++) { - WideChar = *WideCharString++; - - if (WideChar < 0x80) - continue; - - MbChar = ((PWCHAR)CodePageTable->WideCharTable)[WideChar]; - - if (!(MbChar & 0xff00)) - continue; - - TempLength++; + /* Increment TempLength again if this is a double-byte character */ + if (((PWCHAR)CodePageTable->WideCharTable)[*WideCharString] & 0xff00) + TempLength++; } return TempLength; } - for (TempLength = MultiByteCount; WideCharCount; WideCharCount--) + /* Convert the WideCharString to the MultiByteString */ + for(TempLength = MultiByteCount; WideCharCount && TempLength; TempLength--, WideCharString++, WideCharCount--) { - WideChar = *WideCharString++; + USHORT uChar = ((PUSHORT)CodePageTable->WideCharTable)[*WideCharString]; - if (WideChar < 0x80) + /* Is this a double-byte character? 
*/ + if(uChar & 0xff00) { - if (!TempLength) - { - SetLastError(ERROR_INSUFFICIENT_BUFFER); + /* Don't output a partial character */ + if(TempLength == 1) break; - } + TempLength--; - - *MultiByteString++ = (CHAR)WideChar; - continue; + *MultiByteString++ = uChar >> 8; } - MbChar = ((PWCHAR)CodePageTable->WideCharTable)[WideChar]; + *MultiByteString++ = (char)uChar; + } - if (!(MbChar & 0xff00)) - { - if (!TempLength) - { - SetLastError(ERROR_INSUFFICIENT_BUFFER); - break; - } - TempLength--; - - *MultiByteString++ = (CHAR)MbChar; - continue;; - } - - if (TempLength >= 2) - { - MultiByteString[1] = (CHAR)MbChar; MbChar >>= 8; - MultiByteString[0] = (CHAR)MbChar; - MultiByteString += 2; - TempLength -= 2; - } - else - { - SetLastError(ERROR_INSUFFICIENT_BUFFER); - break; - } + /* WideCharCount should be 0 if all characters were converted */ + if(WideCharCount) + { + SetLastError(ERROR_INSUFFICIENT_BUFFER); + return 0; } return MultiByteCount - TempLength; } else /* Not DBCS code page */ { - /* Does caller query for output buffer size? */ - if (MultiByteCount == 0) - return WideCharCount; + INT nReturn; - /* Adjust buffer size. Wine trick ;-) */ - if (MultiByteCount < WideCharCount) + /* If Flags, DefaultChar or UsedDefaultChar were given, we have to do some more work */ + if(Flags || DefaultChar || UsedDefaultChar) { - WideCharCount = MultiByteCount; - SetLastError(ERROR_INSUFFICIENT_BUFFER); - } + BOOL TempUsedDefaultChar; + CHAR DefChar; - for (TempLength = WideCharCount; - TempLength > 0; - WideCharString++, TempLength--) - { - *MultiByteString++ = ((PCHAR)CodePageTable->WideCharTable)[*WideCharString]; - } + /* If UsedDefaultChar is not set, set it to a temporary value, so we don't have to check on every character */ + if(!UsedDefaultChar) + UsedDefaultChar = &TempUsedDefaultChar; + + /* Reset the flag as the DBCS branch does; the loops below read it before they ever write it */ + *UsedDefaultChar = FALSE; - /* FIXME */ - if (UsedDefaultChar != NULL) *UsedDefaultChar = FALSE; - return WideCharCount; + /* Does caller query for output buffer size? 
*/ + if(!MultiByteCount) + { + /* Loop through the whole WideCharString and check if we can get a valid mapping for each character */ + for(TempLength = 0; WideCharCount; TempLength++, WideCharString++, WideCharCount--) + { + if((Flags & WC_COMPOSITECHECK) && WideCharCount > 1) + { + /* FIXME: Handle WC_COMPOSITECHECK */ + } + + if(!*UsedDefaultChar) + *UsedDefaultChar = !IntIsValidSBCSMapping(CodePageTable, Flags, *WideCharString, ((PCHAR)CodePageTable->WideCharTable)[*WideCharString]); + } + + return TempLength; + } + + /* Use the CodePage's TransDefaultChar if none was given. Don't modify the DefaultChar pointer here. */ + if(DefaultChar) + DefChar = *DefaultChar; + else + DefChar = CodePageTable->TransDefaultChar; + + /* Convert the WideCharString to the MultiByteString and verify if the mapping is valid */ + for(TempLength = MultiByteCount; WideCharCount && TempLength; MultiByteString++, TempLength--, WideCharString++, WideCharCount--) + { + if((Flags & WC_COMPOSITECHECK) && WideCharCount > 1) + { + /* FIXME: Handle WC_COMPOSITECHECK */ + } + + *MultiByteString = ((PCHAR)CodePageTable->WideCharTable)[*WideCharString]; + + if(!IntIsValidSBCSMapping(CodePageTable, Flags, *WideCharString, *MultiByteString)) + { + *MultiByteString = DefChar; + *UsedDefaultChar = TRUE; + } + } + + /* WideCharCount should be 0 if all characters were converted */ + if(WideCharCount) + { + SetLastError(ERROR_INSUFFICIENT_BUFFER); + return 0; + } + + return MultiByteCount - TempLength; + } + + /* Does caller query for output buffer size? */ + if(!MultiByteCount) + return WideCharCount; + + /* Is the buffer large enough? 
*/ + if(MultiByteCount < WideCharCount) + { + /* Convert the string up to MultiByteCount and return 0 */ + WideCharCount = MultiByteCount; + SetLastError(ERROR_INSUFFICIENT_BUFFER); + nReturn = 0; + } + else + { + /* Otherwise WideCharCount will be the number of converted characters */ + nReturn = WideCharCount; + } + + /* Convert the WideCharString to the MultiByteString */ + for(TempLength = WideCharCount; --TempLength >= 0; WideCharString++, MultiByteString++) + { + *MultiByteString = ((PCHAR)CodePageTable->WideCharTable)[*WideCharString]; + } + + return nReturn; } } diff --git a/reactos/include/ddk/ntnls.h b/reactos/include/ddk/ntnls.h index 3e52ddc4d87..aa75009a7c0 100644 --- a/reactos/include/ddk/ntnls.h +++ b/reactos/include/ddk/ntnls.h @@ -7,18 +7,20 @@ extern "C" { #endif #define MAXIMUM_LEADBYTES 12 + +/* Some documentation can be found here: http://www.ping.uio.no/~ovehk/nls/ */ typedef struct _CPTABLEINFO { USHORT CodePage; - USHORT MaximumCharacterSize; - USHORT DefaultChar; - USHORT UniDefaultChar; - USHORT TransDefaultChar; - USHORT TransUniDefaultChar; + USHORT MaximumCharacterSize; /* 1 = SBCS, 2 = DBCS */ + USHORT DefaultChar; /* Default MultiByte Character for the CP->Unicode conversion */ + USHORT UniDefaultChar; /* Default Unicode Character for the CP->Unicode conversion */ + USHORT TransDefaultChar; /* Default MultiByte Character for the Unicode->CP conversion */ + USHORT TransUniDefaultChar; /* Default Unicode Character for the Unicode->CP conversion */ USHORT DBCSCodePage; UCHAR LeadByte[MAXIMUM_LEADBYTES]; - PUSHORT MultiByteTable; - PVOID WideCharTable; + PUSHORT MultiByteTable; /* Table for CP->Unicode conversion */ + PVOID WideCharTable; /* Table for Unicode->CP conversion */ PUSHORT DBCSRanges; PUSHORT DBCSOffsets; } CPTABLEINFO, *PCPTABLEINFO;