- Port Wine's WideCharToMultiByte implementation for conversion to a codepage to ReactOS. (with comments :-))

It adds support for DefaultChar, UsedDefaultChar and the flag WC_NO_BEST_FIT_CHARS.
  WC_COMPOSITECHECK is also supported by the Wine implementation, but I don't have an idea how to port it to ReactOS, as we don't seem to have composition tables. I left FIXME's for this flag in the appropriate blocks, this is why some of the code might look badly structured/unoptimized at the moment.
  As we completely rely on the NLS tables for the conversion now, this commit might trigger some bugs there. I already found out that the CP950 table doesn't map Unicode 0 back to MultiByte 0 (but 254), using Windows' c_950.nls it works correctly. Other tables could be buggy as well, c_1252.nls worked flawlessy for me though.
- Added comments to the CPTABLEINFO structure based on documentation from http://www.ping.uio.no/~ovehk/nls/

svn path=/trunk/; revision=34426
This commit is contained in:
Colin Finck 2008-07-11 19:11:06 +00:00
parent dfb10c4404
commit f25ac715b0
2 changed files with 256 additions and 78 deletions

View file

@ -687,15 +687,66 @@ IntWideCharToMultiByteUTF8(UINT CodePage, DWORD Flags,
return MultiByteCount - TempLength;
}
/**
* @name IsValidSBCSMapping
*
* Checks if ch (single-byte character) is a valid mapping for wch
*
* @see IntWideCharToMultiByteCP
*/
static inline BOOL
IntIsValidSBCSMapping(PCPTABLEINFO CodePageTable, DWORD Flags, WCHAR wch, UCHAR ch)
{
/* If the WC_NO_BEST_FIT_CHARS flag has been specified, the characters need to match exactly. */
if(Flags & WC_NO_BEST_FIT_CHARS)
return (CodePageTable->MultiByteTable[ch] != wch);
/* By default, all characters except TransDefaultChar apply as a valid mapping for ch (so also "nearest" characters) */
if(ch != CodePageTable->TransDefaultChar)
return TRUE;
/* The only possible left valid mapping is the default character itself */
return (wch == CodePageTable->TransUniDefaultChar);
}
/**
* @name IsValidDBCSMapping
*
* Checks if ch (double-byte character) is a valid mapping for wch
*
* @see IntWideCharToMultiByteCP
*/
static inline BOOL
IntIsValidDBCSMapping(PCPTABLEINFO CodePageTable, DWORD Flags, WCHAR wch, USHORT ch)
{
/* If ch is the default character, but the wch is not, it can't be a valid mapping */
if(ch == CodePageTable->TransDefaultChar && wch != CodePageTable->TransUniDefaultChar)
return FALSE;
/* If the WC_NO_BEST_FIT_CHARS flag has been specified, the characters need to match exactly. */
if(Flags & WC_NO_BEST_FIT_CHARS)
{
if(ch & 0xff00)
{
UCHAR uOffset = CodePageTable->DBCSOffsets[ch >> 8];
return (CodePageTable->MultiByteTable[(uOffset << 8) + (ch & 0xff)] == wch);
}
return (CodePageTable->MultiByteTable[ch] == wch);
}
/* If we're still here, we have a valid mapping */
return TRUE;
}
/**
* @name IntWideCharToMultiByteCP
*
* Internal version of WideCharToMultiByte for code page tables.
*
* @see WideCharToMultiByte
* @todo Handle default characters and flags.
* @todo Handle WC_COMPOSITECHECK
*/
static INT STDCALL
IntWideCharToMultiByteCP(UINT CodePage, DWORD Flags,
LPCWSTR WideCharString, INT WideCharCount,
@ -715,108 +766,233 @@ IntWideCharToMultiByteCP(UINT CodePage, DWORD Flags,
}
CodePageTable = &CodePageEntry->CodePageTable;
/* Different handling for DBCS code pages. */
if (CodePageTable->MaximumCharacterSize > 1)
{
/* FIXME */
/* If Flags, DefaultChar or UsedDefaultChar were given, we have to do some more work */
if(Flags || DefaultChar || UsedDefaultChar)
{
BOOL TempUsedDefaultChar;
USHORT DefChar;
USHORT WideChar;
USHORT MbChar;
/* If UsedDefaultChar is not set, set it to a temporary value, so we don't have to check on every character */
if(!UsedDefaultChar)
UsedDefaultChar = &TempUsedDefaultChar;
*UsedDefaultChar = FALSE;
/* Use the CodePage's TransDefaultChar if none was given. Don't modify the DefaultChar pointer here. */
if(DefaultChar)
DefChar = DefaultChar[1] ? ((DefaultChar[0] << 8) | DefaultChar[1]) : DefaultChar[0];
else
DefChar = CodePageTable->TransDefaultChar;
/* Does caller query for output buffer size? */
if(!MultiByteCount)
{
for(TempLength = 0; WideCharCount; WideCharCount--, WideCharString++, TempLength++)
{
USHORT uChar;
if((Flags & WC_COMPOSITECHECK) && WideCharCount > 1)
{
/* FIXME: Handle WC_COMPOSITECHECK */
}
uChar = ((PUSHORT)CodePageTable->WideCharTable)[*WideCharString];
/* Verify if the mapping is valid for handling DefaultChar and UsedDefaultChar */
if(!IntIsValidDBCSMapping(CodePageTable, Flags, *WideCharString, uChar))
{
uChar = DefChar;
*UsedDefaultChar = TRUE;
}
/* Increment TempLength again if this is a double-byte character */
if(uChar & 0xff00)
TempLength++;
}
return TempLength;
}
/* Convert the WideCharString to the MultiByteString and verify if the mapping is valid */
for(TempLength = MultiByteCount; WideCharCount && TempLength; TempLength--, WideCharString++, WideCharCount--)
{
USHORT uChar;
if((Flags & WC_COMPOSITECHECK) && WideCharCount > 1)
{
/* FIXME: Handle WC_COMPOSITECHECK */
}
uChar = ((PUSHORT)CodePageTable->WideCharTable)[*WideCharString];
/* Verify if the mapping is valid for handling DefaultChar and UsedDefaultChar */
if(!IntIsValidDBCSMapping(CodePageTable, Flags, *WideCharString, uChar))
{
uChar = DefChar;
*UsedDefaultChar = TRUE;
}
/* Handle double-byte characters */
if(uChar & 0xff00)
{
/* Don't output a partial character */
if(TempLength == 1)
break;
TempLength--;
*MultiByteString++ = uChar >> 8;
}
*MultiByteString++ = (char)uChar;
}
/* WideCharCount should be 0 if all characters were converted */
if(WideCharCount)
{
SetLastError(ERROR_INSUFFICIENT_BUFFER);
return 0;
}
return MultiByteCount - TempLength;
}
/* Does caller query for output buffer size? */
if (MultiByteCount == 0)
if(!MultiByteCount)
{
for (TempLength = 0; WideCharCount; WideCharCount--, TempLength++)
for(TempLength = 0; WideCharCount; WideCharCount--, WideCharString++, TempLength++)
{
WideChar = *WideCharString++;
if (WideChar < 0x80)
continue;
MbChar = ((PWCHAR)CodePageTable->WideCharTable)[WideChar];
if (!(MbChar & 0xff00))
continue;
TempLength++;
/* Increment TempLength again if this is a double-byte character */
if (((PWCHAR)CodePageTable->WideCharTable)[*WideCharString] & 0xff00)
TempLength++;
}
return TempLength;
}
for (TempLength = MultiByteCount; WideCharCount; WideCharCount--)
/* Convert the WideCharString to the MultiByteString */
for(TempLength = MultiByteCount; WideCharCount && TempLength; TempLength--, WideCharString++, WideCharCount--)
{
WideChar = *WideCharString++;
USHORT uChar = ((PUSHORT)CodePageTable->WideCharTable)[*WideCharString];
if (WideChar < 0x80)
/* Is this a double-byte character? */
if(uChar & 0xff00)
{
if (!TempLength)
{
SetLastError(ERROR_INSUFFICIENT_BUFFER);
/* Don't output a partial character */
if(TempLength == 1)
break;
}
TempLength--;
*MultiByteString++ = (CHAR)WideChar;
continue;
*MultiByteString++ = uChar >> 8;
}
MbChar = ((PWCHAR)CodePageTable->WideCharTable)[WideChar];
*MultiByteString++ = (char)uChar;
}
if (!(MbChar & 0xff00))
{
if (!TempLength)
{
SetLastError(ERROR_INSUFFICIENT_BUFFER);
break;
}
TempLength--;
*MultiByteString++ = (CHAR)MbChar;
continue;;
}
if (TempLength >= 2)
{
MultiByteString[1] = (CHAR)MbChar; MbChar >>= 8;
MultiByteString[0] = (CHAR)MbChar;
MultiByteString += 2;
TempLength -= 2;
}
else
{
SetLastError(ERROR_INSUFFICIENT_BUFFER);
break;
}
/* WideCharCount should be 0 if all characters were converted */
if(WideCharCount)
{
SetLastError(ERROR_INSUFFICIENT_BUFFER);
return 0;
}
return MultiByteCount - TempLength;
}
else /* Not DBCS code page */
{
/* Does caller query for output buffer size? */
if (MultiByteCount == 0)
return WideCharCount;
INT nReturn;
/* Adjust buffer size. Wine trick ;-) */
if (MultiByteCount < WideCharCount)
/* If Flags, DefaultChar or UsedDefaultChar were given, we have to do some more work */
if(Flags || DefaultChar || UsedDefaultChar)
{
WideCharCount = MultiByteCount;
SetLastError(ERROR_INSUFFICIENT_BUFFER);
}
BOOL TempUsedDefaultChar;
CHAR DefChar;
for (TempLength = WideCharCount;
TempLength > 0;
WideCharString++, TempLength--)
{
*MultiByteString++ = ((PCHAR)CodePageTable->WideCharTable)[*WideCharString];
}
/* If UsedDefaultChar is not set, set it to a temporary value, so we don't have to check on every character */
if(!UsedDefaultChar)
UsedDefaultChar = &TempUsedDefaultChar;
/* FIXME */
if (UsedDefaultChar != NULL)
*UsedDefaultChar = FALSE;
return WideCharCount;
/* Does caller query for output buffer size? */
if(!MultiByteCount)
{
/* Loop through the whole WideCharString and check if we can get a valid mapping for each character */
for(TempLength = 0; WideCharCount; TempLength++, WideCharString++, WideCharCount--)
{
if((Flags & WC_COMPOSITECHECK) && WideCharCount > 1)
{
/* FIXME: Handle WC_COMPOSITECHECK */
}
if(!*UsedDefaultChar)
*UsedDefaultChar = !IntIsValidSBCSMapping(CodePageTable, Flags, *WideCharString, ((PCHAR)CodePageTable->WideCharTable)[*WideCharString]);
}
return TempLength;
}
/* Use the CodePage's TransDefaultChar if none was given. Don't modify the DefaultChar pointer here. */
if(DefaultChar)
DefChar = *DefaultChar;
else
DefChar = CodePageTable->TransDefaultChar;
/* Convert the WideCharString to the MultiByteString and verify if the mapping is valid */
for(TempLength = MultiByteCount; WideCharCount && TempLength; MultiByteString++, TempLength--, WideCharString++, WideCharCount--)
{
if((Flags & WC_COMPOSITECHECK) && WideCharCount > 1)
{
/* FIXME: Handle WC_COMPOSITECHECK */
}
*MultiByteString = ((PCHAR)CodePageTable->WideCharTable)[*WideCharString];
if(!IntIsValidSBCSMapping(CodePageTable, Flags, *WideCharString, *MultiByteString))
{
*MultiByteString = DefChar;
*UsedDefaultChar = TRUE;
}
}
/* WideCharCount should be 0 if all characters were converted */
if(WideCharCount)
{
SetLastError(ERROR_INSUFFICIENT_BUFFER);
return 0;
}
return MultiByteCount - TempLength;
}
/* Does caller query for output buffer size? */
if(!MultiByteCount)
return WideCharCount;
/* Is the buffer large enough? */
if(MultiByteCount < WideCharCount)
{
/* Convert the string up to MultiByteCount and return 0 */
WideCharCount = MultiByteCount;
SetLastError(ERROR_INSUFFICIENT_BUFFER);
nReturn = 0;
}
else
{
/* Otherwise WideCharCount will be the number of converted characters */
nReturn = WideCharCount;
}
/* Convert the WideCharString to the MultiByteString */
for(TempLength = WideCharCount; --TempLength >= 0; WideCharString++, MultiByteString++)
{
*MultiByteString = ((PCHAR)CodePageTable->WideCharTable)[*WideCharString];
}
return nReturn;
}
}

View file

@ -7,18 +7,20 @@
extern "C" {
#endif
#define MAXIMUM_LEADBYTES 12
/* Some documentation can be found here: http://www.ping.uio.no/~ovehk/nls/ */
typedef struct _CPTABLEINFO
{
USHORT CodePage;
USHORT MaximumCharacterSize;
USHORT DefaultChar;
USHORT UniDefaultChar;
USHORT TransDefaultChar;
USHORT TransUniDefaultChar;
USHORT MaximumCharacterSize; /* 1 = SBCS, 2 = DBCS */
USHORT DefaultChar; /* Default MultiByte Character for the CP->Unicode conversion */
USHORT UniDefaultChar; /* Default Unicode Character for the CP->Unicode conversion */
USHORT TransDefaultChar; /* Default MultiByte Character for the Unicode->CP conversion */
USHORT TransUniDefaultChar; /* Default Unicode Character for the Unicode->CP conversion */
USHORT DBCSCodePage;
UCHAR LeadByte[MAXIMUM_LEADBYTES];
PUSHORT MultiByteTable;
PVOID WideCharTable;
PUSHORT MultiByteTable; /* Table for CP->Unicode conversion */
PVOID WideCharTable; /* Table for Unicode->CP conversion */
PUSHORT DBCSRanges;
PUSHORT DBCSOffsets;
} CPTABLEINFO, *PCPTABLEINFO;