mirror of
https://github.com/reactos/reactos.git
synced 2025-02-23 17:05:46 +00:00
- Port Wine's WideCharToMultiByte implementation for conversion to a codepage to ReactOS. (with comments :-))
It adds support for DefaultChar, UsedDefaultChar and the flag WC_NO_BEST_FIT_CHARS. WC_COMPOSITECHECK is also supported by the Wine implementation, but I don't have an idea how to port it to ReactOS, as we don't seem to have composition tables. I left FIXME's for this flag in the appropriate blocks, this is why some of the code might look badly structured/unoptimized at the moment. As we completely rely on the NLS tables for the conversion now, this commit might trigger some bugs there. I already found out that the CP950 table doesn't map Unicode 0 back to MultiByte 0 (but 254), using Windows' c_950.nls it works correctly. Other tables could be buggy as well, c_1252.nls worked flawlessly for me though. - Added comments to the CPTABLEINFO structure based on documentation from http://www.ping.uio.no/~ovehk/nls/ svn path=/trunk/; revision=34426
This commit is contained in:
parent
dfb10c4404
commit
f25ac715b0
2 changed files with 256 additions and 78 deletions
|
@ -687,15 +687,66 @@ IntWideCharToMultiByteUTF8(UINT CodePage, DWORD Flags,
|
||||||
return MultiByteCount - TempLength;
|
return MultiByteCount - TempLength;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @name IsValidSBCSMapping
|
||||||
|
*
|
||||||
|
* Checks if ch (single-byte character) is a valid mapping for wch
|
||||||
|
*
|
||||||
|
* @see IntWideCharToMultiByteCP
|
||||||
|
*/
|
||||||
|
static inline BOOL
|
||||||
|
IntIsValidSBCSMapping(PCPTABLEINFO CodePageTable, DWORD Flags, WCHAR wch, UCHAR ch)
|
||||||
|
{
|
||||||
|
/* If the WC_NO_BEST_FIT_CHARS flag has been specified, the characters need to match exactly. */
|
||||||
|
if(Flags & WC_NO_BEST_FIT_CHARS)
|
||||||
|
return (CodePageTable->MultiByteTable[ch] != wch);
|
||||||
|
|
||||||
|
/* By default, all characters except TransDefaultChar apply as a valid mapping for ch (so also "nearest" characters) */
|
||||||
|
if(ch != CodePageTable->TransDefaultChar)
|
||||||
|
return TRUE;
|
||||||
|
|
||||||
|
/* The only possible left valid mapping is the default character itself */
|
||||||
|
return (wch == CodePageTable->TransUniDefaultChar);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @name IsValidDBCSMapping
|
||||||
|
*
|
||||||
|
* Checks if ch (double-byte character) is a valid mapping for wch
|
||||||
|
*
|
||||||
|
* @see IntWideCharToMultiByteCP
|
||||||
|
*/
|
||||||
|
static inline BOOL
|
||||||
|
IntIsValidDBCSMapping(PCPTABLEINFO CodePageTable, DWORD Flags, WCHAR wch, USHORT ch)
|
||||||
|
{
|
||||||
|
/* If ch is the default character, but the wch is not, it can't be a valid mapping */
|
||||||
|
if(ch == CodePageTable->TransDefaultChar && wch != CodePageTable->TransUniDefaultChar)
|
||||||
|
return FALSE;
|
||||||
|
|
||||||
|
/* If the WC_NO_BEST_FIT_CHARS flag has been specified, the characters need to match exactly. */
|
||||||
|
if(Flags & WC_NO_BEST_FIT_CHARS)
|
||||||
|
{
|
||||||
|
if(ch & 0xff00)
|
||||||
|
{
|
||||||
|
UCHAR uOffset = CodePageTable->DBCSOffsets[ch >> 8];
|
||||||
|
return (CodePageTable->MultiByteTable[(uOffset << 8) + (ch & 0xff)] == wch);
|
||||||
|
}
|
||||||
|
|
||||||
|
return (CodePageTable->MultiByteTable[ch] == wch);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* If we're still here, we have a valid mapping */
|
||||||
|
return TRUE;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @name IntWideCharToMultiByteCP
|
* @name IntWideCharToMultiByteCP
|
||||||
*
|
*
|
||||||
* Internal version of WideCharToMultiByte for code page tables.
|
* Internal version of WideCharToMultiByte for code page tables.
|
||||||
*
|
*
|
||||||
* @see WideCharToMultiByte
|
* @see WideCharToMultiByte
|
||||||
* @todo Handle default characters and flags.
|
* @todo Handle WC_COMPOSITECHECK
|
||||||
*/
|
*/
|
||||||
|
|
||||||
static INT STDCALL
|
static INT STDCALL
|
||||||
IntWideCharToMultiByteCP(UINT CodePage, DWORD Flags,
|
IntWideCharToMultiByteCP(UINT CodePage, DWORD Flags,
|
||||||
LPCWSTR WideCharString, INT WideCharCount,
|
LPCWSTR WideCharString, INT WideCharCount,
|
||||||
|
@ -715,108 +766,233 @@ IntWideCharToMultiByteCP(UINT CodePage, DWORD Flags,
|
||||||
}
|
}
|
||||||
CodePageTable = &CodePageEntry->CodePageTable;
|
CodePageTable = &CodePageEntry->CodePageTable;
|
||||||
|
|
||||||
|
|
||||||
/* Different handling for DBCS code pages. */
|
/* Different handling for DBCS code pages. */
|
||||||
if (CodePageTable->MaximumCharacterSize > 1)
|
if (CodePageTable->MaximumCharacterSize > 1)
|
||||||
{
|
{
|
||||||
/* FIXME */
|
/* If Flags, DefaultChar or UsedDefaultChar were given, we have to do some more work */
|
||||||
|
if(Flags || DefaultChar || UsedDefaultChar)
|
||||||
|
{
|
||||||
|
BOOL TempUsedDefaultChar;
|
||||||
|
USHORT DefChar;
|
||||||
|
|
||||||
USHORT WideChar;
|
/* If UsedDefaultChar is not set, set it to a temporary value, so we don't have to check on every character */
|
||||||
USHORT MbChar;
|
if(!UsedDefaultChar)
|
||||||
|
UsedDefaultChar = &TempUsedDefaultChar;
|
||||||
|
|
||||||
|
*UsedDefaultChar = FALSE;
|
||||||
|
|
||||||
|
/* Use the CodePage's TransDefaultChar if none was given. Don't modify the DefaultChar pointer here. */
|
||||||
|
if(DefaultChar)
|
||||||
|
DefChar = DefaultChar[1] ? ((DefaultChar[0] << 8) | DefaultChar[1]) : DefaultChar[0];
|
||||||
|
else
|
||||||
|
DefChar = CodePageTable->TransDefaultChar;
|
||||||
|
|
||||||
|
/* Does caller query for output buffer size? */
|
||||||
|
if(!MultiByteCount)
|
||||||
|
{
|
||||||
|
for(TempLength = 0; WideCharCount; WideCharCount--, WideCharString++, TempLength++)
|
||||||
|
{
|
||||||
|
USHORT uChar;
|
||||||
|
|
||||||
|
if((Flags & WC_COMPOSITECHECK) && WideCharCount > 1)
|
||||||
|
{
|
||||||
|
/* FIXME: Handle WC_COMPOSITECHECK */
|
||||||
|
}
|
||||||
|
|
||||||
|
uChar = ((PUSHORT)CodePageTable->WideCharTable)[*WideCharString];
|
||||||
|
|
||||||
|
/* Verify if the mapping is valid for handling DefaultChar and UsedDefaultChar */
|
||||||
|
if(!IntIsValidDBCSMapping(CodePageTable, Flags, *WideCharString, uChar))
|
||||||
|
{
|
||||||
|
uChar = DefChar;
|
||||||
|
*UsedDefaultChar = TRUE;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Increment TempLength again if this is a double-byte character */
|
||||||
|
if(uChar & 0xff00)
|
||||||
|
TempLength++;
|
||||||
|
}
|
||||||
|
|
||||||
|
return TempLength;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Convert the WideCharString to the MultiByteString and verify if the mapping is valid */
|
||||||
|
for(TempLength = MultiByteCount; WideCharCount && TempLength; TempLength--, WideCharString++, WideCharCount--)
|
||||||
|
{
|
||||||
|
USHORT uChar;
|
||||||
|
|
||||||
|
if((Flags & WC_COMPOSITECHECK) && WideCharCount > 1)
|
||||||
|
{
|
||||||
|
/* FIXME: Handle WC_COMPOSITECHECK */
|
||||||
|
}
|
||||||
|
|
||||||
|
uChar = ((PUSHORT)CodePageTable->WideCharTable)[*WideCharString];
|
||||||
|
|
||||||
|
/* Verify if the mapping is valid for handling DefaultChar and UsedDefaultChar */
|
||||||
|
if(!IntIsValidDBCSMapping(CodePageTable, Flags, *WideCharString, uChar))
|
||||||
|
{
|
||||||
|
uChar = DefChar;
|
||||||
|
*UsedDefaultChar = TRUE;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Handle double-byte characters */
|
||||||
|
if(uChar & 0xff00)
|
||||||
|
{
|
||||||
|
/* Don't output a partial character */
|
||||||
|
if(TempLength == 1)
|
||||||
|
break;
|
||||||
|
|
||||||
|
TempLength--;
|
||||||
|
*MultiByteString++ = uChar >> 8;
|
||||||
|
}
|
||||||
|
|
||||||
|
*MultiByteString++ = (char)uChar;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* WideCharCount should be 0 if all characters were converted */
|
||||||
|
if(WideCharCount)
|
||||||
|
{
|
||||||
|
SetLastError(ERROR_INSUFFICIENT_BUFFER);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
return MultiByteCount - TempLength;
|
||||||
|
}
|
||||||
|
|
||||||
/* Does caller query for output buffer size? */
|
/* Does caller query for output buffer size? */
|
||||||
if (MultiByteCount == 0)
|
if(!MultiByteCount)
|
||||||
{
|
{
|
||||||
for (TempLength = 0; WideCharCount; WideCharCount--, TempLength++)
|
for(TempLength = 0; WideCharCount; WideCharCount--, WideCharString++, TempLength++)
|
||||||
{
|
{
|
||||||
WideChar = *WideCharString++;
|
/* Increment TempLength again if this is a double-byte character */
|
||||||
|
if (((PWCHAR)CodePageTable->WideCharTable)[*WideCharString] & 0xff00)
|
||||||
if (WideChar < 0x80)
|
TempLength++;
|
||||||
continue;
|
|
||||||
|
|
||||||
MbChar = ((PWCHAR)CodePageTable->WideCharTable)[WideChar];
|
|
||||||
|
|
||||||
if (!(MbChar & 0xff00))
|
|
||||||
continue;
|
|
||||||
|
|
||||||
TempLength++;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return TempLength;
|
return TempLength;
|
||||||
}
|
}
|
||||||
|
|
||||||
for (TempLength = MultiByteCount; WideCharCount; WideCharCount--)
|
/* Convert the WideCharString to the MultiByteString */
|
||||||
|
for(TempLength = MultiByteCount; WideCharCount && TempLength; TempLength--, WideCharString++, WideCharCount--)
|
||||||
{
|
{
|
||||||
WideChar = *WideCharString++;
|
USHORT uChar = ((PUSHORT)CodePageTable->WideCharTable)[*WideCharString];
|
||||||
|
|
||||||
if (WideChar < 0x80)
|
/* Is this a double-byte character? */
|
||||||
|
if(uChar & 0xff00)
|
||||||
{
|
{
|
||||||
if (!TempLength)
|
/* Don't output a partial character */
|
||||||
{
|
if(TempLength == 1)
|
||||||
SetLastError(ERROR_INSUFFICIENT_BUFFER);
|
|
||||||
break;
|
break;
|
||||||
}
|
|
||||||
TempLength--;
|
TempLength--;
|
||||||
|
*MultiByteString++ = uChar >> 8;
|
||||||
*MultiByteString++ = (CHAR)WideChar;
|
|
||||||
continue;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
MbChar = ((PWCHAR)CodePageTable->WideCharTable)[WideChar];
|
*MultiByteString++ = (char)uChar;
|
||||||
|
}
|
||||||
|
|
||||||
if (!(MbChar & 0xff00))
|
/* WideCharCount should be 0 if all characters were converted */
|
||||||
{
|
if(WideCharCount)
|
||||||
if (!TempLength)
|
{
|
||||||
{
|
SetLastError(ERROR_INSUFFICIENT_BUFFER);
|
||||||
SetLastError(ERROR_INSUFFICIENT_BUFFER);
|
return 0;
|
||||||
break;
|
|
||||||
}
|
|
||||||
TempLength--;
|
|
||||||
|
|
||||||
*MultiByteString++ = (CHAR)MbChar;
|
|
||||||
continue;;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (TempLength >= 2)
|
|
||||||
{
|
|
||||||
MultiByteString[1] = (CHAR)MbChar; MbChar >>= 8;
|
|
||||||
MultiByteString[0] = (CHAR)MbChar;
|
|
||||||
MultiByteString += 2;
|
|
||||||
TempLength -= 2;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
SetLastError(ERROR_INSUFFICIENT_BUFFER);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return MultiByteCount - TempLength;
|
return MultiByteCount - TempLength;
|
||||||
}
|
}
|
||||||
else /* Not DBCS code page */
|
else /* Not DBCS code page */
|
||||||
{
|
{
|
||||||
/* Does caller query for output buffer size? */
|
INT nReturn;
|
||||||
if (MultiByteCount == 0)
|
|
||||||
return WideCharCount;
|
|
||||||
|
|
||||||
/* Adjust buffer size. Wine trick ;-) */
|
/* If Flags, DefaultChar or UsedDefaultChar were given, we have to do some more work */
|
||||||
if (MultiByteCount < WideCharCount)
|
if(Flags || DefaultChar || UsedDefaultChar)
|
||||||
{
|
{
|
||||||
WideCharCount = MultiByteCount;
|
BOOL TempUsedDefaultChar;
|
||||||
SetLastError(ERROR_INSUFFICIENT_BUFFER);
|
CHAR DefChar;
|
||||||
}
|
|
||||||
|
|
||||||
for (TempLength = WideCharCount;
|
/* If UsedDefaultChar is not set, set it to a temporary value, so we don't have to check on every character */
|
||||||
TempLength > 0;
|
if(!UsedDefaultChar)
|
||||||
WideCharString++, TempLength--)
|
UsedDefaultChar = &TempUsedDefaultChar;
|
||||||
{
|
|
||||||
*MultiByteString++ = ((PCHAR)CodePageTable->WideCharTable)[*WideCharString];
|
|
||||||
}
|
|
||||||
|
|
||||||
/* FIXME */
|
|
||||||
if (UsedDefaultChar != NULL)
|
|
||||||
*UsedDefaultChar = FALSE;
|
*UsedDefaultChar = FALSE;
|
||||||
|
|
||||||
return WideCharCount;
|
/* Does caller query for output buffer size? */
|
||||||
|
if(!MultiByteCount)
|
||||||
|
{
|
||||||
|
/* Loop through the whole WideCharString and check if we can get a valid mapping for each character */
|
||||||
|
for(TempLength = 0; WideCharCount; TempLength++, WideCharString++, WideCharCount--)
|
||||||
|
{
|
||||||
|
if((Flags & WC_COMPOSITECHECK) && WideCharCount > 1)
|
||||||
|
{
|
||||||
|
/* FIXME: Handle WC_COMPOSITECHECK */
|
||||||
|
}
|
||||||
|
|
||||||
|
if(!*UsedDefaultChar)
|
||||||
|
*UsedDefaultChar = !IntIsValidSBCSMapping(CodePageTable, Flags, *WideCharString, ((PCHAR)CodePageTable->WideCharTable)[*WideCharString]);
|
||||||
|
}
|
||||||
|
|
||||||
|
return TempLength;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Use the CodePage's TransDefaultChar if none was given. Don't modify the DefaultChar pointer here. */
|
||||||
|
if(DefaultChar)
|
||||||
|
DefChar = *DefaultChar;
|
||||||
|
else
|
||||||
|
DefChar = CodePageTable->TransDefaultChar;
|
||||||
|
|
||||||
|
/* Convert the WideCharString to the MultiByteString and verify if the mapping is valid */
|
||||||
|
for(TempLength = MultiByteCount; WideCharCount && TempLength; MultiByteString++, TempLength--, WideCharString++, WideCharCount--)
|
||||||
|
{
|
||||||
|
if((Flags & WC_COMPOSITECHECK) && WideCharCount > 1)
|
||||||
|
{
|
||||||
|
/* FIXME: Handle WC_COMPOSITECHECK */
|
||||||
|
}
|
||||||
|
|
||||||
|
*MultiByteString = ((PCHAR)CodePageTable->WideCharTable)[*WideCharString];
|
||||||
|
|
||||||
|
if(!IntIsValidSBCSMapping(CodePageTable, Flags, *WideCharString, *MultiByteString))
|
||||||
|
{
|
||||||
|
*MultiByteString = DefChar;
|
||||||
|
*UsedDefaultChar = TRUE;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* WideCharCount should be 0 if all characters were converted */
|
||||||
|
if(WideCharCount)
|
||||||
|
{
|
||||||
|
SetLastError(ERROR_INSUFFICIENT_BUFFER);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
return MultiByteCount - TempLength;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Does caller query for output buffer size? */
|
||||||
|
if(!MultiByteCount)
|
||||||
|
return WideCharCount;
|
||||||
|
|
||||||
|
/* Is the buffer large enough? */
|
||||||
|
if(MultiByteCount < WideCharCount)
|
||||||
|
{
|
||||||
|
/* Convert the string up to MultiByteCount and return 0 */
|
||||||
|
WideCharCount = MultiByteCount;
|
||||||
|
SetLastError(ERROR_INSUFFICIENT_BUFFER);
|
||||||
|
nReturn = 0;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
/* Otherwise WideCharCount will be the number of converted characters */
|
||||||
|
nReturn = WideCharCount;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Convert the WideCharString to the MultiByteString */
|
||||||
|
for(TempLength = WideCharCount; --TempLength >= 0; WideCharString++, MultiByteString++)
|
||||||
|
{
|
||||||
|
*MultiByteString = ((PCHAR)CodePageTable->WideCharTable)[*WideCharString];
|
||||||
|
}
|
||||||
|
|
||||||
|
return nReturn;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -7,18 +7,20 @@
|
||||||
extern "C" {
|
extern "C" {
|
||||||
#endif
|
#endif
|
||||||
#define MAXIMUM_LEADBYTES 12
|
#define MAXIMUM_LEADBYTES 12
|
||||||
|
|
||||||
|
/* Some documentation can be found here: http://www.ping.uio.no/~ovehk/nls/ */
|
||||||
typedef struct _CPTABLEINFO
|
typedef struct _CPTABLEINFO
|
||||||
{
|
{
|
||||||
USHORT CodePage;
|
USHORT CodePage;
|
||||||
USHORT MaximumCharacterSize;
|
USHORT MaximumCharacterSize; /* 1 = SBCS, 2 = DBCS */
|
||||||
USHORT DefaultChar;
|
USHORT DefaultChar; /* Default MultiByte Character for the CP->Unicode conversion */
|
||||||
USHORT UniDefaultChar;
|
USHORT UniDefaultChar; /* Default Unicode Character for the CP->Unicode conversion */
|
||||||
USHORT TransDefaultChar;
|
USHORT TransDefaultChar; /* Default MultiByte Character for the Unicode->CP conversion */
|
||||||
USHORT TransUniDefaultChar;
|
USHORT TransUniDefaultChar; /* Default Unicode Character for the Unicode->CP conversion */
|
||||||
USHORT DBCSCodePage;
|
USHORT DBCSCodePage;
|
||||||
UCHAR LeadByte[MAXIMUM_LEADBYTES];
|
UCHAR LeadByte[MAXIMUM_LEADBYTES];
|
||||||
PUSHORT MultiByteTable;
|
PUSHORT MultiByteTable; /* Table for CP->Unicode conversion */
|
||||||
PVOID WideCharTable;
|
PVOID WideCharTable; /* Table for Unicode->CP conversion */
|
||||||
PUSHORT DBCSRanges;
|
PUSHORT DBCSRanges;
|
||||||
PUSHORT DBCSOffsets;
|
PUSHORT DBCSOffsets;
|
||||||
} CPTABLEINFO, *PCPTABLEINFO;
|
} CPTABLEINFO, *PCPTABLEINFO;
|
||||||
|
|
Loading…
Reference in a new issue