[KERNEL32] LCMapString: Fullwidth/halfwidth conversion (#5206)

- Add file dll/win32/kernel32/winnls/string/dakuten.h.
- Add file dll/win32/kernel32/winnls/string/full2half.h.
- Support LCMAP_FULLWIDTH and LCMAP_HALFWIDTH in LCMapString.
CORE-11700
This commit is contained in:
Katayama Hirofumi MZ 2023-04-01 11:33:36 +09:00 committed by GitHub
parent 2687c1b415
commit 3f5bcf5775
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
3 changed files with 554 additions and 112 deletions

View file

@ -0,0 +1,59 @@
/*
* The dakuten (Japanese voiced letter) table
*
* Each row is DEFINE_DAKUTEN(voiced, single1, single2, half1, half2):
*   voiced  - the precomposed voiced or semi-voiced kana
*   single1 - the base kana the sound mark is attached to
*   single2 - the separate sound mark that follows single1
*             (U+309B voiced, U+309C semi-voiced; U+FF9E in two rows)
*   half1   - the halfwidth base kana
*   half2   - second unit listed alongside half1 (U+309B in every row)
*
* The including source file defines DEFINE_DAKUTEN before the #include.
* The kernel32 table built from this list keeps voiced/single1/single2 only
* and drops half1/half2.
*
* NOTE: This list must be sorted (rows are in ascending order of `voiced`).
* NOTE(review): the rows for U+305A and U+30BA use U+FF9E as single2 while
* every other voiced row uses U+309B -- confirm whether this is intentional.
*/
DEFINE_DAKUTEN(0x304C, 0x304B, 0x309B, 0xFF76, 0x309B) /* が --> か゛ --> ガ */
DEFINE_DAKUTEN(0x304E, 0x304D, 0x309B, 0xFF77, 0x309B) /* ぎ --> き゛ --> ギ */
DEFINE_DAKUTEN(0x3050, 0x304F, 0x309B, 0xFF78, 0x309B) /* ぐ --> く゛ --> グ */
DEFINE_DAKUTEN(0x3052, 0x3051, 0x309B, 0xFF79, 0x309B) /* げ --> け゛ --> ゲ */
DEFINE_DAKUTEN(0x3054, 0x3053, 0x309B, 0xFF7A, 0x309B) /* ご --> こ゛ --> ゴ */
DEFINE_DAKUTEN(0x3056, 0x3055, 0x309B, 0xFF7B, 0x309B) /* ざ --> さ゛ --> ザ */
DEFINE_DAKUTEN(0x3058, 0x3057, 0x309B, 0xFF7C, 0x309B) /* じ --> し゛ --> ジ */
DEFINE_DAKUTEN(0x305A, 0x3059, 0xFF9E, 0xFF7D, 0x309B) /* ず --> す゛ --> ズ */
DEFINE_DAKUTEN(0x305C, 0x305B, 0x309B, 0xFF7E, 0x309B) /* ぜ --> せ゛ --> ゼ */
DEFINE_DAKUTEN(0x305E, 0x305D, 0x309B, 0xFF7F, 0x309B) /* ぞ --> そ゛ --> ゾ */
DEFINE_DAKUTEN(0x3060, 0x305F, 0x309B, 0xFF80, 0x309B) /* だ --> た゛ --> ダ */
DEFINE_DAKUTEN(0x3062, 0x3061, 0x309B, 0xFF81, 0x309B) /* ぢ --> ち゛ --> ヂ */
DEFINE_DAKUTEN(0x3065, 0x3064, 0x309B, 0xFF82, 0x309B) /* づ --> つ゛ --> ヅ */
DEFINE_DAKUTEN(0x3067, 0x3066, 0x309B, 0xFF83, 0x309B) /* で --> て゛ --> デ */
DEFINE_DAKUTEN(0x3069, 0x3068, 0x309B, 0xFF84, 0x309B) /* ど --> と゛ --> ド */
DEFINE_DAKUTEN(0x3070, 0x306F, 0x309B, 0xFF8A, 0x309B) /* ば --> は゛ --> バ */
DEFINE_DAKUTEN(0x3071, 0x306F, 0x309C, 0xFF8A, 0x309B) /* ぱ --> は゜ --> パ */
DEFINE_DAKUTEN(0x3073, 0x3072, 0x309B, 0xFF8B, 0x309B) /* び --> ひ゛ --> ビ */
DEFINE_DAKUTEN(0x3074, 0x3072, 0x309C, 0xFF8B, 0x309B) /* ぴ --> ひ゜ --> ピ */
DEFINE_DAKUTEN(0x3076, 0x3075, 0x309B, 0xFF8C, 0x309B) /* ぶ --> ふ゛ --> ブ */
DEFINE_DAKUTEN(0x3077, 0x3075, 0x309C, 0xFF8C, 0x309B) /* ぷ --> ふ゜ --> プ */
DEFINE_DAKUTEN(0x3079, 0x3078, 0x309B, 0xFF8D, 0x309B) /* べ --> へ゛ --> ベ */
DEFINE_DAKUTEN(0x307A, 0x3078, 0x309C, 0xFF8D, 0x309B) /* ぺ --> へ゜ --> ペ */
DEFINE_DAKUTEN(0x307C, 0x307B, 0x309B, 0xFF8E, 0x309B) /* ぼ --> ほ゛ --> ボ */
DEFINE_DAKUTEN(0x307D, 0x307B, 0x309C, 0xFF8E, 0x309B) /* ぽ --> ほ゜ --> ポ */
DEFINE_DAKUTEN(0x3094, 0x3046, 0x309B, 0xFF73, 0x309B) /* ゔ --> う゛ --> ヴ */
DEFINE_DAKUTEN(0x30AC, 0xFF76, 0x309B, 0xFF76, 0x309B) /* ガ --> カ゛ --> ガ */
DEFINE_DAKUTEN(0x30AE, 0xFF77, 0x309B, 0xFF77, 0x309B) /* ギ --> キ゛ --> ギ */
DEFINE_DAKUTEN(0x30B0, 0xFF78, 0x309B, 0xFF78, 0x309B) /* グ --> ク゛ --> グ */
DEFINE_DAKUTEN(0x30B2, 0xFF79, 0x309B, 0xFF79, 0x309B) /* ゲ --> ケ゛ --> ゲ */
DEFINE_DAKUTEN(0x30B4, 0xFF7A, 0x309B, 0xFF7A, 0x309B) /* ゴ --> コ゛ --> ゴ */
DEFINE_DAKUTEN(0x30B6, 0xFF7B, 0x309B, 0xFF7B, 0x309B) /* ザ --> サ゛ --> ザ */
DEFINE_DAKUTEN(0x30B8, 0xFF7C, 0x309B, 0xFF7C, 0x309B) /* ジ --> シ゛ --> ジ */
DEFINE_DAKUTEN(0x30BA, 0xFF7D, 0xFF9E, 0xFF7D, 0x309B) /* ズ --> ス゛ --> ズ */
DEFINE_DAKUTEN(0x30BC, 0xFF7E, 0x309B, 0xFF7E, 0x309B) /* ゼ --> セ゛ --> ゼ */
DEFINE_DAKUTEN(0x30BE, 0xFF7F, 0x309B, 0xFF7F, 0x309B) /* ゾ --> ソ゛ --> ゾ */
DEFINE_DAKUTEN(0x30C0, 0xFF80, 0x309B, 0xFF80, 0x309B) /* ダ --> タ゛ --> ダ */
DEFINE_DAKUTEN(0x30C2, 0xFF81, 0x309B, 0xFF81, 0x309B) /* ヂ --> チ゛ --> ヂ */
DEFINE_DAKUTEN(0x30C5, 0xFF82, 0x309B, 0xFF82, 0x309B) /* ヅ --> ツ゛ --> ヅ */
DEFINE_DAKUTEN(0x30C7, 0xFF83, 0x309B, 0xFF83, 0x309B) /* デ --> テ゛ --> デ */
DEFINE_DAKUTEN(0x30C9, 0xFF84, 0x309B, 0xFF84, 0x309B) /* ド --> ト゛ --> ド */
DEFINE_DAKUTEN(0x30D0, 0xFF8A, 0x309B, 0xFF8A, 0x309B) /* バ --> ハ゛ --> バ */
DEFINE_DAKUTEN(0x30D1, 0xFF8A, 0x309C, 0xFF8A, 0x309B) /* パ --> ハ゜ --> パ */
DEFINE_DAKUTEN(0x30D3, 0xFF8B, 0x309B, 0xFF8B, 0x309B) /* ビ --> ヒ゛ --> ビ */
DEFINE_DAKUTEN(0x30D4, 0xFF8B, 0x309C, 0xFF8B, 0x309B) /* ピ --> ヒ゜ --> ピ */
DEFINE_DAKUTEN(0x30D6, 0xFF8C, 0x309B, 0xFF8C, 0x309B) /* ブ --> フ゛ --> ブ */
DEFINE_DAKUTEN(0x30D7, 0xFF8C, 0x309C, 0xFF8C, 0x309B) /* プ --> フ゜ --> プ */
DEFINE_DAKUTEN(0x30D9, 0xFF8D, 0x309B, 0xFF8D, 0x309B) /* ベ --> ヘ゛ --> ベ */
DEFINE_DAKUTEN(0x30DA, 0xFF8D, 0x309C, 0xFF8D, 0x309B) /* ペ --> ヘ゜ --> ペ */
DEFINE_DAKUTEN(0x30DC, 0xFF8E, 0x309B, 0xFF8E, 0x309B) /* ボ --> ホ゛ --> ボ */
DEFINE_DAKUTEN(0x30DD, 0xFF8E, 0x309C, 0xFF8E, 0x309B) /* ポ --> ホ゜ --> ポ */
DEFINE_DAKUTEN(0x30F4, 0xFF73, 0x309B, 0xFF73, 0x309B) /* ヴ --> ウ゛ --> ヴ */
DEFINE_DAKUTEN(0x30F7, 0xFF9C, 0x309B, 0xFF9C, 0x309B) /* ヷ --> ワ゛ --> ヷ */
DEFINE_DAKUTEN(0x30FA, 0xFF66, 0x309B, 0xFF66, 0x309B) /* ヺ --> ヲ゛ --> ヺ */

View file

@ -0,0 +1,99 @@
/*
* The fullwidth/halfwidth character table.
*
* Each row is DEFINE_FULL2HALF(full, half1, half2):
*   full  - the fullwidth code point
*   half1 - the first (or only) unit of the halfwidth form
*   half2 - the sound mark that follows half1, or 0 when the halfwidth form
*           is a single unit
*
* The including source file defines DEFINE_FULL2HALF before the #include.
*
* NOTE: This list must be sorted (rows are in ascending order of `full`).
*
* U+30D1 and U+30D4 each have two rows. The converters scan this table from
* the last row toward the first, so for fullwidth --> halfwidth the later row
* (half2 = U+FF9F) is the one that is used. The earlier row, marked "<--",
* only adds a second halfwidth pair that is accepted when converting back to
* fullwidth.
*/
DEFINE_FULL2HALF(0x3000, 0x0020, 0) /* U+3000 ideographic space --> U+0020 space */
DEFINE_FULL2HALF(0x3001, 0xFF64, 0) /* 、 --> 、 */
DEFINE_FULL2HALF(0x3002, 0xFF61, 0) /* 。 --> 。 */
DEFINE_FULL2HALF(0x300C, 0xFF62, 0) /* 「 --> 「 */
DEFINE_FULL2HALF(0x300D, 0xFF63, 0) /* 」 --> 」 */
DEFINE_FULL2HALF(0x309B, 0xFF9E, 0) /* ゛ --> ゙ */
DEFINE_FULL2HALF(0x309C, 0xFF9F, 0) /* ゜ --> ゚ */
DEFINE_FULL2HALF(0x30A1, 0xFF67, 0) /* ァ --> ァ */
DEFINE_FULL2HALF(0x30A2, 0xFF71, 0) /* ア --> ア */
DEFINE_FULL2HALF(0x30A3, 0xFF68, 0) /* ィ --> ィ */
DEFINE_FULL2HALF(0x30A4, 0xFF72, 0) /* イ --> イ */
DEFINE_FULL2HALF(0x30A5, 0xFF69, 0) /* ゥ --> ゥ */
DEFINE_FULL2HALF(0x30A6, 0xFF73, 0) /* ウ --> ウ */
DEFINE_FULL2HALF(0x30A7, 0xFF6A, 0) /* ェ --> ェ */
DEFINE_FULL2HALF(0x30A8, 0xFF74, 0) /* エ --> エ */
DEFINE_FULL2HALF(0x30A9, 0xFF6B, 0) /* ォ --> ォ */
DEFINE_FULL2HALF(0x30AA, 0xFF75, 0) /* オ --> オ */
DEFINE_FULL2HALF(0x30AB, 0xFF76, 0) /* カ --> カ */
DEFINE_FULL2HALF(0x30AC, 0xFF76, 0xFF9E) /* ガ --> ガ */
DEFINE_FULL2HALF(0x30AD, 0xFF77, 0) /* キ --> キ */
DEFINE_FULL2HALF(0x30AE, 0xFF77, 0xFF9E) /* ギ --> ギ */
DEFINE_FULL2HALF(0x30AF, 0xFF78, 0) /* ク --> ク */
DEFINE_FULL2HALF(0x30B0, 0xFF78, 0xFF9E) /* グ --> グ */
DEFINE_FULL2HALF(0x30B1, 0xFF79, 0) /* ケ --> ケ */
DEFINE_FULL2HALF(0x30B2, 0xFF79, 0xFF9E) /* ゲ --> ゲ */
DEFINE_FULL2HALF(0x30B3, 0xFF7A, 0) /* コ --> コ */
DEFINE_FULL2HALF(0x30B4, 0xFF7A, 0xFF9E) /* ゴ --> ゴ */
DEFINE_FULL2HALF(0x30B5, 0xFF7B, 0) /* サ --> サ */
DEFINE_FULL2HALF(0x30B6, 0xFF7B, 0xFF9E) /* ザ --> ザ */
DEFINE_FULL2HALF(0x30B7, 0xFF7C, 0) /* シ --> シ */
DEFINE_FULL2HALF(0x30B8, 0xFF7C, 0xFF9E) /* ジ --> ジ */
DEFINE_FULL2HALF(0x30B9, 0xFF7D, 0) /* ス --> ス */
DEFINE_FULL2HALF(0x30BA, 0xFF7D, 0xFF9E) /* ズ --> ズ */
DEFINE_FULL2HALF(0x30BB, 0xFF7E, 0) /* セ --> セ */
DEFINE_FULL2HALF(0x30BC, 0xFF7E, 0xFF9E) /* ゼ --> ゼ */
DEFINE_FULL2HALF(0x30BD, 0xFF7F, 0) /* ソ --> ソ */
DEFINE_FULL2HALF(0x30BE, 0xFF7F, 0xFF9E) /* ゾ --> ゾ */
DEFINE_FULL2HALF(0x30BF, 0xFF80, 0) /* タ --> タ */
DEFINE_FULL2HALF(0x30C0, 0xFF80, 0xFF9E) /* ダ --> ダ */
DEFINE_FULL2HALF(0x30C1, 0xFF81, 0) /* チ --> チ */
DEFINE_FULL2HALF(0x30C2, 0xFF81, 0xFF9E) /* ヂ --> ヂ */
DEFINE_FULL2HALF(0x30C3, 0xFF6F, 0) /* ッ --> ッ */
DEFINE_FULL2HALF(0x30C4, 0xFF82, 0) /* ツ --> ツ */
DEFINE_FULL2HALF(0x30C5, 0xFF82, 0xFF9E) /* ヅ --> ヅ */
DEFINE_FULL2HALF(0x30C6, 0xFF83, 0) /* テ --> テ */
DEFINE_FULL2HALF(0x30C7, 0xFF83, 0xFF9E) /* デ --> デ */
DEFINE_FULL2HALF(0x30C8, 0xFF84, 0) /* ト --> ト */
DEFINE_FULL2HALF(0x30C9, 0xFF84, 0xFF9E) /* ド --> ド */
DEFINE_FULL2HALF(0x30CA, 0xFF85, 0) /* ナ --> ナ */
DEFINE_FULL2HALF(0x30CB, 0xFF86, 0) /* ニ --> ニ */
DEFINE_FULL2HALF(0x30CC, 0xFF87, 0) /* ヌ --> ヌ */
DEFINE_FULL2HALF(0x30CD, 0xFF88, 0) /* ネ --> ネ */
DEFINE_FULL2HALF(0x30CE, 0xFF89, 0) /* ノ --> ノ */
DEFINE_FULL2HALF(0x30CF, 0xFF8A, 0) /* ハ --> ハ */
DEFINE_FULL2HALF(0x30D0, 0xFF8A, 0xFF9E) /* バ --> バ */
DEFINE_FULL2HALF(0x30D1, 0xFF8A, 0x309C) /* パ <-- ハ゜ */
DEFINE_FULL2HALF(0x30D1, 0xFF8A, 0xFF9F) /* パ --> パ */
DEFINE_FULL2HALF(0x30D2, 0xFF8B, 0) /* ヒ --> ヒ */
DEFINE_FULL2HALF(0x30D3, 0xFF8B, 0xFF9E) /* ビ --> ビ */
DEFINE_FULL2HALF(0x30D4, 0xFF8B, 0x309C) /* ピ <-- ヒ゜ */
DEFINE_FULL2HALF(0x30D4, 0xFF8B, 0xFF9F) /* ピ --> ピ */
DEFINE_FULL2HALF(0x30D5, 0xFF8C, 0) /* フ --> フ */
DEFINE_FULL2HALF(0x30D6, 0xFF8C, 0xFF9E) /* ブ --> ブ */
DEFINE_FULL2HALF(0x30D7, 0xFF8C, 0xFF9F) /* プ --> プ */
DEFINE_FULL2HALF(0x30D8, 0xFF8D, 0) /* ヘ --> ヘ */
DEFINE_FULL2HALF(0x30D9, 0xFF8D, 0xFF9E) /* ベ --> ベ */
DEFINE_FULL2HALF(0x30DA, 0xFF8D, 0xFF9F) /* ペ --> ペ */
DEFINE_FULL2HALF(0x30DB, 0xFF8E, 0) /* ホ --> ホ */
DEFINE_FULL2HALF(0x30DC, 0xFF8E, 0xFF9E) /* ボ --> ボ */
DEFINE_FULL2HALF(0x30DD, 0xFF8E, 0xFF9F) /* ポ --> ポ */
DEFINE_FULL2HALF(0x30DE, 0xFF8F, 0) /* マ --> マ */
DEFINE_FULL2HALF(0x30DF, 0xFF90, 0) /* ミ --> ミ */
DEFINE_FULL2HALF(0x30E0, 0xFF91, 0) /* ム --> ム */
DEFINE_FULL2HALF(0x30E1, 0xFF92, 0) /* メ --> メ */
DEFINE_FULL2HALF(0x30E2, 0xFF93, 0) /* モ --> モ */
DEFINE_FULL2HALF(0x30E3, 0xFF6C, 0) /* ャ --> ャ */
DEFINE_FULL2HALF(0x30E4, 0xFF94, 0) /* ヤ --> ヤ */
DEFINE_FULL2HALF(0x30E5, 0xFF6D, 0) /* ュ --> ュ */
DEFINE_FULL2HALF(0x30E6, 0xFF95, 0) /* ユ --> ユ */
DEFINE_FULL2HALF(0x30E7, 0xFF6E, 0) /* ョ --> ョ */
DEFINE_FULL2HALF(0x30E8, 0xFF96, 0) /* ヨ --> ヨ */
DEFINE_FULL2HALF(0x30E9, 0xFF97, 0) /* ラ --> ラ */
DEFINE_FULL2HALF(0x30EA, 0xFF98, 0) /* リ --> リ */
DEFINE_FULL2HALF(0x30EB, 0xFF99, 0) /* ル --> ル */
DEFINE_FULL2HALF(0x30EC, 0xFF9A, 0) /* レ --> レ */
DEFINE_FULL2HALF(0x30ED, 0xFF9B, 0) /* ロ --> ロ */
DEFINE_FULL2HALF(0x30EF, 0xFF9C, 0) /* ワ --> ワ */
DEFINE_FULL2HALF(0x30F2, 0xFF66, 0) /* ヲ --> ヲ */
DEFINE_FULL2HALF(0x30F3, 0xFF9D, 0) /* ン --> ン */
DEFINE_FULL2HALF(0x30F4, 0xFF73, 0xFF9E) /* ヴ --> ヴ */
DEFINE_FULL2HALF(0x30F7, 0xFF9C, 0xFF9E) /* ヷ --> ヷ */
DEFINE_FULL2HALF(0x30FA, 0xFF66, 0xFF9E) /* ヺ --> ヺ */
DEFINE_FULL2HALF(0x30FB, 0xFF65, 0) /* ・ --> ・ */
DEFINE_FULL2HALF(0x30FC, 0xFF70, 0) /* ー --> ー */

View file

@ -1866,6 +1866,399 @@ BOOL WINAPI GetStringTypeExA( LCID locale, DWORD type, LPCSTR src, INT count, LP
return GetStringTypeA(locale, type, src, count, chartype);
}
/*
 * Store the byte-swapped form of each of the first len units of src into dst.
 * src and dst may be the same buffer: each unit is read before it is written.
 */
static inline void map_byterev(const WCHAR *src, int len, WCHAR *dst)
{
    int i = 0;

    for (; len != 0; --len, ++i)
        dst[i] = RtlUshortByteSwap(src[i]);
}
/*
 * Convert Katakana in src to the corresponding Hiragana.
 *
 * Each of the srclen input units yields exactly one output unit. A unit is
 * stored only while it still fits in dst (dstlen units); the return value is
 * the number of units the complete result needs, which may exceed dstlen.
 */
static int map_to_hiragana(const WCHAR *src, int srclen, WCHAR *dst, int dstlen)
{
    int count = 0;

    while (srclen)
    {
        WCHAR unit = *src;

        /*
         * Convertible Katakana:
         *   U+30A1 ... U+30F3  ordinary letters
         *   U+30F4             Letter VU
         *   U+30F5             Letter Small KA
         *   U+30FD, U+30FE     Iteration Mark, Voiced Iteration Mark
         * The first three groups form the contiguous range U+30A1 ... U+30F5.
         */
        if ((unit >= 0x30A1 && unit <= 0x30F5) || unit == 0x30FD || unit == 0x30FE)
            unit -= 0x60; /* Katakana to Hiragana */

        if (count < dstlen)
            dst[count] = unit;

        count++;
        src++;
        srclen--;
    }

    return count;
}
/*
 * Convert Hiragana in src to the corresponding Katakana.
 *
 * Each of the srclen input units yields exactly one output unit. A unit is
 * stored only while it still fits in dst (dstlen units); the return value is
 * the number of units the complete result needs, which may exceed dstlen.
 */
static int map_to_katakana(const WCHAR *src, int srclen, WCHAR *dst, int dstlen)
{
    int pos;

    for (pos = 0; srclen; src++, srclen--, pos++)
    {
        /*
         * U+3041 ... U+3093: Hiragana
         * U+3094: Hiragana Letter VU
         * U+3095: Hiragana Letter Small KA
         * U+309D: Hiragana Iteration Mark
         * U+309E: Hiragana Voiced Iteration Mark
         */
        WCHAR wch = *src;

        /*
         * All constants here are hexadecimal code points. (A decimal 3094
         * would be U+0C16, an unrelated character, and would leave U+3094
         * unconverted.)
         */
        if ((0x3041 <= wch && wch <= 0x3093) ||
            wch == 0x3094 || wch == 0x3095 || wch == 0x309D || wch == 0x309E)
        {
            wch += 0x60; /* Hiragana to Katakana */
        }

        if (pos < dstlen)
            dst[pos] = wch;
    }

    return pos;
}
/*
 * The table that contains fullwidth characters and halfwidth characters.
 *
 * Each entry holds three code units: [0] the fullwidth character, [1] the
 * first halfwidth unit and [2] the second halfwidth unit (0 when the
 * halfwidth form is a single unit). The rows come from full2half.h, which is
 * expanded through the DEFINE_FULL2HALF macro defined just for the #include.
 */
typedef WCHAR FULL2HALF_ENTRY[3];
static const FULL2HALF_ENTRY full2half_table[] =
{
#define DEFINE_FULL2HALF(full, half1, half2) { full, half1, half2 },
#include "full2half.h"
#undef DEFINE_FULL2HALF
};
/* Field accessors for entry `index` of a FULL2HALF_ENTRY array */
#define GET_FULL(table, index) ((table)[index][0])
#define GET_HALF1(table, index) ((table)[index][1])
#define GET_HALF2(table, index) ((table)[index][2])
/*
 * The table that contains dakuten entries.
 *
 * Each entry holds three code units: [0] the precomposed voiced character,
 * [1] the base character and [2] the sound mark that follows the base.
 * The rows come from dakuten.h; its half1/half2 columns are accepted by the
 * DEFINE_DAKUTEN macro below but are not stored in the table.
 */
typedef WCHAR DAKUTEN_ENTRY[3];
static const DAKUTEN_ENTRY dakuten_table[] =
{
#define DEFINE_DAKUTEN(voiced, single1, single2, half1, half2) { voiced, single1, single2 },
#include "dakuten.h"
#undef DEFINE_DAKUTEN
};
/* Field accessors for entry `index` of a DAKUTEN_ENTRY array */
#define GET_VOICED(table, index) ((table)[index][0])
#define GET_SINGLE1(table, index) ((table)[index][1])
#define GET_SINGLE2(table, index) ((table)[index][2])
/*
 * Convert fullwidth characters in src to their halfwidth forms.
 *
 * flags  - if LCMAP_KATAKANA or LCMAP_HIRAGANA is set, each unit is first
 *          converted to that kana type before the width conversion.
 * src    - input units; srclen is the number of units to convert.
 * dst    - output buffer of dstlen units. When dstlen is 0 nothing is
 *          stored and only the required length is computed.
 *
 * Returns the number of output units counted. A voiced kana expands to two
 * halfwidth units (base + sound mark), so the result can be longer than
 * srclen. Nothing is ever stored at or beyond dst[dstlen].
 */
static int map_to_halfwidth(DWORD flags, const WCHAR *src, int srclen, WCHAR *dst, int dstlen)
{
    int pos, i;
    const int count1 = (int)ARRAY_SIZE(full2half_table);
    const FULL2HALF_ENTRY *table1 = full2half_table;

    for (pos = 0; srclen; src++, srclen--, pos++)
    {
        WCHAR ch = *src;

        if (flags & LCMAP_KATAKANA)
            map_to_katakana(&ch, 1, &ch, 1);
        else if (flags & LCMAP_HIRAGANA)
            map_to_hiragana(&ch, 1, &ch, 1);

        if (ch < 0x3000) /* Quick judgment */
        {
            if (pos < dstlen)
                dst[pos] = ch;
            continue;
        }

        if (0xFF01 <= ch && ch <= 0xFF5E) /* U+FF01 ... U+FF5E */
        {
            if (pos < dstlen)
                dst[pos] = ch - 0xFEE0; /* Fullwidth ASCII to halfwidth ASCII */
            continue;
        }

        /*
         * Search in table1 (full/half). The scan runs in reverse order so
         * that, for a code point listed twice, the later row wins.
         */
        for (i = count1 - 1; i >= 0; --i)
        {
            if (GET_FULL(table1, i) != ch)
                continue;

            if (GET_HALF2(table1, i) == 0)
            {
                /* Single-unit halfwidth form */
                if (pos < dstlen)
                    dst[pos] = GET_HALF1(table1, i);
            }
            else if (!dstlen)
            {
                /* Length query: count the extra unit of the two-unit form */
                pos++;
            }
            else if (pos + 1 < dstlen)
            {
                /* Both units fit */
                dst[pos++] = GET_HALF1(table1, i);
                dst[pos  ] = GET_HALF2(table1, i);
            }
            else if (pos < dstlen)
            {
                /*
                 * Only one slot is left, so the pair does not fit: keep the
                 * character unconverted. When pos is already past the end of
                 * the buffer nothing may be stored at all.
                 */
                dst[pos] = ch;
            }
            break;
        }
        if (i >= 0)
            continue;

        /* Not in the table: copy unchanged */
        if (pos < dstlen)
            dst[pos] = ch;
    }

    return pos;
}
/*
 * Convert halfwidth characters in src to their fullwidth forms.
 *
 * A halfwidth base unit followed by a matching sound mark is folded into one
 * fullwidth character, so the result can be shorter than srclen. Units are
 * stored only while they fit in dst (dstlen units); the return value is the
 * number of units the complete result needs.
 */
static int map_to_fullwidth(const WCHAR *src, int srclen, WCHAR *dst, int dstlen)
{
    const int width_rows = (int)ARRAY_SIZE(full2half_table);
    const int voiced_rows = (int)ARRAY_SIZE(dakuten_table);
    int out = 0;

    while (srclen)
    {
        const WCHAR cur = *src;
        WCHAR mapped = cur; /* Unmapped units are copied unchanged */

        if (cur == 0x20)
        {
            mapped = 0x3000; /* U+0020 Space --> U+3000 Ideographic Space */
        }
        else if (0x21 <= cur && cur <= 0x7E)
        {
            mapped = cur + 0xFEE0; /* Halfwidth ASCII --> U+FF01 ... U+FF5E */
        }
        else if (cur >= 0xFF00) /* Anything below U+FF00 needs no table lookup */
        {
            int found = 0;
            int row;

            /*
             * Width table, last row first: a row with a second unit is only
             * accepted when that unit is the next input unit, in which case
             * the next unit is consumed as well.
             */
            for (row = width_rows - 1; row >= 0 && !found; row--)
            {
                const WCHAR mark = GET_HALF2(full2half_table, row);

                if (GET_HALF1(full2half_table, row) != cur)
                    continue;
                if (mark != 0)
                {
                    if (srclen <= 1 || mark != src[1])
                        continue;
                    srclen--;
                    src++;
                }
                mapped = GET_FULL(full2half_table, row);
                found = 1;
            }

            /* Dakuten table, last row first: always a base + mark pair */
            for (row = voiced_rows - 1; row >= 0 && !found; row--)
            {
                if (GET_SINGLE1(dakuten_table, row) != cur)
                    continue;
                if (srclen <= 1 || GET_SINGLE2(dakuten_table, row) != src[1])
                    continue;
                srclen--;
                src++;
                mapped = GET_VOICED(dakuten_table, row);
                found = 1;
            }
        }

        if (out < dstlen)
            dst[out] = mapped;

        out++;
        src++;
        srclen--;
    }

    return out;
}
/*
 * Lowercase srclen units of src into dst.
 *
 * With NORM_IGNORESYMBOLS set, units classified as C1_PUNCT or C1_SPACE are
 * dropped. Units are stored only while they fit in dst (dstlen units); the
 * return value is the number of units kept, which may exceed dstlen.
 */
static int map_to_lowercase(DWORD flags, const WCHAR *src, int srclen, WCHAR *dst, int dstlen)
{
    int out = 0;

    while (srclen)
    {
        const WCHAR cur = *src;

        src++;
        srclen--;

        if ((flags & NORM_IGNORESYMBOLS) && (get_char_typeW(cur) & (C1_PUNCT | C1_SPACE)))
            continue;

        if (out < dstlen)
            dst[out] = tolowerW(cur);
        out++;
    }

    return out;
}
/*
 * Uppercase srclen units of src into dst.
 *
 * With NORM_IGNORESYMBOLS set, units classified as C1_PUNCT or C1_SPACE are
 * dropped. Units are stored only while they fit in dst (dstlen units); the
 * return value is the number of units kept, which may exceed dstlen.
 */
static int map_to_uppercase(DWORD flags, const WCHAR *src, int srclen, WCHAR *dst, int dstlen)
{
    int out = 0;

    while (srclen)
    {
        const WCHAR cur = *src;

        src++;
        srclen--;

        if ((flags & NORM_IGNORESYMBOLS) && (get_char_typeW(cur) & (C1_PUNCT | C1_SPACE)))
            continue;

        if (out < dstlen)
            dst[out] = toupperW(cur);
        out++;
    }

    return out;
}
/*
 * Copy src to dst, leaving out the units selected by the NORM_IGNORE* flags.
 *
 * NORM_IGNORESYMBOLS  drops units typed C1_PUNCT or C3_SYMBOL.
 * NORM_IGNORENONSPACE drops units whose CTYPE2 value has bits in common with
 *                     C2_OTHERNEUTRAL and whose CTYPE3 value has
 *                     C3_NONSPACING or C3_DIACRITIC set.
 *
 * Units are stored only while they fit in dst (dstlen units); the return
 * value is the number of units kept, which may exceed dstlen.
 */
static int map_remove_ignored(DWORD flags, const WCHAR *src, int srclen, WCHAR *dst, int dstlen)
{
    int kept = 0;
    WORD type1, type2, type3;

    while (srclen)
    {
        WCHAR cur = *src;

        src++;
        srclen--;

        /* Classify the unit in all three character-type tables */
        GetStringTypeW(CT_CTYPE1, &cur, 1, &type1);
        GetStringTypeW(CT_CTYPE2, &cur, 1, &type2);
        GetStringTypeW(CT_CTYPE3, &cur, 1, &type3);

        if ((flags & NORM_IGNORESYMBOLS) &&
            ((type1 & C1_PUNCT) || (type3 & C3_SYMBOL)))
        {
            continue;
        }

        if ((flags & NORM_IGNORENONSPACE) &&
            (type2 & C2_OTHERNEUTRAL) && (type3 & (C3_NONSPACING | C3_DIACRITIC)))
        {
            continue;
        }

        if (kept < dstlen)
            dst[kept] = cur;
        kept++;
    }

    return kept;
}
/*
 * Apply the (non-sortkey) LCMapString transformation selected by flags.
 *
 * flags  - LCMAP_xxx and NORM_IGNORExxx flags. LCMAP_LOWERCASE together with
 *          LCMAP_UPPERCASE, or any combination not handled below, fails with
 *          ERROR_INVALID_FLAGS.
 * src    - input units; srclen is the number of units to convert.
 * dst    - output buffer of dstlen units. When dstlen is 0 nothing is stored
 *          and the required length is returned.
 *
 * Returns the length of the result in units. Returns 0 and sets
 * ERROR_INSUFFICIENT_BUFFER when dstlen is non-zero but too small.
 *
 * The helpers report the length the full result needs, which can exceed
 * dstlen. Every in-place pass over dst is therefore limited to the units
 * that were actually stored, so dst is never read past its end.
 */
static int lcmap_string(DWORD flags, const WCHAR *src, int srclen, WCHAR *dst, int dstlen)
{
    int ret = 0;

    if ((flags & (LCMAP_LOWERCASE | LCMAP_UPPERCASE)) == (LCMAP_LOWERCASE | LCMAP_UPPERCASE))
    {
        SetLastError(ERROR_INVALID_FLAGS);
        return 0;
    }

    /* Case mapping and byte reversal are applied afterwards as extra passes */
    switch (flags & ~(LCMAP_BYTEREV | LCMAP_LOWERCASE | LCMAP_UPPERCASE | LCMAP_LINGUISTIC_CASING))
    {
    case LCMAP_HIRAGANA:
        ret = map_to_hiragana(src, srclen, dst, dstlen);
        break;

    case LCMAP_KATAKANA:
        ret = map_to_katakana(src, srclen, dst, dstlen);
        break;

    case LCMAP_HALFWIDTH:
    case LCMAP_HIRAGANA | LCMAP_HALFWIDTH:
    case LCMAP_KATAKANA | LCMAP_HALFWIDTH:
        /* map_to_halfwidth does the kana conversion itself, based on flags */
        ret = map_to_halfwidth(flags, src, srclen, dst, dstlen);
        break;

    case LCMAP_FULLWIDTH:
        ret = map_to_fullwidth(src, srclen, dst, dstlen);
        break;

    case LCMAP_HIRAGANA | LCMAP_FULLWIDTH:
        ret = map_to_fullwidth(src, srclen, dst, dstlen);
        if (dstlen > 0 && ret)
            map_to_hiragana(dst, min(ret, dstlen), dst, dstlen);
        break;

    case LCMAP_KATAKANA | LCMAP_FULLWIDTH:
        ret = map_to_fullwidth(src, srclen, dst, dstlen);
        if (dstlen > 0 && ret)
            map_to_katakana(dst, min(ret, dstlen), dst, dstlen);
        break;

    case LCMAP_SIMPLIFIED_CHINESE:
        FIXME("LCMAP_SIMPLIFIED_CHINESE\n");
        break;

    case LCMAP_TRADITIONAL_CHINESE:
        FIXME("LCMAP_TRADITIONAL_CHINESE\n");
        break;

    case NORM_IGNORENONSPACE:
    case NORM_IGNORESYMBOLS:
    case NORM_IGNORENONSPACE | NORM_IGNORESYMBOLS:
        /* Of the flags masked out above, only LCMAP_BYTEREV may accompany these */
        if (flags & ~(NORM_IGNORENONSPACE | NORM_IGNORESYMBOLS | LCMAP_BYTEREV))
        {
            SetLastError(ERROR_INVALID_FLAGS);
            return 0;
        }
        ret = map_remove_ignored(flags, src, srclen, dst, dstlen);
        break;

    case 0:
        /* The flag is cleared so that the case pass below does not run again */
        if (flags & LCMAP_LOWERCASE)
        {
            ret = map_to_lowercase(flags, src, srclen, dst, dstlen);
            flags &= ~LCMAP_LOWERCASE;
            break;
        }
        if (flags & LCMAP_UPPERCASE)
        {
            ret = map_to_uppercase(flags, src, srclen, dst, dstlen);
            flags &= ~LCMAP_UPPERCASE;
            break;
        }
        if (flags & LCMAP_BYTEREV)
        {
            /*
             * Report the full length even when only part of it fits, so that
             * a too-small buffer is rejected below instead of being silently
             * truncated. The bytes are swapped in the pass below.
             */
            ret = srclen;
            if (dstlen > 0)
                RtlCopyMemory(dst, src, min(srclen, dstlen) * sizeof(WCHAR));
            break;
        }
        /* fall through */
    default:
        SetLastError(ERROR_INVALID_FLAGS);
        return 0;
    }

    if (dstlen > 0)
    {
        /* Only this many units of dst hold data */
        const int stored = min(ret, dstlen);

        if (flags & LCMAP_LOWERCASE)
            map_to_lowercase(flags, dst, stored, dst, dstlen);
        if (flags & LCMAP_UPPERCASE)
            map_to_uppercase(flags, dst, stored, dst, dstlen);
        if (flags & LCMAP_BYTEREV)
            map_byterev(dst, stored, dst);
    }

    if (dstlen && dstlen < ret)
    {
        SetLastError(ERROR_INSUFFICIENT_BUFFER);
        return 0;
    }

    return ret;
}
/*************************************************************************
* LCMapStringEx (KERNEL32.@)
*
@ -1889,8 +2282,6 @@ BOOL WINAPI GetStringTypeExA( LCID locale, DWORD type, LPCSTR src, INT count, LP
INT WINAPI LCMapStringEx(LPCWSTR locale, DWORD flags, LPCWSTR src, INT srclen, LPWSTR dst, INT dstlen,
LPNLSVERSIONINFO version, LPVOID reserved, LPARAM handle)
{
LPWSTR dst_ptr;
if (version) FIXME("unsupported version structure %p\n", version);
if (reserved) FIXME("unsupported reserved pointer %p\n", reserved);
if (handle)
@ -1918,28 +2309,14 @@ INT WINAPI LCMapStringEx(LPCWSTR locale, DWORD flags, LPCWSTR src, INT srclen, L
return 0;
}
/* mutually exclusive flags */
if ((flags & (LCMAP_LOWERCASE | LCMAP_UPPERCASE)) == (LCMAP_LOWERCASE | LCMAP_UPPERCASE) ||
(flags & (LCMAP_HIRAGANA | LCMAP_KATAKANA)) == (LCMAP_HIRAGANA | LCMAP_KATAKANA) ||
(flags & (LCMAP_HALFWIDTH | LCMAP_FULLWIDTH)) == (LCMAP_HALFWIDTH | LCMAP_FULLWIDTH) ||
(flags & (LCMAP_TRADITIONAL_CHINESE | LCMAP_SIMPLIFIED_CHINESE)) == (LCMAP_TRADITIONAL_CHINESE | LCMAP_SIMPLIFIED_CHINESE))
{
SetLastError(ERROR_INVALID_FLAGS);
return 0;
}
if (!dstlen) dst = NULL;
if (flags & LCMAP_SORTKEY)
{
INT ret;
if (src == dst)
{
SetLastError(ERROR_INVALID_FLAGS);
return 0;
}
if (srclen < 0) srclen = strlenW(src);
if (srclen < 0)
srclen = strlenW(src);
ret = wine_get_sortkey(flags, src, srclen, (char *)dst, dstlen);
if (ret == 0)
@ -1956,100 +2333,7 @@ INT WINAPI LCMapStringEx(LPCWSTR locale, DWORD flags, LPCWSTR src, INT srclen, L
return 0;
}
if (!dst) /* return required string length */
{
INT len;
for (len = 0; srclen; src++, srclen--)
{
WCHAR wch = *src;
/* tests show that win2k just ignores NORM_IGNORENONSPACE,
* and skips white space and punctuation characters for
* NORM_IGNORESYMBOLS.
*/
if ((flags & NORM_IGNORESYMBOLS) && (get_char_typeW(wch) & (C1_PUNCT | C1_SPACE)))
continue;
len++;
}
return len;
}
if (flags & LCMAP_UPPERCASE)
{
for (dst_ptr = dst; srclen && dstlen; src++, srclen--)
{
WCHAR wch = *src;
if ((flags & NORM_IGNORESYMBOLS) && (get_char_typeW(wch) & (C1_PUNCT | C1_SPACE)))
continue;
*dst_ptr++ = toupperW(wch);
dstlen--;
}
}
else if (flags & LCMAP_LOWERCASE)
{
for (dst_ptr = dst; srclen && dstlen; src++, srclen--)
{
WCHAR wch = *src;
if ((flags & NORM_IGNORESYMBOLS) && (get_char_typeW(wch) & (C1_PUNCT | C1_SPACE)))
continue;
*dst_ptr++ = tolowerW(wch);
dstlen--;
}
}
else
{
for (dst_ptr = dst; srclen && dstlen; src++, srclen--)
{
WCHAR wch = *src;
if ((flags & NORM_IGNORESYMBOLS) && (get_char_typeW(wch) & (C1_PUNCT | C1_SPACE)))
continue;
*dst_ptr++ = wch;
dstlen--;
}
}
#ifdef __REACTOS__
if (flags & LCMAP_KATAKANA)
{
INT convlen = dst_ptr - dst;
for (dst_ptr = dst; convlen; --convlen, ++dst_ptr)
{
/*
* U+3041 ... U+3093: Hiragana
* U+3095: Hiragana Letter Small KA
* U+309D: Hiragana Iteration Mark
* U+309E: Hiragana Voiced Iteration Mark
*/
WCHAR wch = *dst_ptr;
if ((0x3041 <= wch && wch <= 0x3093) || wch == 0x3095 || wch == 0x309D || wch == 0x309E)
*dst_ptr = wch + 0x60; /* Hiragana to Katanaka */
}
}
else if (flags & LCMAP_HIRAGANA)
{
INT convlen = dst_ptr - dst;
for (dst_ptr = dst; convlen; --convlen, ++dst_ptr)
{
/*
* U+30A1 ... U+30F3: Katakana
* U+30F5: Katakana Letter Small KA
* U+30FD: Katakana Iteration Mark
* U+30FE: Katakana Voiced Iteration Mark
*/
WCHAR wch = *dst_ptr;
if ((0x30A1 <= wch && wch <= 0x30F3) || wch == 0x30F5 || wch == 0x30FD || wch == 0x30FE)
*dst_ptr = wch - 0x60; /* Katanaka to Hiragana */
}
}
#endif
if (srclen)
{
SetLastError(ERROR_INSUFFICIENT_BUFFER);
return 0;
}
return dst_ptr - dst;
return lcmap_string(flags, src, srclen, dst, dstlen);
}
/*************************************************************************