mirror of
https://github.com/reactos/reactos.git
synced 2025-08-10 01:03:01 +00:00
[UNICODE] Sync with Wine Staging 4.18. CORE-16441
This commit is contained in:
parent
3cfeed64b1
commit
1a472d9453
84 changed files with 1111 additions and 1845 deletions
|
@ -19,7 +19,7 @@
|
|||
*/
|
||||
#include "wine/unicode.h"
|
||||
|
||||
extern unsigned int wine_decompose( WCHAR ch, WCHAR *dst, unsigned int dstlen );
|
||||
extern unsigned int wine_decompose( int flags, WCHAR ch, WCHAR *dst, unsigned int dstlen );
|
||||
extern const unsigned int collation_table[];
|
||||
|
||||
/*
|
||||
|
@ -79,8 +79,8 @@ int wine_get_sortkey(int flags, const WCHAR *src, int srclen, char *dst, int dst
|
|||
}
|
||||
|
||||
if (!dstlen) /* compute length */
|
||||
/* 4 * '\1' + 1 * '\0' + key length */
|
||||
return key_len[0] + key_len[1] + key_len[2] + key_len[3] + 4 + 1;
|
||||
/* 4 * '\1' + key length */
|
||||
return key_len[0] + key_len[1] + key_len[2] + key_len[3] + 4;
|
||||
|
||||
if (dstlen < key_len[0] + key_len[1] + key_len[2] + key_len[3] + 4 + 1)
|
||||
return 0; /* overflow */
|
||||
|
@ -153,11 +153,50 @@ int wine_get_sortkey(int flags, const WCHAR *src, int srclen, char *dst, int dst
|
|||
return key_ptr[3] - dst;
|
||||
}
|
||||
|
||||
static inline int compare_unicode_weights(int flags, const WCHAR *str1, int len1,
|
||||
const WCHAR *str2, int len2)
|
||||
enum weight
|
||||
{
|
||||
UNICODE_WEIGHT,
|
||||
DIACRITIC_WEIGHT,
|
||||
CASE_WEIGHT
|
||||
};
|
||||
|
||||
static unsigned int get_weight(WCHAR ch, enum weight type)
|
||||
{
|
||||
unsigned int ret;
|
||||
|
||||
ret = collation_table[collation_table[ch >> 8] + (ch & 0xff)];
|
||||
if (ret == (unsigned int)-1)
|
||||
return ch;
|
||||
|
||||
switch(type)
|
||||
{
|
||||
case UNICODE_WEIGHT:
|
||||
return ret >> 16;
|
||||
case DIACRITIC_WEIGHT:
|
||||
return (ret >> 8) & 0xff;
|
||||
case CASE_WEIGHT:
|
||||
default:
|
||||
return (ret >> 4) & 0x0f;
|
||||
}
|
||||
}
|
||||
|
||||
static void inc_str_pos(const WCHAR **str, int *len, int *dpos, int *dlen)
|
||||
{
|
||||
(*dpos)++;
|
||||
if (*dpos == *dlen)
|
||||
{
|
||||
*dpos = *dlen = 0;
|
||||
(*str)++;
|
||||
(*len)--;
|
||||
}
|
||||
}
|
||||
|
||||
static inline int compare_weights(int flags, const WCHAR *str1, int len1,
|
||||
const WCHAR *str2, int len2, enum weight type)
|
||||
{
|
||||
int dpos1 = 0, dpos2 = 0, dlen1 = 0, dlen2 = 0;
|
||||
WCHAR dstr1[4], dstr2[4];
|
||||
unsigned int ce1, ce2;
|
||||
int ret;
|
||||
|
||||
/* 32-bit collation element table format:
|
||||
* unicode weight - high 16 bit, diacritic weight - high 8 bit of low 16 bit,
|
||||
|
@ -165,20 +204,21 @@ static inline int compare_unicode_weights(int flags, const WCHAR *str1, int len1
|
|||
*/
|
||||
while (len1 > 0 && len2 > 0)
|
||||
{
|
||||
if (!dlen1) dlen1 = wine_decompose(0, *str1, dstr1, 4);
|
||||
if (!dlen2) dlen2 = wine_decompose(0, *str2, dstr2, 4);
|
||||
|
||||
if (flags & NORM_IGNORESYMBOLS)
|
||||
{
|
||||
int skip = 0;
|
||||
/* FIXME: not tested */
|
||||
if (get_char_typeW(*str1) & (C1_PUNCT | C1_SPACE))
|
||||
if (get_char_typeW(dstr1[dpos1]) & (C1_PUNCT | C1_SPACE))
|
||||
{
|
||||
str1++;
|
||||
len1--;
|
||||
inc_str_pos(&str1, &len1, &dpos1, &dlen1);
|
||||
skip = 1;
|
||||
}
|
||||
if (get_char_typeW(*str2) & (C1_PUNCT | C1_SPACE))
|
||||
if (get_char_typeW(dstr2[dpos2]) & (C1_PUNCT | C1_SPACE))
|
||||
{
|
||||
str2++;
|
||||
len2--;
|
||||
inc_str_pos(&str2, &len2, &dpos2, &dlen2);
|
||||
skip = 1;
|
||||
}
|
||||
if (skip) continue;
|
||||
|
@ -187,167 +227,56 @@ static inline int compare_unicode_weights(int flags, const WCHAR *str1, int len1
|
|||
/* hyphen and apostrophe are treated differently depending on
|
||||
* whether SORT_STRINGSORT specified or not
|
||||
*/
|
||||
if (!(flags & SORT_STRINGSORT))
|
||||
if (type == UNICODE_WEIGHT && !(flags & SORT_STRINGSORT))
|
||||
{
|
||||
if (*str1 == '-' || *str1 == '\'')
|
||||
if (dstr1[dpos1] == '-' || dstr1[dpos1] == '\'')
|
||||
{
|
||||
if (*str2 != '-' && *str2 != '\'')
|
||||
if (dstr2[dpos2] != '-' && dstr2[dpos2] != '\'')
|
||||
{
|
||||
str1++;
|
||||
len1--;
|
||||
inc_str_pos(&str1, &len1, &dpos1, &dlen1);
|
||||
continue;
|
||||
}
|
||||
}
|
||||
else if (*str2 == '-' || *str2 == '\'')
|
||||
else if (dstr2[dpos2] == '-' || dstr2[dpos2] == '\'')
|
||||
{
|
||||
str2++;
|
||||
len2--;
|
||||
inc_str_pos(&str2, &len2, &dpos2, &dlen2);
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
ce1 = collation_table[collation_table[*str1 >> 8] + (*str1 & 0xff)];
|
||||
ce2 = collation_table[collation_table[*str2 >> 8] + (*str2 & 0xff)];
|
||||
|
||||
if (ce1 != (unsigned int)-1 && ce2 != (unsigned int)-1)
|
||||
ret = (ce1 >> 16) - (ce2 >> 16);
|
||||
else
|
||||
ret = *str1 - *str2;
|
||||
|
||||
if (ret) return ret;
|
||||
|
||||
str1++;
|
||||
str2++;
|
||||
len1--;
|
||||
len2--;
|
||||
}
|
||||
while (len1 && !*str1)
|
||||
{
|
||||
str1++;
|
||||
len1--;
|
||||
}
|
||||
while (len2 && !*str2)
|
||||
{
|
||||
str2++;
|
||||
len2--;
|
||||
}
|
||||
return len1 - len2;
|
||||
}
|
||||
|
||||
static inline int compare_diacritic_weights(int flags, const WCHAR *str1, int len1,
|
||||
const WCHAR *str2, int len2)
|
||||
{
|
||||
unsigned int ce1, ce2;
|
||||
int ret;
|
||||
|
||||
/* 32-bit collation element table format:
|
||||
* unicode weight - high 16 bit, diacritic weight - high 8 bit of low 16 bit,
|
||||
* case weight - high 4 bit of low 8 bit.
|
||||
*/
|
||||
while (len1 > 0 && len2 > 0)
|
||||
{
|
||||
if (flags & NORM_IGNORESYMBOLS)
|
||||
ce1 = get_weight(dstr1[dpos1], type);
|
||||
if (!ce1)
|
||||
{
|
||||
int skip = 0;
|
||||
/* FIXME: not tested */
|
||||
if (get_char_typeW(*str1) & (C1_PUNCT | C1_SPACE))
|
||||
{
|
||||
str1++;
|
||||
len1--;
|
||||
skip = 1;
|
||||
}
|
||||
if (get_char_typeW(*str2) & (C1_PUNCT | C1_SPACE))
|
||||
{
|
||||
str2++;
|
||||
len2--;
|
||||
skip = 1;
|
||||
}
|
||||
if (skip) continue;
|
||||
inc_str_pos(&str1, &len1, &dpos1, &dlen1);
|
||||
continue;
|
||||
}
|
||||
ce2 = get_weight(dstr2[dpos2], type);
|
||||
if (!ce2)
|
||||
{
|
||||
inc_str_pos(&str2, &len2, &dpos2, &dlen2);
|
||||
continue;
|
||||
}
|
||||
|
||||
ce1 = collation_table[collation_table[*str1 >> 8] + (*str1 & 0xff)];
|
||||
ce2 = collation_table[collation_table[*str2 >> 8] + (*str2 & 0xff)];
|
||||
if (ce1 - ce2) return ce1 - ce2;
|
||||
|
||||
if (ce1 != (unsigned int)-1 && ce2 != (unsigned int)-1)
|
||||
ret = ((ce1 >> 8) & 0xff) - ((ce2 >> 8) & 0xff);
|
||||
else
|
||||
ret = *str1 - *str2;
|
||||
|
||||
if (ret) return ret;
|
||||
|
||||
str1++;
|
||||
str2++;
|
||||
len1--;
|
||||
len2--;
|
||||
inc_str_pos(&str1, &len1, &dpos1, &dlen1);
|
||||
inc_str_pos(&str2, &len2, &dpos2, &dlen2);
|
||||
}
|
||||
while (len1 && !*str1)
|
||||
while (len1)
|
||||
{
|
||||
str1++;
|
||||
len1--;
|
||||
if (!dlen1) dlen1 = wine_decompose(0, *str1, dstr1, 4);
|
||||
|
||||
ce1 = get_weight(dstr1[dpos1], type);
|
||||
if (ce1) break;
|
||||
inc_str_pos(&str1, &len1, &dpos1, &dlen1);
|
||||
}
|
||||
while (len2 && !*str2)
|
||||
while (len2)
|
||||
{
|
||||
str2++;
|
||||
len2--;
|
||||
}
|
||||
return len1 - len2;
|
||||
}
|
||||
if (!dlen2) dlen2 = wine_decompose(0, *str2, dstr2, 4);
|
||||
|
||||
static inline int compare_case_weights(int flags, const WCHAR *str1, int len1,
|
||||
const WCHAR *str2, int len2)
|
||||
{
|
||||
unsigned int ce1, ce2;
|
||||
int ret;
|
||||
|
||||
/* 32-bit collation element table format:
|
||||
* unicode weight - high 16 bit, diacritic weight - high 8 bit of low 16 bit,
|
||||
* case weight - high 4 bit of low 8 bit.
|
||||
*/
|
||||
while (len1 > 0 && len2 > 0)
|
||||
{
|
||||
if (flags & NORM_IGNORESYMBOLS)
|
||||
{
|
||||
int skip = 0;
|
||||
/* FIXME: not tested */
|
||||
if (get_char_typeW(*str1) & (C1_PUNCT | C1_SPACE))
|
||||
{
|
||||
str1++;
|
||||
len1--;
|
||||
skip = 1;
|
||||
}
|
||||
if (get_char_typeW(*str2) & (C1_PUNCT | C1_SPACE))
|
||||
{
|
||||
str2++;
|
||||
len2--;
|
||||
skip = 1;
|
||||
}
|
||||
if (skip) continue;
|
||||
}
|
||||
|
||||
ce1 = collation_table[collation_table[*str1 >> 8] + (*str1 & 0xff)];
|
||||
ce2 = collation_table[collation_table[*str2 >> 8] + (*str2 & 0xff)];
|
||||
|
||||
if (ce1 != (unsigned int)-1 && ce2 != (unsigned int)-1)
|
||||
ret = ((ce1 >> 4) & 0x0f) - ((ce2 >> 4) & 0x0f);
|
||||
else
|
||||
ret = *str1 - *str2;
|
||||
|
||||
if (ret) return ret;
|
||||
|
||||
str1++;
|
||||
str2++;
|
||||
len1--;
|
||||
len2--;
|
||||
}
|
||||
while (len1 && !*str1)
|
||||
{
|
||||
str1++;
|
||||
len1--;
|
||||
}
|
||||
while (len2 && !*str2)
|
||||
{
|
||||
str2++;
|
||||
len2--;
|
||||
ce2 = get_weight(dstr2[dpos2], type);
|
||||
if (ce2) break;
|
||||
inc_str_pos(&str2, &len2, &dpos2, &dlen2);
|
||||
}
|
||||
return len1 - len2;
|
||||
}
|
||||
|
@ -357,13 +286,13 @@ int wine_compare_string(int flags, const WCHAR *str1, int len1,
|
|||
{
|
||||
int ret;
|
||||
|
||||
ret = compare_unicode_weights(flags, str1, len1, str2, len2);
|
||||
ret = compare_weights(flags, str1, len1, str2, len2, UNICODE_WEIGHT);
|
||||
if (!ret)
|
||||
{
|
||||
if (!(flags & NORM_IGNORENONSPACE))
|
||||
ret = compare_diacritic_weights(flags, str1, len1, str2, len2);
|
||||
ret = compare_weights(flags, str1, len1, str2, len2, DIACRITIC_WEIGHT);
|
||||
if (!ret && !(flags & NORM_IGNORECASE))
|
||||
ret = compare_case_weights(flags, str1, len1, str2, len2);
|
||||
ret = compare_weights(flags, str1, len1, str2, len2, CASE_WEIGHT);
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue