/*
 * reactos/dll/nls/normaliz/IdnToAscii.c
 *
 * (383 lines, 12 KiB, C)
 */

#define WIN32_NO_STATUS
#include <wine/unicode.h>
#define NDEBUG
#include <debug.h>
/* Taken from Wine kernel32/locale.c */
/*
 * Numeric parameters shared by adapt() and the encoder loop in IdnToAscii().
 * The values are the ones the Punycode specification (RFC 3492) prescribes.
 */
enum {
    BASE      = 36,   /* number of distinct digit values ('a'-'z', '0'-'9') */
    TMIN      = 1,    /* lower clamp for the per-digit threshold */
    TMAX      = 26,   /* upper clamp for the per-digit threshold */
    SKEW      = 38,   /* added to delta in the final division of adapt() */
    DAMP      = 700,  /* divisor applied to delta on the first adaptation */
    INIT_BIAS = 72,   /* value the bias starts with for every label */
    INIT_N    = 128   /* value n starts with: the first non-ASCII code */
};
/*
 * Computes the next bias value from the delta that was just encoded.
 *
 * delta      delta value that has just been emitted
 * numpoints  number of code units handled so far (used as a divisor,
 *            so it must not be zero)
 * firsttime  non-zero for the first adaptation of a label; delta is then
 *            divided by DAMP instead of by 2
 *
 * Returns the new bias.
 */
static inline INT adapt(INT delta, INT numpoints, BOOL firsttime)
{
    INT offset = 0;

    if (firsttime)
        delta /= DAMP;
    else
        delta /= 2;

    delta += delta / numpoints;

    /* Scale delta down until it fits the threshold window, counting one
     * BASE step per division. */
    while (delta > ((BASE - TMIN) * TMAX) / 2)
    {
        delta /= BASE - TMIN;
        offset += BASE;
    }

    return offset + ((BASE - TMIN + 1) * delta) / (delta + SKEW);
}
/*
 * Looks up the entry for ch in a three-level table stored in one array:
 * the high byte of ch selects a first-level offset, bits 4-7 select a
 * second-level offset relative to it, and the low nibble selects the
 * final entry.
 */
static inline unsigned short get_table_entry( const unsigned short *table, WCHAR ch )
{
    const unsigned short level1 = table[ch >> 8];
    const unsigned short level2 = table[level1 + ((ch >> 4) & 0x0f)];

    return table[level2 + (ch & 0xf)];
}
/*
 * IdnToNameprepUnicode
 *
 * Splits the input into labels at the RFC 3490 separators and prepares each
 * label: ASCII-only labels are copied unchanged, other labels are mapped
 * through nameprep_mapping, folded with FoldStringW(MAP_FOLDCZONE) and then
 * checked against nameprep_char_type (unassigned / prohibited / bidi).
 *
 * dwFlags            only IDN_ALLOW_UNASSIGNED and IDN_USE_STD3_ASCII_RULES
 *                    are accepted
 * lpUnicodeCharStr   input string, must not be NULL
 * cchUnicodeChar     input length in WCHARs, or -1 to use the length of the
 *                    NUL-terminated string including its terminator
 * lpNameprepCharStr  output buffer, or NULL to only compute the length
 * cchNameprepChar    size of the output buffer in WCHARs
 *
 * Returns the number of WCHARs produced (or required when the output buffer
 * is NULL). Returns 0 on failure with the last error set to one of
 * ERROR_INVALID_FLAGS, ERROR_INVALID_PARAMETER, ERROR_INVALID_NAME,
 * ERROR_INSUFFICIENT_BUFFER, ERROR_NOT_ENOUGH_MEMORY or
 * ERROR_NO_UNICODE_TRANSLATION.
 */
INT WINAPI IdnToNameprepUnicode(DWORD dwFlags, LPCWSTR lpUnicodeCharStr, INT cchUnicodeChar,
                                LPWSTR lpNameprepCharStr, INT cchNameprepChar)
{
    /* Bits stored in nameprep_char_type for each character. */
    enum {
        UNASSIGNED = 0x1,
        PROHIBITED = 0x2,
        BIDI_RAL = 0x4,
        BIDI_L = 0x8
    };

    extern const unsigned short nameprep_char_type[];
    extern const WCHAR nameprep_mapping[];
    const WCHAR *ptr;
    WORD flags;
    WCHAR buf[64], *map_str, norm_str[64], ch;
    DWORD i, map_len, norm_len, label_len, mask, label_start, label_end, out = 0;
    BOOL have_bidi_ral, prohibit_bidi_ral, ascii_only;

    DPRINT("%x %p %d %p %d\n", dwFlags, lpUnicodeCharStr, cchUnicodeChar,
        lpNameprepCharStr, cchNameprepChar);

    if(dwFlags & ~(IDN_ALLOW_UNASSIGNED|IDN_USE_STD3_ASCII_RULES)) {
        SetLastError(ERROR_INVALID_FLAGS);
        return 0;
    }

    if(!lpUnicodeCharStr || cchUnicodeChar<-1) {
        SetLastError(ERROR_INVALID_PARAMETER);
        return 0;
    }

    if(cchUnicodeChar == -1)
        cchUnicodeChar = (UINT)strlenW(lpUnicodeCharStr)+1;
    if(!cchUnicodeChar || (cchUnicodeChar==1 && lpUnicodeCharStr[0]==0)) {
        SetLastError(ERROR_INVALID_NAME);
        return 0;
    }

    /* One iteration per label. */
    for(label_start=0; label_start<(UINT)cchUnicodeChar;) {
        ascii_only = TRUE;
        for(i=label_start; i<(UINT)cchUnicodeChar; i++) {
            ch = lpUnicodeCharStr[i];

            /* A NUL is only allowed as the very last input character. */
            if(i!=cchUnicodeChar-1 && !ch) {
                SetLastError(ERROR_INVALID_NAME);
                return 0;
            }
            /* check if ch is one of label separators defined in RFC3490 */
            if(!ch || ch=='.' || ch==0x3002 || ch==0xff0e || ch==0xff61)
                break;

            if(ch > 0x7f) {
                ascii_only = FALSE;
                continue;
            }

            if((dwFlags&IDN_USE_STD3_ASCII_RULES) == 0)
                continue;
            /* With STD3 rules only letters, digits and '-' are accepted. */
            if((ch>='a' && ch<='z') || (ch>='A' && ch<='Z')
                    || (ch>='0' && ch<='9') || ch=='-')
                continue;

            SetLastError(ERROR_INVALID_NAME);
            return 0;
        }
        label_end = i;
        /* last label may be empty */
        if(label_start==label_end && ch) {
            SetLastError(ERROR_INVALID_NAME);
            return 0;
        }

        /* With STD3 rules a label must not start or end with '-'. */
        if((dwFlags&IDN_USE_STD3_ASCII_RULES) && (lpUnicodeCharStr[label_start]=='-' ||
                    lpUnicodeCharStr[label_end-1]=='-')) {
            SetLastError(ERROR_INVALID_NAME);
            return 0;
        }

        if(ascii_only) {
            /* maximal label length is 63 characters */
            if(label_end-label_start > 63) {
                SetLastError(ERROR_INVALID_NAME);
                return 0;
            }
            /* Include the separator (or terminator) in the copied range. */
            if(label_end < (UINT)cchUnicodeChar)
                label_end++;

            if(!lpNameprepCharStr) {
                out += label_end-label_start;
            }else if(out+label_end-label_start <= (UINT)cchNameprepChar) {
                memcpy(lpNameprepCharStr+out, lpUnicodeCharStr+label_start,
                        (label_end-label_start)*sizeof(WCHAR));
                /* A non-ASCII separator is written out as '.'. */
                if(lpUnicodeCharStr[label_end-1] > 0x7f)
                    lpNameprepCharStr[out+label_end-label_start-1] = '.';
                out += label_end-label_start;
            }else {
                SetLastError(ERROR_INSUFFICIENT_BUFFER);
                return 0;
            }

            label_start = label_end;
            continue;
        }

        /*
         * First pass over the label: compute the mapped length.
         * Each mapping entry is three WCHARs: all zero keeps the character,
         * all 0xffff drops it, otherwise it holds a replacement of one to
         * three characters (zero-padded).
         */
        map_len = 0;
        for(i=label_start; i<label_end; i++) {
            ch = lpUnicodeCharStr[i];
            ptr = nameprep_mapping + nameprep_mapping[ch>>8];
            ptr = nameprep_mapping + ptr[(ch>>4)&0x0f] + 3*(ch&0x0f);

            if(!ptr[0]) map_len++;
            else if(!ptr[1]) map_len++;
            else if(!ptr[2]) map_len += 2;
            else if(ptr[0]!=0xffff || ptr[1]!=0xffff || ptr[2]!=0xffff) map_len += 3;
        }
        /* Use the stack buffer when the mapped label fits, the heap otherwise. */
        if(map_len*sizeof(WCHAR) > sizeof(buf)) {
            map_str = HeapAlloc(GetProcessHeap(), 0, map_len*sizeof(WCHAR));
            if(!map_str) {
                SetLastError(ERROR_NOT_ENOUGH_MEMORY);
                return 0;
            }
        }else {
            map_str = buf;
        }
        /* Second pass: write the mapped characters. */
        map_len = 0;
        for(i=label_start; i<label_end; i++) {
            ch = lpUnicodeCharStr[i];
            ptr = nameprep_mapping + nameprep_mapping[ch>>8];
            ptr = nameprep_mapping + ptr[(ch>>4)&0x0f] + 3*(ch&0x0f);

            if(!ptr[0]) {
                map_str[map_len++] = ch;
            }else if(!ptr[1]) {
                map_str[map_len++] = ptr[0];
            }else if(!ptr[2]) {
                map_str[map_len++] = ptr[0];
                map_str[map_len++] = ptr[1];
            }else if(ptr[0]!=0xffff || ptr[1]!=0xffff || ptr[2]!=0xffff) {
                map_str[map_len++] = ptr[0];
                map_str[map_len++] = ptr[1];
                map_str[map_len++] = ptr[2];
            }
        }

        /* One slot of norm_str is kept free for the separator added below. */
        norm_len = FoldStringW(MAP_FOLDCZONE, map_str, map_len,
                norm_str, sizeof(norm_str)/sizeof(WCHAR)-1);
        if(map_str != buf)
            HeapFree(GetProcessHeap(), 0, map_str);
        if(!norm_len) {
            /* A label that does not fit into 63 characters is invalid. */
            if(GetLastError() == ERROR_INSUFFICIENT_BUFFER)
                SetLastError(ERROR_INVALID_NAME);
            return 0;
        }

        /*
         * Remember the length of the label itself. The separator appended
         * below belongs to the output but is not part of the label, so it
         * must not take part in the "last character" bidi check.
         */
        label_len = norm_len;

        if(label_end < (UINT)cchUnicodeChar) {
            norm_str[norm_len++] = lpUnicodeCharStr[label_end] ? '.' : 0;
            label_end++;
        }

        if(!lpNameprepCharStr) {
            out += norm_len;
        }else if(out+norm_len <= (UINT)cchNameprepChar) {
            memcpy(lpNameprepCharStr+out, norm_str, norm_len*sizeof(WCHAR));
            out += norm_len;
        }else {
            SetLastError(ERROR_INSUFFICIENT_BUFFER);
            return 0;
        }

        have_bidi_ral = prohibit_bidi_ral = FALSE;
        mask = PROHIBITED;
        if((dwFlags&IDN_ALLOW_UNASSIGNED) == 0)
            mask |= UNASSIGNED;
        for(i=0; i<norm_len; i++) {
            ch = norm_str[i];
            flags = get_table_entry( nameprep_char_type, ch );

            if(flags & mask) {
                SetLastError((flags & PROHIBITED) ? ERROR_INVALID_NAME
                        : ERROR_NO_UNICODE_TRANSLATION);
                return 0;
            }

            if(flags & BIDI_RAL)
                have_bidi_ral = TRUE;
            if(flags & BIDI_L)
                prohibit_bidi_ral = TRUE;
        }

        /*
         * A label containing a BIDI_RAL character must both start and end
         * with one. The last character of the label is at label_len-1;
         * norm_len-1 may be the appended '.' or NUL, which would make every
         * such label followed by a separator fail.
         */
        if(have_bidi_ral) {
            ch = norm_str[0];
            flags = get_table_entry( nameprep_char_type, ch );
            if((flags & BIDI_RAL) == 0)
                prohibit_bidi_ral = TRUE;

            ch = norm_str[label_len-1];
            flags = get_table_entry( nameprep_char_type, ch );
            if((flags & BIDI_RAL) == 0)
                prohibit_bidi_ral = TRUE;
        }

        if(have_bidi_ral && prohibit_bidi_ral) {
            SetLastError(ERROR_INVALID_NAME);
            return 0;
        }

        label_start = label_end;
    }

    return out;
}
/*
 * IdnToAscii
 *
 * Converts a name to its ASCII form. The input is first prepared with
 * IdnToNameprepUnicode (called once to size a heap buffer and once to fill
 * it). Each label of the prepared string is then either copied unchanged
 * (when all of its code units are below 0x80) or written as "xn--",
 * followed by its code units below 0x80, a '-' delimiter and the encoded
 * remaining code units.
 *
 * dwFlags, lpUnicodeCharStr, cchUnicodeChar are passed through to
 * IdnToNameprepUnicode.
 * lpASCIICharStr  output buffer, or NULL to only compute the length
 * cchASCIIChar    size of the output buffer in WCHARs
 *
 * Returns the number of WCHARs produced (or required when lpASCIICharStr is
 * NULL). Returns 0 on failure; this function itself sets
 * ERROR_NOT_ENOUGH_MEMORY, ERROR_INSUFFICIENT_BUFFER or ERROR_INVALID_NAME,
 * other errors are left as set by IdnToNameprepUnicode.
 *
 * NOTE(review): the encoder works on individual WCHAR code units; surrogate
 * pairs appear to be encoded as two separate values - confirm whether that
 * is intended.
 */
INT WINAPI IdnToAscii(DWORD dwFlags, LPCWSTR lpUnicodeCharStr, INT cchUnicodeChar,
LPWSTR lpASCIICharStr, INT cchASCIIChar)
{
static const WCHAR prefixW[] = {'x','n','-','-'};
WCHAR *norm_str;
INT i, label_start, label_end, norm_len, out_label, out = 0;
DPRINT("%x %p %d %p %d\n", dwFlags, lpUnicodeCharStr, cchUnicodeChar,
lpASCIICharStr, cchASCIIChar);
/* First call only computes the length of the prepared string. */
norm_len = IdnToNameprepUnicode(dwFlags, lpUnicodeCharStr, cchUnicodeChar, NULL, 0);
if(!norm_len)
return 0;
norm_str = HeapAlloc(GetProcessHeap(), 0, norm_len*sizeof(WCHAR));
if(!norm_str) {
SetLastError(ERROR_NOT_ENOUGH_MEMORY);
return 0;
}
/* Second call fills the buffer; norm_str is freed on every exit path below. */
norm_len = IdnToNameprepUnicode(dwFlags, lpUnicodeCharStr,
cchUnicodeChar, norm_str, norm_len);
if(!norm_len) {
HeapFree(GetProcessHeap(), 0, norm_str);
return 0;
}
/* One iteration per label of the prepared string. */
for(label_start=0; label_start<norm_len;) {
/* Encoder state is reset for every label; b counts code units below 0x80. */
INT n = INIT_N, bias = INIT_BIAS;
INT delta = 0, b = 0, h;
/* Remember where this label starts in the output for the length check. */
out_label = out;
/* Find the end of the label and count its code units below 0x80. */
for(i=label_start; i<norm_len && norm_str[i]!='.' &&
norm_str[i]!=0x3002 && norm_str[i]!='\0'; i++)
if(norm_str[i] < 0x80)
b++;
label_end = i;
/* Label has only code units below 0x80: copy it as is. */
if(b == label_end-label_start) {
/* Also copy the separator or terminator that follows the label. */
if(label_end < norm_len)
b++;
if(!lpASCIICharStr) {
out += b;
}else if(out+b <= cchASCIIChar) {
memcpy(lpASCIICharStr+out, norm_str+label_start, b*sizeof(WCHAR));
out += b;
}else {
HeapFree(GetProcessHeap(), 0, norm_str);
SetLastError(ERROR_INSUFFICIENT_BUFFER);
return 0;
}
label_start = label_end+1;
continue;
}
/* Emit the prefix, the code units below 0x80 and the '-' delimiter. */
if(!lpASCIICharStr) {
out += 5+b; /* strlen(xn--...-) */
}else if(out+5+b <= cchASCIIChar) {
memcpy(lpASCIICharStr+out, prefixW, sizeof(prefixW));
out += 4;
for(i=label_start; i<label_end; i++)
if(norm_str[i] < 0x80)
lpASCIICharStr[out++] = norm_str[i];
lpASCIICharStr[out++] = '-';
}else {
HeapFree(GetProcessHeap(), 0, norm_str);
SetLastError(ERROR_INSUFFICIENT_BUFFER);
return 0;
}
/* Without any code unit below 0x80 the delimiter is dropped again: out
 * steps back so the next write overwrites the '-'. */
if(!b)
out--;
/* h counts the code units handled so far; loop until all are encoded. */
for(h=b; h<label_end-label_start;) {
INT m = 0xffff, q, k;
/* m becomes the smallest value in the label that is >= n. */
for(i=label_start; i<label_end; i++) {
if(norm_str[i]>=n && m>norm_str[i])
m = norm_str[i];
}
delta += (m-n)*(h+1);
n = m;
for(i=label_start; i<label_end; i++) {
if(norm_str[i] < n) {
delta++;
}else if(norm_str[i] == n) {
/* Write delta as a sequence of digits; t is the threshold for the
 * digit at position k, clamped to TMIN..TMAX. */
for(q=delta, k=BASE; ; k+=BASE) {
INT t = k<=bias ? TMIN : k>=bias+TMAX ? TMAX : k-bias;
INT disp = q<t ? q : t+(q-t)%(BASE-t);
if(!lpASCIICharStr) {
out++;
}else if(out+1 <= cchASCIIChar) {
/* Digit values 0..25 become 'a'..'z', 26..35 become '0'..'9'. */
lpASCIICharStr[out++] = disp<='z'-'a' ?
'a'+disp : '0'+disp-'z'+'a'-1;
}else {
HeapFree(GetProcessHeap(), 0, norm_str);
SetLastError(ERROR_INSUFFICIENT_BUFFER);
return 0;
}
/* A digit below the threshold is the last digit of this number. */
if(q < t)
break;
q = (q-t)/(BASE-t);
}
/* h==b is true only for the first value encoded in this label. */
bias = adapt(delta, h+1, h==b);
delta = 0;
h++;
}
}
delta++;
n++;
}
/* The encoded label, prefix included, must not exceed 63 characters. */
if(out-out_label > 63) {
HeapFree(GetProcessHeap(), 0, norm_str);
SetLastError(ERROR_INVALID_NAME);
return 0;
}
/* Write the separator that follows the label: '.' or the terminating NUL. */
if(label_end < norm_len) {
if(!lpASCIICharStr) {
out++;
}else if(out+1 <= cchASCIIChar) {
lpASCIICharStr[out++] = norm_str[label_end] ? '.' : 0;
}else {
HeapFree(GetProcessHeap(), 0, norm_str);
SetLastError(ERROR_INSUFFICIENT_BUFFER);
return 0;
}
}
label_start = label_end+1;
}
HeapFree(GetProcessHeap(), 0, norm_str);
return out;
}