diff --git a/reactos/tools/create_nls/Makefile b/reactos/tools/create_nls/Makefile new file mode 100644 index 00000000000..7056ce85b15 --- /dev/null +++ b/reactos/tools/create_nls/Makefile @@ -0,0 +1,54 @@
#
# ReactOS/NLS conversion tool
#
# Targets:
#   all (default)  build the create_nls host tool
#   clean          remove the tool and its object files
#   nls            build the tool if needed, then run it to generate .nls files
#   clean_nls      remove the generated .nls files
#
# The clean/nls/clean_nls recipes are selected by $(HOST), which must already
# be set when this file is parsed (rules.mak is only included at the end).
#
PATH_TO_TOP = ../..

TARGET=create_nls$(EXE_POSTFIX)

all: $(TARGET)

OBJECTS = create_nls.o

CLEAN_FILES = *.o create_nls$(EXE_POSTFIX)

create_nls$(EXE_POSTFIX): $(OBJECTS)
	$(HOST_CC) $(OBJECTS) -o create_nls$(EXE_POSTFIX)

HOST_CFLAGS = -I.

create_nls.o: create_nls.c
	$(HOST_CC) $(HOST_CFLAGS) -c create_nls.c -o create_nls.o

ifeq ($(HOST),mingw32-linux)
clean:
	rm -f *.o
	rm -f create_nls$(EXE_POSTFIX)
endif
ifeq ($(HOST),mingw32-windows)
clean:
	del *.o
	del create_nls$(EXE_POSTFIX)
endif

# The tool must exist before it can be run. On a Unix host the current
# directory is normally not in PATH, so the tool is started with "./".
ifeq ($(HOST),mingw32-linux)
nls: $(TARGET)
	./create_nls$(EXE_POSTFIX)
endif
ifeq ($(HOST),mingw32-windows)
nls: $(TARGET)
	create_nls$(EXE_POSTFIX)
endif

ifeq ($(HOST),mingw32-linux)
clean_nls:
	rm -f ../../media/nls/*.nls
endif
ifeq ($(HOST),mingw32-windows)
clean_nls:
	del ..\..\media\nls\*.nls
endif

# GNU make only recognizes the upper-case spelling; ".phony" is an ordinary
# target and would leave these targets at the mercy of same-named files.
.PHONY: all clean nls clean_nls


include $(PATH_TO_TOP)/rules.mak

# EOF
diff --git a/reactos/tools/create_nls/Readme b/reactos/tools/create_nls/Readme new file mode 100644 index 00000000000..e72804b6b74 --- /dev/null +++ b/reactos/tools/create_nls/Readme @@ -0,0 +1,23 @@
Create_nls - .nls file creation tool
------------------------------------

Create_nls is a tool to create binary .nls files from codepage mapping
files provided by Unicode Inc. Due to the confusing license these files
are not part of the ReactOS source tree.


You can easily download the files from 'http://www.unicode.org/Public' or
'ftp://ftp.unicode.org/Public'. Download the directory structure of the web/ftp
site into the unicode.org subdirectory and create_nls will build the .nls
files. The tool opens the mapping tables as 'unicode.org/MAPPINGS/...' and
the case mapping data as 'unicode.org/UnicodeData.txt', so the files must end
up at exactly those locations.

Makefile targets:
 'make': builds create_nls tool.
 'make clean': deletes all executable and object files.
 'make nls': generates binary .nls files.
 'make clean_nls': deletes binary .nls files.
+ +Since the binary .nls files are part of the ReactOS source tree it is not +necessary to build these files yourself. Therefore create_nls is not +integrated with the main makefile. You will have to build and run it +separately. diff --git a/reactos/tools/create_nls/create_nls.c b/reactos/tools/create_nls/create_nls.c new file mode 100644 index 00000000000..c3c31baffee --- /dev/null +++ b/reactos/tools/create_nls/create_nls.c @@ -0,0 +1,965 @@ +/* + * Tool for creating NT-like NLS files for Unicode <-> Codepage conversions. + * Tool for creating NT-like l_intl.nls file for case mapping of unicode + * characters. + * Copyright 2000 Timoshkov Dmitry + * Copyright 2001 Matei Alexandru + * + * Sources of information: + * Andrew Kozin's YAW project http://www.chat.ru/~stanson/yaw_en.html + * Ove Kõven's investigations http://www.ping.uio.no/~ovehk/nls + */ +#include +#include +#include +#include +#include +#include + +static const WCHAR * const uprtable[256]; +static const WCHAR * const lwrtable[256]; + +#define NLSDIR "../../media/nls" +#define LIBDIR "unicode.org/" + +typedef struct { + WORD wSize; /* in words 0x000D */ + WORD CodePage; + WORD MaxCharSize; /* 1 or 2 */ + BYTE DefaultChar[MAX_DEFAULTCHAR]; + WCHAR UnicodeDefaultChar; + WCHAR unknown1; + WCHAR unknown2; + BYTE LeadByte[MAX_LEADBYTES]; +} __attribute__((packed)) NLS_FILE_HEADER; + +/* +Support for translation from the multiple unicode chars +to the single code page char. 
+ +002D;HYPHEN-MINUS;Pd;0;ET;;;;;N;;;;; +00AD;SOFT HYPHEN;Pd;0;ON;;;;;N;;;;; +2010;HYPHEN;Pd;0;ON;;;;;N;;;;; +2011;NON-BREAKING HYPHEN;Pd;0;ON; 2010;;;;N;;;;; +2013;EN DASH;Pd;0;ON;;;;;N;;;;; +2014;EM DASH;Pd;0;ON;;;;;N;;;;; +2015;HORIZONTAL BAR;Pd;0;ON;;;;;N;QUOTATION DASH;;;; +*/ + +/* HYPHEN-MINUS aliases */ +static WCHAR hyphen_aliases[] = {0x00AD,0x2010,0x2011,0x2013,0x2014,0x2015,0}; + +static struct { + WCHAR cp_char; + WCHAR *alias; /* must be 0 terminated */ +} u2cp_alias[] = { +/* HYPHEN-MINUS aliases */ +{0x002D, hyphen_aliases} +}; + +static void patch_aliases(void *u2cp, CPINFOEXA *cpi) +{ + int i, j; + WCHAR *wc, *alias; + BYTE *c; + + if(cpi->MaxCharSize == 2) { + wc = (WCHAR *)u2cp; + for(i = 0; i < 65536; i++) { + for(j = 0; j < sizeof(u2cp_alias)/sizeof(u2cp_alias[0]); j++) { + alias = u2cp_alias[j].alias; + while(*alias) { + if(*alias == i && wc[i] == *(WCHAR *)cpi->DefaultChar) { + wc[i] = u2cp_alias[j].cp_char; + } + alias++; + } + } + } + } + else { + c = (BYTE *)u2cp; + for(i = 0; i < 65536; i++) { + for(j = 0; j < sizeof(u2cp_alias)/sizeof(u2cp_alias[0]); j++) { + alias = u2cp_alias[j].alias; + while(*alias) { + if(*alias == i && c[i] == cpi->DefaultChar[0] && u2cp_alias[j].cp_char < 256) { + c[i] = (BYTE)u2cp_alias[j].cp_char; + } + alias++; + } + } + } + } +} + +static BOOL write_unicode2cp_table(FILE *out, CPINFOEXA *cpi, WCHAR *table) +{ + void *u2cp; + WCHAR *wc; + CHAR *c; + int i; + BOOL ret = TRUE; + + u2cp = malloc(cpi->MaxCharSize * 65536); + if(!u2cp) { + printf("Not enough memory for Unicode to Codepage table\n"); + return FALSE; + } + + if(cpi->MaxCharSize == 2) { + wc = (WCHAR *)u2cp; + for(i = 0; i < 65536; i++) + wc[i] = *(WCHAR *)cpi->DefaultChar; + + for(i = 0; i < 65536; i++) + if (table[i] != '?') + wc[table[i]] = (WCHAR)i; + } + else { + c = (CHAR *)u2cp; + for(i = 0; i < 65536; i++) + c[i] = cpi->DefaultChar[0]; + + for(i = 0; i < 256; i++) + if (table[i] != '?') + c[table[i]] = (CHAR)i; + } + + patch_aliases(u2cp, 
cpi); + + if(fwrite(u2cp, 1, cpi->MaxCharSize * 65536, out) != cpi->MaxCharSize * 65536) + ret = FALSE; + + free(u2cp); + + return ret; +} + +static BOOL write_lb_ranges(FILE *out, CPINFOEXA *cpi, WCHAR *table) +{ + WCHAR sub_table[256]; + WORD offset, offsets[256]; + int i, j, range; + + memset(offsets, 0, sizeof(offsets)); + + offset = 0; + + for(i = 0; i < MAX_LEADBYTES; i += 2) { + for(range = cpi->LeadByte[i]; range != 0 && range <= cpi->LeadByte[i + 1]; range++) { + offset += 256; + offsets[range] = offset; + } + } + + if(fwrite(offsets, 1, sizeof(offsets), out) != sizeof(offsets)) + return FALSE; + + for(i = 0; i < MAX_LEADBYTES; i += 2) { + for(range = cpi->LeadByte[i]; range != 0 && range <= cpi->LeadByte[i + 1]; range++) { + /*printf("Writing sub table for LeadByte %02X\n", range);*/ + for(j = MAKEWORD(0, range); j <= MAKEWORD(0xFF, range); j++) { + sub_table[j - MAKEWORD(0, range)] = table[j]; + } + + if(fwrite(sub_table, 1, sizeof(sub_table), out) != sizeof(sub_table)) + return FALSE; + } + } + + return TRUE; +} + +static BOOL create_nls_file(char *name, CPINFOEXA *cpi, WCHAR *table, WCHAR *oemtable) +{ + FILE *out; + NLS_FILE_HEADER nls; + WORD wValue, number_of_lb_ranges, number_of_lb_subtables, i; + + printf("Creating NLS table \"%s\"\n", name); + + if(!(out = fopen(name, "wb"))) { + printf("Could not create file \"%s\"\n", name); + return FALSE; + } + + memset(&nls, 0, sizeof(nls)); + + nls.wSize = sizeof(nls) / sizeof(WORD); + nls.CodePage = cpi->CodePage; + nls.MaxCharSize = cpi->MaxCharSize; + memcpy(nls.DefaultChar, cpi->DefaultChar, MAX_DEFAULTCHAR); + nls.UnicodeDefaultChar = cpi->UnicodeDefaultChar; + nls.unknown1 = '?'; + nls.unknown2 = '?'; + memcpy(nls.LeadByte, cpi->LeadByte, MAX_LEADBYTES); + + if(fwrite(&nls, 1, sizeof(nls), out) != sizeof(nls)) { + fclose(out); + printf("Could not write to file \"%s\"\n", name); + return FALSE; + } + + number_of_lb_ranges = 0; + number_of_lb_subtables = 0; + + for(i = 0; i < MAX_LEADBYTES; i += 2) { + 
if(cpi->LeadByte[i] != 0 && cpi->LeadByte[i + 1] > cpi->LeadByte[i]) { + number_of_lb_ranges++; + number_of_lb_subtables += cpi->LeadByte[i + 1] - cpi->LeadByte[i] + 1; + } + } + + /*printf("Number of LeadByte ranges %d\n", number_of_lb_ranges);*/ + /*printf("Number of LeadByte subtables %d\n", number_of_lb_subtables);*/ + + /* Calculate offset to Unicode to CP table in words: + * 1. (256 * sizeof(WORD)) primary CP to Unicode table + + * 2. (WORD) optional OEM glyph table size in words + + * 3. OEM glyph table size in words * sizeof(WORD) + + * 4. (WORD) Number of DBCS LeadByte ranges + + * 5. if (Number of DBCS LeadByte ranges != 0) 256 * sizeof(WORD) offsets of lead byte sub tables + * 6. (Number of DBCS LeadByte sub tables * 256 * sizeof(WORD)) LeadByte sub tables + + * 7. (WORD) Unknown flag + */ + + wValue = (256 * sizeof(WORD) + /* 1 */ + sizeof(WORD) + /* 2 */ + ((oemtable !=NULL) ? (256 * sizeof(WORD)) : 0) + /* 3 */ + sizeof(WORD) + /* 4 */ + ((number_of_lb_subtables != 0) ? 256 * sizeof(WORD) : 0) + /* 5 */ + number_of_lb_subtables * 256 * sizeof(WORD) + /* 6 */ + sizeof(WORD) /* 7 */ + ) / sizeof(WORD); + + /* offset of Unicode to CP table in words */ + fwrite(&wValue, 1, sizeof(wValue), out); + + /* primary CP to Unicode table */ + if(fwrite(table, 1, 256 * sizeof(WCHAR), out) != 256 * sizeof(WCHAR)) { + fclose(out); + printf("Could not write to file \"%s\"\n", name); + return FALSE; + } + + /* optional OEM glyph table size in words */ + wValue = (oemtable != NULL) ? 
(256 * sizeof(WORD)) : 0; + fwrite(&wValue, 1, sizeof(wValue), out); + + /* optional OEM to Unicode table */ + if (oemtable) { + if(fwrite(oemtable, 1, 256 * sizeof(WCHAR), out) != 256 * sizeof(WCHAR)) { + fclose(out); + printf("Could not write to file \"%s\"\n", name); + return FALSE; + } + } + + /* Number of DBCS LeadByte ranges */ + fwrite(&number_of_lb_ranges, 1, sizeof(number_of_lb_ranges), out); + + /* offsets of lead byte sub tables and lead byte sub tables */ + if(number_of_lb_ranges > 0) { + if(!write_lb_ranges(out, cpi, table)) { + fclose(out); + printf("Could not write to file \"%s\"\n", name); + return FALSE; + } + } + + /* Unknown flag */ + wValue = 0; + fwrite(&wValue, 1, sizeof(wValue), out); + + if(!write_unicode2cp_table(out, cpi, table)) { + fclose(out); + printf("Could not write to file \"%s\"\n", name); + return FALSE; + } + + fclose(out); + return TRUE; +} + +/* correct the codepage information such as default chars */ +static void patch_codepage_info(CPINFOEXA *cpi) +{ + /* currently nothing */ +} + +static WCHAR *Load_CP2Unicode_Table(char *table_name, UINT cp, CPINFOEXA *cpi) +{ + char buf[256]; + char *p; + DWORD n, value; + FILE *file; + WCHAR *table; + int lb_ranges, lb_range_started, line; + + printf("Loading translation table \"%s\"\n", table_name); + + /* Init to default values */ + memset(cpi, 0, sizeof(CPINFOEXA)); + cpi->CodePage = cp; + *(WCHAR *)cpi->DefaultChar = '?'; + cpi->MaxCharSize = 1; + cpi->UnicodeDefaultChar = '?'; + + patch_codepage_info(cpi); + + table = (WCHAR *)malloc(sizeof(WCHAR) * 65536); + if(!table) { + printf("Not enough memory for Codepage to Unicode table\n"); + return NULL; + } + + for(n = 0; n < 256; n++) + table[n] = (WCHAR)n; + + for(n = 256; n < 65536; n++) + table[n] = cpi->UnicodeDefaultChar; + + file = fopen(table_name, "r"); + if(file == NULL) { + free(table); + return NULL; + } + + line = 0; + lb_ranges = 0; + lb_range_started = 0; + + while(fgets(buf, sizeof(buf), file)) { + line++; + p = buf; + 
while(isspace(*p)) p++; + + if(!*p || p[0] == '#') + continue; + + n = strtol(p, &p, 0); + if(n > 0xFFFF) { + printf("Line %d: Entry 0x%06lX: File \"%s\" corrupted\n", line, n, table_name); + continue; + } + + if(n > 0xFF && cpi->MaxCharSize != 2) { + /*printf("Line %d: Entry 0x%04lX: Switching to DBCS\n", line, n);*/ + cpi->MaxCharSize = 2; + } + + while(isspace(*p)) p++; + + if(!*p || p[0] == '#') { + /*printf("Line %d: Entry 0x%02lX has no Unicode value\n", line, n);*/ + } + else { + value = strtol(p, &p, 0); + if(value > 0xFFFF) { + printf("Line %d: Entry 0x%06lX unicode value: File \"%s\" corrupted\n", line, n, table_name); + } + table[n] = (WCHAR)value; + } + + /* wait for comment */ + while(*p && *p != '#') p++; + + if(*p == '#' && strstr(p, "DBCS LEAD BYTE")) { + /*printf("Line %d, entry 0x%02lX DBCS LEAD BYTE\n", line, n);*/ + if(n > 0xFF) { + printf("Line %d: Entry 0x%04lX: Error: DBCS lead byte overflowed\n", line, n); + continue; + } + + table[n] = (WCHAR)0; + + if(lb_range_started) { + cpi->LeadByte[(lb_ranges - 1) * 2 + 1] = (BYTE)n; + } + else { + /*printf("Line %d: Starting new DBCS lead byte range, entry 0x%02lX\n", line, n);*/ + if(lb_ranges < MAX_LEADBYTES/2) { + lb_ranges++; + lb_range_started = 1; + cpi->LeadByte[(lb_ranges - 1) * 2] = (BYTE)n; + } + else + printf("Line %d: Error: could not start new lead byte range\n", line); + } + } + else { + if(lb_range_started) + lb_range_started = 0; + } + } + + fclose(file); + + return table; +} + +static WCHAR *Load_OEM2Unicode_Table(char *table_name, WCHAR *def_table, UINT cp, CPINFOEXA *cpi) +{ + char buf[256]; + char *p; + DWORD n, value; + FILE *file; + WCHAR *table; + int line; + + printf("Loading oem glyph table \"%s\"\n", table_name); + + table = (WCHAR *)malloc(sizeof(WCHAR) * 65536); + if(!table) { + printf("Not enough memory for Codepage to Unicode table\n"); + return NULL; + } + + memcpy(table, def_table, 65536 * sizeof(WCHAR)); + + file = fopen(table_name, "r"); + if(file == NULL) { + 
free(table); + return NULL; + } + + while(fgets(buf, sizeof(buf), file)) { + line++; + p = buf; + while(isspace(*p)) p++; + + if(!*p || p[0] == '#') + continue; + + value = strtol(p, &p, 16); + if(value > 0xFFFF) { + printf("Line %d: Entry 0x%06lX: File \"%s\" corrupted\n", line, value, table_name); + continue; + } + + while(isspace(*p)) p++; + + if(!*p || p[0] == '#') { + /*printf("Line %d: Entry 0x%02lX has no Unicode value\n", line, n);*/ + continue; + } + else { + n = strtol(p, &p, 16); + if(n > 0xFFFF) { + printf("Line %d: Entry 0x%06lX unicode value: File \"%s\" corrupted\n", line, value, table_name); + continue; + } + } + + if (cpi->CodePage == 864) { + while(isspace(*p)) p++; + + if(!*p || p[0] == '#' || p[0] == '-') { + /*printf("Line %d: Entry 0x%02lX has no Unicode value\n", line, n);*/ + continue; + } + else { + n = strtol(p, &p, 16); + if(n > 0xFFFF) { + printf("Line %d: Entry 0x%06lX oem value: File \"%s\" corrupted\n", line, value, table_name); + } + continue; + } + } + + table[n] = (WCHAR)value; + } + + fclose(file); + + return table; +} + +int write_nls_files() +{ + WCHAR *table; + WCHAR *oemtable; + char nls_filename[256]; + CPINFOEXA cpi; + int i; + struct code_page { + UINT cp; + BOOL oem; + char *table_filename; + char *comment; + } pages[] = { + {37, FALSE, LIBDIR"MAPPINGS/VENDORS/MICSFT/EBCDIC/CP037.TXT", "IBM EBCDIC US Canada"}, + {424, FALSE, LIBDIR"MAPPINGS/VENDORS/MISC/CP424.TXT", "IBM EBCDIC Hebrew"}, + {437, TRUE, LIBDIR"MAPPINGS/VENDORS/MICSFT/PC/CP437.TXT", "OEM United States"}, + {500, FALSE, LIBDIR"MAPPINGS/VENDORS/MICSFT/EBCDIC/CP500.TXT", "IBM EBCDIC International"}, + /*{708, FALSE, "", "Arabic ASMO"},*/ + /*{720, FALSE, "", "Arabic Transparent ASMO"},*/ + {737, TRUE, LIBDIR"MAPPINGS/VENDORS/MICSFT/PC/CP737.TXT", "OEM Greek 437G"}, + {775, TRUE, LIBDIR"MAPPINGS/VENDORS/MICSFT/PC/CP775.TXT", "OEM Baltic"}, + {850, TRUE, LIBDIR"MAPPINGS/VENDORS/MICSFT/PC/CP850.TXT", "OEM Multilingual Latin 1"}, + {852, TRUE, 
LIBDIR"MAPPINGS/VENDORS/MICSFT/PC/CP852.TXT", "OEM Slovak Latin 2"}, + {855, TRUE, LIBDIR"MAPPINGS/VENDORS/MICSFT/PC/CP855.TXT", "OEM Cyrillic" }, + {856, TRUE, LIBDIR"MAPPINGS/VENDORS/MISC/CP856.TXT", "Hebrew PC"}, + {857, TRUE, LIBDIR"MAPPINGS/VENDORS/MICSFT/PC/CP857.TXT", "OEM Turkish"}, + {860, TRUE, LIBDIR"MAPPINGS/VENDORS/MICSFT/PC/CP860.TXT", "OEM Portuguese"}, + {861, TRUE, LIBDIR"MAPPINGS/VENDORS/MICSFT/PC/CP861.TXT", "OEM Icelandic"}, + {862, TRUE, LIBDIR"MAPPINGS/VENDORS/MICSFT/PC/CP862.TXT", "OEM Hebrew"}, + {863, TRUE, LIBDIR"MAPPINGS/VENDORS/MICSFT/PC/CP863.TXT", "OEM Canadian French"}, + {864, TRUE, LIBDIR"MAPPINGS/VENDORS/MICSFT/PC/CP864.TXT", "OEM Arabic"}, + {865, TRUE, LIBDIR"MAPPINGS/VENDORS/MICSFT/PC/CP865.TXT", "OEM Nordic"}, + {866, TRUE, LIBDIR"MAPPINGS/VENDORS/MICSFT/PC/CP866.TXT", "OEM Russian"}, + {869, TRUE, LIBDIR"MAPPINGS/VENDORS/MICSFT/PC/CP869.TXT", "OEM Greek"}, + /*{870, FALSE, "", "IBM EBCDIC Multilingual/ROECE (Latin 2)"},*/ + {874, FALSE, LIBDIR"MAPPINGS/VENDORS/MICSFT/PC/CP874.TXT", "ANSI/OEM Thai"}, + {875, FALSE, LIBDIR"MAPPINGS/VENDORS/MICSFT/EBCDIC/CP875.TXT", "IBM EBCDIC Greek"}, + {878, FALSE, LIBDIR"MAPPINGS/VENDORS/MISC/KOI8-R.TXT", "Russian KOI8"}, + {932, FALSE, LIBDIR"MAPPINGS/VENDORS/MICSFT/WINDOWS/CP932.TXT", "ANSI/OEM Japanese Shift-JIS"}, + {936, FALSE, LIBDIR"MAPPINGS/VENDORS/MICSFT/WINDOWS/CP936.TXT", "ANSI/OEM Simplified Chinese GBK"}, + {949, FALSE, LIBDIR"MAPPINGS/VENDORS/MICSFT/WINDOWS/CP949.TXT", "ANSI/OEM Korean Unified Hangul"}, + {950, FALSE, LIBDIR"MAPPINGS/VENDORS/MICSFT/WINDOWS/CP950.TXT", "ANSI/OEM Traditional Chinese Big5"}, + {1006, FALSE, LIBDIR"MAPPINGS/VENDORS/MISC/CP1006.TXT", "IBM Arabic"}, + {1026, FALSE, LIBDIR"MAPPINGS/VENDORS/MICSFT/EBCDIC/CP1026.TXT", "IBM EBCDIC Latin 5 Turkish"}, + {1250, FALSE, LIBDIR"MAPPINGS/VENDORS/MICSFT/WINDOWS/CP1250.TXT", "ANSI Eastern Europe"}, + {1251, FALSE, LIBDIR"MAPPINGS/VENDORS/MICSFT/WINDOWS/CP1251.TXT", "ANSI Cyrillic"}, + {1252, FALSE, 
LIBDIR"MAPPINGS/VENDORS/MICSFT/WINDOWS/CP1252.TXT", "ANSI Latin 1"}, + {1253, FALSE, LIBDIR"MAPPINGS/VENDORS/MICSFT/WINDOWS/CP1253.TXT", "ANSI Greek"}, + {1254, FALSE, LIBDIR"MAPPINGS/VENDORS/MICSFT/WINDOWS/CP1254.TXT", "ANSI Turkish"}, + {1255, FALSE, LIBDIR"MAPPINGS/VENDORS/MICSFT/WINDOWS/CP1255.TXT", "ANSI Hebrew"}, + {1256, FALSE, LIBDIR"MAPPINGS/VENDORS/MICSFT/WINDOWS/CP1256.TXT", "ANSI Arabic"}, + {1257, FALSE, LIBDIR"MAPPINGS/VENDORS/MICSFT/WINDOWS/CP1257.TXT", "ANSI Baltic"}, + {1258, FALSE, LIBDIR"MAPPINGS/VENDORS/MICSFT/WINDOWS/CP1258.TXT", "ANSI/OEM Viet Nam"}, + {10000, FALSE, LIBDIR"MAPPINGS/VENDORS/MICSFT/MAC/ROMAN.TXT", "Mac Roman"}, + {10006, FALSE, LIBDIR"MAPPINGS/VENDORS/MICSFT/MAC/GREEK.TXT", "Mac Greek"}, + {10007, FALSE, LIBDIR"MAPPINGS/VENDORS/MICSFT/MAC/CYRILLIC.TXT", "Mac Cyrillic"}, + {10029, FALSE, LIBDIR"MAPPINGS/VENDORS/MICSFT/MAC/LATIN2.TXT", "Mac Latin 2"}, + {10079, FALSE, LIBDIR"MAPPINGS/VENDORS/MICSFT/MAC/ICELAND.TXT", "Mac Icelandic"}, + {10081, FALSE, LIBDIR"MAPPINGS/VENDORS/MICSFT/MAC/TURKISH.TXT", "Mac Turkish"}, + /*{20000, FALSE, "", "CNS Taiwan"},*/ + /*{20001, FALSE, "", "TCA Taiwan"},*/ + /*{20002, FALSE, "", "Eten Taiwan"},*/ + /*{20003, FALSE, "", "IBM5550 Taiwan"},*/ + /*{20004, FALSE, "", "TeleText Taiwan"},*/ + /*{20005, FALSE, "", "Wang Taiwan"},*/ + /*{20105, FALSE, "", "IA5 IRV International Alphabet No.5"},*/ + /*{20106, FALSE, "", "IA5 German"},*/ + /*{20107, FALSE, "", "IA5 Swedish"},*/ + /*{20108, FALSE, "", "IA5 Norwegian"},*/ + /*{20127, FALSE, "", "US ASCII"},*/ + /*{20261, FALSE, "", "T.61"},*/ + /*{20269, FALSE, "", "ISO 6937 NonSpacing Accent"},*/ + /*{20273, FALSE, "", "IBM EBCDIC Germany"},*/ + /*{20277, FALSE, "", "IBM EBCDIC Denmark/Norway"},*/ + /*{20278, FALSE, "", "IBM EBCDIC Finland/Sweden"},*/ + /*{20280, FALSE, "", "IBM EBCDIC Italy"},*/ + /*{20284, FALSE, "", "IBM EBCDIC Latin America/Spain"},*/ + /*{20285, FALSE, "", "IBM EBCDIC United Kingdom"},*/ + /*{20290, FALSE, "", "IBM EBCDIC Japanese 
Katakana Extended"},*/ + /*{20297, FALSE, "", "IBM EBCDIC France"},*/ + /*{20420, FALSE, "", "IBM EBCDIC Arabic"},*/ + /*{20423, FALSE, "", "IBM EBCDIC Greek"},*/ + /*{20424, FALSE, "", "IBM EBCDIC Hebrew"},*/ + /*{20833, FALSE, "", "IBM EBCDIC Korean Extended"},*/ + /*{20838, FALSE, "", "IBM EBCDIC Thai"},*/ + /*{20871, FALSE, "", "IBM EBCDIC Icelandic"},*/ + /*{20880, FALSE, "", "IBM EBCDIC Cyrillic (Russian)"},*/ + {20866, FALSE, LIBDIR"MAPPINGS/VENDORS/MISC/KOI8-R.TXT", "Russian KOI8"}, + /*{20905, FALSE, "", "IBM EBCDIC Turkish"},*/ + /*{21025, FALSE, "", "IBM EBCDIC Cyrillic (Serbian, Bulgarian)"},*/ + /*{21027, FALSE, "", "Ext Alpha Lowercase"},*/ + {28591, FALSE, LIBDIR"MAPPINGS/ISO8859/8859-1.TXT", "ISO 8859-1 Latin 1"}, + {28592, FALSE, LIBDIR"MAPPINGS/ISO8859/8859-2.TXT", "ISO 8859-2 Eastern Europe"}, + {28593, FALSE, LIBDIR"MAPPINGS/ISO8859/8859-3.TXT", "ISO 8859-3 Turkish"}, + {28594, FALSE, LIBDIR"MAPPINGS/ISO8859/8859-4.TXT", "ISO 8859-4 Baltic"}, + {28595, FALSE, LIBDIR"MAPPINGS/ISO8859/8859-5.TXT", "ISO 8859-5 Cyrillic"}, + {28596, FALSE, LIBDIR"MAPPINGS/ISO8859/8859-6.TXT", "ISO 8859-6 Arabic"}, + {28597, FALSE, LIBDIR"MAPPINGS/ISO8859/8859-7.TXT", "ISO 8859-7 Greek"}, + {28598, FALSE, LIBDIR"MAPPINGS/ISO8859/8859-8.TXT", "ISO 8859-8 Hebrew"}, + {28599, FALSE, LIBDIR"MAPPINGS/ISO8859/8859-9.TXT", "ISO 8859-9 Latin 5"} + }; + + for(i = 0; i < sizeof(pages)/sizeof(pages[0]); i++) { + table = Load_CP2Unicode_Table(pages[i].table_filename, pages[i].cp, &cpi); + if(!table) { + printf("Could not load \"%s\" (%s)\n", pages[i].table_filename, pages[i].comment); + continue; + } + + if (pages[i].oem) { + oemtable = Load_OEM2Unicode_Table(LIBDIR"MAPPINGS/VENDORS/MISC/IBMGRAPH.TXT", table, pages[i].cp, &cpi); + if(!oemtable) { + printf("Could not load \"%s\" (%s)\n", LIBDIR"MAPPINGS/VENDORS/MISC/IBMGRAPH.TXT", "IBM OEM glyph table"); + continue; + } + } + + sprintf(nls_filename, "%s/c_%03d.nls", NLSDIR, cpi.CodePage); + if(!create_nls_file(nls_filename, &cpi, 
table, pages[i].oem ? oemtable : NULL)) { + printf("Could not write \"%s\" (%s)\n", nls_filename, pages[i].comment); + } + + if (pages[i].oem) + free(oemtable); + + free(table); + } + + return 0; +} + + + +static WORD *to_upper_org = NULL, *to_lower_org = NULL; + +static WORD diffs[256]; +static int number_of_diffs; + +static WORD number_of_subtables_with_diffs; +/* pointers to subtables with 16 elements in each to the main table */ +static WORD *subtables_with_diffs[4096]; + +static WORD number_of_subtables_with_offsets; +/* subtables with 16 elements */ +static WORD subtables_with_offsets[4096 * 16]; + +static void test_packed_table(WCHAR *table) +{ + WCHAR test_str[] = L"This is an English text. Ïî-ðóññêè ÿ ïèñàòü óìåþ íåìíîæêî. 1234567890"; + //WORD diff, off; + //WORD *sub_table; + DWORD i, len; + + len = lstrlenW(test_str); + + for(i = 0; i < len + 1; i++) { + /*off = table[HIBYTE(test_str[i])]; + + sub_table = table + off; + off = sub_table[LOBYTE(test_str[i]) >> 4]; + + sub_table = table + off; + off = LOBYTE(test_str[i]) & 0x0F; + + diff = sub_table[off]; + + test_str[i] += diff;*/ + test_str[i] += table[table[table[HIBYTE(test_str[i])] + (LOBYTE(test_str[i]) >> 4)] + (LOBYTE(test_str[i]) & 0x0F)]; + } +/* + { + FILE *file; + static int n = 0; + char name[20]; + + sprintf(name, "text%02d.dat", n++); + file = fopen(name, "wb"); + fwrite(test_str, len * sizeof(WCHAR), 1, file); + fclose(file); + }*/ +} + +static BOOL CreateCaseDiff(char *table_name) +{ + char buf[256]; + char *p; + WORD code, case_mapping; + FILE *file; + int line; + + to_upper_org = (WORD *)calloc(65536, sizeof(WORD)); + if(!to_upper_org) { + printf("Not enough memory for to upper table\n"); + return FALSE; + } + + to_lower_org = (WORD *)calloc(65536, sizeof(WORD)); + if(!to_lower_org) { + printf("Not enough memory for to lower table\n"); + return FALSE; + } + + file = fopen(table_name, "r"); + if(file == NULL) { + printf("Could not open file \"%s\"\n", table_name); + return FALSE; + } + + 
line = 0; + + while(fgets(buf, sizeof(buf), file)) { + line++; + p = buf; + while(*p && isspace(*p)) p++; + + if(!*p) + continue; + + /* 0. Code value */ + code = (WORD)strtol(p, &p, 16); + + //if(code != 0x9A0 && code != 0xBA0) + //continue; + + while(*p && *p != ';') p++; + if(!*p) + continue; + p++; + + /* 1. Character name */ + while(*p && *p != ';') p++; + if(!*p) + continue; + p++; + + /* 2. General Category */ + while(*p && *p != ';') p++; + if(!*p) + continue; + p++; + + /* 3. Canonical Combining Classes */ + while(*p && *p != ';') p++; + if(!*p) + continue; + p++; + + /* 4. Bidirectional Category */ + while(*p && *p != ';') p++; + if(!*p) + continue; + p++; + + /* 5. Character Decomposition Mapping */ + while(*p && *p != ';') p++; + if(!*p) + continue; + p++; + + /* 6. Decimal digit value */ + while(*p && *p != ';') p++; + if(!*p) + continue; + p++; + + /* 7. Digit value */ + while(*p && *p != ';') p++; + if(!*p) + continue; + p++; + + /* 8. Numeric value */ + while(*p && *p != ';') p++; + if(!*p) + continue; + p++; + + /* 9. Mirrored */ + while(*p && *p != ';') p++; + if(!*p) + continue; + p++; + + /* 10. Unicode 1.0 Name */ + while(*p && *p != ';') p++; + if(!*p) + continue; + p++; + + /* 11. 10646 comment field */ + while(*p && *p != ';') p++; + if(!*p) + continue; + p++; + + /* 12. Uppercase Mapping */ + while(*p && isspace(*p)) p++; + if(!*p) continue; + if(*p != ';') { + case_mapping = (WORD)strtol(p, &p, 16); + to_upper_org[code] = case_mapping - code; + while(*p && *p != ';') p++; + } + else + p++; + + /* 13. Lowercase Mapping */ + while(*p && isspace(*p)) p++; + if(!*p) continue; + if(*p != ';') { + case_mapping = (WORD)strtol(p, &p, 16); + to_lower_org[code] = case_mapping - code; + while(*p && *p != ';') p++; + } + else + p++; + + /* 14. 
Titlecase Mapping */ + while(*p && *p != ';') p++; + if(!*p) + continue; + p++; + } + + fclose(file); + + return TRUE; +} + +static int find_diff(WORD diff) +{ + int i; + + for(i = 0; i < number_of_diffs; i++) { + if(diffs[i] == diff) + return i; + } + + return -1; +} + +static WORD find_subtable_with_diffs(WORD *table, WORD *subtable) +{ + WORD index; + + for(index = 0; index < number_of_subtables_with_diffs; index++) { + if(memcmp(subtables_with_diffs[index], subtable, 16 * sizeof(WORD)) == 0) { + return index; + } + } + + if(number_of_subtables_with_diffs >= 4096) { + printf("Could not add new subtable with diffs, storage is full\n"); + return 0; + } + + subtables_with_diffs[number_of_subtables_with_diffs] = subtable; + number_of_subtables_with_diffs++; + + return index; +} + +static WORD find_subtable_with_offsets(WORD *subtable) +{ + WORD index; + + for(index = 0; index < number_of_subtables_with_offsets; index++) { + if(memcmp(&subtables_with_offsets[index * 16], subtable, 16 * sizeof(WORD)) == 0) { + return index; + } + } + + if(number_of_subtables_with_offsets >= 4096) { + printf("Could not add new subtable with offsets, storage is full\n"); + return 0; + } + + memcpy(&subtables_with_offsets[number_of_subtables_with_offsets * 16], subtable, 16 * sizeof(WORD)); + number_of_subtables_with_offsets++; + + return index; +} + +static WORD *pack_table(WORD *table, WORD *packed_size_in_words) +{ + WORD high, low4, index; + WORD main_index[256]; + WORD temp_subtable[16]; + WORD *packed_table; + WORD *subtable_src, *subtable_dst; + + memset(subtables_with_diffs, 0, sizeof(subtables_with_diffs)); + number_of_subtables_with_diffs = 0; + + memset(subtables_with_offsets, 0, sizeof(subtables_with_offsets)); + number_of_subtables_with_offsets = 0; + + for(high = 0; high < 256; high++) { + for(low4 = 0; low4 < 256; low4 += 16) { + index = find_subtable_with_diffs(table, &table[MAKEWORD(low4, high)]); + + temp_subtable[low4 >> 4] = index; + } + + index = 
find_subtable_with_offsets(temp_subtable); + main_index[high] = index; + } + + *packed_size_in_words = 0x100 + number_of_subtables_with_offsets * 16 + number_of_subtables_with_diffs * 16; + packed_table = calloc(*packed_size_in_words, sizeof(WORD)); + + /* fill main index according to the subtables_with_offsets */ + for(high = 0; high < 256; high++) { + packed_table[high] = 0x100 + main_index[high] * 16; + } + + //memcpy(sub_table, subtables_with_offsets, number_of_subtables_with_offsets * 16); + + /* fill subtable index according to the subtables_with_diffs */ + for(index = 0; index < number_of_subtables_with_offsets; index++) { + subtable_dst = packed_table + 0x100 + index * 16; + subtable_src = &subtables_with_offsets[index * 16]; + + for(low4 = 0; low4 < 16; low4++) { + subtable_dst[low4] = 0x100 + number_of_subtables_with_offsets * 16 + subtable_src[low4] * 16; + } + } + + + for(index = 0; index < number_of_subtables_with_diffs; index++) { + subtable_dst = packed_table + 0x100 + number_of_subtables_with_offsets * 16 + index * 16; + memcpy(subtable_dst, subtables_with_diffs[index], 16 * sizeof(WORD)); + + } + + + test_packed_table(packed_table); + + return packed_table; +} + +int write_casemap_file(void) +{ + WORD packed_size_in_words, offset_to_next_table_in_words; + WORD *packed_table, value; + FILE *file; + + if(!CreateCaseDiff(LIBDIR"UnicodeData.txt")) + return -1; + + file = fopen(NLSDIR"/l_intl.nls", "wb"); + + /* write version number */ + value = 1; + fwrite(&value, 1, sizeof(WORD), file); + + /* pack upper case table */ + packed_table = pack_table(to_upper_org, &packed_size_in_words); + offset_to_next_table_in_words = packed_size_in_words + 1; + fwrite(&offset_to_next_table_in_words, 1, sizeof(WORD), file); + /* write packed upper case table */ + fwrite(packed_table, sizeof(WORD), packed_size_in_words, file); + free(packed_table); + + /* pack lower case table */ + packed_table = pack_table(to_lower_org, &packed_size_in_words); + 
offset_to_next_table_in_words = packed_size_in_words + 1; + fwrite(&offset_to_next_table_in_words, 1, sizeof(WORD), file); + /* write packed lower case table */ + fwrite(packed_table, sizeof(WORD), packed_size_in_words, file); + free(packed_table); + + fclose(file); + + free(to_upper_org); + free(to_lower_org); + + return 0; +} + +int main() +{ + write_nls_files(); + write_casemap_file(); + + return 0; +} diff --git a/reactos/tools/create_nls/unicode.org/.cvsignore b/reactos/tools/create_nls/unicode.org/.cvsignore new file mode 100644 index 00000000000..2211df63dd2 --- /dev/null +++ b/reactos/tools/create_nls/unicode.org/.cvsignore @@ -0,0 +1 @@ +*.txt