Added nls tool.

svn path=/trunk/; revision=2471
This commit is contained in:
Eric Kohl 2002-01-02 00:48:14 +00:00
parent 6ade027031
commit ab4a7cd152
4 changed files with 1043 additions and 0 deletions

View file

@ -0,0 +1,54 @@
#
# ReactOS/NLS conversion tool
#
PATH_TO_TOP = ../..
TARGET=create_nls$(EXE_POSTFIX)
all: $(TARGET)
OBJECTS = create_nls.o
CLEAN_FILES = *.o create_nls$(EXE_POSTFIX)
create_nls$(EXE_POSTFIX): $(OBJECTS)
$(HOST_CC) $(OBJECTS) -o create_nls$(EXE_POSTFIX)
HOST_CFLAGS = -I.
create_nls.o: create_nls.c
$(HOST_CC) $(HOST_CFLAGS) -c create_nls.c -o create_nls.o
ifeq ($(HOST),mingw32-linux)
clean:
rm -f *.o
rm -f create_nls$(EXE_POSTFIX)
endif
ifeq ($(HOST),mingw32-windows)
clean:
del *.o
del create_nls$(EXE_POSTFIX)
endif
.phony: clean
nls:
create_nls$(EXE_POSTFIX)
.phony: nls
ifeq ($(HOST),mingw32-linux)
clean_nls:
rm -f ../../media/nls/*.nls
endif
ifeq ($(HOST),mingw32-windows)
clean_nls:
del ..\..\media\nls\*.nls
endif
.phony: clean_nls
include $(PATH_TO_TOP)/rules.mak
# EOF

View file

@ -0,0 +1,23 @@
Create_nls - .nls file creation tool
------------------------------------
Create_nls is a tool to create binary .nls files from codepage mapping
files provided by Unicode Inc. Due to the confusing license these files
are not part of the ReactOS source tree.
You can easily download the files from 'http://www.unicode.org/public' or
'ftp://ftp.unicode.org/pub'. Download the directory structure of the web/ftp
site into the unicode.org subdirectory and create_nls will build the .nls
files.
Makefile targets:
'make': builds create_nls tool.
'make clean': deletes all executable and object files
'make nls': generates binary .nls files.
'make clean_nls': deletes binary .nls files.
Since the binary .nls files are part of the ReactOS source tree it is not
necessary to build these files yourself. Therefore create_nls is not
integrated with the main makefile. You will have to build and run it
separately.

View file

@ -0,0 +1,965 @@
/*
* Tool for creating NT-like NLS files for Unicode <-> Codepage conversions.
* Tool for creating NT-like l_intl.nls file for case mapping of unicode
* characters.
* Copyright 2000 Timoshkov Dmitry
* Copyright 2001 Matei Alexandru
*
* Sources of information:
* Andrew Kozin's YAW project http://www.chat.ru/~stanson/yaw_en.html
* Ove Kõven's investigations http://www.ping.uio.no/~ovehk/nls
*/
#include <windows.h>
#include <stdio.h>
#include <stdlib.h>
#include <malloc.h>
#include <string.h>
#include <ctype.h>
static const WCHAR * const uprtable[256];
static const WCHAR * const lwrtable[256];
#define NLSDIR "../../media/nls"
#define LIBDIR "unicode.org/"
typedef struct {
WORD wSize; /* in words 0x000D */
WORD CodePage;
WORD MaxCharSize; /* 1 or 2 */
BYTE DefaultChar[MAX_DEFAULTCHAR];
WCHAR UnicodeDefaultChar;
WCHAR unknown1;
WCHAR unknown2;
BYTE LeadByte[MAX_LEADBYTES];
} __attribute__((packed)) NLS_FILE_HEADER;
/*
Support for translation from the multiple unicode chars
to the single code page char.
002D;HYPHEN-MINUS;Pd;0;ET;;;;;N;;;;;
00AD;SOFT HYPHEN;Pd;0;ON;;;;;N;;;;;
2010;HYPHEN;Pd;0;ON;;;;;N;;;;;
2011;NON-BREAKING HYPHEN;Pd;0;ON;<noBreak> 2010;;;;N;;;;;
2013;EN DASH;Pd;0;ON;;;;;N;;;;;
2014;EM DASH;Pd;0;ON;;;;;N;;;;;
2015;HORIZONTAL BAR;Pd;0;ON;;;;;N;QUOTATION DASH;;;;
*/
/* HYPHEN-MINUS aliases */
static WCHAR hyphen_aliases[] = {0x00AD,0x2010,0x2011,0x2013,0x2014,0x2015,0};
static struct {
WCHAR cp_char;
WCHAR *alias; /* must be 0 terminated */
} u2cp_alias[] = {
/* HYPHEN-MINUS aliases */
{0x002D, hyphen_aliases}
};
static void patch_aliases(void *u2cp, CPINFOEXA *cpi)
{
int i, j;
WCHAR *wc, *alias;
BYTE *c;
if(cpi->MaxCharSize == 2) {
wc = (WCHAR *)u2cp;
for(i = 0; i < 65536; i++) {
for(j = 0; j < sizeof(u2cp_alias)/sizeof(u2cp_alias[0]); j++) {
alias = u2cp_alias[j].alias;
while(*alias) {
if(*alias == i && wc[i] == *(WCHAR *)cpi->DefaultChar) {
wc[i] = u2cp_alias[j].cp_char;
}
alias++;
}
}
}
}
else {
c = (BYTE *)u2cp;
for(i = 0; i < 65536; i++) {
for(j = 0; j < sizeof(u2cp_alias)/sizeof(u2cp_alias[0]); j++) {
alias = u2cp_alias[j].alias;
while(*alias) {
if(*alias == i && c[i] == cpi->DefaultChar[0] && u2cp_alias[j].cp_char < 256) {
c[i] = (BYTE)u2cp_alias[j].cp_char;
}
alias++;
}
}
}
}
}
static BOOL write_unicode2cp_table(FILE *out, CPINFOEXA *cpi, WCHAR *table)
{
void *u2cp;
WCHAR *wc;
CHAR *c;
int i;
BOOL ret = TRUE;
u2cp = malloc(cpi->MaxCharSize * 65536);
if(!u2cp) {
printf("Not enough memory for Unicode to Codepage table\n");
return FALSE;
}
if(cpi->MaxCharSize == 2) {
wc = (WCHAR *)u2cp;
for(i = 0; i < 65536; i++)
wc[i] = *(WCHAR *)cpi->DefaultChar;
for(i = 0; i < 65536; i++)
if (table[i] != '?')
wc[table[i]] = (WCHAR)i;
}
else {
c = (CHAR *)u2cp;
for(i = 0; i < 65536; i++)
c[i] = cpi->DefaultChar[0];
for(i = 0; i < 256; i++)
if (table[i] != '?')
c[table[i]] = (CHAR)i;
}
patch_aliases(u2cp, cpi);
if(fwrite(u2cp, 1, cpi->MaxCharSize * 65536, out) != cpi->MaxCharSize * 65536)
ret = FALSE;
free(u2cp);
return ret;
}
static BOOL write_lb_ranges(FILE *out, CPINFOEXA *cpi, WCHAR *table)
{
WCHAR sub_table[256];
WORD offset, offsets[256];
int i, j, range;
memset(offsets, 0, sizeof(offsets));
offset = 0;
for(i = 0; i < MAX_LEADBYTES; i += 2) {
for(range = cpi->LeadByte[i]; range != 0 && range <= cpi->LeadByte[i + 1]; range++) {
offset += 256;
offsets[range] = offset;
}
}
if(fwrite(offsets, 1, sizeof(offsets), out) != sizeof(offsets))
return FALSE;
for(i = 0; i < MAX_LEADBYTES; i += 2) {
for(range = cpi->LeadByte[i]; range != 0 && range <= cpi->LeadByte[i + 1]; range++) {
/*printf("Writing sub table for LeadByte %02X\n", range);*/
for(j = MAKEWORD(0, range); j <= MAKEWORD(0xFF, range); j++) {
sub_table[j - MAKEWORD(0, range)] = table[j];
}
if(fwrite(sub_table, 1, sizeof(sub_table), out) != sizeof(sub_table))
return FALSE;
}
}
return TRUE;
}
static BOOL create_nls_file(char *name, CPINFOEXA *cpi, WCHAR *table, WCHAR *oemtable)
{
FILE *out;
NLS_FILE_HEADER nls;
WORD wValue, number_of_lb_ranges, number_of_lb_subtables, i;
printf("Creating NLS table \"%s\"\n", name);
if(!(out = fopen(name, "wb"))) {
printf("Could not create file \"%s\"\n", name);
return FALSE;
}
memset(&nls, 0, sizeof(nls));
nls.wSize = sizeof(nls) / sizeof(WORD);
nls.CodePage = cpi->CodePage;
nls.MaxCharSize = cpi->MaxCharSize;
memcpy(nls.DefaultChar, cpi->DefaultChar, MAX_DEFAULTCHAR);
nls.UnicodeDefaultChar = cpi->UnicodeDefaultChar;
nls.unknown1 = '?';
nls.unknown2 = '?';
memcpy(nls.LeadByte, cpi->LeadByte, MAX_LEADBYTES);
if(fwrite(&nls, 1, sizeof(nls), out) != sizeof(nls)) {
fclose(out);
printf("Could not write to file \"%s\"\n", name);
return FALSE;
}
number_of_lb_ranges = 0;
number_of_lb_subtables = 0;
for(i = 0; i < MAX_LEADBYTES; i += 2) {
if(cpi->LeadByte[i] != 0 && cpi->LeadByte[i + 1] > cpi->LeadByte[i]) {
number_of_lb_ranges++;
number_of_lb_subtables += cpi->LeadByte[i + 1] - cpi->LeadByte[i] + 1;
}
}
/*printf("Number of LeadByte ranges %d\n", number_of_lb_ranges);*/
/*printf("Number of LeadByte subtables %d\n", number_of_lb_subtables);*/
/* Calculate offset to Unicode to CP table in words:
* 1. (256 * sizeof(WORD)) primary CP to Unicode table +
* 2. (WORD) optional OEM glyph table size in words +
* 3. OEM glyph table size in words * sizeof(WORD) +
* 4. (WORD) Number of DBCS LeadByte ranges +
* 5. if (Number of DBCS LeadByte ranges != 0) 256 * sizeof(WORD) offsets of lead byte sub tables
* 6. (Number of DBCS LeadByte sub tables * 256 * sizeof(WORD)) LeadByte sub tables +
* 7. (WORD) Unknown flag
*/
wValue = (256 * sizeof(WORD) + /* 1 */
sizeof(WORD) + /* 2 */
((oemtable !=NULL) ? (256 * sizeof(WORD)) : 0) + /* 3 */
sizeof(WORD) + /* 4 */
((number_of_lb_subtables != 0) ? 256 * sizeof(WORD) : 0) + /* 5 */
number_of_lb_subtables * 256 * sizeof(WORD) + /* 6 */
sizeof(WORD) /* 7 */
) / sizeof(WORD);
/* offset of Unicode to CP table in words */
fwrite(&wValue, 1, sizeof(wValue), out);
/* primary CP to Unicode table */
if(fwrite(table, 1, 256 * sizeof(WCHAR), out) != 256 * sizeof(WCHAR)) {
fclose(out);
printf("Could not write to file \"%s\"\n", name);
return FALSE;
}
/* optional OEM glyph table size in words */
wValue = (oemtable != NULL) ? (256 * sizeof(WORD)) : 0;
fwrite(&wValue, 1, sizeof(wValue), out);
/* optional OEM to Unicode table */
if (oemtable) {
if(fwrite(oemtable, 1, 256 * sizeof(WCHAR), out) != 256 * sizeof(WCHAR)) {
fclose(out);
printf("Could not write to file \"%s\"\n", name);
return FALSE;
}
}
/* Number of DBCS LeadByte ranges */
fwrite(&number_of_lb_ranges, 1, sizeof(number_of_lb_ranges), out);
/* offsets of lead byte sub tables and lead byte sub tables */
if(number_of_lb_ranges > 0) {
if(!write_lb_ranges(out, cpi, table)) {
fclose(out);
printf("Could not write to file \"%s\"\n", name);
return FALSE;
}
}
/* Unknown flag */
wValue = 0;
fwrite(&wValue, 1, sizeof(wValue), out);
if(!write_unicode2cp_table(out, cpi, table)) {
fclose(out);
printf("Could not write to file \"%s\"\n", name);
return FALSE;
}
fclose(out);
return TRUE;
}
/* correct the codepage information such as default chars */
static void patch_codepage_info(CPINFOEXA *cpi)
{
/* currently nothing */
}
static WCHAR *Load_CP2Unicode_Table(char *table_name, UINT cp, CPINFOEXA *cpi)
{
char buf[256];
char *p;
DWORD n, value;
FILE *file;
WCHAR *table;
int lb_ranges, lb_range_started, line;
printf("Loading translation table \"%s\"\n", table_name);
/* Init to default values */
memset(cpi, 0, sizeof(CPINFOEXA));
cpi->CodePage = cp;
*(WCHAR *)cpi->DefaultChar = '?';
cpi->MaxCharSize = 1;
cpi->UnicodeDefaultChar = '?';
patch_codepage_info(cpi);
table = (WCHAR *)malloc(sizeof(WCHAR) * 65536);
if(!table) {
printf("Not enough memory for Codepage to Unicode table\n");
return NULL;
}
for(n = 0; n < 256; n++)
table[n] = (WCHAR)n;
for(n = 256; n < 65536; n++)
table[n] = cpi->UnicodeDefaultChar;
file = fopen(table_name, "r");
if(file == NULL) {
free(table);
return NULL;
}
line = 0;
lb_ranges = 0;
lb_range_started = 0;
while(fgets(buf, sizeof(buf), file)) {
line++;
p = buf;
while(isspace(*p)) p++;
if(!*p || p[0] == '#')
continue;
n = strtol(p, &p, 0);
if(n > 0xFFFF) {
printf("Line %d: Entry 0x%06lX: File \"%s\" corrupted\n", line, n, table_name);
continue;
}
if(n > 0xFF && cpi->MaxCharSize != 2) {
/*printf("Line %d: Entry 0x%04lX: Switching to DBCS\n", line, n);*/
cpi->MaxCharSize = 2;
}
while(isspace(*p)) p++;
if(!*p || p[0] == '#') {
/*printf("Line %d: Entry 0x%02lX has no Unicode value\n", line, n);*/
}
else {
value = strtol(p, &p, 0);
if(value > 0xFFFF) {
printf("Line %d: Entry 0x%06lX unicode value: File \"%s\" corrupted\n", line, n, table_name);
}
table[n] = (WCHAR)value;
}
/* wait for comment */
while(*p && *p != '#') p++;
if(*p == '#' && strstr(p, "DBCS LEAD BYTE")) {
/*printf("Line %d, entry 0x%02lX DBCS LEAD BYTE\n", line, n);*/
if(n > 0xFF) {
printf("Line %d: Entry 0x%04lX: Error: DBCS lead byte overflowed\n", line, n);
continue;
}
table[n] = (WCHAR)0;
if(lb_range_started) {
cpi->LeadByte[(lb_ranges - 1) * 2 + 1] = (BYTE)n;
}
else {
/*printf("Line %d: Starting new DBCS lead byte range, entry 0x%02lX\n", line, n);*/
if(lb_ranges < MAX_LEADBYTES/2) {
lb_ranges++;
lb_range_started = 1;
cpi->LeadByte[(lb_ranges - 1) * 2] = (BYTE)n;
}
else
printf("Line %d: Error: could not start new lead byte range\n", line);
}
}
else {
if(lb_range_started)
lb_range_started = 0;
}
}
fclose(file);
return table;
}
static WCHAR *Load_OEM2Unicode_Table(char *table_name, WCHAR *def_table, UINT cp, CPINFOEXA *cpi)
{
char buf[256];
char *p;
DWORD n, value;
FILE *file;
WCHAR *table;
int line;
printf("Loading oem glyph table \"%s\"\n", table_name);
table = (WCHAR *)malloc(sizeof(WCHAR) * 65536);
if(!table) {
printf("Not enough memory for Codepage to Unicode table\n");
return NULL;
}
memcpy(table, def_table, 65536 * sizeof(WCHAR));
file = fopen(table_name, "r");
if(file == NULL) {
free(table);
return NULL;
}
while(fgets(buf, sizeof(buf), file)) {
line++;
p = buf;
while(isspace(*p)) p++;
if(!*p || p[0] == '#')
continue;
value = strtol(p, &p, 16);
if(value > 0xFFFF) {
printf("Line %d: Entry 0x%06lX: File \"%s\" corrupted\n", line, value, table_name);
continue;
}
while(isspace(*p)) p++;
if(!*p || p[0] == '#') {
/*printf("Line %d: Entry 0x%02lX has no Unicode value\n", line, n);*/
continue;
}
else {
n = strtol(p, &p, 16);
if(n > 0xFFFF) {
printf("Line %d: Entry 0x%06lX unicode value: File \"%s\" corrupted\n", line, value, table_name);
continue;
}
}
if (cpi->CodePage == 864) {
while(isspace(*p)) p++;
if(!*p || p[0] == '#' || p[0] == '-') {
/*printf("Line %d: Entry 0x%02lX has no Unicode value\n", line, n);*/
continue;
}
else {
n = strtol(p, &p, 16);
if(n > 0xFFFF) {
printf("Line %d: Entry 0x%06lX oem value: File \"%s\" corrupted\n", line, value, table_name);
}
continue;
}
}
table[n] = (WCHAR)value;
}
fclose(file);
return table;
}
int write_nls_files()
{
WCHAR *table;
WCHAR *oemtable;
char nls_filename[256];
CPINFOEXA cpi;
int i;
struct code_page {
UINT cp;
BOOL oem;
char *table_filename;
char *comment;
} pages[] = {
{37, FALSE, LIBDIR"MAPPINGS/VENDORS/MICSFT/EBCDIC/CP037.TXT", "IBM EBCDIC US Canada"},
{424, FALSE, LIBDIR"MAPPINGS/VENDORS/MISC/CP424.TXT", "IBM EBCDIC Hebrew"},
{437, TRUE, LIBDIR"MAPPINGS/VENDORS/MICSFT/PC/CP437.TXT", "OEM United States"},
{500, FALSE, LIBDIR"MAPPINGS/VENDORS/MICSFT/EBCDIC/CP500.TXT", "IBM EBCDIC International"},
/*{708, FALSE, "", "Arabic ASMO"},*/
/*{720, FALSE, "", "Arabic Transparent ASMO"},*/
{737, TRUE, LIBDIR"MAPPINGS/VENDORS/MICSFT/PC/CP737.TXT", "OEM Greek 437G"},
{775, TRUE, LIBDIR"MAPPINGS/VENDORS/MICSFT/PC/CP775.TXT", "OEM Baltic"},
{850, TRUE, LIBDIR"MAPPINGS/VENDORS/MICSFT/PC/CP850.TXT", "OEM Multilingual Latin 1"},
{852, TRUE, LIBDIR"MAPPINGS/VENDORS/MICSFT/PC/CP852.TXT", "OEM Slovak Latin 2"},
{855, TRUE, LIBDIR"MAPPINGS/VENDORS/MICSFT/PC/CP855.TXT", "OEM Cyrillic" },
{856, TRUE, LIBDIR"MAPPINGS/VENDORS/MISC/CP856.TXT", "Hebrew PC"},
{857, TRUE, LIBDIR"MAPPINGS/VENDORS/MICSFT/PC/CP857.TXT", "OEM Turkish"},
{860, TRUE, LIBDIR"MAPPINGS/VENDORS/MICSFT/PC/CP860.TXT", "OEM Portuguese"},
{861, TRUE, LIBDIR"MAPPINGS/VENDORS/MICSFT/PC/CP861.TXT", "OEM Icelandic"},
{862, TRUE, LIBDIR"MAPPINGS/VENDORS/MICSFT/PC/CP862.TXT", "OEM Hebrew"},
{863, TRUE, LIBDIR"MAPPINGS/VENDORS/MICSFT/PC/CP863.TXT", "OEM Canadian French"},
{864, TRUE, LIBDIR"MAPPINGS/VENDORS/MICSFT/PC/CP864.TXT", "OEM Arabic"},
{865, TRUE, LIBDIR"MAPPINGS/VENDORS/MICSFT/PC/CP865.TXT", "OEM Nordic"},
{866, TRUE, LIBDIR"MAPPINGS/VENDORS/MICSFT/PC/CP866.TXT", "OEM Russian"},
{869, TRUE, LIBDIR"MAPPINGS/VENDORS/MICSFT/PC/CP869.TXT", "OEM Greek"},
/*{870, FALSE, "", "IBM EBCDIC Multilingual/ROECE (Latin 2)"},*/
{874, FALSE, LIBDIR"MAPPINGS/VENDORS/MICSFT/PC/CP874.TXT", "ANSI/OEM Thai"},
{875, FALSE, LIBDIR"MAPPINGS/VENDORS/MICSFT/EBCDIC/CP875.TXT", "IBM EBCDIC Greek"},
{878, FALSE, LIBDIR"MAPPINGS/VENDORS/MISC/KOI8-R.TXT", "Russian KOI8"},
{932, FALSE, LIBDIR"MAPPINGS/VENDORS/MICSFT/WINDOWS/CP932.TXT", "ANSI/OEM Japanese Shift-JIS"},
{936, FALSE, LIBDIR"MAPPINGS/VENDORS/MICSFT/WINDOWS/CP936.TXT", "ANSI/OEM Simplified Chinese GBK"},
{949, FALSE, LIBDIR"MAPPINGS/VENDORS/MICSFT/WINDOWS/CP949.TXT", "ANSI/OEM Korean Unified Hangul"},
{950, FALSE, LIBDIR"MAPPINGS/VENDORS/MICSFT/WINDOWS/CP950.TXT", "ANSI/OEM Traditional Chinese Big5"},
{1006, FALSE, LIBDIR"MAPPINGS/VENDORS/MISC/CP1006.TXT", "IBM Arabic"},
{1026, FALSE, LIBDIR"MAPPINGS/VENDORS/MICSFT/EBCDIC/CP1026.TXT", "IBM EBCDIC Latin 5 Turkish"},
{1250, FALSE, LIBDIR"MAPPINGS/VENDORS/MICSFT/WINDOWS/CP1250.TXT", "ANSI Eastern Europe"},
{1251, FALSE, LIBDIR"MAPPINGS/VENDORS/MICSFT/WINDOWS/CP1251.TXT", "ANSI Cyrillic"},
{1252, FALSE, LIBDIR"MAPPINGS/VENDORS/MICSFT/WINDOWS/CP1252.TXT", "ANSI Latin 1"},
{1253, FALSE, LIBDIR"MAPPINGS/VENDORS/MICSFT/WINDOWS/CP1253.TXT", "ANSI Greek"},
{1254, FALSE, LIBDIR"MAPPINGS/VENDORS/MICSFT/WINDOWS/CP1254.TXT", "ANSI Turkish"},
{1255, FALSE, LIBDIR"MAPPINGS/VENDORS/MICSFT/WINDOWS/CP1255.TXT", "ANSI Hebrew"},
{1256, FALSE, LIBDIR"MAPPINGS/VENDORS/MICSFT/WINDOWS/CP1256.TXT", "ANSI Arabic"},
{1257, FALSE, LIBDIR"MAPPINGS/VENDORS/MICSFT/WINDOWS/CP1257.TXT", "ANSI Baltic"},
{1258, FALSE, LIBDIR"MAPPINGS/VENDORS/MICSFT/WINDOWS/CP1258.TXT", "ANSI/OEM Viet Nam"},
{10000, FALSE, LIBDIR"MAPPINGS/VENDORS/MICSFT/MAC/ROMAN.TXT", "Mac Roman"},
{10006, FALSE, LIBDIR"MAPPINGS/VENDORS/MICSFT/MAC/GREEK.TXT", "Mac Greek"},
{10007, FALSE, LIBDIR"MAPPINGS/VENDORS/MICSFT/MAC/CYRILLIC.TXT", "Mac Cyrillic"},
{10029, FALSE, LIBDIR"MAPPINGS/VENDORS/MICSFT/MAC/LATIN2.TXT", "Mac Latin 2"},
{10079, FALSE, LIBDIR"MAPPINGS/VENDORS/MICSFT/MAC/ICELAND.TXT", "Mac Icelandic"},
{10081, FALSE, LIBDIR"MAPPINGS/VENDORS/MICSFT/MAC/TURKISH.TXT", "Mac Turkish"},
/*{20000, FALSE, "", "CNS Taiwan"},*/
/*{20001, FALSE, "", "TCA Taiwan"},*/
/*{20002, FALSE, "", "Eten Taiwan"},*/
/*{20003, FALSE, "", "IBM5550 Taiwan"},*/
/*{20004, FALSE, "", "TeleText Taiwan"},*/
/*{20005, FALSE, "", "Wang Taiwan"},*/
/*{20105, FALSE, "", "IA5 IRV International Alphabet No.5"},*/
/*{20106, FALSE, "", "IA5 German"},*/
/*{20107, FALSE, "", "IA5 Swedish"},*/
/*{20108, FALSE, "", "IA5 Norwegian"},*/
/*{20127, FALSE, "", "US ASCII"},*/
/*{20261, FALSE, "", "T.61"},*/
/*{20269, FALSE, "", "ISO 6937 NonSpacing Accent"},*/
/*{20273, FALSE, "", "IBM EBCDIC Germany"},*/
/*{20277, FALSE, "", "IBM EBCDIC Denmark/Norway"},*/
/*{20278, FALSE, "", "IBM EBCDIC Finland/Sweden"},*/
/*{20280, FALSE, "", "IBM EBCDIC Italy"},*/
/*{20284, FALSE, "", "IBM EBCDIC Latin America/Spain"},*/
/*{20285, FALSE, "", "IBM EBCDIC United Kingdom"},*/
/*{20290, FALSE, "", "IBM EBCDIC Japanese Katakana Extended"},*/
/*{20297, FALSE, "", "IBM EBCDIC France"},*/
/*{20420, FALSE, "", "IBM EBCDIC Arabic"},*/
/*{20423, FALSE, "", "IBM EBCDIC Greek"},*/
/*{20424, FALSE, "", "IBM EBCDIC Hebrew"},*/
/*{20833, FALSE, "", "IBM EBCDIC Korean Extended"},*/
/*{20838, FALSE, "", "IBM EBCDIC Thai"},*/
/*{20871, FALSE, "", "IBM EBCDIC Icelandic"},*/
/*{20880, FALSE, "", "IBM EBCDIC Cyrillic (Russian)"},*/
{20866, FALSE, LIBDIR"MAPPINGS/VENDORS/MISC/KOI8-R.TXT", "Russian KOI8"},
/*{20905, FALSE, "", "IBM EBCDIC Turkish"},*/
/*{21025, FALSE, "", "IBM EBCDIC Cyrillic (Serbian, Bulgarian)"},*/
/*{21027, FALSE, "", "Ext Alpha Lowercase"},*/
{28591, FALSE, LIBDIR"MAPPINGS/ISO8859/8859-1.TXT", "ISO 8859-1 Latin 1"},
{28592, FALSE, LIBDIR"MAPPINGS/ISO8859/8859-2.TXT", "ISO 8859-2 Eastern Europe"},
{28593, FALSE, LIBDIR"MAPPINGS/ISO8859/8859-3.TXT", "ISO 8859-3 Turkish"},
{28594, FALSE, LIBDIR"MAPPINGS/ISO8859/8859-4.TXT", "ISO 8859-4 Baltic"},
{28595, FALSE, LIBDIR"MAPPINGS/ISO8859/8859-5.TXT", "ISO 8859-5 Cyrillic"},
{28596, FALSE, LIBDIR"MAPPINGS/ISO8859/8859-6.TXT", "ISO 8859-6 Arabic"},
{28597, FALSE, LIBDIR"MAPPINGS/ISO8859/8859-7.TXT", "ISO 8859-7 Greek"},
{28598, FALSE, LIBDIR"MAPPINGS/ISO8859/8859-8.TXT", "ISO 8859-8 Hebrew"},
{28599, FALSE, LIBDIR"MAPPINGS/ISO8859/8859-9.TXT", "ISO 8859-9 Latin 5"}
};
for(i = 0; i < sizeof(pages)/sizeof(pages[0]); i++) {
table = Load_CP2Unicode_Table(pages[i].table_filename, pages[i].cp, &cpi);
if(!table) {
printf("Could not load \"%s\" (%s)\n", pages[i].table_filename, pages[i].comment);
continue;
}
if (pages[i].oem) {
oemtable = Load_OEM2Unicode_Table(LIBDIR"MAPPINGS/VENDORS/MISC/IBMGRAPH.TXT", table, pages[i].cp, &cpi);
if(!oemtable) {
printf("Could not load \"%s\" (%s)\n", LIBDIR"MAPPINGS/VENDORS/MISC/IBMGRAPH.TXT", "IBM OEM glyph table");
continue;
}
}
sprintf(nls_filename, "%s/c_%03d.nls", NLSDIR, cpi.CodePage);
if(!create_nls_file(nls_filename, &cpi, table, pages[i].oem ? oemtable : NULL)) {
printf("Could not write \"%s\" (%s)\n", nls_filename, pages[i].comment);
}
if (pages[i].oem)
free(oemtable);
free(table);
}
return 0;
}
static WORD *to_upper_org = NULL, *to_lower_org = NULL;
static WORD diffs[256];
static int number_of_diffs;
static WORD number_of_subtables_with_diffs;
/* pointers to subtables with 16 elements in each to the main table */
static WORD *subtables_with_diffs[4096];
static WORD number_of_subtables_with_offsets;
/* subtables with 16 elements */
static WORD subtables_with_offsets[4096 * 16];
static void test_packed_table(WCHAR *table)
{
WCHAR test_str[] = L"This is an English text. Ïî-ðóññêè ÿ ïèñàòü óìåþ íåìíîæêî. 1234567890";
//WORD diff, off;
//WORD *sub_table;
DWORD i, len;
len = lstrlenW(test_str);
for(i = 0; i < len + 1; i++) {
/*off = table[HIBYTE(test_str[i])];
sub_table = table + off;
off = sub_table[LOBYTE(test_str[i]) >> 4];
sub_table = table + off;
off = LOBYTE(test_str[i]) & 0x0F;
diff = sub_table[off];
test_str[i] += diff;*/
test_str[i] += table[table[table[HIBYTE(test_str[i])] + (LOBYTE(test_str[i]) >> 4)] + (LOBYTE(test_str[i]) & 0x0F)];
}
/*
{
FILE *file;
static int n = 0;
char name[20];
sprintf(name, "text%02d.dat", n++);
file = fopen(name, "wb");
fwrite(test_str, len * sizeof(WCHAR), 1, file);
fclose(file);
}*/
}
static BOOL CreateCaseDiff(char *table_name)
{
char buf[256];
char *p;
WORD code, case_mapping;
FILE *file;
int line;
to_upper_org = (WORD *)calloc(65536, sizeof(WORD));
if(!to_upper_org) {
printf("Not enough memory for to upper table\n");
return FALSE;
}
to_lower_org = (WORD *)calloc(65536, sizeof(WORD));
if(!to_lower_org) {
printf("Not enough memory for to lower table\n");
return FALSE;
}
file = fopen(table_name, "r");
if(file == NULL) {
printf("Could not open file \"%s\"\n", table_name);
return FALSE;
}
line = 0;
while(fgets(buf, sizeof(buf), file)) {
line++;
p = buf;
while(*p && isspace(*p)) p++;
if(!*p)
continue;
/* 0. Code value */
code = (WORD)strtol(p, &p, 16);
//if(code != 0x9A0 && code != 0xBA0)
//continue;
while(*p && *p != ';') p++;
if(!*p)
continue;
p++;
/* 1. Character name */
while(*p && *p != ';') p++;
if(!*p)
continue;
p++;
/* 2. General Category */
while(*p && *p != ';') p++;
if(!*p)
continue;
p++;
/* 3. Canonical Combining Classes */
while(*p && *p != ';') p++;
if(!*p)
continue;
p++;
/* 4. Bidirectional Category */
while(*p && *p != ';') p++;
if(!*p)
continue;
p++;
/* 5. Character Decomposition Mapping */
while(*p && *p != ';') p++;
if(!*p)
continue;
p++;
/* 6. Decimal digit value */
while(*p && *p != ';') p++;
if(!*p)
continue;
p++;
/* 7. Digit value */
while(*p && *p != ';') p++;
if(!*p)
continue;
p++;
/* 8. Numeric value */
while(*p && *p != ';') p++;
if(!*p)
continue;
p++;
/* 9. Mirrored */
while(*p && *p != ';') p++;
if(!*p)
continue;
p++;
/* 10. Unicode 1.0 Name */
while(*p && *p != ';') p++;
if(!*p)
continue;
p++;
/* 11. 10646 comment field */
while(*p && *p != ';') p++;
if(!*p)
continue;
p++;
/* 12. Uppercase Mapping */
while(*p && isspace(*p)) p++;
if(!*p) continue;
if(*p != ';') {
case_mapping = (WORD)strtol(p, &p, 16);
to_upper_org[code] = case_mapping - code;
while(*p && *p != ';') p++;
}
else
p++;
/* 13. Lowercase Mapping */
while(*p && isspace(*p)) p++;
if(!*p) continue;
if(*p != ';') {
case_mapping = (WORD)strtol(p, &p, 16);
to_lower_org[code] = case_mapping - code;
while(*p && *p != ';') p++;
}
else
p++;
/* 14. Titlecase Mapping */
while(*p && *p != ';') p++;
if(!*p)
continue;
p++;
}
fclose(file);
return TRUE;
}
static int find_diff(WORD diff)
{
int i;
for(i = 0; i < number_of_diffs; i++) {
if(diffs[i] == diff)
return i;
}
return -1;
}
static WORD find_subtable_with_diffs(WORD *table, WORD *subtable)
{
WORD index;
for(index = 0; index < number_of_subtables_with_diffs; index++) {
if(memcmp(subtables_with_diffs[index], subtable, 16 * sizeof(WORD)) == 0) {
return index;
}
}
if(number_of_subtables_with_diffs >= 4096) {
printf("Could not add new subtable with diffs, storage is full\n");
return 0;
}
subtables_with_diffs[number_of_subtables_with_diffs] = subtable;
number_of_subtables_with_diffs++;
return index;
}
static WORD find_subtable_with_offsets(WORD *subtable)
{
WORD index;
for(index = 0; index < number_of_subtables_with_offsets; index++) {
if(memcmp(&subtables_with_offsets[index * 16], subtable, 16 * sizeof(WORD)) == 0) {
return index;
}
}
if(number_of_subtables_with_offsets >= 4096) {
printf("Could not add new subtable with offsets, storage is full\n");
return 0;
}
memcpy(&subtables_with_offsets[number_of_subtables_with_offsets * 16], subtable, 16 * sizeof(WORD));
number_of_subtables_with_offsets++;
return index;
}
static WORD *pack_table(WORD *table, WORD *packed_size_in_words)
{
WORD high, low4, index;
WORD main_index[256];
WORD temp_subtable[16];
WORD *packed_table;
WORD *subtable_src, *subtable_dst;
memset(subtables_with_diffs, 0, sizeof(subtables_with_diffs));
number_of_subtables_with_diffs = 0;
memset(subtables_with_offsets, 0, sizeof(subtables_with_offsets));
number_of_subtables_with_offsets = 0;
for(high = 0; high < 256; high++) {
for(low4 = 0; low4 < 256; low4 += 16) {
index = find_subtable_with_diffs(table, &table[MAKEWORD(low4, high)]);
temp_subtable[low4 >> 4] = index;
}
index = find_subtable_with_offsets(temp_subtable);
main_index[high] = index;
}
*packed_size_in_words = 0x100 + number_of_subtables_with_offsets * 16 + number_of_subtables_with_diffs * 16;
packed_table = calloc(*packed_size_in_words, sizeof(WORD));
/* fill main index according to the subtables_with_offsets */
for(high = 0; high < 256; high++) {
packed_table[high] = 0x100 + main_index[high] * 16;
}
//memcpy(sub_table, subtables_with_offsets, number_of_subtables_with_offsets * 16);
/* fill subtable index according to the subtables_with_diffs */
for(index = 0; index < number_of_subtables_with_offsets; index++) {
subtable_dst = packed_table + 0x100 + index * 16;
subtable_src = &subtables_with_offsets[index * 16];
for(low4 = 0; low4 < 16; low4++) {
subtable_dst[low4] = 0x100 + number_of_subtables_with_offsets * 16 + subtable_src[low4] * 16;
}
}
for(index = 0; index < number_of_subtables_with_diffs; index++) {
subtable_dst = packed_table + 0x100 + number_of_subtables_with_offsets * 16 + index * 16;
memcpy(subtable_dst, subtables_with_diffs[index], 16 * sizeof(WORD));
}
test_packed_table(packed_table);
return packed_table;
}
int write_casemap_file(void)
{
WORD packed_size_in_words, offset_to_next_table_in_words;
WORD *packed_table, value;
FILE *file;
if(!CreateCaseDiff(LIBDIR"UnicodeData.txt"))
return -1;
file = fopen(NLSDIR"/l_intl.nls", "wb");
/* write version number */
value = 1;
fwrite(&value, 1, sizeof(WORD), file);
/* pack upper case table */
packed_table = pack_table(to_upper_org, &packed_size_in_words);
offset_to_next_table_in_words = packed_size_in_words + 1;
fwrite(&offset_to_next_table_in_words, 1, sizeof(WORD), file);
/* write packed upper case table */
fwrite(packed_table, sizeof(WORD), packed_size_in_words, file);
free(packed_table);
/* pack lower case table */
packed_table = pack_table(to_lower_org, &packed_size_in_words);
offset_to_next_table_in_words = packed_size_in_words + 1;
fwrite(&offset_to_next_table_in_words, 1, sizeof(WORD), file);
/* write packed lower case table */
fwrite(packed_table, sizeof(WORD), packed_size_in_words, file);
free(packed_table);
fclose(file);
free(to_upper_org);
free(to_lower_org);
return 0;
}
int main()
{
write_nls_files();
write_casemap_file();
return 0;
}

View file

@ -0,0 +1 @@
*.txt