From 55edec2b8a9cdb40176a8e278a32ade57a6bd092 Mon Sep 17 00:00:00 2001
From: aiju
Date: Sat, 2 Jun 2018 00:02:29 +0000
Subject: [PATCH] tcs: support EUC-JP JIS X 0212 codes

Decode EUC-JP codeset 3 (lead byte 0x8f followed by two bytes in
0xa1-0xfe) through the kuten212 table.

Malformed or truncated codeset 3 sequences are counted in nerrors and
replaced by BADMAP unless clean is set, matching the other error paths
in ujis().  EOF right after the 0x8f lead byte resets the decoder
instead of leaving it waiting for a third byte.
---
 sys/src/cmd/tcs/conv_jis.c | 81 +++++++++++++++++++++++++++++++++-----
 sys/src/cmd/tcs/mkfile     |  1 +
 2 files changed, 72 insertions(+), 10 deletions(-)

diff --git a/sys/src/cmd/tcs/conv_jis.c b/sys/src/cmd/tcs/conv_jis.c
index 4b979a7c5..e6c19f638 100644
--- a/sys/src/cmd/tcs/conv_jis.c
+++ b/sys/src/cmd/tcs/conv_jis.c
@@ -10,6 +10,7 @@
 #include "hdr.h"
 #include "conv.h"
 #include "kuten208.h"
+#include "kuten212.h"
 #include "jis.h"
 
 /*
@@ -195,7 +196,7 @@ again:
 static void
 ujis(int c, Rune **r, long input_loc)
 {
-	static enum { state0, state1 } state = state0;
+	static enum { state0, state1, state2, state3 } state = state0;
 	static int lastc;
 	int n;
 	long l;
@@ -216,16 +217,12 @@ ujis(int c, Rune **r, long input_loc)
 				emit(BADMAP);
 			return;
 		}
-		if(c == 0x8f){	/* codeset 3 */
-			nerrors++;
-			if(squawk)
-				EPR "%s: unknown codeset 3 near byte %ld in %s\n", argv0, input_loc, file);
-			if(!clean)
-				emit(BADMAP);
-			return;
+		if(c == 0x8f)	/* codeset 3 */
+			state = state2;
+		else{
+			lastc = c;
+			state = state1;
 		}
-		lastc = c;
-		state = state1;
 		return;
 
 	case state1:	/* two part char */
@@ -250,6 +247,70 @@ ujis(int c, Rune **r, long input_loc)
 			emit(l);
 		}
 		state = state0;
+		return;
+
+	/*
+	 * codeset 3 (JIS X 0212): the 0x8f lead byte is followed by
+	 * two bytes, each in 0xa1-0xfe.  a malformed sequence is
+	 * counted and replaced by BADMAP like the other error paths.
+	 */
+	case state2:	/* three part char, part #2 */
+		if(c < 0){
+			/* nothing more will arrive; don't stay parked in state3 */
+			nerrors++;
+			if(squawk)
+				EPR "%s: unexpected EOF in %s\n", argv0, file);
+			if(!clean)
+				emit(BADMAP);
+			state = state0;
+			return;
+		}
+		if(c < 0xa1 || c > 0xfe){
+			nerrors++;
+			if(squawk)
+				EPR "%s: invalid byte 0x%x in codeset 3 near byte %ld in %s\n", argv0, c, input_loc, file);
+			if(!clean)
+				emit(BADMAP);
+			state = state0;
+			return;
+		}
+		lastc = c;
+		state = state3;
+		return;
+
+	case state3:	/* three part char, part #3 */
+		if(c < 0){
+			if(squawk)
+				EPR "%s: unexpected EOF in %s\n", argv0, file);
+			c = 0xA1;	/* filler byte so the pending character can be completed */
+		}
+		if(c < 0xa1 || c > 0xfe){
+			nerrors++;
+			if(squawk)
+				EPR "%s: invalid byte 0x%x in codeset 3 near byte %ld in %s\n", argv0, c, input_loc, file);
+			if(!clean)
+				emit(BADMAP);
+			state = state0;
+			return;
+		}
+
+		n = (lastc&0x7F)*100 + (c&0x7F) - 3232;	/* kuten212 */
+		if((n >= KUTEN212MAX) || ((l = tabkuten212[n]) == -1)){
+			nerrors++;
+			if(squawk)
+				EPR "%s: unknown kuten212 %d (from 0x%x,0x%x) near byte %ld in %s\n", argv0, n, lastc, c, input_loc, file);
+			if(!clean)
+				emit(BADMAP);
+		} else {
+			if(l < 0){	/* negative table entries are reported as ambiguous */
+				l = -l;
+				if(squawk)
+					EPR "%s: ambiguous kuten212 %d (mapped to 0x%lx) near byte %ld in %s\n", argv0, n, l, input_loc, file);
+			}
+			emit(l);
+		}
+		state = state0;
+		return;
 	}
 }
 
diff --git a/sys/src/cmd/tcs/mkfile b/sys/src/cmd/tcs/mkfile
index 435e0f687..98f4a072a 100644
--- a/sys/src/cmd/tcs/mkfile
+++ b/sys/src/cmd/tcs/mkfile
@@ -10,6 +10,7 @@ OFILES=tcs.$O\
 	utf.$O\
 	html.$O\
 	kuten208.$O\
+	kuten212.$O\
 	gb.$O\
 	gbk.$O\
 	ksc.$O\