tcs: support EUC-JP JIS X 0212 codes

This commit is contained in:
aiju 2018-06-02 00:02:29 +00:00
parent 5db80edcd1
commit 55edec2b8a
2 changed files with 58 additions and 10 deletions

View file

@ -10,6 +10,7 @@
#include "hdr.h"
#include "conv.h"
#include "kuten208.h"
#include "kuten212.h"
#include "jis.h"
/*
@ -195,7 +196,7 @@ again:
static void
ujis(int c, Rune **r, long input_loc)
{
static enum { state0, state1 } state = state0;
static enum { state0, state1, state2, state3 } state = state0;
static int lastc;
int n;
long l;
@ -216,16 +217,12 @@ ujis(int c, Rune **r, long input_loc)
emit(BADMAP);
return;
}
if(c == 0x8f){ /* codeset 3 */
nerrors++;
if(squawk)
EPR "%s: unknown codeset 3 near byte %ld in %s\n", argv0, input_loc, file);
if(!clean)
emit(BADMAP);
return;
if(c == 0x8f) /* codeset 3 */
state = state2;
else{
lastc = c;
state = state1;
}
lastc = c;
state = state1;
return;
case state1: /* two part char */
@ -250,6 +247,56 @@ ujis(int c, Rune **r, long input_loc)
emit(l);
}
state = state0;
return;
case state2: /* three part char, part #2 */
if(c < 0){
if(squawk)
EPR "%s: unexpected EOF in %s\n", argv0, file);
c = 0xA1;
}
if(c < 0xa1 || c > 0xfe){
if(squawk)
EPR "%s: invalid byte 0x%x in codeset 3\n", argv0, c);
state = state0;
}else{
lastc = c;
state = state3;
}
return;
case state3: /* three part char, part #3 */
if(c < 0){
if(squawk)
EPR "%s: unexpected EOF in %s\n", argv0, file);
c = 0xA1;
}
if(c < 0xa1 || c > 0xfe){
if(squawk)
EPR "%s: invalid byte 0x%x in codeset 3\n", argv0, c);
state = state0;
return;
}
n = (lastc&0x7F)*100 + (c&0x7F) - 3232; /* kuten212 */
if((n >= KUTEN212MAX) || ((l = tabkuten212[n]) == -1)){
nerrors++;
if(squawk)
EPR "%s: unknown kuten212 %d (from 0x%x,0x%x) near byte %ld in %s\n", argv0, n, lastc, c, input_loc, file);
if(!clean)
emit(BADMAP);
} else {
if(l < 0){
l = -l;
if(squawk)
EPR "%s: ambiguous kuten212 %d (mapped to 0x%lx) near byte %ld in %s\n", argv0, n, l, input_loc, file);
}
emit(l);
}
state = state0;
return;
}
}

View file

@ -10,6 +10,7 @@ OFILES=tcs.$O\
utf.$O\
html.$O\
kuten208.$O\
kuten212.$O\
gb.$O\
gbk.$O\
ksc.$O\