From 8cc04381d1a5fc33c2e772283c200bd22fa55e99 Mon Sep 17 00:00:00 2001 From: cinap_lenrek Date: Wed, 10 Apr 2013 20:47:01 +0200 Subject: [PATCH] postscript: remove private copy of utf8 implementation (from sources) --- sys/src/cmd/postscript/common/mkfile | 4 +- sys/src/cmd/postscript/common/rune.c | 203 --------------------------- sys/src/cmd/postscript/common/rune.h | 20 --- 3 files changed, 1 insertion(+), 226 deletions(-) delete mode 100644 sys/src/cmd/postscript/common/rune.c delete mode 100644 sys/src/cmd/postscript/common/rune.h diff --git a/sys/src/cmd/postscript/common/mkfile b/sys/src/cmd/postscript/common/mkfile index 821c2b1b6..a5e9de392 100644 --- a/sys/src/cmd/postscript/common/mkfile +++ b/sys/src/cmd/postscript/common/mkfile @@ -7,7 +7,6 @@ OFILES=bbox.$O\ glob.$O\ misc.$O\ request.$O\ - rune.$O\ tempnam.$O\ getopt.$O\ @@ -15,8 +14,7 @@ HFILES=comments.h\ gen.h\ ext.h\ request.h\ - path.h\ - rune.h\ + path.h T1 - */ - c = *(unsigned char*)str; - if(c < Tx) { - *rune = c; - return 1; - } - - /* - * two character sequence - * 0080-07FF => T2 Tx - */ - c1 = *(unsigned char*)(str+1) ^ Tx; - if(c1 & Testx) - goto bad; - if(c < T3) { - if(c < T2) - goto bad; - l = ((c << Bitx) | c1) & Rune2; - if(l <= Rune1) - goto bad; - *rune = l; - return 2; - } - - /* - * three character sequence - * 0800-FFFF => T3 Tx Tx - */ - c2 = *(unsigned char*)(str+2) ^ Tx; - if(c2 & Testx) - goto bad; - if(c < T4) { - l = ((((c << Bitx) | c1) << Bitx) | c2) & Rune3; - if(l <= Rune2) - goto bad; - *rune = l; - return 3; - } - - /* - * four character sequence - * 10000-10FFFF => T4 Tx Tx Tx - */ - if(UTFmax >= 4) { - c3 = *(unsigned char*)(str+3) ^ Tx; - if(c3 & Testx) - goto bad; - if(c < T5) { - l = ((((((c << Bitx) | c1) << Bitx) | c2) << Bitx) | c3) & Rune4; - if(l <= Rune3) - goto bad; - if(l > Runemax) - goto bad; - *rune = l; - return 4; - } - } - - /* - * bad decoding - */ -bad: - *rune = Bad; - return 1; -} - -int -runetochar(char *str, Rune *rune) -{ - long c; - - c = *rune; - if(c > Runemax) - c = Runeerror; - - /* - * one character sequence - * 00000-0007F => 00-7F - */ - if(c <= Rune1) { - str[0] = c; - return 1; - } - - /* - * two character sequence - * 0080-07FF => T2 Tx - */ - if(c <= Rune2) { - str[0] = T2 | (c >> 1*Bitx); - str[1] = Tx | (c & Maskx); - return 2; - } - - /* - * three character sequence - * 0800-FFFF => T3 Tx Tx - */ - if(c <= Rune3) { - str[0] = T3 | (c >> 2*Bitx); - str[1] = Tx | ((c >> 1*Bitx) & Maskx); - str[2] = Tx | (c & Maskx); - return 3; - } - - /* - * four character sequence - * 10000-1FFFFF => T4 Tx Tx Tx - */ - str[0] = T4 | (c >> 3*Bitx); - str[1] = Tx | ((c >> 2*Bitx) & Maskx); - str[2] = Tx | ((c >> 1*Bitx) & Maskx); - str[3] = Tx | (c & Maskx); - return 4; -} - -int -runelen(long c) -{ - Rune rune; - char str[UTFmax]; - - rune = c; - return runetochar(str, &rune); -} - -int -runenlen(Rune *r, int nrune) -{ - int nb, c; - - nb = 0; - while(nrune--) { - c = *r++; - if(c <= Rune1) - nb++; - else - if(c <= Rune2) - nb += 2; - else - if(c <= Rune3 || c > Runemax) - nb += 3; - else - nb += 4; - } - return nb; -} - -int -fullrune(char *str, int n) -{ - int c; - - if(n <= 0) - return 0; - c = *(unsigned char*)str; - if(c < Tx) - return 1; - if(c < T3) - return n >= 2; - if(UTFmax == 3 || c < T4) - return n >= 3; - return n >= 4; -} - diff --git a/sys/src/cmd/postscript/common/rune.h b/sys/src/cmd/postscript/common/rune.h deleted file mode 100644 index 84301a8ea..000000000 --- a/sys/src/cmd/postscript/common/rune.h +++ /dev/null @@ -1,20 +0,0 @@ -/* - * - * Rune declarations - for supporting UTF encoding. - * - */ - -#define RUNELIB 1 - -#ifdef RUNELIB -typedef unsigned short Rune; - -enum -{ - UTFmax = 3, /* maximum bytes per rune */ - Runesync = 0x80, /* cannot represent part of a utf sequence (<) */ - Runeself = 0x80, /* rune and utf sequences are the same (<) */ - Runeerror = 0xFFFD, /* decoding error in utf */ - Runemax = 0xFFFF, /* 16 bit rune */ -}; -#endif