cpp: handle 4-byte UTF-8 sequences (21-bit runes)
This commit is contained in:
parent
8003c8b1e2
commit
917da0089d
1 changed file with 7 additions and 2 deletions
|
@@ -29,6 +29,7 @@
|
||||||
|
|
||||||
#define UTF2(c) ((c)>=0xA0 && (c)<0xE0) /* 2-char UTF seq */
|
#define UTF2(c) ((c)>=0xA0 && (c)<0xE0) /* 2-char UTF seq */
|
||||||
#define UTF3(c) ((c)>=0xE0 && (c)<0xF0) /* 3-char UTF seq */
|
#define UTF3(c) ((c)>=0xE0 && (c)<0xF0) /* 3-char UTF seq */
|
||||||
|
#define UTF4(c) ((c)>=0xF0 && (c)<0xF8) /* 4-char UTF seq */
|
||||||
|
|
||||||
/* character classes */
|
/* character classes */
|
||||||
#define C_WS 1
|
#define C_WS 1
|
||||||
|
@@ -259,7 +260,7 @@ expandlex(void)
|
||||||
case C_ALPH:
|
case C_ALPH:
|
||||||
for (j=0; j<=256; j++)
|
for (j=0; j<=256; j++)
|
||||||
if ('a'<=j&&j<='z' || 'A'<=j&&j<='Z'
|
if ('a'<=j&&j<='z' || 'A'<=j&&j<='Z'
|
||||||
|| UTF2(j) || UTF3(j) || j=='_')
|
|| UTF2(j) || UTF3(j) || UTF4(j) || j=='_')
|
||||||
bigfsm[j][fp->state] = nstate;
|
bigfsm[j][fp->state] = nstate;
|
||||||
continue;
|
continue;
|
||||||
case C_NUM:
|
case C_NUM:
|
||||||
|
@@ -274,7 +275,7 @@ expandlex(void)
|
||||||
/* install special cases for ? (trigraphs), \ (splicing), runes */
|
/* install special cases for ? (trigraphs), \ (splicing), runes */
|
||||||
for (i=0; i<MAXSTATE; i++) {
|
for (i=0; i<MAXSTATE; i++) {
|
||||||
for (j=0; j<0xFF; j++)
|
for (j=0; j<0xFF; j++)
|
||||||
if (j=='?' || j=='\\' || UTF2(j) || UTF3(j)) {
|
if (j=='?' || j=='\\' || UTF2(j) || UTF3(j) || UTF4(j)) {
|
||||||
if (bigfsm[j][i]>0)
|
if (bigfsm[j][i]>0)
|
||||||
bigfsm[j][i] = ~bigfsm[j][i];
|
bigfsm[j][i] = ~bigfsm[j][i];
|
||||||
bigfsm[j][i] &= ~QBSBIT;
|
bigfsm[j][i] &= ~QBSBIT;
|
||||||
|
@@ -393,6 +394,10 @@ gettokens(Tokenrow *trp, int reset)
|
||||||
runelen = 3;
|
runelen = 3;
|
||||||
goto reswitch;
|
goto reswitch;
|
||||||
}
|
}
|
||||||
|
if (UTF4(c)) {
|
||||||
|
runelen = 4;
|
||||||
|
goto reswitch;
|
||||||
|
}
|
||||||
error(WARNING, "Lexical botch in cpp");
|
error(WARNING, "Lexical botch in cpp");
|
||||||
ip += runelen;
|
ip += runelen;
|
||||||
runelen = 1;
|
runelen = 1;
|
||||||
|
|
Loading…
Reference in a new issue