Improve the posix preprocessor.

This fixes token pasting: operands are now expanded where
they should be expanded, and pasted before expansion where
they should be pasted first.

	#define CAT(a, b) a ## b
	#define BAR	3
	#define FOO	CAT(BAR, 3)
	FOO

now produces 33, while

	#define CAT(a, b) a ## b
	#define EOF	(-1)
	#define NOP(x)	x
	NOP(CAT(foo, EOF))
	CAT(,EOF)
	CAT(,)

produces

	fooEOF
	(-1)
	<empty>

respectively.
This commit is contained in:
Ori Bernstein 2020-03-17 22:03:25 -07:00
parent 52dc943702
commit 37b86df09f
7 changed files with 283 additions and 136 deletions

View file

@ -68,7 +68,7 @@ process(Tokenrow *trp)
trp->tp += 1;
control(trp);
} else if (!skipping && anymacros)
expandrow(trp, NULL, Notinmacro);
expandrow(trp, NULL);
if (skipping)
setempty(trp);
puttokens(trp);
@ -217,7 +217,7 @@ control(Tokenrow *trp)
case KLINE:
trp->tp = tp+1;
expandrow(trp, "<line>", Notinmacro);
expandrow(trp, "<line>");
tp = trp->bp+2;
kline:
if (tp+1>=trp->lp || tp->type!=NUMBER || tp+3<trp->lp

View file

@ -14,7 +14,7 @@ enum toktype { END, UNCLASS, NAME, NUMBER, STRING, CCON, NL, WS, DSHARP,
EQ, NEQ, LEQ, GEQ, LSH, RSH, LAND, LOR, PPLUS, MMINUS,
ARROW, SBRA, SKET, LP, RP, DOT, AND, STAR, PLUS, MINUS,
TILDE, NOT, SLASH, PCT, LT, GT, CIRC, OR, QUEST,
COLON, ASGN, COMMA, SHARP, SEMIC, CBRA, CKET,
COLON, ASGN, COMMA, XCOMMA, SHARP, SEMIC, CBRA, CKET,
ASPLUS, ASMINUS, ASSTAR, ASSLASH, ASPCT, ASCIRC, ASLSH,
ASRSH, ASOR, ASAND, ELLIPS,
DSHARP1, NAME1, DEFINED, UMINUS };
@ -107,11 +107,11 @@ void dodefine(Tokenrow *);
void doadefine(Tokenrow *, int);
void doinclude(Tokenrow *);
void doif(Tokenrow *, enum kwtype);
void expand(Tokenrow *, Nlist *, int);
void expand(Tokenrow *, Nlist *);
void builtin(Tokenrow *, int);
int gatherargs(Tokenrow *, Tokenrow **, int, int *);
void substargs(Nlist *, Tokenrow *, Tokenrow **);
void expandrow(Tokenrow *, char *, int);
void expandrow(Tokenrow *, char *);
void maketokenrow(int, Tokenrow *);
Tokenrow *copytokenrow(Tokenrow *, Tokenrow *);
Token *growtokenrow(Tokenrow *);
@ -120,7 +120,7 @@ void adjustrow(Tokenrow *, int);
void movetokenrow(Tokenrow *, Tokenrow *);
void insertrow(Tokenrow *, int, Tokenrow *);
void peektokens(Tokenrow *, char *);
void doconcat(Tokenrow *);
void glue(Tokenrow *, Token *, Token *);
Tokenrow *stringify(Tokenrow *);
int lookuparg(Nlist *, Token *);
long eval(Tokenrow *, int);

View file

@ -28,66 +28,67 @@ const struct pri {
char arity;
char ctype;
} priority[] = {
{ 0, 0, 0 }, /* END */
{ 0, 0, 0 }, /* UNCLASS */
{ 0, 0, 0 }, /* NAME */
{ 0, 0, 0 }, /* NUMBER */
{ 0, 0, 0 }, /* STRING */
{ 0, 0, 0 }, /* CCON */
{ 0, 0, 0 }, /* NL */
{ 0, 0, 0 }, /* WS */
{ 0, 0, 0 }, /* DSHARP */
{ 11, 2, RELAT }, /* EQ */
{ 11, 2, RELAT }, /* NEQ */
{ 12, 2, RELAT }, /* LEQ */
{ 12, 2, RELAT }, /* GEQ */
{ 13, 2, SHIFT }, /* LSH */
{ 13, 2, SHIFT }, /* RSH */
{ 7, 2, LOGIC }, /* LAND */
{ 6, 2, LOGIC }, /* LOR */
{ 0, 0, 0 }, /* PPLUS */
{ 0, 0, 0 }, /* MMINUS */
{ 0, 0, 0 }, /* ARROW */
{ 0, 0, 0 }, /* SBRA */
{ 0, 0, 0 }, /* SKET */
{ 3, 0, 0 }, /* LP */
{ 3, 0, 0 }, /* RP */
{ 0, 0, 0 }, /* DOT */
{ 10, 2, ARITH }, /* AND */
{ 15, 2, ARITH }, /* STAR */
{ 14, 2, ARITH }, /* PLUS */
{ 14, 2, ARITH }, /* MINUS */
{ 16, 1, UNARY }, /* TILDE */
{ 16, 1, UNARY }, /* NOT */
{ 15, 2, ARITH }, /* SLASH */
{ 15, 2, ARITH }, /* PCT */
{ 12, 2, RELAT }, /* LT */
{ 12, 2, RELAT }, /* GT */
{ 9, 2, ARITH }, /* CIRC */
{ 8, 2, ARITH }, /* OR */
{ 5, 2, SPCL }, /* QUEST */
{ 5, 2, SPCL }, /* COLON */
{ 0, 0, 0 }, /* ASGN */
{ 4, 2, 0 }, /* COMMA */
{ 0, 0, 0 }, /* SHARP */
{ 0, 0, 0 }, /* SEMIC */
{ 0, 0, 0 }, /* CBRA */
{ 0, 0, 0 }, /* CKET */
{ 0, 0, 0 }, /* ASPLUS */
{ 0, 0, 0 }, /* ASMINUS */
{ 0, 0, 0 }, /* ASSTAR */
{ 0, 0, 0 }, /* ASSLASH */
{ 0, 0, 0 }, /* ASPCT */
{ 0, 0, 0 }, /* ASCIRC */
{ 0, 0, 0 }, /* ASLSH */
{ 0, 0, 0 }, /* ASRSH */
{ 0, 0, 0 }, /* ASOR */
{ 0, 0, 0 }, /* ASAND */
{ 0, 0, 0 }, /* ELLIPS */
{ 0, 0, 0 }, /* DSHARP1 */
{ 0, 0, 0 }, /* NAME1 */
{ 16, 1, UNARY }, /* DEFINED */
{ 16, 0, UNARY }, /* UMINUS */
[END] { 0, 0, 0 },
[UNCLASS] { 0, 0, 0 },
[NAME] { 0, 0, 0 },
[NUMBER] { 0, 0, 0 },
[STRING] { 0, 0, 0 },
[CCON] { 0, 0, 0 },
[NL] { 0, 0, 0 },
[WS] { 0, 0, 0 },
[DSHARP] { 0, 0, 0 },
[EQ] { 11, 2, RELAT },
[NEQ] { 11, 2, RELAT },
[LEQ] { 12, 2, RELAT },
[GEQ] { 12, 2, RELAT },
[LSH] { 13, 2, SHIFT },
[RSH] { 13, 2, SHIFT },
[LAND] { 7, 2, LOGIC },
[LOR] { 6, 2, LOGIC },
[PPLUS] { 0, 0, 0 },
[MMINUS] { 0, 0, 0 },
[ARROW] { 0, 0, 0 },
[SBRA] { 0, 0, 0 },
[SKET] { 0, 0, 0 },
[LP] { 3, 0, 0 },
[RP] { 3, 0, 0 },
[DOT] { 0, 0, 0 },
[AND] { 10, 2, ARITH },
[STAR] { 15, 2, ARITH },
[PLUS] { 14, 2, ARITH },
[MINUS] { 14, 2, ARITH },
[TILDE] { 16, 1, UNARY },
[NOT] { 16, 1, UNARY },
[SLASH] { 15, 2, ARITH },
[PCT] { 15, 2, ARITH },
[LT] { 12, 2, RELAT },
[GT] { 12, 2, RELAT },
[CIRC] { 9, 2, ARITH },
[OR] { 8, 2, ARITH },
[QUEST] { 5, 2, SPCL },
[COLON] { 5, 2, SPCL },
[ASGN] { 0, 0, 0 },
[COMMA] { 4, 2, 0 },
[XCOMMA] { 4, 2, 0 },
[SHARP] { 0, 0, 0 },
[SEMIC] { 0, 0, 0 },
[CBRA] { 0, 0, 0 },
[CKET] { 0, 0, 0 },
[ASPLUS] { 0, 0, 0 },
[ASMINUS] { 0, 0, 0 },
[ASSTAR] { 0, 0, 0 },
[ASSLASH] { 0, 0, 0 },
[ASPCT] { 0, 0, 0 },
[ASCIRC] { 0, 0, 0 },
[ASLSH] { 0, 0, 0 },
[ASRSH] { 0, 0, 0 },
[ASOR] { 0, 0, 0 },
[ASAND] { 0, 0, 0 },
[ELLIPS] { 0, 0, 0 },
[DSHARP1] { 0, 0, 0 },
[NAME1] { 0, 0, 0 },
[DEFINED] { 16, 1, UNARY },
[UMINUS] { 16, 0, UNARY },
};
int evalop(struct pri);
@ -116,7 +117,7 @@ eval(Tokenrow *trp, int kw)
}
ntok = trp->tp - trp->bp;
kwdefined->val = KDEFINED; /* activate special meaning of defined */
expandrow(trp, "<if>", Notinmacro);
expandrow(trp, "<if>");
kwdefined->val = NAME;
vp = vals;
op = ops;
@ -165,7 +166,7 @@ eval(Tokenrow *trp, int kw)
case EQ: case NEQ: case LEQ: case GEQ: case LSH: case RSH:
case LAND: case LOR: case SLASH: case PCT:
case LT: case GT: case CIRC: case OR: case QUEST:
case COLON: case COMMA:
case COLON: case COMMA: case XCOMMA:
if (rand==0)
goto syntax;
if (evalop(priority[tp->type])!=0)

View file

@ -18,7 +18,7 @@ doinclude(Tokenrow *trp)
goto syntax;
if (trp->tp->type!=STRING && trp->tp->type!=LT) {
len = trp->tp - trp->bp;
expandrow(trp, "<include>", Notinmacro);
expandrow(trp, "<include>");
trp->tp = trp->bp+len;
}
if (trp->tp->type==STRING) {

View file

@ -138,7 +138,7 @@ syntax:
* Flag is NULL if more input can be gathered.
*/
void
expandrow(Tokenrow *trp, char *flag, int inmacro)
expandrow(Tokenrow *trp, char *flag)
{
Token *tp;
Nlist *np;
@ -170,7 +170,7 @@ expandrow(Tokenrow *trp, char *flag, int inmacro)
if (np->flag&ISMAC)
builtin(trp, np->val);
else {
expand(trp, np, inmacro);
expand(trp, np);
}
tp = trp->tp;
}
@ -184,7 +184,7 @@ expandrow(Tokenrow *trp, char *flag, int inmacro)
* (ordinarily the beginning of the expansion)
*/
void
expand(Tokenrow *trp, Nlist *np, int inmacro)
expand(Tokenrow *trp, Nlist *np)
{
Tokenrow ntr;
int ntokc, narg, i;
@ -193,12 +193,14 @@ expand(Tokenrow *trp, Nlist *np, int inmacro)
int hs;
copytokenrow(&ntr, np->vp); /* copy macro value */
if (np->ap==NULL) /* parameterless */
if (np->ap==NULL) { /* parameterless */
ntokc = 1;
else {
/* substargs for handling # and ## */
atr[0] = nil;
substargs(np, &ntr, atr);
} else {
ntokc = gatherargs(trp, atr, (np->flag&ISVARMAC) ? rowlen(np->ap) : 0, &narg);
if (narg<0) { /* not actually a call (no '(') */
/* error(WARNING, "%d %r\n", narg, trp); */
/* gatherargs has already pushed trp->tp to the next token */
return;
}
@ -214,8 +216,6 @@ expand(Tokenrow *trp, Nlist *np, int inmacro)
dofree(atr[i]);
}
}
if(!inmacro)
doconcat(&ntr); /* execute ## operators */
hs = newhideset(trp->tp->hideset, np);
for (tp=ntr.bp; tp<ntr.lp; tp++) { /* distribute hidesets */
if (tp->type==NAME) {
@ -228,8 +228,7 @@ expand(Tokenrow *trp, Nlist *np, int inmacro)
ntr.tp = ntr.bp;
insertrow(trp, ntokc, &ntr);
trp->tp -= rowlen(&ntr);
dofree(ntr.bp);
return;
free(ntr.bp);
}
/*
@ -255,7 +254,6 @@ gatherargs(Tokenrow *trp, Tokenrow **atr, int dots, int *narg)
if (trp->tp >= trp->lp) {
gettokens(trp, 0);
if ((trp->lp-1)->type==END) {
/* error(WARNING, "reach END\n"); */
trp->lp -= 1;
if (*narg>=0)
trp->tp -= ntok;
@ -326,7 +324,25 @@ gatherargs(Tokenrow *trp, Tokenrow **atr, int dots, int *narg)
}
return ntok;
}
/*
 * Decide whether the token at rtr->tp takes part in a ## paste.
 *
 * If rtr->tp sits on a ## that is not the first token of the row,
 * rtr->tp is first moved back one token (this side effect persists
 * for the caller).  When the token after rtr->tp is a ##, *ap is set
 * to rtr->tp (the left operand), *an to the token following the ##
 * (or left nil when the ## is the last token of the row), *ntok to
 * the number of row tokens covered by the paste (2 or 3), and 1 is
 * returned.  Otherwise 0 is returned with *ap and *an nil and *ntok
 * untouched.
 */
int
ispaste(Tokenrow *rtr, Token **ap, Token **an, int *ntok)
{
	Token *left;

	*ap = nil;
	*an = nil;

	/* on a non-leading ##: step back to the token on its left */
	if (rtr->tp != rtr->bp && rtr->tp->type == DSHARP)
		rtr->tp--;

	left = rtr->tp;
	/* no following token, or the following token is not ##: not a paste */
	if (left + 1 == rtr->lp || left[1].type != DSHARP)
		return 0;

	*ap = left;
	if (left + 2 != rtr->lp)
		*an = left + 2;
	/* the ## itself, plus one for each operand that is present */
	*ntok = 1 + (*ap != nil) + (*an != nil);
	return 1;
}
/*
* substitute the argument list into the replacement string
* This would be simple except for ## and #
@ -334,12 +350,14 @@ gatherargs(Tokenrow *trp, Tokenrow **atr, int dots, int *narg)
void
substargs(Nlist *np, Tokenrow *rtr, Tokenrow **atr)
{
Tokenrow tatr;
Token *tp;
int ntok, argno;
Tokenrow ttr;
Token *tp, *ap, *an, *pp, *pn;
int ntok, argno, hs;
for (rtr->tp=rtr->bp; rtr->tp<rtr->lp; ) {
if (rtr->tp->type==SHARP) { /* string operator */
if(rtr->tp->hideset && checkhideset(rtr->tp->hideset, np)) {
rtr->tp++;
} else if (rtr->tp->type==SHARP) { /* string operator */
tp = rtr->tp;
rtr->tp += 1;
if ((argno = lookuparg(np, rtr->tp))<0) {
@ -349,24 +367,52 @@ substargs(Nlist *np, Tokenrow *rtr, Tokenrow **atr)
ntok = 1 + (rtr->tp - tp);
rtr->tp = tp;
insertrow(rtr, ntok, stringify(atr[argno]));
continue;
}
if (rtr->tp->type==NAME
&& (argno = lookuparg(np, rtr->tp)) >= 0) {
if (rtr->tp < rtr->bp)
error(ERROR, "access out of bounds");
if ((rtr->tp+1)->type==DSHARP
|| rtr->tp!=rtr->bp && (rtr->tp-1)->type==DSHARP)
insertrow(rtr, 1, atr[argno]);
else {
copytokenrow(&tatr, atr[argno]);
expandrow(&tatr, "<macro>", Inmacro);
insertrow(rtr, 1, &tatr);
dofree(tatr.bp);
} else if (ispaste(rtr, &ap, &an, &ntok)) { /* first token, just do the next one */
pp = ap;
pn = an;
if (ap && (argno = lookuparg(np, ap)) >= 0){
pp = nil;
if(atr[argno]->tp != atr[argno]->lp)
pp = atr[argno]->lp - 1;
}
continue;
if (an && (argno = lookuparg(np, an)) >= 0) {
pn = nil;
if(atr[argno]->tp != atr[argno]->lp)
pn = atr[argno]->lp - 1;
}
glue(&ttr, pp, pn);
insertrow(rtr, ntok, &ttr);
free(ttr.bp);
} else if (rtr->tp->type==NAME) {
if((argno = lookuparg(np, rtr->tp)) >= 0) {
if (rtr->tp < rtr->bp) {
error(ERROR, "access out of bounds");
continue;
}
copytokenrow(&ttr, atr[argno]);
expandrow(&ttr, "<macro>");
insertrow(rtr, 1, &ttr);
free(ttr.bp);
} else {
maketokenrow(1, &ttr);
ttr.lp = ttr.tp + 1;
*ttr.tp = *rtr->tp;
hs = newhideset(rtr->tp->hideset, np);
if(ttr.tp->hideset == 0)
ttr.tp->hideset = hs;
else
ttr.tp->hideset = unionhideset(ttr.tp->hideset, hs);
expandrow(&ttr, (char*)np->name);
for(tp = ttr.bp; tp != ttr.lp; tp++)
if(tp->type == COMMA)
tp->type = XCOMMA;
insertrow(rtr, 1, &ttr);
dofree(ttr.bp);
}
} else {
rtr->tp++;
}
rtr->tp++;
}
}
@ -374,41 +420,35 @@ substargs(Nlist *np, Tokenrow *rtr, Tokenrow **atr)
* Paste the text of tp and tn (either may be nil) and retokenize
* the result into ntr, leaving at most one token there
*/
void
doconcat(Tokenrow *trp)
glue(Tokenrow *ntr, Token *tp, Token *tn)
{
Token *ltp, *ntp;
Tokenrow ntr;
int len;
int np, nn;
char *tt, *p, *n;
for (trp->tp=trp->bp; trp->tp<trp->lp; trp->tp++) {
if (trp->tp->type==DSHARP1)
trp->tp->type = DSHARP;
else if (trp->tp->type==DSHARP) {
char tt[128];
ltp = trp->tp-1;
ntp = trp->tp+1;
if (ltp<trp->bp || ntp>=trp->lp) {
error(ERROR, "## occurs at border of replacement");
continue;
}
len = ltp->len + ntp->len;
strncpy((char*)tt, (char*)ltp->t, ltp->len);
strncpy((char*)tt+ltp->len, (char*)ntp->t, ntp->len);
tt[len] = '\0';
setsource("<##>", -1, tt);
maketokenrow(3, &ntr);
gettokens(&ntr, 1);
unsetsource();
if (ntr.lp-ntr.bp!=2 || ntr.bp->type==UNCLASS)
error(WARNING, "Bad token %r produced by ##", &ntr);
ntr.lp = ntr.bp+1;
trp->tp = ltp;
makespace(&ntr);
insertrow(trp, (ntp-ltp)+1, &ntr);
dofree(ntr.bp);
trp->tp--;
np = tp ? tp->len : 0;
nn = tn ? tn->len : 0;
tt = domalloc(np + nn + 1);
if(tp)
memcpy(tt, tp->t, tp->len);
if(tn)
memcpy(tt+np, tn->t, tn->len);
tt[np+nn] = '\0';
setsource("<##>", -1, tt);
maketokenrow(3, ntr);
gettokens(ntr, 1);
unsetsource();
dofree(tt);
if (np + nn == 0) {
ntr->lp = ntr->bp;
} else {
if (ntr->lp - ntr->bp!=2 || ntr->bp->type==UNCLASS) {
p = tp ? (char*)tp->t : "<empty>";
n = tn ? (char*)tn->t : "<empty>";
error(WARNING, "Bad token %r produced by %s ## %s", &ntr, p, n);
}
ntr->lp = ntr->bp+1;
}
makespace(ntr);
}
/*

View file

@ -1,4 +1,61 @@
#define M1()
#define M2(A1) A1()
M2(M1)
M2(P1)
#define NOP(x) x
#define CAT(a, b) a ## b
#define EOF (-1)
x NOP(CAT(foo, EOF)) y
x NOP(CAT(EOF, foo)) y
x CAT(, EOF) y
y CAT(foo,) x
x CAT(,foo) y
X NOP(CAT(,)) y
#define NCAT(a) foo ## a
NCAT(bar)
#define XCAT(a) ## a
foo XCAT(bar)
#define CAT3(foo) a##foo##b
CAT3(blah)
#define BAR 3
#define FOO CAT(BAR, 3)
FOO
/*
* CURRENTLY BROKEN:
* __VA_ARGS__ requires at least one item.
* It should accept an empty list.
#define xprint(a, ...) print(a, __VA_ARGS__)
xprint("hi", "there")
xprint("hi")
*/
#define C a,b
#define X(a) a
#define Y X(C)
Y
#define x 3
#define f(a) f(x * (a))
#undef x
#define x 2
#define g f
#define z z[0]
#define h g(~
#define m(a) a(w)
#define w 0,1
#define t(a) a
#define p() int
#define q(x) x
#define r(x,y) x ## y
#define str(x) # x
f(y+1) + f(f(z)) % t(t(g)(0) + t)(1);
g(x+(3,4)-w) | h 5) & m
(f)^m(m);
/*
* CURRENTLY BROKEN:
* mac() needs at least one argument.
* It should treat no args as a single empty arg list.
p() i[q()] = { q(1), r(2,3), r(4,), r(,5), r(,) };
char c[2][6] = { str(hello), str() };
*/

View file

@ -0,0 +1,49 @@
#line 1 "/usr/ori/src/cpp/test.c"
x fooEOF y
x EOFfoo y
x(-1) y
y foo x
x foo y
X y
foobar
foo ## bar
ablahb
33
#line 32 "/usr/ori/src/cpp/test.c"
a,b
f(2 * (y+1)) + f(2 * (f(2 * (z[0])))) % f(2 * (0)) + t(1);
f(2 * (2+(3,4)- 0,1)) | f(2 * (~ 5)) & f(2 * (0,1))^ m(0,1);
#line 55 "/usr/ori/src/cpp/test.c"