5c: apply Richard Miller's 5c-nan-cmp patch (from sources)
On ARM, it turns out that comparisons with NaN can be made to do the right thing with no code penalty, by a more careful selection of condition code values in the subsequent conditional branch. The meaning of the CC bits in the PSR is subtly different when they've been copied from the floating-point status register. The suggested patch is 5c-nan-cmp (works on both VFP and the emulated ARM7500).
This commit is contained in:
parent
171aafcc22
commit
539fe6990f
|
@ -701,7 +701,7 @@ boolgen(Node *n, int true, Node *nn)
|
||||||
if(true)
|
if(true)
|
||||||
o = comrel[relindex(o)];
|
o = comrel[relindex(o)];
|
||||||
if(typefd[n->type->etype]) {
|
if(typefd[n->type->etype]) {
|
||||||
gopcode(o, nodfconst(0), &nod, Z);
|
gopcode(true ? o | BTRUE : o, nodfconst(0), &nod, Z);
|
||||||
} else
|
} else
|
||||||
gopcode(o, nodconst(0), &nod, Z);
|
gopcode(o, nodconst(0), &nod, Z);
|
||||||
regfree(&nod);
|
regfree(&nod);
|
||||||
|
@ -800,14 +800,14 @@ boolgen(Node *n, int true, Node *nn)
|
||||||
regalloc(&nod, r, nn);
|
regalloc(&nod, r, nn);
|
||||||
cgenrel(r, &nod, 1);
|
cgenrel(r, &nod, 1);
|
||||||
o = invrel[relindex(o)];
|
o = invrel[relindex(o)];
|
||||||
gopcode(o, l, &nod, Z);
|
gopcode(true ? o | BTRUE : o, l, &nod, Z);
|
||||||
regfree(&nod);
|
regfree(&nod);
|
||||||
goto com;
|
goto com;
|
||||||
}
|
}
|
||||||
if(sconst(r)) {
|
if(sconst(r)) {
|
||||||
regalloc(&nod, l, nn);
|
regalloc(&nod, l, nn);
|
||||||
cgenrel(l, &nod, 1);
|
cgenrel(l, &nod, 1);
|
||||||
gopcode(o, r, &nod, Z);
|
gopcode(true ? o | BTRUE : o, r, &nod, Z);
|
||||||
regfree(&nod);
|
regfree(&nod);
|
||||||
goto com;
|
goto com;
|
||||||
}
|
}
|
||||||
|
@ -822,7 +822,7 @@ boolgen(Node *n, int true, Node *nn)
|
||||||
regalloc(&nod1, l, Z);
|
regalloc(&nod1, l, Z);
|
||||||
cgenrel(l, &nod1, 1);
|
cgenrel(l, &nod1, 1);
|
||||||
}
|
}
|
||||||
gopcode(o, &nod, &nod1, Z);
|
gopcode(true ? o | BTRUE : o, &nod, &nod1, Z);
|
||||||
regfree(&nod);
|
regfree(&nod);
|
||||||
regfree(&nod1);
|
regfree(&nod1);
|
||||||
|
|
||||||
|
|
|
@ -14,6 +14,7 @@
|
||||||
#define SZ_VLONG 8
|
#define SZ_VLONG 8
|
||||||
#define SZ_DOUBLE 8
|
#define SZ_DOUBLE 8
|
||||||
#define FNX 100
|
#define FNX 100
|
||||||
|
#define BTRUE 0x1000
|
||||||
|
|
||||||
typedef struct Adr Adr;
|
typedef struct Adr Adr;
|
||||||
typedef struct Prog Prog;
|
typedef struct Prog Prog;
|
||||||
|
|
|
@ -929,12 +929,14 @@ gins(int a, Node *f, Node *t)
|
||||||
void
|
void
|
||||||
gopcode(int o, Node *f1, Node *f2, Node *t)
|
gopcode(int o, Node *f1, Node *f2, Node *t)
|
||||||
{
|
{
|
||||||
int a, et;
|
int a, et, true;
|
||||||
Adr ta;
|
Adr ta;
|
||||||
|
|
||||||
et = TLONG;
|
et = TLONG;
|
||||||
if(f1 != Z && f1->type != T)
|
if(f1 != Z && f1->type != T)
|
||||||
et = f1->type->etype;
|
et = f1->type->etype;
|
||||||
|
true = o & BTRUE;
|
||||||
|
o &= ~BTRUE;
|
||||||
a = AGOK;
|
a = AGOK;
|
||||||
switch(o) {
|
switch(o) {
|
||||||
case OAS:
|
case OAS:
|
||||||
|
@ -1076,15 +1078,24 @@ gopcode(int o, Node *f1, Node *f2, Node *t)
|
||||||
break;
|
break;
|
||||||
case OLT:
|
case OLT:
|
||||||
a = ABLT;
|
a = ABLT;
|
||||||
|
/* ensure NaN comparison is always false */
|
||||||
|
if(typefd[et] && !true)
|
||||||
|
a = ABMI;
|
||||||
break;
|
break;
|
||||||
case OLE:
|
case OLE:
|
||||||
a = ABLE;
|
a = ABLE;
|
||||||
|
if(typefd[et] && !true)
|
||||||
|
a = ABLS;
|
||||||
break;
|
break;
|
||||||
case OGE:
|
case OGE:
|
||||||
a = ABGE;
|
a = ABGE;
|
||||||
|
if(typefd[et] && true)
|
||||||
|
a = ABPL;
|
||||||
break;
|
break;
|
||||||
case OGT:
|
case OGT:
|
||||||
a = ABGT;
|
a = ABGT;
|
||||||
|
if(typefd[et] && true)
|
||||||
|
a = ABHI;
|
||||||
break;
|
break;
|
||||||
case OLO:
|
case OLO:
|
||||||
a = ABLO;
|
a = ABLO;
|
||||||
|
|
Loading…
Reference in a new issue