plan9fox/sys/src/9/kw/fpiarm.c
cinap_lenrek 24057fd4f4 kernel: introduce per process FPU struct (PFPU) for more flexible machine specific fpu handling
introducing the PFPU structure which allows the machine specific
code some flexibility on how to handle the FPU process state.

for example, in the pc and pc64 kernel, the FPsave structure is
around 512 bytes. with avx512, it could grow up to 2K. instead
of embedding that into the Proc structure, it is more effective
to allocate it on first use of the fpu, as most processes do not
use simd or floating point in the first place. also, the FPsave
structure has special 16 byte alignment constraint, which further
favours dynamic allocation.

this gets rid of the memmoves in pc/pc64 kernels for the alignment.

there is also devproc, which is now checking if the fpsave area
is actually valid before reading it, avoiding debuggers to see
garbage data.

the Notsave structure is gone now, as it was not used on any
machine.
2017-11-04 20:08:22 +01:00

773 lines
15 KiB
C

/*
* this doesn't attempt to implement ARM floating-point properties
* that aren't visible in the Inferno environment.
* all arithmetic is done in double precision.
* the FP trap status isn't updated.
*/
#include "u.h"
#include "../port/lib.h"
#include "mem.h"
#include "dat.h"
#include "fns.h"
#include "ureg.h"
#include "arm.h"
#include "fpi.h"
/* undef this if correct kernel r13 isn't in Ureg;
* check calculation in fpiarm below
*/
/*
 * REG(ur, x): lvalue (as a long) for integer register x of the Ureg
 * pointed to by ur; the byte offset of each register is looked up in roff[].
 */
#define REG(ur, x) (*(long*)(((char*)(ur))+roff[(x)]))
/*
 * FR(ufp, x): lvalue for emulated floating-point register x, viewing
 * the ufp->regs[] entry as an Internal.  The index is reduced with the
 * mask (Nfpctlregs - 1), so out-of-range register numbers wrap around.
 * NOTE(review): the masking is only a proper wrap if Nfpctlregs is a
 * power of two -- confirm against its definition.
 */
#define FR(ufp, x) (*(Internal*)(ufp)->regs[(x)&(Nfpctlregs - 1)])
typedef struct FP2 FP2;
typedef struct FP1 FP1;
/*
 * FP2: one entry of a two-operand opcode table.
 * name is the mnemonic printed in debug output; f receives the two
 * source operands by value and a pointer to the destination.
 * A nil f marks an opcode that is not implemented.
 */
struct FP2 {
char* name;
void (*f)(Internal, Internal, Internal*);
};
/*
 * FP1: one entry of a one-operand opcode table.
 * f receives a pointer to the source and a pointer to the destination.
 * A nil f marks an opcode that is not implemented.
 */
struct FP1 {
char* name;
void (*f)(Internal*, Internal*);
};
/*
 * Condition flag bits, positioned as the top four bits of a status
 * word (they are masked into ur->psr and ufp->status below), and the
 * register number of the program counter.
 */
enum {
N = 1<<31,
Z = 1<<30,
C = 1<<29,
V = 1<<28,
REGPC = 15,
};
/*
 * Compile-time switch: when non-zero, every emulated instruction is
 * traced with print().
 */
enum {
fpemudebug = 0,
};
#undef OFR
/* OFR(X): byte offset of member X within a Ureg */
#define OFR(X) ((ulong)&((Ureg*)0)->X)
/*
 * roff[i] is the byte offset within Ureg of integer register i
 * (r0..r14, then pc as register 15); used by the REG() macro.
 */
static int roff[] = {
OFR(r0), OFR(r1), OFR(r2), OFR(r3),
OFR(r4), OFR(r5), OFR(r6), OFR(r7),
OFR(r8), OFR(r9), OFR(r10), OFR(r11),
OFR(r12), OFR(r13), OFR(r14), OFR(pc),
};
/*
 * The eight immediate constants an FPA instruction can name in place
 * of a source register, already in Internal form.  fpconst[0] (0.0) is
 * also used to initialise the emulated registers, and fpconst[6] (0.5)
 * is used by frnd.
 */
static Internal fpconst[8] = { /* indexed by op&7 (ARM 7500 FPA) */
/* s, e, l, h */
{0, 0x1, 0x00000000, 0x00000000}, /* 0.0 */
{0, 0x3FF, 0x00000000, 0x08000000}, /* 1.0 */
{0, 0x400, 0x00000000, 0x08000000}, /* 2.0 */
{0, 0x400, 0x00000000, 0x0C000000}, /* 3.0 */
{0, 0x401, 0x00000000, 0x08000000}, /* 4.0 */
{0, 0x401, 0x00000000, 0x0A000000}, /* 5.0 */
{0, 0x3FE, 0x00000000, 0x08000000}, /* 0.5 */
{0, 0x402, 0x00000000, 0x0A000000}, /* 10.0 */
};
/*
* arm binary operations
*/
/*
 * Signed addition of m and n into *d.
 * Operands with equal signs go to fpiadd, operands with different
 * signs go to fpisub.  The operands are private copies, so the callee
 * may modify them freely.
 */
static void
fadd(Internal m, Internal n, Internal *d)
{
	if(m.s != n.s)
		fpisub(&m, &n, d);
	else
		fpiadd(&m, &n, d);
}
/*
 * Subtraction: flip the sign of (the local copy of) m, then perform a
 * signed addition of m and n into *d.
 */
static void
fsub(Internal m, Internal n, Internal *d)
{
	m.s ^= 1;
	if(m.s != n.s)
		fpisub(&m, &n, d);
	else
		fpiadd(&m, &n, d);
}
/*
 * Reverse subtraction: flip the sign of (the local copy of) n, then
 * perform a signed addition with the operands passed in the opposite
 * order to fsub (n first, m second).
 */
static void
fsubr(Internal m, Internal n, Internal *d)
{
	n.s ^= 1;
	if(n.s != m.s)
		fpisub(&n, &m, d);
	else
		fpiadd(&n, &m, d);
}
/*
 * Multiply: hands private copies of m and n to fpimul, result in *d.
 */
static void
fmul(Internal m, Internal n, Internal *d)
{
fpimul(&m, &n, d);
}
/*
 * Divide: hands private copies of m and n, in that order, to fpidiv;
 * result in *d.
 */
static void
fdiv(Internal m, Internal n, Internal *d)
{
fpidiv(&m, &n, d);
}
/*
 * Reverse divide: same as fdiv but with the operands swapped in the
 * call to fpidiv (n first, m second); result in *d.
 */
static void
fdivr(Internal m, Internal n, Internal *d)
{
fpidiv(&n, &m, d);
}
/*
* arm unary operations
*/
/*
 * Move: copy *m to *d unchanged.
 */
static void
fmov(Internal *m, Internal *d)
{
*d = *m;
}
/*
 * Negated move: *d becomes a copy of *m with the sign bit inverted.
 * Works when m and d point at the same register.
 */
static void
fmovn(Internal *m, Internal *d)
{
	Internal neg;

	neg = *m;
	neg.s ^= 1;
	*d = neg;
}
/*
 * Absolute value: *d becomes a copy of *m with the sign cleared.
 * Works when m and d point at the same register.
 */
static void
fabsf(Internal *m, Internal *d)
{
	Internal mag;

	mag = *m;
	mag.s = 0;
	*d = mag;
}
/*
 * Round *m to an integral value in *d.
 * fpconst[6] (0.5) is combined with *m using fsub when *m is negative
 * and fadd otherwise; unless the result is a special value it is then
 * rounded with fpiround and its low-order fraction bits are cleared.
 */
static void
frnd(Internal *m, Internal *d)
{
	short ex;

	if(m->s)
		fsub(fpconst[6], *m, d);
	else
		fadd(fpconst[6], *m, d);
	if(IsWeird(d))
		return;
	fpiround(d);

	/* unbiased exponent plus one selects how many fraction bits to keep */
	ex = (d->e - ExpBias) + 1;
	if(ex <= 0){
		SetZero(d);
		return;
	}
	if(ex <= FractBits){
		/* everything kept lives in the high word */
		d->l = 0;
		if(ex < FractBits)
			d->h &= ~((1<<(FractBits-ex))-1);
		return;
	}
	/* high word kept whole; trim the low word if anything is left to trim */
	if(ex < 2*FractBits)
		d->l &= ~((1<<(2*FractBits - ex))-1);
}
/*
* ARM 7500 FPA opcodes
*/
/*
 * FPA one-operand operations, indexed in fpaemu by (op>>20)&0xF.
 * Entries with a nil function (including all those not listed) are
 * reported as unimplemented.
 */
static FP1 optab1[16] = { /* Fd := OP Fm */
[0] {"MOVF", fmov},
[1] {"NEGF", fmovn},
[2] {"ABSF", fabsf},
[3] {"RNDF", frnd},
[4] {"SQTF", /*fsqt*/0},
/* LOG, LGN, EXP, SIN, COS, TAN, ASN, ACS, ATN all `deprecated' */
/* URD and NRM aren't implemented */
};
/*
 * FPA two-operand operations, indexed in fpaemu by (op>>20)&0xF.
 * Each function is called as f(Fm, Fn, &Fd).  Entries with a nil
 * function (including all those not listed) are reported as
 * unimplemented.
 */
static FP2 optab2[16] = { /* Fd := Fn OP Fm */
[0] {"ADDF", fadd},
[1] {"MULF", fmul},
[2] {"SUBF", fsub},
[3] {"RSUBF", fsubr},
[4] {"DIVF", fdiv},
[5] {"RDIVF", fdivr},
/* POW, RPW deprecated */
[8] {"REMF", /*frem*/0},
[9] {"FMF", fmul}, /* fast multiply */
[10] {"FDV", fdiv}, /* fast divide */
[11] {"FRD", fdivr}, /* fast reverse divide */
/* POL deprecated */
};
/*
* ARM VFP opcodes
*/
/*
 * VFP one-operand operations.  vfpemu indexes this table with
 * (o2<<1)|(o3>>1), or with 0 for the move-immediate form.
 * Unlisted entries have a nil function and are reported as
 * unimplemented.
 */
static FP1 voptab1[32] = { /* Vd := OP Vm */
[0] {"MOVF", fmov},
[1] {"ABSF", fabsf},
[2] {"NEGF", fmovn},
[15] {"CVTF", fmov},
};
/*
 * VFP two-operand operations.  vfpemu indexes this table with
 * ((o1&0x3)<<1) | (o1&0x8) | (o3&0x1) and calls f(Vm, Vn, &Vd).
 * Unlisted entries have a nil function and are reported as
 * unimplemented.
 */
static FP2 voptab2[16] = { /* Vd := Vn FOP Fm */
[4] {"MULF", fmul},
[6] {"ADDF", fadd},
[7] {"SUBF", fsub},
[8] {"DIVF", fdiv},
};
/*
 * Compare *n with *m and return the resulting condition flags:
 *	V|C	either operand is a special value (IsWeird)
 *	C	fpicmp reports a positive result
 *	C|Z	fpicmp reports zero
 *	N	fpicmp reports a negative result
 * The operands themselves are not modified; rounded copies are compared.
 */
static ulong
fcmp(Internal *n, Internal *m)
{
	int order;
	Internal a, b;

	/* BUG: should trap if not masked */
	if(IsWeird(m) || IsWeird(n))
		return V|C;

	a = *n;
	b = *m;
	fpiround(&a);
	fpiround(&b);
	order = fpicmp(&a, &b);
	if(order < 0)
		return N;
	if(order == 0)
		return C|Z;
	return C;
}
/*
 * Load floating-point register d from memory at address ea.
 * f converts the memory image into Internal form; n is the operand
 * size in bytes and is only used to pick the mnemonic in debug output.
 */
static void
fld(void (*f)(Internal*, void*), int d, ulong ea, int n, FPsave *ufp)
{
	f(&FR(ufp, d), (void*)ea);
	if(fpemudebug)
		print("MOV%c #%lux, F%d\n", n==8? 'D': 'F', ea, d);
}
/*
 * Store floating-point register s to memory at address ea.
 * f converts from Internal form to the memory image; it is handed a
 * copy of the register, so the register itself is left untouched.
 * n is the operand size in bytes and is only used to pick the
 * mnemonic in debug output.
 */
static void
fst(void (*f)(void*, Internal*), ulong ea, int s, int n, FPsave *ufp)
{
	Internal val;

	val = FR(ufp, s);
	if(fpemudebug)
		print("MOV%c F%d,#%lux\n", n==8? 'D': 'F', s, ea);
	f((void*)ea, &val);
}
/*
 * Test condition code c (0..15, the top nibble of an instruction)
 * against the N, Z, C and V flag bits held in cc.
 * Returns non-zero if the condition is satisfied, zero otherwise.
 */
static int
condok(int cc, int c)
{
	int nv;

	nv = cc & (N|V);	/* the N and V flags in isolation */
	switch(c){
	case 0:		/* Z set */
		return cc&Z;
	case 1:		/* Z clear */
		return !(cc&Z);
	case 2:		/* C set */
		return cc&C;
	case 3:		/* C clear */
		return !(cc&C);
	case 4:		/* N set */
		return cc&N;
	case 5:		/* N clear */
		return !(cc&N);
	case 6:		/* V set */
		return cc&V;
	case 7:		/* V clear */
		return !(cc&V);
	case 8:		/* C set and Z clear */
		return (cc&(C|Z)) == C;
	case 9:		/* C clear or Z set */
		return (cc&(C|Z)) != C;
	case 10:	/* N and V agree */
		return nv == (N|V) || nv == 0;
	case 11:	/* N and V differ */
		return nv == N || nv == V;
	case 12:	/* Z clear, and N and V agree */
		return !(cc&Z) && (nv == (N|V) || nv == 0);
	case 13:	/* Z set, or N and V differ */
		return (cc&Z) || nv == N || nv == V;
	case 14:	/* always */
		return 1;
	case 15:	/* never (reserved) */
	default:
		break;
	}
	return 0;
}
/*
 * Report an instruction the emulator cannot handle: format a message
 * naming the pc and the opcode, optionally trace it, and raise it
 * with error().  Callers rely on this function not returning.
 */
static void
unimp(ulong pc, ulong op)
{
	char msg[60];

	snprint(msg, sizeof msg, "sys: fp: pc=%lux unimp fp 0x%.8lux", pc, op);
	if(fpemudebug)
		print("FPE: %s\n", msg);
	error(msg);
	/* no return */
}
/*
 * Emulate one ARM 7500 FPA instruction.
 *	pc	address of the instruction (used only for error reports)
 *	op	the instruction word
 *	ur	integer register state, read and written through REG()
 *	ufp	emulated floating-point state, accessed through FR()
 * Three instruction classes are decoded in turn: load/store,
 * register transfer (including compares), and arithmetic.
 * Anything not handled is passed to unimp(), which does not return.
 */
static void
fpaemu(ulong pc, ulong op, Ureg *ur, FPsave *ufp)
{
int rn, rd, tag, o;
long off;
ulong ea;
Internal tmp, *fm, *fn;
/* note: would update fault status here if we noted numeric exceptions */
/*
* LDF, STF; 10.1.1
*/
if(((op>>25)&7) == 6){
if(op & (1<<22))
unimp(pc, op); /* packed or extended */
rn = (op>>16)&0xF;
/* 8-bit word offset; bit 23 clear means subtract */
off = (op&0xFF)<<2;
if((op & (1<<23)) == 0)
off = -off;
ea = REG(ur, rn);
/* a pc base is taken 8 bytes beyond the saved pc */
if(rn == REGPC)
ea += 8;
/* bit 24: apply the offset before the access (pre-indexed) */
if(op & (1<<24))
ea += off;
rd = (op>>12)&7;
/* bit 20 selects load (set) or store; bit 15 selects double (set) or single */
if(op & (1<<20)){
if(op & (1<<15))
fld(fpid2i, rd, ea, 8, ufp);
else
fld(fpis2i, rd, ea, 4, ufp);
}else{
if(op & (1<<15))
fst(fpii2d, ea, rd, 8, ufp);
else
fst(fpii2s, ea, rd, 4, ufp);
}
/* post-indexed: apply the offset after the access */
if((op & (1<<24)) == 0)
ea += off;
/* bit 21: write the updated address back to the base register */
if(op & (1<<21))
REG(ur, rn) = ea;
return;
}
/*
* CPRT/transfer, 10.3
*/
if(op & (1<<4)){
rd = (op>>12) & 0xF;
/*
* compare, 10.3.1
*/
if(rd == 15 && op & (1<<20)){
rn = (op>>16)&7;
fn = &FR(ufp, rn);
/* bit 3: second operand is one of the fpconst[] constants rather than a register */
/* tag is only assigned, and only read, when fpemudebug is set */
if(op & (1<<3)){
fm = &fpconst[op&7];
if(fpemudebug)
tag = 'C';
}else{
fm = &FR(ufp, op&7);
if(fpemudebug)
tag = 'F';
}
switch((op>>21)&7){
default:
unimp(pc, op);
/* not reached: unimp does not return */
case 4: /* CMF: Fn :: Fm */
case 6: /* CMFE: Fn :: Fm (with exception) */
ur->psr &= ~(N|C|Z|V);
ur->psr |= fcmp(fn, fm);
break;
case 5: /* CNF: Fn :: -Fm */
case 7: /* CNFE: Fn :: -Fm (with exception) */
/* negate a copy so the register or constant is not modified */
tmp = *fm;
tmp.s ^= 1;
ur->psr &= ~(N|C|Z|V);
ur->psr |= fcmp(fn, &tmp);
break;
}
if(fpemudebug)
print("CMPF %c%d,F%ld =%#lux\n",
tag, rn, op&7, ur->psr>>28);
return;
}
/*
* other transfer, 10.3
*/
switch((op>>20)&0xF){
default:
unimp(pc, op);
/* not reached: unimp does not return */
case 0: /* FLT */
/* integer register rd converted into FP register rn */
rn = (op>>16) & 7;
fpiw2i(&FR(ufp, rn), &REG(ur, rd));
if(fpemudebug)
print("MOVW[FD] R%d, F%d\n", rd, rn);
break;
case 1: /* FIX */
if(op & (1<<3))
unimp(pc, op);
/* FP register rn converted into integer register rd, via a copy */
rn = op & 7;
tmp = FR(ufp, rn);
fpii2w(&REG(ur, rd), &tmp);
if(fpemudebug)
print("MOV[FD]W F%d, R%d =%ld\n", rn, rd, REG(ur, rd));
break;
case 2: /* FPSR := Rd */
ufp->status = REG(ur, rd);
if(fpemudebug)
print("MOVW R%d, FPSR\n", rd);
break;
case 3: /* Rd := FPSR */
REG(ur, rd) = ufp->status;
if(fpemudebug)
print("MOVW FPSR, R%d\n", rd);
break;
case 4: /* FPCR := Rd */
ufp->control = REG(ur, rd);
if(fpemudebug)
print("MOVW R%d, FPCR\n", rd);
break;
case 5: /* Rd := FPCR */
REG(ur, rd) = ufp->control;
if(fpemudebug)
print("MOVW FPCR, R%d\n", rd);
break;
}
return;
}
/*
* arithmetic
*/
if(op & (1<<3)){ /* constant */
fm = &fpconst[op&7];
if(fpemudebug)
tag = 'C';
}else{
fm = &FR(ufp, op&7);
if(fpemudebug)
tag = 'F';
}
rd = (op>>12)&7;
o = (op>>20)&0xF;
/* bit 15 selects the one-operand table; otherwise the two-operand table */
if(op & (1<<15)){ /* monadic */
FP1 *fp;
fp = &optab1[o];
if(fp->f == nil)
unimp(pc, op);
if(fpemudebug)
print("%s %c%ld,F%d\n", fp->name, tag, op&7, rd);
(*fp->f)(fm, &FR(ufp, rd));
} else {
FP2 *fp;
fp = &optab2[o];
if(fp->f == nil)
unimp(pc, op);
rn = (op>>16)&7;
if(fpemudebug)
print("%s %c%ld,F%d,F%d\n", fp->name, tag, op&7, rn, rd);
/* operands are passed by value, so Fd may safely be the same register as Fm or Fn */
(*fp->f)(*fm, FR(ufp, rn), &FR(ufp, rd));
}
}
/*
 * Expand the 8-bit VFP "modified immediate" of a VMOV-immediate
 * instruction into *fm.  o2 holds the high nibble (bits a b c d) and
 * o4 the low nibble (bits e f g h) of the immediate.  The
 * double-precision expansion defined by the architecture is
 *	sign     = a
 *	exponent = NOT(b) : b replicated 8 times : c d
 *	fraction = e f g h followed by zeros
 * so the biased exponent is ExpBias+1+cd when b is clear and
 * ExpBias-3+cd when b is set (e.g. o2=7, o4=0 encodes 1.0).
 *
 * The previous expression, ((o2>>3) | ~(o2 & 4)) - 3 + ExpBias,
 * produced a value unrelated to this encoding.
 */
static void
vfpoptoi(Internal *fm, uchar o2, uchar o4)
{
	int cd;

	cd = o2 & 3;
	fm->s = o2>>3;
	if(o2 & 4)
		fm->e = ExpBias - 3 + cd;
	else
		fm->e = ExpBias + 1 + cd;
	fm->l = 0;
	/*
	 * the four fraction bits sit directly below the hidden bit;
	 * the exponent is never zero here, so the value is always
	 * normalised and the hidden bit is always present.
	 */
	fm->h = HiddenBit | (o4 << (20+NGuardBits));
}
/*
 * Emulate one ARM VFP instruction.
 *	pc	address of the instruction (used only for error reports)
 *	op	the instruction word
 *	ur	integer register state, read and written through REG()
 *	ufp	emulated floating-point state, accessed through FR()
 * Bits 27:24 select between extension-register load/store (0xD) and
 * the 0xE group, which bit 4 splits into core<->extension register
 * transfers and data processing.  Anything not handled is passed to
 * unimp(), which does not return.
 */
static void
vfpemu(ulong pc, ulong op, Ureg *ur, FPsave *ufp)
{
int sz, vd, o1, o2, o3, o4, o, tag;
long off;
ulong ea;
Word w;
Internal *fm, fc;
/* note: would update fault status here if we noted numeric exceptions */
/* sz is non-zero (1<<8) for double precision, zero for single */
sz = op & (1<<8);
o1 = (op>>20) & 0xF;
o2 = (op>>16) & 0xF;
vd = (op>>12) & 0xF;
switch((op>>24) & 0xF){
default:
unimp(pc, op);
/* not reached: unimp does not return */
case 0xD:
/*
* Extension Register load/store A7.6
*/
off = (op&0xFF)<<2;
if((op & (1<<23)) == 0)
off = -off;
/* NOTE(review): unlike fpaemu, no +8 adjustment is made when the base register o2 is the pc -- confirm whether pc-relative forms can reach here */
ea = REG(ur, o2) + off;
switch(o1&0x7){ /* D(Bit 22) = 0 (5l) */
default:
unimp(pc, op);
/* NOTE(review): sz (0 or 1<<8) is passed as the size argument, which fld/fst compare with 8 only to choose the debug mnemonic */
case 0:
if(sz)
fst(fpii2d, ea, vd, sz, ufp);
else
fst(fpii2s, ea, vd, sz, ufp);
break;
case 1:
if(sz)
fld(fpid2i, vd, ea, sz, ufp);
else
fld(fpis2i, vd, ea, sz, ufp);
break;
}
break;
case 0xE:
if(op & (1<<4)){
/*
* Register transfer between Core & Extension A7.8
*/
if(sz) /* C(Bit 8) != 0 */
unimp(pc, op);
switch(o1){
default:
unimp(pc, op);
/* the raw word is stored in (or fetched from) the start of the register's Internal slot, not converted */
case 0: /* Fn := Rt */
*((Word*)&FR(ufp, o2)) = REG(ur, vd);
if(fpemudebug)
print("MOVWF R%d, F%d\n", vd, o2);
break;
case 1: /* Rt := Fn */
REG(ur, vd) = *((Word*)&FR(ufp, o2));
if(fpemudebug)
print("MOVFW F%d, R%d =%ld\n", o2, vd, REG(ur, vd));
break;
case 0xE: /* FPSCR := Rt */
ufp->status = REG(ur, vd);
if(fpemudebug)
print("MOVW R%d, FPSCR\n", vd);
break;
case 0xF: /* Rt := FPSCR */
/* destination 15 means copy the status word into the psr instead of a register */
if(vd == 0xF){
ur->psr = ufp->status;
if(fpemudebug)
print("MOVW FPSCR, PSR\n");
}else{
REG(ur, vd) = ufp->status;
if(fpemudebug)
print("MOVW FPSCR, R%d\n", vd);
}
break;
}
}
else{
/*
* VFP data processing instructions A7.5
* Note: As per 5l we ignore (D, N, M) bits
*/
if(fpemudebug)
tag = 'F';
o3 = (op>>6) & 0x3;
o4 = op & 0xF;
fm = &FR(ufp, o4);
if(o1 == 0xB){ /* A7-17 */
if(o3 & 0x1){
switch(o2){
default:
/* remaining one-operand forms are dispatched through voptab1 below */
o = (o2<<1) | (o3>>1);
break;
case 0x8: /* CVT int -> float/double */
/* source register holds a raw integer word (see the register transfers above) */
w = *((Word*)fm);
fpiw2i(&FR(ufp, vd), &w);
if(fpemudebug)
print("CVTW%c F%d, F%d\n", sz?'D':'F', o4, vd);
return;
case 0xD: /* CVT float/double -> int */
fpii2w(&w, fm);
*((Word*)&FR(ufp, vd)) = w;
if(fpemudebug)
print("CVT%cW F%d, F%d\n", sz?'D':'F', o4, vd);
return;
case 0x5: /* CMPF(E) */
/* compare against zero, then fall through to the common compare code */
fm = &fpconst[0];
if(fpemudebug)
tag = 'C';
case 0x4: /* CMPF(E) */
/* flags go into the FP status word, not the psr */
ufp->status &= ~(N|C|Z|V);
ufp->status |= fcmp(&FR(ufp, vd), fm);
if(fpemudebug)
print("CMPF %c%d,F%d =%#lux\n",
tag, (o2&0x1)? 0: o4, vd, ufp->status>>28);
return;
}
}else{ /* VMOV imm (VFPv3 & v4) (5l doesn't generate) */
vfpoptoi(&fc, o2, o4);
fm = &fc;
o = 0;
if(fpemudebug)
tag = 'C';
}
FP1 *vfp;
vfp = &voptab1[o];
if(vfp->f == nil)
unimp(pc, op);
if(fpemudebug)
print("%s %c%d,F%d\n", vfp->name, tag, o4, vd);
(*vfp->f)(fm, &FR(ufp, vd));
}
else { /* A7-16 */
FP2 *vfp;
o = ((o1&0x3)<<1) | (o1&0x8) | (o3&0x1);
vfp = &voptab2[o];
if(vfp->f == nil)
unimp(pc, op);
if(fpemudebug)
print("%s F%d,F%d,F%d\n", vfp->name, o4, o2, vd);
/* operands are passed by value, so Vd may safely be the same register as Vm or Vn */
(*vfp->f)(*fm, FR(ufp, o2), &FR(ufp, vd));
}
}
break;
}
}
/*
 * Emulate a compare-and-swap pseudo-instruction.
 * Register fields (three bits each, indexing the first eight words of
 * the Ureg):
 *	bits 18:16	expected old value
 *	bits  2:0	new value
 *	bits 14:12	on entry the address of the word; on exit 1 if
 *			the swap happened, 0 if it did not
 * The address is validated for writing, and the compare and store are
 * done between splhi() and spllo().
 */
void
casemu(ulong pc, ulong op, Ureg *ur)
{
	ulong *regs, *res, *addr, old, newv;

	USED(pc);
	regs = (ulong*)ur;
	old = regs[(op>>16) & 0x7];
	newv = regs[op & 0x7];
	res = &regs[(op>>12) & 0x7];
	addr = (ulong*)*res;
	validaddr((uintptr)addr, 4, 1);
	splhi();
	if(*addr == old){
		*res = 1;
		*addr = newv;
	}else
		*res = 0;
	spllo();
}
/*
 * Set by ldrex; consumed and cleared by the next successful strex.
 */
int ldrexvalid;

/*
 * Emulate LDREX Rt, [Rn]: load the word at the address held in Rn
 * (bits 19:16) into Rt (bits 15:12) and mark the exclusive
 * reservation as valid.  The address is validated for reading first.
 *
 * Both register fields are four bits wide.  Registers are accessed
 * through REG(), which uses the roff[] offset table, so r8-r14 are
 * decoded correctly instead of aliasing r0-r7.
 */
void
ldrex(ulong pc, ulong op, Ureg *ur)
{
	ulong rn, rt, *addr;

	USED(pc);
	rn = (op>>16) & 0xF;
	rt = (op>>12) & 0xF;
	addr = (ulong*)REG(ur, rn);
	validaddr((uintptr)addr, 4, 0);
	ldrexvalid = 1;
	REG(ur, rt) = *addr;
	if(fpemudebug)
		print("ldrex, r%ld = [r%ld]@0x%8.8p = 0x%8.8lux",
			rt, rn, addr, (ulong)REG(ur, rt));
}
/*
 * Emulate STREX Rd, Rt, [Rn]: if the reservation set by ldrex is
 * still valid, store Rt (bits 3:0) at the address held in Rn
 * (bits 19:16), clear the reservation and set Rd (bits 15:12) to 0;
 * otherwise leave memory alone and set Rd to 1.
 * The address is validated for writing, and the test and store are
 * done between splhi() and spllo().
 *
 * All three register fields are four bits wide.  Registers are
 * accessed through REG(), which uses the roff[] offset table, so
 * r8-r14 are decoded correctly instead of aliasing r0-r7.  The
 * failure trace now names the status register Rd, which is the
 * register actually set to 1.
 */
void
strex(ulong pc, ulong op, Ureg *ur)
{
	ulong rn, rd, rt, val, *addr;

	USED(pc);
	rn = (op>>16) & 0xF;	/* base address register */
	rd = (op>>12) & 0xF;	/* status result register */
	rt = op & 0xF;		/* register holding the value to store */
	val = REG(ur, rt);
	addr = (ulong*)REG(ur, rn);
	validaddr((uintptr)addr, 4, 1);
	splhi();
	if(ldrexvalid){
		if(fpemudebug)
			print("strex valid, [r%ld]@0x%8.8p = r%ld = 0x%8.8lux",
				rn, addr, rt, val);
		*addr = val;
		ldrexvalid = 0;
		REG(ur, rd) = 0;
	}else{
		if(fpemudebug)
			print("strex invalid, r%ld = 1", rd);
		REG(ur, rd) = 1;
	}
	spllo();
}
/*
 * Non-floating-point instructions that fpiarm also emulates.
 * An instruction word op matches an entry when (op & mask) == opc;
 * the entry's f is then called as f(pc, op, ur).  The table is
 * terminated by an entry whose f is nil.
 */
struct {
ulong opc;
ulong mask;
void (*f)(ulong, ulong, Ureg*);
} specialopc[] = {
{ 0x01900f9f, 0x0ff00fff, ldrex },
{ 0x01800f90, 0x0ff00ff0, strex },
{ 0x0ed00100, 0x0ef08100, casemu },
{ 0x00000000, 0x00000000, nil }
};
/*
 * Emulate the run of consecutive emulated instructions starting at
 * ur->pc, advancing ur->pc past each one, and stop at the first
 * instruction that is neither in specialopc[] nor a VFP or FPA
 * operation.  An instruction whose condition code is not satisfied by
 * ur->psr is stepped over without being executed, but is still counted.
 *
 * returns the number of FP instructions emulated
 */
int
fpiarm(Ureg *ur)
{
	ulong op, o, cp;
	FPsave *ufp;
	int i, n;

	if(up == nil)
		panic("fpiarm not in a process");
	ufp = up->fpsave;

	/*
	 * because all the emulated fp state is in the proc structure,
	 * it need not be saved/restored; it only needs initialising
	 * the first time the process uses floating point.
	 */
	if(up->fpstate != FPactive){
		up->fpstate = FPactive;
		ufp->control = 0;
		/* software emulation, alternative C flag */
		ufp->status = (0x01<<28)|(1<<12);
		for(n = 0; n < Nfpctlregs; n++)
			FR(ufp, n) = fpconst[0];
	}

	n = 0;
	for(;;){
		validaddr(ur->pc, 4, 0);
		op = *(ulong*)(ur->pc);
		if(fpemudebug)
			print("%#lux: %#8.8lux ", ur->pc, op);
		o = (op>>24) & 0xF;
		cp = (op>>8) & 0xF;

		/* look for a matching special instruction; stops on the nil terminator */
		i = 0;
		while(specialopc[i].f != nil
		&& (op & specialopc[i].mask) != specialopc[i].opc)
			i++;

		if(specialopc[i].f != nil){
			if(condok(ur->psr, op>>28))
				specialopc[i].f(ur->pc, op, ur);
		}else if(ISVFPOP(cp, o)){
			if(condok(ur->psr, op>>28))
				vfpemu(ur->pc, op, ur, ufp);
		}else if(ISFPAOP(cp, o)){
			if(condok(ur->psr, op>>28))
				fpaemu(ur->pc, op, ur, ufp);
		}else
			break;

		ur->pc += 4;		/* pretend cpu executed the instr */
		n++;
	}
	if(fpemudebug)
		print("\n");
	return n;
}