keep fpregs always in sse (FXSAVE) format, adapt libmach and acid files for new format
we now always use the new FXSAVE format in the FPsave structure and the fpregs file, converting back and forth in fpx87save() and fpx87restore(). document that fprestore() is a destructive operation now. change the fp register definitions in libmach and adapt the fpr() acid function. avoid the unnecessary copy of fpstate and fpsave in sysrfork(). functions including syscalls do not preserve the fp registers, and copying fpstate from the current process would mean we had to fpsave(&up->fpsave); first. we simply do not do it; the new process starts in FPinit state.
This commit is contained in:
parent
7aea1204b9
commit
257c7e958e
7 changed files with 177 additions and 99 deletions
|
@ -69,22 +69,14 @@ defn fpr()
|
|||
print("F5\t", *F5, "\n");
|
||||
print("F6\t", *F6, "\n");
|
||||
print("F7\t", *F7, "\n");
|
||||
print("control\t", *fmt(E0, 'x'), "\n");
|
||||
print("status\t", *fmt(E1, 'x'), "\n");
|
||||
print("tag\t", *fmt(E2, 'x'), "\n");
|
||||
print("ip offset\t", *fmt(E3, 'X'), "\n");
|
||||
print("cs selector\t", *fmt(E4, 'x'), "\n");
|
||||
print("opcode\t", *fmt(E4>>8, 'x'), "\n");
|
||||
print("data operand offset\t", *fmt(E5, 'x'), "\n");
|
||||
print("operand selector\t", *fmt(E6, 'x'), "\n");
|
||||
}
|
||||
|
||||
defn mmregs()
|
||||
{
|
||||
print("MM0\t", *MM0, " MM1\t", *MM1, "\n");
|
||||
print("MM2\t", *MM2, " MM3\t", *MM3, "\n");
|
||||
print("MM4\t", *MM4, " MM5\t", *MM5, "\n");
|
||||
print("MM6\t", *MM6, " MM7\t", *MM7, "\n");
|
||||
print("control\t", *FCW, "\n");
|
||||
print("status\t", *FSW, "\n");
|
||||
print("tag\t", *FTW, "\n");
|
||||
print("ip\t", *FIP, "\n");
|
||||
print("cs selector\t", *FCS, "\n");
|
||||
print("opcode\t", *FOP, "\n");
|
||||
print("data operand\t", *FDP, "\n");
|
||||
print("operand selector\t", *FDS, "\n");
|
||||
}
|
||||
|
||||
defn pstop(pid)
|
||||
|
|
|
@ -76,10 +76,10 @@ struct FPstate
|
|||
ushort r3;
|
||||
ulong pc;
|
||||
ushort selector;
|
||||
ushort r4;
|
||||
ushort opcode;
|
||||
ulong operand;
|
||||
ushort oselector;
|
||||
ushort r5;
|
||||
ushort r4;
|
||||
uchar regs[80]; /* floating point registers */
|
||||
};
|
||||
|
||||
|
@ -91,10 +91,10 @@ struct FPssestate /* SSE fp state */
|
|||
ushort fop; /* opcode */
|
||||
ulong fpuip; /* pc */
|
||||
ushort cs; /* pc segment */
|
||||
ushort r1; /* reserved */
|
||||
ushort rsrvd1; /* reserved */
|
||||
ulong fpudp; /* data pointer */
|
||||
ushort ds; /* data pointer segment */
|
||||
ushort r2;
|
||||
ushort rsrvd2;
|
||||
ulong mxcsr; /* MXCSR register state */
|
||||
ulong mxcsr_mask; /* MXCSR mask register */
|
||||
uchar xregs[480]; /* extended registers */
|
||||
|
|
|
@ -41,7 +41,9 @@ void fpssesave(FPsave*);
|
|||
void fpssesave0(FPsave*);
|
||||
ulong fpstatus(void);
|
||||
void fpx87restore(FPsave*);
|
||||
void fpx87restore0(FPsave*);
|
||||
void fpx87save(FPsave*);
|
||||
void fpx87save0(FPsave*);
|
||||
ulong getcr0(void);
|
||||
ulong getcr2(void);
|
||||
ulong getcr3(void);
|
||||
|
|
|
@ -657,13 +657,13 @@ TEXT fpinit(SB), $0 /* enable and init */
|
|||
WAIT
|
||||
RET
|
||||
|
||||
TEXT fpx87save(SB), $0 /* save state and disable */
|
||||
TEXT fpx87save0(SB), $0 /* save state and disable */
|
||||
MOVL p+0(FP), AX
|
||||
FSAVE 0(AX) /* no WAIT */
|
||||
FPOFF(l2)
|
||||
RET
|
||||
|
||||
TEXT fpx87restore(SB), $0 /* enable and restore state */
|
||||
TEXT fpx87restore0(SB), $0 /* enable and restore state */
|
||||
FPON
|
||||
MOVL p+0(FP), AX
|
||||
FRSTOR 0(AX)
|
||||
|
|
|
@ -469,6 +469,121 @@ confinit(void)
|
|||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* we keep FPsave structure in sse format emulating FXSAVE / FXRSTOR
|
||||
* instructions for legacy x87 fpu.
|
||||
*
|
||||
* Note that fpx87restore() and fpsserestore() do modify the FPsave
|
||||
* data structure for conversion / realignment shuffling. this means
|
||||
* that p->fpsave is only valid when p->fpstate == FPinactive.
|
||||
*/
|
||||
/*
 * Save the x87 state into fps via fpx87save0() and then rewrite the
 * buffer in place into the sse (FXSAVE-style) layout: header fields
 * are copied to their sse-format names and the eight 10-byte
 * registers are spread out into 16-byte slots in xregs.
 */
void
|
||||
fpx87save(FPsave *fps)
|
||||
{
|
||||
/* store the legacy-format image into fps */
fpx87save0(fps);
|
||||
|
||||
/*
 * copy header fields from the legacy names to the sse names.
 * NOTE(review): presumably both layouts share the same storage, in
 * which case the order of these assignments matters -- confirm
 * against the FPsave definition before reordering.
 */
/* NOP fps->fcw = fps->control; */
|
||||
fps->fsw = fps->status;
|
||||
fps->ftw = fps->tag;
|
||||
fps->fop = fps->opcode;
|
||||
fps->fpuip = fps->pc;
|
||||
fps->cs = fps->selector;
|
||||
fps->fpudp = fps->operand;
|
||||
fps->ds = fps->oselector;
|
||||
|
||||
/* MOVA: copy 10 bytes from s to d, highest bytes first */
#define MOVA(d,s) \
|
||||
*((ushort*)(d+8)) = *((ushort*)(s+8)), \
|
||||
*((ulong*)(d+4)) = *((ulong*)(s+4)), \
|
||||
*((ulong*)(d)) = *((ulong*)(s))
|
||||
|
||||
/* move each register from its 10-byte slot in regs to a 16-byte slot in xregs, last register first */
MOVA(fps->xregs+0x70, fps->regs+70);
|
||||
MOVA(fps->xregs+0x60, fps->regs+60);
|
||||
MOVA(fps->xregs+0x50, fps->regs+50);
|
||||
MOVA(fps->xregs+0x40, fps->regs+40);
|
||||
MOVA(fps->xregs+0x30, fps->regs+30);
|
||||
MOVA(fps->xregs+0x20, fps->regs+20);
|
||||
MOVA(fps->xregs+0x10, fps->regs+10);
|
||||
MOVA(fps->xregs+0x00, fps->regs+00);
|
||||
|
||||
#undef MOVA
|
||||
|
||||
/* CLR6: zero 6 bytes starting at d */
#define CLR6(d) \
|
||||
*((ulong*)(d)) = 0, \
|
||||
*((ushort*)(d+4)) = 0
|
||||
|
||||
/* clear the 6 bytes that follow each 10-byte register inside its 16-byte slot */
CLR6(fps->xregs+0x70+10);
|
||||
CLR6(fps->xregs+0x60+10);
|
||||
CLR6(fps->xregs+0x50+10);
|
||||
CLR6(fps->xregs+0x40+10);
|
||||
CLR6(fps->xregs+0x30+10);
|
||||
CLR6(fps->xregs+0x20+10);
|
||||
CLR6(fps->xregs+0x10+10);
|
||||
CLR6(fps->xregs+0x00+10);
|
||||
|
||||
#undef CLR6
|
||||
|
||||
/* zero the reserved words and the MXCSR fields of the sse layout */
fps->rsrvd1 = fps->rsrvd2 = fps->mxcsr = fps->mxcsr_mask = 0;
|
||||
}
|
||||
|
||||
/*
 * Rewrite fps in place from the sse (FXSAVE-style) layout into the
 * legacy x87 layout and load it with fpx87restore0().
 * The buffer is not converted back, so fps no longer holds
 * sse-format data when this returns.
 */
void
|
||||
fpx87restore(FPsave *fps)
|
||||
{
|
||||
/* MOVA: copy 10 bytes from s to d, lowest bytes first */
#define MOVA(d,s) \
|
||||
*((ulong*)(d)) = *((ulong*)(s)), \
|
||||
*((ulong*)(d+4)) = *((ulong*)(s+4)), \
|
||||
*((ushort*)(d+8)) = *((ushort*)(s+8))
|
||||
|
||||
/* pack each register from its 16-byte slot in xregs back into a 10-byte slot in regs, first register first */
MOVA(fps->regs+00, fps->xregs+0x00);
|
||||
MOVA(fps->regs+10, fps->xregs+0x10);
|
||||
MOVA(fps->regs+20, fps->xregs+0x20);
|
||||
MOVA(fps->regs+30, fps->xregs+0x30);
|
||||
MOVA(fps->regs+40, fps->xregs+0x40);
|
||||
MOVA(fps->regs+50, fps->xregs+0x50);
|
||||
MOVA(fps->regs+60, fps->xregs+0x60);
|
||||
MOVA(fps->regs+70, fps->xregs+0x70);
|
||||
|
||||
#undef MOVA
|
||||
|
||||
/*
 * copy header fields from the sse names back to the legacy names.
 * NOTE(review): presumably both layouts share the same storage, in
 * which case the order of these assignments matters -- confirm
 * against the FPsave definition before reordering.
 */
fps->oselector = fps->ds;
|
||||
fps->operand = fps->fpudp;
|
||||
/* only the low 11 bits of fop are kept as the opcode */
fps->opcode = (fps->fop & 0x7ff);
|
||||
fps->selector = fps->cs;
|
||||
fps->pc = fps->fpuip;
|
||||
fps->tag = fps->ftw;
|
||||
fps->status = fps->fsw;
|
||||
/* NOP fps->control = fps->fcw; */
|
||||
|
||||
/* clear the reserved words r1..r4 */
fps->r1 = fps->r2 = fps->r3 = fps->r4 = 0;
|
||||
|
||||
/* load the converted image */
fpx87restore0(fps);
|
||||
}
|
||||
|
||||
/*
|
||||
* sse fp save and restore buffers have to be 16-byte (FPalign) aligned,
|
||||
* so we shuffle the data up and down as needed or make copies.
|
||||
*/
|
||||
void
|
||||
fpssesave(FPsave *fps)
|
||||
{
|
||||
FPsave *afps;
|
||||
|
||||
/* afps is fps rounded to an FPalign boundary; the save is done at that address */
afps = (FPsave *)ROUND(((uintptr)fps), FPalign);
|
||||
fpssesave0(afps);
|
||||
/* copy sizeof(FPssestate) - FPalign bytes back so the data starts at fps itself */
if(fps != afps) /* not aligned? shuffle down from aligned buffer */
|
||||
memmove(fps, afps, sizeof(FPssestate) - FPalign);
|
||||
}
|
||||
|
||||
void
|
||||
fpsserestore(FPsave *fps)
|
||||
{
|
||||
FPsave *afps;
|
||||
|
||||
/* afps is fps rounded to an FPalign boundary; the restore is done from that address */
afps = (FPsave *)ROUND(((uintptr)fps), FPalign);
|
||||
/*
 * the data is moved inside the caller's buffer and is not moved back
 * afterwards, so when fps was unaligned its contents no longer start
 * at fps once this returns.
 */
if(fps != afps) /* shuffle up to make aligned */
|
||||
memmove(afps, fps, sizeof(FPssestate) - FPalign);
|
||||
fpsserestore0(afps);
|
||||
}
|
||||
|
||||
static char* mathmsg[] =
|
||||
{
|
||||
nil, /* handled below */
|
||||
|
@ -510,61 +625,6 @@ mathnote(ulong status, ulong pc)
|
|||
postnote(up, 1, note, NDebug);
|
||||
}
|
||||
|
||||
/*
|
||||
* sse fp save and restore buffers have to be 16-byte (FPalign) aligned,
|
||||
* so we shuffle the data up and down as needed or make copies.
|
||||
*/
|
||||
void
|
||||
fpssesave(FPsave *fps)
|
||||
{
|
||||
FPsave *afps;
|
||||
|
||||
afps = (FPsave *)ROUND(((uintptr)fps), FPalign);
|
||||
fpssesave0(afps);
|
||||
if(fps != afps) /* not aligned? shuffle down from aligned buffer */
|
||||
memmove(fps, afps, sizeof(FPssestate) - FPalign);
|
||||
}
|
||||
|
||||
void
|
||||
fpsserestore(FPsave *fps)
|
||||
{
|
||||
FPsave *afps;
|
||||
|
||||
afps = (FPsave *)ROUND(((uintptr)fps), FPalign);
|
||||
if(fps != afps) /* shuffle up to make aligned */
|
||||
memmove(afps, fps, sizeof(FPssestate) - FPalign);
|
||||
fpsserestore0(afps);
|
||||
if(fps != afps) /* shuffle regs back down when unaligned */
|
||||
memmove(fps, afps, sizeof(FPssestate) - FPalign);
|
||||
}
|
||||
|
||||
/*
|
||||
* extract control, status and fppc from process
|
||||
* floating point state independent of format.
|
||||
*/
|
||||
static void
|
||||
mathstate(ulong *stsp, ulong *pcp, ulong *ctlp)
|
||||
{
|
||||
ulong sts, fpc, ctl;
|
||||
FPsave *f = &up->fpsave;
|
||||
|
||||
if(fpsave == fpx87save){
|
||||
sts = f->status;
|
||||
fpc = f->pc;
|
||||
ctl = f->control;
|
||||
} else {
|
||||
sts = f->fsw;
|
||||
fpc = f->fpuip;
|
||||
ctl = f->fcw;
|
||||
}
|
||||
if(stsp)
|
||||
*stsp = sts;
|
||||
if(pcp)
|
||||
*pcp = fpc;
|
||||
if(ctlp)
|
||||
*ctlp = ctl;
|
||||
}
|
||||
|
||||
/*
|
||||
* math coprocessor error
|
||||
*/
|
||||
|
@ -591,7 +651,7 @@ matherror(Ureg*, void*)
|
|||
static void
|
||||
mathemu(Ureg *ureg, void*)
|
||||
{
|
||||
ulong status, control, pc;
|
||||
ulong status, control;
|
||||
|
||||
if(up->fpstate & FPillegal){
|
||||
/* someone did floating point in a note handler */
|
||||
|
@ -611,9 +671,10 @@ mathemu(Ureg *ureg, void*)
|
|||
* More attention should probably be paid here to the
|
||||
* exception masks and error summary.
|
||||
*/
|
||||
mathstate(&status, &pc, &control);
|
||||
status = up->fpsave.fsw;
|
||||
control = up->fpsave.fcw;
|
||||
if((status & ~control) & 0x07F){
|
||||
mathnote(status, pc);
|
||||
mathnote(status, up->fpsave.fpuip);
|
||||
break;
|
||||
}
|
||||
fprestore(&up->fpsave);
|
||||
|
|
|
@ -84,7 +84,6 @@ sysrfork(ulong *arg)
|
|||
|
||||
p = newproc();
|
||||
|
||||
p->fpsave = up->fpsave;
|
||||
p->scallnr = up->scallnr;
|
||||
p->s = up->s;
|
||||
p->nerrlab = 0;
|
||||
|
@ -180,7 +179,6 @@ sysrfork(ulong *arg)
|
|||
if((flag&RFNOTEG) == 0)
|
||||
p->noteid = up->noteid;
|
||||
|
||||
p->fpstate = up->fpstate;
|
||||
pid = p->pid;
|
||||
memset(p->time, 0, sizeof(p->time));
|
||||
p->time[TReal] = MACHP(0)->ticks;
|
||||
|
|
|
@ -14,9 +14,11 @@
|
|||
#define AX REGOFF(ax)
|
||||
|
||||
#define REGSIZE sizeof(struct Ureg)
|
||||
#define FP_CTLS(x) (REGSIZE+2*(x))
|
||||
#define FP_CTL(x) (REGSIZE+4*(x))
|
||||
#define FP_REG(x) (FP_CTL(7)+10*(x))
|
||||
#define FPREGSIZE (7*4+8*10)
|
||||
#define FP_REG(x) (FP_CTL(8)+16*(x))
|
||||
#define XM_REG(x) (FP_CTL(8)+8*16+16*(x))
|
||||
#define FPREGSIZE 512
|
||||
|
||||
Reglist i386reglist[] = {
|
||||
{"DI", REGOFF(di), RINT, 'X'},
|
||||
|
@ -38,21 +40,44 @@ Reglist i386reglist[] = {
|
|||
{"SP", SP, RINT, 'X'},
|
||||
{"SS", REGOFF(ss), RINT, 'X'},
|
||||
|
||||
{"E0", FP_CTL(0), RFLT, 'X'},
|
||||
{"E1", FP_CTL(1), RFLT, 'X'},
|
||||
{"E2", FP_CTL(2), RFLT, 'X'},
|
||||
{"E3", FP_CTL(3), RFLT, 'X'},
|
||||
{"E4", FP_CTL(4), RFLT, 'X'},
|
||||
{"E5", FP_CTL(5), RFLT, 'X'},
|
||||
{"E6", FP_CTL(6), RFLT, 'X'},
|
||||
{"F0", FP_REG(0), RFLT, '3'},
|
||||
{"F1", FP_REG(1), RFLT, '3'},
|
||||
{"F2", FP_REG(2), RFLT, '3'},
|
||||
{"F3", FP_REG(3), RFLT, '3'},
|
||||
{"F4", FP_REG(4), RFLT, '3'},
|
||||
{"F5", FP_REG(5), RFLT, '3'},
|
||||
{"F6", FP_REG(6), RFLT, '3'},
|
||||
{"F7", FP_REG(7), RFLT, '3'},
|
||||
{"FCW", FP_CTLS(0), RFLT, 'x'},
|
||||
{"FSW", FP_CTLS(1), RFLT, 'x'},
|
||||
{"FTW", FP_CTLS(2), RFLT, 'b'},
|
||||
{"FOP", FP_CTLS(3), RFLT, 'x'},
|
||||
{"FIP", FP_CTL(2), RFLT, 'X'},
|
||||
{"FCS", FP_CTLS(6), RFLT, 'x'},
|
||||
{"FDP", FP_CTL(4), RFLT, 'X'},
|
||||
{"FDS", FP_CTLS(10), RFLT, 'x'},
|
||||
{"MXCSR", FP_CTL(6), RFLT, 'X'},
|
||||
{"MXCSRMASK", FP_CTL(7), RFLT, 'X'},
|
||||
|
||||
{"M0", FP_REG(0), RFLT, 'F'}, /* assumes double */
|
||||
{"M1", FP_REG(1), RFLT, 'F'},
|
||||
{"M2", FP_REG(2), RFLT, 'F'},
|
||||
{"M3", FP_REG(3), RFLT, 'F'},
|
||||
{"M4", FP_REG(4), RFLT, 'F'},
|
||||
{"M5", FP_REG(5), RFLT, 'F'},
|
||||
{"M6", FP_REG(6), RFLT, 'F'},
|
||||
{"M7", FP_REG(7), RFLT, 'F'},
|
||||
|
||||
{"X0", XM_REG(0), RFLT, 'F'}, /* assumes double */
|
||||
{"X1", XM_REG(1), RFLT, 'F'},
|
||||
{"X2", XM_REG(2), RFLT, 'F'},
|
||||
{"X3", XM_REG(3), RFLT, 'F'},
|
||||
{"X4", XM_REG(4), RFLT, 'F'},
|
||||
{"X5", XM_REG(5), RFLT, 'F'},
|
||||
{"X6", XM_REG(6), RFLT, 'F'},
|
||||
{"X7", XM_REG(7), RFLT, 'F'},
|
||||
|
||||
{"F0", FP_REG(7), RFLT, '3'},
|
||||
{"F1", FP_REG(6), RFLT, '3'},
|
||||
{"F2", FP_REG(5), RFLT, '3'},
|
||||
{"F3", FP_REG(4), RFLT, '3'},
|
||||
{"F4", FP_REG(3), RFLT, '3'},
|
||||
{"F5", FP_REG(2), RFLT, '3'},
|
||||
{"F6", FP_REG(1), RFLT, '3'},
|
||||
{"F7", FP_REG(0), RFLT, '3'},
|
||||
|
||||
{ 0 }
|
||||
};
|
||||
|
||||
|
|
Loading…
Reference in a new issue