pc64: allow using the FPU in syscall and pagefault handlers
The aim is to take advantage of SSE instructions such as AES-NI in the kernel by lazily saving and restoring FPU state across system calls and pagefaults (everything that can do I/O). This is accomplished by the functions fpusave() and fpurestore(). fpusave() remembers the current state and, if the FPU was active, disables it by setting the TS flag. If the FPU then gets used, the current state is saved and a new PFPU.fpslot is allocated by mathemu(). fpurestore() restores the previous FPU state, re-enabling the FPU if fpusave() disabled it. In the most common case, when userspace is not using the FPU, fpusave()/fpurestore() just toggle the FPpush bit in up->fpstate. When the FPU was active but the kernel does not use it, nothing needs to be saved or restored; we just switch the TS flag on and off again. Note that this is done for the amd64 kernel only.
This commit is contained in:
parent
3ccd53549f
commit
4f27f6a04f
4 changed files with 134 additions and 55 deletions
|
@ -65,12 +65,6 @@ struct FPsave
|
|||
uchar ign[96]; /* reserved, ignored */
|
||||
};
|
||||
|
||||
struct PFPU
|
||||
{
|
||||
int fpstate;
|
||||
FPsave *fpsave;
|
||||
};
|
||||
|
||||
enum
|
||||
{
|
||||
/* this is a state */
|
||||
|
@ -78,8 +72,27 @@ enum
|
|||
FPactive= 1,
|
||||
FPinactive= 2,
|
||||
|
||||
/* the following is a bit that can be or'd into the state */
|
||||
FPillegal= 0x100,
|
||||
/*
|
||||
* the following are bits that can be or'd into the state.
|
||||
*
|
||||
* this is biased so that FPinit, FPactive and FPinactive
|
||||
* without any flags refer to user fp state in fpslot[0].
|
||||
*/
|
||||
FPillegal= 1<<8, /* fp forbidden in note handler */
|
||||
FPpush= 2<<8, /* trap on use and initialize new fpslot */
|
||||
FPnouser= 4<<8, /* fpslot[0] is kernel regs */
|
||||
FPkernel= 8<<8, /* fp use in kernel (user in fpslot[0] when !FPnouser) */
|
||||
|
||||
FPindexs= 16,
|
||||
FPindex1= 1<<FPindexs,
|
||||
FPindexm= 3<<FPindexs,
|
||||
};
|
||||
|
||||
struct PFPU
|
||||
{
|
||||
int fpstate;
|
||||
FPsave *fpsave; /* fpslot[fpstate>>FPindexs] */
|
||||
FPsave *fpslot[(FPindexm+1)>>FPindexs];
|
||||
};
|
||||
|
||||
struct Confmem
|
||||
|
|
|
@ -41,6 +41,8 @@ void fpsserestore(FPsave*);
|
|||
void fpssesave(FPsave*);
|
||||
void fpx87restore(FPsave*);
|
||||
void fpx87save(FPsave*);
|
||||
int fpusave(void);
|
||||
void fpurestore(int);
|
||||
u64int getcr0(void);
|
||||
u64int getcr2(void);
|
||||
u64int getcr3(void);
|
||||
|
|
|
@ -473,13 +473,13 @@ mathnote(ulong status, uintptr pc)
|
|||
* math coprocessor error
|
||||
*/
|
||||
static void
|
||||
matherror(Ureg*, void*)
|
||||
matherror(Ureg *, void*)
|
||||
{
|
||||
/*
|
||||
* Save FPU state to check out the error.
|
||||
*/
|
||||
fpsave(up->fpsave);
|
||||
up->fpstate = FPinactive;
|
||||
up->fpstate = FPinactive | (up->fpstate & (FPnouser|FPkernel|FPindexm));
|
||||
mathnote(up->fpsave->fsw, up->fpsave->rip);
|
||||
}
|
||||
|
||||
|
@ -490,7 +490,7 @@ static void
|
|||
simderror(Ureg *ureg, void*)
|
||||
{
|
||||
fpsave(up->fpsave);
|
||||
up->fpstate = FPinactive;
|
||||
up->fpstate = FPinactive | (up->fpstate & (FPnouser|FPkernel|FPindexm));
|
||||
mathnote(up->fpsave->mxcsr & 0x3f, ureg->pc);
|
||||
}
|
||||
|
||||
|
@ -519,18 +519,37 @@ static void
|
|||
mathemu(Ureg *ureg, void*)
|
||||
{
|
||||
ulong status, control;
|
||||
int index;
|
||||
|
||||
if(up->fpstate & FPillegal){
|
||||
/* someone did floating point in a note handler */
|
||||
postnote(up, 1, "sys: floating point in note handler", NDebug);
|
||||
return;
|
||||
}
|
||||
switch(up->fpstate){
|
||||
switch(up->fpstate & ~(FPnouser|FPkernel|FPindexm)){
|
||||
case FPactive | FPpush:
|
||||
_clts();
|
||||
fpsave(up->fpsave);
|
||||
case FPinactive | FPpush:
|
||||
up->fpstate += FPindex1;
|
||||
case FPinit | FPpush:
|
||||
case FPinit:
|
||||
fpinit();
|
||||
while(up->fpsave == nil)
|
||||
up->fpsave = mallocalign(sizeof(FPsave), FPalign, 0, 0);
|
||||
up->fpstate = FPactive;
|
||||
index = up->fpstate >> FPindexs;
|
||||
if(index < 0 || index > FPindexm)
|
||||
panic("fpslot index overflow: %d", index);
|
||||
if(userureg(ureg)){
|
||||
if(index != 0)
|
||||
panic("fpslot index %d != 0 for user", index);
|
||||
} else {
|
||||
if(index == 0)
|
||||
up->fpstate |= FPnouser;
|
||||
up->fpstate |= FPkernel;
|
||||
}
|
||||
while(up->fpslot[index] == nil)
|
||||
up->fpslot[index] = mallocalign(sizeof(FPsave), FPalign, 0, 0);
|
||||
up->fpsave = up->fpslot[index];
|
||||
up->fpstate = FPactive | (up->fpstate & (FPnouser|FPkernel|FPindexm));
|
||||
break;
|
||||
case FPinactive:
|
||||
/*
|
||||
|
@ -547,7 +566,7 @@ mathemu(Ureg *ureg, void*)
|
|||
break;
|
||||
}
|
||||
fprestore(up->fpsave);
|
||||
up->fpstate = FPactive;
|
||||
up->fpstate = FPactive | (up->fpstate & (FPnouser|FPkernel|FPindexm));
|
||||
break;
|
||||
case FPactive:
|
||||
panic("math emu pid %ld %s pc %#p",
|
||||
|
@ -596,17 +615,21 @@ procfork(Proc *p)
|
|||
/* save floating point state */
|
||||
s = splhi();
|
||||
switch(up->fpstate & ~FPillegal){
|
||||
case FPactive | FPpush:
|
||||
_clts();
|
||||
case FPactive:
|
||||
fpsave(up->fpsave);
|
||||
up->fpstate = FPinactive;
|
||||
up->fpstate = FPinactive | (up->fpstate & FPpush);
|
||||
case FPactive | FPkernel:
|
||||
case FPinactive | FPkernel:
|
||||
case FPinactive | FPpush:
|
||||
case FPinactive:
|
||||
while(p->fpsave == nil)
|
||||
p->fpsave = mallocalign(sizeof(FPsave), FPalign, 0, 0);
|
||||
memmove(p->fpsave, up->fpsave, sizeof(FPsave));
|
||||
while(p->fpslot[0] == nil)
|
||||
p->fpslot[0] = mallocalign(sizeof(FPsave), FPalign, 0, 0);
|
||||
memmove(p->fpsave = p->fpslot[0], up->fpslot[0], sizeof(FPsave));
|
||||
p->fpstate = FPinactive;
|
||||
}
|
||||
splx(s);
|
||||
|
||||
}
|
||||
|
||||
void
|
||||
|
@ -644,24 +667,26 @@ procsave(Proc *p)
|
|||
p->kentry -= t;
|
||||
p->pcycles += t;
|
||||
|
||||
if(p->fpstate == FPactive){
|
||||
switch(p->fpstate & ~(FPnouser|FPkernel|FPindexm)){
|
||||
case FPactive | FPpush:
|
||||
_clts();
|
||||
case FPactive:
|
||||
if(p->state == Moribund){
|
||||
_clts();
|
||||
_fnclex();
|
||||
_stts();
|
||||
break;
|
||||
}
|
||||
else{
|
||||
/*
|
||||
* Fpsave() stores without handling pending
|
||||
* unmasked exeptions. Postnote() can't be called
|
||||
* here as sleep() already has up->rlock, so
|
||||
* the handling of pending exceptions is delayed
|
||||
* until the process runs again and generates an
|
||||
* emulation fault to activate the FPU.
|
||||
*/
|
||||
fpsave(p->fpsave);
|
||||
}
|
||||
p->fpstate = FPinactive;
|
||||
/*
|
||||
* Fpsave() stores without handling pending
|
||||
* unmasked exeptions. Postnote() can't be called
|
||||
* here as sleep() already has up->rlock, so
|
||||
* the handling of pending exceptions is delayed
|
||||
* until the process runs again and generates an
|
||||
* emulation fault to activate the FPU.
|
||||
*/
|
||||
fpsave(p->fpsave);
|
||||
p->fpstate = FPinactive | (p->fpstate & (FPpush|FPnouser|FPkernel|FPindexm));
|
||||
break;
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -677,3 +702,32 @@ procsave(Proc *p)
|
|||
*/
|
||||
mmuflushtlb();
|
||||
}
|
||||
|
||||
/*
|
||||
* Fpusave and fpurestore lazily save and restore FPU state across
|
||||
* system calls and the pagefault handler so that we can take
|
||||
* advantage of SSE instructions such as AES-NI in the kernel.
|
||||
*/
|
||||
int
|
||||
fpusave(void)
|
||||
{
|
||||
int ostate = up->fpstate;
|
||||
if((up->fpstate & ~(FPnouser|FPkernel|FPindexm)) == FPactive)
|
||||
_stts();
|
||||
up->fpstate = FPpush | (up->fpstate & ~FPillegal);
|
||||
return ostate;
|
||||
}
|
||||
void
|
||||
fpurestore(int ostate)
|
||||
{
|
||||
if((up->fpstate & ~(FPnouser|FPkernel|FPindexm)) == FPactive)
|
||||
_stts();
|
||||
if((ostate & FPindexm) == (up->fpstate & FPindexm)){
|
||||
if((ostate & ~(FPnouser|FPkernel|FPindexm)) == FPactive)
|
||||
_clts();
|
||||
} else {
|
||||
up->fpsave = up->fpslot[ostate>>FPindexs];
|
||||
ostate = FPinactive | (ostate & (FPillegal|FPpush|FPnouser|FPkernel|FPindexm));
|
||||
}
|
||||
up->fpstate = ostate;
|
||||
}
|
||||
|
|
|
@ -649,11 +649,12 @@ unexpected(Ureg* ureg, void*)
|
|||
}
|
||||
|
||||
extern void checkpages(void);
|
||||
|
||||
static void
|
||||
faultamd64(Ureg* ureg, void*)
|
||||
{
|
||||
uintptr addr;
|
||||
int read, user, n, insyscall;
|
||||
int read, user, n, insyscall, f;
|
||||
char buf[ERRMAX];
|
||||
|
||||
addr = getcr2();
|
||||
|
@ -670,6 +671,14 @@ faultamd64(Ureg* ureg, void*)
|
|||
|
||||
insyscall = up->insyscall;
|
||||
up->insyscall = 1;
|
||||
f = fpusave();
|
||||
if(!user && waserror()){
|
||||
int s = splhi();
|
||||
fpurestore(f);
|
||||
up->insyscall = insyscall;
|
||||
splx(s);
|
||||
nexterror();
|
||||
}
|
||||
n = fault(addr, read);
|
||||
if(n < 0){
|
||||
if(!user){
|
||||
|
@ -681,6 +690,9 @@ faultamd64(Ureg* ureg, void*)
|
|||
read ? "read" : "write", addr);
|
||||
postnote(up, 1, buf, NDebug);
|
||||
}
|
||||
if(!user) poperror();
|
||||
splhi();
|
||||
fpurestore(f);
|
||||
up->insyscall = insyscall;
|
||||
}
|
||||
|
||||
|
@ -698,7 +710,7 @@ syscall(Ureg* ureg)
|
|||
char *e;
|
||||
uintptr sp;
|
||||
long long ret;
|
||||
int i, s;
|
||||
int i, s, f;
|
||||
ulong scallnr;
|
||||
vlong startns, stopns;
|
||||
|
||||
|
@ -715,11 +727,12 @@ syscall(Ureg* ureg)
|
|||
sp = ureg->sp;
|
||||
scallnr = ureg->bp; /* RARG */
|
||||
up->scallnr = scallnr;
|
||||
|
||||
f = fpusave();
|
||||
spllo();
|
||||
|
||||
ret = -1;
|
||||
startns = 0;
|
||||
up->nerrlab = 0;
|
||||
ret = -1;
|
||||
if(!waserror()){
|
||||
if(sp<(USTKTOP-BY2PG) || sp>(USTKTOP-sizeof(Sargs)-BY2WD))
|
||||
validaddr(sp, sizeof(Sargs)+BY2WD, 0);
|
||||
|
@ -778,12 +791,13 @@ syscall(Ureg* ureg)
|
|||
splx(s);
|
||||
}
|
||||
|
||||
splhi();
|
||||
fpurestore(f);
|
||||
up->insyscall = 0;
|
||||
up->psstate = 0;
|
||||
|
||||
if(scallnr == NOTED){
|
||||
noted(ureg, *((ulong*)up->s.args));
|
||||
|
||||
/*
|
||||
* normally, syscall() returns to forkret()
|
||||
* not restoring general registers when going
|
||||
|
@ -796,10 +810,10 @@ syscall(Ureg* ureg)
|
|||
}
|
||||
|
||||
if(scallnr!=RFORK && (up->procctl || up->nnote)){
|
||||
splhi();
|
||||
notify(ureg);
|
||||
((void**)&ureg)[-1] = (void*)noteret; /* loads RARG */
|
||||
}
|
||||
|
||||
/* if we delayed sched because we held a lock, sched now */
|
||||
if(up->delaysched)
|
||||
sched();
|
||||
|
@ -813,7 +827,7 @@ syscall(Ureg* ureg)
|
|||
int
|
||||
notify(Ureg* ureg)
|
||||
{
|
||||
int l, s;
|
||||
int l;
|
||||
uintptr sp;
|
||||
Note *n;
|
||||
|
||||
|
@ -821,14 +835,7 @@ notify(Ureg* ureg)
|
|||
procctl();
|
||||
if(up->nnote == 0)
|
||||
return 0;
|
||||
|
||||
if(up->fpstate == FPactive){
|
||||
fpsave(up->fpsave);
|
||||
up->fpstate = FPinactive;
|
||||
}
|
||||
up->fpstate |= FPillegal;
|
||||
|
||||
s = spllo();
|
||||
spllo();
|
||||
qlock(&up->debug);
|
||||
up->notepending = 0;
|
||||
n = &up->note[0];
|
||||
|
@ -887,11 +894,14 @@ if(0) print("%s %lud: notify %#p %#p %#p %s\n",
|
|||
up->nnote--;
|
||||
memmove(&up->lastnote, &up->note[0], sizeof(Note));
|
||||
memmove(&up->note[0], &up->note[1], up->nnote*sizeof(Note));
|
||||
|
||||
qunlock(&up->debug);
|
||||
splx(s);
|
||||
splhi();
|
||||
if(up->fpstate == FPactive){
|
||||
fpsave(up->fpsave);
|
||||
up->fpstate = FPinactive;
|
||||
}
|
||||
up->fpstate |= FPillegal;
|
||||
return 1;
|
||||
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -903,6 +913,8 @@ noted(Ureg* ureg, ulong arg0)
|
|||
Ureg *nureg;
|
||||
uintptr oureg, sp;
|
||||
|
||||
up->fpstate &= ~FPillegal;
|
||||
spllo();
|
||||
qlock(&up->debug);
|
||||
if(arg0!=NRSTR && !up->notified) {
|
||||
qunlock(&up->debug);
|
||||
|
@ -913,8 +925,6 @@ noted(Ureg* ureg, ulong arg0)
|
|||
|
||||
nureg = up->ureg; /* pointer to user returned Ureg struct */
|
||||
|
||||
up->fpstate &= ~FPillegal;
|
||||
|
||||
/* sanity clause */
|
||||
oureg = (uintptr)nureg;
|
||||
if(!okaddr(oureg-BY2WD, BY2WD+sizeof(Ureg), 0)){
|
||||
|
|
Loading…
Reference in a new issue