diff --git a/sys/src/9/pc64/dat.h b/sys/src/9/pc64/dat.h index 4dde0ec1b..8106cf1cb 100644 --- a/sys/src/9/pc64/dat.h +++ b/sys/src/9/pc64/dat.h @@ -65,12 +65,6 @@ struct FPsave uchar ign[96]; /* reserved, ignored */ }; -struct PFPU -{ - int fpstate; - FPsave *fpsave; -}; - enum { /* this is a state */ @@ -78,8 +72,27 @@ enum FPactive= 1, FPinactive= 2, - /* the following is a bit that can be or'd into the state */ - FPillegal= 0x100, + /* + * the following are bits that can be or'd into the state. + * + * this is biased so that FPinit, FPactive and FPinactive + * without any flags refer to user fp state in fpslot[0]. + */ + FPillegal= 1<<8, /* fp forbidden in note handler */ + FPpush= 2<<8, /* trap on use and initialize new fpslot */ + FPnouser= 4<<8, /* fpslot[0] is kernel regs */ + FPkernel= 8<<8, /* fp use in kernel (user in fpslot[0] when !FPnouser) */ + + FPindexs= 16, + FPindex1= 1<<FPindexs, + FPindexm= 3<<FPindexs, +}; + +struct PFPU +{ + int fpstate; + FPsave *fpsave; /* fpslot[fpstate>>FPindexs] */ + FPsave *fpslot[(FPindexm+1)>>FPindexs]; }; struct Confmem diff --git a/sys/src/9/pc64/fns.h b/sys/src/9/pc64/fns.h index 44613d2b2..f785a328a 100644 --- a/sys/src/9/pc64/fns.h +++ b/sys/src/9/pc64/fns.h @@ -41,6 +41,8 @@ void fpsserestore(FPsave*); void fpssesave(FPsave*); void fpx87restore(FPsave*); void fpx87save(FPsave*); +int fpusave(void); +void fpurestore(int); u64int getcr0(void); u64int getcr2(void); u64int getcr3(void); diff --git a/sys/src/9/pc64/main.c b/sys/src/9/pc64/main.c index 77a45f396..e56ad9197 100644 --- a/sys/src/9/pc64/main.c +++ b/sys/src/9/pc64/main.c @@ -473,13 +473,13 @@ mathnote(ulong status, uintptr pc) * math coprocessor error */ static void -matherror(Ureg*, void*) +matherror(Ureg *, void*) { /* * Save FPU state to check out the error. 
*/ fpsave(up->fpsave); - up->fpstate = FPinactive; + up->fpstate = FPinactive | (up->fpstate & (FPnouser|FPkernel|FPindexm)); mathnote(up->fpsave->fsw, up->fpsave->rip); } @@ -490,7 +490,7 @@ static void simderror(Ureg *ureg, void*) { fpsave(up->fpsave); - up->fpstate = FPinactive; + up->fpstate = FPinactive | (up->fpstate & (FPnouser|FPkernel|FPindexm)); mathnote(up->fpsave->mxcsr & 0x3f, ureg->pc); } @@ -519,18 +519,37 @@ static void mathemu(Ureg *ureg, void*) { ulong status, control; + int index; if(up->fpstate & FPillegal){ /* someone did floating point in a note handler */ postnote(up, 1, "sys: floating point in note handler", NDebug); return; } - switch(up->fpstate){ + switch(up->fpstate & ~(FPnouser|FPkernel|FPindexm)){ + case FPactive | FPpush: + _clts(); + fpsave(up->fpsave); + case FPinactive | FPpush: + up->fpstate += FPindex1; + case FPinit | FPpush: case FPinit: fpinit(); - while(up->fpsave == nil) - up->fpsave = mallocalign(sizeof(FPsave), FPalign, 0, 0); - up->fpstate = FPactive; + index = up->fpstate >> FPindexs; + if(index < 0 || index > FPindexm) + panic("fpslot index overflow: %d", index); + if(userureg(ureg)){ + if(index != 0) + panic("fpslot index %d != 0 for user", index); + } else { + if(index == 0) + up->fpstate |= FPnouser; + up->fpstate |= FPkernel; + } + while(up->fpslot[index] == nil) + up->fpslot[index] = mallocalign(sizeof(FPsave), FPalign, 0, 0); + up->fpsave = up->fpslot[index]; + up->fpstate = FPactive | (up->fpstate & (FPnouser|FPkernel|FPindexm)); break; case FPinactive: /* @@ -547,7 +566,7 @@ mathemu(Ureg *ureg, void*) break; } fprestore(up->fpsave); - up->fpstate = FPactive; + up->fpstate = FPactive | (up->fpstate & (FPnouser|FPkernel|FPindexm)); break; case FPactive: panic("math emu pid %ld %s pc %#p", @@ -596,17 +615,21 @@ procfork(Proc *p) /* save floating point state */ s = splhi(); switch(up->fpstate & ~FPillegal){ + case FPactive | FPpush: + _clts(); case FPactive: fpsave(up->fpsave); - up->fpstate = FPinactive; + 
up->fpstate = FPinactive | (up->fpstate & FPpush); + case FPactive | FPkernel: + case FPinactive | FPkernel: + case FPinactive | FPpush: case FPinactive: - while(p->fpsave == nil) - p->fpsave = mallocalign(sizeof(FPsave), FPalign, 0, 0); - memmove(p->fpsave, up->fpsave, sizeof(FPsave)); + while(p->fpslot[0] == nil) + p->fpslot[0] = mallocalign(sizeof(FPsave), FPalign, 0, 0); + memmove(p->fpsave = p->fpslot[0], up->fpslot[0], sizeof(FPsave)); p->fpstate = FPinactive; } splx(s); - } void @@ -644,24 +667,26 @@ procsave(Proc *p) p->kentry -= t; p->pcycles += t; - if(p->fpstate == FPactive){ + switch(p->fpstate & ~(FPnouser|FPkernel|FPindexm)){ + case FPactive | FPpush: + _clts(); + case FPactive: if(p->state == Moribund){ - _clts(); _fnclex(); _stts(); + break; } - else{ - /* - * Fpsave() stores without handling pending - * unmasked exeptions. Postnote() can't be called - * here as sleep() already has up->rlock, so - * the handling of pending exceptions is delayed - * until the process runs again and generates an - * emulation fault to activate the FPU. - */ - fpsave(p->fpsave); - } - p->fpstate = FPinactive; + /* + * Fpsave() stores without handling pending + * unmasked exceptions. Postnote() can't be called + * here as sleep() already has up->rlock, so + * the handling of pending exceptions is delayed + * until the process runs again and generates an + * emulation fault to activate the FPU. + */ + fpsave(p->fpsave); + p->fpstate = FPinactive | (p->fpstate & (FPpush|FPnouser|FPkernel|FPindexm)); + break; } /* @@ -677,3 +702,32 @@ procsave(Proc *p) */ mmuflushtlb(); } + +/* + * Fpusave and fpurestore lazily save and restore FPU state across + * system calls and the pagefault handler so that we can take + * advantage of SSE instructions such as AES-NI in the kernel. 
+ */ +int +fpusave(void) +{ + int ostate = up->fpstate; + if((up->fpstate & ~(FPnouser|FPkernel|FPindexm)) == FPactive) + _stts(); + up->fpstate = FPpush | (up->fpstate & ~FPillegal); + return ostate; +} +void +fpurestore(int ostate) +{ + if((up->fpstate & ~(FPnouser|FPkernel|FPindexm)) == FPactive) + _stts(); + if((ostate & FPindexm) == (up->fpstate & FPindexm)){ + if((ostate & ~(FPnouser|FPkernel|FPindexm)) == FPactive) + _clts(); + } else { + up->fpsave = up->fpslot[ostate>>FPindexs]; + ostate = FPinactive | (ostate & (FPillegal|FPpush|FPnouser|FPkernel|FPindexm)); + } + up->fpstate = ostate; +} diff --git a/sys/src/9/pc64/trap.c b/sys/src/9/pc64/trap.c index ced545deb..387467aa5 100644 --- a/sys/src/9/pc64/trap.c +++ b/sys/src/9/pc64/trap.c @@ -649,11 +649,12 @@ unexpected(Ureg* ureg, void*) } extern void checkpages(void); + static void faultamd64(Ureg* ureg, void*) { uintptr addr; - int read, user, n, insyscall; + int read, user, n, insyscall, f; char buf[ERRMAX]; addr = getcr2(); @@ -670,6 +671,14 @@ faultamd64(Ureg* ureg, void*) insyscall = up->insyscall; up->insyscall = 1; + f = fpusave(); + if(!user && waserror()){ + int s = splhi(); + fpurestore(f); + up->insyscall = insyscall; + splx(s); + nexterror(); + } n = fault(addr, read); if(n < 0){ if(!user){ @@ -681,6 +690,9 @@ faultamd64(Ureg* ureg, void*) read ? 
"read" : "write", addr); postnote(up, 1, buf, NDebug); } + if(!user) poperror(); + splhi(); + fpurestore(f); up->insyscall = insyscall; } @@ -698,7 +710,7 @@ syscall(Ureg* ureg) char *e; uintptr sp; long long ret; - int i, s; + int i, s, f; ulong scallnr; vlong startns, stopns; @@ -715,11 +727,12 @@ syscall(Ureg* ureg) sp = ureg->sp; scallnr = ureg->bp; /* RARG */ up->scallnr = scallnr; - + f = fpusave(); spllo(); + + ret = -1; startns = 0; up->nerrlab = 0; - ret = -1; if(!waserror()){ if(sp<(USTKTOP-BY2PG) || sp>(USTKTOP-sizeof(Sargs)-BY2WD)) validaddr(sp, sizeof(Sargs)+BY2WD, 0); @@ -778,12 +791,13 @@ syscall(Ureg* ureg) splx(s); } + splhi(); + fpurestore(f); up->insyscall = 0; up->psstate = 0; if(scallnr == NOTED){ noted(ureg, *((ulong*)up->s.args)); - /* * normally, syscall() returns to forkret() * not restoring general registers when going @@ -796,10 +810,10 @@ syscall(Ureg* ureg) } if(scallnr!=RFORK && (up->procctl || up->nnote)){ - splhi(); notify(ureg); ((void**)&ureg)[-1] = (void*)noteret; /* loads RARG */ } + /* if we delayed sched because we held a lock, sched now */ if(up->delaysched) sched(); @@ -813,7 +827,7 @@ syscall(Ureg* ureg) int notify(Ureg* ureg) { - int l, s; + int l; uintptr sp; Note *n; @@ -821,14 +835,7 @@ notify(Ureg* ureg) procctl(); if(up->nnote == 0) return 0; - - if(up->fpstate == FPactive){ - fpsave(up->fpsave); - up->fpstate = FPinactive; - } - up->fpstate |= FPillegal; - - s = spllo(); + spllo(); qlock(&up->debug); up->notepending = 0; n = &up->note[0]; @@ -887,11 +894,14 @@ if(0) print("%s %lud: notify %#p %#p %#p %s\n", up->nnote--; memmove(&up->lastnote, &up->note[0], sizeof(Note)); memmove(&up->note[0], &up->note[1], up->nnote*sizeof(Note)); - qunlock(&up->debug); - splx(s); + splhi(); + if(up->fpstate == FPactive){ + fpsave(up->fpsave); + up->fpstate = FPinactive; + } + up->fpstate |= FPillegal; return 1; - } /* @@ -903,6 +913,8 @@ noted(Ureg* ureg, ulong arg0) Ureg *nureg; uintptr oureg, sp; + up->fpstate &= ~FPillegal; + 
spllo(); qlock(&up->debug); if(arg0!=NRSTR && !up->notified) { qunlock(&up->debug); @@ -913,8 +925,6 @@ noted(Ureg* ureg, ulong arg0) nureg = up->ureg; /* pointer to user returned Ureg struct */ - up->fpstate &= ~FPillegal; - /* sanity clause */ oureg = (uintptr)nureg; if(!okaddr(oureg-BY2WD, BY2WD+sizeof(Ureg), 0)){