sse kernel support (sources)
This commit is contained in:
parent
213bf50893
commit
81b7451972
7 changed files with 141 additions and 16 deletions
|
@ -2,7 +2,9 @@ typedef struct BIOS32si BIOS32si;
|
|||
typedef struct BIOS32ci BIOS32ci;
|
||||
typedef struct Conf Conf;
|
||||
typedef struct Confmem Confmem;
|
||||
typedef struct FPsave FPsave;
|
||||
typedef union FPsave FPsave;
|
||||
typedef struct FPssestate FPssestate;
|
||||
typedef struct FPstate FPstate;
|
||||
typedef struct ISAConf ISAConf;
|
||||
typedef struct Label Label;
|
||||
typedef struct Lock Lock;
|
||||
|
@ -64,7 +66,7 @@ enum
|
|||
FPillegal= 0x100,
|
||||
};
|
||||
|
||||
struct FPsave
|
||||
struct FPstate
|
||||
{
|
||||
ushort control;
|
||||
ushort r1;
|
||||
|
@ -81,6 +83,33 @@ struct FPsave
|
|||
uchar regs[80]; /* floating point registers */
|
||||
};
|
||||
|
||||
/*
 * Save area for the SSE-capable save/restore path: fpssesave0 and
 * fpsserestore0 execute FXSAVE and FXRSTOR directly on a buffer of
 * this shape.  The fields ahead of alignpad add up to 512 bytes
 * (assuming 2-byte ushort and 4-byte ulong -- confirm for the port).
 */
struct FPssestate /* SSE fp state */
|
||||
{
|
||||
ushort fcw; /* control */
|
||||
ushort fsw; /* status */
|
||||
ushort ftw; /* tag */
|
||||
ushort fop; /* opcode */
|
||||
ulong fpuip; /* pc */
|
||||
ushort cs; /* pc segment */
|
||||
ushort r1; /* reserved */
|
||||
ulong fpudp; /* data pointer */
|
||||
ushort ds; /* data pointer segment */
|
||||
ushort r2; /* presumably reserved, like r1 -- confirm */
|
||||
ulong mxcsr; /* MXCSR register state */
|
||||
ulong mxcsr_mask; /* MXCSR mask register */
|
||||
uchar xregs[480]; /* extended registers */
|
||||
uchar alignpad[FPalign]; /* slack: fpssesave/fpsserestore round the buffer address up to FPalign and move sizeof(FPssestate)-FPalign bytes */
|
||||
};
|
||||
|
||||
/*
 * the FP regs must be stored here, not somewhere pointed to from here.
 * port code assumes this.
 *
 * FPsave overlays the two save-area layouts: FPstate (used when the
 * x87 save routine is selected) and FPssestate (used otherwise).
 * Both members are unnamed, so their fields are reached directly
 * through the union; mathstate, for example, reads f->status for
 * the x87 layout and f->fsw for the SSE layout.
 */
|
||||
union FPsave {
|
||||
FPstate;
|
||||
FPssestate;
|
||||
};
|
||||
|
||||
struct Confmem
|
||||
{
|
||||
ulong base;
|
||||
|
@ -227,6 +256,7 @@ struct Mach
|
|||
uvlong tscticks;
|
||||
int pdballoc;
|
||||
int pdbfree;
|
||||
FPsave *fpsavalign;
|
||||
|
||||
vlong mtrrcap;
|
||||
vlong mtrrdef;
|
||||
|
@ -297,6 +327,7 @@ enum {
|
|||
Clflush = 1<<19,
|
||||
Acpif = 1<<22, /* therm control msr */
|
||||
Mmx = 1<<23,
|
||||
Fxsr = 1<<24, /* have SSE FXSAVE/FXRSTOR */
|
||||
Sse = 1<<25, /* thus sfence instr. */
|
||||
Sse2 = 1<<26, /* thus mfence & lfence instr.s */
|
||||
Rdrnd = 1<<30, /* RDRAND support bit */
|
||||
|
|
|
@ -38,6 +38,11 @@ enum {
|
|||
|
||||
Qmax = 16,
|
||||
};
|
||||
|
||||
/*
 * CR4 control-register bit.  cpuidentify ORs CR4Osfxsr into CR4
 * (putcr4(getcr4() | CR4Osfxsr)) when cpuid reports Fxsr, at the same
 * time as it selects the fpssesave/fpsserestore routines.
 */
enum {
|
||||
CR4Osfxsr = 1 << 9,
|
||||
};
|
||||
|
||||
enum { /* cpuid standard function codes */
|
||||
Highstdfunc = 0, /* also returns vendor string */
|
||||
Procsig,
|
||||
|
@ -850,6 +855,15 @@ cpuidentify(void)
|
|||
rdmsr(0x01, &mct);
|
||||
}
|
||||
|
||||
if(m->cpuiddx & Fxsr){ /* have sse fp? */
|
||||
fpsave = fpssesave;
|
||||
fprestore = fpsserestore;
|
||||
putcr4(getcr4() | CR4Osfxsr);
|
||||
} else {
|
||||
fpsave = fpx87save;
|
||||
fprestore = fpx87restore;
|
||||
}
|
||||
|
||||
cputype = t;
|
||||
return t->family;
|
||||
}
|
||||
|
|
|
@ -33,9 +33,15 @@ void fpclear(void);
|
|||
void fpenv(FPsave*);
|
||||
void fpinit(void);
|
||||
void fpoff(void);
|
||||
void fprestore(FPsave*);
|
||||
void fpsave(FPsave*);
|
||||
void (*fprestore)(FPsave*);
|
||||
void (*fpsave)(FPsave*);
|
||||
void fpsserestore(FPsave*);
|
||||
void fpsserestore0(FPsave*);
|
||||
void fpssesave(FPsave*);
|
||||
void fpssesave0(FPsave*);
|
||||
ulong fpstatus(void);
|
||||
void fpx87restore(FPsave*);
|
||||
void fpx87save(FPsave*);
|
||||
ulong getcr0(void);
|
||||
ulong getcr2(void);
|
||||
ulong getcr3(void);
|
||||
|
|
|
@ -657,13 +657,13 @@ TEXT fpinit(SB), $0 /* enable and init */
|
|||
WAIT
|
||||
RET
|
||||
|
||||
TEXT fpsave(SB), $0 /* save state and disable */
|
||||
TEXT fpx87save(SB), $0 /* save state and disable */
|
||||
MOVL p+0(FP), AX
|
||||
FSAVE 0(AX) /* no WAIT */
|
||||
FPOFF(l2)
|
||||
RET
|
||||
|
||||
TEXT fprestore(SB), $0 /* enable and restore state */
|
||||
TEXT fpx87restore(SB), $0 /* enable and restore state */
|
||||
FPON
|
||||
MOVL p+0(FP), AX
|
||||
FRSTOR 0(AX)
|
||||
|
@ -685,6 +685,19 @@ TEXT fpclear(SB), $0 /* clear pending exceptions */
|
|||
FPOFF(l3)
|
||||
RET
|
||||
|
||||
/*
 * fpssesave0(FPsave *p): FXSAVE the floating-point state into the
 * buffer at p, then run FPOFF.  Its C caller, fpssesave, passes an
 * address already rounded up to FPalign.
 * NOTE(review): FPOFF is a macro defined outside this view; going by
 * the comment on the TEXT line it presumably disables the FPU -- confirm.
 */
TEXT fpssesave0(SB), $0 /* save state and disable */
|
||||
MOVL p+0(FP), AX /* AX = p, the destination buffer */
|
||||
FXSAVE 0(AX) /* no WAIT */
|
||||
FPOFF(l4)
|
||||
RET
|
||||
|
||||
/*
 * fpsserestore0(FPsave *p): run FPON, then FXRSTOR the floating-point
 * state from the buffer at p and WAIT before returning.  Its C caller,
 * fpsserestore, passes an FPalign-aligned address (either the rounded-up
 * buffer or the per-Mach m->fpsavalign scratch copy).
 * NOTE(review): FPON is a macro defined outside this view; going by
 * the comment on the TEXT line it presumably enables the FPU -- confirm.
 */
TEXT fpsserestore0(SB), $0 /* enable and restore state */
|
||||
FPON
|
||||
MOVL p+0(FP), AX /* AX = p, the source buffer */
|
||||
FXRSTOR 0(AX)
|
||||
WAIT
|
||||
RET
|
||||
|
||||
/*
|
||||
*/
|
||||
TEXT splhi(SB), $0
|
||||
|
|
|
@ -479,14 +479,37 @@ static char* mathmsg[] =
|
|||
"precision loss",
|
||||
};
|
||||
|
||||
static void
|
||||
mathstate(ulong *stsp, ulong *pcp, ulong *ctlp)
|
||||
{
|
||||
ulong sts, fpc, ctl;
|
||||
FPsave *f = &up->fpsave;
|
||||
|
||||
if(fpsave == fpx87save){
|
||||
sts = f->status;
|
||||
fpc = f->pc;
|
||||
ctl = f->control;
|
||||
} else {
|
||||
sts = f->fsw;
|
||||
fpc = f->fpuip;
|
||||
ctl = f->fcw;
|
||||
}
|
||||
if(stsp)
|
||||
*stsp = sts;
|
||||
if(pcp)
|
||||
*pcp = fpc;
|
||||
if(ctlp)
|
||||
*ctlp = ctl;
|
||||
}
|
||||
|
||||
static void
|
||||
mathnote(void)
|
||||
{
|
||||
int i;
|
||||
ulong status;
|
||||
ulong status, pc;
|
||||
char *msg, note[ERRMAX];
|
||||
|
||||
status = up->fpsave.status;
|
||||
mathstate(&status, &pc, nil);
|
||||
|
||||
/*
|
||||
* Some attention should probably be paid here to the
|
||||
|
@ -513,12 +536,50 @@ mathnote(void)
|
|||
postnote(up, 1, note, NDebug);
|
||||
}
|
||||
|
||||
/*
|
||||
* sse fp save and restore buffers have to be 16-byte (FPalign) aligned,
|
||||
* so we shuffle the data up and down as needed or make copies.
|
||||
*/
|
||||
|
||||
void
|
||||
fpssesave(FPsave *fps)
|
||||
{
|
||||
FPsave *afps;
|
||||
|
||||
afps = (FPsave *)ROUND(((uintptr)fps), FPalign);
|
||||
fpssesave0(afps);
|
||||
if (fps != afps) /* not aligned? shuffle down from aligned buffer */
|
||||
memmove(fps, afps, sizeof(FPssestate) - FPalign);
|
||||
}
|
||||
|
||||
void
|
||||
fpsserestore(FPsave *fps)
|
||||
{
|
||||
FPsave *afps;
|
||||
|
||||
afps = (FPsave *)ROUND(((uintptr)fps), FPalign);
|
||||
if (fps != afps) {
|
||||
if (m->fpsavalign == nil)
|
||||
m->fpsavalign = mallocalign(sizeof(FPssestate),
|
||||
FPalign, 0, 0);
|
||||
if (m->fpsavalign)
|
||||
afps = m->fpsavalign;
|
||||
/* copy or shuffle up to make aligned */
|
||||
memmove(afps, fps, sizeof(FPssestate) - FPalign);
|
||||
}
|
||||
fpsserestore0(afps);
|
||||
/* if we couldn't make a copy, shuffle regs back down */
|
||||
if (fps != afps && afps != m->fpsavalign)
|
||||
memmove(fps, afps, sizeof(FPssestate) - FPalign);
|
||||
}
|
||||
|
||||
/*
|
||||
* math coprocessor error
|
||||
*/
|
||||
static void
|
||||
matherror(Ureg *ur, void*)
|
||||
{
|
||||
ulong status, pc;
|
||||
/*
|
||||
* a write cycle to port 0xF0 clears the interrupt latch attached
|
||||
* to the error# line from the 387
|
||||
|
@ -532,9 +593,11 @@ matherror(Ureg *ur, void*)
|
|||
fpenv(&up->fpsave);
|
||||
mathnote();
|
||||
|
||||
if((ur->pc & 0xf0000000) == KZERO)
|
||||
if((ur->pc & 0xf0000000) == KZERO){
|
||||
mathstate(&status, &pc, nil);
|
||||
panic("fp: status %ux fppc=0x%lux pc=0x%lux",
|
||||
up->fpsave.status, up->fpsave.pc, ur->pc);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -543,6 +606,8 @@ matherror(Ureg *ur, void*)
|
|||
static void
|
||||
mathemu(Ureg *ureg, void*)
|
||||
{
|
||||
ulong status, control;
|
||||
|
||||
if(up->fpstate & FPillegal){
|
||||
/* someone did floating point in a note handler */
|
||||
postnote(up, 1, "sys: floating point in note handler", NDebug);
|
||||
|
@ -561,7 +626,8 @@ mathemu(Ureg *ureg, void*)
|
|||
* More attention should probably be paid here to the
|
||||
* exception masks and error summary.
|
||||
*/
|
||||
if((up->fpsave.status & ~up->fpsave.control) & 0x07F){
|
||||
mathstate(&status, nil, &control);
|
||||
if((status & ~control) & 0x07F){
|
||||
mathnote();
|
||||
break;
|
||||
}
|
||||
|
|
|
@ -19,6 +19,7 @@
|
|||
#define ROUND(s, sz) (((s)+((sz)-1))&~((sz)-1))
|
||||
#define PGROUND(s) ROUND(s, BY2PG)
|
||||
#define BLOCKALIGN 8
|
||||
#define FPalign 16
|
||||
|
||||
/*
|
||||
* In 32-bit mode, the MAXMACH limit is 32 without
|
||||
|
|
|
@ -434,12 +434,6 @@ uchar ymskb[] =
|
|||
Ymr, Yrl, Zm_r_xm, 1,
|
||||
0
|
||||
};
|
||||
uchar yxaes[] =
|
||||
{
|
||||
Yxm, Yxr, Zm_r_xm, 2,
|
||||
Yxm, Yxr, Zm_r_i_xm, 2,
|
||||
0
|
||||
};
|
||||
|
||||
Optab optab[] =
|
||||
/* as, ytab, andproto, opcode */
|
||||
|
|
Loading…
Reference in a new issue