sse kernel support (sources)
This commit is contained in:
parent
213bf50893
commit
81b7451972
7 changed files with 141 additions and 16 deletions
|
@ -2,7 +2,9 @@ typedef struct BIOS32si BIOS32si;
|
||||||
typedef struct BIOS32ci BIOS32ci;
|
typedef struct BIOS32ci BIOS32ci;
|
||||||
typedef struct Conf Conf;
|
typedef struct Conf Conf;
|
||||||
typedef struct Confmem Confmem;
|
typedef struct Confmem Confmem;
|
||||||
typedef struct FPsave FPsave;
|
typedef union FPsave FPsave;
|
||||||
|
typedef struct FPssestate FPssestate;
|
||||||
|
typedef struct FPstate FPstate;
|
||||||
typedef struct ISAConf ISAConf;
|
typedef struct ISAConf ISAConf;
|
||||||
typedef struct Label Label;
|
typedef struct Label Label;
|
||||||
typedef struct Lock Lock;
|
typedef struct Lock Lock;
|
||||||
|
@ -64,7 +66,7 @@ enum
|
||||||
FPillegal= 0x100,
|
FPillegal= 0x100,
|
||||||
};
|
};
|
||||||
|
|
||||||
struct FPsave
|
struct FPstate
|
||||||
{
|
{
|
||||||
ushort control;
|
ushort control;
|
||||||
ushort r1;
|
ushort r1;
|
||||||
|
@ -81,6 +83,33 @@ struct FPsave
|
||||||
uchar regs[80]; /* floating point registers */
|
uchar regs[80]; /* floating point registers */
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/*
 * Save area used by the sse save/restore path: fpssesave0 stores into
 * it with FXSAVE and fpsserestore0 loads from it with FXRSTOR.
 * The members up to and including xregs add up to 512 bytes when
 * ushort is 16 bits and ulong is 32 bits (32 bytes of header plus
 * 480 of xregs).  alignpad adds FPalign spare bytes so that
 * fpssesave/fpsserestore can round the start of the area up to an
 * FPalign boundary and still have room for those 512 bytes.
 * The member order presumably mirrors the hardware FXSAVE image and
 * must not be rearranged -- confirm against the processor manual.
 */
struct FPssestate /* SSE fp state */
|
||||||
|
{
|
||||||
|
ushort fcw; /* control word; read by mathstate */
|
||||||
|
ushort fsw; /* status word; read by mathstate */
|
||||||
|
ushort ftw; /* tag */
|
||||||
|
ushort fop; /* opcode */
|
||||||
|
ulong fpuip; /* fp instruction pointer (pc); read by mathstate */
|
||||||
|
ushort cs; /* pc segment */
|
||||||
|
ushort r1; /* reserved */
|
||||||
|
ulong fpudp; /* data pointer */
|
||||||
|
ushort ds; /* data pointer segment */
|
||||||
|
ushort r2; /* presumably reserved, like r1 -- confirm */
|
||||||
|
ulong mxcsr; /* MXCSR register state */
|
||||||
|
ulong mxcsr_mask; /* MXCSR mask register */
|
||||||
|
uchar xregs[480]; /* extended registers */
|
||||||
|
uchar alignpad[FPalign]; /* slack for rounding the start up to FPalign */
|
||||||
|
};
|
||||||
|
|
||||||
|
/*
|
||||||
|
* the FP regs must be stored here, not somewhere pointed to from here.
|
||||||
|
* port code assumes this.
|
||||||
|
*/
|
||||||
|
/*
 * Per-process fp save area, large enough for either save format.
 * Both members are unnamed, so their fields are reached directly
 * through an FPsave (mathstate uses f->status and f->fsw alike);
 * which set is meaningful depends on whether fpsave points at
 * fpx87save or fpssesave.
 */
union FPsave {
|
||||||
|
FPstate; /* layout stored by FSAVE in fpx87save */
|
||||||
|
FPssestate; /* layout stored by FXSAVE in fpssesave0 */
|
||||||
|
};
|
||||||
|
|
||||||
struct Confmem
|
struct Confmem
|
||||||
{
|
{
|
||||||
ulong base;
|
ulong base;
|
||||||
|
@ -227,6 +256,7 @@ struct Mach
|
||||||
uvlong tscticks;
|
uvlong tscticks;
|
||||||
int pdballoc;
|
int pdballoc;
|
||||||
int pdbfree;
|
int pdbfree;
|
||||||
|
FPsave *fpsavalign;
|
||||||
|
|
||||||
vlong mtrrcap;
|
vlong mtrrcap;
|
||||||
vlong mtrrdef;
|
vlong mtrrdef;
|
||||||
|
@ -297,6 +327,7 @@ enum {
|
||||||
Clflush = 1<<19,
|
Clflush = 1<<19,
|
||||||
Acpif = 1<<22, /* therm control msr */
|
Acpif = 1<<22, /* therm control msr */
|
||||||
Mmx = 1<<23,
|
Mmx = 1<<23,
|
||||||
|
Fxsr = 1<<24, /* have SSE FXSAVE/FXRSTOR */
|
||||||
Sse = 1<<25, /* thus sfence instr. */
|
Sse = 1<<25, /* thus sfence instr. */
|
||||||
Sse2 = 1<<26, /* thus mfence & lfence instr.s */
|
Sse2 = 1<<26, /* thus mfence & lfence instr.s */
|
||||||
Rdrnd = 1<<30, /* RDRAND support bit */
|
Rdrnd = 1<<30, /* RDRAND support bit */
|
||||||
|
|
|
@ -38,6 +38,11 @@ enum {
|
||||||
|
|
||||||
Qmax = 16,
|
Qmax = 16,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/* control register 4 bits */
enum {
	CR4Osfxsr	= 1<<9,	/* or'ed into cr4 by cpuidentify when cpuid reports Fxsr */
};
|
||||||
|
|
||||||
enum { /* cpuid standard function codes */
|
enum { /* cpuid standard function codes */
|
||||||
Highstdfunc = 0, /* also returns vendor string */
|
Highstdfunc = 0, /* also returns vendor string */
|
||||||
Procsig,
|
Procsig,
|
||||||
|
@ -850,6 +855,15 @@ cpuidentify(void)
|
||||||
rdmsr(0x01, &mct);
|
rdmsr(0x01, &mct);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if(m->cpuiddx & Fxsr){ /* have sse fp? */
|
||||||
|
fpsave = fpssesave;
|
||||||
|
fprestore = fpsserestore;
|
||||||
|
putcr4(getcr4() | CR4Osfxsr);
|
||||||
|
} else {
|
||||||
|
fpsave = fpx87save;
|
||||||
|
fprestore = fpx87restore;
|
||||||
|
}
|
||||||
|
|
||||||
cputype = t;
|
cputype = t;
|
||||||
return t->family;
|
return t->family;
|
||||||
}
|
}
|
||||||
|
|
|
@ -33,9 +33,15 @@ void fpclear(void);
|
||||||
void fpenv(FPsave*);
|
void fpenv(FPsave*);
|
||||||
void fpinit(void);
|
void fpinit(void);
|
||||||
void fpoff(void);
|
void fpoff(void);
|
||||||
void fprestore(FPsave*);
|
void (*fprestore)(FPsave*);
|
||||||
void fpsave(FPsave*);
|
void (*fpsave)(FPsave*);
|
||||||
|
void fpsserestore(FPsave*);
|
||||||
|
void fpsserestore0(FPsave*);
|
||||||
|
void fpssesave(FPsave*);
|
||||||
|
void fpssesave0(FPsave*);
|
||||||
ulong fpstatus(void);
|
ulong fpstatus(void);
|
||||||
|
void fpx87restore(FPsave*);
|
||||||
|
void fpx87save(FPsave*);
|
||||||
ulong getcr0(void);
|
ulong getcr0(void);
|
||||||
ulong getcr2(void);
|
ulong getcr2(void);
|
||||||
ulong getcr3(void);
|
ulong getcr3(void);
|
||||||
|
|
|
@ -657,13 +657,13 @@ TEXT fpinit(SB), $0 /* enable and init */
|
||||||
WAIT
|
WAIT
|
||||||
RET
|
RET
|
||||||
|
|
||||||
TEXT fpsave(SB), $0 /* save state and disable */
|
TEXT fpx87save(SB), $0 /* save state and disable */
|
||||||
MOVL p+0(FP), AX
|
MOVL p+0(FP), AX
|
||||||
FSAVE 0(AX) /* no WAIT */
|
FSAVE 0(AX) /* no WAIT */
|
||||||
FPOFF(l2)
|
FPOFF(l2)
|
||||||
RET
|
RET
|
||||||
|
|
||||||
TEXT fprestore(SB), $0 /* enable and restore state */
|
TEXT fpx87restore(SB), $0 /* enable and restore state */
|
||||||
FPON
|
FPON
|
||||||
MOVL p+0(FP), AX
|
MOVL p+0(FP), AX
|
||||||
FRSTOR 0(AX)
|
FRSTOR 0(AX)
|
||||||
|
@ -685,6 +685,19 @@ TEXT fpclear(SB), $0 /* clear pending exceptions */
|
||||||
FPOFF(l3)
|
FPOFF(l3)
|
||||||
RET
|
RET
|
||||||
|
|
||||||
|
/*
 * fpssesave0(FPsave *p): store the current fp/sse state at p with
 * FXSAVE, then invoke FPOFF (a macro defined outside this hunk; going
 * by this routine's own comment it disables the fpu).
 * p is expected to be FPalign-aligned: fpssesave rounds the address
 * up before calling here.
 */
TEXT fpssesave0(SB), $0 /* save state and disable */
|
||||||
|
MOVL p+0(FP), AX /* AX = p */
|
||||||
|
FXSAVE 0(AX) /* no WAIT */
|
||||||
|
FPOFF(l4) /* l4: label argument to the macro; other callers use l2, l3 */
|
||||||
|
RET
|
||||||
|
|
||||||
|
/*
 * fpsserestore0(FPsave *p): invoke FPON (a macro defined outside this
 * hunk; going by this routine's own comment it enables the fpu), then
 * reload the fp/sse state from p with FXRSTOR.
 * p is expected to be FPalign-aligned: fpsserestore arranges that
 * before calling here.
 */
TEXT fpsserestore0(SB), $0 /* enable and restore state */
|
||||||
|
FPON
|
||||||
|
MOVL p+0(FP), AX /* AX = p */
|
||||||
|
FXRSTOR 0(AX)
|
||||||
|
WAIT
|
||||||
|
RET
|
||||||
|
|
||||||
/*
|
/*
|
||||||
*/
|
*/
|
||||||
TEXT splhi(SB), $0
|
TEXT splhi(SB), $0
|
||||||
|
|
|
@ -479,14 +479,37 @@ static char* mathmsg[] =
|
||||||
"precision loss",
|
"precision loss",
|
||||||
};
|
};
|
||||||
|
|
||||||
|
static void
|
||||||
|
mathstate(ulong *stsp, ulong *pcp, ulong *ctlp)
|
||||||
|
{
|
||||||
|
ulong sts, fpc, ctl;
|
||||||
|
FPsave *f = &up->fpsave;
|
||||||
|
|
||||||
|
if(fpsave == fpx87save){
|
||||||
|
sts = f->status;
|
||||||
|
fpc = f->pc;
|
||||||
|
ctl = f->control;
|
||||||
|
} else {
|
||||||
|
sts = f->fsw;
|
||||||
|
fpc = f->fpuip;
|
||||||
|
ctl = f->fcw;
|
||||||
|
}
|
||||||
|
if(stsp)
|
||||||
|
*stsp = sts;
|
||||||
|
if(pcp)
|
||||||
|
*pcp = fpc;
|
||||||
|
if(ctlp)
|
||||||
|
*ctlp = ctl;
|
||||||
|
}
|
||||||
|
|
||||||
static void
|
static void
|
||||||
mathnote(void)
|
mathnote(void)
|
||||||
{
|
{
|
||||||
int i;
|
int i;
|
||||||
ulong status;
|
ulong status, pc;
|
||||||
char *msg, note[ERRMAX];
|
char *msg, note[ERRMAX];
|
||||||
|
|
||||||
status = up->fpsave.status;
|
mathstate(&status, &pc, nil);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Some attention should probably be paid here to the
|
* Some attention should probably be paid here to the
|
||||||
|
@ -513,12 +536,50 @@ mathnote(void)
|
||||||
postnote(up, 1, note, NDebug);
|
postnote(up, 1, note, NDebug);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* sse fp save and restore buffers have to be 16-byte (FPalign) aligned,
|
||||||
|
* so we shuffle the data up and down as needed or make copies.
|
||||||
|
*/
|
||||||
|
|
||||||
|
void
|
||||||
|
fpssesave(FPsave *fps)
|
||||||
|
{
|
||||||
|
FPsave *afps;
|
||||||
|
|
||||||
|
afps = (FPsave *)ROUND(((uintptr)fps), FPalign);
|
||||||
|
fpssesave0(afps);
|
||||||
|
if (fps != afps) /* not aligned? shuffle down from aligned buffer */
|
||||||
|
memmove(fps, afps, sizeof(FPssestate) - FPalign);
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
fpsserestore(FPsave *fps)
|
||||||
|
{
|
||||||
|
FPsave *afps;
|
||||||
|
|
||||||
|
afps = (FPsave *)ROUND(((uintptr)fps), FPalign);
|
||||||
|
if (fps != afps) {
|
||||||
|
if (m->fpsavalign == nil)
|
||||||
|
m->fpsavalign = mallocalign(sizeof(FPssestate),
|
||||||
|
FPalign, 0, 0);
|
||||||
|
if (m->fpsavalign)
|
||||||
|
afps = m->fpsavalign;
|
||||||
|
/* copy or shuffle up to make aligned */
|
||||||
|
memmove(afps, fps, sizeof(FPssestate) - FPalign);
|
||||||
|
}
|
||||||
|
fpsserestore0(afps);
|
||||||
|
/* if we couldn't make a copy, shuffle regs back down */
|
||||||
|
if (fps != afps && afps != m->fpsavalign)
|
||||||
|
memmove(fps, afps, sizeof(FPssestate) - FPalign);
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* math coprocessor error
|
* math coprocessor error
|
||||||
*/
|
*/
|
||||||
static void
|
static void
|
||||||
matherror(Ureg *ur, void*)
|
matherror(Ureg *ur, void*)
|
||||||
{
|
{
|
||||||
|
ulong status, pc;
|
||||||
/*
|
/*
|
||||||
* a write cycle to port 0xF0 clears the interrupt latch attached
|
* a write cycle to port 0xF0 clears the interrupt latch attached
|
||||||
* to the error# line from the 387
|
* to the error# line from the 387
|
||||||
|
@ -532,9 +593,11 @@ matherror(Ureg *ur, void*)
|
||||||
fpenv(&up->fpsave);
|
fpenv(&up->fpsave);
|
||||||
mathnote();
|
mathnote();
|
||||||
|
|
||||||
if((ur->pc & 0xf0000000) == KZERO)
|
if((ur->pc & 0xf0000000) == KZERO){
|
||||||
|
mathstate(&status, &pc, nil);
|
||||||
panic("fp: status %ux fppc=0x%lux pc=0x%lux",
|
panic("fp: status %ux fppc=0x%lux pc=0x%lux",
|
||||||
up->fpsave.status, up->fpsave.pc, ur->pc);
|
up->fpsave.status, up->fpsave.pc, ur->pc);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -543,6 +606,8 @@ matherror(Ureg *ur, void*)
|
||||||
static void
|
static void
|
||||||
mathemu(Ureg *ureg, void*)
|
mathemu(Ureg *ureg, void*)
|
||||||
{
|
{
|
||||||
|
ulong status, control;
|
||||||
|
|
||||||
if(up->fpstate & FPillegal){
|
if(up->fpstate & FPillegal){
|
||||||
/* someone did floating point in a note handler */
|
/* someone did floating point in a note handler */
|
||||||
postnote(up, 1, "sys: floating point in note handler", NDebug);
|
postnote(up, 1, "sys: floating point in note handler", NDebug);
|
||||||
|
@ -561,7 +626,8 @@ mathemu(Ureg *ureg, void*)
|
||||||
* More attention should probably be paid here to the
|
* More attention should probably be paid here to the
|
||||||
* exception masks and error summary.
|
* exception masks and error summary.
|
||||||
*/
|
*/
|
||||||
if((up->fpsave.status & ~up->fpsave.control) & 0x07F){
|
mathstate(&status, nil, &control);
|
||||||
|
if((status & ~control) & 0x07F){
|
||||||
mathnote();
|
mathnote();
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
|
@ -19,6 +19,7 @@
|
||||||
#define ROUND(s, sz) (((s)+((sz)-1))&~((sz)-1))
|
#define ROUND(s, sz) (((s)+((sz)-1))&~((sz)-1))
|
||||||
#define PGROUND(s) ROUND(s, BY2PG)
|
#define PGROUND(s) ROUND(s, BY2PG)
|
||||||
#define BLOCKALIGN 8
|
#define BLOCKALIGN 8
|
||||||
|
#define FPalign 16 /* byte alignment required of sse fp save/restore buffers */
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* In 32-bit mode, the MAXMACH limit is 32 without
|
* In 32-bit mode, the MAXMACH limit is 32 without
|
||||||
|
|
|
@ -434,12 +434,6 @@ uchar ymskb[] =
|
||||||
Ymr, Yrl, Zm_r_xm, 1,
|
Ymr, Yrl, Zm_r_xm, 1,
|
||||||
0
|
0
|
||||||
};
|
};
|
||||||
uchar yxaes[] =
|
|
||||||
{
|
|
||||||
Yxm, Yxr, Zm_r_xm, 2,
|
|
||||||
Yxm, Yxr, Zm_r_i_xm, 2,
|
|
||||||
0
|
|
||||||
};
|
|
||||||
|
|
||||||
Optab optab[] =
|
Optab optab[] =
|
||||||
/* as, ytab, andproto, opcode */
|
/* as, ytab, andproto, opcode */
|
||||||
|
|
Loading…
Reference in a new issue