sse kernel support (sources)

This commit is contained in:
jpathy 2013-05-22 23:47:05 +05:30
parent 213bf50893
commit 81b7451972
7 changed files with 141 additions and 16 deletions

View file

@ -2,7 +2,9 @@ typedef struct BIOS32si BIOS32si;
typedef struct BIOS32ci BIOS32ci; typedef struct BIOS32ci BIOS32ci;
typedef struct Conf Conf; typedef struct Conf Conf;
typedef struct Confmem Confmem; typedef struct Confmem Confmem;
typedef struct FPsave FPsave; typedef union FPsave FPsave;
typedef struct FPssestate FPssestate;
typedef struct FPstate FPstate;
typedef struct ISAConf ISAConf; typedef struct ISAConf ISAConf;
typedef struct Label Label; typedef struct Label Label;
typedef struct Lock Lock; typedef struct Lock Lock;
@ -64,7 +66,7 @@ enum
FPillegal= 0x100, FPillegal= 0x100,
}; };
struct FPsave struct FPstate
{ {
ushort control; ushort control;
ushort r1; ushort r1;
@ -81,6 +83,33 @@ struct FPsave
uchar regs[80]; /* floating point registers */ uchar regs[80]; /* floating point registers */
}; };
/*
 * In-memory image used by the SSE save/restore path (fpssesave /
 * fpsserestore).  Assuming 2-byte ushort and 4-byte ulong, the named
 * header fields occupy 32 bytes and, together with xregs, make up a
 * 512-byte save area; alignpad adds FPalign bytes of slack on top.
 * Field order and sizes are dictated by the hardware save format, so
 * do not reorder or resize them.
 */
struct FPssestate /* SSE fp state */
{
ushort fcw; /* control */
ushort fsw; /* status */
ushort ftw; /* tag */
ushort fop; /* opcode */
ulong fpuip; /* pc */
ushort cs; /* pc segment */
ushort r1; /* reserved */
ulong fpudp; /* data pointer */
ushort ds; /* data pointer segment */
ushort r2; /* NOTE(review): presumably reserved like r1 - confirm */
ulong mxcsr; /* MXCSR register state */
ulong mxcsr_mask; /* MXCSR mask register */
uchar xregs[480]; /* extended registers */
uchar alignpad[FPalign]; /* slack: lets the save area be rounded up to an FPalign boundary inside this struct */
};
/*
 * the FP regs must be stored here, not somewhere pointed to from here.
 * port code assumes this.
 *
 * FPsave overlays the two possible save layouts.  Both members are
 * unnamed, so their fields are reached directly through an FPsave
 * (e.g. f->status for the x87 layout, f->fsw for the SSE layout).
 * Which layout is live depends on which save routine is installed
 * in the fpsave function pointer.
 */
union FPsave {
FPstate; /* x87 layout */
FPssestate; /* SSE layout */
};
struct Confmem struct Confmem
{ {
ulong base; ulong base;
@ -227,6 +256,7 @@ struct Mach
uvlong tscticks; uvlong tscticks;
int pdballoc; int pdballoc;
int pdbfree; int pdbfree;
FPsave *fpsavalign;
vlong mtrrcap; vlong mtrrcap;
vlong mtrrdef; vlong mtrrdef;
@ -297,6 +327,7 @@ enum {
Clflush = 1<<19, Clflush = 1<<19,
Acpif = 1<<22, /* therm control msr */ Acpif = 1<<22, /* therm control msr */
Mmx = 1<<23, Mmx = 1<<23,
Fxsr = 1<<24, /* have SSE FXSAVE/FXRSTOR */
Sse = 1<<25, /* thus sfence instr. */ Sse = 1<<25, /* thus sfence instr. */
Sse2 = 1<<26, /* thus mfence & lfence instr.s */ Sse2 = 1<<26, /* thus mfence & lfence instr.s */
Rdrnd = 1<<30, /* RDRAND support bit */ Rdrnd = 1<<30, /* RDRAND support bit */

View file

@ -38,6 +38,11 @@ enum {
Qmax = 16, Qmax = 16,
}; };
/*
 * CR4 control register bits.
 * CR4Osfxsr (bit 9) is or'ed into CR4 by cpuidentify when the cpu
 * advertises Fxsr, at the point where the SSE save/restore routines
 * are selected.
 */
enum {
CR4Osfxsr = 1 << 9,
};
enum { /* cpuid standard function codes */ enum { /* cpuid standard function codes */
Highstdfunc = 0, /* also returns vendor string */ Highstdfunc = 0, /* also returns vendor string */
Procsig, Procsig,
@ -850,6 +855,15 @@ cpuidentify(void)
rdmsr(0x01, &mct); rdmsr(0x01, &mct);
} }
if(m->cpuiddx & Fxsr){ /* have sse fp? */
fpsave = fpssesave;
fprestore = fpsserestore;
putcr4(getcr4() | CR4Osfxsr);
} else {
fpsave = fpx87save;
fprestore = fpx87restore;
}
cputype = t; cputype = t;
return t->family; return t->family;
} }

View file

@ -33,9 +33,15 @@ void fpclear(void);
void fpenv(FPsave*); void fpenv(FPsave*);
void fpinit(void); void fpinit(void);
void fpoff(void); void fpoff(void);
void fprestore(FPsave*); void (*fprestore)(FPsave*);
void fpsave(FPsave*); void (*fpsave)(FPsave*);
void fpsserestore(FPsave*);
void fpsserestore0(FPsave*);
void fpssesave(FPsave*);
void fpssesave0(FPsave*);
ulong fpstatus(void); ulong fpstatus(void);
void fpx87restore(FPsave*);
void fpx87save(FPsave*);
ulong getcr0(void); ulong getcr0(void);
ulong getcr2(void); ulong getcr2(void);
ulong getcr3(void); ulong getcr3(void);

View file

@ -657,13 +657,13 @@ TEXT fpinit(SB), $0 /* enable and init */
WAIT WAIT
RET RET
TEXT fpsave(SB), $0 /* save state and disable */ TEXT fpx87save(SB), $0 /* save state and disable */
MOVL p+0(FP), AX MOVL p+0(FP), AX
FSAVE 0(AX) /* no WAIT */ FSAVE 0(AX) /* no WAIT */
FPOFF(l2) FPOFF(l2)
RET RET
TEXT fprestore(SB), $0 /* enable and restore state */ TEXT fpx87restore(SB), $0 /* enable and restore state */
FPON FPON
MOVL p+0(FP), AX MOVL p+0(FP), AX
FRSTOR 0(AX) FRSTOR 0(AX)
@ -685,6 +685,19 @@ TEXT fpclear(SB), $0 /* clear pending exceptions */
FPOFF(l3) FPOFF(l3)
RET RET
/*
 * fpssesave0(FPsave *p): store the fp state with FXSAVE at the address
 * passed as the first argument.  The buffer has to be FPalign-aligned;
 * the C wrapper fpssesave rounds the pointer up before calling here.
 */
TEXT fpssesave0(SB), $0 /* save state and disable */
MOVL p+0(FP), AX /* AX = p, the (aligned) save area */
FXSAVE 0(AX) /* no WAIT */
FPOFF(l4) /* NOTE(review): macro defined outside this view; presumably the "disable" step named above */
RET
/*
 * fpsserestore0(FPsave *p): reload the fp state with FXRSTOR from the
 * address passed as the first argument.  The buffer has to be
 * FPalign-aligned; the C wrapper fpsserestore arranges that before
 * calling here.
 */
TEXT fpsserestore0(SB), $0 /* enable and restore state */
FPON /* NOTE(review): macro defined outside this view; presumably the "enable" step named above */
MOVL p+0(FP), AX /* AX = p, the (aligned) save area */
FXRSTOR 0(AX)
WAIT
RET
/* /*
*/ */
TEXT splhi(SB), $0 TEXT splhi(SB), $0

View file

@ -479,14 +479,37 @@ static char* mathmsg[] =
"precision loss", "precision loss",
}; };
/*
 * Fetch the saved fp status word, fp pc and control word of the
 * current process (up->fpsave), independent of the save layout in use.
 * The layout is chosen by checking which save routine is installed:
 * fpx87save means the x87 fields, anything else the SSE fields.
 * Any of the three result pointers may be nil, in which case that
 * value is simply not returned.
 */
static void
mathstate(ulong *stsp, ulong *pcp, ulong *ctlp)
{
	FPsave *fps;
	int x87;

	fps = &up->fpsave;
	x87 = fpsave == fpx87save;

	if(stsp != nil)
		*stsp = x87? fps->status: fps->fsw;
	if(pcp != nil)
		*pcp = x87? fps->pc: fps->fpuip;
	if(ctlp != nil)
		*ctlp = x87? fps->control: fps->fcw;
}
static void static void
mathnote(void) mathnote(void)
{ {
int i; int i;
ulong status; ulong status, pc;
char *msg, note[ERRMAX]; char *msg, note[ERRMAX];
status = up->fpsave.status; mathstate(&status, &pc, nil);
/* /*
* Some attention should probably be paid here to the * Some attention should probably be paid here to the
@ -513,12 +536,50 @@ mathnote(void)
postnote(up, 1, note, NDebug); postnote(up, 1, note, NDebug);
} }
/*
 * The sse save/restore instructions only work on buffers aligned to
 * 16 bytes (FPalign), but an FPsave is not guaranteed to be.  The
 * wrappers below therefore either slide the data within the FPsave
 * (which carries FPalign bytes of padding for this) or go through an
 * aligned copy.
 */

/*
 * Save the sse fp state into *fps.  The state is written at the first
 * FPalign boundary at or after fps; if that is not fps itself, the
 * saved image is then moved down so that it starts at fps.
 */
void
fpssesave(FPsave *fps)
{
	uintptr aligned;

	aligned = ROUND((uintptr)fps, FPalign);
	fpssesave0((FPsave*)aligned);
	if(aligned == (uintptr)fps)
		return;		/* already aligned, nothing to move */
	memmove(fps, (void*)aligned, sizeof(FPssestate) - FPalign);
}
/*
 * Restore the sse fp state from *fps.
 * If fps is already FPalign-aligned it is used directly.  Otherwise
 * the image is copied into m->fpsavalign, an aligned buffer that is
 * allocated the first time it is needed; should that allocation fail,
 * the image is instead slid up to the next FPalign boundary inside
 * *fps for the restore and slid back down afterwards.
 */
void
fpsserestore(FPsave *fps)
{
	FPsave *src;
	int inplace;

	inplace = 0;
	src = (FPsave*)ROUND((uintptr)fps, FPalign);
	if(src != fps){
		if(m->fpsavalign == nil)
			m->fpsavalign = mallocalign(sizeof(FPssestate), FPalign, 0, 0);
		if(m->fpsavalign != nil)
			src = m->fpsavalign;	/* restore from the aligned copy */
		else
			inplace = 1;		/* no copy buffer: shuffle within *fps */
		memmove(src, fps, sizeof(FPssestate) - FPalign);
	}

	fpsserestore0(src);

	/* undo the in-place shuffle so *fps again starts at fps */
	if(inplace)
		memmove(fps, src, sizeof(FPssestate) - FPalign);
}
/* /*
* math coprocessor error * math coprocessor error
*/ */
static void static void
matherror(Ureg *ur, void*) matherror(Ureg *ur, void*)
{ {
ulong status, pc;
/* /*
* a write cycle to port 0xF0 clears the interrupt latch attached * a write cycle to port 0xF0 clears the interrupt latch attached
* to the error# line from the 387 * to the error# line from the 387
@ -532,9 +593,11 @@ matherror(Ureg *ur, void*)
fpenv(&up->fpsave); fpenv(&up->fpsave);
mathnote(); mathnote();
if((ur->pc & 0xf0000000) == KZERO) if((ur->pc & 0xf0000000) == KZERO){
mathstate(&status, &pc, nil);
panic("fp: status %ux fppc=0x%lux pc=0x%lux", panic("fp: status %ux fppc=0x%lux pc=0x%lux",
up->fpsave.status, up->fpsave.pc, ur->pc); up->fpsave.status, up->fpsave.pc, ur->pc);
}
} }
/* /*
@ -543,6 +606,8 @@ matherror(Ureg *ur, void*)
static void static void
mathemu(Ureg *ureg, void*) mathemu(Ureg *ureg, void*)
{ {
ulong status, control;
if(up->fpstate & FPillegal){ if(up->fpstate & FPillegal){
/* someone did floating point in a note handler */ /* someone did floating point in a note handler */
postnote(up, 1, "sys: floating point in note handler", NDebug); postnote(up, 1, "sys: floating point in note handler", NDebug);
@ -561,7 +626,8 @@ mathemu(Ureg *ureg, void*)
* More attention should probably be paid here to the * More attention should probably be paid here to the
* exception masks and error summary. * exception masks and error summary.
*/ */
if((up->fpsave.status & ~up->fpsave.control) & 0x07F){ mathstate(&status, nil, &control);
if((status & ~control) & 0x07F){
mathnote(); mathnote();
break; break;
} }

View file

@ -19,6 +19,7 @@
#define ROUND(s, sz) (((s)+((sz)-1))&~((sz)-1)) #define ROUND(s, sz) (((s)+((sz)-1))&~((sz)-1))
#define PGROUND(s) ROUND(s, BY2PG) #define PGROUND(s) ROUND(s, BY2PG)
#define BLOCKALIGN 8 #define BLOCKALIGN 8
#define FPalign 16 /* required alignment of sse fp save/restore buffers */
/* /*
* In 32-bit mode, the MAXMACH limit is 32 without * In 32-bit mode, the MAXMACH limit is 32 without

View file

@ -434,12 +434,6 @@ uchar ymskb[] =
Ymr, Yrl, Zm_r_xm, 1, Ymr, Yrl, Zm_r_xm, 1,
0 0
}; };
uchar yxaes[] =
{
Yxm, Yxr, Zm_r_xm, 2,
Yxm, Yxr, Zm_r_i_xm, 2,
0
};
Optab optab[] = Optab optab[] =
/* as, ytab, andproto, opcode */ /* as, ytab, andproto, opcode */