replace x87 floating point with sse floating point for the pae kernel. there is no option to fall back to FSAVE vs FXSAVE. Reference: /n/atom/patch/applied2013/paesse Date: Mon Jun 3 17:46:11 CES 2013 Signed-off-by: quanstro@quanstro.net --- /sys/src/9/pcpae/pae Mon Jun 3 17:45:37 2013 +++ /sys/src/9/pcpae/pae Mon Jun 3 17:45:38 2013 @@ -55,6 +55,8 @@ archmp mp nomp mpacpi apic msi + fpsse + sdaoe sdide pci sdscsi sdiahci pci sdscsi led --- /sys/src/9/pcpae/sse.c Thu Jan 1 00:00:00 1970 +++ /sys/src/9/pcpae/sse.c Mon Jun 3 17:45:38 2013 @@ -0,0 +1,174 @@ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" +#include "io.h" +#include "ureg.h" +#include "../port/error.h" + +extern void ssesave(FPsave*); +extern void sserestore(FPsave*); +extern void sseclear(void); +extern void ldmxcsr(uint); + +/* assume all m's have the same sse cap */ +enum{ + Fxrstor = 1<<24, + + /* cr0 */ + Moncoproc = 1<<1, + X87em = 1<<2, + TS = 1<<3, /* task switch */ + NE = 1<<5, /* x87 numeric error */ + + /* cr4 */ + Osfxsr = 1<<9, /* enable sse */ + Osxmexcept = 1<<10, /* enable #XM exception */ +}; + +enum { + SSEie = 1<<0, /* invalid operation */ + SSEde = 1<<1, /* denormal */ + SSEze = 1<<2, /* div by zero */ + SSEoe = 1<<3, /* overflow */ + SSEue = 1<<4, /* underflow */ + SSEpe = 1<<5, /* precision loss */ + SSEdaz = 1<<6, /* denormals are zero */ + SSEim = 1<<7, /* invalid operation mask */ + SSEdm = 1<<8, /* denormal mask */ + SSEzm = 1<<9, /* div by zero mask */ + SSEom = 1<<10, /* overflow mask */ + SSEum = 1<<11, /* underflow mask */ + SSEpm = 1<<12, /* precision loss mask */ + SSErc = 3<<13, /* rounding control */ + SSEfz = 1<<15, /* flush to zero */ + + SSEmask = SSEum | SSEpm +}; + +typedef struct Fxsave Fxsave; +struct Fxsave { + u16int fcw; /* x87 control word */ + u16int fsw; /* x87 status word */ + u8int ftw; /* x87 tag word */ + u8int zero; /* 0 */ + u16int fop; /* last x87 opcode */ + u64int rip; /* last x87 instruction pointer */ + 
u64int rdp; /* last x87 data pointer */ + u32int mxcsr; /* sse control and status */ + u32int mxcsrmask; /* supported sse feature bits */ + uchar st[128]; /* shared 64-bit media and x87 regs */ + uchar xmm[256]; /* 128-bit media regs */ + uchar ign[96]; /* reserved, ignored */ +}; + +static void +sseenable(void) +{ +// putcr0(getcr0() & ~(X87em | Moncoproc)); + fpinit(); + ldmxcsr(SSEmask); +} + +static void +sseoff(void) +{ +// putcr0(getcr0() & ~(X87em | Moncoproc) | NE | TS); + fpoff(); +} + +static void +ssenote(void) +{ +iprint("%d: %lud: see note\n", m->machno, up->pid); + postnote(up, 1, "sse error", NDebug); +} + +static void +sseover(Ureg*, void*) +{ + pexit("sse error", 0); +} + +static void +sseerror(Ureg *u, void*) +{ + /* + * save floating point state to check out error + */ +iprint("%lud: sserror\n", up? up->pid: -1); + fp->env(up->fpusave); + ssenote(); + + if((u->pc & 0xf0000000) == KZERO) + panic("sse error in kernel pc=%#p", u->pc); +} + +void +ssesaveoff(FPsave *p) +{ + ssesave(p); + sseclear(); +} + +static void +sserestoreen(FPsave *p) +{ + sseenable(); + sserestore(p); +} + +static int +sseinit(void) +{ + if((m->cpuiddx & (Sse2 | Fxrstor)) != (Sse2 | Fxrstor)) /* need both sse2 and fxsave/fxrstor */ + return -1; + putcr4(getcr4() | Osfxsr | Osxmexcept); + sseoff(); + return 0; +} + +static void +ssemachinit(void) +{ + sseinit(); +} + +static uint +ssegetfsw(FPsave *f) +{ + Fxsave *x; + + x = (Fxsave*)f->sse; + return x->fsw; +} + +static FPArch sse = { + "sse", + ssemachinit, /* machinit */ + sseoff, /* off */ + sseenable, /* init */ + ssesaveoff, /* save */ + sserestoreen, /* restore */ + ssesave, /* env */ + sseclear, /* clear */ + ssegetfsw, +}; + +static int +enabled(char *s) +{ + return s != nil && (cistrcmp(s, "enable") == 0 || atoi(s) > 0); +} + +void +sselink(void) +{ + if(!enabled(getconf("*sse")) || sseinit() == -1) + return; + print("sseinit\n"); + trapenable(VectorSIMD, sseerror, 0, "simderror"); + // trapenable(Vector#XE, sseover, 0, "simdover"); + fp = &sse; +} --- 
/sys/src/9/pcpae/main.c Mon Jun 3 17:45:39 2013 +++ /sys/src/9/pcpae/main.c Mon Jun 3 17:45:40 2013 @@ -424,131 +424,6 @@ kmem, kpages, ppages, conf.upages); } -static char* mathmsg[] = -{ - nil, /* handled below */ - "denormalized operand", - "division by zero", - "numeric overflow", - "numeric underflow", - "precision loss", -}; - -static void -mathnote(void) -{ - int i; - u32int status; - char *msg, note[ERRMAX]; - - status = up->fpsave.status; - - /* - * Some attention should probably be paid here to the - * exception masks and error summary. - */ - msg = "unknown exception"; - for(i = 1; i <= 5; i++){ - if(!((1<fpsave.pc, status); - postnote(up, 1, note, NDebug); -} - -/* - * math coprocessor error - */ -static void -matherror(Ureg *ur, void*) -{ - /* - * a write cycle to port 0xF0 clears the interrupt latch attached - * to the error# line from the 387 - */ - if(!(m->cpuiddx & 0x01)) - outb(0xF0, 0xFF); - - /* - * save floating point state to check out error - */ - fpenv(&up->fpsave); - mathnote(); - - if((ur->pc & 0xf0000000) == KZERO) - panic("fp: status %ux fppc=%#ux pc=%#lux", - up->fpsave.status, up->fpsave.pc, ur->pc); -} - -/* - * math coprocessor emulation fault - */ -static void -mathemu(Ureg *ureg, void*) -{ - if(up->fpstate & FPillegal){ - /* someone did floating point in a note handler */ - postnote(up, 1, "sys: floating point in note handler", NDebug); - return; - } - switch(up->fpstate){ - case FPinit: - fpinit(); - up->fpstate = FPactive; - break; - case FPinactive: - /* - * Before restoring the state, check for any pending - * exceptions, there's no way to restore the state without - * generating an unmasked exception. - * More attention should probably be paid here to the - * exception masks and error summary. 
- */ - if((up->fpsave.status & ~up->fpsave.control) & 0x07F){ - mathnote(); - break; - } - fprestore(&up->fpsave); - up->fpstate = FPactive; - break; - case FPactive: - panic("math emu pid %ld %s pc 0x%lux", - up->pid, up->text, ureg->pc); - break; - } -} - -/* - * math coprocessor segment overrun - */ -static void -mathover(Ureg*, void*) -{ - pexit("math overrun", 0); -} - -void -mathinit(void) -{ - trapenable(VectorCERR, matherror, 0, "matherror"); - if(X86FAMILY(m->cpuidax) == 3) - intrenable(IrqIRQ13, matherror, 0, BUSUNKNOWN, "matherror"); - trapenable(VectorCNA, mathemu, 0, "mathemu"); - trapenable(VectorCSO, mathover, 0, "mathover"); -} - /* * set up floating point for a new process */ --- /sys/src/9/pcpae/paecpu Mon Jun 3 17:45:41 2013 +++ /sys/src/9/pcpae/paecpu Mon Jun 3 17:45:42 2013 @@ -59,6 +59,8 @@ archacpi mp nomp mpacpi apic msi archmp mp nomp mpacpi apic msi + fpsse + uarti8250 uartp8250 uartox958 pci uartp8250 uartpci pci uartp8250 --- /sys/src/9/pcpae/mp.c Mon Jun 3 17:45:43 2013 +++ /sys/src/9/pcpae/mp.c Mon Jun 3 17:45:44 2013 @@ -411,7 +411,7 @@ syncclock(); timersinit(); - fpoff(); + mathinit(); lock(&active); active.machs |= 1<<m->machno; --- /sys/src/9/pcpae/dat.h Mon Jun 3 17:45:45 2013 +++ /sys/src/9/pcpae/dat.h Mon Jun 3 17:45:46 2013 @@ -64,21 +64,17 @@ FPillegal= 0x100, }; -struct FPsave -{ - u16int control; - u16int r1; - u16int status; - u16int r2; - u16int tag; - u16int r3; - u32int pc; - u16int selector; - u16int r4; - u32int operand; - u16int oselector; - u16int r5; - uchar regs[80]; /* floating point registers */ +enum { + Fpalign = 16, + Fpsize = 512, +}; + +/* + * FPU stuff in Proc + */ +struct FPsave { + /* need to be large enough to accommodate any fp arch */ + uchar fxsave[Fpsize+Fpalign-1]; }; struct Confmem --- /sys/src/9/pcpae/fns.h Mon Jun 3 17:45:47 2013 +++ /sys/src/9/pcpae/fns.h Mon Jun 3 17:45:48 2013 @@ -17,12 +17,10 @@ void delay(int); #define evenaddr(x) /* x86 doesn't care */ void fpclear(void); -void fpenv(FPsave*); 
void fpinit(void); void fpoff(void); void fprestore(FPsave*); void fpsave(FPsave*); -u32int fpstatus(void); u32int getcr0(void); u32int getcr2(void); u32int getcr3(void); --- /sys/src/9/pcpae/l.s Mon Jun 3 17:45:49 2013 +++ /sys/src/9/pcpae/l.s Mon Jun 3 17:45:51 2013 @@ -499,7 +499,7 @@ * FNxxx variations) so WAIT instructions must be explicitly placed in the * code as necessary. */ -#define FPOFF(l) ;\ +#define X87OFF(l) ;\ MOVL CR0, AX ;\ ANDL $0xC, AX /* EM, TS */ ;\ CMPL AX, $0x8 ;\ @@ -511,17 +511,17 @@ ORL $0x28, AX /* NE=1, TS=1 */ ;\ MOVL AX, CR0 -#define FPON ;\ +#define X87ON ;\ MOVL CR0, AX ;\ ANDL $~0xC, AX /* EM=0, TS=0 */ ;\ MOVL AX, CR0 -TEXT fpoff(SB), 1, $0 /* disable */ - FPOFF(l1) +TEXT x87off(SB), 1, $0 /* disable */ + X87OFF(l1) RET -TEXT fpinit(SB), 1, $0 /* enable and init */ - FPON +TEXT x87init(SB), 1, $0 /* enable and init */ + X87ON FINIT WAIT /* setfcr(FPPDBL|FPRNR|FPINVAL|FPZDIV|FPOVFL) */ @@ -532,32 +532,52 @@ WAIT RET -TEXT fpsave(SB), 1, $0 /* save state and disable */ - MOVL p+0(FP), AX - FSAVE 0(AX) /* no WAIT */ - FPOFF(l2) - RET - -TEXT fprestore(SB), 1, $0 /* enable and restore state */ - FPON +/* + * sse support + */ +TEXT ssesave(SB), 1, $0 MOVL p+0(FP), AX - FRSTOR 0(AX) - WAIT - RET - -TEXT fpstatus(SB), 1, $0 /* get floating point status */ - FSTSW AX + ADDL $15, AX + ANDL $~15, AX + BYTE $0x0f; BYTE $0xae; BYTE $0x00; /* FXSAVE 0(AX) */ RET -TEXT fpenv(SB), 1, $0 /* save state without waiting */ +TEXT sserestore(SB), 1, $0 + X87ON MOVL p+0(FP), AX - FSTENV 0(AX) + ADDL $15, AX + ANDL $~15, AX + BYTE $0x0f; BYTE $0xae; BYTE $0x08; /* FXRSTOR 0(AX) */ + WAIT /* gratuitous x87 support? 
*/ + RET + +TEXT ldmxcsr(SB), 1, $0 + MOVL xcr+0(FP), AX + PUSHL AX +/* LDMXCSR 0(SP) */ + BYTE $0x0f; BYTE $0xae; BYTE $0x14; BYTE $0x24; + POPL AX + RET + +TEXT sseclear(SB), 1, $0 /* clear pending exceptions */ + X87ON + PUSHL $(1<<11 | 1<<12) +/* LDMXCSR 0(SP) */ + BYTE $0x0f; BYTE $0xae; BYTE $0x14; BYTE $0x24; + POPL AX + FCLEX /* no WAIT */ + X87OFF(l4) RET -TEXT fpclear(SB), 1, $0 /* clear pending exceptions */ - FPON - FCLEX /* no WAIT */ - FPOFF(l3) +TEXT ssezero(SB), 1, $0 + PXOR X0, X0 + MOVDQA X0, X1 + MOVDQA X0, X2 + MOVDQA X0, X3 + MOVDQA X0, X4 + MOVDQA X0, X5 + MOVDQA X0, X6 + MOVDQA X0, X7 RET /* @@ -798,7 +818,7 @@ CALL _strayintr(SB); BYTE $0x10 /* coprocessor error */ CALL _strayintrx(SB); BYTE $0x11 /* alignment check */ CALL _strayintr(SB); BYTE $0x12 /* machine check */ - CALL _strayintr(SB); BYTE $0x13 + CALL _strayintr(SB); BYTE $0x13 /* SIMD exception */ CALL _strayintr(SB); BYTE $0x14 CALL _strayintr(SB); BYTE $0x15 CALL _strayintr(SB); BYTE $0x16 --- /sys/src/9/pcpae/fpsse.c Thu Jan 1 00:00:00 1970 +++ /sys/src/9/pcpae/fpsse.c Mon Jun 3 17:45:51 2013 @@ -0,0 +1,327 @@ +#include "u.h" +#include "../port/lib.h" +#include "mem.h" +#include "dat.h" +#include "fns.h" +#include "io.h" +#include "ureg.h" +#include "../port/error.h" + +#define dprint(...) print(__VA_ARGS__) +#define diprint(...) 
iprint(__VA_ARGS__) + +extern void x87off(void); +extern void x87init(void); + +extern void ssesave(FPsave*); +extern void sserestore(FPsave*); +extern void sseclear(void); +extern void ldmxcsr(u32int); + +/* assume all m's have the same sse cap */ +enum{ + Fxrstor = 1<<24, + + /* cr0 */ + Moncoproc = 1<<1, + X87em = 1<<2, + TS = 1<<3, /* task switch */ + NE = 1<<5, /* x87 numeric error */ + + /* cr4 */ + Osfxsr = 1<<9, /* enable sse */ + Osxmexcept = 1<<10, /* enable #XM exception */ +}; + +enum { + SSEie = 1<<0, /* invalid operation */ + SSEde = 1<<1, /* denormal */ + SSEze = 1<<2, /* div by zero */ + SSEoe = 1<<3, /* overflow */ + SSEue = 1<<4, /* underflow */ + SSEpe = 1<<5, /* precision loss */ + SSEdaz = 1<<6, /* denormals are zero */ + SSEim = 1<<7, /* invalid operation mask */ + SSEdm = 1<<8, /* denormal mask */ + SSEzm = 1<<9, /* div by zero mask */ + SSEom = 1<<10, /* overflow mask */ + SSEum = 1<<11, /* underflow mask */ + SSEpm = 1<<12, /* precision loss mask */ + SSErc = 3<<13, /* rounding control */ + SSEfz = 1<<15, /* flush to zero */ + + SSEflags = SSEie | SSEde | SSEze | SSEoe | SSEue | SSEpe, + SSEmask = SSEdm | SSEum | SSEpm +}; + +typedef struct Fxsave Fxsave; +struct Fxsave { + u16int fcw; /* x87 control word */ + u16int fsw; /* x87 status word */ + u8int ftw; /* x87 tag word */ + u8int zero; /* 0 */ + u16int fop; /* last x87 opcode */ + u32int ip; /* last x87 instruction pointer */ + u16int cs; /* last x87 code segment */ + u16int r1; + u32int dp; /* data pointer */ + u16int ds; /* data pointer segment */ + u16int r2; + u32int mxcsr; /* sse control and status */ + u32int mxcsrmask; /* supported sse feature bits */ + uchar st[128]; /* shared 64-bit media and x87 regs */ + uchar xmm[256]; /* 128-bit media regs */ + uchar ign[96]; /* reserved, ignored */ +}; + +#define savetox(save) ((Fxsave*)ROUNDUP(PTR2UINT(save), Fpalign)) + +static void +sseenable(void) +{ +// putcr0(getcr0() & ~(X87em | Moncoproc)); + x87init(); + ldmxcsr(SSEmask); +} + +static void 
+sseoff(void) +{ +// putcr0(getcr0() & ~(X87em | Moncoproc) | NE | TS); + x87off(); +} + +static void +sseover(Ureg*, void*) +{ + pexit("sse error", 0); +} + +static void +sserestoreen(FPsave *p) +{ + sseenable(); + sserestore(p); +} + +static void +sseclonestate(FPsave *t, FPsave *s) +{ + Fxsave *r; + + memmove(t, s, sizeof(FPsave)); + r = savetox(t); + r->ftw = 0; /* all x87 registers invalid (clear stack) */ + r->fsw &= ~SSEflags; /* clear x87 status */ +} + +static int +sseinit(void) +{ + if((m->cpuiddx & (Sse2 | Fxrstor)) != (Sse2 | Fxrstor)) /* need both sse2 and fxsave/fxrstor */ + return -1; + putcr4(getcr4() | Osfxsr | Osxmexcept); + sseoff(); + return 0; +} + +static char *mathmsg[] = +{ + nil, /* handled below */ + "denormalized operand", + "division by zero", + "numeric overflow", + "numeric underflow", + "precision loss", +}; + +static void +mathnote(uint status) +{ + char *msg, note[ERRMAX]; + int i; + Fxsave *x; + + /* + * Some attention should probably be paid here to the + * exception masks and error summary. + */ + msg = "unknown exception"; + for(i = 1; i <= 5; i++){ + if(!((1<<i) & status)) + continue; + msg = mathmsg[i]; + break; + } + if(status & 0x01){ + if(status & 0x40){ + if(status & 0x200) + msg = "stack overflow"; + else + msg = "stack underflow"; + }else + msg = "invalid operation"; + } + x = savetox(&up->fpsave); + snprint(note, sizeof note, "sys: fp: %s fppc=%#p status=%#ux", + msg, (uintptr)x->ip, status); + postnote(up, 1, note, NDebug); +} + +static void +sseerror(Ureg *ur, void*) +{ + Fxsave *x; + + /* + * save floating point state to check out error + */ + dprint("%lud: %s: %s: sserror\n", up->pid, up->user, up->text); + ssesave(&up->fpsave); + x = savetox(&up->fpsave); + mathnote(x->mxcsr & SSEflags); + + if((ur->pc & 0xf0000000) == KZERO) + panic("sse error in kernel pc=%#p", ur->pc); +} + +/* + * math coprocessor error + */ +static void +x87error(Ureg *ur, void*) +{ + Fxsave *x; + + dprint("%lud: %s: %s: x87error\n", up->pid, up->user, up->text); + /* + * a write cycle to port 0xF0 clears the interrupt latch attached + * to the error# line from the 387 + */ + if(!(m->cpuiddx & 0x01)) + outb(0xF0, 0xFF); + + /* + * save floating point state to check out error + */ + ssesave(&up->fpsave); + x = 
savetox(&up->fpsave); + mathnote(x->fsw & ~x->fcw); + + if((ur->pc & 0xf0000000) == KZERO) + panic("fp: status %ux fppc=%#p pc=%#p", + x->fsw, (uintptr)x->ip, ur->pc); +} + +/* + * math coprocessor emulation fault + */ +static void +sseemu(Ureg *ureg, void*) +{ + Fxsave *x; + + if(up->fpstate & FPillegal){ + /* someone did floating point in a note handler */ + postnote(up, 1, "sys: floating point in note handler", NDebug); + return; + } + switch(up->fpstate){ + case FPinit: + sseenable(); +extern void ssezero(void); +ssezero(); + up->fpstate = FPactive; + break; + case FPinactive: + /* + * Before restoring the state, check for any pending + * exceptions, there's no way to restore the state without + * generating an unmasked exception. + * More attention should probably be paid here to the + * exception masks and error summary. + */ + x = savetox(&up->fpsave); + if((x->fsw & ~x->fcw) & 0x07F){ + diprint("%lud: mathnote (on restore)\n", up->pid); + mathnote(x->fsw); + break; + } + if(x->mxcsr & SSEflags){ + diprint("%lud: sse mathnote (on restore)\n", up->pid); + mathnote(x->mxcsr & SSEflags); + break; + } + x->fsw &= ~SSEim; /* bit 7 of fsw is the x87 error summary */ + sserestoreen(&up->fpsave); + up->fpstate = FPactive; + break; + case FPactive: + panic("math emu pid %lud %s pc %#p", + up->pid, up->text, ureg->pc); + break; + } +} + +long +ssedevprocio(Proc *p, void *va, long n, uvlong offset, int write) +{ + uchar *fp; + int sz; + + fp = (uchar*)savetox(&p->fpsave); + sz = Fpsize; + + if(offset >= sz) + n = 0; + else if(offset+n > sz) + n = sz - offset; + + if(write) + memmove(fp+offset, va, n); + else + memmove(va, fp+offset, n); + return n; +} + +extern long (*fpudevprocio)(Proc*, void*, long, uvlong, int); + +static void +ssemathinit(void) +{ + if((m->cpuiddx & (Sse2 | Fxrstor)) != (Sse2 | Fxrstor)) /* need both sse2 and fxsave/fxrstor */ + panic("no sse2 support"); + + trapenable(VectorCERR, x87error, 0, "x87error"); + trapenable(VectorCNA, sseemu, 0, "sseemu"); + trapenable(VectorSIMD, sseerror, 0, "sseerror"); + trapenable(VectorCSO, 
sseover, 0, "sseover"); // Vector#XE + fpudevprocio = ssedevprocio; +} + +void +fpoff(void) +{ + sseoff(); +} + +void +fpclear(void) +{ + sseclear(); +} + +void +fpsave(FPsave *save) +{ + ssesave(save); + sseclear(); +} + +void +mathinit(void) +{ + if(m->machno == 0) + ssemathinit(); + sseinit(); +} --- /sys/src/9/pcpae/devarch.c Mon Jun 3 17:45:53 2013 +++ /sys/src/9/pcpae/devarch.c Mon Jun 3 17:45:54 2013 @@ -995,6 +995,7 @@ p = seprint(p, ep, "%#p\n", coherence); p = seprint(p, ep, "cmpswap cmpswap486\n"); p = seprint(p, ep, "i8253set %s\n", doi8253set ? "on" : "off"); + p = seprint(p, ep, "fp sse\n"); USED(p); n = readstr(offset, a, nn, buf);