--- /sys/src/9/port/fpi.h	Thu Jun 13 22:35:49 2013
+++ /sys/src/9/port/fpi.h	Thu Jun 20 23:47:44 2013
@@ -1,10 +1,15 @@
+#ifndef nil
+#include <u.h>
+#endif
+
 typedef long Word;
 typedef long long Vlong;
 typedef unsigned long Single;
-typedef struct {
-	unsigned long l;
-	unsigned long h;
-} Double;
+
+/* use u.h's FPdbleword; the h and l macros below map the old field names onto hi and lo */
+#define Double FPdbleword
+#define h hi
+#define l lo
 
 enum {
 	FractBits = 28,
--- /sys/src/9/teg2/syscall.c	Tue Apr 9 23:55:59 2013
+++ /sys/src/9/teg2/syscall.c	Fri Jun 21 21:51:13 2013
@@ -80,11 +80,11 @@
 		qunlock(&up->debug);
 
 		splhi();
-		nf->arg1 = nf->msg;
-		nf->arg0 = &nf->ureg;
+		nf->arg1 = nf->msg; /* arg 1 is string */
+		nf->arg0 = &nf->ureg; /* arg 0 XX(FP) is ureg* */
 		nf->ip = 0;
 		cur->sp = PTR2UINT(nf);
-		cur->r0 = PTR2UINT(nf->arg0);
+		cur->r0 = PTR2UINT(nf->arg0); /* arg 0 in reg is ureg* */
 		break;
 	default:
 		pprint("unknown noted arg %#p\n", arg0);
@@ -168,9 +168,9 @@
 	nf->old = up->ureg;
 	up->ureg = nf;
 	memmove(nf->msg, up->note[0].msg, ERRMAX);
-	nf->arg1 = nf->msg;
-	nf->arg0 = &nf->ureg;
-	ureg->r0 = PTR2UINT(nf->arg0);
+	nf->arg1 = nf->msg; /* arg 1 is string */
+	nf->arg0 = &nf->ureg; /* arg 0 XX(FP) is ureg* */
+	ureg->r0 = PTR2UINT(nf->arg0); /* arg 0 in r0 is ureg* */
 	nf->ip = 0;
 
 	ureg->sp = sp;
--- /sys/src/cmd/vl/l.h	Wed Mar 27 00:18:09 2013
+++ /sys/src/cmd/vl/l.h	Sun Jun 23 06:46:06 2013
@@ -251,6 +251,7 @@
 	Count mfrom;
 	Count page;
 	Count jump;
+	Count store; /* bumped by no3stores in noop.c each time it adds a nop */
 } nop;
 
 extern char* anames[];
--- /sys/src/cmd/vl/noop.c	Wed Jun 19 04:21:50 2013
+++ /sys/src/cmd/vl/noop.c	Sun Jun 23 06:46:35 2013
@@ -1,5 +1,184 @@
 #include "l.h"
 
+/*
+ * flag: insert nops to prevent three consecutive stores.
+ * workaround for 24k erratum #48, costs about 10% in text space,
+ * so only enable this if you need it. a test case is "hoc -e '7^6'".
+ */
+enum {
+	Mips24k = 0, /* non-zero makes noops call storesnosched and triplestorenops */
+};
+/* isdblwrdmov: 1 if p is non-nil and p->as is one of the double-word moves listed below, else 0. */
+static int
+isdblwrdmov(Prog *p)
+{
+	if(p == nil)
+		return 0;
+	switch(p->as){
+	case AMOVD:
+	case AMOVDF:
+	case AMOVDW:
+	case AMOVFD:
+	case AMOVWD:
+	case AMOVV:
+	case AMOVVL:
+	case AMOVVR:
+	case AMOVFV:
+	case AMOVDV:
+	case AMOVVF:
+	case AMOVVD:
+		return 1;
+	}
+	return 0;
+}
+/* ismove: 1 if p is non-nil and p->as is one of the moves listed below or a double-word move, else 0. */
+static int
+ismove(Prog *p)
+{
+	if(p == nil)
+		return 0;
+	switch(p->as){
+	case AMOVB:
+	case AMOVBU:
+	case AMOVF:
+	case AMOVFW:
+	case AMOVH:
+	case AMOVHU:
+	case AMOVW:
+	case AMOVWF:
+	case AMOVWL:
+	case AMOVWR:
+	case AMOVWU:
+		return 1;
+	}
+	if(isdblwrdmov(p))
+		return 1;
+	return 0;
+}
+/* isstore: 1 if p is a move whose destination type is one of those listed below (presumably the memory operands), else 0. */
+static int
+isstore(Prog *p)
+{
+	if(p == nil)
+		return 0;
+	if(ismove(p))
+		switch(p->to.type) {
+		case D_OREG:
+		case D_EXTERN:
+		case D_STATIC:
+		case D_AUTO:
+		case D_PARAM:
+			return 1;
+		}
+	return 0;
+}
+/* iscondbranch: 1 if p is non-nil and p->as is one of the conditional branches listed below, else 0. */
+static int
+iscondbranch(Prog *p)
+{
+	if(p == nil)
+		return 0;
+	switch(p->as){
+	case ABEQ:
+	case ABFPF:
+	case ABFPT:
+	case ABGEZ:
+	case ABGEZAL:
+	case ABGTZ:
+	case ABLEZ:
+	case ABLTZ:
+	case ABLTZAL:
+	case ABNE:
+		return 1;
+	}
+	return 0;
+}
+/* isbranch: 1 if p is AJAL, AJMP, ARET, ARFE or a conditional branch, else 0 (including nil). */
+static int
+isbranch(Prog *p)
+{
+	if(p == nil)
+		return 0;
+	switch(p->as){
+	case AJAL:
+	case AJMP:
+	case ARET:
+	case ARFE:
+		return 1;
+	}
+	if(iscondbranch(p))
+		return 1;
+	return 0;
+}
+
+/*
+ * workaround for 24k erratum #48, costs about 0.5% in space.
+ * inserts a NOP before the last of 3 consecutive stores.
+ * double-word stores complicate things: two adjacent stores are enough when either is a double-word move.
+ * returns 1 if addnop was called (and nop.store was counted), else 0.
+ */
+static int
+no3stores(Prog *p)
+{
+	Prog *p1;
+
+	if(!isstore(p))
+		return 0;
+	p1 = p->link; /* may be nil; isstore(nil) is 0, so that case just returns */
+	if(!isstore(p1))
+		return 0;
+	if(isdblwrdmov(p) || isdblwrdmov(p1)) { /* two stores in a row, at least one double-word */
+		p->mark |= LABEL|SYNC;
+		addnop(p); /* presumably places the NOP right after p -- confirm against addnop */
+		nop.store.count++;
+		nop.store.outof++;
+		return 1;
+	}
+	if(isstore(p1->link)) { /* p, p1 and p1->link are three stores in a row */
+		p1->mark |= LABEL|SYNC;
+		addnop(p1); /* applied to the second store, i.e. ahead of the third */
+		nop.store.count++;
+		nop.store.outof++;
+		return 1;
+	}
+	return 0;
+}
+
+/*
+ * keep stores out of branch delay slots.
+ * this is costly in space (the other 9.5%), but makes no3stores effective.
+ * there is undoubtedly a better way to do this; for now it sets NOSCHED in the mark of every store on the list from firstp.
+ */
+void
+storesnosched(void)
+{
+	Prog *p;
+
+	for(p = firstp; p != P; p = p->link)
+		if(isstore(p))
+			p->mark |= NOSCHED; /* keep stores out of delay slots */
+}
+/* triplestorenops: walk the list from firstp calling no3stores on each move; after a non-conditional branch, step over the next instruction. */
+void
+triplestorenops(void)
+{
+	Prog *p, *p1;
+
+	for(p = firstp; p != P; p = p1) {
+		p1 = p->link; /* successor is read before no3stores gets a chance to call addnop */
+// if (p->mark & NOSCHED)
+// continue;
+		if(ismove(p))
+			no3stores(p); /* result unused; counting is done inside no3stores */
+		else if (isbranch(p))
+			/*
+			 * can't ignore delay slot of a conditional branch;
+			 * the branch could fail and fall through.
+			 */
+			if (!iscondbranch(p) && p1)
+				p1 = p1->link; /* skip its delay slot */
+	}
+}
+
 void
 noops(void)
 {
@@ -348,6 +527,8 @@
 			break;
 		}
 	}
+	if (Mips24k) /* off unless the Mips24k enum in this file is made non-zero */
+		storesnosched();
 
 	curtext = P;
 	q = P; /* p - 1 */
@@ -388,6 +569,9 @@
 		}
 		q = p;
 	}
+
+	if (Mips24k) /* same switch as the storesnosched call earlier in noops */
+		triplestorenops();
 }
 
 void