If option '-f' is given, generate VFP hardware floating point instructions. When the arm kernel fp emulator can decode VFP instructions, this option should be made the default. Reference: /n/sources/patch/applied/5l-vfp Date: Fri Jan 11 20:55:18 CET 2013 Signed-off-by: miller@hamnavoe.com --- /sys/src/cmd/5l/asm.c Fri Jan 11 20:51:47 2013 +++ /sys/src/cmd/5l/asm.c Fri Jan 11 20:51:41 2013 @@ -1365,6 +1365,53 @@ else if(p->as == AMOVH) o2 ^= (1<<6); break; + + /* VFP ops: */ + case 74: /* vfp floating point arith */ + o1 = opvfprrr(p->as, p->scond); + rf = p->from.reg; + if(p->from.type == D_FCONST) { + diag("invalid floating-point immediate\n%P", p); + rf = 0; + } + rt = p->to.reg; + r = p->reg; + if(r == NREG) + r = rt; + o1 |= rt<<12; + if(((o1>>20)&0xf) == 0xb) + o1 |= rf<<0; + else + o1 |= r<<16 | rf<<0; + break; + case 75: /* vfp floating point compare */ + o1 = opvfprrr(p->as, p->scond); + rf = p->from.reg; + if(p->from.type == D_FCONST) { + if(p->from.ieee->h != 0 || p->from.ieee->l != 0) + diag("invalid floating-point immediate\n%P", p); + o1 |= 1<<16; + rf = 0; + } + rt = p->reg; + o1 |= rt<<12 | rf<<0; + o2 = 0x0ef1fa10; /* MRS APSR_nzcv, FPSCR */ + o2 |= (p->scond & C_SCOND) << 28; + break; + case 76: /* vfp floating point fix and float */ + o1 = opvfprrr(p->as, p->scond); + rf = p->from.reg; + rt = p->to.reg; + if(p->from.type == D_REG) { + o2 = o1 | rt<<12 | rt<<0; + o1 = 0x0e000a10; /* VMOV F,R */ + o1 |= (p->scond & C_SCOND) << 28 | rt<<16 | rf<<12; + } else { + o1 |= FREGTMP<<12 | rf<<0; + o2 = 0x0e100a10; /* VMOV R,F */ + o2 |= (p->scond & C_SCOND) << 28 | FREGTMP<<16 | rt<<12; + } + break; } if(debug['a'] > 1) @@ -1494,6 +1541,40 @@ } long +opvfprrr(int a, int sc) +{ + long o; + + o = (sc & C_SCOND) << 28; + if(sc & (C_SBIT|C_PBIT|C_WBIT)) + diag(".S/.P/.W on vfp instruction"); + o |= 0xe<<24; + switch(a) { + case AMOVWD: return o | 0xb<<8 | 0xb<<20 | 1<<6 | 0x8<<16 | 1<<7; + case AMOVWF: return o | 0xa<<8 | 0xb<<20 | 1<<6 | 0x8<<16 | 1<<7; + case AMOVDW: return o | 0xb<<8 | 0xb<<20 | 1<<6 | 0xD<<16 | 1<<7; + case AMOVFW: return o | 0xa<<8 | 0xb<<20 | 1<<6 | 0xD<<16 | 1<<7; + case AMOVFD: return o | 0xa<<8 | 0xb<<20 | 1<<6 | 0x7<<16 | 1<<7; + case AMOVDF: return o | 0xb<<8 | 0xb<<20 | 1<<6 | 0x7<<16 | 1<<7; + case AMOVF: return o | 0xa<<8 | 0xb<<20 | 1<<6 | 0x0<<16 | 0<<7; + case AMOVD: return o | 0xb<<8 | 0xb<<20 | 1<<6 | 0x0<<16 | 0<<7; + case ACMPF: return o | 0xa<<8 | 0xb<<20 | 1<<6 | 0x4<<16 | 0<<7; + case ACMPD: return o | 0xb<<8 | 0xb<<20 | 1<<6 | 0x4<<16 | 0<<7; + case AADDF: return o | 0xa<<8 | 0x3<<20; + case AADDD: return o | 0xb<<8 | 0x3<<20; + case ASUBF: return o | 0xa<<8 | 0x3<<20 | 1<<6; + case ASUBD: return o | 0xb<<8 | 0x3<<20 | 1<<6; + case AMULF: return o | 0xa<<8 | 0x2<<20; + case AMULD: return o | 0xb<<8 | 0x2<<20; + case ADIVF: return o | 0xa<<8 | 0x8<<20; + case ADIVD: return o | 0xb<<8 | 0x8<<20; + } + diag("bad vfp rrr %d", a); + prasm(curp); + return 0; +} + +long opbra(int a, int sc) { @@ -1628,10 +1709,45 @@ } long +ovfpmem(int a, int r, long v, int b, int sc, Prog *p) +{ + long o; + + if(sc & (C_SBIT|C_PBIT|C_WBIT)) + diag(".S/.P/.W on VLDR/VSTR instruction"); + o = (sc & C_SCOND) << 28; + o |= 0xd<<24 | (1<<23); + if(v < 0) { + v = -v; + o ^= 1 << 23; + } + if(v & 3) + diag("odd offset for floating point op: %ld\n%P", v, p); + else if(v >= (1<<10)) + diag("literal span too large: %ld\n%P", v, p); + o |= (v>>2) & 0xFF; + o |= b << 16; + o |= r << 12; + switch(a) { + default: + diag("bad fst %A", a); + case AMOVD: + o |= 0xb<<8; + break; + case AMOVF: + o |= 0xa<<8; + break; + } + return o; +} + +long ofsr(int a, int r, long v, int b, int sc, Prog *p) { long o; + if(vfp) + return ovfpmem(a, r, v, b, sc, p); if(sc & C_SBIT) diag(".S on FLDR/FSTR instruction"); o = (sc & C_SCOND) << 28; @@ -1703,6 +1819,8 @@ Ieee *p; int n; + if(vfp) + return -1; for(n = sizeof(chipfloats)/sizeof(chipfloats[0]); --n >= 0;){ p = &chipfloats[n]; if(p->l == e->l && p->h == e->h) --- /sys/src/cmd/5l/optab.c Fri Jan 11 20:51:52 2013 +++ /sys/src/cmd/5l/optab.c Fri Jan 11 20:51:49 2013 @@ -211,6 +211,14 @@ { ACASE, C_REG, C_NONE, C_NONE, 62, 4, 0 }, { ABCASE, C_NONE, C_NONE, C_SBRA, 63, 4, 0 }, + { AADDF, C_FREG, C_NONE, C_FREG, 74, 4, 0, VFP }, + { AADDF, C_FREG, C_REG, C_FREG, 74, 4, 0, VFP }, + { AMOVF, C_FREG, C_NONE, C_FREG, 74, 4, 0, VFP }, + { ACMPF, C_FREG, C_REG, C_NONE, 75, 8, 0, VFP }, + { ACMPF, C_FCON, C_REG, C_NONE, 75, 8, 0, VFP }, + { AMOVFW, C_FREG, C_NONE, C_REG, 76, 8, 0, VFP }, + { AMOVFW, C_REG, C_NONE, C_FREG, 76, 8, 0, VFP }, + { AMOVH, C_REG, C_NONE, C_HEXT, 70, 4, REGSB, V4 }, { AMOVH, C_REG, C_NONE, C_HAUTO, 70, 4, REGSP, V4 }, { AMOVH, C_REG, C_NONE, C_HOREG, 70, 4, 0, V4 }, --- /sys/src/cmd/5l/span.c Fri Jan 11 20:51:59 2013 +++ /sys/src/cmd/5l/span.c Fri Jan 11 20:51:55 2013 @@ -639,6 +639,9 @@ n = (p2->flag&V4) - (p1->flag&V4); /* architecture version */ if(n) return n; + n = (p2->flag&VFP) - (p1->flag&VFP); /* floating point arch */ + if(n) + return n; n = p1->a1 - p2->a1; if(n) return n; @@ -657,14 +660,18 @@ int i, n, r; armv4 = !debug['h']; + vfp = debug['f']; for(i=0; i double made the + * unfortunate assumption that single and double floating + * point registers are aliased - true for emulated 7500 + * but not for vfp. Now corrected, but this test is + * insurance against old 5c compiled code in libraries. + */ + case AMOVWD: + if((q = p->link) != P && q->as == ACMP) + if((q = q->link) != P && q->as == AMOVF) + if((q1 = q->link) != P && q1->as == AADDF) + if(q1->to.type == D_FREG && q1->to.reg == p->to.reg) { + q1->as = AADDD; + q1 = prg(); + q1->scond = q->scond; + q1->line = q->line; + q1->as = AMOVFD; + q1->from = q->to; + q1->to = q1->from; + q1->link = q->link; + q->link = q1; + } + break; + case ADIV: case ADIVU: case AMOD: --- /sys/man/1/8l Fri Jan 11 20:52:15 2013 +++ /sys/man/1/8l Fri Jan 11 20:52:13 2013 @@ -150,6 +150,14 @@ only) Move strings into the text segment. .TP +.B -f +(\c +.I 5l +only) +Generate VFP hardware floating point instructions. +Without this flag, 5l generates arm7500 floating point +instructions which are emulated in the kernel. +.TP .BI -H n Executable header is type .IR n .