add AESKEYGENASSIST.  this requires handling sse opcodes with an
immediate byte, and some extra transformations in asmandg.  vex
encoding is optimal, but not required for these instructions, so
vexreg() is called after the transformation so that vex encoding
is not forced.

PCLMULQDQ, BLENDPS and BLENDPD take the same operand form and are
added with the same ytab (yaes).

Reference: /n/atom/patch/applied2013/aeskeygen
Date: Tue Jul 2 20:22:02 CES 2013
Signed-off-by: quanstro@quanstro.net

--- /sys/src/cmd/6l/span.c Tue Jul 2 20:19:48 2013
+++ /sys/src/cmd/6l/span.c Tue Jul 2 20:19:49 2013
@@ -713,6 +713,9 @@
 	int t;
 	Adr aa;
 
+	if(r == -1)
+		diag("asmandsz: immediate instead of register");
+
 	rex &= (0x40 | Rxr);
 	v = a->offset;
 	t = a->type;
@@ -879,8 +882,28 @@
 void
 asmandg(Adr *a, Adr *r, int o, int rdest, int prefix)
 {
-	Adr aa;
+	Adr aa, rr;
 
+	if(isxyreg(a->type)) {
+		if(isxyreg(a->index) && r->type == D_CONST) {
+			/*
+			 * convert sse instructions with immediate like
+			 * AESKEYGENASSIST $32, X1, X2 from
+			 * a=X1(X2*0); r=$32 to a=X1, r=X2. the
+			 * caller adds the immediate byte. vex is not required
+			 */
+			rr.offset = 0;
+			rr.sym = a->sym;
+			rr.type = a->index;
+			rr.index = D_NONE;
+			rr.scale = 0;
+			r = &rr;
+
+			aa = *a;
+			aa.index = D_NONE;
+			a = &aa;
+		}
+	}
 	vexreg(a);
 	if(isxyreg(a->type)) {
 		if(a->index != D_NONE) {
--- /sys/src/cmd/8l/span.c Tue Jul 2 20:19:51 2013
+++ /sys/src/cmd/8l/span.c Tue Jul 2 20:19:53 2013
@@ -593,6 +593,9 @@
 	int t;
 	Adr aa;
 
+	if(r == -1)
+		diag("asmand: immediate instead of register");
+
 	v = a->offset;
 	t = a->type;
 	if(a->index != D_NONE) {
@@ -741,8 +744,28 @@
 void
 asmandg(Adr *a, Adr *r, int o, int rdest, int prefix)
 {
-	Adr aa;
+	Adr aa, rr;
+
+	if(isxyreg(a->type)) {
+		if(isxyreg(a->index) && r->type == D_CONST) {
+			/*
+			 * convert sse instructions with immediate like
+			 * AESKEYGENASSIST $32, X1, X2 from
+			 * a=X1(X2*0); r=$32 to a=X1, r=X2. the
+			 * caller adds the immediate byte. vex is not required
+			 */
+			rr.offset = 0;
+			rr.sym = a->sym;
+			rr.type = a->index;
+			rr.index = D_NONE;
+			rr.scale = 0;
+			r = &rr;
+			aa = *a;
+			aa.index = D_NONE;
+			a = &aa;
+		}
+	}
 
 	vexreg(a);
 	if(isxyreg(a->type)) {
 		if(a->index != D_NONE) {
--- /sys/src/cmd/6l/optab.c Tue Jul 2 20:19:55 2013
+++ /sys/src/cmd/6l/optab.c Tue Jul 2 20:19:56 2013
@@ -533,6 +533,10 @@
 	Ymr, Yrl, Zm_r_xm, 1,
 	0
 };
+uchar yaes[] = {
+	Yi8, Yxyr, Zr_m_i_xm, 3,
+	0
+};
 
 Optab optab[] =
 /* as, ytab, andproto, opcode */
@@ -1269,7 +1273,11 @@
 	{ AAESENCLAST, yxm, Pe, Pm38, 0xdd },
 	{ AAESDEC, yxm, Pe, Pm38, 0xde },
 	{ AAESDECLAST, yxm, Pe, Pm38, 0xdf },
-// { AAESKEYGENASSIST, yaes, Pe, Pm3a, 0xdf }, /* doesn't work */
+	{ AAESKEYGENASSIST, yaes, Pe, Pm3a, 0xdf },
+	{ APCLMULQDQ, yaes, Pe, Pm3a, 0x44 },
+
+	{ ABLENDPS, yaes, Pe, Pm3a, 0x0c },
+	{ ABLENDPD, yaes, Pe, Pm3a, 0x0d },
 
 	{ AEND },
 	0
--- /sys/src/cmd/8l/optab.c Tue Jul 2 20:19:58 2013
+++ /sys/src/cmd/8l/optab.c Tue Jul 2 20:20:00 2013
@@ -448,6 +448,10 @@
 	Yxyr, Yrl, Zm_r_xm, 2,
 	0
 };
+uchar yaes[] = {
+	Yi8, Yxyr, Zr_m_i_xm, 3,
+	0
+};
 
 Optab optab[] =
 /* as, ytab, andproto, opcode */
@@ -994,7 +998,11 @@
 	{ AAESENCLAST, yxm, Pe, Pm38, 0xdd },
 	{ AAESDEC, yxm, Pe, Pm38, 0xde },
 	{ AAESDECLAST, yxm, Pe, Pm38, 0xdf },
-// { AAESKEYGENASSIST, yaes, Pe, Pm3a, 0xdf }, /* doesn't work */
+	{ AAESKEYGENASSIST, yaes, Pe, Pm3a, 0xdf },
+	{ APCLMULQDQ, yaes, Pe, Pm3a, 0x44 },
+
+	{ ABLENDPS, yaes, Pe, Pm3a, 0x0c },
+	{ ABLENDPD, yaes, Pe, Pm3a, 0x0d },
 
 	{ ACVTTPD2PL, yxcvm1, Px, Pe,0xe6,Pe,0x2c },
 	{ ACVTTPS2PL, yxcvm1, Px, Pf3,0x5b,Pm,0x2c },