diff options
author | cinap_lenrek <cinap_lenrek@felloff.net> | 2019-04-08 14:05:27 +0200 |
---|---|---|
committer | cinap_lenrek <cinap_lenrek@felloff.net> | 2019-04-08 14:05:27 +0200 |
commit | d8d4802f80b40bc9a43031e3d6484aa237e7d444 (patch) | |
tree | 7714690d2d47f2aedf4ad60ad9ad0e6d88db2a1b /sys/src/cmd/7l/asmout.c | |
parent | 394d095ee0a9e50242b88a783af6bb777cfb3e01 (diff) |
7l: add arm64 linker (initial sync)
Diffstat (limited to 'sys/src/cmd/7l/asmout.c')
-rw-r--r-- | sys/src/cmd/7l/asmout.c | 1709 |
1 files changed, 1709 insertions, 0 deletions
diff --git a/sys/src/cmd/7l/asmout.c b/sys/src/cmd/7l/asmout.c new file mode 100644 index 000000000..7a3c7b4f9 --- /dev/null +++ b/sys/src/cmd/7l/asmout.c @@ -0,0 +1,1709 @@ +#include "l.h" + +#define S32 (0U<<31) +#define S64 (1U<<31) +#define Rm(X) (((X)&31)<<16) +#define Rn(X) (((X)&31)<<5) +#define Rd(X) (((X)&31)<<0) +#define Sbit (1U<<29) + +#define OPDP2(x) (0<<30 | 0 << 29 | 0xd6<<21 | (x)<<10) +#define OPDP3(sf,op54,op31,o0) ((sf)<<31 | (op54)<<29 | 0x1B<<24 | (op31)<<21 | (o0)<<15) +#define OPBcc(x) (0x2A<<25 | 0<<24 | 0<<4 | ((x)&15)) +#define OPBLR(x) (0x6B<<25 | 0<<23 | (x)<<21 | 0x1F<<16 | 0<<10) /* x=0, JMP; 1, CALL; 2, RET */ +#define SYSOP(l,op0,op1,crn,crm,op2,rt) (0x354<<22 | (l)<<21 | (op0)<<19 | (op1)<<16 | (crn)<<12 | (crm)<<8 | (op2)<<5 | (rt)) +#define SYSHINT(x) SYSOP(0,0,3,2,0,(x),0x1F) + +#define LDSTR12U(sz,v,opc) ((sz)<<30 | 7<<27 | (v)<<26 | 1<<24 | (opc)<<22) +#define LDSTR9S(sz,v,opc) ((sz)<<30 | 7<<27 | (v)<<26 | 0<<24 | (opc)<<22) +#define LD2STR(o) ((o) & ~(3<<22)) + +#define LDSTX(sz,o2,l,o1,o0) ((sz)<<30 | 0x8<<24 | (o2)<<23 | (l)<<22 | (o1)<<21 | (o0)<<15) + +#define FPCMP(m,s,type,op,op2) ((m)<<31 | (s)<<29 | 0x1E<<24 | (type)<<22 | 1<<21 | (op)<<14 | 8<<10 | (op2)) +#define FPCCMP(m,s,type,op) ((m)<<31 | (s)<<29 | 0x1E<<24 | (type)<<22 | 1<<21 | 1<<10 | (op)<<4) +#define FPOP1S(m,s,type,op) ((m)<<31 | (s)<<29 | 0x1E<<24 | (type)<<22 | 1<<21 | (op)<<15 | 0x10<<10) +#define FPOP2S(m,s,type,op) ((m)<<31 | (s)<<29 | 0x1E<<24 | (type)<<22 | 1<<21 | (op)<<12 | 2<<10) +#define FPCVTI(sf,s,type,rmode,op) ((sf)<<31 | (s)<<29 | 0x1E<<24 | (type)<<22 | 1<<21 | (rmode)<<19 | (op)<<16 | 0<<10) +#define FPCVTF(sf,s,type,rmode,op,scale) ((sf)<<31 | (s)<<29 | 0x1E<<24 | (type)<<22 | 0<<21 | (rmode)<<19 | (op)<<16 | (scale)<<10) +#define ADR(p,o,rt) ((p)<<31 | ((o)&3)<<29 | (0x10<<24) | ((((o)>>2)&0x7FFFF)<<5) | (rt)) /* p goes to bit 31; offset o is split: low 2 bits at 30:29, next 19 bits at 23:5; every use of o is parenthesized so any expression may be passed */ + +#define LSL0_32 (2<<13) +#define LSL0_64 (3<<13) + +static long opbrr(int); +static long opbra(int); +static long oshrr(int, 
int, int); +static long olhrr(int, int, int); +static long olsr12u(long, long, int, int); +static long olsr9s(long, long, int, int); +static long opimm(int); +static vlong brdist(Prog*, int, int, int); +static long opbfm(int, int, int, int, int); +static long opextr(int, long, int, int, int); +static long opbit(int); +static long op0(int); +static long opstr12(int); +static long opstr9(int); +static long opldr9(int); +static long opxrrr(int); +static long olsxrr(int, int, int, int); +static long oprrr(int); +static long opirr(int); +static long opldr12(int); +static long opldrpp(int); +static long opload(int); +static long opstore(int); +static long omovlit(int, Prog*, Adr*, int); +static int movesize(int); +static long oaddi(long, long, int, int); + +/* + * valid pstate field values, and value to use in instruction + */ +static struct{ + ulong a; + ulong b; +} pstatefield[] = { +D_SPSel, (0<<16) | (4<<12) | (5<<5), +D_DAIFSet, (3<<16) | (4<<12) | (6<<5), +D_DAIFClr, (3<<16) | (4<<12) | (7<<5), +}; + +void +asmout(Prog *p, Optab *o) +{ + long o1, o2, o3, o4, o5, v, hi; + ulong u; + vlong d; + int r, s, rf, rt, ra, nzcv, cond, i, as; + Mask *mask; + static Prog *lastcase; + + o1 = 0; + o2 = 0; + o3 = 0; + o4 = 0; + o5 = 0; + switch(o->type) { + default: + diag("unknown asm %d", o->type); + prasm(p); + break; + + case 0: /* pseudo ops */ + break; + + case 1: /* op Rm,[Rn],Rd; default Rn=Rd -> op Rm<<0,[Rn,]Rd (shifted register) */ + o1 = oprrr(p->as); + rf = p->from.reg; + rt = p->to.reg; + r = p->reg; + if(p->to.type == D_NONE) + rt = REGZERO; + if(r == NREG) + r = rt; + o1 |= (rf<<16) | (r<<5) | rt; + break; + + case 2: /* add/sub $(uimm12|uimm24)[,R],R; cmp $(uimm12|uimm24),R */ + o1 = opirr(p->as); + rt = p->to.reg; + if(p->to.type == D_NONE){ + if((o1 & Sbit) == 0) + diag("ineffective ZR destination\n%P", p); + rt = REGZERO; + } + r = p->reg; + if(r == NREG) + r = rt; + v = regoff(&p->from); + o1 = oaddi(o1, v, r, rt); + break; + + case 3: /* op R<<n[,R],R 
(shifted register) */ + o1 = oprrr(p->as); + o1 |= p->from.offset; /* includes reg, op, etc */ + rt = p->to.reg; + if(p->to.type == D_NONE) + rt = REGZERO; + r = p->reg; + if(p->as == AMVN || p->as == AMVNW) + r = REGZERO; + else if(r == NREG) + r = rt; + o1 |= (r<<5) | rt; + break; + + case 4: /* mov $addcon, R; mov $recon, R; mov $racon, R */ + o1 = opirr(p->as); + rt = p->to.reg; + r = o->param; + if(r == 0) + r = REGZERO; + v = regoff(&p->from); + if((v & 0xFFF000) != 0){ + v >>= 12; + o1 |= 1<<22; /* shift, by 12 */ + } + o1 |= ((v& 0xFFF) << 10) | (r<<5) | rt; + break; + + case 5: /* b s; bl s */ + o1 = opbra(p->as); + o1 |= brdist(p, 0, 26, 2); + break; + + case 6: /* b ,O(R); bl ,O(R) */ + o1 = opbrr(p->as); + o1 |= p->to.reg << 5; + break; + + case 7: /* beq s */ + o1 = opbra(p->as); + o1 |= brdist(p, 0, 19, 2)<<5; + break; + + case 8: /* lsl $c,[R],R -> ubfm $(W-1)-c,$(-c MOD W),Rn,Rd; the other constant shifts map onto sbfm/ubfm/extr below */ + rt = p->to.reg; + rf = p->reg; + if(rf == NREG) + rf = rt; + v = p->from.offset; + switch(p->as){ + case AASR: o1 = opbfm(ASBFM, v, 63, rf, rt); break; + case AASRW: o1 = opbfm(ASBFMW, v, 31, rf, rt); break; + case ALSL: o1 = opbfm(AUBFM, (64-v)&63, 63-v, rf, rt); break; /* (64-v)&63 is -v modulo 64 */ + case ALSLW: o1 = opbfm(AUBFMW, (32-v)&31, 31-v, rf, rt); break; /* (32-v)&31 is -v modulo 32 */ + case ALSR: o1 = opbfm(AUBFM, v, 63, rf, rt); break; + case ALSRW: o1 = opbfm(AUBFMW, v, 31, rf, rt); break; + case AROR: o1 = opextr(AEXTR, v, rf, rf, rt); break; /* rotate: extr with the same source register passed twice */ + case ARORW: o1 = opextr(AEXTRW, v, rf, rf, rt); break; + default: + diag("bad shift $con\n%P", curp); + break; + } + break; + + case 9: /* lsl Rm,[Rn],Rd -> lslv Rm, Rn, Rd */ + o1 = oprrr(p->as); + r = p->reg; + if(r == NREG) + r = p->to.reg; + o1 |= (p->from.reg << 16) | (r<<5) | p->to.reg; + break; + + case 10: /* brk/hvc/.../svc [$con] */ + o1 = opimm(p->as); + if(p->to.type != D_NONE) + o1 |= (p->to.offset & 0xffff)<<5; + break; + + case 11: /* dword */ + switch(aclass(&p->to)) { + case C_VCON: + case C_ZCON: + case C_LCON: + if(!dlm) + break; + if(p->to.name != 
D_EXTERN && p->to.name != D_STATIC) + break; + case C_ADDR: /* also entered by fall-through from the C_VCON/C_ZCON/C_LCON cases above when dlm is set and the name is D_EXTERN or D_STATIC */ + if(p->to.sym->type == SUNDEF) + ckoff(p->to.sym, p->to.offset); + dynreloc(p->to.sym, p->pc, 1); + } + o1 = instoffset; + o2 = instoffset >> 32; /* bits 32 and up of instoffset go in the second word */ + break; + + case 12: /* movT $lcon, reg */ + o1 = omovlit(p->as, p, &p->from, p->to.reg); + break; + + case 13: /* addop $lcon, [R], R (64 bit literal); cmp $lcon,R -> addop $lcon,R, ZR */ + o1 = omovlit(AMOV, p, &p->from, REGTMP); + if(!o1) + break; + rt = p->to.reg; + if(p->to.type == D_NONE) + rt = REGZERO; + r = p->reg; + if(r == NREG) + r = rt; + if(p->to.type != D_NONE && (p->to.reg == REGSP || r == REGSP)){ /* SP involved: use the opxrrr (extended register) opcode instead of oprrr */ + o2 = opxrrr(p->as); + o2 |= REGTMP<<16; + o2 |= LSL0_64; + }else{ + o2 = oprrr(p->as); + o2 |= REGTMP << 16; /* shift is 0 */ + } + o2 |= r << 5; + o2 |= rt; + break; + + case 14: /* word */ + if(aclass(&p->to) == C_ADDR) + diag("address constant needs DWORD\n%P", p); + o1 = instoffset; + break; + + case 15: /* mul/mneg/umulh/umull r,[r,]r; madd/msub Rm,Rn,Ra,Rd */ + o1 = oprrr(p->as); + rf = p->from.reg; + rt = p->to.reg; + if(p->from3.type == D_REG){ + r = p->from3.reg; + ra = p->reg; + if(ra == NREG) + ra = REGZERO; + }else{ + r = p->reg; + if(r == NREG) + r = rt; + ra = REGZERO; + } + o1 |= (rf<<16) | (ra<<10) | (r<<5) | rt; + break; + + case 16: /* XremY R[,R],R -> XdivY; XmsubY */ + o1 = oprrr(p->as); + rf = p->from.reg; + rt = p->to.reg; + r = p->reg; + if(r == NREG) + r = rt; + o1 |= (rf<<16) | (r<<5) | REGTMP; /* first instruction writes its result to REGTMP */ + o2 = oprrr(AMSUBW); + o2 |= o1 & (1<<31); /* same size */ + o2 |= (rf<<16) | (r<<10) | (REGTMP<<5) | rt; /* second instruction: Rm=rf, Ra=r, Rn=REGTMP, Rd=rt */ + break; + + case 17: /* op Rm,[Rn],Rd; default Rn=ZR */ + o1 = oprrr(p->as); + rf = p->from.reg; + rt = p->to.reg; + r = p->reg; + if(p->to.type == D_NONE) + rt = REGZERO; + if(r == NREG) + r = REGZERO; + o1 |= (rf<<16) | (r<<5) | rt; + break; + + case 18: /* csel cond,Rn,Rm,Rd; cinc/cinv/cneg cond,Rn,Rd; cset cond,Rd */ + o1 = oprrr(p->as); + cond = p->from.reg; + r = p->reg; + if(r != NREG){ + if(p->from3.type == D_NONE){ + /* 
CINC/CINV/CNEG */ + rf = r; + cond ^= 1; + }else + rf = p->from3.reg; /* CSEL */ + }else{ + /* CSET */ + if(p->from3.type != D_NONE) + diag("invalid combination\n%P", p); + r = rf = REGZERO; + cond ^= 1; + } + rt = p->to.reg; + o1 |= (r<<16) | (cond<<12) | (rf<<5) | rt; + break; + + case 19: /* CCMN cond, (Rm|uimm5),Rn, uimm4 -> ccmn Rn,Rm,uimm4,cond */ + nzcv = p->to.offset; + if(nzcv & ~0xF) /* nzcv is OR'ed into the low bits unmasked; reject anything wider than 4 bits, as the FCCMP case does */ + diag("implausible condition\n%P", p); + cond = p->from.reg; + if(p->from3.type == D_REG){ + o1 = oprrr(p->as); + rf = p->from3.reg; /* Rm */ + }else{ + o1 = opirr(p->as); + rf = p->from3.offset & 0x1F; /* uimm5 */ + } + o1 |= (rf<<16) | (cond<<12) | (p->reg<<5) | nzcv; + break; + + case 20: /* movT R,O(R) -> strT */ + v = regoff(&p->to); + r = p->to.reg; + if(r == NREG) + r = o->param; + if(v < 0){ /* unscaled 9-bit signed */ + o1 = olsr9s(opstr9(p->as), v, r, p->from.reg); + }else{ + v = offsetshift(v, o->a3); + o1 = olsr12u(opstr12(p->as), v, r, p->from.reg); + } + break; + + case 21: /* movT O(R),R -> ldrT */ + v = regoff(&p->from); + r = p->from.reg; + if(r == NREG) + r = o->param; + if(v < 0){ /* unscaled 9-bit signed */ + o1 = olsr9s(opldr9(p->as), v, r, p->to.reg); + }else{ + v = offsetshift(v, o->a1); + //print("offset=%lld v=%ld a1=%d\n", instoffset, v, o->a1); + o1 = olsr12u(opldr12(p->as), v, r, p->to.reg); + } + break; + + case 22: /* movT (R)O!,R; movT O(R)!, R -> ldrT */ + v = p->from.offset; + if(v < -256 || v > 255) + diag("offset out of range\n%P", p); + o1 = opldrpp(p->as); + if(p->from.type == D_XPOST) + o1 |= 1<<10; + else + o1 |= 3<<10; + o1 |= ((v&0x1FF)<<12) | (p->from.reg<<5) | p->to.reg; + break; + + case 23: /* movT R,(R)O!; movT R,O(R)! -> strT */ + v = p->to.offset; + if(v < -256 || v > 255) + diag("offset out of range\n%P", p); + o1 = LD2STR(opldrpp(p->as)); /* take the load opcode and clear bits 23:22 to get the store form */ + if(p->to.type == D_XPOST) + o1 |= 1<<10; + else + o1 |= 3<<10; + o1 |= ((v&0x1FF)<<12) | (p->to.reg<<5) | p->from.reg; + break; + + case 24: /* mov/mvn Rs,Rd -> add $0,Rs,Rd or orr Rs,ZR,Rd */ + rf = p->from.reg; + rt = p->to.reg; + s = rf == REGSP || rt == 
REGSP; + if(p->as == AMVN || p->as == AMVNW){ + if(s) + diag("illegal SP reference\n%P", p); + o1 = oprrr(p->as); + o1 |= (rf<<16) | (REGZERO<<5) | rt; + }else if(s){ + o1 = opirr(p->as); + o1 |= (rf<<5) | rt; + }else{ + o1 = oprrr(p->as); + o1 |= (rf<<16) | (REGZERO<<5) | rt; + } + break; + + case 25: /* negX Rs, Rd -> subX Rs<<0, ZR, Rd */ + o1 = oprrr(p->as); + rf = p->from.reg; + rt = p->to.reg; + o1 |= (rf<<16) | (REGZERO<<5) | rt; + break; + + case 26: /* negX Rm<<s, Rd -> subX Rm<<s, ZR, Rd */ + o1 = oprrr(p->as); + o1 |= p->from.offset; /* includes reg, op, etc */ + rt = p->to.reg; + o1 |= (REGZERO<<5) | rt; + break; + + case 27: /* op Rm<<n[,Rn],Rd (extended register) */ + o1 = opxrrr(p->as); + if(p->from.type == D_EXTREG) + o1 |= p->from.offset; /* includes reg, op, etc */ + else + o1 |= p->from.reg << 16; + rt = p->to.reg; + if(p->to.type == D_NONE) + rt = REGZERO; + r = p->reg; + if(r == NREG) + r = rt; + o1 |= (r<<5) | rt; + break; + + case 28: /* logop $lcon, [R], R (64 bit literal) */ + o1 = omovlit(AMOV, p, &p->from, REGTMP); + if(!o1) + break; + r = p->reg; + if(r == NREG) + r = p->to.reg; + o2 = oprrr(p->as); + o2 |= REGTMP << 16; /* shift is 0 */ + o2 |= r << 5; + o2 |= p->to.reg; + break; + + case 29: /* op Rn, Rd */ + o1 = oprrr(p->as); + o1 |= p->from.reg<<5 | p->to.reg; + break; + + case 30: /* movT R,L(R) -> strT */ + s = movesize(o->as); + if(s < 0) + diag("unexpected long move, op %A tab %A\n%P", p->as, o->as, p); + v = regoff(&p->to); + if(v < 0) + diag("negative large offset\n%P", p); + if((v & ((1<<s)-1)) != 0) + diag("misaligned offset\n%P", p); + hi = v - (v & (0xFFF<<s)); + if((hi & 0xFFF) != 0) + diag("internal: miscalculated offset %ld [%d]\n%P", v, s, p); + //fprint(2, "v=%ld (%#lux) s=%d hi=%ld (%#lux) v'=%ld (%#lux)\n", v, v, s, hi, hi, ((v-hi)>>s)&0xFFF, ((v-hi)>>s)&0xFFF); + r = p->to.reg; + if(r == NREG) + r = o->param; + o1 = oaddi(opirr(AADD), hi, r, REGTMP); + o2 = olsr12u(opstr12(p->as), ((v-hi)>>s)&0xFFF, REGTMP, 
p->from.reg); + break; + + case 31: /* movT L(R), R -> ldrT */ + s = movesize(o->as); + if(s < 0) + diag("unexpected long move, op %A tab %A\n%P", p->as, o->as, p); + v = regoff(&p->from); + if(v < 0) + diag("negative large offset\n%P", p); + if((v & ((1<<s)-1)) != 0) + diag("misaligned offset\n%P", p); + hi = v - (v & (0xFFF<<s)); + if((hi & 0xFFF) != 0) + diag("internal: miscalculated offset %ld [%d]\n%P", v, s, p); + //fprint(2, "v=%ld (%#lux) s=%d hi=%ld (%#lux) v'=%ld (%#lux)\n", v, v, s, hi, hi, ((v-hi)>>s)&0xFFF, ((v-hi)>>s)&0xFFF); + r = p->from.reg; + if(r == NREG) + r = o->param; + o1 = oaddi(opirr(AADD), hi, r, REGTMP); + o2 = olsr12u(opldr12(p->as), ((v-hi)>>s)&0xFFF, REGTMP, p->to.reg); + break; + + case 32: /* mov $con, R -> movz/movn */ + r = 32; + if(p->as == AMOV) + r = 64; + d = p->from.offset; + s = movcon(d); + if(s < 0 || s >= r){ + d = ~d; + s = movcon(d); + if(s < 0 || s >= r) + diag("impossible move wide: %#llux\n%P", p->from.offset, p); + if(p->as == AMOV) + o1 = opirr(AMOVN); + else + o1 = opirr(AMOVNW); + }else{ + if(p->as == AMOV) + o1 = opirr(AMOVZ); + else + o1 = opirr(AMOVZW); + } + rt = p->to.reg; + o1 |= (((d>>(s*16))& 0xFFFF) << 5) | ((s&3)<<21) | rt; + break; + + case 33: /* movk $uimm16 << pos */ + o1 = opirr(p->as); + d = p->from.offset; + if((d>>16) != 0) + diag("requires uimm16\n%P", p); + s = 0; + if(p->from3.type != D_NONE){ + if(p->from3.type != D_CONST) + diag("missing bit position\n%P", p); + s = p->from3.offset; + if((s&0xF) != 0 || (s /= 16) >= 4 || (o1&S64) == 0 && s >= 2) + diag("illegal bit position\n%P", p); + } + rt = p->to.reg; + o1 |= ((d & 0xFFFF) << 5) | ((s&3)<<21) | rt; + break; + + case 34: /* mov $lacon,R */ + o1 = omovlit(AMOV, p, &p->from, REGTMP); + if(!o1) + break; + + o2 = opxrrr(AADD); + o2 |= REGTMP << 16; + o2 |= LSL0_64; + r = p->from.reg; + if(r == NREG) + r = o->param; + o2 |= r << 5; + o2 |= p->to.reg; + break; + + case 35: /* mov SPR,R -> mrs */ + o1 = oprrr(AMRS); + v = p->from.offset; + 
if((o1 & (v & ~(3<<19))) != 0) + diag("MRS register value overlap\n%P", p); + o1 |= v; + o1 |= p->to.reg; + break; + + case 36: /* mov R,SPR */ + o1 = oprrr(AMSR); + v = p->to.offset; + if((o1 & (v & ~(3<<19))) != 0) + diag("MSR register value overlap\n%P", p); + o1 |= v; + o1 |= p->from.reg; + break; + + case 37: /* mov $con,PSTATEfield -> MSR [immediate] */ + if((p->from.offset&~(uvlong)0xF) != 0) + diag("illegal immediate for PSTATE field\n%P", p); + o1 = opirr(AMSR); + o1 |= (p->from.offset&0xF) << 8; /* Crm */ + v = 0; + for(i = 0; i < nelem(pstatefield); i++) + if(pstatefield[i].a == p->to.offset){ + v = pstatefield[i].b; + break; + } + if(v == 0) + diag("illegal PSTATE field for immediate move\n%P", p); + o1 |= v; + break; + + case 38: /* clrex [$imm] */ + o1 = opimm(p->as); + if(p->to.type == D_NONE) + o1 |= 0xF<<8; + else + o1 |= (p->to.offset & 0xF)<<8; + break; + + case 39: /* cbz R, rel */ + o1 = opirr(p->as); + o1 |= p->from.reg; + o1 |= brdist(p, 0, 19, 2) << 5; + break; + + case 40: /* tbz */ + o1 = opirr(p->as); + v = p->from.offset; + if(v < 0 || v > 63) + diag("illegal bit number\n%P", p); + o1 |= ((v&0x20)<<(31-5)) | ((v&0x1F)<<19); + o1 |= brdist(p, 0, 14, 2)<<5; + o1 |= p->reg; + break; + + case 41: /* eret, nop, others with no operands */ + o1 = op0(p->as); + break; + + case 42: /* bfm R,r,s,R */ + o1 = opbfm(p->as, p->from.offset, p->from3.offset, p->reg, p->to.reg); + break; + + case 43: /* bfm aliases */ + r = p->from.offset; + s = p->from3.offset; + rf = p->reg; + rt = p->to.reg; + if(rf == NREG) + rf = rt; + switch(p->as){ + case ABFI: o1 = opbfm(ABFM, 64-r, s-1, rf, rt); break; + case ABFIW: o1 = opbfm(ABFMW, 32-r, s-1, rf, rt); break; + case ABFXIL: o1 = opbfm(ABFM, r, r+s-1, rf, rt); break; + case ABFXILW: o1 = opbfm(ABFMW, r, r+s-1, rf, rt); break; + case ASBFIZ: o1 = opbfm(ASBFM, 64-r, s-1, rf, rt); break; + case ASBFIZW: o1 = opbfm(ASBFMW, 32-r, s-1, rf, rt); break; + case ASBFX: o1 = opbfm(ASBFM, r, r+s-1, rf, rt); break; + case 
ASBFXW: o1 = opbfm(ASBFMW, r, r+s-1, rf, rt); break; + case AUBFIZ: o1 = opbfm(AUBFM, 64-r, s-1, rf, rt); break; + case AUBFIZW: o1 = opbfm(AUBFMW, 32-r, s-1, rf, rt); break; + case AUBFX: o1 = opbfm(AUBFM, r, r+s-1, rf, rt); break; + case AUBFXW: o1 = opbfm(AUBFMW, r, r+s-1, rf, rt); break; + default: + diag("bad bfm alias\n%P", curp); + break; + } + break; + + case 44: /* extr $b, Rn, Rm, Rd */ + o1 = opextr(p->as, p->from.offset, p->from3.reg, p->reg, p->to.reg); + break; + + case 45: /* sxt/uxt[bhw] R,R; movT R,R -> sxtT R,R */ + rf = p->from.reg; + rt = p->to.reg; + as = p->as; + if(rf == REGZERO) + as = AMOVWU; /* clearer in disassembly */ + switch(as){ + case AMOVB: + case ASXTB: o1 = opbfm(ASBFM, 0, 7, rf, rt); break; + case AMOVH: + case ASXTH: o1 = opbfm(ASBFM, 0, 15, rf, rt); break; + case AMOVW: + case ASXTW: o1 = opbfm(ASBFM, 0, 31, rf, rt); break; + case AMOVBU: + case AUXTB: o1 = opbfm(AUBFM, 0, 7, rf, rt); break; + case AMOVHU: + case AUXTH: o1 = opbfm(AUBFM, 0, 15, rf, rt); break; + case AMOVWU: o1 = oprrr(as) | (rf<<16) | (REGZERO<<5) | rt; break; + case AUXTW: o1 = opbfm(AUBFM, 0, 31, rf, rt); break; + case ASXTBW: o1 = opbfm(ASBFMW, 0, 7, rf, rt); break; + case ASXTHW: o1 = opbfm(ASBFMW, 0, 15, rf, rt); break; + case AUXTBW: o1 = opbfm(AUBFMW, 0, 7, rf, rt); break; + case AUXTHW: o1 = opbfm(AUBFMW, 0, 15, rf, rt); break; + default: diag("bad sxt %A", as); break; + } + break; + + case 46: /* cls */ + o1 = opbit(p->as); + o1 |= p->from.reg<<5; + o1 |= p->to.reg; + break; + + case 47: /* movT R,V(R) -> strT (huge offset) */ + o1 = omovlit(AMOVW, p, &p->to, REGTMP); + if(!o1) + break; + r = p->to.reg; + if(r == NREG) + r = o->param; + o2 = olsxrr(p->as, REGTMP,r, p->from.reg); + break; + + case 48: /* movT V(R), R -> ldrT (huge offset) */ + o1 = omovlit(AMOVW, p, &p->from, REGTMP); + if(!o1) + break; + r = p->from.reg; + if(r == NREG) + r = o->param; + o2 = olsxrr(p->as, REGTMP,r, p->to.reg); + break; + + case 50: /* sys/sysl */ + o1 = opirr(p->as); 
+ if((p->from.offset & ~SYSARG4(0x7, 0xF, 0xF, 0x7)) != 0) + diag("illegal SYS argument\n%P", p); + o1 |= p->from.offset; + if(p->to.type == D_REG) + o1 |= p->to.reg; + else if(p->reg != NREG) + o1 |= p->reg; + else + o1 |= 0x1F; /* no register operand supplied: register field is filled with 0x1F */ + break; + + case 51: /* dmb */ + o1 = opirr(p->as); + if(p->from.type == D_CONST) + o1 |= (p->from.offset&0xF)<<8; + break; + + case 52: /* hint */ + o1 = opirr(p->as); + o1 |= (p->from.offset&0x7F)<<5; + break; + + case 53: /* and/or/eor/bic/... $bimmN, Rn, Rd -> op (N,r,s), Rn, Rd */ + as = p->as; + rt = p->to.reg; + r = p->reg; + if(r == NREG) + r = rt; + if(as == AMOV){ /* mov $bimm,Rd is assembled as orr $bimm,ZR,Rd */ + as = AORR; + r = REGZERO; + }else if(as == AMOVW){ + as = AORRW; + r = REGZERO; + } + o1 = opirr(as); + s = o1 & S64? 64: 32; + mask = findmask(p->from.offset); + if(mask == nil) + mask = findmask(p->from.offset | (p->from.offset<<32)); /* retry with the low word replicated into the high word */ + if(mask != nil){ + o1 |= ((mask->r&(s-1))<<16) | (((mask->s-1)&(s-1))<<10); + if(s == 64){ + if(mask->e == 64 && ((uvlong)p->from.offset>>32) != 0) + o1 |= 1<<22; /* bit 22; presumably the N of the (N,r,s) triple -- confirm */ + }else{ + u = (uvlong)p->from.offset >> 32; + if(u != 0 && u != 0xFFFFFFFF) + diag("mask needs 64 bits %#llux\n%P", p->from.offset, p); + } + }else + diag("invalid mask %#llux\n%P", p->from.offset, p); /* probably shouldn't happen */ + o1 |= (r<<5) | rt; + break; + + case 54: /* floating point arith */ + o1 = oprrr(p->as); + if(p->from.type == D_FCONST) { + rf = chipfloat(p->from.ieee); + if(rf < 0 || 1){ /* NOTE(review): "|| 1" makes this test always true, so every D_FCONST operand is diagnosed and rf is forced to 0; looks like a deliberate stop-gap while the immediate form is unfinished -- confirm before removing */ + diag("invalid floating-point immediate\n%P", p); + rf = 0; + } + rf |= (1<<3); + } else + rf = p->from.reg; + rt = p->to.reg; + r = p->reg; + if((o1 & (0x1F<<24)) == (0x1E<<24) && (o1 & (1<<11)) == 0){ /* monadic */ + r = rf; + rf = 0; + }else if(r == NREG) + r = rt; + o1 |= (rf << 16) | (r<<5) | rt; + break; + + case 56: /* floating point compare */ + o1 = oprrr(p->as); + if(p->from.type == D_FCONST) { + if(p->from.ieee->h != 0 || p->from.ieee->l != 0) /* only the all-zero-bits constant is accepted */ + diag("invalid floating-point immediate\n%P", p); + o1 |= 8; /* zero */ + rf = 0; + }else + rf = p->from.reg; + rt = 
p->reg; + o1 |= rf<<16 | rt<<5; + break; + + case 57: /* floating point conditional compare */ + o1 = oprrr(p->as); + cond = p->from.reg; + nzcv = p->to.offset; + if(nzcv & ~0xF) + diag("implausible condition\n%P", p); + rf = p->reg; + if(p->from3.type != D_FREG) + diag("illegal FCCMP\n%P", p); + rt = p->from3.reg; + o1 |= rf<<16 | cond<<12 | rt<<5 | nzcv; + break; + + case 58: /* ldxr */ + o1 = opload(p->as); + o1 |= 0x1F<<16; + o1 |= p->from.reg<<5; + if(p->reg != NREG) + o1 |= p->reg<<10; + else + o1 |= 0x1F<<10; + o1 |= p->to.reg; + break; + + case 59: /* stxr */ + o1 = opstore(p->as); + o1 |= p->reg << 16; + if(p->from3.type != D_NONE) + o1 |= p->from3.reg<<10; + else + o1 |= 0x1F<<10; + o1 |= p->to.reg<<5; + o1 |= p->from.reg; + break; + + case 60: /* adrp label,r */ + d = brdist(p, 12, 21, 0); + o1 = ADR(1, d, p->to.reg); + break; + + case 61: /* adr label, r */ + d = brdist(p, 0, 21, 0); + o1 = ADR(0, d, p->to.reg); + break; + + case 62: /* case Rv, Rt -> adr tab, Rt; movw Rt[R<<2], Rl; add Rt, Rl; br (Rl) */ + o1 = ADR(0, 4*4, p->to.reg); /* adr 4(pc), Rt */ + o2 = (2<<30)|(7<<27)|(2<<22)|(1<<21)|(3<<13)|(1<<12)|(2<<10)|(p->from.reg<<16)|(p->to.reg<<5)|REGTMP; /* movw Rt[Rv<<2], REGTMP */ + o3 = oprrr(AADD) | (p->to.reg<<16) | (REGTMP<<5) | REGTMP; /* add Rt, REGTMP */ + o4 = (0x6b<<25)|(0x1F<<16)|(REGTMP<<5); /* br (REGTMP) */ + lastcase = p; + break; + + case 63: /* bcase */ + if(lastcase == nil){ + diag("missing CASE\n%P", p); + break; + } + if(p->cond != P) { + o1 = p->cond->pc - (lastcase->pc + 4*4); + if(dlm) + dynreloc(S, p->pc, 1); + } + break; + + /* reloc ops */ + case 64: /* movT R,addr */ + o1 = omovlit(AMOV, p, &p->to, REGTMP); + if(!o1) + break; + o2 = olsr12u(opstr12(p->as), 0, REGTMP, p->from.reg); + break; + + case 65: /* movT addr,R */ + o1 = omovlit(AMOV, p, &p->from, REGTMP); + if(!o1) + break; + o2 = olsr12u(opldr12(p->as), 0, REGTMP, p->to.reg); + break; + } + + if(debug['a'] > 1) + Bprint(&bso, "%2d ", o->type); + + v = p->pc; + 
switch(o->size) { + default: + if(debug['a']) + Bprint(&bso, " %.8lux:\t\t%P\n", v, p); + break; + case 4: + if(debug['a']) + Bprint(&bso, " %.8lux: %.8lux\t%P\n", v, o1, p); + lputl(o1); + break; + case 8: + if(debug['a']) + Bprint(&bso, " %.8lux: %.8lux %.8lux%P\n", v, o1, o2, p); + lputl(o1); + lputl(o2); + break; + case 12: + if(debug['a']) + Bprint(&bso, " %.8lux: %.8lux %.8lux %.8lux%P\n", v, o1, o2, o3, p); + lputl(o1); + lputl(o2); + lputl(o3); + break; + case 16: + if(debug['a']) + Bprint(&bso, " %.8lux: %.8lux %.8lux %.8lux %.8lux%P\n", + v, o1, o2, o3, o4, p); + lputl(o1); + lputl(o2); + lputl(o3); + lputl(o4); + break; + case 20: + if(debug['a']) + Bprint(&bso, " %.8lux: %.8lux %.8lux %.8lux %.8lux %.8lux%P\n", + v, o1, o2, o3, o4, o5, p); + lputl(o1); + lputl(o2); + lputl(o3); + lputl(o4); + lputl(o5); + break; + } +} + +/* + * basic Rm op Rn -> Rd (using shifted register with 0) + * also op Rn -> Rt + * also Rm*Rn op Ra -> Rd + */ +static long +oprrr(int a) +{ + switch(a) { + case AADC: return S64 | 0<<30 | 0<<29 | 0xd0<<21 | 0<<10; + case AADCW: return S32 | 0<<30 | 0<<29 | 0xd0<<21 | 0<<10; + case AADCS: return S64 | 0<<30 | 1<<29 | 0xd0<<21 | 0<<10; + case AADCSW: return S32 | 0<<30 | 1<<29 | 0xd0<<21 | 0<<10; + + case ANGC: + case ASBC: return S64 | 1<<30 | 0<<29 | 0xd0<<21 | 0<<10; + case ANGCS: + case ASBCS: return S64 | 1<<30 | 1<<29 | 0xd0<<21 | 0<<10; + case ANGCW: + case ASBCW: return S32 | 1<<30 | 0<<29 | 0xd0<<21 | 0<<10; + case ANGCSW: + case ASBCSW: return S32 | 1<<30 | 1<<29 | 0xd0<<21 | 0<<10; + + case AADD: return S64 | 0<<30 | 0<<29 | 0x0b<<24 | 0<<22 | 0<<21 | 0<<10; + case AADDW: return S32 | 0<<30 | 0<<29 | 0x0b<<24 | 0<<22 | 0<<21 | 0<<10; + case ACMN: + case AADDS: return S64 | 0<<30 | 1<<29 | 0x0b<<24 | 0<<22 | 0<<21 | 0<<10; + case ACMNW: + case AADDSW: return S32 | 0<<30 | 1<<29 | 0x0b<<24 | 0<<22 | 0<<21 | 0<<10; + + case ASUB: return S64 | 1<<30 | 0<<29 | 0x0b<<24 | 0<<22 | 0<<21 | 0<<10; + case ASUBW: return S32 | 1<<30 | 
0<<29 | 0x0b<<24 | 0<<22 | 0<<21 | 0<<10; + case ACMP: + case ASUBS: return S64 | 1<<30 | 1<<29 | 0x0b<<24 | 0<<22 | 0<<21 | 0<<10; + case ACMPW: + case ASUBSW: return S32 | 1<<30 | 1<<29 | 0x0b<<24 | 0<<22 | 0<<21 | 0<<10; + + case AAND: return S64 | 0<<29 | 0xA<<24; + case AANDW: return S32 | 0<<29 | 0xA<<24; + case AMOV: + case AORR: return S64 | 1<<29 | 0xA<<24; +// case AMOVW: + case AMOVWU: + case AORRW: return S32 | 1<<29 | 0xA<<24; + case AEOR: return S64 | 2<<29 | 0xA<<24; + case AEORW: return S32 | 2<<29 | 0xA<<24; + case AANDS: return S64 | 3<<29 | 0xA<<24; + case AANDSW: return S32 | 3<<29 | 0xA<<24; + + case ABIC: return S64 | 0<<29 | 0xA<<24 | 1<<21; + case ABICW: return S32 | 0<<29 | 0xA<<24 | 1<<21; + case ABICS: return S64 | 3<<29 | 0xA<<24 | 1<<21; + case ABICSW: return S32 | 3<<29 | 0xA<<24 | 1<<21; + case AEON: return S64 | 2<<29 | 0xA<<24 | 1<<21; + case AEONW: return S32 | 2<<29 | 0xA<<24 | 1<<21; + case AMVN: + case AORN: return S64 | 1<<29 | 0xA<<24 | 1<<21; + case AMVNW: + case AORNW: return S32 | 1<<29 | 0xA<<24 | 1<<21; + + case AASR: return S64 | OPDP2(10); /* also ASRV */ + case AASRW: return S32 | OPDP2(10); + case ALSL: return S64 | OPDP2(8); + case ALSLW: return S32 | OPDP2(8); + case ALSR: return S64 | OPDP2(9); + case ALSRW: return S32 | OPDP2(9); + case AROR: return S64 | OPDP2(11); + case ARORW: return S32 | OPDP2(11); + + case ACCMN: return S64 | 0<<30 | 1<<29 | 0xD2<<21 | 0<<11 | 0<<10 | 0<<4; /* cond<<12 | nzcv<<0 */ + case ACCMNW: return S32 | 0<<30 | 1<<29 | 0xD2<<21 | 0<<11 | 0<<10 | 0<<4; + case ACCMP: return S64 | 1<<30 | 1<<29 | 0xD2<<21 | 0<<11 | 0<<10 | 0<<4; /* imm5<<16 | cond<<12 | nzcv<<0 */ + case ACCMPW: return S32 | 1<<30 | 1<<29 | 0xD2<<21 | 0<<11 | 0<<10 | 0<<4; + + case ACRC32B: return S32 | OPDP2(16); + case ACRC32H: return S32 | OPDP2(17); + case ACRC32W: return S32 | OPDP2(18); + case ACRC32X: return S64 | OPDP2(19); + case ACRC32CB: return S32 | OPDP2(20); + case ACRC32CH: return S32 | OPDP2(21); + case 
ACRC32CW: return S32 | OPDP2(22); + case ACRC32CX: return S64 | OPDP2(23); + + case ACSEL: return S64 | 0<<30 | 0<<29 | 0xD4<<21 | 0<<11 | 0<<10; + case ACSELW: return S32 | 0<<30 | 0<<29 | 0xD4<<21 | 0<<11 | 0<<10; + case ACSET: return S64 | 0<<30 | 0<<29 | 0xD4<<21 | 0<<11 | 1<<10; + case ACSETW: return S32 | 0<<30 | 0<<29 | 0xD4<<21 | 0<<11 | 1<<10; + case ACSETM: return S64 | 1<<30 | 0<<29 | 0xD4<<21 | 0<<11 | 0<<10; + case ACSETMW: return S32 | 1<<30 | 0<<29 | 0xD4<<21 | 0<<11 | 0<<10; + case ACINC: + case ACSINC: return S64 | 0<<30 | 0<<29 | 0xD4<<21 | 0<<11 | 1<<10; + case ACINCW: + case ACSINCW: return S32 | 0<<30 | 0<<29 | 0xD4<<21 | 0<<11 | 1<<10; + case ACINV: + case ACSINV: return S64 | 1<<30 | 0<<29 | 0xD4<<21 | 0<<11 | 0<<10; + case ACINVW: + case ACSINVW: return S32 | 1<<30 | 0<<29 | 0xD4<<21 | 0<<11 | 0<<10; + case ACNEG: + case ACSNEG: return S64 | 1<<30 | 0<<29 | 0xD4<<21 | 0<<11 | 1<<10; + case ACNEGW: + case ACSNEGW: return S32 | 1<<30 | 0<<29 | 0xD4<<21 | 0<<11 | 1<<10; + + case AMUL: + case AMADD: return S64 | 0<<29 | 0x1B<<24 | 0<<21 | 0<<15; + case AMULW: + case AMADDW: return S32 | 0<<29 | 0x1B<<24 | 0<<21 | 0<<15; + case AMNEG: + case AMSUB: return S64 | 0<<29 | 0x1B<<24 | 0<<21 | 1<<15; + case AMNEGW: + case AMSUBW: return S32 | 0<<29 | 0x1B<<24 | 0<<21 | 1<<15; + + case AMRS: return SYSOP(1,2,0,0,0,0,0); + case AMSR: return SYSOP(0,2,0,0,0,0,0); + + case ANEG: return S64 | 1<<30 | 0<<29 | 0xB<<24 | 0<<21; + case ANEGW: return S32 | 1<<30 | 0<<29 | 0xB<<24 | 0<<21; + case ANEGS: return S64 | 1<<30 | 1<<29 | 0xB<<24 | 0<<21; + case ANEGSW: return S32 | 1<<30 | 1<<29 | 0xB<<24 | 0<<21; + + case AREM: + case ASDIV: return S64 | OPDP2(3); + case AREMW: + case ASDIVW: return S32 | OPDP2(3); + + case ASMULL: + case ASMADDL: return OPDP3(1, 0, 1, 0); + case ASMNEGL: + case ASMSUBL: return OPDP3(1, 0, 1, 1); + case ASMULH: return OPDP3(1, 0, 2, 0); + case AUMULL: + case AUMADDL: return OPDP3(1, 0, 5, 0); + case AUMNEGL: + case AUMSUBL: return 
OPDP3(1, 0, 5, 1); + case AUMULH: return OPDP3(1, 0, 6, 0); + + case AUREM: + case AUDIV: return S64 | OPDP2(2); + case AUREMW: + case AUDIVW: return S32 | OPDP2(2); + + case AAESE: return 0x4E<<24 | 2<<20 | 8<<16 | 4<<12 | 2<<10; + case AAESD: return 0x4E<<24 | 2<<20 | 8<<16 | 5<<12 | 2<<10; + case AAESMC: return 0x4E<<24 | 2<<20 | 8<<16 | 6<<12 | 2<<10; + case AAESIMC: return 0x4E<<24 | 2<<20 | 8<<16 | 7<<12 | 2<<10; + + case ASHA1C: return 0x5E<<24 | 0<<12; + case ASHA1P: return 0x5E<<24 | 1<<12; + case ASHA1M: return 0x5E<<24 | 2<<12; + case ASHA1SU0: return 0x5E<<24 | 3<<12; + case ASHA256H: return 0x5E<<24 | 4<<12; + case ASHA256H2: return 0x5E<<24 | 5<<12; + case ASHA256SU1: return 0x5E<<24 | 6<<12; + + case ASHA1H: return 0x5E<<24 | 2<<20 | 8<<16 | 0<<12 | 2<<10; + case ASHA1SU1: return 0x5E<<24 | 2<<20 | 8<<16 | 1<<12 | 2<<10; + case ASHA256SU0: return 0x5E<<24 | 2<<20 | 8<<16 | 2<<12 | 2<<10; + + case AFCVTZSD: return FPCVTI(1, 0, 1, 3, 0); + case AFCVTZSDW: return FPCVTI(0, 0, 1, 3, 0); + case AFCVTZSS: return FPCVTI(1, 0, 0, 3, 0); + case AFCVTZSSW: return FPCVTI(0, 0, 0, 3, 0); + + case AFCVTZUD: return FPCVTI(1, 0, 1, 3, 1); + case AFCVTZUDW: return FPCVTI(0, 0, 1, 3, 1); + case AFCVTZUS: return FPCVTI(1, 0, 0, 3, 1); + case AFCVTZUSW: return FPCVTI(0, 0, 0, 3, 1); + + case ASCVTFD: return FPCVTI(1, 0, 1, 0, 2); + case ASCVTFS: return FPCVTI(1, 0, 0, 0, 2); + case ASCVTFWD: return FPCVTI(0, 0, 1, 0, 2); + case ASCVTFWS: return FPCVTI(0, 0, 0, 0, 2); + + case AUCVTFD: return FPCVTI(1, 0, 1, 0, 3); + case AUCVTFS: return FPCVTI(1, 0, 0, 0, 3); + case AUCVTFWD: return FPCVTI(0, 0, 1, 0, 3); + case AUCVTFWS: return FPCVTI(0, 0, 0, 0, 3); + + case AFADDS: return FPOP2S(0, 0, 0, 2); + case AFADDD: return FPOP2S(0, 0, 1, 2); + case AFSUBS: return FPOP2S(0, 0, 0, 3); + case AFSUBD: return FPOP2S(0, 0, 1, 3); + case AFMULS: return FPOP2S(0, 0, 0, 0); + case AFMULD: return FPOP2S(0, 0, 1, 0); + case AFDIVS: return FPOP2S(0, 0, 0, 1); + case AFDIVD: return 
FPOP2S(0, 0, 1, 1); + case AFMAXS: return FPOP2S(0, 0, 0, 4); + case AFMINS: return FPOP2S(0, 0, 0, 5); + case AFMAXD: return FPOP2S(0, 0, 1, 4); + case AFMIND: return FPOP2S(0, 0, 1, 5); + case AFMAXNMS: return FPOP2S(0, 0, 0, 6); + case AFMAXNMD: return FPOP2S(0, 0, 1, 6); + case AFMINNMS: return FPOP2S(0, 0, 0, 7); + case AFMINNMD: return FPOP2S(0, 0, 1, 7); + case AFNMULS: return FPOP2S(0, 0, 0, 8); + case AFNMULD: return FPOP2S(0, 0, 1, 8); + + case AFCMPS: return FPCMP(0, 0, 0, 0, 0); + case AFCMPD: return FPCMP(0, 0, 1, 0, 0); + case AFCMPES: return FPCMP(0, 0, 0, 0, 16); + case AFCMPED: return FPCMP(0, 0, 1, 0, 16); + + case AFCCMPS: return FPCCMP(0, 0, 0, 0); + case AFCCMPD: return FPCCMP(0, 0, 1, 0); + case AFCCMPES: return FPCCMP(0, 0, 0, 1); + case AFCCMPED: return FPCCMP(0, 0, 1, 1); + + case AFCSELS: return 0x1E<<24 | 0<<22 | 1<<21 | 3<<10; + case AFCSELD: return 0x1E<<24 | 1<<22 | 1<<21 | 3<<10; + + case AFMOVS: return FPOP1S(0, 0, 0, 0); + case AFABSS: return FPOP1S(0, 0, 0, 1); + case AFNEGS: return FPOP1S(0, 0, 0, 2); + case AFSQRTS: return FPOP1S(0, 0, 0, 3); + case AFCVTSD: return FPOP1S(0, 0, 0, 5); + case AFCVTSH: return FPOP1S(0, 0, 0, 7); + case AFRINTNS: return FPOP1S(0, 0, 0, 8); + case AFRINTPS: return FPOP1S(0, 0, 0, 9); + case AFRINTMS: return FPOP1S(0, 0, 0, 10); + case AFRINTZS: return FPOP1S(0, 0, 0, 11); + case AFRINTAS: return FPOP1S(0, 0, 0, 12); + case AFRINTXS: return FPOP1S(0, 0, 0, 14); + case AFRINTIS: return FPOP1S(0, 0, 0, 15); + + case AFMOVD: return FPOP1S(0, 0, 1, 0); + case AFABSD: return FPOP1S(0, 0, 1, 1); + case AFNEGD: return FPOP1S(0, 0, 1, 2); + case AFSQRTD: return FPOP1S(0, 0, 1, 3); + case AFCVTDS: return FPOP1S(0, 0, 1, 4); + case AFCVTDH: return FPOP1S(0, 0, 1, 7); + case AFRINTND: return FPOP1S(0, 0, 1, 8); + case AFRINTPD: return FPOP1S(0, 0, 1, 9); + case AFRINTMD: return FPOP1S(0, 0, 1, 10); + case AFRINTZD: return FPOP1S(0, 0, 1, 11); + case AFRINTAD: return FPOP1S(0, 0, 1, 12); + case AFRINTXD: return 
FPOP1S(0, 0, 1, 14); + case AFRINTID: return FPOP1S(0, 0, 1, 15); + case AFCVTHS: return FPOP1S(0, 0, 3, 4); + case AFCVTHD: return FPOP1S(0, 0, 3, 5); + + } + diag("bad rrr %d %A", a, a); + prasm(curp); + return 0; +} + +/* + * imm -> Rd + * imm op Rn -> Rd + */ +static long +opirr(int a) +{ + switch(a){ + + /* op $addcon, Rn, Rd */ + case AMOV: + case AADD: return S64 | 0<<30 | 0<<29 | 0x11<<24; + case ACMN: + case AADDS: return S64 | 0<<30 | 1<<29 | 0x11<<24; + case AMOVW: + case AADDW: return S32 | 0<<30 | 0<<29 | 0x11<<24; + case ACMNW: + case AADDSW: return S32 | 0<<30 | 1<<29 | 0x11<<24; + case ASUB: return S64 | 1<<30 | 0<<29 | 0x11<<24; + case ACMP: + case ASUBS: return S64 | 1<<30 | 1<<29 | 0x11<<24; + case ASUBW: return S32 | 1<<30 | 0<<29 | 0x11<<24; + case ACMPW: + case ASUBSW: return S32 | 1<<30 | 1<<29 | 0x11<<24; + + /* op $imm(SB), Rd; op label, Rd */ + case AADR: return 0<<31 | 0x10<<24; + case AADRP: return 1<<31 | 0x10<<24; + + /* op $bimm, Rn, Rd */ + case AAND: return S64 | 0<<29 | 0x24<<23; + case AANDW: return S32 | 0<<29 | 0x24<<23 | 0<<22; + case AORR: return S64 | 1<<29 | 0x24<<23; + case AORRW: return S32 | 1<<29 | 0x24<<23 | 0<<22; + case AEOR: return S64 | 2<<29 | 0x24<<23; + case AEORW: return S32 | 2<<29 | 0x24<<23 | 0<<22; + case AANDS: return S64 | 3<<29 | 0x24<<23; + case AANDSW: return S32 | 3<<29 | 0x24<<23 | 0<<22; + + case AASR: return S64 | 0<<29 | 0x26<<23; /* alias of SBFM */ + case AASRW: return S32 | 0<<29 | 0x26<<23 | 0<<22; + + /* op $width, $lsb, Rn, Rd */ + case ABFI: return S64 | 2<<29 | 0x26<<23 | 1<<22; /* alias of BFM */ + case ABFIW: return S32 | 2<<29 | 0x26<<23 | 0<<22; + + /* op $imms, $immr, Rn, Rd */ + case ABFM: return S64 | 1<<29 | 0x26<<23 | 1<<22; + case ABFMW: return S32 | 1<<29 | 0x26<<23 | 0<<22; + case ASBFM: return S64 | 0<<29 | 0x26<<23 | 1<<22; + case ASBFMW: return S32 | 0<<29 | 0x26<<23 | 0<<22; + case AUBFM: return S64 | 2<<29 | 0x26<<23 | 1<<22; + case AUBFMW: return S32 | 2<<29 | 0x26<<23 | 
0<<22; + + case ABFXIL: return S64 | 1<<29 | 0x26<<23 | 1<<22; /* alias of BFM */ + case ABFXILW: return S32 | 1<<29 | 0x26<<23 | 0<<22; + + case AEXTR: return S64 | 0<<29 | 0x27<<23 | 1<<22 | 0<<21; + case AEXTRW: return S32 | 0<<29 | 0x27<<23 | 0<<22 | 0<<21; + + case ACBNZ: return S64 | 0x1A<<25 | 1<<24; + case ACBNZW: return S32 | 0x1A<<25 | 1<<24; + case ACBZ: return S64 | 0x1A<<25 | 0<<24; + case ACBZW: return S32 | 0x1A<<25 | 0<<24; + + case ACCMN: return S64 | 0<<30 | 1<<29 | 0xD2<<21 | 1<<11 | 0<<10 | 0<<4; /* imm5<<16 | cond<<12 | nzcv<<0 */ + case ACCMNW: return S32 | 0<<30 | 1<<29 | 0xD2<<21 | 1<<11 | 0<<10 | 0<<4; + case ACCMP: return S64 | 1<<30 | 1<<29 | 0xD2<<21 | 1<<11 | 0<<10 | 0<<4; /* imm5<<16 | cond<<12 | nzcv<<0 */ + case ACCMPW: return S32 | 1<<30 | 1<<29 | 0xD2<<21 | 1<<11 | 0<<10 | 0<<4; + + case AMOVK: return S64 | 3<<29 | 0x25<<23; + case AMOVKW: return S32 | 3<<29 | 0x25<<23; + + case AMOVN: return S64 | 0<<29 | 0x25<<23; + case AMOVNW: return S32 | 0<<29 | 0x25<<23; + case AMOVZ: return S64 | 2<<29 | 0x25<<23; + case AMOVZW: return S32 | 2<<29 | 0x25<<23; + + case AMSR: return SYSOP(0,0,0,4,0,0,0x1F); /* MSR (immediate) */ + + case AAT: + case ADC: + case AIC: + case ATLBI: + case ASYS: return SYSOP(0,1,0,0,0,0,0); + case ASYSL: return SYSOP(1,1,0,0,0,0,0); + + case ATBZ: return 0x36<<24; + case ATBNZ: return 0x37<<24; + + case ADSB: return SYSOP(0,0,3,3,0,4,0x1F); + case ADMB: return SYSOP(0,0,3,3,0,5,0x1F); + case AISB: return SYSOP(0,0,3,3,0,6,0x1F); + case AHINT: return SYSOP(0,0,3,2,0,0,0x1F); + + } + diag("bad irr %A", a); + prasm(curp); + return 0; +} + +/* + * bit operations + */ +#define OPBIT(x) (1<<30 | 0<<29 | 0xD6<<21 | 0<<16 | (x)<<10) + +static long +opbit(int a) +{ + switch(a){ + case ACLS: return S64 | OPBIT(5); + case ACLSW: return S32 | OPBIT(5); + case ACLZ: return S64 | OPBIT(4); + case ACLZW: return S32 | OPBIT(4); + case ARBIT: return S64 | OPBIT(0); + case ARBITW: return S32 | OPBIT(0); + case AREV: return S64 | 
OPBIT(3); + case AREVW: return S32 | OPBIT(2); + case AREV16: return S64 | OPBIT(1); + case AREV16W: return S32 | OPBIT(1); + case AREV32: return S64 | OPBIT(2); + default: + diag("bad bit op\n%P", curp); + return 0; + } +} + +/* + * add/subtract extended register + */ +static long +opxrrr(int a) +{ + switch(a) { + case AADD: return S64 | 0<<30 | 0<<29 | 0x0b<<24 | 0<<22 | 1<<21 | LSL0_64; + case AADDW: return S32 | 0<<30 | 0<<29 | 0x0b<<24 | 0<<22 | 1<<21 | LSL0_32; + case ACMN: + case AADDS: return S64 | 0<<30 | 1<<29 | 0x0b<<24 | 0<<22 | 1<<21 | LSL0_64; + case ACMNW: + case AADDSW: return S32 | 0<<30 | 1<<29 | 0x0b<<24 | 0<<22 | 1<<21 | LSL0_32; + + case ASUB: return S64 | 1<<30 | 0<<29 | 0x0b<<24 | 0<<22 | 1<<21 | LSL0_64; + case ASUBW: return S32 | 1<<30 | 0<<29 | 0x0b<<24 | 0<<22 | 1<<21 | LSL0_32; + case ACMP: + case ASUBS: return S64 | 1<<30 | 1<<29 | 0x0b<<24 | 0<<22 | 1<<21 | LSL0_64; + case ACMPW: + case ASUBSW: return S32 | 1<<30 | 1<<29 | 0x0b<<24 | 0<<22 | 1<<21 | LSL0_32; + + } + diag("bad opxrrr %A\n%P", a, curp); + return 0; +} + +static long +opimm(int a) +{ + switch(a){ + case ASVC: return 0xD4<<24 | 0<<21 | 1; /* imm16<<5 */ + case AHVC: return 0xD4<<24 | 0<<21 | 2; + case ASMC: return 0xD4<<24 | 0<<21 | 3; + case ABRK: return 0xD4<<24 | 1<<21 | 0; + case AHLT: return 0xD4<<24 | 2<<21 | 0; + case ADCPS1: return 0xD4<<24 | 5<<21 | 1; + case ADCPS2: return 0xD4<<24 | 5<<21 | 2; + case ADCPS3: return 0xD4<<24 | 5<<21 | 3; + + case ACLREX: return SYSOP(0,0,3,3,0,2,0x1F); + } + diag("bad imm %A", a); + prasm(curp); + return 0; +} + +static vlong +brdist(Prog *p, int preshift, int flen, int shift) +{ + vlong v, t; + Sym *s; + + v = 0; + if(p->cond == UP) { + s = p->to.sym; + if(s->type != SUNDEF) + diag("bad branch sym type"); + v = (uvlong)s->value >> (Roffset-2); + dynreloc(s, p->pc, 0); /* TO DO */ + } + else if(p->cond != P) + v = (p->cond->pc>>preshift) - (pc>>preshift); + if((v & ((1<<shift)-1)) != 0) + diag("misaligned label\n%P", p); + v >>= 
shift; + t = (vlong)1 << (flen-1); + if(v < -t || v >= t) + diag("branch too far\n%P", p); + return v & ((t<<1)-1); +} + +/* + * pc-relative branches + */ +static long +opbra(int a) +{ + switch(a) { + case ABEQ: return OPBcc(0x0); + case ABNE: return OPBcc(0x1); + case ABCS: return OPBcc(0x2); + case ABHS: return OPBcc(0x2); + case ABCC: return OPBcc(0x3); + case ABLO: return OPBcc(0x3); + case ABMI: return OPBcc(0x4); + case ABPL: return OPBcc(0x5); + case ABVS: return OPBcc(0x6); + case ABVC: return OPBcc(0x7); + case ABHI: return OPBcc(0x8); + case ABLS: return OPBcc(0x9); + case ABGE: return OPBcc(0xa); + case ABLT: return OPBcc(0xb); + case ABGT: return OPBcc(0xc); + case ABLE: return OPBcc(0xd); /* imm19<<5 | cond */ + case AB: return 0<<31 | 5<<26; /* imm26 */ + case ABL: return 1<<31 | 5<<26; + } + diag("bad bra %A", a); + prasm(curp); + return 0; +} + +static long +opbrr(int a) +{ + switch(a){ + case ABL: return OPBLR(1); /* BLR */ + case AB: return OPBLR(0); /* BR */ + case ARET: return OPBLR(2); /* RET */ + } + diag("bad brr %A", a); + prasm(curp); + return 0; +} + +static long +op0(int a) +{ + switch(a){ + case ADRPS: return 0x6B<<25 | 5<<21 | 0x1F<<16 | 0x1F<<5; + case AERET: return 0x6B<<25 | 4<<21 | 0x1F<<16 | 0<<10 | 0x1F<<5; + case ANOP: return SYSHINT(0); + case AYIELD: return SYSHINT(1); + case AWFE: return SYSHINT(2); + case AWFI: return SYSHINT(3); + case ASEV: return SYSHINT(4); + case ASEVL: return SYSHINT(5); + } + diag("bad op0 %A", a); + prasm(curp); + return 0; +} + +/* + * register offset + */ +static long +opload(int a) +{ + switch(a){ + case ALDAR: return LDSTX(3,1,1,0,1) | 0x1F<<10; + case ALDARW: return LDSTX(2,1,1,0,1) | 0x1F<<10; + case ALDARB: return LDSTX(0,1,1,0,1) | 0x1F<<10; + case ALDARH: return LDSTX(1,1,1,0,1) | 0x1F<<10; + case ALDAXP: return LDSTX(3,0,1,1,1); + case ALDAXPW: return LDSTX(2,0,1,1,1); + case ALDAXR: return LDSTX(3,0,1,0,1) | 0x1F<<10; + case ALDAXRW: return LDSTX(2,1,1,0,1) | 0x1F<<10; + case ALDAXRB: 
return LDSTX(0,0,1,0,1) | 0x1F<<10; + case ALDAXRH: return LDSTX(1,0,1,0,1) | 0x1F<<10; + case ALDXR: return LDSTX(3,0,1,0,0) | 0x1F<<10; + case ALDXRB: return LDSTX(0,0,1,0,0) | 0x1F<<10; + case ALDXRH: return LDSTX(1,0,1,0,0) | 0x1F<<10; + case ALDXRW: return LDSTX(2,0,1,0,0) | 0x1F<<10; + case ALDXP: return LDSTX(3,0,1,1,0); + case ALDXPW: return LDSTX(2,0,1,1,0); + case AMOVNP: return S64 | 0<<30 | 5<<27 | 0<<26 | 0<<23 | 1<<22; + case AMOVNPW: return S32 | 0<<30 | 5<<27 | 0<<26 | 0<<23 | 1<<22; + } + diag("bad opload %A\n%P", a, curp); + return 0; +} + +static long +opstore(int a) +{ + switch(a){ + case ASTLR: return LDSTX(3,1,0,0,1) | 0x1F<<10; + case ASTLRB: return LDSTX(0,1,0,0,1) | 0x1F<<10; + case ASTLRH: return LDSTX(1,1,0,0,1) | 0x1F<<10; + case ASTLP: return LDSTX(3,0,0,1,1); + case ASTLPW: return LDSTX(2,0,0,1,1); + case ASTLRW: return LDSTX(2,1,0,0,1) | 0x1F<<10; + case ASTLXP: return LDSTX(2,0,0,1,1); + case ASTLXPW: return LDSTX(3,0,0,1,1); + case ASTLXR: return LDSTX(3,0,0,0,1) | 0x1F<<10; + case ASTLXRB: return LDSTX(0,0,0,0,1) | 0x1F<<10; + case ASTLXRH: return LDSTX(1,0,0,0,1) | 0x1F<<10; + case ASTLXRW: return LDSTX(2,0,0,0,1) | 0x1F<<10; + case ASTXR: return LDSTX(3,0,0,0,0) | 0x1F<<10; + case ASTXRB: return LDSTX(0,0,0,0,0) | 0x1F<<10; + case ASTXRH: return LDSTX(1,0,0,0,0) | 0x1F<<10; + case ASTXP: return LDSTX(3,0,0,1,0); + case ASTXPW: return LDSTX(2,0,0,1,0); + case ASTXRW: return LDSTX(2,0,0,0,0) | 0x1F<<10; + case AMOVNP: return S64 | 0<<30 | 5<<27 | 0<<26 | 0<<23 | 1<<22; + case AMOVNPW: return S32 | 0<<30 | 5<<27 | 0<<26 | 0<<23 | 1<<22; + } + diag("bad opstore %A\n%P", a, curp); + return 0; +} + +/* + * load/store register (unsigned immediate) C3.3.13 + * these produce 64-bit values (when there's an option) + */ + +static long +olsr12u(long o, long v, int b, int r) +{ + if(v < 0 || v >= (1<<12)) + diag("offset out of range: %ld\n%P", v, curp); + o |= (v&0xFFF)<<10; + o |= b << 5; + o |= r; + return o; +} + +static long +opldr12(int 
a) +{ + switch(a){ + case AMOV: return LDSTR12U(3, 0, 1); /* imm12<<10 | Rn<<5 | Rt */ + case AMOVW: return LDSTR12U(2, 0, 2); + case AMOVWU: return LDSTR12U(2, 0, 1); + case AMOVH: return LDSTR12U(1, 0, 2); + case AMOVHU: return LDSTR12U(1, 0, 1); + case AMOVB: return LDSTR12U(0, 0, 2); + case AMOVBU: return LDSTR12U(0, 0, 1); + case AFMOVS: return LDSTR12U(2, 1, 1); + case AFMOVD: return LDSTR12U(3, 1, 1); + } + diag("bad opldr12 %A\n%P", a, curp); + return 0; +} + +static long +opstr12(int a) +{ + return LD2STR(opldr12(a)); +} + +/* + * load/store register (unscaled immediate) C3.3.12 + */ + +static long +olsr9s(long o, long v, int b, int r) +{ + if(v < -256 || v > 255) + diag("offset out of range: %ld\n%P", v, curp); + o |= (v&0x1FF)<<12; + o |= b << 5; + o |= r; + return o; +} + +static long +opldr9(int a) +{ + switch(a){ + case AMOV: return LDSTR9S(3, 0, 1); /* simm9<<12 | Rn<<5 | Rt */ + case AMOVW: return LDSTR9S(2, 0, 2); + case AMOVWU: return LDSTR9S(2, 0, 1); + case AMOVH: return LDSTR9S(1, 0, 2); + case AMOVHU: return LDSTR9S(1, 0, 1); + case AMOVB: return LDSTR9S(0, 0, 2); + case AMOVBU: return LDSTR9S(0, 0, 1); + case AFMOVS: return LDSTR9S(2, 1, 1); + case AFMOVD: return LDSTR9S(3, 1, 1); + } + diag("bad opldr9 %A\n%P", a, curp); + return 0; +} + +static long +opstr9(int a) +{ + return LD2STR(opldr9(a)); +} + +static long +opldrpp(int a) +{ + switch(a){ + case AMOV: return 3<<30 | 7<<27 | 0<<26 | 0<<24 | 1<<22; /* simm9<<12 | Rn<<5 | Rt */ + case AMOVW: return 2<<30 | 7<<27 | 0<<26 | 0<<24 | 2<<22; + case AMOVWU: return 2<<30 | 7<<27 | 0<<26 | 0<<24 | 1<<22; + case AMOVH: return 1<<30 | 7<<27 | 0<<26 | 0<<24 | 2<<22; + case AMOVHU: return 1<<30 | 7<<27 | 0<<26 | 0<<24 | 1<<22; + case AMOVB: return 0<<30 | 7<<27 | 0<<26 | 0<<24 | 2<<22; + case AMOVBU: return 0<<30 | 7<<27 | 0<<26 | 0<<24 | 1<<22; + } + diag("bad opldr %A\n%P", a, curp); + return 0; +} + +/* + * load/store register (extended register) + */ +static long +olsxrr(int a, int b, int c, int 
d) +{ + diag("need load/store extended register\n%P", curp); + return -1; +} + +static long +oaddi(long o1, long v, int r, int rt) +{ + if((v & 0xFFF000) != 0){ + v >>= 12; + o1 |= 1<<22; + } + o1 |= ((v & 0xFFF) << 10) | (r<<5) | rt; + return o1; +} + +/* + * load a a literal value into dr + */ +static long +omovlit(int as, Prog *p, Adr *a, int dr) +{ + long v, o1; + int w, fp; + + if(p->cond == nil){ /* not in literal pool */ + aclass(a); +fprint(2, "omovlit add %lld (%#llux)\n", instoffset, instoffset); + /* TO DO: could be clever, and use general constant builder */ + o1 = opirr(AADD); + v = instoffset; + if(v != 0 && (v & 0xFFF) == 0){ + v >>= 12; + o1 |= 1<<22; /* shift, by 12 */ + } + o1 |= ((v& 0xFFF) << 10) | (REGZERO<<5) | dr; + }else{ + fp = 0; + w = 0; /* default: 32 bit, unsigned */ + switch(as){ + case AFMOVS: + fp = 1; + break; + case AFMOVD: + fp = 1; + w = 1; /* 64 bit simd&fp */ + break; + case AMOV: + if(p->cond->as == ADWORD) + w = 1; /* 64 bit */ + else if(p->cond->to.offset < 0) + w = 2; /* sign extend */ + break; + case AMOVB: + case AMOVH: + case AMOVW: + w = 2; /* 32 bit, sign-extended to 64 */ + break; + } + v = brdist(p, 0, 19, 2); + o1 = (w<<30)|(fp<<26)|(3<<27); + o1 |= (v&0x7FFFF)<<5; + o1 |= dr; + } + return o1; +} + +static long +opbfm(int a, int r, int s, int rf, int rt) +{ + long o, c; + + o = opirr(a); + if((o & (1<<31)) == 0) + c = 32; + else + c = 64; + if(r < 0 || r >= c) + diag("illegal bit number\n%P", curp); + o |= (r&0x3F)<<16; + if(s < 0 || s >= c) + diag("illegal bit number\n%P", curp); + o |= (s&0x3F)<<10; + o |= (rf<<5) | rt; + return o; +} + +static long +opextr(int a, long v, int rn, int rm, int rt) +{ + long o, c; + + o = opirr(a); + c = (o & (1<<31)) != 0? 
63: 31; + if(v < 0 || v > c) + diag("illegal bit number\n%P", curp); + o |= v<<10; + o |= rn << 5; + o |= rm << 16; + o |= rt; + return o; +} + +/* + * size in log2(bytes) + */ +static int +movesize(int a) +{ + switch(a){ + case AMOV: + return 3; + case AMOVW: + case AMOVWU: + return 2; + case AMOVH: + case AMOVHU: + return 1; + case AMOVB: + case AMOVBU: + return 0; + case AFMOVS: + return 2; + case AFMOVD: + return 3; + default: + return -1; + } +} + +/* + * SIMD + */ |