Mercurial
comparison third_party/luajit/src/lj_emit_arm.h @ 186:8cf4ec5e2191 hg-web
Fixed merge conflict.
| author | MrJuneJune <me@mrjunejune.com> |
|---|---|
| date | Fri, 23 Jan 2026 22:38:59 -0800 |
| parents | 94705b5986b3 |
| children |
comparison
equal
deleted
inserted
replaced
| 176:fed99fc04e12 | 186:8cf4ec5e2191 |
|---|---|
| 1 /* | |
| 2 ** ARM instruction emitter. | |
| 3 ** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h | |
| 4 */ | |
| 5 | |
| 6 /* -- Constant encoding --------------------------------------------------- */ | |
| 7 | |
| 8 static uint8_t emit_invai[16] = { | |
| 9 /* AND */ (ARMI_AND^ARMI_BIC) >> 21, | |
| 10 /* EOR */ 0, | |
| 11 /* SUB */ (ARMI_SUB^ARMI_ADD) >> 21, | |
| 12 /* RSB */ 0, | |
| 13 /* ADD */ (ARMI_ADD^ARMI_SUB) >> 21, | |
| 14 /* ADC */ (ARMI_ADC^ARMI_SBC) >> 21, | |
| 15 /* SBC */ (ARMI_SBC^ARMI_ADC) >> 21, | |
| 16 /* RSC */ 0, | |
| 17 /* TST */ 0, | |
| 18 /* TEQ */ 0, | |
| 19 /* CMP */ (ARMI_CMP^ARMI_CMN) >> 21, | |
| 20 /* CMN */ (ARMI_CMN^ARMI_CMP) >> 21, | |
| 21 /* ORR */ 0, | |
| 22 /* MOV */ (ARMI_MOV^ARMI_MVN) >> 21, | |
| 23 /* BIC */ (ARMI_BIC^ARMI_AND) >> 21, | |
| 24 /* MVN */ (ARMI_MVN^ARMI_MOV) >> 21 | |
| 25 }; | |
| 26 | |
| 27 /* Encode constant in K12 format for data processing instructions. */ | |
| 28 static uint32_t emit_isk12(ARMIns ai, int32_t n) | |
| 29 { | |
| 30 uint32_t invai, i, m = (uint32_t)n; | |
| 31 /* K12: unsigned 8 bit value, rotated in steps of two bits. */ | |
| 32 for (i = 0; i < 4096; i += 256, m = lj_rol(m, 2)) | |
| 33 if (m <= 255) return ARMI_K12|m|i; | |
| 34 /* Otherwise try negation/complement with the inverse instruction. */ | |
| 35 invai = emit_invai[((ai >> 21) & 15)]; | |
| 36 if (!invai) return 0; /* Failed. No inverse instruction. */ | |
| 37 m = ~(uint32_t)n; | |
| 38 if (invai == ((ARMI_SUB^ARMI_ADD) >> 21) || | |
| 39 invai == (ARMI_CMP^ARMI_CMN) >> 21) m++; | |
| 40 for (i = 0; i < 4096; i += 256, m = lj_rol(m, 2)) | |
| 41 if (m <= 255) return ARMI_K12|(invai<<21)|m|i; | |
| 42 return 0; /* Failed. */ | |
| 43 } | |
| 44 | |
| 45 /* -- Emit basic instructions --------------------------------------------- */ | |
| 46 | |
| 47 static void emit_dnm(ASMState *as, ARMIns ai, Reg rd, Reg rn, Reg rm) | |
| 48 { | |
| 49 *--as->mcp = ai | ARMF_D(rd) | ARMF_N(rn) | ARMF_M(rm); | |
| 50 } | |
| 51 | |
| 52 static void emit_dm(ASMState *as, ARMIns ai, Reg rd, Reg rm) | |
| 53 { | |
| 54 *--as->mcp = ai | ARMF_D(rd) | ARMF_M(rm); | |
| 55 } | |
| 56 | |
| 57 static void emit_dn(ASMState *as, ARMIns ai, Reg rd, Reg rn) | |
| 58 { | |
| 59 *--as->mcp = ai | ARMF_D(rd) | ARMF_N(rn); | |
| 60 } | |
| 61 | |
| 62 static void emit_nm(ASMState *as, ARMIns ai, Reg rn, Reg rm) | |
| 63 { | |
| 64 *--as->mcp = ai | ARMF_N(rn) | ARMF_M(rm); | |
| 65 } | |
| 66 | |
| 67 static void emit_d(ASMState *as, ARMIns ai, Reg rd) | |
| 68 { | |
| 69 *--as->mcp = ai | ARMF_D(rd); | |
| 70 } | |
| 71 | |
| 72 static void emit_n(ASMState *as, ARMIns ai, Reg rn) | |
| 73 { | |
| 74 *--as->mcp = ai | ARMF_N(rn); | |
| 75 } | |
| 76 | |
| 77 static void emit_m(ASMState *as, ARMIns ai, Reg rm) | |
| 78 { | |
| 79 *--as->mcp = ai | ARMF_M(rm); | |
| 80 } | |
| 81 | |
| 82 static void emit_lsox(ASMState *as, ARMIns ai, Reg rd, Reg rn, int32_t ofs) | |
| 83 { | |
| 84 lj_assertA(ofs >= -255 && ofs <= 255, | |
| 85 "load/store offset %d out of range", ofs); | |
| 86 if (ofs < 0) ofs = -ofs; else ai |= ARMI_LS_U; | |
| 87 *--as->mcp = ai | ARMI_LS_P | ARMI_LSX_I | ARMF_D(rd) | ARMF_N(rn) | | |
| 88 ((ofs & 0xf0) << 4) | (ofs & 0x0f); | |
| 89 } | |
| 90 | |
| 91 static void emit_lso(ASMState *as, ARMIns ai, Reg rd, Reg rn, int32_t ofs) | |
| 92 { | |
| 93 lj_assertA(ofs >= -4095 && ofs <= 4095, | |
| 94 "load/store offset %d out of range", ofs); | |
| 95 /* Combine LDR/STR pairs to LDRD/STRD. */ | |
| 96 if (*as->mcp == (ai|ARMI_LS_P|ARMI_LS_U|ARMF_D(rd^1)|ARMF_N(rn)|(ofs^4)) && | |
| 97 (ai & ~(ARMI_LDR^ARMI_STR)) == ARMI_STR && rd != rn && | |
| 98 (uint32_t)ofs <= 252 && !(ofs & 3) && !((rd ^ (ofs >>2)) & 1) && | |
| 99 as->mcp != as->mcloop) { | |
| 100 as->mcp++; | |
| 101 emit_lsox(as, ai == ARMI_LDR ? ARMI_LDRD : ARMI_STRD, rd&~1, rn, ofs&~4); | |
| 102 return; | |
| 103 } | |
| 104 if (ofs < 0) ofs = -ofs; else ai |= ARMI_LS_U; | |
| 105 *--as->mcp = ai | ARMI_LS_P | ARMF_D(rd) | ARMF_N(rn) | ofs; | |
| 106 } | |
| 107 | |
| 108 #if !LJ_SOFTFP | |
| 109 static void emit_vlso(ASMState *as, ARMIns ai, Reg rd, Reg rn, int32_t ofs) | |
| 110 { | |
| 111 lj_assertA(ofs >= -1020 && ofs <= 1020 && (ofs&3) == 0, | |
| 112 "load/store offset %d out of range", ofs); | |
| 113 if (ofs < 0) ofs = -ofs; else ai |= ARMI_LS_U; | |
| 114 *--as->mcp = ai | ARMI_LS_P | ARMF_D(rd & 15) | ARMF_N(rn) | (ofs >> 2); | |
| 115 } | |
| 116 #endif | |
| 117 | |
| 118 /* -- Emit loads/stores --------------------------------------------------- */ | |
| 119 | |
/* Prefer spills of BASE/L: the test is true only for refs below ASMREF_L. */
#define emit_canremat(ref) ((ref) < ASMREF_L)
| 122 | |
| 123 /* Try to find a one step delta relative to another constant. */ | |
| 124 static int emit_kdelta1(ASMState *as, Reg d, int32_t i) | |
| 125 { | |
| 126 RegSet work = ~as->freeset & RSET_GPR; | |
| 127 while (work) { | |
| 128 Reg r = rset_picktop(work); | |
| 129 IRRef ref = regcost_ref(as->cost[r]); | |
| 130 lj_assertA(r != d, "dest reg not free"); | |
| 131 if (emit_canremat(ref)) { | |
| 132 int32_t delta = i - (ra_iskref(ref) ? ra_krefk(as, ref) : IR(ref)->i); | |
| 133 uint32_t k = emit_isk12(ARMI_ADD, delta); | |
| 134 if (k) { | |
| 135 if (k == ARMI_K12) | |
| 136 emit_dm(as, ARMI_MOV, d, r); | |
| 137 else | |
| 138 emit_dn(as, ARMI_ADD^k, d, r); | |
| 139 return 1; | |
| 140 } | |
| 141 } | |
| 142 rset_clear(work, r); | |
| 143 } | |
| 144 return 0; /* Failed. */ | |
| 145 } | |
| 146 | |
| 147 /* Try to find a two step delta relative to another constant. */ | |
| 148 static int emit_kdelta2(ASMState *as, Reg rd, int32_t i) | |
| 149 { | |
| 150 RegSet work = ~as->freeset & RSET_GPR; | |
| 151 while (work) { | |
| 152 Reg r = rset_picktop(work); | |
| 153 IRRef ref = regcost_ref(as->cost[r]); | |
| 154 lj_assertA(r != rd, "dest reg %d not free", rd); | |
| 155 if (emit_canremat(ref)) { | |
| 156 int32_t other = ra_iskref(ref) ? ra_krefk(as, ref) : IR(ref)->i; | |
| 157 if (other) { | |
| 158 int32_t delta = i - other; | |
| 159 uint32_t sh, inv = 0, k2, k; | |
| 160 if (delta < 0) { delta = (int32_t)(~(uint32_t)delta+1u); inv = ARMI_ADD^ARMI_SUB; } | |
| 161 sh = lj_ffs(delta) & ~1; | |
| 162 k2 = emit_isk12(0, delta & (255 << sh)); | |
| 163 k = emit_isk12(0, delta & ~(255 << sh)); | |
| 164 if (k) { | |
| 165 emit_dn(as, ARMI_ADD^k2^inv, rd, rd); | |
| 166 emit_dn(as, ARMI_ADD^k^inv, rd, r); | |
| 167 return 1; | |
| 168 } | |
| 169 } | |
| 170 } | |
| 171 rset_clear(work, r); | |
| 172 } | |
| 173 return 0; /* Failed. */ | |
| 174 } | |
| 175 | |
| 176 /* Load a 32 bit constant into a GPR. */ | |
| 177 static void emit_loadi(ASMState *as, Reg rd, int32_t i) | |
| 178 { | |
| 179 uint32_t k = emit_isk12(ARMI_MOV, i); | |
| 180 lj_assertA(rset_test(as->freeset, rd) || rd == RID_TMP, | |
| 181 "dest reg %d not free", rd); | |
| 182 if (k) { | |
| 183 /* Standard K12 constant. */ | |
| 184 emit_d(as, ARMI_MOV^k, rd); | |
| 185 } else if ((as->flags & JIT_F_ARMV6T2) && (uint32_t)i < 0x00010000u) { | |
| 186 /* 16 bit loword constant for ARMv6T2. */ | |
| 187 emit_d(as, ARMI_MOVW|(i & 0x0fff)|((i & 0xf000)<<4), rd); | |
| 188 } else if (emit_kdelta1(as, rd, i)) { | |
| 189 /* One step delta relative to another constant. */ | |
| 190 } else if ((as->flags & JIT_F_ARMV6T2)) { | |
| 191 /* 32 bit hiword/loword constant for ARMv6T2. */ | |
| 192 emit_d(as, ARMI_MOVT|((i>>16) & 0x0fff)|(((i>>16) & 0xf000)<<4), rd); | |
| 193 emit_d(as, ARMI_MOVW|(i & 0x0fff)|((i & 0xf000)<<4), rd); | |
| 194 } else if (emit_kdelta2(as, rd, i)) { | |
| 195 /* Two step delta relative to another constant. */ | |
| 196 } else { | |
| 197 /* Otherwise construct the constant with up to 4 instructions. */ | |
| 198 /* NYI: use mvn+bic, use pc-relative loads. */ | |
| 199 for (;;) { | |
| 200 uint32_t sh = lj_ffs(i) & ~1; | |
| 201 int32_t m = i & (255 << sh); | |
| 202 i &= ~(255 << sh); | |
| 203 if (i == 0) { | |
| 204 emit_d(as, ARMI_MOV ^ emit_isk12(0, m), rd); | |
| 205 break; | |
| 206 } | |
| 207 emit_dn(as, ARMI_ORR ^ emit_isk12(0, m), rd, rd); | |
| 208 } | |
| 209 } | |
| 210 } | |
| 211 | |
/* Load an address into a GPR: the pointer is converted with i32ptr() and
** handed to emit_loadi(). */
#define emit_loada(as, rd, addr) emit_loadi(as, (rd), i32ptr((addr)))
| 213 | |
| 214 static Reg ra_allock(ASMState *as, intptr_t k, RegSet allow); | |
| 215 | |
| 216 /* Get/set from constant pointer. */ | |
| 217 static void emit_lsptr(ASMState *as, ARMIns ai, Reg r, void *p) | |
| 218 { | |
| 219 int32_t i = i32ptr(p); | |
| 220 emit_lso(as, ai, r, ra_allock(as, (i & ~4095), rset_exclude(RSET_GPR, r)), | |
| 221 (i & 4095)); | |
| 222 } | |
| 223 | |
| 224 #if !LJ_SOFTFP | |
| 225 /* Load a number constant into an FPR. */ | |
| 226 static void emit_loadk64(ASMState *as, Reg r, IRIns *ir) | |
| 227 { | |
| 228 cTValue *tv = ir_knum(ir); | |
| 229 int32_t i; | |
| 230 if ((as->flags & JIT_F_VFPV3) && !tv->u32.lo) { | |
| 231 uint32_t hi = tv->u32.hi; | |
| 232 uint32_t b = ((hi >> 22) & 0x1ff); | |
| 233 if (!(hi & 0xffff) && (b == 0x100 || b == 0x0ff)) { | |
| 234 *--as->mcp = ARMI_VMOVI_D | ARMF_D(r & 15) | | |
| 235 ((tv->u32.hi >> 12) & 0x00080000) | | |
| 236 ((tv->u32.hi >> 4) & 0x00070000) | | |
| 237 ((tv->u32.hi >> 16) & 0x0000000f); | |
| 238 return; | |
| 239 } | |
| 240 } | |
| 241 i = i32ptr(tv); | |
| 242 emit_vlso(as, ARMI_VLDR_D, r, | |
| 243 ra_allock(as, (i & ~1020), RSET_GPR), (i & 1020)); | |
| 244 } | |
| 245 #endif | |
| 246 | |
/* Get/set global_State fields.
** Both expand to emit_lsptr() on the address of the named field inside
** J2G(as->J): ARMI_LDR for the getter, ARMI_STR for the setter.
*/
#define emit_getgl(as, r, field) \
  emit_lsptr(as, ARMI_LDR, (r), (void *)&J2G(as->J)->field)
#define emit_setgl(as, r, field) \
  emit_lsptr(as, ARMI_STR, (r), (void *)&J2G(as->J)->field)
| 252 | |
/* Trace number is determined from pc of exit instruction, so nothing is
** emitted here; the argument is only passed to UNUSED(). */
#define emit_setvmstate(as, i) UNUSED(i)
| 255 | |
| 256 /* -- Emit control-flow instructions -------------------------------------- */ | |
| 257 | |
/* Label for internal jumps: a plain pointer into the machine code. */
typedef MCode *MCLabel;
| 260 | |
/* Return label pointing to current PC, i.e. the current value of as->mcp. */
#define emit_label(as) ((as)->mcp)
| 263 | |
| 264 static void emit_branch(ASMState *as, ARMIns ai, MCode *target) | |
| 265 { | |
| 266 MCode *p = as->mcp; | |
| 267 ptrdiff_t delta = (target - p) - 1; | |
| 268 lj_assertA(((delta + 0x00800000) >> 24) == 0, "branch target out of range"); | |
| 269 *--p = ai | ((uint32_t)delta & 0x00ffffffu); | |
| 270 as->mcp = p; | |
| 271 } | |
| 272 | |
/* Emit a jump to target: emit_branch() with the ARMI_B instruction. */
#define emit_jmp(as, target) emit_branch(as, ARMI_B, (target))
| 274 | |
| 275 static void emit_call(ASMState *as, void *target) | |
| 276 { | |
| 277 MCode *p = --as->mcp; | |
| 278 ptrdiff_t delta = ((char *)target - (char *)p) - 8; | |
| 279 if ((((delta>>2) + 0x00800000) >> 24) == 0) { | |
| 280 if ((delta & 1)) | |
| 281 *p = ARMI_BLX | ((uint32_t)(delta>>2) & 0x00ffffffu) | ((delta&2) << 23); | |
| 282 else | |
| 283 *p = ARMI_BL | ((uint32_t)(delta>>2) & 0x00ffffffu); | |
| 284 } else { /* Target out of range: need indirect call. But don't use R0-R3. */ | |
| 285 Reg r = ra_allock(as, i32ptr(target), RSET_RANGE(RID_R4, RID_R12+1)); | |
| 286 *p = ARMI_BLXr | ARMF_M(r); | |
| 287 } | |
| 288 } | |
| 289 | |
| 290 /* -- Emit generic operations --------------------------------------------- */ | |
| 291 | |
/* Generic move between two regs.
** With hardware FP, a dst register id at or above RID_MAX_GPR is moved with
** VMOV (double or single, depending on the IR type). For GPR moves the most
** recently emitted instruction is inspected first: if it matches the
** load/store bit pattern tested below and uses dst in its N field (or, when
** bit 20 is clear, in its D field), that field is rewritten to src in place
** before the MOV is emitted.
*/
static void emit_movrr(ASMState *as, IRIns *ir, Reg dst, Reg src)
{
#if LJ_SOFTFP
  lj_assertA(!irt_isnum(ir->t), "unexpected FP op"); UNUSED(ir);
#else
  if (dst >= RID_MAX_GPR) {
    emit_dm(as, irt_isnum(ir->t) ? ARMI_VMOV_D : ARMI_VMOV_S,
            (dst & 15), (src & 15));
    return;
  }
#endif
  if (as->mcp != as->mcloop) {  /* Swap early registers for loads/stores. */
    /* swp XORed into a field holding dst turns it into src. */
    MCode ins = *as->mcp, swp = (src^dst);
    if ((ins & 0x0c000000) == 0x04000000 && (ins & 0x02000010) != 0x02000010) {
      /* Both patches start from the unmodified ins, so if both conditions
      ** hold, the second store replaces the first one.
      ** NOTE(review): confirm whether N == D == dst can occur for a store.
      */
      if (!((ins ^ (dst << 16)) & 0x000f0000))
        *as->mcp = ins ^ (swp << 16);  /* Swap N in load/store. */
      if (!(ins & 0x00100000) && !((ins ^ (dst << 12)) & 0x0000f000))
        *as->mcp = ins ^ (swp << 12);  /* Swap D in store. */
    }
  }
  emit_dm(as, ARMI_MOV, dst, src);
}
| 315 | |
| 316 /* Generic load of register with base and (small) offset address. */ | |
| 317 static void emit_loadofs(ASMState *as, IRIns *ir, Reg r, Reg base, int32_t ofs) | |
| 318 { | |
| 319 #if LJ_SOFTFP | |
| 320 lj_assertA(!irt_isnum(ir->t), "unexpected FP op"); UNUSED(ir); | |
| 321 #else | |
| 322 if (r >= RID_MAX_GPR) | |
| 323 emit_vlso(as, irt_isnum(ir->t) ? ARMI_VLDR_D : ARMI_VLDR_S, r, base, ofs); | |
| 324 else | |
| 325 #endif | |
| 326 emit_lso(as, ARMI_LDR, r, base, ofs); | |
| 327 } | |
| 328 | |
| 329 /* Generic store of register with base and (small) offset address. */ | |
| 330 static void emit_storeofs(ASMState *as, IRIns *ir, Reg r, Reg base, int32_t ofs) | |
| 331 { | |
| 332 #if LJ_SOFTFP | |
| 333 lj_assertA(!irt_isnum(ir->t), "unexpected FP op"); UNUSED(ir); | |
| 334 #else | |
| 335 if (r >= RID_MAX_GPR) | |
| 336 emit_vlso(as, irt_isnum(ir->t) ? ARMI_VSTR_D : ARMI_VSTR_S, r, base, ofs); | |
| 337 else | |
| 338 #endif | |
| 339 emit_lso(as, ARMI_STR, r, base, ofs); | |
| 340 } | |
| 341 | |
| 342 /* Emit an arithmetic/logic operation with a constant operand. */ | |
| 343 static void emit_opk(ASMState *as, ARMIns ai, Reg dest, Reg src, | |
| 344 int32_t i, RegSet allow) | |
| 345 { | |
| 346 uint32_t k = emit_isk12(ai, i); | |
| 347 if (k) | |
| 348 emit_dn(as, ai^k, dest, src); | |
| 349 else | |
| 350 emit_dnm(as, ai, dest, src, ra_allock(as, i, allow)); | |
| 351 } | |
| 352 | |
| 353 /* Add offset to pointer. */ | |
| 354 static void emit_addptr(ASMState *as, Reg r, int32_t ofs) | |
| 355 { | |
| 356 if (ofs) | |
| 357 emit_opk(as, ARMI_ADD, r, r, ofs, rset_exclude(RSET_GPR, r)); | |
| 358 } | |
| 359 | |
/* Subtract ofs from the stack pointer: emit_addptr() on RID_SP with -ofs. */
#define emit_spsub(as, ofs) emit_addptr(as, RID_SP, -(ofs))
| 361 |