Mercurial
comparison third_party/luajit/src/lj_asm_ppc.h @ 178:94705b5986b3
[ThirdParty] Added WRK and luajit for load testing.
| author | MrJuneJune <me@mrjunejune.com> |
|---|---|
| date | Thu, 22 Jan 2026 20:10:30 -0800 |
| parents | |
| children | |
comparison
equal
deleted
inserted
replaced
| 177:24fe8ff94056 | 178:94705b5986b3 |
|---|---|
| 1 /* | |
| 2 ** PPC IR assembler (SSA IR -> machine code). | |
| 3 ** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h | |
| 4 */ | |
| 5 | |
| 6 /* -- Register allocator extensions --------------------------------------- */ | |
| 7 | |
| 8 /* Allocate a register with a hint. */ | |
| 9 static Reg ra_hintalloc(ASMState *as, IRRef ref, Reg hint, RegSet allow) | |
| 10 { | |
| 11 Reg r = IR(ref)->r; /* Currently assigned register (may be none). */ | |
| 12 if (ra_noreg(r)) { | |
| 13 if (!ra_hashint(r) && !iscrossref(as, ref)) | |
| 14 ra_sethint(IR(ref)->r, hint); /* Propagate register hint. */ | |
| 15 r = ra_allocref(as, ref, allow); | |
| 16 } | |
| 17 ra_noweak(as, r); /* NOTE(review): presumably clears the weak flag so r isn't freely evicted -- confirm in ra core. */ | |
| 18 return r; | |
| 19 } | |
| 20 | |
| 21 /* Allocate two source registers for three-operand instructions. */ | |
| 22 static Reg ra_alloc2(ASMState *as, IRIns *ir, RegSet allow) | |
| 23 { | |
| 24 IRIns *irl = IR(ir->op1), *irr = IR(ir->op2); | |
| 25 Reg left = irl->r, right = irr->r; | |
| 26 if (ra_hasreg(left)) { /* Reuse already-assigned registers where possible. */ | |
| 27 ra_noweak(as, left); | |
| 28 if (ra_noreg(right)) | |
| 29 right = ra_allocref(as, ir->op2, rset_exclude(allow, left)); | |
| 30 else | |
| 31 ra_noweak(as, right); | |
| 32 } else if (ra_hasreg(right)) { | |
| 33 ra_noweak(as, right); | |
| 34 left = ra_allocref(as, ir->op1, rset_exclude(allow, right)); | |
| 35 } else if (ra_hashint(right)) { | |
| 36 right = ra_allocref(as, ir->op2, allow); /* Honor right's register hint first. */ | |
| 37 left = ra_alloc1(as, ir->op1, rset_exclude(allow, right)); | |
| 38 } else { | |
| 39 left = ra_allocref(as, ir->op1, allow); | |
| 40 right = ra_alloc1(as, ir->op2, rset_exclude(allow, left)); | |
| 41 } | |
| 42 return left | (right << 8); /* Pack the pair: left in bits 0-7, right in bits 8-15. */ | |
| 43 } | |
| 44 | |
| 45 /* -- Guard handling ------------------------------------------------------ */ | |
| 46 | |
| 47 /* Setup exit stubs after the end of each trace. */ | |
| 48 static void asm_exitstub_setup(ASMState *as, ExitNo nexits) | |
| 49 { | |
| 50 ExitNo i; | |
| 51 MCode *mxp = as->mctop; | |
| 52 if (mxp - (nexits + 3 + MCLIM_REDZONE) < as->mclim) | |
| 53 asm_mclimit(as); /* Not enough room for all stubs plus the 3-insn header. */ | |
| 54 /* 1: mflr r0; bl ->vm_exit_handler; li r0, traceno; bl <1; bl <1; ... */ | |
| 55 for (i = nexits-1; (int32_t)i >= 0; i--) | |
| 56 *--mxp = PPCI_BL|(((-3-i)&0x00ffffffu)<<2); /* Stub i is one bl branching back to label 1. */ | |
| 57 *--mxp = PPCI_LI|PPCF_T(RID_TMP)|as->T->traceno; /* Read by exit handler. */ | |
| 58 mxp--; | |
| 59 *mxp = PPCI_BL|((((MCode *)(void *)lj_vm_exit_handler-mxp)&0x00ffffffu)<<2); | |
| 60 *--mxp = PPCI_MFLR|PPCF_T(RID_TMP); /* Save LR so the handler can compute the exit number. */ | |
| 61 as->mctop = mxp; | |
| 62 } | |
| 63 | |
| 64 static MCode *asm_exitstub_addr(ASMState *as, ExitNo exitno) | |
| 65 { | |
| 66 /* Keep this in-sync with exitstub_trace_addr(). */ | |
| 67 return as->mctop + exitno + 3; /* +3 skips the mflr/bl/li header emitted by asm_exitstub_setup(). */ | |
| 68 } | |
| 69 | |
| 70 /* Emit conditional branch to exit for guard. */ | |
| 71 static void asm_guardcc(ASMState *as, PPCCC cc) | |
| 72 { | |
| 73 MCode *target = asm_exitstub_addr(as, as->snapno); | |
| 74 MCode *p = as->mcp; | |
| 75 if (LJ_UNLIKELY(p == as->invmcp)) { /* Guard coincides with the loop branch: invert it. */ | |
| 76 as->loopinv = 1; | |
| 77 *p = PPCI_B | (((target-p) & 0x00ffffffu) << 2); /* Fallthrough now goes to the exit stub. */ | |
| 78 emit_condbranch(as, PPCI_BC, cc^4, p); /* NOTE(review): cc^4 flips the condition sense -- confirm PPCCC encoding. */ | |
| 79 return; | |
| 80 } | |
| 81 emit_condbranch(as, PPCI_BC, cc, target); | |
| 82 } | |
| 83 | |
| 84 /* -- Operand fusion ------------------------------------------------------ */ | |
| 85 | |
| 86 /* Limit linear search to this distance. Avoids O(n^2) behavior. */ | |
| 87 #define CONFLICT_SEARCH_LIM 31 | |
| 88 | |
| 89 /* Check if there's no conflicting instruction between curins and ref. */ | |
| 90 static int noconflict(ASMState *as, IRRef ref, IROp conflict) | |
| 91 { | |
| 92 IRIns *ir = as->ir; | |
| 93 IRRef i = as->curins; | |
| 94 if (i > ref + CONFLICT_SEARCH_LIM) | |
| 95 return 0; /* Give up, ref is too far away. */ | |
| 96 while (--i > ref) /* Scan backwards from curins-1 down to ref+1. */ | |
| 97 if (ir[i].o == conflict) | |
| 98 return 0; /* Conflict found. */ | |
| 99 return 1; /* Ok, no conflict. */ | |
| 100 } | |
| 101 | |
| 102 /* Fuse the array base of colocated arrays. */ | |
| 103 static int32_t asm_fuseabase(ASMState *as, IRRef ref) | |
| 104 { | |
| 105 IRIns *ir = IR(ref); | |
| 106 if (ir->o == IR_TNEW && ir->op1 <= LJ_MAX_COLOSIZE && | |
| 107 !neverfuse(as) && noconflict(as, ref, IR_NEWREF)) | |
| 108 return (int32_t)sizeof(GCtab); /* Colocated array data starts right after the GCtab. */ | |
| 109 return 0; /* Not fusible. */ | |
| 110 } | |
| 111 | |
| 112 /* Indicates load/store indexed is ok. */ | |
| 113 #define AHUREF_LSX ((int32_t)0x80000000) | |
| 114 | |
| 115 /* Fuse array/hash/upvalue reference into register+offset operand. */ | |
| 116 static Reg asm_fuseahuref(ASMState *as, IRRef ref, int32_t *ofsp, RegSet allow) | |
| 117 { | |
| 118 IRIns *ir = IR(ref); | |
| 119 if (ra_noreg(ir->r)) { | |
| 120 if (ir->o == IR_AREF) { | |
| 121 if (mayfuse(as, ref)) { | |
| 122 if (irref_isk(ir->op2)) { /* Constant array index: fold into the offset. */ | |
| 123 IRRef tab = IR(ir->op1)->op1; | |
| 124 int32_t ofs = asm_fuseabase(as, tab); | |
| 125 IRRef refa = ofs ? tab : ir->op1; | |
| 126 ofs += 8*IR(ir->op2)->i; /* 8-byte array slots. */ | |
| 127 if (checki16(ofs)) { | |
| 128 *ofsp = ofs; | |
| 129 return ra_alloc1(as, refa, allow); | |
| 130 } | |
| 131 } | |
| 132 if (*ofsp == AHUREF_LSX) { /* Caller accepts an indexed (base+idx) operand. */ | |
| 133 Reg base = ra_alloc1(as, ir->op1, allow); | |
| 134 Reg idx = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, base)); | |
| 135 return base | (idx << 8); /* Same packing as ra_alloc2(). */ | |
| 136 } | |
| 137 } | |
| 138 } else if (ir->o == IR_HREFK) { | |
| 139 if (mayfuse(as, ref)) { | |
| 140 int32_t ofs = (int32_t)(IR(ir->op2)->op2 * sizeof(Node)); | |
| 141 if (checki16(ofs)) { | |
| 142 *ofsp = ofs; | |
| 143 return ra_alloc1(as, ir->op1, allow); | |
| 144 } | |
| 145 } | |
| 146 } else if (ir->o == IR_UREFC) { | |
| 147 if (irref_isk(ir->op1)) { | |
| 148 GCfunc *fn = ir_kfunc(IR(ir->op1)); | |
| 149 int32_t ofs = i32ptr(&gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv.tv); | |
| 150 int32_t jgl = (intptr_t)J2G(as->J); | |
| 151 if ((uint32_t)(ofs-jgl) < 65536) { /* Close enough to global_State: address via RID_JGL. */ | |
| 152 *ofsp = ofs-jgl-32768; /* RID_JGL is biased by 32768 (see the -32768 elsewhere). */ | |
| 153 return RID_JGL; | |
| 154 } else { | |
| 155 *ofsp = (int16_t)ofs; | |
| 156 return ra_allock(as, ofs-(int16_t)ofs, allow); /* High part of the address in a register. */ | |
| 157 } | |
| 158 } | |
| 159 } else if (ir->o == IR_TMPREF) { | |
| 160 *ofsp = (int32_t)(offsetof(global_State, tmptv)-32768); | |
| 161 return RID_JGL; | |
| 162 } | |
| 163 } | |
| 164 *ofsp = 0; /* Fallback: plain register, no fused offset. */ | |
| 165 return ra_alloc1(as, ref, allow); | |
| 166 } | |
| 167 | |
| 168 /* Fuse XLOAD/XSTORE reference into load/store operand. */ | |
| 169 static void asm_fusexref(ASMState *as, PPCIns pi, Reg rt, IRRef ref, | |
| 170 RegSet allow, int32_t ofs) | |
| 171 { | |
| 172 IRIns *ir = IR(ref); | |
| 173 Reg base; | |
| 174 if (ra_noreg(ir->r) && canfuse(as, ir)) { | |
| 175 if (ir->o == IR_ADD) { | |
| 176 int32_t ofs2; | |
| 177 if (irref_isk(ir->op2) && (ofs2 = ofs + IR(ir->op2)->i, checki16(ofs2))) { | |
| 178 ofs = ofs2; /* Fold the constant addend into the displacement. */ | |
| 179 ref = ir->op1; | |
| 180 } else if (ofs == 0) { | |
| 181 Reg right, left = ra_alloc2(as, ir, allow); | |
| 182 right = (left >> 8); left &= 255; /* Unpack ra_alloc2() result. */ | |
| 183 emit_fab(as, PPCI_LWZX | ((pi >> 20) & 0x780), rt, left, right); /* NOTE(review): derives the X-form (indexed) opcode from D-form pi -- confirm bit mapping. */ | |
| 184 return; | |
| 185 } | |
| 186 } else if (ir->o == IR_STRREF) { | |
| 187 lj_assertA(ofs == 0, "bad usage"); | |
| 188 ofs = (int32_t)sizeof(GCstr); /* String payload follows the GCstr header. */ | |
| 189 if (irref_isk(ir->op2)) { | |
| 190 ofs += IR(ir->op2)->i; | |
| 191 ref = ir->op1; | |
| 192 } else if (irref_isk(ir->op1)) { | |
| 193 ofs += IR(ir->op1)->i; | |
| 194 ref = ir->op2; | |
| 195 } else { | |
| 196 /* NYI: Fuse ADD with constant. */ | |
| 197 Reg tmp, right, left = ra_alloc2(as, ir, allow); | |
| 198 right = (left >> 8); left &= 255; | |
| 199 tmp = ra_scratch(as, rset_exclude(rset_exclude(allow, left), right)); | |
| 200 emit_fai(as, pi, rt, tmp, ofs); /* Access at tmp+ofs, with tmp = left+right below. */ | |
| 201 emit_tab(as, PPCI_ADD, tmp, left, right); | |
| 202 return; | |
| 203 } | |
| 204 if (!checki16(ofs)) { /* Offset doesn't fit a 16 bit displacement. */ | |
| 205 Reg left = ra_alloc1(as, ref, allow); | |
| 206 Reg right = ra_allock(as, ofs, rset_exclude(allow, left)); | |
| 207 emit_fab(as, PPCI_LWZX | ((pi >> 20) & 0x780), rt, left, right); | |
| 208 return; | |
| 209 } | |
| 210 } | |
| 211 } | |
| 212 base = ra_alloc1(as, ref, allow); | |
| 213 emit_fai(as, pi, rt, base, ofs); /* Plain base+displacement access. */ | |
| 214 } | |
| 215 | |
| 216 /* Fuse XLOAD/XSTORE reference into indexed-only load/store operand. */ | |
| 217 static void asm_fusexrefx(ASMState *as, PPCIns pi, Reg rt, IRRef ref, | |
| 218 RegSet allow) | |
| 219 { | |
| 220 IRIns *ira = IR(ref); | |
| 221 Reg right, left; | |
| 222 if (canfuse(as, ira) && ira->o == IR_ADD && ra_noreg(ira->r)) { /* Fuse the ADD into base+index. */ | |
| 223 left = ra_alloc2(as, ira, allow); | |
| 224 right = (left >> 8); left &= 255; /* Unpack ra_alloc2() result. */ | |
| 225 } else { | |
| 226 right = ra_alloc1(as, ref, allow); | |
| 227 left = RID_R0; /* NOTE(review): r0 as RA reads as zero in indexed addressing -- confirm ISA rule. */ | |
| 228 } | |
| 229 emit_tab(as, pi, rt, left, right); | |
| 230 } | |
| 231 | |
| 232 #if !LJ_SOFTFP | |
| 233 /* Fuse to multiply-add/sub instruction. */ | |
| 234 static int asm_fusemadd(ASMState *as, IRIns *ir, PPCIns pi, PPCIns pir) | |
| 235 { | |
| 236 IRRef lref = ir->op1, rref = ir->op2; | |
| 237 IRIns *irm; | |
| 238 if ((as->flags & JIT_F_OPT_FMA) && | |
| 239 lref != rref && | |
| 240 ((mayfuse(as, lref) && (irm = IR(lref), irm->o == IR_MUL) && | |
| 241 ra_noreg(irm->r)) || | |
| 242 (mayfuse(as, rref) && (irm = IR(rref), irm->o == IR_MUL) && | |
| 243 (rref = lref, pi = pir, ra_noreg(irm->r))))) { /* MUL on the right: swap addend and use the reversed opcode pir. */ | |
| 244 Reg dest = ra_dest(as, ir, RSET_FPR); | |
| 245 Reg add = ra_alloc1(as, rref, RSET_FPR); | |
| 246 Reg right, left = ra_alloc2(as, irm, rset_exclude(RSET_FPR, add)); | |
| 247 right = (left >> 8); left &= 255; /* Unpack ra_alloc2() result. */ | |
| 248 emit_facb(as, pi, dest, left, right, add); /* dest = left*right (+/-) add, fused. */ | |
| 249 return 1; /* Fused. */ | |
| 250 } | |
| 251 return 0; /* Not fusible; caller emits separate instructions. */ | |
| 252 } | |
| 253 #endif | |
| 254 | |
| 255 /* -- Calls --------------------------------------------------------------- */ | |
| 256 | |
| 257 /* Generate a call to a C function. */ | |
| 258 static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args) | |
| 259 { | |
| 260 uint32_t n, nargs = CCI_XNARGS(ci); | |
| 261 int32_t ofs = 8; /* Stack offset for args passed on the stack. */ | |
| 262 Reg gpr = REGARG_FIRSTGPR; | |
| 263 #if !LJ_SOFTFP | |
| 264 Reg fpr = REGARG_FIRSTFPR; | |
| 265 #endif | |
| 266 if ((void *)ci->func) | |
| 267 emit_call(as, (void *)ci->func); /* Emitted first -- code is generated backwards, so this runs last. */ | |
| 268 for (n = 0; n < nargs; n++) { /* Setup args. */ | |
| 269 IRRef ref = args[n]; | |
| 270 if (ref) { | |
| 271 IRIns *ir = IR(ref); | |
| 272 #if !LJ_SOFTFP | |
| 273 if (irt_isfp(ir->t)) { | |
| 274 if (fpr <= REGARG_LASTFPR) { | |
| 275 lj_assertA(rset_test(as->freeset, fpr), | |
| 276 "reg %d not free", fpr); /* Already evicted. */ | |
| 277 ra_leftov(as, fpr, ref); | |
| 278 fpr++; | |
| 279 } else { /* Out of FP argument registers: pass on the stack. */ | |
| 280 Reg r = ra_alloc1(as, ref, RSET_FPR); | |
| 281 if (irt_isnum(ir->t)) ofs = (ofs + 4) & ~4; /* Round up to 8-byte alignment for doubles. */ | |
| 282 emit_spstore(as, ir, r, ofs); | |
| 283 ofs += irt_isnum(ir->t) ? 8 : 4; | |
| 284 } | |
| 285 } else | |
| 286 #endif | |
| 287 { | |
| 288 if (gpr <= REGARG_LASTGPR) { | |
| 289 lj_assertA(rset_test(as->freeset, gpr), | |
| 290 "reg %d not free", gpr); /* Already evicted. */ | |
| 291 ra_leftov(as, gpr, ref); | |
| 292 gpr++; | |
| 293 } else { /* Out of GPR argument registers: pass on the stack. */ | |
| 294 Reg r = ra_alloc1(as, ref, RSET_GPR); | |
| 295 emit_spstore(as, ir, r, ofs); | |
| 296 ofs += 4; | |
| 297 } | |
| 298 } | |
| 299 } else { /* Empty argument slot: just consume a register or stack word. */ | |
| 300 if (gpr <= REGARG_LASTGPR) | |
| 301 gpr++; | |
| 302 else | |
| 303 ofs += 4; | |
| 304 } | |
| 305 checkmclim(as); | |
| 306 } | |
| 307 #if !LJ_SOFTFP | |
| 308 if ((ci->flags & CCI_VARARG)) /* Vararg calls need to know about FPR use. */ | |
| 309 emit_tab(as, fpr == REGARG_FIRSTFPR ? PPCI_CRXOR : PPCI_CREQV, 6, 6, 6); /* NOTE(review): clears/sets CR bit 6 per the vararg ABI convention -- confirm. */ | |
| 310 #endif | |
| 311 } | |
| 312 | |
| 313 /* Setup result reg/sp for call. Evict scratch regs. */ | |
| 314 static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci) | |
| 315 { | |
| 316 RegSet drop = RSET_SCRATCH; | |
| 317 int hiop = ((ir+1)->o == IR_HIOP && !irt_isnil((ir+1)->t)); /* 64 bit result split over two IR slots. */ | |
| 318 #if !LJ_SOFTFP | |
| 319 if ((ci->flags & CCI_NOFPRCLOBBER)) | |
| 320 drop &= ~RSET_FPR; /* Callee doesn't clobber FPRs: no need to evict them. */ | |
| 321 #endif | |
| 322 if (ra_hasreg(ir->r)) | |
| 323 rset_clear(drop, ir->r); /* Dest reg handled below. */ | |
| 324 if (hiop && ra_hasreg((ir+1)->r)) | |
| 325 rset_clear(drop, (ir+1)->r); /* Dest reg handled below. */ | |
| 326 ra_evictset(as, drop); /* Evictions must be performed first. */ | |
| 327 if (ra_used(ir)) { | |
| 328 lj_assertA(!irt_ispri(ir->t), "PRI dest"); | |
| 329 if (!LJ_SOFTFP && irt_isfp(ir->t)) { | |
| 330 if ((ci->flags & CCI_CASTU64)) { | |
| 331 /* Use spill slot or temp slots. */ | |
| 332 int32_t ofs = ir->s ? sps_scale(ir->s) : SPOFS_TMP; | |
| 333 Reg dest = ir->r; | |
| 334 if (ra_hasreg(dest)) { | |
| 335 ra_free(as, dest); | |
| 336 ra_modified(as, dest); | |
| 337 emit_fai(as, PPCI_LFD, dest, RID_SP, ofs); /* Reload the stored pair as a double. */ | |
| 338 } | |
| 339 emit_tai(as, PPCI_STW, RID_RETHI, RID_SP, ofs); /* Store the u64 return register pair. */ | |
| 340 emit_tai(as, PPCI_STW, RID_RETLO, RID_SP, ofs+4); | |
| 341 } else { | |
| 342 ra_destreg(as, ir, RID_FPRET); | |
| 343 } | |
| 344 } else if (hiop) { | |
| 345 ra_destpair(as, ir); /* Result arrives in a GPR pair. */ | |
| 346 } else { | |
| 347 ra_destreg(as, ir, RID_RET); | |
| 348 } | |
| 349 } | |
| 350 } | |
| 351 | |
| 352 static void asm_callx(ASMState *as, IRIns *ir) | |
| 353 { | |
| 354 IRRef args[CCI_NARGS_MAX*2]; | |
| 355 CCallInfo ci; | |
| 356 IRRef func; | |
| 357 IRIns *irf; | |
| 358 ci.flags = asm_callx_flags(as, ir); | |
| 359 asm_collectargs(as, ir, &ci, args); | |
| 360 asm_setupresult(as, ir, &ci); | |
| 361 func = ir->op2; irf = IR(func); | |
| 362 if (irf->o == IR_CARG) { func = irf->op1; irf = IR(func); } /* Unwrap CARG to get the real callee ref. */ | |
| 363 if (irref_isk(func)) { /* Call to constant address. */ | |
| 364 ci.func = (ASMFunction)(void *)(intptr_t)(irf->i); | |
| 365 } else { /* Need a non-argument register for indirect calls. */ | |
| 366 RegSet allow = RSET_GPR & ~RSET_RANGE(RID_R0, REGARG_LASTGPR+1); | |
| 367 Reg freg = ra_alloc1(as, func, allow); | |
| 368 *--as->mcp = PPCI_BCTRL; /* Emitted backwards: runtime order is mtctr freg; bctrl. */ | |
| 369 *--as->mcp = PPCI_MTCTR | PPCF_T(freg); | |
| 370 ci.func = (ASMFunction)(void *)0; /* Signal asm_gencall() there's no direct target. */ | |
| 371 } | |
| 372 asm_gencall(as, &ci, args); | |
| 373 } | |
| 374 | |
| 375 /* -- Returns ------------------------------------------------------------- */ | |
| 376 | |
| 377 /* Return to lower frame. Guard that it goes to the right spot. */ | |
| 378 static void asm_retf(ASMState *as, IRIns *ir) | |
| 379 { | |
| 380 Reg base = ra_alloc1(as, REF_BASE, RSET_GPR); | |
| 381 void *pc = ir_kptr(IR(ir->op2)); /* Expected return PC of the lower frame. */ | |
| 382 int32_t delta = 1+LJ_FR2+bc_a(*((const BCIns *)pc - 1)); /* Number of slots to pop. */ | |
| 383 as->topslot -= (BCReg)delta; | |
| 384 if ((int32_t)as->topslot < 0) as->topslot = 0; | |
| 385 irt_setmark(IR(REF_BASE)->t); /* Children must not coalesce with BASE reg. */ | |
| 386 emit_setgl(as, base, jit_base); | |
| 387 emit_addptr(as, base, -8*delta); /* Pop the frame: 8 bytes per slot. */ | |
| 388 asm_guardcc(as, CC_NE); /* Exit if the frame PC doesn't match. */ | |
| 389 emit_ab(as, PPCI_CMPW, RID_TMP, | |
| 390 ra_allock(as, i32ptr(pc), rset_exclude(RSET_GPR, base))); | |
| 391 emit_tai(as, PPCI_LWZ, RID_TMP, base, -8); /* Load the frame link word below base. */ | |
| 392 } | |
| 393 | |
| 394 /* -- Buffer operations --------------------------------------------------- */ | |
| 395 | |
| 396 #if LJ_HASBUFFER | |
| 397 static void asm_bufhdr_write(ASMState *as, Reg sb) | |
| 398 { | |
| 399 Reg tmp = ra_scratch(as, rset_exclude(RSET_GPR, sb)); | |
| 400 IRIns irgc; | |
| 401 irgc.ot = IRT(0, IRT_PGC); /* GC type. */ | |
| 402 emit_storeofs(as, &irgc, RID_TMP, sb, offsetof(SBuf, L)); /* Emitted in reverse: this store runs last at runtime. */ | |
| 403 emit_rot(as, PPCI_RLWIMI, RID_TMP, tmp, 0, 31-lj_fls(SBUF_MASK_FLAG), 31); /* Merge flag bits from the old sb->L into cur_L. */ | |
| 404 emit_getgl(as, RID_TMP, cur_L); | |
| 405 emit_loadofs(as, &irgc, tmp, sb, offsetof(SBuf, L)); /* Runtime order starts here: load old sb->L. */ | |
| 406 } | |
| 407 #endif | |
| 408 | |
| 409 /* -- Type conversions ---------------------------------------------------- */ | |
| 410 | |
| 411 #if !LJ_SOFTFP | |
| 412 static void asm_tointg(ASMState *as, IRIns *ir, Reg left) | |
| 413 { | |
| 414 RegSet allow = RSET_FPR; | |
| 415 Reg tmp = ra_scratch(as, rset_clear(allow, left)); | |
| 416 Reg fbias = ra_scratch(as, rset_clear(allow, tmp)); | |
| 417 Reg dest = ra_dest(as, ir, RSET_GPR); | |
| 418 Reg hibias = ra_allock(as, 0x43300000, rset_exclude(RSET_GPR, dest)); /* High word of the double 2^52. */ | |
| 419 asm_guardcc(as, CC_NE); /* Exit if the conversion was not exact. */ | |
| 420 emit_fab(as, PPCI_FCMPU, 0, tmp, left); /* Compare the reconstructed value with the original. */ | |
| 421 emit_fab(as, PPCI_FSUB, tmp, tmp, fbias); | |
| 422 emit_fai(as, PPCI_LFD, tmp, RID_SP, SPOFS_TMP); /* Reassemble int as a biased double. */ | |
| 423 emit_tai(as, PPCI_STW, RID_TMP, RID_SP, SPOFS_TMPLO); | |
| 424 emit_tai(as, PPCI_STW, hibias, RID_SP, SPOFS_TMPHI); | |
| 425 emit_asi(as, PPCI_XORIS, RID_TMP, dest, 0x8000); /* Flip the sign bit of the integer result. */ | |
| 426 emit_tai(as, PPCI_LWZ, dest, RID_SP, SPOFS_TMPLO); /* Integer result from the low word. */ | |
| 427 emit_lsptr(as, PPCI_LFS, (fbias & 31), | |
| 428 (void *)&as->J->k32[LJ_K32_2P52_2P31], RSET_GPR); /* Bias constant 2^52+2^31. */ | |
| 429 emit_fai(as, PPCI_STFD, tmp, RID_SP, SPOFS_TMP); | |
| 430 emit_fb(as, PPCI_FCTIWZ, tmp, left); /* Truncating double -> int (runs first at runtime). */ | |
| 431 } | |
| 432 | |
| 433 static void asm_tobit(ASMState *as, IRIns *ir) | |
| 434 { | |
| 435 RegSet allow = RSET_FPR; | |
| 436 Reg dest = ra_dest(as, ir, RSET_GPR); | |
| 437 Reg left = ra_alloc1(as, ir->op1, allow); | |
| 438 Reg right = ra_alloc1(as, ir->op2, rset_clear(allow, left)); | |
| 439 Reg tmp = ra_scratch(as, rset_clear(allow, right)); | |
| 440 emit_tai(as, PPCI_LWZ, dest, RID_SP, SPOFS_TMPLO); /* Result is the low word of the biased sum. */ | |
| 441 emit_fai(as, PPCI_STFD, tmp, RID_SP, SPOFS_TMP); | |
| 442 emit_fab(as, PPCI_FADD, tmp, left, right); /* NOTE(review): op2 presumably holds the TOBIT bias constant -- verify against IR emitter. */ | |
| 443 } | |
| 444 #endif | |
| 445 | |
| 446 static void asm_conv(ASMState *as, IRIns *ir) | |
| 447 { | |
| 448 IRType st = (IRType)(ir->op2 & IRCONV_SRCMASK); /* Source type of the conversion. */ | |
| 449 #if !LJ_SOFTFP | |
| 450 int stfp = (st == IRT_NUM || st == IRT_FLOAT); | |
| 451 #endif | |
| 452 IRRef lref = ir->op1; | |
| 453 /* 64 bit integer conversions are handled by SPLIT. */ | |
| 454 lj_assertA(!(irt_isint64(ir->t) || (st == IRT_I64 || st == IRT_U64)), | |
| 455 "IR %04d has unsplit 64 bit type", | |
| 456 (int)(ir - as->ir) - REF_BIAS); | |
| 457 #if LJ_SOFTFP | |
| 458 /* FP conversions are handled by SPLIT. */ | |
| 459 lj_assertA(!irt_isfp(ir->t) && !(st == IRT_NUM || st == IRT_FLOAT), | |
| 460 "IR %04d has FP type", | |
| 461 (int)(ir - as->ir) - REF_BIAS); | |
| 462 /* Can't check for same types: SPLIT uses CONV int.int + BXOR for sfp NEG. */ | |
| 463 #else | |
| 464 lj_assertA(irt_type(ir->t) != st, "inconsistent types for CONV"); | |
| 465 if (irt_isfp(ir->t)) { | |
| 466 Reg dest = ra_dest(as, ir, RSET_FPR); | |
| 467 if (stfp) { /* FP to FP conversion. */ | |
| 468 if (st == IRT_NUM) /* double -> float conversion. */ | |
| 469 emit_fb(as, PPCI_FRSP, dest, ra_alloc1(as, lref, RSET_FPR)); | |
| 470 else /* float -> double conversion is a no-op on PPC. */ | |
| 471 ra_leftov(as, dest, lref); /* Do nothing, but may need to move regs. */ | |
| 472 } else { /* Integer to FP conversion. */ | |
| 473 /* IRT_INT: Flip hibit, bias with 2^52, subtract 2^52+2^31. */ | |
| 474 /* IRT_U32: Bias with 2^52, subtract 2^52. */ | |
| 475 RegSet allow = RSET_GPR; | |
| 476 Reg left = ra_alloc1(as, lref, allow); | |
| 477 Reg hibias = ra_allock(as, 0x43300000, rset_clear(allow, left)); /* High word of the double 2^52. */ | |
| 478 Reg fbias = ra_scratch(as, rset_exclude(RSET_FPR, dest)); | |
| 479 if (irt_isfloat(ir->t)) emit_fb(as, PPCI_FRSP, dest, dest); /* Narrow to float as the last step. */ | |
| 480 emit_fab(as, PPCI_FSUB, dest, dest, fbias); | |
| 481 emit_fai(as, PPCI_LFD, dest, RID_SP, SPOFS_TMP); | |
| 482 emit_lsptr(as, PPCI_LFS, (fbias & 31), | |
| 483 &as->J->k32[st == IRT_U32 ? LJ_K32_2P52 : LJ_K32_2P52_2P31], | |
| 484 rset_clear(allow, hibias)); | |
| 485 emit_tai(as, PPCI_STW, st == IRT_U32 ? left : RID_TMP, | |
| 486 RID_SP, SPOFS_TMPLO); | |
| 487 emit_tai(as, PPCI_STW, hibias, RID_SP, SPOFS_TMPHI); | |
| 488 if (st != IRT_U32) emit_asi(as, PPCI_XORIS, RID_TMP, left, 0x8000); /* Flip hibit for signed int. */ | |
| 489 } | |
| 490 } else if (stfp) { /* FP to integer conversion. */ | |
| 491 if (irt_isguard(ir->t)) { | |
| 492 /* Checked conversions are only supported from number to int. */ | |
| 493 lj_assertA(irt_isint(ir->t) && st == IRT_NUM, | |
| 494 "bad type for checked CONV"); | |
| 495 asm_tointg(as, ir, ra_alloc1(as, lref, RSET_FPR)); | |
| 496 } else { | |
| 497 Reg dest = ra_dest(as, ir, RSET_GPR); | |
| 498 Reg left = ra_alloc1(as, lref, RSET_FPR); | |
| 499 Reg tmp = ra_scratch(as, rset_exclude(RSET_FPR, left)); | |
| 500 if (irt_isu32(ir->t)) { | |
| 501 /* Convert both x and x-2^31 to int and merge results. */ | |
| 502 Reg tmpi = ra_scratch(as, rset_exclude(RSET_GPR, dest)); | |
| 503 emit_asb(as, PPCI_OR, dest, dest, tmpi); /* Select with mask idiom. */ | |
| 504 emit_asb(as, PPCI_AND, tmpi, tmpi, RID_TMP); | |
| 505 emit_asb(as, PPCI_ANDC, dest, dest, RID_TMP); | |
| 506 emit_tai(as, PPCI_LWZ, tmpi, RID_SP, SPOFS_TMPLO); /* tmp = (int)(x) */ | |
| 507 emit_tai(as, PPCI_ADDIS, dest, dest, 0x8000); /* dest += 2^31 */ | |
| 508 emit_asb(as, PPCI_SRAWI, RID_TMP, dest, 31); /* mask = -(dest < 0) */ | |
| 509 emit_fai(as, PPCI_STFD, tmp, RID_SP, SPOFS_TMP); | |
| 510 emit_tai(as, PPCI_LWZ, dest, | |
| 511 RID_SP, SPOFS_TMPLO); /* dest = (int)(x-2^31) */ | |
| 512 emit_fb(as, PPCI_FCTIWZ, tmp, left); | |
| 513 emit_fai(as, PPCI_STFD, tmp, RID_SP, SPOFS_TMP); | |
| 514 emit_fb(as, PPCI_FCTIWZ, tmp, tmp); | |
| 515 emit_fab(as, PPCI_FSUB, tmp, left, tmp); /* tmp = x - 2^31 */ | |
| 516 emit_lsptr(as, PPCI_LFS, (tmp & 31), | |
| 517 (void *)&as->J->k32[LJ_K32_2P31], RSET_GPR); | |
| 518 } else { | |
| 519 emit_tai(as, PPCI_LWZ, dest, RID_SP, SPOFS_TMPLO); /* Integer result from the low word. */ | |
| 520 emit_fai(as, PPCI_STFD, tmp, RID_SP, SPOFS_TMP); | |
| 521 emit_fb(as, PPCI_FCTIWZ, tmp, left); /* Truncating conversion. */ | |
| 522 } | |
| 523 } | |
| 524 } else | |
| 525 #endif | |
| 526 { | |
| 527 Reg dest = ra_dest(as, ir, RSET_GPR); | |
| 528 if (st >= IRT_I8 && st <= IRT_U16) { /* Extend to 32 bit integer. */ | |
| 529 Reg left = ra_alloc1(as, ir->op1, RSET_GPR); | |
| 530 lj_assertA(irt_isint(ir->t) || irt_isu32(ir->t), "bad type for CONV EXT"); | |
| 531 if ((ir->op2 & IRCONV_SEXT)) | |
| 532 emit_as(as, st == IRT_I8 ? PPCI_EXTSB : PPCI_EXTSH, dest, left); /* Sign-extend. */ | |
| 533 else | |
| 534 emit_rot(as, PPCI_RLWINM, dest, left, 0, st == IRT_U8 ? 24 : 16, 31); /* Zero-extend via rotate-and-mask. */ | |
| 535 } else { /* 32/64 bit integer conversions. */ | |
| 536 /* Only need to handle 32/32 bit no-op (cast) on 32 bit archs. */ | |
| 537 ra_leftov(as, dest, lref); /* Do nothing, but may need to move regs. */ | |
| 538 } | |
| 539 } | |
| 540 } | |
| 541 | |
| 542 static void asm_strto(ASMState *as, IRIns *ir) | |
| 543 { | |
| 544 const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_strscan_num]; | |
| 545 IRRef args[2]; | |
| 546 int32_t ofs = SPOFS_TMP; /* Default: result goes to the temp slot. */ | |
| 547 #if LJ_SOFTFP | |
| 548 ra_evictset(as, RSET_SCRATCH); | |
| 549 if (ra_used(ir)) { | |
| 550 if (ra_hasspill(ir->s) && ra_hasspill((ir+1)->s) && | |
| 551 (ir->s & 1) == LJ_BE && (ir->s ^ 1) == (ir+1)->s) { /* Adjacent spill slots in endian order. */ | |
| 552 int i; | |
| 553 for (i = 0; i < 2; i++) { | |
| 554 Reg r = (ir+i)->r; | |
| 555 if (ra_hasreg(r)) { | |
| 556 ra_free(as, r); | |
| 557 ra_modified(as, r); | |
| 558 emit_spload(as, ir+i, r, sps_scale((ir+i)->s)); | |
| 559 } | |
| 560 } | |
| 561 ofs = sps_scale(ir->s & ~1); /* Let the call store straight into the spill slots. */ | |
| 562 } else { | |
| 563 Reg rhi = ra_dest(as, ir+1, RSET_GPR); | |
| 564 Reg rlo = ra_dest(as, ir, rset_exclude(RSET_GPR, rhi)); | |
| 565 emit_tai(as, PPCI_LWZ, rhi, RID_SP, ofs); /* Load the result pair from the temp slot. */ | |
| 566 emit_tai(as, PPCI_LWZ, rlo, RID_SP, ofs+4); | |
| 567 } | |
| 568 } | |
| 569 #else | |
| 570 RegSet drop = RSET_SCRATCH; | |
| 571 if (ra_hasreg(ir->r)) rset_set(drop, ir->r); /* Spill dest reg (if any). */ | |
| 572 ra_evictset(as, drop); | |
| 573 if (ir->s) ofs = sps_scale(ir->s); /* Use the assigned spill slot if present. */ | |
| 574 #endif | |
| 575 asm_guardcc(as, CC_EQ); /* Exit if the scan failed (status == 0). */ | |
| 576 emit_ai(as, PPCI_CMPWI, RID_RET, 0); /* Test return status. */ | |
| 577 args[0] = ir->op1; /* GCstr *str */ | |
| 578 args[1] = ASMREF_TMP1; /* TValue *n */ | |
| 579 asm_gencall(as, ci, args); | |
| 580 /* Store the result to the spill slot or temp slots. */ | |
| 581 emit_tai(as, PPCI_ADDI, ra_releasetmp(as, ASMREF_TMP1), RID_SP, ofs); | |
| 582 } | |
| 583 | |
| 584 /* -- Memory references --------------------------------------------------- */ | |
| 585 | |
| 586 /* Get pointer to TValue. */ | |
| 587 static void asm_tvptr(ASMState *as, Reg dest, IRRef ref, MSize mode) | |
| 588 { | |
| 589 int32_t tmpofs = (int32_t)(offsetof(global_State, tmptv)-32768); /* g->tmptv relative to the biased RID_JGL. */ | |
| 590 if ((mode & IRTMPREF_IN1)) { | |
| 591 IRIns *ir = IR(ref); | |
| 592 if (irt_isnum(ir->t)) { | |
| 593 if ((mode & IRTMPREF_OUT1)) { /* Copy the number into g->tmptv and point dest at it. */ | |
| 594 #if LJ_SOFTFP | |
| 595 lj_assertA(irref_isk(ref), "unsplit FP op"); | |
| 596 emit_tai(as, PPCI_ADDI, dest, RID_JGL, tmpofs); | |
| 597 emit_setgl(as, | |
| 598 ra_allock(as, (int32_t)ir_knum(ir)->u32.lo, RSET_GPR), | |
| 599 tmptv.u32.lo); | |
| 600 emit_setgl(as, | |
| 601 ra_allock(as, (int32_t)ir_knum(ir)->u32.hi, RSET_GPR), | |
| 602 tmptv.u32.hi); | |
| 603 #else | |
| 604 Reg src = ra_alloc1(as, ref, RSET_FPR); | |
| 605 emit_tai(as, PPCI_ADDI, dest, RID_JGL, tmpofs); | |
| 606 emit_fai(as, PPCI_STFD, src, RID_JGL, tmpofs); | |
| 607 #endif | |
| 608 } else if (irref_isk(ref)) { | |
| 609 /* Use the number constant itself as a TValue. */ | |
| 610 ra_allockreg(as, i32ptr(ir_knum(ir)), dest); | |
| 611 } else { | |
| 612 #if LJ_SOFTFP | |
| 613 lj_assertA(0, "unsplit FP op"); | |
| 614 #else | |
| 615 /* Otherwise force a spill and use the spill slot. */ | |
| 616 emit_tai(as, PPCI_ADDI, dest, RID_SP, ra_spill(as, ir)); | |
| 617 #endif | |
| 618 } | |
| 619 } else { | |
| 620 /* Otherwise use g->tmptv to hold the TValue. */ | |
| 621 Reg type; | |
| 622 emit_tai(as, PPCI_ADDI, dest, RID_JGL, tmpofs); | |
| 623 if (!irt_ispri(ir->t)) { /* Primitive types are fully encoded in the type tag. */ | |
| 624 Reg src = ra_alloc1(as, ref, RSET_GPR); | |
| 625 emit_setgl(as, src, tmptv.gcr); | |
| 626 } | |
| 627 if (LJ_SOFTFP && (ir+1)->o == IR_HIOP && !irt_isnil((ir+1)->t)) | |
| 628 type = ra_alloc1(as, ref+1, RSET_GPR); /* Dynamic type tag from the HIOP half. */ | |
| 629 else | |
| 630 type = ra_allock(as, irt_toitype(ir->t), RSET_GPR); | |
| 631 emit_setgl(as, type, tmptv.it); | |
| 632 } | |
| 633 } else { | |
| 634 emit_tai(as, PPCI_ADDI, dest, RID_JGL, tmpofs); /* No input: just return a pointer to g->tmptv. */ | |
| 635 } | |
| 636 } | |
| 637 | |
| 638 static void asm_aref(ASMState *as, IRIns *ir) | |
| 639 { | |
| 640 Reg dest = ra_dest(as, ir, RSET_GPR); | |
| 641 Reg idx, base; | |
| 642 if (irref_isk(ir->op2)) { /* Constant index: try the base+immediate form. */ | |
| 643 IRRef tab = IR(ir->op1)->op1; | |
| 644 int32_t ofs = asm_fuseabase(as, tab); | |
| 645 IRRef refa = ofs ? tab : ir->op1; | |
| 646 ofs += 8*IR(ir->op2)->i; /* 8-byte array slots. */ | |
| 647 if (checki16(ofs)) { | |
| 648 base = ra_alloc1(as, refa, RSET_GPR); | |
| 649 emit_tai(as, PPCI_ADDI, dest, base, ofs); | |
| 650 return; | |
| 651 } | |
| 652 } | |
| 653 base = ra_alloc1(as, ir->op1, RSET_GPR); | |
| 654 idx = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, base)); | |
| 655 emit_tab(as, PPCI_ADD, dest, RID_TMP, base); /* dest = base + (idx << 3). */ | |
| 656 emit_slwi(as, RID_TMP, idx, 3); | |
| 657 } | |
| 658 | |
| 659 /* Inlined hash lookup. Specialized for key type and for const keys. | |
| 660 ** The equivalent C code is: | |
| 661 ** Node *n = hashkey(t, key); | |
| 662 ** do { | |
| 663 ** if (lj_obj_equal(&n->key, key)) return &n->val; | |
| 664 ** } while ((n = nextnode(n))); | |
| 665 ** return niltv(L); | |
| 666 */ | |
| 667 static void asm_href(ASMState *as, IRIns *ir, IROp merge) | |
| 668 { | |
| 669 RegSet allow = RSET_GPR; | |
| 670 int destused = ra_used(ir); | |
| 671 Reg dest = ra_dest(as, ir, allow); | |
| 672 Reg tab = ra_alloc1(as, ir->op1, rset_clear(allow, dest)); | |
| 673 Reg key = RID_NONE, tmp1 = RID_TMP, tmp2; | |
| 674 Reg tisnum = RID_NONE, tmpnum = RID_NONE; | |
| 675 IRRef refkey = ir->op2; | |
| 676 IRIns *irkey = IR(refkey); | |
| 677 int isk = irref_isk(refkey); | |
| 678 IRType1 kt = irkey->t; | |
| 679 uint32_t khash; | |
| 680 MCLabel l_end, l_loop, l_next; | |
| 681 | |
| 682 rset_clear(allow, tab); | |
| 683 #if LJ_SOFTFP | |
| 684 if (!isk) { | |
| 685 key = ra_alloc1(as, refkey, allow); | |
| 686 rset_clear(allow, key); | |
| 687 if (irkey[1].o == IR_HIOP) { /* Number key: hi word lives in the HIOP result. */ | |
| 688 if (ra_hasreg((irkey+1)->r)) { | |
| 689 tmpnum = (irkey+1)->r; | |
| 690 ra_noweak(as, tmpnum); | |
| 691 } else { | |
| 692 tmpnum = ra_allocref(as, refkey+1, allow); | |
| 693 } | |
| 694 rset_clear(allow, tmpnum); | |
| 695 } | |
| 696 } | |
| 697 #else | |
| 698 if (irt_isnum(kt)) { | |
| 699 key = ra_alloc1(as, refkey, RSET_FPR); | |
| 700 tmpnum = ra_scratch(as, rset_exclude(RSET_FPR, key)); | |
| 701 tisnum = ra_allock(as, (int32_t)LJ_TISNUM, allow); | |
| 702 rset_clear(allow, tisnum); | |
| 703 } else if (!irt_ispri(kt)) { | |
| 704 key = ra_alloc1(as, refkey, allow); | |
| 705 rset_clear(allow, key); | |
| 706 } | |
| 707 #endif | |
| 708 tmp2 = ra_scratch(as, allow); | |
| 709 rset_clear(allow, tmp2); | |
| 710 | |
| 711 /* Key not found in chain: jump to exit (if merged) or load niltv. */ | |
| 712 l_end = emit_label(as); | |
| 713 as->invmcp = NULL; | |
| 714 if (merge == IR_NE) | |
| 715 asm_guardcc(as, CC_EQ); /* Merged HREF+NE: guard triggers on a hit. */ | |
| 716 else if (destused) | |
| 717 emit_loada(as, dest, niltvg(J2G(as->J))); | |
| 718 | |
| 719 /* Follow hash chain until the end. */ | |
| 720 l_loop = --as->mcp; /* Placeholder for the loop branch, patched below. */ | |
| 721 emit_ai(as, PPCI_CMPWI, dest, 0); | |
| 722 emit_tai(as, PPCI_LWZ, dest, dest, (int32_t)offsetof(Node, next)); | |
| 723 l_next = emit_label(as); | |
| 724 | |
| 725 /* Type and value comparison. */ | |
| 726 if (merge == IR_EQ) | |
| 727 asm_guardcc(as, CC_EQ); | |
| 728 else | |
| 729 emit_condbranch(as, PPCI_BC|PPCF_Y, CC_EQ, l_end); | |
| 730 if (!LJ_SOFTFP && irt_isnum(kt)) { | |
| 731 emit_fab(as, PPCI_FCMPU, 0, tmpnum, key); | |
| 732 emit_condbranch(as, PPCI_BC, CC_GE, l_next); /* Node key is not a number: try next node. */ | |
| 733 emit_ab(as, PPCI_CMPLW, tmp1, tisnum); | |
| 734 emit_fai(as, PPCI_LFD, tmpnum, dest, (int32_t)offsetof(Node, key.n)); | |
| 735 } else { | |
| 736 if (!irt_ispri(kt)) { | |
| 737 emit_ab(as, PPCI_CMPW, tmp2, key); | |
| 738 emit_condbranch(as, PPCI_BC, CC_NE, l_next); /* Type mismatch: try next node. */ | |
| 739 } | |
| 740 if (LJ_SOFTFP && ra_hasreg(tmpnum)) | |
| 741 emit_ab(as, PPCI_CMPW, tmp1, tmpnum); | |
| 742 else | |
| 743 emit_ai(as, PPCI_CMPWI, tmp1, irt_toitype(irkey->t)); | |
| 744 if (!irt_ispri(kt)) | |
| 745 emit_tai(as, PPCI_LWZ, tmp2, dest, (int32_t)offsetof(Node, key.gcr)); | |
| 746 } | |
| 747 emit_tai(as, PPCI_LWZ, tmp1, dest, (int32_t)offsetof(Node, key.it)); /* Load node key type tag. */ | |
| 748 *l_loop = PPCI_BC | PPCF_Y | PPCF_CC(CC_NE) | | |
| 749 (((char *)as->mcp-(char *)l_loop) & 0xffffu); | |
| 750 | |
| 751 /* Load main position relative to tab->node into dest. */ | |
| 752 khash = isk ? ir_khash(as, irkey) : 1; | |
| 753 if (khash == 0) { /* Hash is zero: main position is node[0]. */ | |
| 754 emit_tai(as, PPCI_LWZ, dest, tab, (int32_t)offsetof(GCtab, node)); | |
| 755 } else { | |
| 756 Reg tmphash = tmp1; | |
| 757 if (isk) | |
| 758 tmphash = ra_allock(as, khash, allow); /* Constant key: hash known at assembly time. */ | |
| 759 emit_tab(as, PPCI_ADD, dest, dest, tmp1); | |
| 760 emit_tai(as, PPCI_MULLI, tmp1, tmp1, sizeof(Node)); | |
| 761 emit_asb(as, PPCI_AND, tmp1, tmp2, tmphash); /* idx = hash & hmask. */ | |
| 762 emit_tai(as, PPCI_LWZ, dest, tab, (int32_t)offsetof(GCtab, node)); | |
| 763 emit_tai(as, PPCI_LWZ, tmp2, tab, (int32_t)offsetof(GCtab, hmask)); | |
| 764 if (isk) { | |
| 765 /* Nothing to do. */ | |
| 766 } else if (irt_isstr(kt)) { | |
| 767 emit_tai(as, PPCI_LWZ, tmp1, key, (int32_t)offsetof(GCstr, sid)); /* Strings cache their hash (sid). */ | |
| 768 } else { /* Must match with hash*() in lj_tab.c. */ | |
| 769 emit_tab(as, PPCI_SUBF, tmp1, tmp2, tmp1); | |
| 770 emit_rotlwi(as, tmp2, tmp2, HASH_ROT3); | |
| 771 emit_asb(as, PPCI_XOR, tmp1, tmp1, tmp2); | |
| 772 emit_rotlwi(as, tmp1, tmp1, (HASH_ROT2+HASH_ROT1)&31); | |
| 773 emit_tab(as, PPCI_SUBF, tmp2, dest, tmp2); | |
| 774 if (LJ_SOFTFP ? (irkey[1].o == IR_HIOP) : irt_isnum(kt)) { | |
| 775 #if LJ_SOFTFP | |
| 776 emit_asb(as, PPCI_XOR, tmp2, key, tmp1); | |
| 777 emit_rotlwi(as, dest, tmp1, HASH_ROT1); | |
| 778 emit_tab(as, PPCI_ADD, tmp1, tmpnum, tmpnum); | |
| 779 #else | |
| 780 int32_t ofs = ra_spill(as, irkey); /* Hash the raw bits: spill the FP key to memory. */ | |
| 781 emit_asb(as, PPCI_XOR, tmp2, tmp2, tmp1); | |
| 782 emit_rotlwi(as, dest, tmp1, HASH_ROT1); | |
| 783 emit_tab(as, PPCI_ADD, tmp1, tmp1, tmp1); | |
| 784 emit_tai(as, PPCI_LWZ, tmp2, RID_SP, ofs+4); | |
| 785 emit_tai(as, PPCI_LWZ, tmp1, RID_SP, ofs); | |
| 786 #endif | |
| 787 } else { | |
| 788 emit_asb(as, PPCI_XOR, tmp2, key, tmp1); | |
| 789 emit_rotlwi(as, dest, tmp1, HASH_ROT1); | |
| 790 emit_tai(as, PPCI_ADDI, tmp1, tmp2, HASH_BIAS); | |
| 791 emit_tai(as, PPCI_ADDIS, tmp2, key, (HASH_BIAS + 32768)>>16); | |
| 792 } | |
| 793 } | |
| 794 } | |
| 795 } | |
| 796 | |
/* HREFK: address a hash node whose key and slot number are constants.
** ir->op1 is the node base, ir->op2 a KSLOT (key ref + precomputed slot).
** Emits (bottom-up, i.e. in reverse execution order) a load of the node's
** key.it/key.gcr fields plus guards that the node still holds this key.
*/
static void asm_hrefk(ASMState *as, IRIns *ir)
{
  IRIns *kslot = IR(ir->op2);
  IRIns *irkey = IR(kslot->op1);
  int32_t ofs = (int32_t)(kslot->op2 * sizeof(Node));
  int32_t kofs = ofs + (int32_t)offsetof(Node, key);
  /* Need a dest reg when the result is used OR when ofs exceeds the
  ** 16 bit signed displacement (32736 leaves headroom for the key
  ** sub-offsets -- NOTE(review): confirm against sizeof(Node)).
  */
  Reg dest = (ra_used(ir)||ofs > 32736) ? ra_dest(as, ir, RSET_GPR) : RID_NONE;
  Reg node = ra_alloc1(as, ir->op1, RSET_GPR);
  Reg key = RID_NONE, type = RID_TMP, idx = node;
  RegSet allow = rset_exclude(RSET_GPR, node);
  lj_assertA(ofs % sizeof(Node) == 0, "unaligned HREFK slot");
  if (ofs > 32736) {
    /* Large offset: materialize node+ofs into dest and index off that. */
    idx = dest;
    rset_clear(allow, dest);
    kofs = (int32_t)offsetof(Node, key);
  } else if (ra_hasreg(dest)) {
    emit_tai(as, PPCI_ADDI, dest, node, ofs);
  }
  asm_guardcc(as, CC_NE);
  if (!irt_ispri(irkey->t)) {
    /* Non-primitive keys need a scratch reg to compare the value word. */
    key = ra_scratch(as, allow);
    rset_clear(allow, key);
  }
  rset_clear(allow, type);
  if (irt_isnum(irkey->t)) {
    /* Number key: compare both 32 bit halves of the TValue. */
    emit_cmpi(as, key, (int32_t)ir_knum(irkey)->u32.lo);
    asm_guardcc(as, CC_NE);
    emit_cmpi(as, type, (int32_t)ir_knum(irkey)->u32.hi);
  } else {
    if (ra_hasreg(key)) {
      emit_cmpi(as, key, irkey->i);  /* May use RID_TMP, i.e. type. */
      asm_guardcc(as, CC_NE);
    }
    emit_ai(as, PPCI_CMPWI, type, irt_toitype(irkey->t));
  }
  if (ra_hasreg(key)) emit_tai(as, PPCI_LWZ, key, idx, kofs+4);
  emit_tai(as, PPCI_LWZ, type, idx, kofs);
  if (ofs > 32736) {
    /* Two-instruction address computation for the large-offset case. */
    emit_tai(as, PPCI_ADDIS, dest, dest, (ofs + 32768) >> 16);
    emit_tai(as, PPCI_ADDI, dest, node, ofs);
  }
}
| 839 | |
/* UREF: compute the address of an upvalue's value (TValue *).
** For a constant function the address is known at assembly time.
** IR_UREFC additionally guards that the upvalue is closed and then
** addresses the TValue embedded in the GCupval itself.
*/
static void asm_uref(ASMState *as, IRIns *ir)
{
  Reg dest = ra_dest(as, ir, RSET_GPR);
  if (irref_isk(ir->op1)) {
    /* Known closure: load through the statically-known MRef. */
    GCfunc *fn = ir_kfunc(IR(ir->op1));
    MRef *v = &gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv.v;
    emit_lsptr(as, PPCI_LWZ, dest, v, RSET_GPR);
  } else {
    Reg uv = ra_scratch(as, RSET_GPR);
    Reg func = ra_alloc1(as, ir->op1, RSET_GPR);
    if (ir->o == IR_UREFC) {
      /* Guard uv->closed == 1, result is &uv->tv. (Emitted bottom-up.) */
      asm_guardcc(as, CC_NE);
      emit_ai(as, PPCI_CMPWI, RID_TMP, 1);
      emit_tai(as, PPCI_ADDI, dest, uv, (int32_t)offsetof(GCupval, tv));
      emit_tai(as, PPCI_LBZ, RID_TMP, uv, (int32_t)offsetof(GCupval, closed));
    } else {
      /* Open upvalue: just load the indirect value pointer. */
      emit_tai(as, PPCI_LWZ, dest, uv, (int32_t)offsetof(GCupval, v));
    }
    /* Load the GCupval * from the closure's upvalue array (ir->op2 >> 8). */
    emit_tai(as, PPCI_LWZ, uv, func,
             (int32_t)offsetof(GCfuncL, uvptr) + 4*(int32_t)(ir->op2 >> 8));
  }
}
| 862 | |
| 863 static void asm_fref(ASMState *as, IRIns *ir) | |
| 864 { | |
| 865 UNUSED(as); UNUSED(ir); | |
| 866 lj_assertA(!ra_used(ir), "unfused FREF"); | |
| 867 } | |
| 868 | |
/* STRREF: address of a character inside a string, i.e.
** str + index + sizeof(GCstr). One operand may be a constant; a constant
** index from a fusible ADD may be folded into the displacement.
*/
static void asm_strref(ASMState *as, IRIns *ir)
{
  Reg dest = ra_dest(as, ir, RSET_GPR);
  IRRef ref = ir->op2, refk = ir->op1;
  int32_t ofs = (int32_t)sizeof(GCstr);
  Reg r;
  if (irref_isk(ref)) {
    /* Canonicalize: keep the constant (if any) in refk. */
    IRRef tmp = refk; refk = ref; ref = tmp;
  } else if (!irref_isk(refk)) {
    /* Both operands variable: dest = left + right (+ folded constant). */
    Reg right, left = ra_alloc1(as, ir->op1, RSET_GPR);
    IRIns *irr = IR(ir->op2);
    if (ra_hasreg(irr->r)) {
      ra_noweak(as, irr->r);
      right = irr->r;
    } else if (mayfuse(as, irr->op2) &&
               irr->o == IR_ADD && irref_isk(irr->op2) &&
               checki16(ofs + IR(irr->op2)->i)) {
      /* Fold idx+K into the displacement if it still fits in 16 bits. */
      ofs += IR(irr->op2)->i;
      right = ra_alloc1(as, irr->op1, rset_exclude(RSET_GPR, left));
    } else {
      right = ra_allocref(as, ir->op2, rset_exclude(RSET_GPR, left));
    }
    /* Emitted bottom-up: add first, then the constant displacement. */
    emit_tai(as, PPCI_ADDI, dest, dest, ofs);
    emit_tab(as, PPCI_ADD, dest, left, right);
    return;
  }
  /* One constant operand: single ADDI, or ADD with a loaded constant. */
  r = ra_alloc1(as, ref, RSET_GPR);
  ofs += IR(refk)->i;
  if (checki16(ofs))
    emit_tai(as, PPCI_ADDI, dest, r, ofs);
  else
    emit_tab(as, PPCI_ADD, dest, r,
             ra_allock(as, ofs, rset_exclude(RSET_GPR, r)));
}
| 903 | |
| 904 /* -- Loads and stores ---------------------------------------------------- */ | |
| 905 | |
| 906 static PPCIns asm_fxloadins(ASMState *as, IRIns *ir) | |
| 907 { | |
| 908 UNUSED(as); | |
| 909 switch (irt_type(ir->t)) { | |
| 910 case IRT_I8: return PPCI_LBZ; /* Needs sign-extension. */ | |
| 911 case IRT_U8: return PPCI_LBZ; | |
| 912 case IRT_I16: return PPCI_LHA; | |
| 913 case IRT_U16: return PPCI_LHZ; | |
| 914 case IRT_NUM: lj_assertA(!LJ_SOFTFP, "unsplit FP op"); return PPCI_LFD; | |
| 915 case IRT_FLOAT: if (!LJ_SOFTFP) return PPCI_LFS; | |
| 916 default: return PPCI_LWZ; | |
| 917 } | |
| 918 } | |
| 919 | |
| 920 static PPCIns asm_fxstoreins(ASMState *as, IRIns *ir) | |
| 921 { | |
| 922 UNUSED(as); | |
| 923 switch (irt_type(ir->t)) { | |
| 924 case IRT_I8: case IRT_U8: return PPCI_STB; | |
| 925 case IRT_I16: case IRT_U16: return PPCI_STH; | |
| 926 case IRT_NUM: lj_assertA(!LJ_SOFTFP, "unsplit FP op"); return PPCI_STFD; | |
| 927 case IRT_FLOAT: if (!LJ_SOFTFP) return PPCI_STFS; | |
| 928 default: return PPCI_STW; | |
| 929 } | |
| 930 } | |
| 931 | |
/* FLOAD: load a field of a GC object (or of GG_State when op1 == REF_NIL).
** field_ofs[] maps the IRFL_* field id to its byte offset.
*/
static void asm_fload(ASMState *as, IRIns *ir)
{
  Reg dest = ra_dest(as, ir, RSET_GPR);
  PPCIns pi = asm_fxloadins(as, ir);
  Reg idx;
  int32_t ofs;
  if (ir->op1 == REF_NIL) {  /* FLOAD from GG_State with offset. */
    /* RID_JGL points 32768 past the global state (see offset bias below). */
    idx = RID_JGL;
    ofs = (ir->op2 << 2) - 32768 - GG_OFS(g);
  } else {
    idx = ra_alloc1(as, ir->op1, RSET_GPR);
    if (ir->op2 == IRFL_TAB_ARRAY) {
      ofs = asm_fuseabase(as, ir->op1);
      if (ofs) {  /* Turn the t->array load into an add for colocated arrays. */
        emit_tai(as, PPCI_ADDI, dest, idx, ofs);
        return;
      }
    }
    ofs = field_ofs[ir->op2];
  }
  lj_assertA(!irt_isi8(ir->t), "unsupported FLOAD I8");
  emit_tai(as, pi, dest, idx, ofs);
}
| 955 | |
| 956 static void asm_fstore(ASMState *as, IRIns *ir) | |
| 957 { | |
| 958 if (ir->r != RID_SINK) { | |
| 959 Reg src = ra_alloc1(as, ir->op2, RSET_GPR); | |
| 960 IRIns *irf = IR(ir->op1); | |
| 961 Reg idx = ra_alloc1(as, irf->op1, rset_exclude(RSET_GPR, src)); | |
| 962 int32_t ofs = field_ofs[irf->op2]; | |
| 963 PPCIns pi = asm_fxstoreins(as, ir); | |
| 964 emit_tai(as, pi, src, idx, ofs); | |
| 965 } | |
| 966 } | |
| 967 | |
/* XLOAD: load from an arbitrary (FFI) address, with address fusion.
** I8 loads use LBZ and an extra EXTSB (emitted after the load, i.e.
** listed first here because code is emitted bottom-up).
*/
static void asm_xload(ASMState *as, IRIns *ir)
{
  Reg dest = ra_dest(as, ir,
                     (!LJ_SOFTFP && irt_isfp(ir->t)) ? RSET_FPR : RSET_GPR);
  lj_assertA(!(ir->op2 & IRXLOAD_UNALIGNED), "unaligned XLOAD");
  if (irt_isi8(ir->t))
    emit_as(as, PPCI_EXTSB, dest, dest);  /* Sign-extend the byte load. */
  asm_fusexref(as, asm_fxloadins(as, ir), dest, ir->op1, RSET_GPR, 0);
}
| 977 | |
/* XSTORE: store to an arbitrary (FFI) address, with address fusion.
** A BSWAP feeding a 32 bit XSTORE is fused into a single stwbrx.
** ofs is an extra displacement used by 64 bit split stores.
*/
static void asm_xstore_(ASMState *as, IRIns *ir, int32_t ofs)
{
  IRIns *irb;
  if (ir->r == RID_SINK)  /* Store was sunk: nothing to emit. */
    return;
  if (ofs == 0 && mayfuse(as, ir->op2) && (irb = IR(ir->op2))->o == IR_BSWAP &&
      ra_noreg(irb->r) && (irt_isint(ir->t) || irt_isu32(ir->t))) {
    /* Fuse BSWAP with XSTORE to stwbrx. */
    Reg src = ra_alloc1(as, irb->op1, RSET_GPR);
    asm_fusexrefx(as, PPCI_STWBRX, src, ir->op1, rset_exclude(RSET_GPR, src));
  } else {
    Reg src = ra_alloc1(as, ir->op2,
                        (!LJ_SOFTFP && irt_isfp(ir->t)) ? RSET_FPR : RSET_GPR);
    asm_fusexref(as, asm_fxstoreins(as, ir), src, ir->op1,
                 rset_exclude(RSET_GPR, src), ofs);
  }
}

#define asm_xstore(as, ir)	asm_xstore_(as, ir, 0)
| 997 | |
/* ALOAD/HLOAD/ULOAD/VLOAD: load a TValue from array/hash/upvalue storage.
** Emits (bottom-up) the type-word load, a type-check guard and, if the
** result is used, the value load. AHUREF_LSX marks an unresolved
** load-with-shifted-index addressing mode.
*/
static void asm_ahuvload(ASMState *as, IRIns *ir)
{
  IRType1 t = ir->t;
  Reg dest = RID_NONE, type = RID_TMP, tmp = RID_TMP, idx;
  RegSet allow = RSET_GPR;
  int32_t ofs = AHUREF_LSX;
  if (LJ_SOFTFP && (ir+1)->o == IR_HIOP) {
    /* Soft-float: the following HIOP consumes the type/hi word. */
    t.irt = IRT_NUM;
    if (ra_used(ir+1)) {
      type = ra_dest(as, ir+1, allow);
      rset_clear(allow, type);
    }
    ofs = 0;
  }
  if (ra_used(ir)) {
    lj_assertA((LJ_SOFTFP ? 0 : irt_isnum(ir->t)) ||
               irt_isint(ir->t) || irt_isaddr(ir->t),
               "bad load type %d", irt_type(ir->t));
    if (LJ_SOFTFP || !irt_isnum(t)) ofs = 0;
    dest = ra_dest(as, ir, (!LJ_SOFTFP && irt_isnum(t)) ? RSET_FPR : allow);
    rset_clear(allow, dest);
  }
  idx = asm_fuseahuref(as, ir->op1, &ofs, allow);
  if (ir->o == IR_VLOAD) {
    /* VLOAD indexes multiple stack slots: bias the offset by 8*op2. */
    ofs = ofs != AHUREF_LSX ? ofs + 8 * ir->op2 :
          ir->op2 ? 8 * ir->op2 : AHUREF_LSX;
  }
  if (irt_isnum(t)) {
    /* Numbers: guard type < LJ_TISNUM (unsigned compare). */
    Reg tisnum = ra_allock(as, (int32_t)LJ_TISNUM, rset_exclude(allow, idx));
    asm_guardcc(as, CC_GE);
    emit_ab(as, PPCI_CMPLW, type, tisnum);
    if (ra_hasreg(dest)) {
      if (!LJ_SOFTFP && ofs == AHUREF_LSX) {
        /* Indexed FP load: needs a scratch for the shifted index. */
        tmp = ra_scratch(as, rset_exclude(rset_exclude(RSET_GPR,
                                                       (idx&255)), (idx>>8)));
        emit_fab(as, PPCI_LFDX, dest, (idx&255), tmp);
      } else {
        emit_fai(as, LJ_SOFTFP ? PPCI_LWZ : PPCI_LFD, dest, idx,
                 ofs+4*LJ_SOFTFP);
      }
    }
  } else {
    /* Other types: guard for an exact itype match. */
    asm_guardcc(as, CC_NE);
    emit_ai(as, PPCI_CMPWI, type, irt_toitype(t));
    if (ra_hasreg(dest)) emit_tai(as, PPCI_LWZ, dest, idx, ofs+4);
  }
  if (ofs == AHUREF_LSX) {
    /* Load the type word via base + (index << 3). */
    emit_tab(as, PPCI_LWZX, type, (idx&255), tmp);
    emit_slwi(as, tmp, (idx>>8), 3);
  } else {
    emit_tai(as, PPCI_LWZ, type, idx, ofs);
  }
}
| 1051 | |
/* ASTORE/HSTORE/USTORE: store a TValue into array/hash/upvalue storage.
** Writes the value word and the type word (or a full FP double).
** Emitted bottom-up; AHUREF_LSX selects indexed addressing.
*/
static void asm_ahustore(ASMState *as, IRIns *ir)
{
  RegSet allow = RSET_GPR;
  Reg idx, src = RID_NONE, type = RID_NONE;
  int32_t ofs = AHUREF_LSX;
  if (ir->r == RID_SINK)  /* Store was sunk: nothing to emit. */
    return;
  if (!LJ_SOFTFP && irt_isnum(ir->t)) {
    src = ra_alloc1(as, ir->op2, RSET_FPR);
  } else {
    if (!irt_ispri(ir->t)) {
      /* Non-primitive: need a register for the value word. */
      src = ra_alloc1(as, ir->op2, allow);
      rset_clear(allow, src);
      ofs = 0;
    }
    /* Type word: from the paired HIOP (soft-float num) or a constant. */
    if (LJ_SOFTFP && (ir+1)->o == IR_HIOP)
      type = ra_alloc1(as, (ir+1)->op2, allow);
    else
      type = ra_allock(as, (int32_t)irt_toitype(ir->t), allow);
    rset_clear(allow, type);
  }
  idx = asm_fuseahuref(as, ir->op1, &ofs, allow);
  if (!LJ_SOFTFP && irt_isnum(ir->t)) {
    if (ofs == AHUREF_LSX) {
      emit_fab(as, PPCI_STFDX, src, (idx&255), RID_TMP);
      emit_slwi(as, RID_TMP, (idx>>8), 3);
    } else {
      emit_fai(as, PPCI_STFD, src, idx, ofs);
    }
  } else {
    if (ra_hasreg(src))
      emit_tai(as, PPCI_STW, src, idx, ofs+4);
    if (ofs == AHUREF_LSX) {
      emit_tab(as, PPCI_STWX, type, (idx&255), RID_TMP);
      emit_slwi(as, RID_TMP, (idx>>8), 3);
    } else {
      emit_tai(as, PPCI_STW, type, idx, ofs);
    }
  }
}
| 1092 | |
/* SLOAD: load a stack slot relative to BASE, with optional type check
** and optional int<->num conversion (IRSLOAD_CONVERT). Slot n lives at
** 8*(n-1) bytes; FRAME slots read the frame/type word instead of the value.
** Code is emitted bottom-up: the type-word load at the end executes first.
*/
static void asm_sload(ASMState *as, IRIns *ir)
{
  int32_t ofs = 8*((int32_t)ir->op1-1) + ((ir->op2 & IRSLOAD_FRAME) ? 0 : 4);
  IRType1 t = ir->t;
  Reg dest = RID_NONE, type = RID_NONE, base;
  RegSet allow = RSET_GPR;
  int hiop = (LJ_SOFTFP && (ir+1)->o == IR_HIOP);
  if (hiop)
    t.irt = IRT_NUM;
  lj_assertA(!(ir->op2 & IRSLOAD_PARENT),
             "bad parent SLOAD");  /* Handled by asm_head_side(). */
  lj_assertA(irt_isguard(ir->t) || !(ir->op2 & IRSLOAD_TYPECHECK),
             "inconsistent SLOAD variant");
  lj_assertA(LJ_DUALNUM ||
             !irt_isint(t) ||
             (ir->op2 & (IRSLOAD_CONVERT|IRSLOAD_FRAME|IRSLOAD_KEYINDEX)),
             "bad SLOAD type");
#if LJ_SOFTFP
  lj_assertA(!(ir->op2 & IRSLOAD_CONVERT),
             "unsplit SLOAD convert");  /* Handled by LJ_SOFTFP SPLIT. */
  if (hiop && ra_used(ir+1)) {
    /* The paired HIOP receives the hi/type word. */
    type = ra_dest(as, ir+1, allow);
    rset_clear(allow, type);
  }
#else
  if ((ir->op2 & IRSLOAD_CONVERT) && irt_isguard(t) && irt_isint(t)) {
    /* num->int conversion with guard: check + truncate in one go. */
    dest = ra_scratch(as, RSET_FPR);
    asm_tointg(as, ir, dest);
    t.irt = IRT_NUM;  /* Continue with a regular number type check. */
  } else
#endif
  if (ra_used(ir)) {
    lj_assertA(irt_isnum(t) || irt_isint(t) || irt_isaddr(t),
               "bad SLOAD type %d", irt_type(ir->t));
    dest = ra_dest(as, ir, (!LJ_SOFTFP && irt_isnum(t)) ? RSET_FPR : allow);
    rset_clear(allow, dest);
    base = ra_alloc1(as, REF_BASE, allow);
    rset_clear(allow, base);
    if (!LJ_SOFTFP && (ir->op2 & IRSLOAD_CONVERT)) {
      if (irt_isint(t)) {
        /* num->int: fctiwz to a stack temp, then reload the low word. */
        emit_tai(as, PPCI_LWZ, dest, RID_SP, SPOFS_TMPLO);
        dest = ra_scratch(as, RSET_FPR);
        emit_fai(as, PPCI_STFD, dest, RID_SP, SPOFS_TMP);
        emit_fb(as, PPCI_FCTIWZ, dest, dest);
        t.irt = IRT_NUM;  /* Check for original type. */
      } else {
        /* int->num: classic 2^52+2^31 bias trick through a stack temp. */
        Reg tmp = ra_scratch(as, allow);
        Reg hibias = ra_allock(as, 0x43300000, rset_clear(allow, tmp));
        Reg fbias = ra_scratch(as, rset_exclude(RSET_FPR, dest));
        emit_fab(as, PPCI_FSUB, dest, dest, fbias);
        emit_fai(as, PPCI_LFD, dest, RID_SP, SPOFS_TMP);
        emit_lsptr(as, PPCI_LFS, (fbias & 31),
                   (void *)&as->J->k32[LJ_K32_2P52_2P31],
                   rset_clear(allow, hibias));
        emit_tai(as, PPCI_STW, tmp, RID_SP, SPOFS_TMPLO);
        emit_tai(as, PPCI_STW, hibias, RID_SP, SPOFS_TMPHI);
        emit_asi(as, PPCI_XORIS, tmp, tmp, 0x8000);  /* Flip the sign bit. */
        dest = tmp;
        t.irt = IRT_INT;  /* Check for original type. */
      }
    }
    goto dotypecheck;
  }
  base = ra_alloc1(as, REF_BASE, allow);
  rset_clear(allow, base);
dotypecheck:
  if (irt_isnum(t)) {
    if ((ir->op2 & IRSLOAD_TYPECHECK)) {
      /* Numbers: guard type < LJ_TISNUM (unsigned compare). */
      Reg tisnum = ra_allock(as, (int32_t)LJ_TISNUM, allow);
      asm_guardcc(as, CC_GE);
#if !LJ_SOFTFP
      type = RID_TMP;
#endif
      emit_ab(as, PPCI_CMPLW, type, tisnum);
    }
    if (ra_hasreg(dest)) emit_fai(as, LJ_SOFTFP ? PPCI_LWZ : PPCI_LFD, dest,
                                  base, ofs-(LJ_SOFTFP?0:4));
  } else {
    if ((ir->op2 & IRSLOAD_TYPECHECK)) {
      asm_guardcc(as, CC_NE);
      if ((ir->op2 & IRSLOAD_KEYINDEX)) {
        /* 32 bit constant compare via XORIS of the hi half + CMPWI. */
        emit_ai(as, PPCI_CMPWI, RID_TMP, (LJ_KEYINDEX & 0xffff));
        emit_asi(as, PPCI_XORIS, RID_TMP, RID_TMP, (LJ_KEYINDEX >> 16));
      } else {
        emit_ai(as, PPCI_CMPWI, RID_TMP, irt_toitype(t));
      }
      type = RID_TMP;
    }
    if (ra_hasreg(dest)) emit_tai(as, PPCI_LWZ, dest, base, ofs);
  }
  if (ra_hasreg(type)) emit_tai(as, PPCI_LWZ, type, base, ofs-4);
}
| 1185 | |
| 1186 /* -- Allocations --------------------------------------------------------- */ | |
| 1187 | |
#if LJ_HASFFI
/* CNEW/CNEWI: allocate a cdata object via lj_mem_newgco() (or
** lj_cdata_newv() for VLA/VLS/aligned cdata) and initialize its header.
** CNEWI additionally stores the immutable 4 or 8 byte payload.
** Emitted bottom-up: the call runs first, the stores afterwards.
*/
static void asm_cnew(ASMState *as, IRIns *ir)
{
  CTState *cts = ctype_ctsG(J2G(as->J));
  CTypeID id = (CTypeID)IR(ir->op1)->i;
  CTSize sz;
  CTInfo info = lj_ctype_info(cts, id, &sz);
  const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_mem_newgco];
  IRRef args[4];
  RegSet drop = RSET_SCRATCH;
  lj_assertA(sz != CTSIZE_INVALID || (ir->o == IR_CNEW && ir->op2 != REF_NIL),
             "bad CNEW/CNEWI operands");

  as->gcsteps++;
  if (ra_hasreg(ir->r))
    rset_clear(drop, ir->r);  /* Dest reg handled below. */
  ra_evictset(as, drop);  /* The allocator call clobbers all scratch regs. */
  if (ra_used(ir))
    ra_destreg(as, ir, RID_RET);  /* GCcdata * */

  /* Initialize immutable cdata object. */
  if (ir->o == IR_CNEWI) {
    RegSet allow = (RSET_GPR & ~RSET_SCRATCH);
    int32_t ofs = sizeof(GCcdata);
    lj_assertA(sz == 4 || sz == 8, "bad CNEWI size %d", sz);
    if (sz == 8) {
      ofs += 4;
      lj_assertA((ir+1)->o == IR_HIOP, "expected HIOP for CNEWI");
    }
    /* Store payload word(s); for sz == 8 the HIOP supplies the hi word. */
    for (;;) {
      Reg r = ra_alloc1(as, ir->op2, allow);
      emit_tai(as, PPCI_STW, r, RID_RET, ofs);
      rset_clear(allow, r);
      if (ofs == sizeof(GCcdata)) break;
      ofs -= 4; ir++;
    }
  } else if (ir->op2 != REF_NIL) {  /* Create VLA/VLS/aligned cdata. */
    ci = &lj_ir_callinfo[IRCALL_lj_cdata_newv];
    args[0] = ASMREF_L;     /* lua_State *L */
    args[1] = ir->op1;      /* CTypeID id   */
    args[2] = ir->op2;      /* CTSize sz    */
    args[3] = ASMREF_TMP1;  /* CTSize align */
    asm_gencall(as, ci, args);
    emit_loadi(as, ra_releasetmp(as, ASMREF_TMP1), (int32_t)ctype_align(info));
    return;
  }

  /* Initialize gct and ctypeid. lj_mem_newgco() already sets marked. */
  emit_tai(as, PPCI_STB, RID_RET+1, RID_RET, offsetof(GCcdata, gct));
  emit_tai(as, PPCI_STH, RID_TMP, RID_RET, offsetof(GCcdata, ctypeid));
  emit_ti(as, PPCI_LI, RID_RET+1, ~LJ_TCDATA);
  emit_ti(as, PPCI_LI, RID_TMP, id);  /* Lower 16 bit used. Sign-ext ok. */
  args[0] = ASMREF_L;     /* lua_State *L */
  args[1] = ASMREF_TMP1;  /* MSize size   */
  asm_gencall(as, ci, args);
  ra_allockreg(as, (int32_t)(sz+sizeof(GCcdata)),
               ra_releasetmp(as, ASMREF_TMP1));
}
#endif
| 1247 | |
| 1248 /* -- Write barriers ------------------------------------------------------ */ | |
| 1249 | |
/* TBAR: table write barrier. If the table is black, clear its black bit
** and link it onto the gc.grayagain list; otherwise skip to l_end.
** Emitted bottom-up: the marked-byte load and test execute first.
*/
static void asm_tbar(ASMState *as, IRIns *ir)
{
  Reg tab = ra_alloc1(as, ir->op1, RSET_GPR);
  Reg mark = ra_scratch(as, rset_exclude(RSET_GPR, tab));
  Reg link = RID_TMP;
  MCLabel l_end = emit_label(as);
  emit_tai(as, PPCI_STW, link, tab, (int32_t)offsetof(GCtab, gclist));
  emit_tai(as, PPCI_STB, mark, tab, (int32_t)offsetof(GCtab, marked));
  emit_setgl(as, tab, gc.grayagain);
  lj_assertA(LJ_GC_BLACK == 0x04, "bad LJ_GC_BLACK");
  emit_rot(as, PPCI_RLWINM, mark, mark, 0, 30, 28);  /* Clear black bit. */
  emit_getgl(as, link, gc.grayagain);
  /* Branch past the barrier if the table is not black. */
  emit_condbranch(as, PPCI_BC|PPCF_Y, CC_EQ, l_end);
  emit_asi(as, PPCI_ANDIDOT, RID_TMP, mark, LJ_GC_BLACK);
  emit_tai(as, PPCI_LBZ, mark, tab, (int32_t)offsetof(GCtab, marked));
}
| 1266 | |
/* OBAR: write barrier for a store into a closed upvalue (IR_UREFC only).
** Calls lj_gc_barrieruv() when the upvalue is black and the stored GC
** value is white; otherwise branches to l_end. Emitted bottom-up.
*/
static void asm_obar(ASMState *as, IRIns *ir)
{
  const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_gc_barrieruv];
  IRRef args[2];
  MCLabel l_end;
  Reg obj, val, tmp;
  /* No need for other object barriers (yet). */
  lj_assertA(IR(ir->op1)->o == IR_UREFC, "bad OBAR type");
  ra_evictset(as, RSET_SCRATCH);  /* The barrier call clobbers scratch regs. */
  l_end = emit_label(as);
  args[0] = ASMREF_TMP1;  /* global_State *g */
  args[1] = ir->op1;      /* TValue *tv      */
  asm_gencall(as, ci, args);
  /* g = JGL - 32768 (RID_JGL is biased; see dispatch setup). */
  emit_tai(as, PPCI_ADDI, ra_releasetmp(as, ASMREF_TMP1), RID_JGL, -32768);
  obj = IR(ir->op1)->r;
  tmp = ra_scratch(as, rset_exclude(RSET_GPR, obj));
  /* Skip the call unless the upvalue is black ... */
  emit_condbranch(as, PPCI_BC|PPCF_Y, CC_EQ, l_end);
  emit_asi(as, PPCI_ANDIDOT, tmp, tmp, LJ_GC_BLACK);
  /* ... and the stored value is white. */
  emit_condbranch(as, PPCI_BC, CC_EQ, l_end);
  emit_asi(as, PPCI_ANDIDOT, RID_TMP, RID_TMP, LJ_GC_WHITES);
  val = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, obj));
  /* obj points at uv->tv, so index the marked byte relative to it. */
  emit_tai(as, PPCI_LBZ, tmp, obj,
           (int32_t)offsetof(GCupval, marked)-(int32_t)offsetof(GCupval, tv));
  emit_tai(as, PPCI_LBZ, RID_TMP, val, (int32_t)offsetof(GChead, marked));
}
| 1292 | |
| 1293 /* -- Arithmetic and logic operations ------------------------------------- */ | |
| 1294 | |
| 1295 #if !LJ_SOFTFP | |
| 1296 static void asm_fparith(ASMState *as, IRIns *ir, PPCIns pi) | |
| 1297 { | |
| 1298 Reg dest = ra_dest(as, ir, RSET_FPR); | |
| 1299 Reg right, left = ra_alloc2(as, ir, RSET_FPR); | |
| 1300 right = (left >> 8); left &= 255; | |
| 1301 if (pi == PPCI_FMUL) | |
| 1302 emit_fac(as, pi, dest, left, right); | |
| 1303 else | |
| 1304 emit_fab(as, pi, dest, left, right); | |
| 1305 } | |
| 1306 | |
| 1307 static void asm_fpunary(ASMState *as, IRIns *ir, PPCIns pi) | |
| 1308 { | |
| 1309 Reg dest = ra_dest(as, ir, RSET_FPR); | |
| 1310 Reg left = ra_hintalloc(as, ir->op1, dest, RSET_FPR); | |
| 1311 emit_fb(as, pi, dest, left); | |
| 1312 } | |
| 1313 | |
| 1314 static void asm_fpmath(ASMState *as, IRIns *ir) | |
| 1315 { | |
| 1316 if (ir->op2 == IRFPM_SQRT && (as->flags & JIT_F_SQRT)) | |
| 1317 asm_fpunary(as, ir, PPCI_FSQRT); | |
| 1318 else | |
| 1319 asm_callid(as, ir, IRCALL_lj_vm_floor + ir->op2); | |
| 1320 } | |
| 1321 #endif | |
| 1322 | |
/* ADD: FP add (with FMA fusion) or integer add with immediate folding.
** The flagmcp trick: if the previously emitted instruction (remember,
** emission is bottom-up) was a compare of this result against zero,
** drop it and use the record form (Rc/ADDIC.) to set CR0 directly.
*/
static void asm_add(ASMState *as, IRIns *ir)
{
#if !LJ_SOFTFP
  if (irt_isnum(ir->t)) {
    if (!asm_fusemadd(as, ir, PPCI_FMADD, PPCI_FMADD))
      asm_fparith(as, ir, PPCI_FADD);
  } else
#endif
  {
    Reg dest = ra_dest(as, ir, RSET_GPR);
    Reg right, left = ra_hintalloc(as, ir->op1, dest, RSET_GPR);
    PPCIns pi;
    if (irref_isk(ir->op2)) {
      int32_t k = IR(ir->op2)->i;
      if (checki16(k)) {
        pi = PPCI_ADDI;
        /* May fail due to spills/restores above, but simplifies the logic. */
        if (as->flagmcp == as->mcp) {
          as->flagmcp = NULL;
          as->mcp++;  /* Kill the redundant compare instruction. */
          pi = PPCI_ADDICDOT;
        }
        emit_tai(as, pi, dest, left, k);
        return;
      } else if ((k & 0xffff) == 0) {
        /* Hi-half-only constant: single ADDIS. */
        emit_tai(as, PPCI_ADDIS, dest, left, (k >> 16));
        return;
      } else if (!as->sectref) {
        /* Full 32 bit constant: ADDI + ADDIS with rounding bias. */
        emit_tai(as, PPCI_ADDIS, dest, dest, (k + 32768) >> 16);
        emit_tai(as, PPCI_ADDI, dest, left, k);
        return;
      }
    }
    pi = PPCI_ADD;
    /* May fail due to spills/restores above, but simplifies the logic. */
    if (as->flagmcp == as->mcp) {
      as->flagmcp = NULL;
      as->mcp++;
      pi |= PPCF_DOT;  /* Use the record form (add.) instead. */
    }
    right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left));
    emit_tab(as, pi, dest, left, right);
  }
}
| 1367 | |
/* SUB: FP subtract (with FMA fusion) or integer subtract.
** Note the PPC operand order: SUBF rt,ra,rb computes rb - ra.
*/
static void asm_sub(ASMState *as, IRIns *ir)
{
#if !LJ_SOFTFP
  if (irt_isnum(ir->t)) {
    if (!asm_fusemadd(as, ir, PPCI_FMSUB, PPCI_FNMSUB))
      asm_fparith(as, ir, PPCI_FSUB);
  } else
#endif
  {
    PPCIns pi = PPCI_SUBF;
    Reg dest = ra_dest(as, ir, RSET_GPR);
    Reg left, right;
    if (irref_isk(ir->op1)) {
      int32_t k = IR(ir->op1)->i;
      if (checki16(k)) {
        /* K - x folds to a single SUBFIC. */
        right = ra_alloc1(as, ir->op2, RSET_GPR);
        emit_tai(as, PPCI_SUBFIC, dest, right, k);
        return;
      }
    }
    /* May fail due to spills/restores above, but simplifies the logic. */
    if (as->flagmcp == as->mcp) {
      as->flagmcp = NULL;
      as->mcp++;  /* Drop compare-with-zero, use the record form. */
      pi |= PPCF_DOT;
    }
    left = ra_hintalloc(as, ir->op1, dest, RSET_GPR);
    right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left));
    emit_tab(as, pi, dest, right, left);  /* Subtract right _from_ left. */
  }
}
| 1399 | |
/* MUL: FP or integer multiply, folding small constants into MULLI. */
static void asm_mul(ASMState *as, IRIns *ir)
{
#if !LJ_SOFTFP
  if (irt_isnum(ir->t)) {
    asm_fparith(as, ir, PPCI_FMUL);
  } else
#endif
  {
    PPCIns pi = PPCI_MULLW;
    Reg dest = ra_dest(as, ir, RSET_GPR);
    Reg right, left = ra_hintalloc(as, ir->op1, dest, RSET_GPR);
    if (irref_isk(ir->op2)) {
      int32_t k = IR(ir->op2)->i;
      if (checki16(k)) {
        emit_tai(as, PPCI_MULLI, dest, left, k);
        return;
      }
    }
    /* May fail due to spills/restores above, but simplifies the logic. */
    if (as->flagmcp == as->mcp) {
      as->flagmcp = NULL;
      as->mcp++;  /* Drop compare-with-zero, use the record form (mullw.). */
      pi |= PPCF_DOT;
    }
    right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left));
    emit_tab(as, pi, dest, left, right);
  }
}
| 1428 | |
#define asm_fpdiv(as, ir)	asm_fparith(as, ir, PPCI_FDIV)

/* NEG: FP or integer negation, with the usual flag-fusion (neg.). */
static void asm_neg(ASMState *as, IRIns *ir)
{
#if !LJ_SOFTFP
  if (irt_isnum(ir->t)) {
    asm_fpunary(as, ir, PPCI_FNEG);
  } else
#endif
  {
    Reg dest, left;
    PPCIns pi = PPCI_NEG;
    /* Fuse a following compare-with-zero into the record form (neg.). */
    if (as->flagmcp == as->mcp) {
      as->flagmcp = NULL;
      as->mcp++;
      pi |= PPCF_DOT;
    }
    dest = ra_dest(as, ir, RSET_GPR);
    left = ra_hintalloc(as, ir->op1, dest, RSET_GPR);
    emit_tab(as, pi, dest, left, 0);
  }
}
| 1451 | |
#define asm_abs(as, ir)		asm_fpunary(as, ir, PPCI_FABS)

/* ADDOV/SUBOV/MULOV: arithmetic that guards on signed overflow.
** Uses the OE+Rc form of the instruction and exits on CR0 summary
** overflow (CC_SO). SUBFO computes rb - ra, hence the operand swap.
*/
static void asm_arithov(ASMState *as, IRIns *ir, PPCIns pi)
{
  Reg dest, left, right;
  /* A pending compare fusion is useless here: the op always sets CR0. */
  if (as->flagmcp == as->mcp) {
    as->flagmcp = NULL;
    as->mcp++;
  }
  asm_guardcc(as, CC_SO);
  dest = ra_dest(as, ir, RSET_GPR);
  left = ra_alloc2(as, ir, RSET_GPR);
  right = (left >> 8); left &= 255;
  if (pi == PPCI_SUBFO) { Reg tmp = left; left = right; right = tmp; }
  emit_tab(as, pi|PPCF_DOT, dest, left, right);
}

#define asm_addov(as, ir)	asm_arithov(as, ir, PPCI_ADDO)
#define asm_subov(as, ir)	asm_arithov(as, ir, PPCI_SUBFO)
#define asm_mulov(as, ir)	asm_arithov(as, ir, PPCI_MULLWO)
| 1472 | |
| 1473 #if LJ_HASFFI | |
/* 64 bit add, split into lo/hi words. ir is the HIOP (hi word); after
** emitting the carry-consuming hi-word add (ADDE/ADDZE/ADDME), ir-- steps
** to the lo-word op which emits the carry-producing ADDC/ADDIC. Since
** code is emitted bottom-up, the lo-word add executes first.
*/
static void asm_add64(ASMState *as, IRIns *ir)
{
  Reg dest = ra_dest(as, ir, RSET_GPR);
  Reg right, left = ra_alloc1(as, ir->op1, RSET_GPR);
  PPCIns pi = PPCI_ADDE;
  if (irref_isk(ir->op2)) {
    int32_t k = IR(ir->op2)->i;
    if (k == 0)
      pi = PPCI_ADDZE;   /* hi + 0 + carry */
    else if (k == -1)
      pi = PPCI_ADDME;   /* hi - 1 + carry */
    else
      goto needright;
    right = 0;
  } else {
  needright:
    right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left));
  }
  emit_tab(as, pi, dest, left, right);
  ir--;  /* Now handle the lo-word op. */
  dest = ra_dest(as, ir, RSET_GPR);
  left = ra_alloc1(as, ir->op1, RSET_GPR);
  if (irref_isk(ir->op2)) {
    int32_t k = IR(ir->op2)->i;
    if (checki16(k)) {
      emit_tai(as, PPCI_ADDIC, dest, left, k);  /* Sets the carry bit. */
      return;
    }
  }
  right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left));
  emit_tab(as, PPCI_ADDC, dest, left, right);
}
| 1506 | |
/* 64 bit subtract, split into lo/hi words (mirror of asm_add64).
** SUBF* computes rb - ra; the lo word uses SUBFC/SUBFIC to produce the
** borrow consumed by the hi-word SUBFE/SUBFZE/SUBFME.
*/
static void asm_sub64(ASMState *as, IRIns *ir)
{
  Reg dest = ra_dest(as, ir, RSET_GPR);
  Reg left, right = ra_alloc1(as, ir->op2, RSET_GPR);
  PPCIns pi = PPCI_SUBFE;
  if (irref_isk(ir->op1)) {
    int32_t k = IR(ir->op1)->i;
    if (k == 0)
      pi = PPCI_SUBFZE;  /* 0 - hi - borrow */
    else if (k == -1)
      pi = PPCI_SUBFME;  /* -1 - hi - borrow */
    else
      goto needleft;
    left = 0;
  } else {
  needleft:
    left = ra_alloc1(as, ir->op1, rset_exclude(RSET_GPR, right));
  }
  emit_tab(as, pi, dest, right, left);  /* Subtract right _from_ left. */
  ir--;  /* Now handle the lo-word op. */
  dest = ra_dest(as, ir, RSET_GPR);
  right = ra_alloc1(as, ir->op2, RSET_GPR);
  if (irref_isk(ir->op1)) {
    int32_t k = IR(ir->op1)->i;
    if (checki16(k)) {
      emit_tai(as, PPCI_SUBFIC, dest, right, k);  /* Sets the carry bit. */
      return;
    }
  }
  left = ra_alloc1(as, ir->op1, rset_exclude(RSET_GPR, right));
  emit_tab(as, PPCI_SUBFC, dest, right, left);
}
| 1539 | |
/* 64 bit negate: 0 - x as SUBFIC (lo, sets carry) + SUBFZE (hi).
** ir is the HIOP; ir-- steps to the lo-word op. Emitted bottom-up,
** so the SUBFIC executes before the SUBFZE.
*/
static void asm_neg64(ASMState *as, IRIns *ir)
{
  Reg dest = ra_dest(as, ir, RSET_GPR);
  Reg left = ra_alloc1(as, ir->op1, RSET_GPR);
  emit_tab(as, PPCI_SUBFZE, dest, left, 0);
  ir--;
  dest = ra_dest(as, ir, RSET_GPR);
  left = ra_alloc1(as, ir->op1, RSET_GPR);
  emit_tai(as, PPCI_SUBFIC, dest, left, 0);
}
| 1550 #endif | |
| 1551 | |
/* BNOT: bitwise not, fused with a preceding BAND/BOR/BXOR into
** NAND/NOR/EQV. Plain not is NOR(x, x). Supports flag fusion (Rc).
*/
static void asm_bnot(ASMState *as, IRIns *ir)
{
  Reg dest, left, right;
  PPCIns pi = PPCI_NOR;
  /* Fuse a following compare-with-zero into the record form. */
  if (as->flagmcp == as->mcp) {
    as->flagmcp = NULL;
    as->mcp++;
    pi |= PPCF_DOT;
  }
  dest = ra_dest(as, ir, RSET_GPR);
  if (mayfuse(as, ir->op1)) {
    IRIns *irl = IR(ir->op1);
    if (irl->o == IR_BAND)
      pi ^= (PPCI_NOR ^ PPCI_NAND);  /* not(and) -> nand */
    else if (irl->o == IR_BXOR)
      pi ^= (PPCI_NOR ^ PPCI_EQV);   /* not(xor) -> eqv */
    else if (irl->o != IR_BOR)
      goto nofuse;
    left = ra_hintalloc(as, irl->op1, dest, RSET_GPR);
    right = ra_alloc1(as, irl->op2, rset_exclude(RSET_GPR, left));
  } else {
nofuse:
    left = right = ra_hintalloc(as, ir->op1, dest, RSET_GPR);
  }
  emit_asb(as, pi, dest, left, right);
}
| 1578 | |
/* BSWAP: 32 bit byte swap. A fusible XLOAD becomes a single lwbrx;
** otherwise the classic rotlwi + two rlwimi insert sequence is used.
*/
static void asm_bswap(ASMState *as, IRIns *ir)
{
  Reg dest = ra_dest(as, ir, RSET_GPR);
  IRIns *irx;
  if (mayfuse(as, ir->op1) && (irx = IR(ir->op1))->o == IR_XLOAD &&
      ra_noreg(irx->r) && (irt_isint(irx->t) || irt_isu32(irx->t))) {
    /* Fuse BSWAP with XLOAD to lwbrx. */
    asm_fusexrefx(as, PPCI_LWBRX, dest, irx->op1, RSET_GPR);
  } else {
    Reg left = ra_alloc1(as, ir->op1, RSET_GPR);
    Reg tmp = dest;
    if (tmp == left) {
      /* Can't swap in place: build in RID_TMP, then move to dest. */
      tmp = RID_TMP;
      emit_mr(as, dest, RID_TMP);
    }
    /* Emitted bottom-up: rotlwi runs first, the rlwimi inserts after. */
    emit_rot(as, PPCI_RLWIMI, tmp, left, 24, 16, 23);
    emit_rot(as, PPCI_RLWIMI, tmp, left, 24, 0, 7);
    emit_rotlwi(as, tmp, left, 8);
  }
}
| 1599 | |
/* Fuse BAND with contiguous bitmask and a shift to rlwinm. */
/* pi already carries the A/MB/ME fields; this fills in T (source reg)
** and B (rotate amount) and pushes the instruction word directly onto
** the (bottom-up) machine code buffer.
*/
static void asm_fuseandsh(ASMState *as, PPCIns pi, int32_t mask, IRRef ref)
{
  IRIns *ir;
  Reg left;
  if (mayfuse(as, ref) && (ir = IR(ref), ra_noreg(ir->r)) &&
      irref_isk(ir->op2) && ir->o >= IR_BSHL && ir->o <= IR_BROR) {
    int32_t sh = (IR(ir->op2)->i & 31);
    switch (ir->o) {
    case IR_BSHL:
      /* Shift must not discard bits the mask keeps. */
      if ((mask & ((1u<<sh)-1))) goto nofuse;
      break;
    case IR_BSHR:
      if ((mask & ~((~0u)>>sh))) goto nofuse;
      sh = ((32-sh)&31);  /* Right shift as a left rotate. */
      break;
    case IR_BROL:
      break;  /* Rotate fuses unconditionally. */
    default:
      goto nofuse;
    }
    left = ra_alloc1(as, ir->op1, RSET_GPR);
    *--as->mcp = pi | PPCF_T(left) | PPCF_B(sh);
    return;
  }
nofuse:
  left = ra_alloc1(as, ref, RSET_GPR);
  *--as->mcp = pi | PPCF_T(left);
}
| 1629 | |
/* BAND: bitwise AND. Tries rlwinm mask fusion, 16 bit immediates or andc. */
static void asm_band(ASMState *as, IRIns *ir)
{
  Reg dest, left, right;
  IRRef lref = ir->op1;
  PPCIns dot = 0;
  IRRef op2;
  if (as->flagmcp == as->mcp) {
    /* A preceding compare-with-zero can be eliminated: drop it and use the
    ** record form (Rc/"dot") of the AND to set CR0 instead.
    */
    as->flagmcp = NULL;
    as->mcp++;
    dot = PPCF_DOT;
  }
  dest = ra_dest(as, ir, RSET_GPR);
  if (irref_isk(ir->op2)) {
    int32_t k = IR(ir->op2)->i;
    if (k) {
      /* First check for a contiguous bitmask as used by rlwinm. */
      uint32_t s1 = lj_ffs((uint32_t)k);
      uint32_t k1 = ((uint32_t)k >> s1);
      if ((k1 & (k1+1)) == 0) {
        asm_fuseandsh(as, PPCI_RLWINM|dot | PPCF_A(dest) |
                          PPCF_MB(31-lj_fls((uint32_t)k)) | PPCF_ME(31-s1),
                          k, lref);
        return;
      }
      /* Then check for a contiguous *inverted* mask (wrap-around rlwinm). */
      if (~(uint32_t)k) {
        uint32_t s2 = lj_ffs(~(uint32_t)k);
        uint32_t k2 = (~(uint32_t)k >> s2);
        if ((k2 & (k2+1)) == 0) {
          asm_fuseandsh(as, PPCI_RLWINM|dot | PPCF_A(dest) |
                            PPCF_MB(32-s2) | PPCF_ME(30-lj_fls(~(uint32_t)k)),
                            k, lref);
          return;
        }
      }
    }
    if (checku16(k)) {  /* andi. takes an unsigned 16 bit immediate. */
      left = ra_alloc1(as, lref, RSET_GPR);
      emit_asi(as, PPCI_ANDIDOT, dest, left, k);
      return;
    } else if ((k & 0xffff) == 0) {  /* Upper halfword only: andis. */
      left = ra_alloc1(as, lref, RSET_GPR);
      emit_asi(as, PPCI_ANDISDOT, dest, left, (k >> 16));
      return;
    }
  }
  op2 = ir->op2;
  /* Fuse BAND(x, BNOT(y)) to andc x, y. */
  if (mayfuse(as, op2) && IR(op2)->o == IR_BNOT && ra_noreg(IR(op2)->r)) {
    dot ^= (PPCI_AND ^ PPCI_ANDC);  /* Flip opcode bits AND -> ANDC. */
    op2 = IR(op2)->op1;
  }
  left = ra_hintalloc(as, lref, dest, RSET_GPR);
  right = ra_alloc1(as, op2, rset_exclude(RSET_GPR, left));
  emit_asb(as, PPCI_AND ^ dot, dest, left, right);
}
| 1684 | |
/* Generic OR/XOR: pi is the register form, pik the 16 bit immediate form. */
static void asm_bitop(ASMState *as, IRIns *ir, PPCIns pi, PPCIns pik)
{
  Reg dest = ra_dest(as, ir, RSET_GPR);
  Reg right, left = ra_hintalloc(as, ir->op1, dest, RSET_GPR);
  if (irref_isk(ir->op2)) {
    int32_t k = IR(ir->op2)->i;
    Reg tmp = left;
    /* NOTE: the comma expression intentionally redirects tmp to dest when a
    ** two-instruction immediate sequence is needed (only safe if there is no
    ** pending section reference, i.e. !as->sectref).
    */
    if ((checku16(k) || (k & 0xffff) == 0) || (tmp = dest, !as->sectref)) {
      if (!checku16(k)) {
        /* Emit the upper-halfword op (ori -> oris / xori -> xoris). */
        emit_asi(as, pik ^ (PPCI_ORI ^ PPCI_ORIS), dest, tmp, (k >> 16));
        if ((k & 0xffff) == 0) return;
      }
      emit_asi(as, pik, dest, left, k);
      return;
    }
  }
  /* May fail due to spills/restores above, but simplifies the logic. */
  if (as->flagmcp == as->mcp) {
    as->flagmcp = NULL;
    as->mcp++;
    pi |= PPCF_DOT;  /* Use record form to replace the dropped compare. */
  }
  right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left));
  emit_asb(as, pi, dest, left, right);
}

#define asm_bor(as, ir)		asm_bitop(as, ir, PPCI_OR, PPCI_ORI)
#define asm_bxor(as, ir)	asm_bitop(as, ir, PPCI_XOR, PPCI_XORI)
| 1713 | |
/* Generic shift/rotate: pi is the variable-shift opcode; pik is the
** immediate-shift opcode, with 0/1 as sentinels for slwi/srwi (both are
** encoded as rlwinm and have no dedicated immediate opcode).
*/
static void asm_bitshift(ASMState *as, IRIns *ir, PPCIns pi, PPCIns pik)
{
  Reg dest, left;
  Reg dot = 0;  /* NOTE(review): holds a PPCIns flag; only OR'd into opcodes. */
  if (as->flagmcp == as->mcp) {
    /* Eliminate a preceding compare-with-zero via the record ("dot") form. */
    as->flagmcp = NULL;
    as->mcp++;
    dot = PPCF_DOT;
  }
  dest = ra_dest(as, ir, RSET_GPR);
  left = ra_alloc1(as, ir->op1, RSET_GPR);
  if (irref_isk(ir->op2)) {  /* Constant shifts. */
    int32_t shift = (IR(ir->op2)->i & 31);
    if (pik == 0)  /* SLWI */
      emit_rot(as, PPCI_RLWINM|dot, dest, left, shift, 0, 31-shift);
    else if (pik == 1)  /* SRWI */
      emit_rot(as, PPCI_RLWINM|dot, dest, left, (32-shift)&31, shift, 31);
    else
      emit_asb(as, pik|dot, dest, left, shift);
  } else {
    Reg right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left));
    emit_asb(as, pi|dot, dest, left, right);
  }
}

#define asm_bshl(as, ir)	asm_bitshift(as, ir, PPCI_SLW, 0)
#define asm_bshr(as, ir)	asm_bitshift(as, ir, PPCI_SRW, 1)
#define asm_bsar(as, ir)	asm_bitshift(as, ir, PPCI_SRAW, PPCI_SRAWI)
/* Rotate-left maps to rlwnm/rlwinm with a full 0..31 mask. */
#define asm_brol(as, ir) \
  asm_bitshift(as, ir, PPCI_RLWNM|PPCF_MB(0)|PPCF_ME(31), \
		   PPCI_RLWINM|PPCF_MB(0)|PPCF_ME(31))
/* BROR is rewritten to BROL elsewhere, so it must never reach the backend. */
#define asm_bror(as, ir)	lj_assertA(0, "unexpected BROR")
| 1746 | |
#if LJ_SOFTFP
/* Soft-float MIN/MAX: call softfp_cmp, then select hi/lo word pairs.
** Operates on a split op pair: ir is the loword, ir+1 the hiword op.
*/
static void asm_sfpmin_max(ASMState *as, IRIns *ir)
{
  CCallInfo ci = lj_ir_callinfo[IRCALL_softfp_cmp];
  IRRef args[4];
  MCLabel l_right, l_end;
  Reg desthi = ra_dest(as, ir, RSET_GPR), destlo = ra_dest(as, ir+1, RSET_GPR);
  Reg righthi, lefthi = ra_alloc2(as, ir, RSET_GPR);
  Reg rightlo, leftlo = ra_alloc2(as, ir+1, RSET_GPR);
  PPCCC cond = (IROp)ir->o == IR_MIN ? CC_EQ : CC_NE;
  /* ra_alloc2 packs both registers: low byte = left, next byte = right. */
  righthi = (lefthi >> 8); lefthi &= 255;
  rightlo = (leftlo >> 8); leftlo &= 255;
  args[0^LJ_BE] = ir->op1; args[1^LJ_BE] = (ir+1)->op1;
  args[2^LJ_BE] = ir->op2; args[3^LJ_BE] = (ir+1)->op2;
  /* Emitted in reverse: call, compare result, branch, then the two moves. */
  l_end = emit_label(as);
  if (desthi != righthi) emit_mr(as, desthi, righthi);
  if (destlo != rightlo) emit_mr(as, destlo, rightlo);
  l_right = emit_label(as);
  if (l_end != l_right) emit_jmp(as, l_end);
  if (desthi != lefthi) emit_mr(as, desthi, lefthi);
  if (destlo != leftlo) emit_mr(as, destlo, leftlo);
  if (l_right == as->mcp+1) {
    /* The "left" arm turned out empty: invert the condition and drop
    ** the now-redundant branch instruction.
    */
    cond ^= 4; l_right = l_end; ++as->mcp;
  }
  emit_condbranch(as, PPCI_BC, cond, l_right);
  ra_evictset(as, RSET_SCRATCH);
  emit_cmpi(as, RID_RET, 1);
  asm_gencall(as, &ci, args);
}
#endif
| 1777 | |
/* MIN/MAX: branchless select for both FP (fsel) and integer operands. */
static void asm_min_max(ASMState *as, IRIns *ir, int ismax)
{
  if (!LJ_SOFTFP && irt_isnum(ir->t)) {
    Reg dest = ra_dest(as, ir, RSET_FPR);
    Reg tmp = dest;
    Reg right, left = ra_alloc2(as, ir, RSET_FPR);
    right = (left >> 8); left &= 255;  /* Unpack ra_alloc2 register pair. */
    if (tmp == left || tmp == right)
      tmp = ra_scratch(as, rset_exclude(rset_exclude(rset_exclude(RSET_FPR,
					dest), left), right));
    /* dest = (tmp >= 0) ? left : right, with tmp = a - b chosen per ismax. */
    emit_facb(as, PPCI_FSEL, dest, tmp, left, right);
    emit_fab(as, PPCI_FSUB, tmp, ismax ? left : right, ismax ? right : left);
  } else {
    Reg dest = ra_dest(as, ir, RSET_GPR);
    Reg tmp1 = RID_TMP, tmp2 = dest;
    Reg right, left = ra_alloc2(as, ir, RSET_GPR);
    right = (left >> 8); left &= 255;
    if (tmp2 == left || tmp2 == right)
      tmp2 = ra_scratch(as, rset_exclude(rset_exclude(rset_exclude(RSET_GPR,
					 dest), left), right));
    /* Branchless signed min/max via the carry from an unsigned subtract of
    ** the sign-flipped operands (emitted in reverse order below).
    */
    emit_tab(as, PPCI_ADD, dest, tmp2, right);
    emit_asb(as, ismax ? PPCI_ANDC : PPCI_AND, tmp2, tmp2, tmp1);
    emit_tab(as, PPCI_SUBFE, tmp1, tmp1, tmp1);  /* tmp1 = 0 or -1 (mask). */
    emit_tab(as, PPCI_SUBFC, tmp2, tmp2, tmp1);
    emit_asi(as, PPCI_XORIS, tmp2, right, 0x8000);  /* Flip sign bits to */
    emit_asi(as, PPCI_XORIS, tmp1, left, 0x8000);   /* compare unsigned. */
  }
}

#define asm_min(as, ir)		asm_min_max(as, ir, 0)
#define asm_max(as, ir)		asm_min_max(as, ir, 1)
| 1809 | |
/* -- Comparisons --------------------------------------------------------- */

#define CC_UNSIGNED	0x08	/* Unsigned integer comparison. */
#define CC_TWO		0x80	/* Check two flags for FP comparison. */

/* Map of comparisons to flags. ORDER IR. */
/* Low nibble: condition for the integer guard branch (inverted, i.e. the
** case that exits the trace), possibly with CC_UNSIGNED. High nibble:
** inverted condition for the FP guard; CC_TWO marks FP compares that need
** a second CR flag OR'd in (see asm_comp).
*/
static const uint8_t asm_compmap[IR_ABC+1] = {
  /* op     int cc                 FP cc */
  /* LT  */ CC_GE               + (CC_GE<<4),
  /* GE  */ CC_LT               + (CC_LE<<4) + CC_TWO,
  /* LE  */ CC_GT               + (CC_GE<<4) + CC_TWO,
  /* GT  */ CC_LE               + (CC_LE<<4),
  /* ULT */ CC_GE + CC_UNSIGNED + (CC_GT<<4) + CC_TWO,
  /* UGE */ CC_LT + CC_UNSIGNED + (CC_LT<<4),
  /* ULE */ CC_GT + CC_UNSIGNED + (CC_GT<<4),
  /* UGT */ CC_LE + CC_UNSIGNED + (CC_LT<<4) + CC_TWO,
  /* EQ  */ CC_NE               + (CC_NE<<4),
  /* NE  */ CC_EQ               + (CC_EQ<<4),
  /* ABC */ CC_LE + CC_UNSIGNED + (CC_LT<<4) + CC_TWO  /* Same as UGT. */
};
| 1830 | |
/* Emit an integer compare into condition register field cr.
** Uses immediate forms (cmpwi/cmplwi) where the constant fits.
*/
static void asm_intcomp_(ASMState *as, IRRef lref, IRRef rref, Reg cr, PPCCC cc)
{
  Reg right, left = ra_alloc1(as, lref, RSET_GPR);
  if (irref_isk(rref)) {
    int32_t k = IR(rref)->i;
    if ((cc & CC_UNSIGNED) == 0) {  /* Signed comparison with constant. */
      if (checki16(k)) {
        emit_tai(as, PPCI_CMPWI, cr, left, k);
        /* Signed comparison with zero and referencing previous ins? */
        if (k == 0 && lref == as->curins-1)
          as->flagmcp = as->mcp;  /* Allow elimination of the compare. */
        return;
      } else if ((cc & 3) == (CC_EQ & 3)) {  /* Use CMPLWI for EQ or NE. */
        if (checku16(k)) {
          emit_tai(as, PPCI_CMPLWI, cr, left, k);
          return;
        } else if (!as->sectref && ra_noreg(IR(rref)->r)) {
          /* 32 bit constant: xor upper half into RID_TMP, compare lower. */
          emit_tai(as, PPCI_CMPLWI, cr, RID_TMP, k);
          emit_asi(as, PPCI_XORIS, RID_TMP, left, (k >> 16));
          return;
        }
      }
    } else {  /* Unsigned comparison with constant. */
      if (checku16(k)) {
        emit_tai(as, PPCI_CMPLWI, cr, left, k);
        return;
      }
    }
  }
  right = ra_alloc1(as, rref, rset_exclude(RSET_GPR, left));
  emit_tab(as, (cc & CC_UNSIGNED) ? PPCI_CMPLW : PPCI_CMPW, cr, left, right);
}
| 1863 | |
/* Comparison guard: FP uses fcmpu + CR logic, integer uses asm_intcomp_. */
static void asm_comp(ASMState *as, IRIns *ir)
{
  PPCCC cc = asm_compmap[ir->o];
  if (!LJ_SOFTFP && irt_isnum(ir->t)) {
    Reg right, left = ra_alloc2(as, ir, RSET_FPR);
    right = (left >> 8); left &= 255;  /* Unpack ra_alloc2 register pair. */
    asm_guardcc(as, (cc >> 4));  /* High nibble holds the FP condition. */
    if ((cc & CC_TWO))  /* Fold EQ into the tested flag (e.g. GE = GT|EQ). */
      emit_tab(as, PPCI_CROR, ((cc>>4)&3), ((cc>>4)&3), (CC_EQ&3));
    emit_fab(as, PPCI_FCMPU, 0, left, right);
  } else {
    IRRef lref = ir->op1, rref = ir->op2;
    if (irref_isk(lref) && !irref_isk(rref)) {
      /* Swap constants to the right (only for ABC). */
      IRRef tmp = lref; lref = rref; rref = tmp;
      if ((cc & 2) == 0) cc ^= 1;  /* LT <-> GT, LE <-> GE */
    }
    asm_guardcc(as, cc);
    asm_intcomp_(as, lref, rref, 0, cc);
  }
}

#define asm_equal(as, ir)	asm_comp(as, ir)
| 1887 | |
#if LJ_SOFTFP
/* SFP comparisons. */
/* Calls softfp_cmp and guards on its integer result (-1/0/1/2-style codes).
** Code is emitted in reverse, so a switch fallthrough *adds* an extra guard
** that executes after the preceding one at runtime.
*/
static void asm_sfpcomp(ASMState *as, IRIns *ir)
{
  const CCallInfo *ci = &lj_ir_callinfo[IRCALL_softfp_cmp];
  RegSet drop = RSET_SCRATCH;
  Reg r;
  IRRef args[4];
  args[0^LJ_BE] = ir->op1; args[1^LJ_BE] = (ir+1)->op1;
  args[2^LJ_BE] = ir->op2; args[3^LJ_BE] = (ir+1)->op2;

  /* Keep argument registers that already hold the right values. */
  for (r = REGARG_FIRSTGPR; r <= REGARG_FIRSTGPR+3; r++) {
    if (!rset_test(as->freeset, r) &&
	regcost_ref(as->cost[r]) == args[r-REGARG_FIRSTGPR])
      rset_clear(drop, r);
  }
  ra_evictset(as, drop);
  asm_setupresult(as, ir, ci);
  switch ((IROp)ir->o) {
  case IR_ULT:
    asm_guardcc(as, CC_EQ);
    emit_ai(as, PPCI_CMPWI, RID_RET, 0);
    /* fallthrough: ULT also needs the ULE guard below. */
  case IR_ULE:
    asm_guardcc(as, CC_EQ);
    emit_ai(as, PPCI_CMPWI, RID_RET, 1);
    break;
  case IR_GE: case IR_GT:
    asm_guardcc(as, CC_EQ);
    emit_ai(as, PPCI_CMPWI, RID_RET, 2);
    /* fallthrough: also guard via the generic compmap check below. */
  default:
    asm_guardcc(as, (asm_compmap[ir->o] & 0xf));
    emit_ai(as, PPCI_CMPWI, RID_RET, 0);
    break;
  }
  asm_gencall(as, ci, args);
}
#endif
| 1925 | |
#if LJ_HASFFI
/* 64 bit integer comparisons. */
/* ir is the HIOP; ir-1 holds the loword comparison and the real opcode.
** Combines a hiword compare (cr0) with a loword compare (cr1) via CR logic.
*/
static void asm_comp64(ASMState *as, IRIns *ir)
{
  PPCCC cc = asm_compmap[(ir-1)->o];
  if ((cc&3) == (CC_EQ&3)) {  /* EQ/NE: both halves must (mis)match. */
    asm_guardcc(as, cc);
    emit_tab(as, (cc&4) ? PPCI_CRAND : PPCI_CROR,
	     (CC_EQ&3), (CC_EQ&3), 4+(CC_EQ&3));
  } else {
    /* Ordered compare: hiword decides, loword breaks ties on hi-equality. */
    asm_guardcc(as, CC_EQ);
    emit_tab(as, PPCI_CROR, (CC_EQ&3), (CC_EQ&3), ((cc^~(cc>>2))&1));
    emit_tab(as, (cc&4) ? PPCI_CRAND : PPCI_CRANDC,
	     (CC_EQ&3), (CC_EQ&3), 4+(cc&3));
  }
  /* Loword comparison sets cr1 and is unsigned, except for equality. */
  asm_intcomp_(as, (ir-1)->op1, (ir-1)->op2, 4,
	       cc | ((cc&3) == (CC_EQ&3) ? 0 : CC_UNSIGNED));
  /* Hiword comparison sets cr0. */
  asm_intcomp_(as, ir->op1, ir->op2, 0, cc);
  as->flagmcp = NULL;  /* Doesn't work here. */
}
#endif
| 1949 | |
/* -- Split register ops -------------------------------------------------- */

/* Hiword op of a split 32/32 bit op. Previous op is the loword op. */
static void asm_hiop(ASMState *as, IRIns *ir)
{
  /* HIOP is marked as a store because it needs its own DCE logic. */
  int uselo = ra_used(ir-1), usehi = ra_used(ir);  /* Loword/hiword used? */
  if (LJ_UNLIKELY(!(as->flags & JIT_F_OPT_DCE))) uselo = usehi = 1;
#if LJ_HASFFI || LJ_SOFTFP
  if ((ir-1)->o == IR_CONV) {  /* Conversions to/from 64 bit. */
    as->curins--;  /* Always skip the CONV. */
#if LJ_HASFFI && !LJ_SOFTFP
    if (usehi || uselo)
      asm_conv64(as, ir);
    return;
#endif
  } else if ((ir-1)->o <= IR_NE) {  /* 64 bit integer comparisons. ORDER IR. */
    as->curins--;  /* Always skip the loword comparison. */
#if LJ_SOFTFP
    if (!irt_isint(ir->t)) {
      asm_sfpcomp(as, ir-1);
      return;
    }
#endif
#if LJ_HASFFI
    asm_comp64(as, ir);
#endif
    return;
#if LJ_SOFTFP
  } else if ((ir-1)->o == IR_MIN || (ir-1)->o == IR_MAX) {
    as->curins--;  /* Always skip the loword min/max. */
    if (uselo || usehi)
      asm_sfpmin_max(as, ir-1);
    return;
#endif
  } else if ((ir-1)->o == IR_XSTORE) {
    as->curins--;  /* Handle both stores here. */
    if ((ir-1)->r != RID_SINK) {
      /* Hiword at offset 0, loword at offset 4 (big-endian layout). */
      asm_xstore_(as, ir, 0);
      asm_xstore_(as, ir-1, 4);
    }
    return;
  }
#endif
  if (!usehi) return;  /* Skip unused hiword op for all remaining ops. */
  switch ((ir-1)->o) {
#if LJ_HASFFI
  case IR_ADD: as->curins--; asm_add64(as, ir); break;
  case IR_SUB: as->curins--; asm_sub64(as, ir); break;
  case IR_NEG: as->curins--; asm_neg64(as, ir); break;
  case IR_CNEWI:
    /* Nothing to do here. Handled by lo op itself. */
    break;
#endif
#if LJ_SOFTFP
  case IR_SLOAD: case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: case IR_VLOAD:
  case IR_STRTO:
    if (!uselo)
      ra_allocref(as, ir->op1, RSET_GPR);  /* Mark lo op as used. */
    break;
  case IR_ASTORE: case IR_HSTORE: case IR_USTORE: case IR_TOSTR: case IR_TMPREF:
    /* Nothing to do here. Handled by lo op itself. */
    break;
#endif
  case IR_CALLN: case IR_CALLL: case IR_CALLS: case IR_CALLXS:
    if (!uselo)
      ra_allocref(as, ir->op1, RID2RSET(RID_RETLO));  /* Mark lo op as used. */
    break;
  default: lj_assertA(0, "bad HIOP for op %d", (ir-1)->o); break;
  }
}
| 2021 | |
/* -- Profiling ----------------------------------------------------------- */

/* Guard on the HOOK_PROFILE bit of g->hookmask: exit the trace if set. */
static void asm_prof(ASMState *as, IRIns *ir)
{
  UNUSED(ir);
  asm_guardcc(as, CC_NE);
  emit_asi(as, PPCI_ANDIDOT, RID_TMP, RID_TMP, HOOK_PROFILE);
  emit_lsglptr(as, PPCI_LBZ, RID_TMP,
	       (int32_t)offsetof(global_State, hookmask));
}
| 2032 | |
/* -- Stack handling ------------------------------------------------------ */

/* Check Lua stack size for overflow. Use exit handler as fallback. */
/* Emits (in reverse): load cur_L, load maxstack, compute maxstack - base,
** compare with 8*topslot, branch to the exit stub on shortfall.
*/
static void asm_stack_check(ASMState *as, BCReg topslot,
			    IRIns *irp, RegSet allow, ExitNo exitno)
{
  /* Try to get an unused temp. register, otherwise spill/restore RID_RET*. */
  Reg tmp, pbase = irp ? (ra_hasreg(irp->r) ? irp->r : RID_TMP) : RID_BASE;
  rset_clear(allow, pbase);
  tmp = allow ? rset_pickbot(allow) :
		(pbase == RID_RETHI ? RID_RETLO : RID_RETHI);
  emit_condbranch(as, PPCI_BC, CC_LT, asm_exitstub_addr(as, exitno));
  if (allow == RSET_EMPTY)  /* Restore temp. register. */
    emit_tai(as, PPCI_LWZ, tmp, RID_SP, SPOFS_TMPW);
  else
    ra_modified(as, tmp);
  emit_ai(as, PPCI_CMPLWI, RID_TMP, (int32_t)(8*topslot));
  emit_tab(as, PPCI_SUBF, RID_TMP, pbase, tmp);
  emit_tai(as, PPCI_LWZ, tmp, tmp, offsetof(lua_State, maxstack));
  if (pbase == RID_TMP)  /* Parent BASE was spilled: reload from g->jit_base. */
    emit_getgl(as, RID_TMP, jit_base);
  emit_getgl(as, tmp, cur_L);
  if (allow == RSET_EMPTY)  /* Spill temp. register. */
    emit_tai(as, PPCI_STW, tmp, RID_SP, SPOFS_TMPW);
}
| 2058 | |
/* Restore Lua stack from on-trace state. */
/* Writes every modified slot of the given snapshot back to the Lua stack:
** the tag/type word at ofs and the value word at ofs+4 (TValue is 8 bytes).
*/
static void asm_stack_restore(ASMState *as, SnapShot *snap)
{
  SnapEntry *map = &as->T->snapmap[snap->mapofs];
  SnapEntry *flinks = &as->T->snapmap[snap_nextofs(as->T, snap)-1];
  MSize n, nent = snap->nent;
  /* Store the value of all modified slots to the Lua stack. */
  for (n = 0; n < nent; n++) {
    SnapEntry sn = map[n];
    BCReg s = snap_slot(sn);
    int32_t ofs = 8*((int32_t)s-1);  /* Stack offset of slot s. */
    IRRef ref = snap_ref(sn);
    IRIns *ir = IR(ref);
    if ((sn & SNAP_NORESTORE))
      continue;
    if (irt_isnum(ir->t)) {
#if LJ_SOFTFP
      Reg tmp;
      RegSet allow = rset_exclude(RSET_GPR, RID_BASE);
      /* LJ_SOFTFP: must be a number constant. */
      lj_assertA(irref_isk(ref), "unsplit FP op");
      tmp = ra_allock(as, (int32_t)ir_knum(ir)->u32.lo, allow);
      emit_tai(as, PPCI_STW, tmp, RID_BASE, ofs+(LJ_BE?4:0));
      /* Prefer the adjacent register for the hiword if it happens to be
      ** free, to encourage a register-pair constant load.
      */
      if (rset_test(as->freeset, tmp+1)) allow = RID2RSET(tmp+1);
      tmp = ra_allock(as, (int32_t)ir_knum(ir)->u32.hi, allow);
      emit_tai(as, PPCI_STW, tmp, RID_BASE, ofs+(LJ_BE?0:4));
#else
      Reg src = ra_alloc1(as, ref, RSET_FPR);
      emit_fai(as, PPCI_STFD, src, RID_BASE, ofs);
#endif
    } else {
      Reg type;
      RegSet allow = rset_exclude(RSET_GPR, RID_BASE);
      lj_assertA(irt_ispri(ir->t) || irt_isaddr(ir->t) || irt_isinteger(ir->t),
		 "restore of IR type %d", irt_type(ir->t));
      if (!irt_ispri(ir->t)) {  /* Primitives have no value word to store. */
	Reg src = ra_alloc1(as, ref, allow);
	rset_clear(allow, src);
	emit_tai(as, PPCI_STW, src, RID_BASE, ofs+4);
      }
      if ((sn & (SNAP_CONT|SNAP_FRAME))) {
	if (s == 0) continue;  /* Do not overwrite link to previous frame. */
	type = ra_allock(as, (int32_t)(*flinks--), allow);
#if LJ_SOFTFP
      } else if ((sn & SNAP_SOFTFPNUM)) {
	type = ra_alloc1(as, ref+1, rset_exclude(RSET_GPR, RID_BASE));
#endif
      } else if ((sn & SNAP_KEYINDEX)) {
	type = ra_allock(as, (int32_t)LJ_KEYINDEX, allow);
      } else {
	type = ra_allock(as, (int32_t)irt_toitype(ir->t), allow);
      }
      emit_tai(as, PPCI_STW, type, RID_BASE, ofs);
    }
    checkmclim(as);
  }
  lj_assertA(map + nent == flinks, "inconsistent frames in snapshot");
}
| 2117 | |
/* -- GC handling --------------------------------------------------------- */

/* Marker to prevent patching the GC check exit. */
#define PPC_NOPATCH_GC_CHECK	PPCI_ORIS

/* Check GC threshold and do one or more GC steps. */
/* Emits (in reverse): load gc.total/gc.threshold, skip the step if below
** threshold, else call lj_gc_step_jit and exit the trace if it requests it.
*/
static void asm_gc_check(ASMState *as)
{
  const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_gc_step_jit];
  IRRef args[2];
  MCLabel l_end;
  Reg tmp;
  ra_evictset(as, RSET_SCRATCH);
  l_end = emit_label(as);
  /* Exit trace if in GCSatomic or GCSfinalize. Avoids syncing GC objects. */
  asm_guardcc(as, CC_NE);  /* Assumes asm_snap_prep() already done. */
  /* The marker keeps lj_asm_patchexit from redirecting this guard. */
  *--as->mcp = PPC_NOPATCH_GC_CHECK;
  emit_ai(as, PPCI_CMPWI, RID_RET, 0);
  args[0] = ASMREF_TMP1;  /* global_State *g */
  args[1] = ASMREF_TMP2;  /* MSize steps     */
  asm_gencall(as, ci, args);
  /* RID_JGL is biased by 32768; un-bias to get the global_State pointer. */
  emit_tai(as, PPCI_ADDI, ra_releasetmp(as, ASMREF_TMP1), RID_JGL, -32768);
  tmp = ra_releasetmp(as, ASMREF_TMP2);
  emit_loadi(as, tmp, as->gcsteps);
  /* Jump around GC step if GC total < GC threshold. */
  emit_condbranch(as, PPCI_BC|PPCF_Y, CC_LT, l_end);
  emit_ab(as, PPCI_CMPLW, RID_TMP, tmp);
  emit_getgl(as, tmp, gc.threshold);
  emit_getgl(as, RID_TMP, gc.total);
  as->gcsteps = 0;
  checkmclim(as);
}
| 2150 | |
/* -- Loop handling ------------------------------------------------------- */

/* Fixup the loop branch. */
static void asm_loop_fixup(ASMState *as)
{
  MCode *p = as->mctop;
  MCode *target = as->mcp;
  if (as->loopinv) {  /* Inverted loop branch? */
    /* asm_guardcc already inverted the cond branch and patched the final b. */
    /* Rewrite the 14 bit displacement of the conditional branch (bc). */
    p[-2] = (p[-2] & (0xffff0000u & ~PPCF_Y)) | (((target-p+2) & 0x3fffu) << 2);
  } else {
    /* Unconditional 24 bit branch (b) back to the loop start. */
    p[-1] = PPCI_B|(((target-p+1)&0x00ffffffu)<<2);
  }
}
| 2165 | |
/* Fixup the tail of the loop. No fixup needed on PPC. */
static void asm_loop_tail_fixup(ASMState *as)
{
  UNUSED(as);  /* Nothing to do. */
}
| 2171 | |
| 2172 /* -- Head of trace ------------------------------------------------------- */ | |
| 2173 | |
| 2174 /* Coalesce BASE register for a root trace. */ | |
| 2175 static void asm_head_root_base(ASMState *as) | |
| 2176 { | |
| 2177 IRIns *ir = IR(REF_BASE); | |
| 2178 Reg r = ir->r; | |
| 2179 if (ra_hasreg(r)) { | |
| 2180 ra_free(as, r); | |
| 2181 if (rset_test(as->modset, r) || irt_ismarked(ir->t)) | |
| 2182 ir->r = RID_INIT; /* No inheritance for modified BASE register. */ | |
| 2183 if (r != RID_BASE) | |
| 2184 emit_mr(as, r, RID_BASE); | |
| 2185 } | |
| 2186 } | |
| 2187 | |
/* Coalesce BASE register for a side trace. */
/* Returns the parent register BASE is inherited from, or RID_NONE if BASE
** must be reloaded from g->jit_base (or was never in a register).
*/
static Reg asm_head_side_base(ASMState *as, IRIns *irp)
{
  IRIns *ir = IR(REF_BASE);
  Reg r = ir->r;
  if (ra_hasreg(r)) {
    ra_free(as, r);
    if (rset_test(as->modset, r) || irt_ismarked(ir->t))
      ir->r = RID_INIT;  /* No inheritance for modified BASE register. */
    if (irp->r == r) {
      return r;  /* Same BASE register already coalesced. */
    } else if (ra_hasreg(irp->r) && rset_test(as->freeset, irp->r)) {
      emit_mr(as, r, irp->r);  /* Move from coalesced parent reg. */
      return irp->r;
    } else {
      emit_getgl(as, r, jit_base);  /* Otherwise reload BASE. */
    }
  }
  return RID_NONE;
}
| 2208 | |
/* -- Tail of trace ------------------------------------------------------- */

/* Fixup the tail code. */
/* Patches the reserved tail slots: p[-3]/p[-2] get the stack adjustment
** (or become NOPs), p[-1] gets the branch to the linked trace/interpreter.
*/
static void asm_tail_fixup(ASMState *as, TraceNo lnk)
{
  MCode *p = as->mctop;
  MCode *target;
  int32_t spadj = as->T->spadjust;
  if (spadj == 0) {
    /* No adjustment needed: shrink the tail by the two reserved slots. */
    *--p = PPCI_NOP;
    *--p = PPCI_NOP;
    as->mctop = p;
  } else {
    /* Patch stack adjustment. */
    lj_assertA(checki16(CFRAME_SIZE+spadj), "stack adjustment out of range");
    p[-3] = PPCI_ADDI | PPCF_T(RID_TMP) | PPCF_A(RID_SP) | (CFRAME_SIZE+spadj);
    p[-2] = PPCI_STWU | PPCF_T(RID_TMP) | PPCF_A(RID_SP) | spadj;
  }
  /* Patch exit branch. */
  target = lnk ? traceref(as->J, lnk)->mcode : (MCode *)lj_vm_exit_interp;
  p[-1] = PPCI_B|(((target-p+1)&0x00ffffffu)<<2);
}
| 2231 | |
| 2232 /* Prepare tail of code. */ | |
| 2233 static void asm_tail_prep(ASMState *as) | |
| 2234 { | |
| 2235 MCode *p = as->mctop - 1; /* Leave room for exit branch. */ | |
| 2236 if (as->loopref) { | |
| 2237 as->invmcp = as->mcp = p; | |
| 2238 } else { | |
| 2239 as->mcp = p-2; /* Leave room for stack pointer adjustment. */ | |
| 2240 as->invmcp = NULL; | |
| 2241 } | |
| 2242 } | |
| 2243 | |
/* -- Trace setup --------------------------------------------------------- */

/* Ensure there are enough stack slots for call arguments. */
/* Counts GPR/FPR argument registers and overflow slots per the PPC calling
** convention; returns the result register hint (FP or GP return).
*/
static Reg asm_setup_call_slots(ASMState *as, IRIns *ir, const CCallInfo *ci)
{
  IRRef args[CCI_NARGS_MAX*2];
  uint32_t i, nargs = CCI_XNARGS(ci);
  int nslots = 2, ngpr = REGARG_NUMGPR, nfpr = REGARG_NUMFPR;
  asm_collectargs(as, ir, ci, args);
  for (i = 0; i < nargs; i++)
    if (!LJ_SOFTFP && args[i] && irt_isfp(IR(args[i])->t)) {
      /* FP overflow args take a doubleword-aligned slot pair. */
      if (nfpr > 0) nfpr--; else nslots = (nslots+3) & ~1;
    } else {
      if (ngpr > 0) ngpr--; else nslots++;
    }
  if (nslots > as->evenspill)  /* Leave room for args in stack slots. */
    as->evenspill = nslots;
  return (!LJ_SOFTFP && irt_isfp(ir->t)) ? REGSP_HINT(RID_FPRET) :
					   REGSP_HINT(RID_RET);
}
| 2264 | |
| 2265 static void asm_setup_target(ASMState *as) | |
| 2266 { | |
| 2267 asm_exitstub_setup(as, as->T->nsnap + (as->parent ? 1 : 0)); | |
| 2268 } | |
| 2269 | |
/* -- Trace patching ------------------------------------------------------ */

/* Patch exit jumps of existing machine code to a new target. */
void lj_asm_patchexit(jit_State *J, GCtrace *T, ExitNo exitno, MCode *target)
{
  MCode *p = T->mcode;
  MCode *pe = (MCode *)((char *)p + T->szmcode);
  MCode *px = exitstub_trace_addr(T, exitno);
  MCode *cstart = NULL;
  MCode *mcarea = lj_mcode_patch(J, p, 0);
  int clearso = 0, patchlong = 1;
  for (; p < pe; p++) {
    /* Look for exitstub branch, try to replace with branch to target. */
    uint32_t ins = *p;
    /* Conditional branch (bc, primary opcode 16) to this exit's stub? */
    if ((ins & 0xfc000000u) == 0x40000000u &&
	((ins ^ ((char *)px-(char *)p)) & 0xffffu) == 0) {
      ptrdiff_t delta = (char *)target - (char *)p;
      if (((ins >> 16) & 3) == (CC_SO&3)) {
	/* SO-flag guard: the target gets an extra SO-clearing prefix insn. */
	clearso = sizeof(MCode);
	delta -= sizeof(MCode);
      }
      /* Many, but not all short-range branches can be patched directly. */
      if (p[-1] == PPC_NOPATCH_GC_CHECK) {
	patchlong = 0;  /* GC-check exits must keep going through the stub. */
      } else if (((delta + 0x8000) >> 16) == 0) {
	/* Fits in the 16 bit bc displacement; set the static-prediction
	** Y bit for backward branches.
	*/
	*p = (ins & 0xffdf0000u) | ((uint32_t)delta & 0xffffu) |
	     ((delta & 0x8000) * (PPCF_Y/0x8000));
	if (!cstart) cstart = p;
      }
    } else if ((ins & 0xfc000000u) == PPCI_B &&
	       ((ins ^ ((char *)px-(char *)p)) & 0x03ffffffu) == 0) {
      /* Unconditional branch (b) to the stub: retarget it directly. */
      ptrdiff_t delta = (char *)target - (char *)p;
      lj_assertJ(((delta + 0x02000000) >> 26) == 0,
		 "branch target out of range");
      *p = PPCI_B | ((uint32_t)delta & 0x03ffffffu);
      if (!cstart) cstart = p;
    }
  }
  /* Always patch long-range branch in exit stub itself. Except, if we can't. */
  if (patchlong) {
    ptrdiff_t delta = (char *)target - (char *)px - clearso;
    lj_assertJ(((delta + 0x02000000) >> 26) == 0,
	       "branch target out of range");
    *px = PPCI_B | ((uint32_t)delta & 0x03ffffffu);
  }
  if (!cstart) cstart = px;
  lj_mcode_sync(cstart, px+1);  /* Flush I-cache over the patched range. */
  if (clearso) {  /* Extend the current trace. Ugly workaround. */
    MCode *pp = J->cur.mcode;
    J->cur.szmcode += sizeof(MCode);
    *--pp = PPCI_MCRXR;  /* Clear SO flag. */
    J->cur.mcode = pp;
    lj_mcode_sync(pp, pp+1);
  }
  lj_mcode_patch(J, mcarea, 1);
}
| 2326 |