Mercurial
comparison third_party/luajit/src/lj_opt_split.c @ 178:94705b5986b3
[ThirdParty] Added WRK and luajit for load testing.
| author | MrJuneJune <me@mrjunejune.com> |
|---|---|
| date | Thu, 22 Jan 2026 20:10:30 -0800 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 177:24fe8ff94056 | 178:94705b5986b3 |
|---|---|
| 1 /* | |
| 2 ** SPLIT: Split 64 bit IR instructions into 32 bit IR instructions. | |
| 3 ** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h | |
| 4 */ | |
| 5 | |
| 6 #define lj_opt_split_c | |
| 7 #define LUA_CORE | |
| 8 | |
| 9 #include "lj_obj.h" | |
| 10 | |
| 11 #if LJ_HASJIT && (LJ_SOFTFP32 || (LJ_32 && LJ_HASFFI)) | |
| 12 | |
| 13 #include "lj_err.h" | |
| 14 #include "lj_buf.h" | |
| 15 #include "lj_ir.h" | |
| 16 #include "lj_jit.h" | |
| 17 #include "lj_ircall.h" | |
| 18 #include "lj_iropt.h" | |
| 19 #include "lj_dispatch.h" | |
| 20 #include "lj_vm.h" | |
| 21 | |
| 22 /* SPLIT pass: | |
| 23 ** | |
| 24 ** This pass splits up 64 bit IR instructions into multiple 32 bit IR | |
| 25 ** instructions. It's only active for soft-float targets or for 32 bit CPUs | |
| 26 ** which lack native 64 bit integer operations (the FFI is currently the | |
| 27 ** only emitter for 64 bit integer instructions). | |
| 28 ** | |
| 29 ** Splitting the IR in a separate pass keeps each 32 bit IR assembler | |
| 30 ** backend simple. Only a small amount of extra functionality needs to be | |
| 31 ** implemented. This is much easier than adding support for allocating | |
| 32 ** register pairs to each backend (believe me, I tried). A few simple, but | |
| 33 ** important optimizations can be performed by the SPLIT pass, which would | |
| 34 ** be tedious to do in the backend. | |
| 35 ** | |
| 36 ** The basic idea is to replace each 64 bit IR instruction with its 32 bit | |
| 37 ** equivalent plus an extra HIOP instruction. The split IR is not passed | |
| 38 ** through FOLD or any other optimizations, so each HIOP is guaranteed to | |
| 39 ** immediately follow its counterpart. The actual functionality of HIOP is | |
| 40 ** inferred from the previous instruction. | |
| 41 ** | |
| 42 ** The operands of HIOP hold the hiword input references. The output of HIOP | |
| 43 ** is the hiword output reference, which is also used to hold the hiword | |
| 44 ** register or spill slot information. The register allocator treats this | |
| 45 ** instruction independently of any other instruction, which improves code | |
| 46 ** quality compared to using fixed register pairs. | |
| 47 ** | |
| 48 ** It's easier to split up some instructions into two regular 32 bit | |
| 49 ** instructions. E.g. XLOAD is split up into two XLOADs with two different | |
| 50 ** addresses. Obviously 64 bit constants need to be split up into two 32 bit | |
| 51 ** constants, too. Some hiword instructions can be entirely omitted, e.g. | |
| 52 ** when zero-extending a 32 bit value to 64 bits. 64 bit arguments for calls | |
| 53 ** are split up into two 32 bit arguments each. | |
| 54 ** | |
| 55 ** On soft-float targets, floating-point instructions are directly converted | |
| 56 ** to soft-float calls by the SPLIT pass (except for comparisons and MIN/MAX). | |
| 57 ** HIOP for number results has the type IRT_SOFTFP ("sfp" in -jdump). | |
| 58 ** | |
| 59 ** Here's the IR and x64 machine code for 'x.b = x.a + 1' for a struct with | |
| 60 ** two int64_t fields: | |
| 61 ** | |
| 62 ** 0100 p32 ADD base +8 | |
| 63 ** 0101 i64 XLOAD 0100 | |
| 64 ** 0102 i64 ADD 0101 +1 | |
| 65 ** 0103 p32 ADD base +16 | |
| 66 ** 0104 i64 XSTORE 0103 0102 | |
| 67 ** | |
| 68 ** mov rax, [esi+0x8] | |
| 69 ** add rax, +0x01 | |
| 70 ** mov [esi+0x10], rax | |
| 71 ** | |
| 72 ** Here's the transformed IR and the x86 machine code after the SPLIT pass: | |
| 73 ** | |
| 74 ** 0100 p32 ADD base +8 | |
| 75 ** 0101 int XLOAD 0100 | |
| 76 ** 0102 p32 ADD base +12 | |
| 77 ** 0103 int XLOAD 0102 | |
| 78 ** 0104 int ADD 0101 +1 | |
| 79 ** 0105 int HIOP 0103 +0 | |
| 80 ** 0106 p32 ADD base +16 | |
| 81 ** 0107 int XSTORE 0106 0104 | |
| 82 ** 0108 int HIOP 0106 0105 | |
| 83 ** | |
| 84 ** mov eax, [esi+0x8] | |
| 85 ** mov ecx, [esi+0xc] | |
| 86 ** add eax, +0x01 | |
| 87 ** adc ecx, +0x00 | |
| 88 ** mov [esi+0x10], eax | |
| 89 ** mov [esi+0x14], ecx | |
| 90 ** | |
| 91 ** You may notice the reassociated hiword address computation, which is | |
| 92 ** later fused into the mov operands by the assembler. | |
| 93 */ | |
| 94 | |
| 95 /* Some local macros to save typing. Undef'd at the end. */ | |
| 96 #define IR(ref) (&J->cur.ir[(ref)]) | |
| 97 | |
| 98 /* Directly emit the transformed IR without updating chains etc. */ | |
| 99 static IRRef split_emit(jit_State *J, uint16_t ot, IRRef1 op1, IRRef1 op2) | |
| 100 { | |
| 101 IRRef nref = lj_ir_nextins(J); | |
| 102 IRIns *ir = IR(nref); | |
| 103 ir->ot = ot; | |
| 104 ir->op1 = op1; | |
| 105 ir->op2 = op2; | |
| 106 return nref; | |
| 107 } | |
| 108 | |
| 109 #if LJ_SOFTFP | |
/* Emit a (checked) number to integer conversion.
** lo/hi are the 32 bit halves of the soft-float number. If check is set,
** a guarded round-trip (int -> number -> compare) is emitted so the trace
** exits when the conversion would be lossy.
*/
static IRRef split_num2int(jit_State *J, IRRef lo, IRRef hi, int check)
{
  IRRef tmp, res;
  /* Pack the two halves as a call argument pair; order is endian-dependent. */
#if LJ_LE
  tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), lo, hi);
#else
  tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), hi, lo);
#endif
  res = split_emit(J, IRTI(IR_CALLN), tmp, IRCALL_softfp_d2i);
  if (check) {
    /* Convert back and compare both halves against the original number. */
    tmp = split_emit(J, IRTI(IR_CALLN), res, IRCALL_softfp_i2d);
    split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), tmp, tmp);
    split_emit(J, IRTGI(IR_EQ), tmp, lo);
    split_emit(J, IRTG(IR_HIOP, IRT_SOFTFP), tmp+1, hi);
  }
  return res;
}
| 128 | |
/* Emit a CALLN with one split 64 bit argument.
** Reclaims the copy-substituted slot the caller just emitted (nins--),
** emits a CARG pair for the two halves of op1, then the 32 bit call.
** The loword result goes into ir->prev; the returned HIOP is the hiword.
*/
static IRRef split_call_l(jit_State *J, IRRef1 *hisubst, IRIns *oir,
			  IRIns *ir, IRCallID id)
{
  IRRef tmp, op1 = ir->op1;
  J->cur.nins--;  /* Undo the instruction pre-emitted by split_ir. */
#if LJ_LE
  tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), oir[op1].prev, hisubst[op1]);
#else
  tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), hisubst[op1], oir[op1].prev);
#endif
  ir->prev = tmp = split_emit(J, IRTI(IR_CALLN), tmp, id);
  return split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), tmp, tmp);
}
| 143 #endif | |
| 144 | |
/* Emit a CALLN with one split 64 bit argument and a 32 bit argument.
** Like split_call_l, but appends the (already lo-substituted) 32 bit op2
** as a third CARG before emitting the call.
*/
static IRRef split_call_li(jit_State *J, IRRef1 *hisubst, IRIns *oir,
			   IRIns *ir, IRCallID id)
{
  IRRef tmp, op1 = ir->op1, op2 = ir->op2;
  J->cur.nins--;  /* Undo the instruction pre-emitted by split_ir. */
#if LJ_LE
  tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), oir[op1].prev, hisubst[op1]);
#else
  tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), hisubst[op1], oir[op1].prev);
#endif
  tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), tmp, oir[op2].prev);
  ir->prev = tmp = split_emit(J, IRTI(IR_CALLN), tmp, id);
  return split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), tmp, tmp);
}
| 160 | |
/* Emit a CALLN with two split 64 bit arguments.
** Both operands are passed as lo/hi half pairs in endian-dependent order.
** The HIOP type is IRT_SOFTFP for soft-float number results, IRT_INT for
** 64 bit integer results.
*/
static IRRef split_call_ll(jit_State *J, IRRef1 *hisubst, IRIns *oir,
			   IRIns *ir, IRCallID id)
{
  IRRef tmp, op1 = ir->op1, op2 = ir->op2;
  J->cur.nins--;  /* Undo the instruction pre-emitted by split_ir. */
#if LJ_LE
  tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), oir[op1].prev, hisubst[op1]);
  tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), tmp, oir[op2].prev);
  tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), tmp, hisubst[op2]);
#else
  tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), hisubst[op1], oir[op1].prev);
  tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), tmp, hisubst[op2]);
  tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), tmp, oir[op2].prev);
#endif
  ir->prev = tmp = split_emit(J, IRTI(IR_CALLN), tmp, id);
  return split_emit(J,
    IRT(IR_HIOP, (LJ_SOFTFP && irt_isnum(ir->t)) ? IRT_SOFTFP : IRT_INT),
    tmp, tmp);
}
| 181 | |
/* Get a pointer to the other 32 bit word (LE: hiword, BE: loword).
** Folds constant pointers directly and reassociates 'base + const' address
** computations (unless the base is a PHI) so the offset can later be fused
** into the memory operand by the assembler.
*/
static IRRef split_ptr(jit_State *J, IRIns *oir, IRRef ref)
{
  IRRef nref = oir[ref].prev;  /* Loword substitution of the address. */
  IRIns *ir = IR(nref);
  int32_t ofs = 4;
  if (ir->o == IR_KPTR)
    return lj_ir_kptr(J, (char *)ir_kptr(ir) + ofs);  /* Fold into constant. */
  if (ir->o == IR_ADD && irref_isk(ir->op2) && !irt_isphi(oir[ref].t)) {
    /* Reassociate address. */
    ofs += IR(ir->op2)->i;
    nref = ir->op1;
    if (ofs == 0) return nref;
  }
  return split_emit(J, IRT(IR_ADD, IRT_PTR), nref, lj_ir_kint(J, ofs));
}
| 198 | |
| 199 #if LJ_HASFFI | |
/* Split a 64 bit shift/rotate into 32 bit operations.
** Constant shift counts are strength-reduced in-place: rotates become a
** four-shift/two-or sequence, shifts by 0 pass both words through, shifts
** by 32..63 move whole words (filling with zeros or the sign word).
** Variable counts fall back to a carith helper call.
** Returns the hiword result; the loword result is left in ir->prev.
*/
static IRRef split_bitshift(jit_State *J, IRRef1 *hisubst,
			    IRIns *oir, IRIns *nir, IRIns *ir)
{
  IROp op = ir->o;
  IRRef kref = nir->op2;
  if (irref_isk(kref)) {  /* Optimize constant shifts. */
    int32_t k = (IR(kref)->i & 63);
    IRRef lo = nir->op1, hi = hisubst[ir->op1];
    if (op == IR_BROL || op == IR_BROR) {
      if (op == IR_BROR) k = (-k & 63);  /* Express ROR as ROL. */
      if (k >= 32) { IRRef t = lo; lo = hi; hi = t; k -= 32; }  /* Swap words. */
      if (k == 0) {
      passthrough:  /* Shift by 0: forward both words unchanged. */
	J->cur.nins--;
	ir->prev = lo;
	return hi;
      } else {
	/* rol64(x,k): each result word combines both input words. */
	TRef k1, k2;
	IRRef t1, t2, t3, t4;
	J->cur.nins--;
	k1 = lj_ir_kint(J, k);
	k2 = lj_ir_kint(J, (-k & 31));
	t1 = split_emit(J, IRTI(IR_BSHL), lo, k1);
	t2 = split_emit(J, IRTI(IR_BSHL), hi, k1);
	t3 = split_emit(J, IRTI(IR_BSHR), lo, k2);
	t4 = split_emit(J, IRTI(IR_BSHR), hi, k2);
	ir->prev = split_emit(J, IRTI(IR_BOR), t1, t4);
	return split_emit(J, IRTI(IR_BOR), t2, t3);
      }
    } else if (k == 0) {
      goto passthrough;
    } else if (k < 32) {
      if (op == IR_BSHL) {
	/* hi = (hi << k) | (lo >> (32-k)); lo << k is the pre-emitted nir. */
	IRRef t1 = split_emit(J, IRTI(IR_BSHL), hi, kref);
	IRRef t2 = split_emit(J, IRTI(IR_BSHR), lo, lj_ir_kint(J, (-k&31)));
	return split_emit(J, IRTI(IR_BOR), t1, t2);
      } else {
	/* lo = (lo >> k) | (hi << (32-k)); hi = hi >> k (or sar). */
	IRRef t1 = ir->prev, t2;
	lj_assertJ(op == IR_BSHR || op == IR_BSAR, "bad usage");
	nir->o = IR_BSHR;
	t2 = split_emit(J, IRTI(IR_BSHL), hi, lj_ir_kint(J, (-k&31)));
	ir->prev = split_emit(J, IRTI(IR_BOR), t1, t2);
	return split_emit(J, IRTI(op), hi, kref);
      }
    } else {  /* k in 32..63: whole input word moves across. */
      if (op == IR_BSHL) {
	if (k == 32)
	  J->cur.nins--;  /* Drop the pre-emitted lo shift entirely. */
	else
	  lo = ir->prev;  /* Keep lo << (k-32) as the hiword. */
	ir->prev = lj_ir_kint(J, 0);  /* Loword becomes zero. */
	return lo;
      } else {
	lj_assertJ(op == IR_BSHR || op == IR_BSAR, "bad usage");
	if (k == 32) {
	  J->cur.nins--;
	  ir->prev = hi;  /* Loword is the old hiword. */
	} else {
	  nir->op1 = hi;  /* Loword is hi >> (k-32). */
	}
	if (op == IR_BSHR)
	  return lj_ir_kint(J, 0);  /* Logical shift fills with zero. */
	else
	  return split_emit(J, IRTI(IR_BSAR), hi, lj_ir_kint(J, 31));  /* Sign fill. */
      }
    }
  }
  /* Non-constant count: call the 64 bit carith shift helper. */
  return split_call_li(J, hisubst, oir, ir,
		       op - IR_BSHL + IRCALL_lj_carith_shl64);
}
| 270 | |
/* Split a 64 bit BAND/BOR/BXOR into two 32 bit bit operations.
** Operand halves with constant 0 or -1 are folded per word:
** identity -> pass the other operand through, absorbing -> constant,
** BXOR with -1 -> BNOT. Returns the hiword; loword stays in ir->prev.
*/
static IRRef split_bitop(jit_State *J, IRRef1 *hisubst,
			 IRIns *nir, IRIns *ir)
{
  IROp op = ir->o;
  IRRef hi, kref = nir->op2;
  if (irref_isk(kref)) {  /* Optimize bit operations with lo constant. */
    int32_t k = IR(kref)->i;
    if (k == 0 || k == -1) {
      if (op == IR_BAND) k = ~k;  /* Normalize: k == 0 now means identity. */
      if (k == 0) {
	J->cur.nins--;
	ir->prev = nir->op1;  /* Identity: pass loword operand through. */
      } else if (op == IR_BXOR) {
	nir->o = IR_BNOT;  /* x ^ -1 == ~x. */
	nir->op2 = 0;
      } else {
	J->cur.nins--;
	ir->prev = kref;  /* Absorbing: result is the constant itself. */
      }
    }
  }
  hi = hisubst[ir->op1];
  kref = hisubst[ir->op2];
  if (irref_isk(kref)) {  /* Optimize bit operations with hi constant. */
    int32_t k = IR(kref)->i;
    if (k == 0 || k == -1) {
      if (op == IR_BAND) k = ~k;
      if (k == 0) {
	return hi;  /* Identity. */
      } else if (op == IR_BXOR) {
	return split_emit(J, IRTI(IR_BNOT), hi, 0);
      } else {
	return kref;  /* Absorbing. */
      }
    }
  }
  return split_emit(J, IRTI(op), hi, kref);
}
| 309 #endif | |
| 310 | |
/* Substitute references of a snapshot.
** Rewrites each snapshot entry to the loword substitution stored in
** oir[...].prev. Soft-float NUM constants are left as-is: their original
** (64 bit constant) reference is still valid and must be kept for the
** SNAP_SOFTFPNUM restore logic.
*/
static void split_subst_snap(jit_State *J, SnapShot *snap, IRIns *oir)
{
  SnapEntry *map = &J->cur.snapmap[snap->mapofs];
  MSize n, nent = snap->nent;
  for (n = 0; n < nent; n++) {
    SnapEntry sn = map[n];
    IRIns *ir = &oir[snap_ref(sn)];
    if (!(LJ_SOFTFP && (sn & SNAP_SOFTFPNUM) && irref_isk(snap_ref(sn))))
      map[n] = ((sn & 0xffff0000) | ir->prev);  /* Keep flags, swap ref. */
  }
}
| 323 | |
/* Transform the old IR to the new IR.
** Copies the current IR aside, clears the trace, then re-emits every
** instruction with 64 bit operations split into 32 bit op + HIOP pairs
** (or soft-float calls). Loword substitutions live in oir[ref].prev,
** hiword substitutions in hisubst[ref]. Snapshot references are rewritten
** as the corresponding instructions are reached.
*/
static void split_ir(jit_State *J)
{
  IRRef nins = J->cur.nins, nk = J->cur.nk;
  MSize irlen = nins - nk;
  /* One buffer holds both the saved IR and the hiword substitution table. */
  MSize need = (irlen+1)*(sizeof(IRIns) + sizeof(IRRef1));
  IRIns *oir = (IRIns *)lj_buf_tmp(J->L, need);
  IRRef1 *hisubst;
  IRRef ref, snref;
  SnapShot *snap;

  /* Copy old IR to buffer. */
  memcpy(oir, IR(nk), irlen*sizeof(IRIns));
  /* Bias hiword substitution table and old IR. Loword kept in field prev. */
  hisubst = (IRRef1 *)&oir[irlen] - nk;
  oir -= nk;

  /* Remove all IR instructions, but retain IR constants. */
  J->cur.nins = REF_FIRST;
  J->loopref = 0;

  /* Process constants and fixed references. */
  for (ref = nk; ref <= REF_BASE; ref++) {
    IRIns *ir = &oir[ref];
    if ((LJ_SOFTFP && ir->o == IR_KNUM) || ir->o == IR_KINT64) {
      /* Split up 64 bit constant. */
      TValue tv = *ir_k64(ir);
      ir->prev = lj_ir_kint(J, (int32_t)tv.u32.lo);
      hisubst[ref] = lj_ir_kint(J, (int32_t)tv.u32.hi);
    } else {
      ir->prev = ref;  /* Identity substitution for loword. */
      hisubst[ref] = 0;
    }
    if (irt_is64(ir->t) && ir->o != IR_KNULL)
      ref++;  /* 64 bit constants occupy two IR slots. */
  }

  /* Process old IR instructions. */
  snap = J->cur.snap;
  snref = snap->ref;
  for (ref = REF_FIRST; ref < nins; ref++) {
    IRIns *ir = &oir[ref];
    IRRef nref = lj_ir_nextins(J);
    IRIns *nir = IR(nref);
    IRRef hi = 0;

    if (ref >= snref) {  /* Snapshot boundary reached: rewrite its refs. */
      snap->ref = nref;
      split_subst_snap(J, snap++, oir);
      snref = snap < &J->cur.snap[J->cur.nsnap] ? snap->ref : ~(IRRef)0;
    }

    /* Copy-substitute old instruction to new instruction. */
    nir->op1 = ir->op1 < nk ? ir->op1 : oir[ir->op1].prev;
    nir->op2 = ir->op2 < nk ? ir->op2 : oir[ir->op2].prev;
    ir->prev = nref;  /* Loword substitution. */
    nir->o = ir->o;
    nir->t.irt = ir->t.irt & ~(IRT_MARK|IRT_ISPHI);
    hisubst[ref] = 0;

    /* Split 64 bit instructions. */
#if LJ_SOFTFP
    if (irt_isnum(ir->t)) {
      nir->t.irt = IRT_INT | (nir->t.irt & IRT_GUARD);  /* Turn into INT op. */
      /* Note: hi ref = lo ref + 1! Required for SNAP_SOFTFPNUM logic. */
      switch (ir->o) {
      case IR_ADD:
	hi = split_call_ll(J, hisubst, oir, ir, IRCALL_softfp_add);
	break;
      case IR_SUB:
	hi = split_call_ll(J, hisubst, oir, ir, IRCALL_softfp_sub);
	break;
      case IR_MUL:
	hi = split_call_ll(J, hisubst, oir, ir, IRCALL_softfp_mul);
	break;
      case IR_DIV:
	hi = split_call_ll(J, hisubst, oir, ir, IRCALL_softfp_div);
	break;
      case IR_POW:
	hi = split_call_ll(J, hisubst, oir, ir, IRCALL_pow);
	break;
      case IR_FPMATH:
	hi = split_call_l(J, hisubst, oir, ir, IRCALL_lj_vm_floor + ir->op2);
	break;
      case IR_LDEXP:
	hi = split_call_li(J, hisubst, oir, ir, IRCALL_ldexp);
	break;
      case IR_NEG: case IR_ABS:
	nir->o = IR_CONV;  /* Pass through loword. */
	nir->op2 = (IRT_INT << 5) | IRT_INT;
	/* Flip (NEG) or clear (ABS) only the sign bit in the hiword. */
	hi = split_emit(J, IRT(ir->o == IR_NEG ? IR_BXOR : IR_BAND, IRT_SOFTFP),
			hisubst[ir->op1],
			lj_ir_kint(J, (int32_t)(0x7fffffffu + (ir->o == IR_NEG))));
	break;
      case IR_SLOAD:
	if ((nir->op2 & IRSLOAD_CONVERT)) {  /* Convert from int to number. */
	  nir->op2 &= ~IRSLOAD_CONVERT;
	  ir->prev = nref = split_emit(J, IRTI(IR_CALLN), nref,
				       IRCALL_softfp_i2d);
	  hi = split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), nref, nref);
	  break;
	}
	/* fallthrough */
      case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: case IR_VLOAD:
      case IR_STRTO:
	hi = split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), nref, nref);
	break;
      case IR_FLOAD:
	lj_assertJ(ir->op1 == REF_NIL, "expected FLOAD from GG_State");
	/* Hiword is a constant read directly from the GG_State field. */
	hi = lj_ir_kint(J, *(int32_t*)((char*)J2GG(J) + ir->op2 + LJ_LE*4));
	nir->op2 += LJ_BE*4;
	break;
      case IR_XLOAD: {
	IRIns inslo = *nir;  /* Save/undo the emit of the lo XLOAD. */
	J->cur.nins--;
	hi = split_ptr(J, oir, ir->op1);  /* Insert the hiref ADD. */
#if LJ_BE
	hi = split_emit(J, IRT(IR_XLOAD, IRT_INT), hi, ir->op2);
	inslo.t.irt = IRT_SOFTFP | (inslo.t.irt & IRT_GUARD);
#endif
	nref = lj_ir_nextins(J);
	nir = IR(nref);
	*nir = inslo;  /* Re-emit lo XLOAD. */
#if LJ_LE
	hi = split_emit(J, IRT(IR_XLOAD, IRT_SOFTFP), hi, ir->op2);
	ir->prev = nref;
#else
	ir->prev = hi; hi = nref;
#endif
	break;
	}
      case IR_ASTORE: case IR_HSTORE: case IR_USTORE: case IR_XSTORE:
	split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), nir->op1, hisubst[ir->op2]);
	break;
      case IR_CONV: {  /* Conversion to number. Others handled below. */
	IRType st = (IRType)(ir->op2 & IRCONV_SRCMASK);
	UNUSED(st);
#if LJ_32 && LJ_HASFFI
	if (st == IRT_I64 || st == IRT_U64) {
	  hi = split_call_l(J, hisubst, oir, ir,
		 st == IRT_I64 ? IRCALL_fp64_l2d : IRCALL_fp64_ul2d);
	  break;
	}
#endif
	lj_assertJ(st == IRT_INT ||
		   (LJ_32 && LJ_HASFFI && (st == IRT_U32 || st == IRT_FLOAT)),
		   "bad source type for CONV");
	nir->o = IR_CALLN;  /* Rewrite the CONV itself into a softfp call. */
#if LJ_32 && LJ_HASFFI
	nir->op2 = st == IRT_INT ? IRCALL_softfp_i2d :
		   st == IRT_FLOAT ? IRCALL_softfp_f2d :
		   IRCALL_softfp_ui2d;
#else
	nir->op2 = IRCALL_softfp_i2d;
#endif
	hi = split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), nref, nref);
	break;
	}
      case IR_CALLN:
      case IR_CALLL:
      case IR_CALLS:
      case IR_CALLXS:
	goto split_call;
      case IR_PHI:
	if (nir->op1 == nir->op2)
	  J->cur.nins--;  /* Drop useless PHIs. */
	if (hisubst[ir->op1] != hisubst[ir->op2])
	  split_emit(J, IRT(IR_PHI, IRT_SOFTFP),
		     hisubst[ir->op1], hisubst[ir->op2]);
	break;
      case IR_HIOP:
	J->cur.nins--;  /* Drop joining HIOP. */
	ir->prev = nir->op1;
	hi = nir->op2;
	break;
      default:
	lj_assertJ(ir->o <= IR_NE || ir->o == IR_MIN || ir->o == IR_MAX,
		   "bad IR op %d", ir->o);
	hi = split_emit(J, IRTG(IR_HIOP, IRT_SOFTFP),
			hisubst[ir->op1], hisubst[ir->op2]);
	break;
      }
    } else
#endif
#if LJ_32 && LJ_HASFFI
    if (irt_isint64(ir->t)) {
      IRRef hiref = hisubst[ir->op1];
      nir->t.irt = IRT_INT | (nir->t.irt & IRT_GUARD);  /* Turn into INT op. */
      switch (ir->o) {
      case IR_ADD:
      case IR_SUB:
	/* Use plain op for hiword if loword cannot produce a carry/borrow. */
	if (irref_isk(nir->op2) && IR(nir->op2)->i == 0) {
	  ir->prev = nir->op1;  /* Pass through loword. */
	  nir->op1 = hiref; nir->op2 = hisubst[ir->op2];
	  hi = nref;
	  break;
	}
	/* fallthrough */
      case IR_NEG:
	hi = split_emit(J, IRTI(IR_HIOP), hiref, hisubst[ir->op2]);
	break;
      case IR_MUL:
	hi = split_call_ll(J, hisubst, oir, ir, IRCALL_lj_carith_mul64);
	break;
      case IR_DIV:
	hi = split_call_ll(J, hisubst, oir, ir,
			   irt_isi64(ir->t) ? IRCALL_lj_carith_divi64 :
					      IRCALL_lj_carith_divu64);
	break;
      case IR_MOD:
	hi = split_call_ll(J, hisubst, oir, ir,
			   irt_isi64(ir->t) ? IRCALL_lj_carith_modi64 :
					      IRCALL_lj_carith_modu64);
	break;
      case IR_POW:
	hi = split_call_ll(J, hisubst, oir, ir,
			   irt_isi64(ir->t) ? IRCALL_lj_carith_powi64 :
					      IRCALL_lj_carith_powu64);
	break;
      case IR_BNOT:
	hi = split_emit(J, IRTI(IR_BNOT), hiref, 0);
	break;
      case IR_BSWAP:
	/* bswap64 swaps the words too: lo result comes from hi input. */
	ir->prev = split_emit(J, IRTI(IR_BSWAP), hiref, 0);
	hi = nref;
	break;
      case IR_BAND: case IR_BOR: case IR_BXOR:
	hi = split_bitop(J, hisubst, nir, ir);
	break;
      case IR_BSHL: case IR_BSHR: case IR_BSAR: case IR_BROL: case IR_BROR:
	hi = split_bitshift(J, hisubst, oir, nir, ir);
	break;
      case IR_FLOAD:
	lj_assertJ(ir->op2 == IRFL_CDATA_INT64, "only INT64 supported");
	hi = split_emit(J, IRTI(IR_FLOAD), nir->op1, IRFL_CDATA_INT64_4);
#if LJ_BE
	ir->prev = hi; hi = nref;
#endif
	break;
      case IR_XLOAD:
	hi = split_emit(J, IRTI(IR_XLOAD), split_ptr(J, oir, ir->op1), ir->op2);
#if LJ_BE
	ir->prev = hi; hi = nref;
#endif
	break;
      case IR_XSTORE:
	split_emit(J, IRTI(IR_HIOP), nir->op1, hisubst[ir->op2]);
	break;
      case IR_CONV: {  /* Conversion to 64 bit integer. Others handled below. */
	IRType st = (IRType)(ir->op2 & IRCONV_SRCMASK);
#if LJ_SOFTFP
	if (st == IRT_NUM) {  /* NUM to 64 bit int conv. */
	  hi = split_call_l(J, hisubst, oir, ir,
		 irt_isi64(ir->t) ? IRCALL_fp64_d2l : IRCALL_fp64_d2ul);
	} else if (st == IRT_FLOAT) {  /* FLOAT to 64 bit int conv. */
	  nir->o = IR_CALLN;
	  nir->op2 = irt_isi64(ir->t) ? IRCALL_fp64_f2l : IRCALL_fp64_f2ul;
	  hi = split_emit(J, IRTI(IR_HIOP), nref, nref);
	}
#else
	if (st == IRT_NUM || st == IRT_FLOAT) {  /* FP to 64 bit int conv. */
	  hi = split_emit(J, IRTI(IR_HIOP), nir->op1, nref);
	}
#endif
	else if (st == IRT_I64 || st == IRT_U64) {  /* 64/64 bit cast. */
	  /* Drop cast, since assembler doesn't care. But fwd both parts. */
	  hi = hiref;
	  goto fwdlo;
	} else if ((ir->op2 & IRCONV_SEXT)) {  /* Sign-extend to 64 bit. */
	  IRRef k31 = lj_ir_kint(J, 31);
	  nir = IR(nref);  /* May have been reallocated. */
	  ir->prev = nir->op1;  /* Pass through loword. */
	  nir->o = IR_BSAR;  /* hi = bsar(lo, 31). */
	  nir->op2 = k31;
	  hi = nref;
	} else {  /* Zero-extend to 64 bit. */
	  hi = lj_ir_kint(J, 0);
	  goto fwdlo;
	}
	break;
	}
      case IR_CALLXS:
	goto split_call;
      case IR_PHI: {
	IRRef hiref2;
	if ((irref_isk(nir->op1) && irref_isk(nir->op2)) ||
	    nir->op1 == nir->op2)
	  J->cur.nins--;  /* Drop useless PHIs. */
	hiref2 = hisubst[ir->op2];
	if (!((irref_isk(hiref) && irref_isk(hiref2)) || hiref == hiref2))
	  split_emit(J, IRTI(IR_PHI), hiref, hiref2);
	break;
	}
      case IR_HIOP:
	J->cur.nins--;  /* Drop joining HIOP. */
	ir->prev = nir->op1;
	hi = nir->op2;
	break;
      default:
	lj_assertJ(ir->o <= IR_NE, "bad IR op %d", ir->o);  /* Comparisons. */
	split_emit(J, IRTGI(IR_HIOP), hiref, hisubst[ir->op2]);
	break;
      }
    } else
#endif
#if LJ_SOFTFP
    if (ir->o == IR_SLOAD) {
      if ((nir->op2 & IRSLOAD_CONVERT)) {  /* Convert from number to int. */
	nir->op2 &= ~IRSLOAD_CONVERT;
	if (!(nir->op2 & IRSLOAD_TYPECHECK))
	  nir->t.irt = IRT_INT;  /* Drop guard. */
	split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), nref, nref);
	ir->prev = split_num2int(J, nref, nref+1, irt_isguard(ir->t));
      }
    } else if (ir->o == IR_TOBIT) {
      IRRef tmp, op1 = ir->op1;
      J->cur.nins--;  /* Replace TOBIT with a lj_vm_tobit call. */
#if LJ_LE
      tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), oir[op1].prev, hisubst[op1]);
#else
      tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), hisubst[op1], oir[op1].prev);
#endif
      ir->prev = split_emit(J, IRTI(IR_CALLN), tmp, IRCALL_lj_vm_tobit);
    } else if (ir->o == IR_TOSTR || ir->o == IR_TMPREF) {
      if (hisubst[ir->op1]) {
	/* Keep original 64 bit constant ref, else pair the halves via HIOP. */
	if (irref_isk(ir->op1))
	  nir->op1 = ir->op1;
	else
	  split_emit(J, IRT(IR_HIOP, IRT_NIL), hisubst[ir->op1], nref);
      }
    } else if (ir->o == IR_HREF || ir->o == IR_NEWREF) {
      if (irref_isk(ir->op2) && hisubst[ir->op2])
	nir->op2 = ir->op2;  /* Keep original 64 bit constant key ref. */
    } else
#endif
    if (ir->o == IR_CONV) {  /* See above, too. */
      IRType st = (IRType)(ir->op2 & IRCONV_SRCMASK);
#if LJ_32 && LJ_HASFFI
      if (st == IRT_I64 || st == IRT_U64) {  /* Conversion from 64 bit int. */
#if LJ_SOFTFP
	if (irt_isfloat(ir->t)) {
	  split_call_l(J, hisubst, oir, ir,
		       st == IRT_I64 ? IRCALL_fp64_l2f : IRCALL_fp64_ul2f);
	  J->cur.nins--;  /* Drop unused HIOP. */
	}
#else
	if (irt_isfp(ir->t)) {  /* 64 bit integer to FP conversion. */
	  ir->prev = split_emit(J, IRT(IR_HIOP, irt_type(ir->t)),
				hisubst[ir->op1], nref);
	}
#endif
	else {  /* Truncate to lower 32 bits. */
	fwdlo:
	  ir->prev = nir->op1;  /* Forward loword. */
	  /* Replace with NOP to avoid messing up the snapshot logic. */
	  nir->ot = IRT(IR_NOP, IRT_NIL);
	  nir->op1 = nir->op2 = 0;
	}
      }
#endif
#if LJ_SOFTFP && LJ_32 && LJ_HASFFI
      else if (irt_isfloat(ir->t)) {
	if (st == IRT_NUM) {
	  split_call_l(J, hisubst, oir, ir, IRCALL_softfp_d2f);
	  J->cur.nins--;  /* Drop unused HIOP. */
	} else {
	  nir->o = IR_CALLN;
	  nir->op2 = st == IRT_INT ? IRCALL_softfp_i2f : IRCALL_softfp_ui2f;
	}
      } else if (st == IRT_FLOAT) {
	nir->o = IR_CALLN;
	nir->op2 = irt_isint(ir->t) ? IRCALL_softfp_f2i : IRCALL_softfp_f2ui;
      } else
#endif
#if LJ_SOFTFP
      if (st == IRT_NUM || (LJ_32 && LJ_HASFFI && st == IRT_FLOAT)) {
	if (irt_isguard(ir->t)) {
	  lj_assertJ(st == IRT_NUM && irt_isint(ir->t), "bad CONV types");
	  J->cur.nins--;
	  ir->prev = split_num2int(J, nir->op1, hisubst[ir->op1], 1);
	} else {
	  split_call_l(J, hisubst, oir, ir,
#if LJ_32 && LJ_HASFFI
	    st == IRT_NUM ?
	      (irt_isint(ir->t) ? IRCALL_softfp_d2i : IRCALL_softfp_d2ui) :
	      (irt_isint(ir->t) ? IRCALL_softfp_f2i : IRCALL_softfp_f2ui)
#else
	    IRCALL_softfp_d2i
#endif
	  );
	  J->cur.nins--;  /* Drop unused HIOP. */
	}
      }
#endif
    } else if (ir->o == IR_CALLXS) {
      IRRef hiref;
    split_call:
      hiref = hisubst[ir->op1];
      if (hiref) {
	/* Turn the call into a CARG for the extra hiword argument and
	** re-emit the call itself after it.
	*/
	IROpT ot = nir->ot;
	IRRef op2 = nir->op2;
	nir->ot = IRT(IR_CARG, IRT_NIL);
#if LJ_LE
	nir->op2 = hiref;
#else
	nir->op2 = nir->op1; nir->op1 = hiref;
#endif
	ir->prev = nref = split_emit(J, ot, nref, op2);
      }
      if (LJ_SOFTFP ? irt_is64(ir->t) : irt_isint64(ir->t))
	hi = split_emit(J,
	  IRT(IR_HIOP, (LJ_SOFTFP && irt_isnum(ir->t)) ? IRT_SOFTFP : IRT_INT),
	  nref, nref);
    } else if (ir->o == IR_CARG) {
      IRRef hiref = hisubst[ir->op1];
      if (hiref) {
	IRRef op2 = nir->op2;
#if LJ_LE
	nir->op2 = hiref;
#else
	nir->op2 = nir->op1; nir->op1 = hiref;
#endif
	ir->prev = nref = split_emit(J, IRT(IR_CARG, IRT_NIL), nref, op2);
	nir = IR(nref);
      }
      hiref = hisubst[ir->op2];
      if (hiref) {
#if !LJ_TARGET_X86
	/* Count CARGs so 64 bit arguments land on an even slot boundary. */
	int carg = 0;
	IRIns *cir;
	for (cir = IR(nir->op1); cir->o == IR_CARG; cir = IR(cir->op1))
	  carg++;
	if ((carg & 1) == 0) {  /* Align 64 bit arguments. */
	  IRRef op2 = nir->op2;
	  nir->op2 = REF_NIL;
	  nref = split_emit(J, IRT(IR_CARG, IRT_NIL), nref, op2);
	  nir = IR(nref);
	}
#endif
#if LJ_BE
	{ IRRef tmp = nir->op2; nir->op2 = hiref; hiref = tmp; }
#endif
	ir->prev = split_emit(J, IRT(IR_CARG, IRT_NIL), nref, hiref);
      }
    } else if (ir->o == IR_CNEWI) {
      if (hisubst[ir->op2])
	split_emit(J, IRT(IR_HIOP, IRT_NIL), nref, hisubst[ir->op2]);
    } else if (ir->o == IR_LOOP) {
      J->loopref = nref;  /* Needed by assembler. */
    }
    hisubst[ref] = hi;  /* Store hiword substitution. */
  }
  if (snref == nins) {  /* Substitution for last snapshot. */
    snap->ref = J->cur.nins;
    split_subst_snap(J, snap, oir);
  }

  /* Add PHI marks. */
  for (ref = J->cur.nins-1; ref >= REF_FIRST; ref--) {
    IRIns *ir = IR(ref);
    if (ir->o != IR_PHI) break;  /* PHIs are contiguous at the end. */
    if (!irref_isk(ir->op1)) irt_setphi(IR(ir->op1)->t);
    if (ir->op2 > J->loopref) irt_setphi(IR(ir->op2)->t);
  }
}
| 790 | |
| 791 /* Protected callback for split pass. */ | |
| 792 static TValue *cpsplit(lua_State *L, lua_CFunction dummy, void *ud) | |
| 793 { | |
| 794 jit_State *J = (jit_State *)ud; | |
| 795 split_ir(J); | |
| 796 UNUSED(L); UNUSED(dummy); | |
| 797 return NULL; | |
| 798 } | |
| 799 | |
| 800 #if defined(LUA_USE_ASSERT) || LJ_SOFTFP | |
/* Slow, but sure way to check whether a SPLIT pass is needed.
** Scans all instructions for 64 bit (or, on soft-float, FP) results, then
** the SLOAD/TOBIT/CONV chains for conversions the pass must rewrite.
** Returns 1 if the pass is required, 0 otherwise.
*/
static int split_needsplit(jit_State *J)
{
  IRIns *ir, *irend;
  IRRef ref;
  for (ir = IR(REF_FIRST), irend = IR(J->cur.nins); ir < irend; ir++)
    if (LJ_SOFTFP ? irt_is64orfp(ir->t) : irt_isint64(ir->t))
      return 1;
  if (LJ_SOFTFP) {
    /* Converting SLOADs and TOBIT need the pass even without 64 bit results. */
    for (ref = J->chain[IR_SLOAD]; ref; ref = IR(ref)->prev)
      if ((IR(ref)->op2 & IRSLOAD_CONVERT))
	return 1;
    if (J->chain[IR_TOBIT])
      return 1;
  }
  for (ref = J->chain[IR_CONV]; ref; ref = IR(ref)->prev) {
    IRType st = (IR(ref)->op2 & IRCONV_SRCMASK);
    if ((LJ_SOFTFP && (st == IRT_NUM || st == IRT_FLOAT)) ||
	st == IRT_I64 || st == IRT_U64)
      return 1;
  }
  return 0;  /* Nope. */
}
| 824 #endif | |
| 825 | |
/* SPLIT pass entry point.
** Runs split_ir() in a protected frame (it may allocate and throw).
** On error the trace is wiped to keep -jdump output consistent before the
** error is rethrown.
*/
void lj_opt_split(jit_State *J)
{
#if LJ_SOFTFP
  if (!J->needsplit)
    J->needsplit = split_needsplit(J);
#else
  /* On hard-float targets the flag must already be conservatively set. */
  lj_assertJ(J->needsplit >= split_needsplit(J), "bad SPLIT state");
#endif
  if (J->needsplit) {
    int errcode = lj_vm_cpcall(J->L, NULL, J, cpsplit);
    if (errcode) {
      /* Completely reset the trace to avoid inconsistent dump on abort. */
      J->cur.nins = J->cur.nk = REF_BASE;
      J->cur.nsnap = 0;
      lj_err_throw(J->L, errcode);  /* Propagate errors. */
    }
  }
}
| 845 | |
| 846 #undef IR | |
| 847 | |
| 848 #endif |