Mercurial
comparison third_party/luajit/src/lj_snap.c @ 186:8cf4ec5e2191 hg-web
Fixed merge conflict.
| author | MrJuneJune <me@mrjunejune.com> |
|---|---|
| date | Fri, 23 Jan 2026 22:38:59 -0800 |
| parents | 94705b5986b3 |
| children |
comparison
equal
deleted
inserted
replaced
| 176:fed99fc04e12 | 186:8cf4ec5e2191 |
|---|---|
| 1 /* | |
| 2 ** Snapshot handling. | |
| 3 ** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h | |
| 4 */ | |
| 5 | |
| 6 #define lj_snap_c | |
| 7 #define LUA_CORE | |
| 8 | |
| 9 #include "lj_obj.h" | |
| 10 | |
| 11 #if LJ_HASJIT | |
| 12 | |
| 13 #include "lj_gc.h" | |
| 14 #include "lj_tab.h" | |
| 15 #include "lj_state.h" | |
| 16 #include "lj_frame.h" | |
| 17 #include "lj_bc.h" | |
| 18 #include "lj_ir.h" | |
| 19 #include "lj_jit.h" | |
| 20 #include "lj_iropt.h" | |
| 21 #include "lj_trace.h" | |
| 22 #include "lj_snap.h" | |
| 23 #include "lj_target.h" | |
| 24 #if LJ_HASFFI | |
| 25 #include "lj_ctype.h" | |
| 26 #include "lj_cdata.h" | |
| 27 #endif | |
| 28 | |
| 29 /* Pass IR on to next optimization in chain (FOLD). */ | |
| 30 #define emitir(ot, a, b) (lj_ir_set(J, (ot), (a), (b)), lj_opt_fold(J)) | |
| 31 | |
| 32 /* Emit raw IR without passing through optimizations. */ | |
| 33 #define emitir_raw(ot, a, b) (lj_ir_set(J, (ot), (a), (b)), lj_ir_emit(J)) | |
| 34 | |
| 35 /* -- Snapshot buffer allocation ------------------------------------------ */ | |
| 36 | |
| 37 /* Grow snapshot buffer. */ | |
| 38 void lj_snap_grow_buf_(jit_State *J, MSize need) | |
| 39 { | |
| 40 MSize maxsnap = (MSize)J->param[JIT_P_maxsnap]; | |
| 41 if (need > maxsnap) | |
| 42 lj_trace_err(J, LJ_TRERR_SNAPOV); | |
| 43 lj_mem_growvec(J->L, J->snapbuf, J->sizesnap, maxsnap, SnapShot); | |
| 44 J->cur.snap = J->snapbuf; | |
| 45 } | |
| 46 | |
| 47 /* Grow snapshot map buffer. */ | |
| 48 void lj_snap_grow_map_(jit_State *J, MSize need) | |
| 49 { | |
| 50 if (need < 2*J->sizesnapmap) | |
| 51 need = 2*J->sizesnapmap; | |
| 52 else if (need < 64) | |
| 53 need = 64; | |
| 54 J->snapmapbuf = (SnapEntry *)lj_mem_realloc(J->L, J->snapmapbuf, | |
| 55 J->sizesnapmap*sizeof(SnapEntry), need*sizeof(SnapEntry)); | |
| 56 J->cur.snapmap = J->snapmapbuf; | |
| 57 J->sizesnapmap = need; | |
| 58 } | |
| 59 | |
| 60 /* -- Snapshot generation ------------------------------------------------- */ | |
| 61 | |
| 62 /* Add all modified slots to the snapshot. Writes one SnapEntry per recorded slot into map[] and returns the number of entries written. */ | |
| 63 static MSize snapshot_slots(jit_State *J, SnapEntry *map, BCReg nslots) | |
| 64 { | |
| 65 IRRef retf = J->chain[IR_RETF]; /* Limits SLOAD restore elimination. */ | |
| 66 BCReg s; | |
| 67 MSize n = 0; /* Number of entries written to map[] so far. */ | |
| 68 for (s = 0; s < nslots; s++) { | |
| 69 TRef tr = J->slot[s]; | |
| 70 IRRef ref = tref_ref(tr); | |
| 71 #if LJ_FR2 | |
| 72 if (s == 1) { /* Ignore slot 1 in LJ_FR2 mode, except if tailcalled. */ | |
| 73 if ((tr & TREF_FRAME)) | |
| 74 map[n++] = SNAP(1, SNAP_FRAME | SNAP_NORESTORE, REF_NIL); | |
| 75 continue; | |
| 76 } | |
| 77 if ((tr & (TREF_FRAME | TREF_CONT)) && !ref) { /* Frame/continuation slot without an IR ref. */ | |
| 78 cTValue *base = J->L->base - J->baseslot; | |
| 79 tr = J->slot[s] = (tr & 0xff0000) | lj_ir_k64(J, IR_KNUM, base[s].u64); /* Keep only the 0xff0000 bits of tr and attach the stack value as a 64 bit constant. */ | |
| 80 ref = tref_ref(tr); | |
| 81 } | |
| 82 #endif | |
| 83 if (ref) { /* Slots without an IR ref are not recorded. */ | |
| 84 SnapEntry sn = SNAP_TR(s, tr); | |
| 85 IRIns *ir = &J->cur.ir[ref]; | |
| 86 if ((LJ_FR2 || !(sn & (SNAP_CONT|SNAP_FRAME))) && | |
| 87 ir->o == IR_SLOAD && ir->op1 == s && ref > retf) { | |
| 88 /* | |
| 89 ** No need to snapshot unmodified non-inherited slots. | |
| 90 ** But always snapshot the function below a frame in LJ_FR2 mode. | |
| 91 */ | |
| 92 if (!(ir->op2 & IRSLOAD_INHERIT) && | |
| 93 (!LJ_FR2 || s == 0 || s+1 == nslots || | |
| 94 !(J->slot[s+1] & (TREF_CONT|TREF_FRAME)))) | |
| 95 continue; | |
| 96 /* No need to restore readonly slots and unmodified non-parent slots. */ | |
| 97 if (!(LJ_DUALNUM && (ir->op2 & IRSLOAD_CONVERT)) && | |
| 98 (ir->op2 & (IRSLOAD_READONLY|IRSLOAD_PARENT)) != IRSLOAD_PARENT) | |
| 99 sn |= SNAP_NORESTORE; | |
| 100 } | |
| 101 if (LJ_SOFTFP32 && irt_isnum(ir->t)) | |
| 102 sn |= SNAP_SOFTFPNUM; /* Flag number-typed refs in the entry on LJ_SOFTFP32 builds. */ | |
| 103 map[n++] = sn; | |
| 104 } | |
| 105 } | |
| 106 return n; | |
| 107 } | |
| 108 | |
| 109 /* Add frame links at the end of the snapshot. Returns the number of SnapEntry elements written to map[] and stores (highest frame top - lim) in *topslot. */ | |
| 110 static MSize snapshot_framelinks(jit_State *J, SnapEntry *map, uint8_t *topslot) | |
| 111 { | |
| 112 cTValue *frame = J->L->base - 1; | |
| 113 cTValue *lim = J->L->base - J->baseslot + LJ_FR2; /* The frame walk below stops once frame <= lim. */ | |
| 114 GCfunc *fn = frame_func(frame); | |
| 115 cTValue *ftop = isluafunc(fn) ? (frame+funcproto(fn)->framesize) : J->L->top; | |
| 116 #if LJ_FR2 | |
| 117 uint64_t pcbase = (u64ptr(J->pc) << 8) | (J->baseslot - 2); /* PC shifted up by 8 bits, baseslot-2 in the low byte. */ | |
| 118 lj_assertJ(2 <= J->baseslot && J->baseslot <= 257, "bad baseslot"); | |
| 119 memcpy(map, &pcbase, sizeof(uint64_t)); /* Occupies two SnapEntry elements, see the assertion before the return. */ | |
| 120 #else | |
| 121 MSize f = 0; | |
| 122 map[f++] = SNAP_MKPC(J->pc); /* The current PC is always the first entry. */ | |
| 123 #endif | |
| 124 lj_assertJ(!J->pt || | |
| 125 (J->pc >= proto_bc(J->pt) && | |
| 126 J->pc < proto_bc(J->pt) + J->pt->sizebc), "bad snapshot PC"); | |
| 127 while (frame > lim) { /* Backwards traversal of all frames above base. */ | |
| 128 if (frame_islua(frame)) { | |
| 129 #if !LJ_FR2 | |
| 130 map[f++] = SNAP_MKPC(frame_pc(frame)); | |
| 131 #endif | |
| 132 frame = frame_prevl(frame); | |
| 133 } else if (frame_iscont(frame)) { | |
| 134 #if !LJ_FR2 | |
| 135 map[f++] = SNAP_MKFTSZ(frame_ftsz(frame)); | |
| 136 map[f++] = SNAP_MKPC(frame_contpc(frame)); | |
| 137 #endif | |
| 138 frame = frame_prevd(frame); | |
| 139 } else { | |
| 140 lj_assertJ(!frame_isc(frame), "broken frame chain"); | |
| 141 #if !LJ_FR2 | |
| 142 map[f++] = SNAP_MKFTSZ(frame_ftsz(frame)); | |
| 143 #endif | |
| 144 frame = frame_prevd(frame); | |
| 145 continue; /* Skips the ftop update below for this kind of frame. */ | |
| 146 } | |
| 147 if (frame + funcproto(frame_func(frame))->framesize > ftop) /* Track the highest frame top seen so far. */ | |
| 148 ftop = frame + funcproto(frame_func(frame))->framesize; | |
| 149 } | |
| 150 *topslot = (uint8_t)(ftop - lim); | |
| 151 #if LJ_FR2 | |
| 152 lj_assertJ(sizeof(SnapEntry) * 2 == sizeof(uint64_t), "bad SnapEntry def"); | |
| 153 return 2; | |
| 154 #else | |
| 155 lj_assertJ(f == (MSize)(1 + J->framedepth), "miscalculated snapshot size"); | |
| 156 return f; | |
| 157 #endif | |
| 158 } | |
| 159 | |
| 160 /* Take a snapshot of the current stack. */ | |
| 161 static void snapshot_stack(jit_State *J, SnapShot *snap, MSize nsnapmap) | |
| 162 { | |
| 163 BCReg nslots = J->baseslot + J->maxslot; | |
| 164 MSize nent; | |
| 165 SnapEntry *p; | |
| 166 /* Conservative estimate. */ | |
| 167 lj_snap_grow_map(J, nsnapmap + nslots + (MSize)(LJ_FR2?2:J->framedepth+1)); | |
| 168 p = &J->cur.snapmap[nsnapmap]; | |
| 169 nent = snapshot_slots(J, p, nslots); | |
| 170 snap->nent = (uint8_t)nent; | |
| 171 nent += snapshot_framelinks(J, p + nent, &snap->topslot); | |
| 172 snap->mapofs = (uint32_t)nsnapmap; | |
| 173 snap->ref = (IRRef1)J->cur.nins; | |
| 174 snap->mcofs = 0; | |
| 175 snap->nslots = (uint8_t)nslots; | |
| 176 snap->count = 0; | |
| 177 J->cur.nsnapmap = (uint32_t)(nsnapmap + nent); | |
| 178 } | |
| 179 | |
| 180 /* Add or merge a snapshot. */ | |
| 181 void lj_snap_add(jit_State *J) | |
| 182 { | |
| 183 MSize nsnap = J->cur.nsnap; | |
| 184 MSize nsnapmap = J->cur.nsnapmap; | |
| 185 /* Merge if no ins. inbetween or if requested and no guard inbetween. */ | |
| 186 if ((nsnap > 0 && J->cur.snap[nsnap-1].ref == J->cur.nins) || | |
| 187 (J->mergesnap && !irt_isguard(J->guardemit))) { | |
| 188 if (nsnap == 1) { /* But preserve snap #0 PC. */ | |
| 189 emitir_raw(IRT(IR_NOP, IRT_NIL), 0, 0); | |
| 190 goto nomerge; | |
| 191 } | |
| 192 nsnapmap = J->cur.snap[--nsnap].mapofs; | |
| 193 } else { | |
| 194 nomerge: | |
| 195 lj_snap_grow_buf(J, nsnap+1); | |
| 196 J->cur.nsnap = (uint16_t)(nsnap+1); | |
| 197 } | |
| 198 J->mergesnap = 0; | |
| 199 J->guardemit.irt = 0; | |
| 200 snapshot_stack(J, &J->cur.snap[nsnap], nsnapmap); | |
| 201 } | |
| 202 | |
| 203 /* -- Snapshot modification ----------------------------------------------- */ | |
| 204 | |
| 205 #define SNAP_USEDEF_SLOTS (LJ_MAX_JSLOTS+LJ_STACK_EXTRA) | |
| 206 | |
| 207 /* Find unused slots with reaching-definitions bytecode data-flow analysis. Fills udf[] (0 = used, non-zero = not used before being redefined) and returns a slot index; callers only inspect udf[] for slots from that index up to maxslot. */ | |
| 208 static BCReg snap_usedef(jit_State *J, uint8_t *udf, | |
| 209 const BCIns *pc, BCReg maxslot) | |
| 210 { | |
| 211 BCReg s; | |
| 212 GCobj *o; | |
| 213 | |
| 214 if (maxslot == 0) return 0; | |
| 215 #ifdef LUAJIT_USE_VALGRIND | |
| 216 /* Avoid errors for harmless reads beyond maxslot. */ | |
| 217 memset(udf, 1, SNAP_USEDEF_SLOTS); | |
| 218 #else | |
| 219 memset(udf, 1, maxslot); /* Every slot starts out as 1: neither used nor defined yet. */ | |
| 220 #endif | |
| 221 | |
| 222 /* Treat open upvalues as used. */ | |
| 223 o = gcref(J->L->openupval); | |
| 224 while (o) { | |
| 225 if (uvval(gco2uv(o)) < J->L->base) break; /* Stop at the first upvalue pointing below the current base. */ | |
| 226 udf[uvval(gco2uv(o)) - J->L->base] = 0; | |
| 227 o = gcref(o->gch.nextgc); | |
| 228 } | |
| 229 | |
| 230 #define USE_SLOT(s) udf[(s)] &= ~1 /* Clears bit 0: 1 becomes 0 (used), 3 becomes 2 (still non-zero). */ | |
| 231 #define DEF_SLOT(s) udf[(s)] *= 3 /* 1 becomes 3 (defined before use), 0 stays 0. */ | |
| 232 | |
| 233 /* Scan through following bytecode and check for uses/defs. */ | |
| 234 lj_assertJ(pc >= proto_bc(J->pt) && pc < proto_bc(J->pt) + J->pt->sizebc, | |
| 235 "snapshot PC out of range"); | |
| 236 for (;;) { | |
| 237 BCIns ins = *pc++; | |
| 238 BCOp op = bc_op(ins); | |
| 239 switch (bcmode_b(op)) { | |
| 240 case BCMvar: USE_SLOT(bc_b(ins)); break; | |
| 241 default: break; | |
| 242 } | |
| 243 switch (bcmode_c(op)) { | |
| 244 case BCMvar: USE_SLOT(bc_c(ins)); break; | |
| 245 case BCMrbase: | |
| 246 lj_assertJ(op == BC_CAT, "unhandled op %d with RC rbase", op); | |
| 247 for (s = bc_b(ins); s <= bc_c(ins); s++) USE_SLOT(s); | |
| 248 for (; s < maxslot; s++) DEF_SLOT(s); | |
| 249 break; | |
| 250 case BCMjump: | |
| 251 handle_jump: { | |
| 252 BCReg minslot = bc_a(ins); | |
| 253 if (op >= BC_FORI && op <= BC_JFORL) minslot += FORL_EXT; | |
| 254 else if (op >= BC_ITERL && op <= BC_JITERL) minslot += bc_b(pc[-2])-1; | |
| 255 else if (op == BC_UCLO) { | |
| 256 ptrdiff_t delta = bc_j(ins); | |
| 257 if (delta < 0) return maxslot; /* Prevent loop. */ | |
| 258 pc += delta; /* Follow the forward jump and keep scanning. */ | |
| 259 break; | |
| 260 } | |
| 261 for (s = minslot; s < maxslot; s++) DEF_SLOT(s); | |
| 262 return minslot < maxslot ? minslot : maxslot; /* The scan ends at any other jump. */ | |
| 263 } | |
| 264 case BCMlit: | |
| 265 if (op == BC_JFORL || op == BC_JITERL || op == BC_JLOOP) { | |
| 266 goto handle_jump; | |
| 267 } else if (bc_isret(op)) { | |
| 268 BCReg top = op == BC_RETM ? maxslot : (bc_a(ins) + bc_d(ins)-1); | |
| 269 for (s = 0; s < bc_a(ins); s++) DEF_SLOT(s); | |
| 270 for (; s < top; s++) USE_SLOT(s); | |
| 271 for (; s < maxslot; s++) DEF_SLOT(s); | |
| 272 return 0; /* The scan ends at a return; all slots were classified above. */ | |
| 273 } | |
| 274 break; | |
| 275 case BCMfunc: return maxslot; /* NYI: will abort, anyway. */ | |
| 276 default: break; | |
| 277 } | |
| 278 switch (bcmode_a(op)) { | |
| 279 case BCMvar: USE_SLOT(bc_a(ins)); break; | |
| 280 case BCMdst: | |
| 281 if (!(op == BC_ISTC || op == BC_ISFC)) DEF_SLOT(bc_a(ins)); | |
| 282 break; | |
| 283 case BCMbase: | |
| 284 if (op >= BC_CALLM && op <= BC_ITERN) { | |
| 285 BCReg top = (op == BC_CALLM || op == BC_CALLMT || bc_c(ins) == 0) ? | |
| 286 maxslot : (bc_a(ins) + bc_c(ins)+LJ_FR2); | |
| 287 if (LJ_FR2) DEF_SLOT(bc_a(ins)+1); | |
| 288 s = bc_a(ins) - ((op == BC_ITERC || op == BC_ITERN) ? 3 : 0); | |
| 289 for (; s < top; s++) USE_SLOT(s); | |
| 290 for (; s < maxslot; s++) DEF_SLOT(s); | |
| 291 if (op == BC_CALLT || op == BC_CALLMT) { | |
| 292 for (s = 0; s < bc_a(ins); s++) DEF_SLOT(s); | |
| 293 return 0; | |
| 294 } | |
| 295 } else if (op == BC_VARG) { | |
| 296 return maxslot; /* NYI: punt. */ | |
| 297 } else if (op == BC_KNIL) { | |
| 298 for (s = bc_a(ins); s <= bc_d(ins); s++) DEF_SLOT(s); | |
| 299 } else if (op == BC_TSETM) { | |
| 300 for (s = bc_a(ins)-1; s < maxslot; s++) USE_SLOT(s); | |
| 301 } | |
| 302 break; | |
| 303 default: break; | |
| 304 } | |
| 305 lj_assertJ(pc >= proto_bc(J->pt) && pc < proto_bc(J->pt) + J->pt->sizebc, | |
| 306 "use/def analysis PC out of range"); | |
| 307 } | |
| 308 | |
| 309 #undef USE_SLOT | |
| 310 #undef DEF_SLOT | |
| 311 | |
| 312 return 0; /* unreachable */ | |
| 313 } | |
| 314 | |
| 315 /* Mark slots used by upvalues of child prototypes as used. */ | |
| 316 static void snap_useuv(GCproto *pt, uint8_t *udf) | |
| 317 { | |
| 318 /* This is a coarse check, because it's difficult to correlate the lifetime | |
| 319 ** of slots and closures. But the number of false positives is quite low. | |
| 320 ** A false positive may cause a slot not to be purged, which is just | |
| 321 ** a missed optimization. | |
| 322 */ | |
| 323 if ((pt->flags & PROTO_CHILD)) { | |
| 324 ptrdiff_t i, j, n = pt->sizekgc; | |
| 325 GCRef *kr = mref(pt->k, GCRef) - 1; | |
| 326 for (i = 0; i < n; i++, kr--) { | |
| 327 GCobj *o = gcref(*kr); | |
| 328 if (o->gch.gct == ~LJ_TPROTO) { | |
| 329 for (j = 0; j < gco2pt(o)->sizeuv; j++) { | |
| 330 uint32_t v = proto_uv(gco2pt(o))[j]; | |
| 331 if ((v & PROTO_UV_LOCAL)) { | |
| 332 udf[(v & 0xff)] = 0; | |
| 333 } | |
| 334 } | |
| 335 } | |
| 336 } | |
| 337 } | |
| 338 } | |
| 339 | |
| 340 /* Purge dead slots before the next snapshot. */ | |
| 341 void lj_snap_purge(jit_State *J) | |
| 342 { | |
| 343 uint8_t udf[SNAP_USEDEF_SLOTS]; | |
| 344 BCReg s, maxslot = J->maxslot; | |
| 345 if (bc_op(*J->pc) == BC_FUNCV && maxslot > J->pt->numparams) | |
| 346 maxslot = J->pt->numparams; | |
| 347 s = snap_usedef(J, udf, J->pc, maxslot); | |
| 348 if (s < maxslot) { | |
| 349 snap_useuv(J->pt, udf); | |
| 350 for (; s < maxslot; s++) | |
| 351 if (udf[s] != 0) | |
| 352 J->base[s] = 0; /* Purge dead slots. */ | |
| 353 } | |
| 354 } | |
| 355 | |
| 356 /* Shrink last snapshot. */ | |
| 357 void lj_snap_shrink(jit_State *J) | |
| 358 { | |
| 359 SnapShot *snap = &J->cur.snap[J->cur.nsnap-1]; | |
| 360 SnapEntry *map = &J->cur.snapmap[snap->mapofs]; | |
| 361 MSize n, m, nlim, nent = snap->nent; | |
| 362 uint8_t udf[SNAP_USEDEF_SLOTS]; | |
| 363 BCReg maxslot = J->maxslot; | |
| 364 BCReg baseslot = J->baseslot; | |
| 365 BCReg minslot = snap_usedef(J, udf, snap_pc(&map[nent]), maxslot); | |
| 366 if (minslot < maxslot) snap_useuv(J->pt, udf); | |
| 367 maxslot += baseslot; | |
| 368 minslot += baseslot; | |
| 369 snap->nslots = (uint8_t)maxslot; | |
| 370 for (n = m = 0; n < nent; n++) { /* Remove unused slots from snapshot. */ | |
| 371 BCReg s = snap_slot(map[n]); | |
| 372 if (s < minslot || (s < maxslot && udf[s-baseslot] == 0)) | |
| 373 map[m++] = map[n]; /* Only copy used slots. */ | |
| 374 } | |
| 375 snap->nent = (uint8_t)m; | |
| 376 nlim = J->cur.nsnapmap - snap->mapofs - 1; | |
| 377 while (n <= nlim) map[m++] = map[n++]; /* Move PC + frame links down. */ | |
| 378 J->cur.nsnapmap = (uint32_t)(snap->mapofs + m); /* Free up space in map. */ | |
| 379 } | |
| 380 | |
| 381 /* -- Snapshot access ----------------------------------------------------- */ | |
| 382 | |
| 383 /* Initialize a Bloom Filter with all renamed refs. | |
| 384 ** There are very few renames (often none), so the filter has | |
| 385 ** very few bits set. This makes it suitable for negative filtering. | |
| 386 */ | |
| 387 static BloomFilter snap_renamefilter(GCtrace *T, SnapNo lim) | |
| 388 { | |
| 389 BloomFilter rfilt = 0; | |
| 390 IRIns *ir; | |
| 391 for (ir = &T->ir[T->nins-1]; ir->o == IR_RENAME; ir--) | |
| 392 if (ir->op2 <= lim) | |
| 393 bloomset(rfilt, ir->op1); | |
| 394 return rfilt; | |
| 395 } | |
| 396 | |
| 397 /* Process matching renames to find the original RegSP. */ | |
| 398 static RegSP snap_renameref(GCtrace *T, SnapNo lim, IRRef ref, RegSP rs) | |
| 399 { | |
| 400 IRIns *ir; | |
| 401 for (ir = &T->ir[T->nins-1]; ir->o == IR_RENAME; ir--) | |
| 402 if (ir->op1 == ref && ir->op2 <= lim) | |
| 403 rs = ir->prev; | |
| 404 return rs; | |
| 405 } | |
| 406 | |
| 407 /* Copy RegSP from parent snapshot to the parent links of the IR. */ | |
| 408 IRIns *lj_snap_regspmap(jit_State *J, GCtrace *T, SnapNo snapno, IRIns *ir) | |
| 409 { | |
| 410 SnapShot *snap = &T->snap[snapno]; | |
| 411 SnapEntry *map = &T->snapmap[snap->mapofs]; | |
| 412 BloomFilter rfilt = snap_renamefilter(T, snapno); | |
| 413 MSize n = 0; | |
| 414 IRRef ref = 0; | |
| 415 UNUSED(J); | |
| 416 for ( ; ; ir++) { | |
| 417 uint32_t rs; | |
| 418 if (ir->o == IR_SLOAD) { | |
| 419 if (!(ir->op2 & IRSLOAD_PARENT)) break; | |
| 420 for ( ; ; n++) { | |
| 421 lj_assertJ(n < snap->nent, "slot %d not found in snapshot", ir->op1); | |
| 422 if (snap_slot(map[n]) == ir->op1) { | |
| 423 ref = snap_ref(map[n++]); | |
| 424 break; | |
| 425 } | |
| 426 } | |
| 427 } else if (LJ_SOFTFP32 && ir->o == IR_HIOP) { | |
| 428 ref++; | |
| 429 } else if (ir->o == IR_PVAL) { | |
| 430 ref = ir->op1 + REF_BIAS; | |
| 431 } else { | |
| 432 break; | |
| 433 } | |
| 434 rs = T->ir[ref].prev; | |
| 435 if (bloomtest(rfilt, ref)) | |
| 436 rs = snap_renameref(T, snapno, ref, rs); | |
| 437 ir->prev = (uint16_t)rs; | |
| 438 lj_assertJ(regsp_used(rs), "unused IR %04d in snapshot", ref - REF_BIAS); | |
| 439 } | |
| 440 return ir; | |
| 441 } | |
| 442 | |
| 443 /* -- Snapshot replay ----------------------------------------------------- */ | |
| 444 | |
| 445 /* Replay constant from parent trace. */ | |
| 446 static TRef snap_replay_const(jit_State *J, IRIns *ir) | |
| 447 { | |
| 448 /* Only have to deal with constants that can occur in stack slots. */ | |
| 449 switch ((IROp)ir->o) { | |
| 450 case IR_KPRI: return TREF_PRI(irt_type(ir->t)); | |
| 451 case IR_KINT: return lj_ir_kint(J, ir->i); | |
| 452 case IR_KGC: return lj_ir_kgc(J, ir_kgc(ir), irt_t(ir->t)); | |
| 453 case IR_KNUM: case IR_KINT64: | |
| 454 return lj_ir_k64(J, (IROp)ir->o, ir_k64(ir)->u64); | |
| 455 case IR_KPTR: return lj_ir_kptr(J, ir_kptr(ir)); /* Continuation. */ | |
| 456 default: lj_assertJ(0, "bad IR constant op %d", ir->o); return TREF_NIL; | |
| 457 } | |
| 458 } | |
| 459 | |
| 460 /* De-duplicate parent reference. */ | |
| 461 static TRef snap_dedup(jit_State *J, SnapEntry *map, MSize nmax, IRRef ref) | |
| 462 { | |
| 463 MSize j; | |
| 464 for (j = 0; j < nmax; j++) | |
| 465 if (snap_ref(map[j]) == ref) | |
| 466 return J->slot[snap_slot(map[j])] & ~(SNAP_KEYINDEX|SNAP_CONT|SNAP_FRAME); | |
| 467 return 0; | |
| 468 } | |
| 469 | |
| 470 /* Emit parent reference with de-duplication. */ | |
| 471 static TRef snap_pref(jit_State *J, GCtrace *T, SnapEntry *map, MSize nmax, | |
| 472 BloomFilter seen, IRRef ref) | |
| 473 { | |
| 474 IRIns *ir = &T->ir[ref]; | |
| 475 TRef tr; | |
| 476 if (irref_isk(ref)) | |
| 477 tr = snap_replay_const(J, ir); | |
| 478 else if (!regsp_used(ir->prev)) | |
| 479 tr = 0; | |
| 480 else if (!bloomtest(seen, ref) || (tr = snap_dedup(J, map, nmax, ref)) == 0) | |
| 481 tr = emitir(IRT(IR_PVAL, irt_type(ir->t)), ref - REF_BIAS, 0); | |
| 482 return tr; | |
| 483 } | |
| 484 | |
| 485 /* Check whether a sunk store corresponds to an allocation. Slow path. */ | |
| 486 static int snap_sunk_store2(GCtrace *T, IRIns *ira, IRIns *irs) | |
| 487 { | |
| 488 if (irs->o == IR_ASTORE || irs->o == IR_HSTORE || | |
| 489 irs->o == IR_FSTORE || irs->o == IR_XSTORE) { | |
| 490 IRIns *irk = &T->ir[irs->op1]; | |
| 491 if (irk->o == IR_AREF || irk->o == IR_HREFK) | |
| 492 irk = &T->ir[irk->op1]; | |
| 493 return (&T->ir[irk->op1] == ira); | |
| 494 } | |
| 495 return 0; | |
| 496 } | |
| 497 | |
| 498 /* Check whether a sunk store corresponds to an allocation. Fast path. */ | |
| 499 static LJ_AINLINE int snap_sunk_store(GCtrace *T, IRIns *ira, IRIns *irs) | |
| 500 { | |
| 501 if (irs->s != 255) | |
| 502 return (ira + irs->s == irs); /* Fast check. */ | |
| 503 return snap_sunk_store2(T, ira, irs); | |
| 504 } | |
| 505 | |
| 506 /* Replay snapshot state to setup side trace. Rebuilds J->slot[], J->framedepth, J->baseslot, J->base and J->maxslot from snapshot J->exitno of parent trace T, replays sunk allocations and their stores if needed, then takes the initial snapshot. */ | |
| 507 void lj_snap_replay(jit_State *J, GCtrace *T) | |
| 508 { | |
| 509 SnapShot *snap = &T->snap[J->exitno]; | |
| 510 SnapEntry *map = &T->snapmap[snap->mapofs]; | |
| 511 MSize n, nent = snap->nent; | |
| 512 BloomFilter seen = 0; | |
| 513 int pass23 = 0; /* Set when a slot refers to a parent IR without register/spill slot; enables the two extra passes below. */ | |
| 514 J->framedepth = 0; | |
| 515 /* Emit IR for slots inherited from parent snapshot. */ | |
| 516 for (n = 0; n < nent; n++) { | |
| 517 SnapEntry sn = map[n]; | |
| 518 BCReg s = snap_slot(sn); | |
| 519 IRRef ref = snap_ref(sn); | |
| 520 IRIns *ir = &T->ir[ref]; | |
| 521 TRef tr; | |
| 522 /* The bloom filter avoids O(nent^2) overhead for de-duping slots. */ | |
| 523 if (bloomtest(seen, ref) && (tr = snap_dedup(J, map, n, ref)) != 0) | |
| 524 goto setslot; | |
| 525 bloomset(seen, ref); | |
| 526 if (irref_isk(ref)) { | |
| 527 /* See special treatment of LJ_FR2 slot 1 in snapshot_slots() above. */ | |
| 528 if (LJ_FR2 && (sn == SNAP(1, SNAP_FRAME | SNAP_NORESTORE, REF_NIL))) | |
| 529 tr = 0; | |
| 530 else | |
| 531 tr = snap_replay_const(J, ir); | |
| 532 } else if (!regsp_used(ir->prev)) { | |
| 533 pass23 = 1; | |
| 534 lj_assertJ(s != 0, "unused slot 0 in snapshot"); | |
| 535 tr = s; /* Placeholder: the slot holds its own number until the later passes replace it. */ | |
| 536 } else { | |
| 537 IRType t = irt_type(ir->t); | |
| 538 uint32_t mode = IRSLOAD_INHERIT|IRSLOAD_PARENT; | |
| 539 if (LJ_SOFTFP32 && (sn & SNAP_SOFTFPNUM)) t = IRT_NUM; | |
| 540 if (ir->o == IR_SLOAD) mode |= (ir->op2 & IRSLOAD_READONLY); | |
| 541 if ((sn & SNAP_KEYINDEX)) mode |= IRSLOAD_KEYINDEX; | |
| 542 tr = emitir_raw(IRT(IR_SLOAD, t), s, mode); | |
| 543 } | |
| 544 setslot: | |
| 545 /* Same as TREF_* flags. */ | |
| 546 J->slot[s] = tr | (sn&(SNAP_KEYINDEX|SNAP_CONT|SNAP_FRAME)); | |
| 547 J->framedepth += ((sn & (SNAP_CONT|SNAP_FRAME)) && (s != LJ_FR2)); | |
| 548 if ((sn & SNAP_FRAME)) | |
| 549 J->baseslot = s+1; /* The last frame entry seen determines the base slot. */ | |
| 550 } | |
| 551 if (pass23) { | |
| 552 IRIns *irlast = &T->ir[snap->ref]; /* Sunk stores are only searched for below this parent IR instruction. */ | |
| 553 pass23 = 0; /* Set again below only if a sunk allocation still needs replaying. */ | |
| 554 /* Emit dependent PVALs. */ | |
| 555 for (n = 0; n < nent; n++) { | |
| 556 SnapEntry sn = map[n]; | |
| 557 IRRef refp = snap_ref(sn); | |
| 558 IRIns *ir = &T->ir[refp]; | |
| 559 if (regsp_reg(ir->r) == RID_SUNK) { | |
| 560 if (J->slot[snap_slot(sn)] != snap_slot(sn)) continue; /* Slot no longer holds its placeholder: skip. */ | |
| 561 pass23 = 1; | |
| 562 lj_assertJ(ir->o == IR_TNEW || ir->o == IR_TDUP || | |
| 563 ir->o == IR_CNEW || ir->o == IR_CNEWI, | |
| 564 "sunk parent IR %04d has bad op %d", refp - REF_BIAS, ir->o); | |
| 565 if (ir->op1 >= T->nk) snap_pref(J, T, map, nent, seen, ir->op1); | |
| 566 if (ir->op2 >= T->nk) snap_pref(J, T, map, nent, seen, ir->op2); | |
| 567 if (LJ_HASFFI && ir->o == IR_CNEWI) { | |
| 568 if (LJ_32 && refp+1 < T->nins && (ir+1)->o == IR_HIOP) | |
| 569 snap_pref(J, T, map, nent, seen, (ir+1)->op2); | |
| 570 } else { | |
| 571 IRIns *irs; | |
| 572 for (irs = ir+1; irs < irlast; irs++) | |
| 573 if (irs->r == RID_SINK && snap_sunk_store(T, ir, irs)) { | |
| 574 if (snap_pref(J, T, map, nent, seen, irs->op2) == 0) | |
| 575 snap_pref(J, T, map, nent, seen, T->ir[irs->op2].op1); | |
| 576 else if ((LJ_SOFTFP32 || (LJ_32 && LJ_HASFFI)) && | |
| 577 irs+1 < irlast && (irs+1)->o == IR_HIOP) | |
| 578 snap_pref(J, T, map, nent, seen, (irs+1)->op2); | |
| 579 } | |
| 580 } | |
| 581 } else if (!irref_isk(refp) && !regsp_used(ir->prev)) { | |
| 582 lj_assertJ(ir->o == IR_CONV && ir->op2 == IRCONV_NUM_INT, | |
| 583 "sunk parent IR %04d has bad op %d", refp - REF_BIAS, ir->o); | |
| 584 J->slot[snap_slot(sn)] = snap_pref(J, T, map, nent, seen, ir->op1); | |
| 585 } | |
| 586 } | |
| 587 /* Replay sunk instructions. */ | |
| 588 for (n = 0; pass23 && n < nent; n++) { | |
| 589 SnapEntry sn = map[n]; | |
| 590 IRRef refp = snap_ref(sn); | |
| 591 IRIns *ir = &T->ir[refp]; | |
| 592 if (regsp_reg(ir->r) == RID_SUNK) { | |
| 593 TRef op1, op2; | |
| 594 if (J->slot[snap_slot(sn)] != snap_slot(sn)) { /* De-dup allocs. */ | |
| 595 J->slot[snap_slot(sn)] = J->slot[J->slot[snap_slot(sn)]]; | |
| 596 continue; | |
| 597 } | |
| 598 op1 = ir->op1; | |
| 599 if (op1 >= T->nk) op1 = snap_pref(J, T, map, nent, seen, op1); | |
| 600 op2 = ir->op2; | |
| 601 if (op2 >= T->nk) op2 = snap_pref(J, T, map, nent, seen, op2); | |
| 602 if (LJ_HASFFI && ir->o == IR_CNEWI) { | |
| 603 if (LJ_32 && refp+1 < T->nins && (ir+1)->o == IR_HIOP) { | |
| 604 lj_needsplit(J); /* Emit joining HIOP. */ | |
| 605 op2 = emitir_raw(IRT(IR_HIOP, IRT_I64), op2, | |
| 606 snap_pref(J, T, map, nent, seen, (ir+1)->op2)); | |
| 607 } | |
| 608 J->slot[snap_slot(sn)] = emitir(ir->ot & ~(IRT_MARK|IRT_ISPHI), op1, op2); | |
| 609 } else { | |
| 610 IRIns *irs; | |
| 611 TRef tr = emitir(ir->ot, op1, op2); /* Re-emit the allocation itself. */ | |
| 612 J->slot[snap_slot(sn)] = tr; | |
| 613 for (irs = ir+1; irs < irlast; irs++) | |
| 614 if (irs->r == RID_SINK && snap_sunk_store(T, ir, irs)) { | |
| 615 IRIns *irr = &T->ir[irs->op1]; | |
| 616 TRef val, key = irr->op2, tmp = tr; | |
| 617 if (irr->o != IR_FREF) { | |
| 618 IRIns *irk = &T->ir[key]; | |
| 619 if (irr->o == IR_HREFK) | |
| 620 key = lj_ir_kslot(J, snap_replay_const(J, &T->ir[irk->op1]), | |
| 621 irk->op2); | |
| 622 else | |
| 623 key = snap_replay_const(J, irk); | |
| 624 if (irr->o == IR_HREFK || irr->o == IR_AREF) { | |
| 625 IRIns *irf = &T->ir[irr->op1]; | |
| 626 tmp = emitir(irf->ot, tmp, irf->op2); | |
| 627 } | |
| 628 } | |
| 629 tmp = emitir(irr->ot, tmp, key); /* Re-emit the reference the store goes through. */ | |
| 630 val = snap_pref(J, T, map, nent, seen, irs->op2); | |
| 631 if (val == 0) { | |
| 632 IRIns *irc = &T->ir[irs->op2]; | |
| 633 lj_assertJ(irc->o == IR_CONV && irc->op2 == IRCONV_NUM_INT, | |
| 634 "sunk store for parent IR %04d with bad op %d", | |
| 635 refp - REF_BIAS, irc->o); | |
| 636 val = snap_pref(J, T, map, nent, seen, irc->op1); | |
| 637 val = emitir(IRTN(IR_CONV), val, IRCONV_NUM_INT); | |
| 638 } else if ((LJ_SOFTFP32 || (LJ_32 && LJ_HASFFI)) && | |
| 639 irs+1 < irlast && (irs+1)->o == IR_HIOP) { | |
| 640 IRType t = IRT_I64; | |
| 641 if (LJ_SOFTFP32 && irt_type((irs+1)->t) == IRT_SOFTFP) | |
| 642 t = IRT_NUM; | |
| 643 lj_needsplit(J); | |
| 644 if (irref_isk(irs->op2) && irref_isk((irs+1)->op2)) { | |
| 645 uint64_t k = (uint32_t)T->ir[irs->op2].i + | |
| 646 ((uint64_t)T->ir[(irs+1)->op2].i << 32); | |
| 647 val = lj_ir_k64(J, t == IRT_I64 ? IR_KINT64 : IR_KNUM, k); | |
| 648 } else { | |
| 649 val = emitir_raw(IRT(IR_HIOP, t), val, | |
| 650 snap_pref(J, T, map, nent, seen, (irs+1)->op2)); | |
| 651 } | |
| 652 tmp = emitir(IRT(irs->o, t), tmp, val); | |
| 653 continue; | |
| 654 } | |
| 655 tmp = emitir(irs->ot, tmp, val); | |
| 656 } else if (LJ_HASFFI && irs->o == IR_XBAR && ir->o == IR_CNEW) { | |
| 657 emitir(IRT(IR_XBAR, IRT_NIL), 0, 0); | |
| 658 } | |
| 659 } | |
| 660 } | |
| 661 } | |
| 662 } | |
| 663 J->base = J->slot + J->baseslot; | |
| 664 J->maxslot = snap->nslots - J->baseslot; | |
| 665 lj_snap_add(J); | |
| 666 if (pass23) /* Need explicit GC step _after_ initial snapshot. */ | |
| 667 emitir_raw(IRTG(IR_GCSTEP, IRT_NIL), 0, 0); | |
| 668 } | |
| 669 | |
| 670 /* -- Snapshot restore ---------------------------------------------------- */ | |
| 671 | |
| 672 static void snap_unsink(jit_State *J, GCtrace *T, ExitState *ex, | |
| 673 SnapNo snapno, BloomFilter rfilt, | |
| 674 IRIns *ir, TValue *o); | |
| 675 | |
| 676 /* Restore a value from the trace exit state. Writes the value of IR ref `ref` of trace T into *o, taken from the constant itself, a spill slot or a register saved in *ex. */ | |
| 677 static void snap_restoreval(jit_State *J, GCtrace *T, ExitState *ex, | |
| 678 SnapNo snapno, BloomFilter rfilt, | |
| 679 IRRef ref, TValue *o) | |
| 680 { | |
| 681 IRIns *ir = &T->ir[ref]; | |
| 682 IRType1 t = ir->t; | |
| 683 RegSP rs = ir->prev; | |
| 684 if (irref_isk(ref)) { /* Restore constant slot. */ | |
| 685 if (ir->o == IR_KPTR) { | |
| 686 o->u64 = (uint64_t)(uintptr_t)ir_kptr(ir); /* Stored as a raw 64 bit pointer value. */ | |
| 687 } else { | |
| 688 lj_assertJ(!(ir->o == IR_KKPTR || ir->o == IR_KNULL), | |
| 689 "restore of const from IR %04d with bad op %d", | |
| 690 ref - REF_BIAS, ir->o); | |
| 691 lj_ir_kvalue(J->L, o, ir); | |
| 692 } | |
| 693 return; | |
| 694 } | |
| 695 if (LJ_UNLIKELY(bloomtest(rfilt, ref))) /* Possibly renamed: look up the RegSP valid for this snapshot. */ | |
| 696 rs = snap_renameref(T, snapno, ref, rs); | |
| 697 if (ra_hasspill(regsp_spill(rs))) { /* Restore from spill slot. */ | |
| 698 int32_t *sps = &ex->spill[regsp_spill(rs)]; | |
| 699 if (irt_isinteger(t)) { | |
| 700 setintV(o, *sps); | |
| 701 #if !LJ_SOFTFP32 | |
| 702 } else if (irt_isnum(t)) { | |
| 703 o->u64 = *(uint64_t *)sps; | |
| 704 #endif | |
| 705 #if LJ_64 && !LJ_GC64 | |
| 706 } else if (irt_islightud(t)) { | |
| 707 /* 64 bit lightuserdata which may escape already has the tag bits. */ | |
| 708 o->u64 = *(uint64_t *)sps; | |
| 709 #endif | |
| 710 } else { | |
| 711 lj_assertJ(!irt_ispri(t), "PRI ref with spill slot"); | |
| 712 setgcV(J->L, o, (GCobj *)(uintptr_t)*(GCSize *)sps, irt_toitype(t)); | |
| 713 } | |
| 714 } else { /* Restore from register. */ | |
| 715 Reg r = regsp_reg(rs); | |
| 716 if (ra_noreg(r)) { /* No register: the assertion below expects an int to number conversion. */ | |
| 717 lj_assertJ(ir->o == IR_CONV && ir->op2 == IRCONV_NUM_INT, | |
| 718 "restore from IR %04d has no reg", ref - REF_BIAS); | |
| 719 snap_restoreval(J, T, ex, snapno, rfilt, ir->op1, o); /* Restore the conversion's operand instead. */ | |
| 720 if (LJ_DUALNUM) setnumV(o, (lua_Number)intV(o)); | |
| 721 return; | |
| 722 } else if (irt_isinteger(t)) { | |
| 723 setintV(o, (int32_t)ex->gpr[r-RID_MIN_GPR]); | |
| 724 #if !LJ_SOFTFP | |
| 725 } else if (irt_isnum(t)) { | |
| 726 setnumV(o, ex->fpr[r-RID_MIN_FPR]); | |
| 727 #elif LJ_64 /* && LJ_SOFTFP */ | |
| 728 } else if (irt_isnum(t)) { | |
| 729 o->u64 = ex->gpr[r-RID_MIN_GPR]; | |
| 730 #endif | |
| 731 #if LJ_64 && !LJ_GC64 | |
| 732 } else if (irt_is64(t)) { | |
| 733 /* 64 bit values that already have the tag bits. */ | |
| 734 o->u64 = ex->gpr[r-RID_MIN_GPR]; | |
| 735 #endif | |
| 736 } else if (irt_ispri(t)) { | |
| 737 setpriV(o, irt_toitype(t)); /* Primitive types are restored from the IR type alone. */ | |
| 738 } else { | |
| 739 setgcV(J->L, o, (GCobj *)ex->gpr[r-RID_MIN_GPR], irt_toitype(t)); | |
| 740 } | |
| 741 } | |
| 742 } | |
| 743 | |
| 744 #if LJ_HASFFI | |
| 745 /* Restore raw data from the trace exit state. Copies sz bytes (1, 2, 4 or 8, see the assertion near the end) for IR ref `ref` of trace T to dst, taken from the constant itself, a spill slot or a register saved in *ex. */ | |
| 746 static void snap_restoredata(jit_State *J, GCtrace *T, ExitState *ex, | |
| 747 SnapNo snapno, BloomFilter rfilt, | |
| 748 IRRef ref, void *dst, CTSize sz) | |
| 749 { | |
| 750 IRIns *ir = &T->ir[ref]; | |
| 751 RegSP rs = ir->prev; | |
| 752 int32_t *src; | |
| 753 uint64_t tmp; /* Scratch space for zero-extending a 32 bit source to 64 bits. */ | |
| 754 UNUSED(J); | |
| 755 if (irref_isk(ref)) { | |
| 756 if (ir_isk64(ir)) { | |
| 757 src = (int32_t *)&ir[1]; /* Read the 64 bit constant from the IR slot following ir. */ | |
| 758 } else if (sz == 8) { | |
| 759 tmp = (uint64_t)(uint32_t)ir->i; | |
| 760 src = (int32_t *)&tmp; | |
| 761 } else { | |
| 762 src = &ir->i; | |
| 763 } | |
| 764 } else { | |
| 765 if (LJ_UNLIKELY(bloomtest(rfilt, ref))) | |
| 766 rs = snap_renameref(T, snapno, ref, rs); | |
| 767 if (ra_hasspill(regsp_spill(rs))) { | |
| 768 src = &ex->spill[regsp_spill(rs)]; | |
| 769 if (sz == 8 && !irt_is64(ir->t)) { | |
| 770 tmp = (uint64_t)(uint32_t)*src; | |
| 771 src = (int32_t *)&tmp; | |
| 772 } | |
| 773 } else { | |
| 774 Reg r = regsp_reg(rs); | |
| 775 if (ra_noreg(r)) { | |
| 776 /* Note: this assumes CNEWI is never used for SOFTFP split numbers. */ | |
| 777 lj_assertJ(sz == 8 && ir->o == IR_CONV && ir->op2 == IRCONV_NUM_INT, | |
| 778 "restore from IR %04d has no reg", ref - REF_BIAS); | |
| 779 snap_restoredata(J, T, ex, snapno, rfilt, ir->op1, dst, 4); /* Restore the 32 bit integer operand first ... */ | |
| 780 *(lua_Number *)dst = (lua_Number)*(int32_t *)dst; /* ... then convert it to a number in place. */ | |
| 781 return; | |
| 782 } | |
| 783 src = (int32_t *)&ex->gpr[r-RID_MIN_GPR]; | |
| 784 #if !LJ_SOFTFP | |
| 785 if (r >= RID_MAX_GPR) { | |
| 786 src = (int32_t *)&ex->fpr[r-RID_MIN_FPR]; | |
| 787 #if LJ_TARGET_PPC | |
| 788 if (sz == 4) { /* PPC FPRs are always doubles. */ | |
| 789 *(float *)dst = (float)*(double *)src; | |
| 790 return; | |
| 791 } | |
| 792 #else | |
| 793 if (LJ_BE && sz == 4) src++; | |
| 794 #endif | |
| 795 } else | |
| 796 #endif | |
| 797 if (LJ_64 && LJ_BE && sz == 4) src++; /* On 64 bit big-endian builds a 4 byte value is read from the second 32 bit word. */ | |
| 798 } | |
| 799 } | |
| 800 lj_assertJ(sz == 1 || sz == 2 || sz == 4 || sz == 8, | |
| 801 "restore from IR %04d with bad size %d", ref - REF_BIAS, sz); | |
| 802 if (sz == 4) *(int32_t *)dst = *src; | |
| 803 else if (sz == 8) *(int64_t *)dst = *(int64_t *)src; | |
| 804 else if (sz == 1) *(int8_t *)dst = (int8_t)*src; | |
| 805 else *(int16_t *)dst = (int16_t)*src; | |
| 806 } | |
| 807 #endif | |
| 808 | |
| 809 /* Unsink allocation from the trace exit state. Unsink sunk stores. Re-creates the object allocated by the sunk IR instruction ir, stores it into *o and replays the sunk stores that belong to it. */ | |
| 810 static void snap_unsink(jit_State *J, GCtrace *T, ExitState *ex, /* J: JIT state, T: trace the IR belongs to, ex: exit state. */ | |
| 811 SnapNo snapno, BloomFilter rfilt, /* snapno: snapshot number, rfilt: filter handed on to the restore helpers. */ | |
| 812 IRIns *ir, TValue *o) /* ir: sunk allocation instruction, o: slot receiving the new object. */ | |
| 813 { | |
| 814 lj_assertJ(ir->o == IR_TNEW || ir->o == IR_TDUP || | |
| 815 ir->o == IR_CNEW || ir->o == IR_CNEWI, | |
| 816 "sunk allocation with bad op %d", ir->o); /* Only these four allocation ops are expected here. */ | |
| 817 #if LJ_HASFFI /* The cdata path is only compiled in FFI builds. */ | |
| 818 if (ir->o == IR_CNEW || ir->o == IR_CNEWI) { | |
| 819 CTState *cts = ctype_cts(J->L); | |
| 820 CTypeID id = (CTypeID)T->ir[ir->op1].i; /* C type id is the integer payload of the instruction referenced by op1. */ | |
| 821 CTSize sz; | |
| 822 CTInfo info = lj_ctype_info(cts, id, &sz); /* sz is filled in through the out-pointer. */ | |
| 823 GCcdata *cd = lj_cdata_newx(cts, id, sz, info); | |
| 824 setcdataV(J->L, o, cd); /* Store the new cdata into *o before restoring its contents. */ | |
| 825 if (ir->o == IR_CNEWI) { /* CNEWI: the initial value is restored from op2. */ | |
| 826 uint8_t *p = (uint8_t *)cdataptr(cd); | |
| 827 lj_assertJ(sz == 4 || sz == 8, "sunk cdata with bad size %d", sz); | |
| 828 if (LJ_32 && sz == 8 && ir+1 < T->ir + T->nins && (ir+1)->o == IR_HIOP) { /* 32 bit target with a following HIOP: restore the 8 byte value as two 4 byte halves. */ | |
| 829 snap_restoredata(J, T, ex, snapno, rfilt, (ir+1)->op2, | |
| 830 LJ_LE ? p+4 : p, 4); /* The HIOP operand goes to p+4 on little-endian, to p on big-endian. */ | |
| 831 if (LJ_BE) p += 4; /* Big-endian: the remaining half goes into the second word. */ | |
| 832 sz = 4; | |
| 833 } | |
| 834 snap_restoredata(J, T, ex, snapno, rfilt, ir->op2, p, sz); | |
| 835 } else { /* CNEW: replay the sunk stores into the new cdata. */ | |
| 836 IRIns *irs, *irlast = &T->ir[T->snap[snapno].ref]; /* The scan stops at the snapshot's IR reference. */ | |
| 837 for (irs = ir+1; irs < irlast; irs++) | |
| 838 if (irs->r == RID_SINK && snap_sunk_store(T, ir, irs)) { /* Only RID_SINK instructions that snap_sunk_store() accepts for this allocation. */ | |
| 839 IRIns *iro = &T->ir[T->ir[irs->op1].op2]; /* Second operand of the store's address instruction (asserted below to be an ADD with a KINT/KINT64). */ | |
| 840 uint8_t *p = (uint8_t *)cd; /* Base is the cdata object itself; the offset is added below. */ | |
| 841 CTSize szs; | |
| 842 lj_assertJ(irs->o == IR_XSTORE, "sunk store with bad op %d", irs->o); | |
| 843 lj_assertJ(T->ir[irs->op1].o == IR_ADD, | |
| 844 "sunk store with bad add op %d", T->ir[irs->op1].o); | |
| 845 lj_assertJ(iro->o == IR_KINT || iro->o == IR_KINT64, | |
| 846 "sunk store with bad const offset op %d", iro->o); | |
| 847 if (irt_is64(irs->t)) szs = 8; /* Pick the store width from the IR type of the store. */ | |
| 848 else if (irt_isi8(irs->t) || irt_isu8(irs->t)) szs = 1; | |
| 849 else if (irt_isi16(irs->t) || irt_isu16(irs->t)) szs = 2; | |
| 850 else szs = 4; | |
| 851 if (LJ_64 && iro->o == IR_KINT64) /* 64 bit offset constant on 64 bit targets. */ | |
| 852 p += (int64_t)ir_k64(iro)->u64; | |
| 853 else | |
| 854 p += iro->i; | |
| 855 lj_assertJ(p >= (uint8_t *)cdataptr(cd) && | |
| 856 p + szs <= (uint8_t *)cdataptr(cd) + sz, | |
| 857 "sunk store with offset out of range"); /* The store must lie within the sz payload bytes of the cdata. */ | |
| 858 if (LJ_32 && irs+1 < T->ir + T->nins && (irs+1)->o == IR_HIOP) { /* 32 bit target: a following HIOP supplies the other 4 byte half. */ | |
| 859 lj_assertJ(szs == 4, "sunk store with bad size %d", szs); | |
| 860 snap_restoredata(J, T, ex, snapno, rfilt, (irs+1)->op2, | |
| 861 LJ_LE ? p+4 : p, 4); | |
| 862 if (LJ_BE) p += 4; | |
| 863 } | |
| 864 snap_restoredata(J, T, ex, snapno, rfilt, irs->op2, p, szs); | |
| 865 } | |
| 866 } | |
| 867 } else | |
| 868 #endif | |
| 869 { /* TNEW/TDUP: create the table, then replay the sunk stores into it. */ | |
| 870 IRIns *irs, *irlast; | |
| 871 GCtab *t = ir->o == IR_TNEW ? lj_tab_new(J->L, ir->op1, ir->op2) : | |
| 872 lj_tab_dup(J->L, ir_ktab(&T->ir[ir->op1])); | |
| 873 settabV(J->L, o, t); /* Store the new table into *o before replaying stores. */ | |
| 874 irlast = &T->ir[T->snap[snapno].ref]; /* The scan stops at the snapshot's IR reference. */ | |
| 875 for (irs = ir+1; irs < irlast; irs++) | |
| 876 if (irs->r == RID_SINK && snap_sunk_store(T, ir, irs)) { | |
| 877 IRIns *irk = &T->ir[irs->op1]; /* Address operand of the store. */ | |
| 878 TValue tmp, *val; | |
| 879 lj_assertJ(irs->o == IR_ASTORE || irs->o == IR_HSTORE || | |
| 880 irs->o == IR_FSTORE, | |
| 881 "sunk store with bad op %d", irs->o); | |
| 882 if (irk->o == IR_FREF) { /* Field store: only the metatable field is expected (asserted below). */ | |
| 883 lj_assertJ(irk->op2 == IRFL_TAB_META, | |
| 884 "sunk store with bad field %d", irk->op2); | |
| 885 snap_restoreval(J, T, ex, snapno, rfilt, irs->op2, &tmp); | |
| 886 /* NOBARRIER: The table is new (marked white). */ | |
| 887 setgcref(t->metatable, obj2gco(tabV(&tmp))); | |
| 888 } else { /* Other stores: build the key from IR, then restore the value directly into the table slot. */ | |
| 889 irk = &T->ir[irk->op2]; | |
| 890 if (irk->o == IR_KSLOT) irk = &T->ir[irk->op1]; /* KSLOT: follow op1 to the instruction it refers to. */ | |
| 891 lj_ir_kvalue(J->L, &tmp, irk); | |
| 892 val = lj_tab_set(J->L, t, &tmp); | |
| 893 /* NOBARRIER: The table is new (marked white). */ | |
| 894 snap_restoreval(J, T, ex, snapno, rfilt, irs->op2, val); | |
| 895 if (LJ_SOFTFP32 && irs+1 < T->ir + T->nins && (irs+1)->o == IR_HIOP) { /* LJ_SOFTFP32: the HIOP operand supplies the hi word of the value. */ | |
| 896 snap_restoreval(J, T, ex, snapno, rfilt, (irs+1)->op2, &tmp); | |
| 897 val->u32.hi = tmp.u32.lo; | |
| 898 } | |
| 899 } | |
| 900 } | |
| 901 } | |
| 902 } | |
| 903 | |
| 904 /* Restore interpreter state from exit state with the help of a snapshot. Fills the stack slots listed in the snapshot, updates L->base and L->top and returns the PC stored with the snapshot. */ | |
| 905 const BCIns *lj_snap_restore(jit_State *J, void *exptr) /* exptr is cast to ExitState * below. */ | |
| 906 { | |
| 907 ExitState *ex = (ExitState *)exptr; | |
| 908 SnapNo snapno = J->exitno; /* For now, snapno == exitno. */ | |
| 909 GCtrace *T = traceref(J, J->parent); /* Trace referenced by J->parent. */ | |
| 910 SnapShot *snap = &T->snap[snapno]; | |
| 911 MSize n, nent = snap->nent; | |
| 912 SnapEntry *map = &T->snapmap[snap->mapofs]; /* First map entry of this snapshot. */ | |
| 913 #if !LJ_FR2 || defined(LUA_USE_ASSERT) | |
| 914 SnapEntry *flinks = &T->snapmap[snap_nextofs(T, snap)-1-LJ_FR2]; /* Read backwards for frame links (!LJ_FR2) and checked by the assertion after the loop. */ | |
| 915 #endif | |
| 916 #if !LJ_FR2 | |
| 917 ptrdiff_t ftsz0; | |
| 918 #endif | |
| 919 TValue *frame; | |
| 920 BloomFilter rfilt = snap_renamefilter(T, snapno); /* Filter handed on to the restore helpers. */ | |
| 921 const BCIns *pc = snap_pc(&map[nent]); /* PC is read from the map entry that follows the nent slot entries. */ | |
| 922 lua_State *L = J->L; | |
| 923 | |
| 924 /* Set interpreter PC to the next PC to get correct error messages. */ | |
| 925 setcframe_pc(cframe_raw(L->cframe), pc+1); | |
| 926 | |
| 927 /* Make sure the stack is big enough for the slots from the snapshot. */ | |
| 928 if (LJ_UNLIKELY(L->base + snap->topslot >= tvref(L->maxstack))) { | |
| 929 L->top = curr_topL(L); /* Set L->top before growing the stack. */ | |
| 930 lj_state_growstack(L, snap->topslot - curr_proto(L)->framesize); | |
| 931 } | |
| 932 | |
| 933 /* Fill stack slots with data from the registers and spill slots. */ | |
| 934 frame = L->base-1-LJ_FR2; /* Snapshot slot numbers index relative to this pointer. */ | |
| 935 #if !LJ_FR2 | |
| 936 ftsz0 = frame_ftsz(frame); /* Preserve link to previous frame in slot #0. */ | |
| 937 #endif | |
| 938 for (n = 0; n < nent; n++) { | |
| 939 SnapEntry sn = map[n]; | |
| 940 if (!(sn & SNAP_NORESTORE)) { /* Entries flagged SNAP_NORESTORE are skipped. */ | |
| 941 TValue *o = &frame[snap_slot(sn)]; | |
| 942 IRRef ref = snap_ref(sn); | |
| 943 IRIns *ir = &T->ir[ref]; | |
| 944 if (ir->r == RID_SUNK) { /* Sunk allocation: the object has to be re-created via snap_unsink(). */ | |
| 945 MSize j; | |
| 946 for (j = 0; j < n; j++) | |
| 947 if (snap_ref(map[j]) == ref) { /* De-duplicate sunk allocations. */ | |
| 948 copyTV(L, o, &frame[snap_slot(map[j])]); /* Reuse the value already restored for the earlier entry. */ | |
| 949 goto dupslot; | |
| 950 } | |
| 951 snap_unsink(J, T, ex, snapno, rfilt, ir, o); /* No earlier entry with this ref: materialize the object. */ | |
| 952 dupslot: | |
| 953 continue; | |
| 954 } | |
| 955 snap_restoreval(J, T, ex, snapno, rfilt, ref, o); | |
| 956 if (LJ_SOFTFP32 && (sn & SNAP_SOFTFPNUM) && tvisint(o)) { /* LJ_SOFTFP32 split number: ref+1 supplies the hi word. */ | |
| 957 TValue tmp; | |
| 958 snap_restoreval(J, T, ex, snapno, rfilt, ref+1, &tmp); | |
| 959 o->u32.hi = tmp.u32.lo; | |
| 960 #if !LJ_FR2 | |
| 961 } else if ((sn & (SNAP_CONT|SNAP_FRAME))) { | |
| 962 /* Overwrite tag with frame link. */ | |
| 963 setframe_ftsz(o, snap_slot(sn) != 0 ? (int32_t)*flinks-- : ftsz0); /* Slot 0 gets the preserved link, other slots consume flinks backwards. */ | |
| 964 L->base = o+1; /* The base moves to the slot after the frame link. */ | |
| 965 #endif | |
| 966 } else if ((sn & SNAP_KEYINDEX)) { | |
| 967 /* A IRT_INT key index slot is restored as a number. Undo this. */ | |
| 968 o->u32.lo = (uint32_t)(LJ_DUALNUM ? intV(o) : lj_num2int(numV(o))); | |
| 969 o->u32.hi = LJ_KEYINDEX; | |
| 970 } | |
| 971 } | |
| 972 } | |
| 973 #if LJ_FR2 | |
| 974 L->base += (map[nent+LJ_BE] & 0xff); /* LJ_FR2: the base delta is the low 8 bits of the map entry at nent+LJ_BE. */ | |
| 975 #endif | |
| 976 lj_assertJ(map + nent == flinks, "inconsistent frames in snapshot"); | |
| 977 | |
| 978 /* Compute current stack top. */ | |
| 979 switch (bc_op(*pc)) { /* The top depends on the opcode at the restored PC. */ | |
| 980 default: | |
| 981 if (bc_op(*pc) < BC_FUNCF) { /* Opcodes below BC_FUNCF take the top from curr_topL(). */ | |
| 982 L->top = curr_topL(L); | |
| 983 break; | |
| 984 } | |
| 985 /* fallthrough */ | |
| 986 case BC_CALLM: case BC_CALLMT: case BC_RETM: case BC_TSETM: /* For these ops the top comes from the snapshot's slot count. */ | |
| 987 L->top = frame + snap->nslots; | |
| 988 break; | |
| 989 } | |
| 990 return pc; | |
| 991 } | |
| 992 | |
| 993 #undef emitir_raw /* Drop the file-local helper that emits raw IR without optimizations. */ | |
| 994 #undef emitir /* Drop the file-local helper that passes IR on to FOLD. */ | |
| 995 | |
| 996 #endif /* Presumably closes the #if LJ_HASJIT opened near the top of the file -- confirm. */ | |