comparison third_party/luajit/src/vm_x86.dasc @ 178:94705b5986b3

[ThirdParty] Added WRK and luajit for load testing.
author MrJuneJune <me@mrjunejune.com>
date Thu, 22 Jan 2026 20:10:30 -0800
parents
children
comparison
equal deleted inserted replaced
177:24fe8ff94056 178:94705b5986b3
1 |// Low-level VM code for x86 CPUs.
2 |// Bytecode interpreter, fast functions and helper functions.
3 |// Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h
4 |
5 |.if P64
6 |.arch x64
7 |.else
8 |.arch x86
9 |.endif
10 |.section code_op, code_sub
11 |
12 |.actionlist build_actionlist
13 |.globals GLOB_
14 |.globalnames globnames
15 |.externnames extnames
16 |
17 |//-----------------------------------------------------------------------
18 |
19 |.if P64
20 |.define X64, 1
21 |.if WIN
22 |.define X64WIN, 1
23 |.endif
24 |.endif
25 |
26 |// Fixed register assignments for the interpreter.
27 |// This is very fragile and has many dependencies. Caveat emptor.
28 |.define BASE, edx // Not C callee-save, refetched anyway.
29 |.if not X64
30 |.define KBASE, edi // Must be C callee-save.
31 |.define KBASEa, KBASE
32 |.define PC, esi // Must be C callee-save.
33 |.define PCa, PC
34 |.define DISPATCH, ebx // Must be C callee-save.
35 |.elif X64WIN
36 |.define KBASE, edi // Must be C callee-save.
37 |.define KBASEa, rdi
38 |.define PC, esi // Must be C callee-save.
39 |.define PCa, rsi
40 |.define DISPATCH, ebx // Must be C callee-save.
41 |.else
42 |.define KBASE, r15d // Must be C callee-save.
43 |.define KBASEa, r15
44 |.define PC, ebx // Must be C callee-save.
45 |.define PCa, rbx
46 |.define DISPATCH, r14d // Must be C callee-save.
47 |.endif
48 |
49 |.define RA, ecx
50 |.define RAH, ch
51 |.define RAL, cl
52 |.define RB, ebp // Must be ebp (C callee-save).
53 |.define RC, eax // Must be eax.
54 |.define RCW, ax
55 |.define RCH, ah
56 |.define RCL, al
57 |.define OP, RB
58 |.define RD, RC
59 |.define RDW, RCW
60 |.define RDL, RCL
61 |.if X64
62 |.define RAa, rcx
63 |.define RBa, rbp
64 |.define RCa, rax
65 |.define RDa, rax
66 |.else
67 |.define RAa, RA
68 |.define RBa, RB
69 |.define RCa, RC
70 |.define RDa, RD
71 |.endif
72 |
73 |.if not X64
74 |.define FCARG1, ecx // x86 fastcall arguments.
75 |.define FCARG2, edx
76 |.elif X64WIN
77 |.define CARG1, rcx // x64/WIN64 C call arguments.
78 |.define CARG2, rdx
79 |.define CARG3, r8
80 |.define CARG4, r9
81 |.define CARG1d, ecx
82 |.define CARG2d, edx
83 |.define CARG3d, r8d
84 |.define CARG4d, r9d
85 |.define FCARG1, CARG1d // Upwards compatible to x86 fastcall.
86 |.define FCARG2, CARG2d
87 |.else
88 |.define CARG1, rdi // x64/POSIX C call arguments.
89 |.define CARG2, rsi
90 |.define CARG3, rdx
91 |.define CARG4, rcx
92 |.define CARG5, r8
93 |.define CARG6, r9
94 |.define CARG1d, edi
95 |.define CARG2d, esi
96 |.define CARG3d, edx
97 |.define CARG4d, ecx
98 |.define CARG5d, r8d
99 |.define CARG6d, r9d
100 |.define FCARG1, CARG1d // Simulate x86 fastcall.
101 |.define FCARG2, CARG2d
102 |.endif
103 |
104 |// Type definitions. Some of these are only used for documentation.
105 |.type L, lua_State
106 |.type GL, global_State
107 |.type TVALUE, TValue
108 |.type GCOBJ, GCobj
109 |.type STR, GCstr
110 |.type TAB, GCtab
111 |.type LFUNC, GCfuncL
112 |.type CFUNC, GCfuncC
113 |.type PROTO, GCproto
114 |.type UPVAL, GCupval
115 |.type NODE, Node
116 |.type NARGS, int
117 |.type TRACE, GCtrace
118 |.type SBUF, SBuf
119 |
120 |// Stack layout while in interpreter. Must match with lj_frame.h.
121 |//-----------------------------------------------------------------------
122 |.if not X64 // x86 stack layout.
123 |
124 |.if WIN
125 |
126 |.define CFRAME_SPACE, aword*9 // Delta for esp (see <--).
127 |.macro saveregs_ // x86/WIN: save edi/esi/ebx, link an SEH record, reserve the frame (ebp is pushed by saveregs).
128 | push edi; push esi; push ebx
129 | push extern lj_err_unwind_win // Handler address; ends up in the SEH_FUNC slot.
130 | fs; push dword [0] // Previous fs:[0] value; ends up in the SEH_NEXT slot.
131 | fs; mov [0], esp // fs:[0] now points at the two dwords just pushed.
132 | sub esp, CFRAME_SPACE // Reserve the slots below SEH_NEXT (UNUSED2 .. ARG1).
133 |.endmacro
134 |.macro restoreregs // x86/WIN: inverse of saveregs (including its push of ebp).
135 | add esp, CFRAME_SPACE // Drop the frame reserved by saveregs_.
136 | fs; pop dword [0] // Put back the fs:[0] value saved by saveregs_.
137 | pop edi // Short for esp += 4: skips the handler slot, edi is reloaded below.
138 | pop ebx; pop esi; pop edi; pop ebp // Reverse of the push order in saveregs/saveregs_.
139 |.endmacro
140 |
141 |.else
142 |
143 |.define CFRAME_SPACE, aword*7 // Delta for esp (see <--).
144 |.macro saveregs_ // x86 non-WIN: save edi/esi/ebx and reserve the frame (ebp is pushed by saveregs).
145 | push edi; push esi; push ebx
146 | sub esp, CFRAME_SPACE // Reserve the slots SAVE_PC .. ARG1 below SAVE_R1.
147 |.endmacro
148 |.macro restoreregs // x86 non-WIN: inverse of saveregs (including its push of ebp).
149 | add esp, CFRAME_SPACE // Drop the frame reserved by saveregs_.
150 | pop ebx; pop esi; pop edi; pop ebp // Reverse of the push order in saveregs/saveregs_.
151 |.endmacro
152 |
153 |.endif
154 |
155 |.macro saveregs // x86: full register save; ebp goes first, then the WIN or non-WIN saveregs_ selected above.
156 | push ebp; saveregs_
157 |.endmacro
158 |
159 |.if WIN
160 |.define SAVE_ERRF, aword [esp+aword*19] // vm_pcall/vm_cpcall only.
161 |.define SAVE_NRES, aword [esp+aword*18]
162 |.define SAVE_CFRAME, aword [esp+aword*17]
163 |.define SAVE_L, aword [esp+aword*16]
164 |//----- 16 byte aligned, ^^^ arguments from C caller
165 |.define SAVE_RET, aword [esp+aword*15] //<-- esp entering interpreter.
166 |.define SAVE_R4, aword [esp+aword*14]
167 |.define SAVE_R3, aword [esp+aword*13]
168 |.define SAVE_R2, aword [esp+aword*12]
169 |//----- 16 byte aligned
170 |.define SAVE_R1, aword [esp+aword*11]
171 |.define SEH_FUNC, aword [esp+aword*10]
172 |.define SEH_NEXT, aword [esp+aword*9] //<-- esp after register saves.
173 |.define UNUSED2, aword [esp+aword*8]
174 |//----- 16 byte aligned
175 |.define UNUSED1, aword [esp+aword*7]
176 |.define SAVE_PC, aword [esp+aword*6]
177 |.define TMP2, aword [esp+aword*5]
178 |.define TMP1, aword [esp+aword*4]
179 |//----- 16 byte aligned
180 |.define ARG4, aword [esp+aword*3]
181 |.define ARG3, aword [esp+aword*2]
182 |.define ARG2, aword [esp+aword*1]
183 |.define ARG1, aword [esp] //<-- esp while in interpreter.
184 |//----- 16 byte aligned, ^^^ arguments for C callee
185 |.else
186 |.define SAVE_ERRF, aword [esp+aword*15] // vm_pcall/vm_cpcall only.
187 |.define SAVE_NRES, aword [esp+aword*14]
188 |.define SAVE_CFRAME, aword [esp+aword*13]
189 |.define SAVE_L, aword [esp+aword*12]
190 |//----- 16 byte aligned, ^^^ arguments from C caller
191 |.define SAVE_RET, aword [esp+aword*11] //<-- esp entering interpreter.
192 |.define SAVE_R4, aword [esp+aword*10]
193 |.define SAVE_R3, aword [esp+aword*9]
194 |.define SAVE_R2, aword [esp+aword*8]
195 |//----- 16 byte aligned
196 |.define SAVE_R1, aword [esp+aword*7] //<-- esp after register saves.
197 |.define SAVE_PC, aword [esp+aword*6]
198 |.define TMP2, aword [esp+aword*5]
199 |.define TMP1, aword [esp+aword*4]
200 |//----- 16 byte aligned
201 |.define ARG4, aword [esp+aword*3]
202 |.define ARG3, aword [esp+aword*2]
203 |.define ARG2, aword [esp+aword*1]
204 |.define ARG1, aword [esp] //<-- esp while in interpreter.
205 |//----- 16 byte aligned, ^^^ arguments for C callee
206 |.endif
207 |
208 |// FPARGx overlaps ARGx and ARG(x+1) on x86.
209 |.define FPARG3, qword [esp+qword*1]
210 |.define FPARG1, qword [esp]
211 |// TMPQ overlaps TMP1/TMP2. ARG5/MULTRES overlap TMP1/TMP2 (and TMPQ).
212 |.define TMPQ, qword [esp+aword*4]
213 |.define TMP3, ARG4
214 |.define ARG5, TMP1
215 |.define TMPa, TMP1
216 |.define MULTRES, TMP2
217 |
218 |// Arguments for vm_call and vm_pcall.
219 |.define INARG_BASE, SAVE_CFRAME // Overwritten by SAVE_CFRAME!
220 |
221 |// Arguments for vm_cpcall.
222 |.define INARG_CP_CALL, SAVE_ERRF
223 |.define INARG_CP_UD, SAVE_NRES
224 |.define INARG_CP_FUNC, SAVE_CFRAME
225 |
226 |//-----------------------------------------------------------------------
227 |.elif X64WIN // x64/Windows stack layout
228 |
229 |.define CFRAME_SPACE, aword*5 // Delta for rsp (see <--).
230 |.macro saveregs_ // x64/WIN: save rdi/rsi/rbx and reserve the frame (rbp is pushed by saveregs).
231 | push rdi; push rsi; push rbx
232 | sub rsp, CFRAME_SPACE // Reserve the slots CSAVE_1 .. ARG5 below SAVE_R1.
233 |.endmacro
234 |.macro saveregs // x64/WIN: full register save; rbp goes first, then saveregs_.
235 | push rbp; saveregs_
236 |.endmacro
237 |.macro restoreregs // x64/WIN: inverse of saveregs (including its push of rbp).
238 | add rsp, CFRAME_SPACE // Drop the frame reserved by saveregs_.
239 | pop rbx; pop rsi; pop rdi; pop rbp // Reverse of the push order in saveregs/saveregs_.
240 |.endmacro
241 |
242 |.define SAVE_CFRAME, aword [rsp+aword*13]
243 |.define SAVE_PC, dword [rsp+dword*25]
244 |.define SAVE_L, dword [rsp+dword*24]
245 |.define SAVE_ERRF, dword [rsp+dword*23]
246 |.define SAVE_NRES, dword [rsp+dword*22]
247 |.define TMP2, dword [rsp+dword*21]
248 |.define TMP1, dword [rsp+dword*20]
249 |//----- 16 byte aligned, ^^^ 32 byte register save area, owned by interpreter
250 |.define SAVE_RET, aword [rsp+aword*9] //<-- rsp entering interpreter.
251 |.define SAVE_R4, aword [rsp+aword*8]
252 |.define SAVE_R3, aword [rsp+aword*7]
253 |.define SAVE_R2, aword [rsp+aword*6]
254 |.define SAVE_R1, aword [rsp+aword*5] //<-- rsp after register saves.
255 |.define ARG5, aword [rsp+aword*4]
256 |.define CSAVE_4, aword [rsp+aword*3]
257 |.define CSAVE_3, aword [rsp+aword*2]
258 |.define CSAVE_2, aword [rsp+aword*1]
259 |.define CSAVE_1, aword [rsp] //<-- rsp while in interpreter.
260 |//----- 16 byte aligned, ^^^ 32 byte register save area, owned by callee
261 |
262 |// TMPQ overlaps TMP1/TMP2. MULTRES overlaps TMP2 (and TMPQ).
263 |.define TMPQ, qword [rsp+aword*10]
264 |.define MULTRES, TMP2
265 |.define TMPa, ARG5
266 |.define ARG5d, dword [rsp+aword*4]
267 |.define TMP3, ARG5d
268 |
269 |//-----------------------------------------------------------------------
270 |.else // x64/POSIX stack layout
271 |
272 |.define CFRAME_SPACE, aword*5 // Delta for rsp (see <--).
273 |.macro saveregs_ // x64/POSIX: save rbx/r15/r14 (plus r13/r12 with NO_UNWIND) and reserve the frame.
274 | push rbx; push r15; push r14
275 |.if NO_UNWIND
276 | push r13; push r12 // r13 ends up in the SAVE_RU2 slot, r12 in SAVE_RU1.
277 |.endif
278 | sub rsp, CFRAME_SPACE // Reserve the slots TMP1 .. SAVE_CFRAME.
279 |.endmacro
280 |.macro saveregs // x64/POSIX: full register save; rbp goes first, then saveregs_.
281 | push rbp; saveregs_
282 |.endmacro
283 |.macro restoreregs // x64/POSIX: inverse of saveregs (including its push of rbp).
284 | add rsp, CFRAME_SPACE // Drop the frame reserved by saveregs_.
285 |.if NO_UNWIND
286 | pop r12; pop r13 // Only pushed by saveregs_ under the same NO_UNWIND condition.
287 |.endif
288 | pop r14; pop r15; pop rbx; pop rbp // Reverse of the push order in saveregs/saveregs_.
289 |.endmacro
290 |
291 |//----- 16 byte aligned,
292 |.if NO_UNWIND
293 |.define SAVE_RET, aword [rsp+aword*11] //<-- rsp entering interpreter.
294 |.define SAVE_R4, aword [rsp+aword*10]
295 |.define SAVE_R3, aword [rsp+aword*9]
296 |.define SAVE_R2, aword [rsp+aword*8]
297 |.define SAVE_R1, aword [rsp+aword*7]
298 |.define SAVE_RU2, aword [rsp+aword*6]
299 |.define SAVE_RU1, aword [rsp+aword*5] //<-- rsp after register saves.
300 |.else
301 |.define SAVE_RET, aword [rsp+aword*9] //<-- rsp entering interpreter.
302 |.define SAVE_R4, aword [rsp+aword*8]
303 |.define SAVE_R3, aword [rsp+aword*7]
304 |.define SAVE_R2, aword [rsp+aword*6]
305 |.define SAVE_R1, aword [rsp+aword*5] //<-- rsp after register saves.
306 |.endif
307 |.define SAVE_CFRAME, aword [rsp+aword*4]
308 |.define SAVE_PC, dword [rsp+dword*7]
309 |.define SAVE_L, dword [rsp+dword*6]
310 |.define SAVE_ERRF, dword [rsp+dword*5]
311 |.define SAVE_NRES, dword [rsp+dword*4]
312 |.define TMPa, aword [rsp+aword*1]
313 |.define TMP2, dword [rsp+dword*1]
314 |.define TMP1, dword [rsp] //<-- rsp while in interpreter.
315 |//----- 16 byte aligned
316 |
317 |// TMPQ overlaps TMP1/TMP2. MULTRES overlaps TMP2 (and TMPQ).
318 |.define TMPQ, qword [rsp]
319 |.define TMP3, dword [rsp+aword*1]
320 |.define MULTRES, TMP2
321 |
322 |.endif
323 |
324 |//-----------------------------------------------------------------------
325 |
326 |// Instruction headers.
327 |.macro ins_A; .endmacro // Empty: emits no extra decoding.
328 |.macro ins_AD; .endmacro // Empty: emits no extra decoding.
329 |.macro ins_AJ; .endmacro // Empty: emits no extra decoding.
330 |.macro ins_ABC; movzx RB, RCH; movzx RC, RCL; .endmacro // Split RC: RB = its second byte, RC = its low byte. RB is also OP.
331 |.macro ins_AB_; movzx RB, RCH; .endmacro // RB = second byte of RC; RC itself is left as is.
332 |.macro ins_A_C; movzx RC, RCL; .endmacro // RC = its own low byte, zero-extended.
333 |.macro ins_AND; not RDa; .endmacro // Bitwise complement of the address-sized RD register.
334 |
335 |// Instruction decode+dispatch. Carefully tuned (nope, lodsd is not faster).
336 |.macro ins_NEXT // Fetch the 32 bit word at PC, decode OP/RA/RC from it and jump through the DISPATCH table.
337 | mov RC, [PC] // Load the whole instruction word.
338 | movzx RA, RCH // RA = byte 1 of the word.
339 | movzx OP, RCL // OP = byte 0 of the word; OP aliases RB.
340 | add PC, 4 // PC now points past the fetched word.
341 | shr RC, 16 // RC (= RD) = upper 16 bits; the ins_* header macros split it further.
342 |.if X64
343 | jmp aword [DISPATCH+OP*8] // Table is indexed by OP with 8 byte entries on x64 ...
344 |.else
345 | jmp aword [DISPATCH+OP*4] // ... and 4 byte entries on x86.
346 |.endif
347 |.endmacro
348 |
349 |// Instruction footer.
350 |.if 1 // Constant condition: the replicated variant is the one that gets built.
351 | // Replicated dispatch. Fewer unpredictable branches, but higher I-Cache use.
352 | .define ins_next, ins_NEXT
353 | .define ins_next_, ins_NEXT
354 |.else
355 | // Common dispatch. Lower I-Cache use, only one (very) unpredictable branch.
356 | // Affects only certain kinds of benchmarks (and only with -j off).
357 | // Around 10%-30% slower on Core2, a lot slower on P4.
358 | .macro ins_next // Every footer just jumps to the single shared dispatch sequence.
359 | jmp ->ins_next
360 | .endmacro
361 | .macro ins_next_ // Emits the ->ins_next label followed by the shared dispatch sequence.
362 | ->ins_next:
363 | ins_NEXT
364 | .endmacro
365 |.endif
366 |
367 |// Call decode and dispatch.
368 |.macro ins_callt // Load PC from the function in RB and dispatch on the word found there.
369 | // BASE = new base, RB = LFUNC, RD = nargs+1, [BASE-4] = PC
370 | mov PC, LFUNC:RB->pc // PC = pc field of the function object.
371 | mov RA, [PC] // Decode through RA (not RC as in ins_NEXT), so RD = nargs+1 is not touched.
372 | movzx OP, RAL // OP = byte 0; OP aliases RB, so the LFUNC pointer is overwritten here.
373 | movzx RA, RAH // RA = byte 1.
374 | add PC, 4 // PC now points past the fetched word.
375 |.if X64
376 | jmp aword [DISPATCH+OP*8] // 8 byte table entries on x64 ...
377 |.else
378 | jmp aword [DISPATCH+OP*4] // ... and 4 byte entries on x86.
379 |.endif
380 |.endmacro
381 |
382 |.macro ins_call // Like ins_callt, but first stores the current PC into the new frame.
383 | // BASE = new base, RB = LFUNC, RD = nargs+1
384 | mov [BASE-4], PC // Establishes the [BASE-4] = PC precondition of ins_callt.
385 | ins_callt
386 |.endmacro
387 |
388 |//-----------------------------------------------------------------------
389 |
390 |// Macros to test operand types.
391 |.macro checktp, reg, tp; cmp dword [BASE+reg*8+4], tp; .endmacro // Compare the dword at offset +4 of 8 byte slot reg (relative to BASE) with tp; only sets flags.
392 |.macro checknum, reg, target; checktp reg, LJ_TISNUM; jae target; .endmacro // Jump to target if that dword is above or equal (unsigned) LJ_TISNUM.
393 |.macro checkint, reg, target; checktp reg, LJ_TISNUM; jne target; .endmacro // Jump to target unless that dword equals LJ_TISNUM.
394 |.macro checkstr, reg, target; checktp reg, LJ_TSTR; jne target; .endmacro // Jump to target unless that dword equals LJ_TSTR.
395 |.macro checktab, reg, target; checktp reg, LJ_TTAB; jne target; .endmacro // Jump to target unless that dword equals LJ_TTAB.
396 |
397 |// These operands must be used with movzx.
398 |.define PC_OP, byte [PC-4]
399 |.define PC_RA, byte [PC-3]
400 |.define PC_RB, byte [PC-1]
401 |.define PC_RC, byte [PC-2]
402 |.define PC_RD, word [PC-2]
403 |
404 |.macro branchPC, reg // PC += (reg - BCBIAS_J) * 4, i.e. reg is a biased count of 4 byte steps.
405 | lea PC, [PC+reg*4-BCBIAS_J*4] // lea does the arithmetic without modifying the flags.
406 |.endmacro
407 |
408 |// Assumes DISPATCH is relative to GL.
409 #define DISPATCH_GL(field) (GG_DISP2G + (int)offsetof(global_State, field))
410 #define DISPATCH_J(field) (GG_DISP2J + (int)offsetof(jit_State, field))
411 |
412 #define PC2PROTO(field) ((int)offsetof(GCproto, field)-(int)sizeof(GCproto))
413 |
414 |// Decrement hashed hotcount and trigger trace recorder if zero.
415 |.macro hotloop, reg // Count down the hot counter selected by PC; overwrites reg and the flags.
416 | mov reg, PC
417 | shr reg, 1
418 | and reg, HOTCOUNT_PCMASK // reg = (PC >> 1) & HOTCOUNT_PCMASK: byte offset of the counter.
419 | sub word [DISPATCH+reg+GG_DISP2HOT], HOTCOUNT_LOOP // 16 bit counter, located relative to DISPATCH.
420 | jb ->vm_hotloop // Taken when the subtraction borrows, i.e. the counter underflows.
421 |.endmacro
422 |
423 |.macro hotcall, reg // Same as hotloop, but subtracts HOTCOUNT_CALL and branches to vm_hotcall.
424 | mov reg, PC
425 | shr reg, 1
426 | and reg, HOTCOUNT_PCMASK // reg = (PC >> 1) & HOTCOUNT_PCMASK: byte offset of the counter.
427 | sub word [DISPATCH+reg+GG_DISP2HOT], HOTCOUNT_CALL // 16 bit counter, located relative to DISPATCH.
428 | jb ->vm_hotcall // Taken when the subtraction borrows, i.e. the counter underflows.
429 |.endmacro
430 |
431 |// Set current VM state.
432 |.macro set_vmstate, st // Store the complement of LJ_VMST_<st> into the vmstate field of the global state.
433 | mov dword [DISPATCH+DISPATCH_GL(vmstate)], ~LJ_VMST_..st // ".." pastes st onto the LJ_VMST_ prefix.
434 |.endmacro
435 |
436 |// x87 compares.
437 |.macro fcomparepp // Compare and pop st0 >< st1.
438 | fucomip st1 // Unordered compare of st0 with st1 into EFLAGS, then pop st0.
439 | fpop // Pop the remaining operand, so both inputs are gone from the x87 stack.
440 |.endmacro
441 |
442 |.macro fpop1; fstp st1; .endmacro // Store st0 over st1 and pop: removes the old st1, keeps the old st0 as the new top.
443 |
444 |// Synthesize SSE FP constants.
445 |.macro sseconst_abs, reg, tmp // Synthesize abs mask (all bits set except bit 63). tmp is only used on x64.
446 |.if X64
447 | mov64 tmp, U64x(7fffffff,ffffffff); movd reg, tmp // Build the 64 bit mask in a GPR and move it over.
448 |.else
449 | pxor reg, reg; pcmpeqd reg, reg; psrlq reg, 1 // All ones, then shift each qword right by one to clear its top bit.
450 |.endif
451 |.endmacro
452 |
453 |.macro sseconst_hi, reg, tmp, val // Synthesize hi-32 bit const: low qword of reg = val << 32. val is 8 hex digits without 0x.
454 |.if X64
455 | mov64 tmp, U64x(val,00000000); movd reg, tmp // val forms the upper half of the 64 bit immediate.
456 |.else
457 | mov tmp, 0x .. val; movd reg, tmp; pshufd reg, reg, 0x51 // movd puts val in dword 0; the shuffle moves it to dword 1 and zeroes the rest.
458 |.endif
459 |.endmacro
460 |
461 |.macro sseconst_sign, reg, tmp // Synthesize sign mask (hi dword 0x80000000, i.e. only bit 63 set).
462 | sseconst_hi reg, tmp, 80000000
463 |.endmacro
464 |.macro sseconst_1, reg, tmp // Synthesize 1.0 (hi dword 0x3ff00000).
465 | sseconst_hi reg, tmp, 3ff00000
466 |.endmacro
467 |.macro sseconst_2p52, reg, tmp // Synthesize 2^52 (hi dword 0x43300000).
468 | sseconst_hi reg, tmp, 43300000
469 |.endmacro
470 |.macro sseconst_tobit, reg, tmp // Synthesize 2^52 + 2^51 (hi dword 0x43380000).
471 | sseconst_hi reg, tmp, 43380000
472 |.endmacro
473 |
474 |// Move table write barrier back. Overwrites reg.
475 |.macro barrierback, tab, reg // Clear the black bit of tab and push tab onto the gc.grayagain list. Overwrites reg.
476 | and byte tab->marked, (uint8_t)~LJ_GC_BLACK // black2gray(tab)
477 | mov reg, [DISPATCH+DISPATCH_GL(gc.grayagain)] // reg = current list head.
478 | mov [DISPATCH+DISPATCH_GL(gc.grayagain)], tab // tab becomes the new head ...
479 | mov tab->gclist, reg // ... and links to the previous head.
480 |.endmacro
481 |
482 |//-----------------------------------------------------------------------
483
484 /* Generate subroutines used by opcodes and other parts of the VM. */
485 /* The .code_sub section should be last to help static branch prediction. */
486 static void build_subroutines(BuildCtx *ctx)
487 {
488 |.code_sub
489 |
490 |//-----------------------------------------------------------------------
491 |//-- Return handling ----------------------------------------------------
492 |//-----------------------------------------------------------------------
493 |
494 |->vm_returnp:
495 | test PC, FRAME_P
496 | jz ->cont_dispatch
497 |
498 | // Return from pcall or xpcall fast func.
499 | and PC, -8
500 | sub BASE, PC // Restore caller base.
501 | lea RAa, [RA+PC-8] // Rebase RA and prepend one result.
502 | mov PC, [BASE-4] // Fetch PC of previous frame.
503 | // Prepending may overwrite the pcall frame, so do it at the end.
504 | mov dword [BASE+RA+4], LJ_TTRUE // Prepend true to results.
505 |
506 |->vm_returnc:
507 | add RD, 1 // RD = nresults+1
508 | jz ->vm_unwind_yield
509 | mov MULTRES, RD
510 | test PC, FRAME_TYPE
511 | jz ->BC_RET_Z // Handle regular return to Lua.
512 |
513 |->vm_return:
514 | // BASE = base, RA = resultofs, RD = nresults+1 (= MULTRES), PC = return
515 | xor PC, FRAME_C
516 | test PC, FRAME_TYPE
517 | jnz ->vm_returnp
518 |
519 | // Return to C.
520 | set_vmstate C
521 | and PC, -8
522 | sub PC, BASE
523 | neg PC // Previous base = BASE - delta.
524 |
525 | sub RD, 1
526 | jz >2
527 |1: // Move results down.
528 |.if X64
529 | mov RBa, [BASE+RA]
530 | mov [BASE-8], RBa
531 |.else
532 | mov RB, [BASE+RA]
533 | mov [BASE-8], RB
534 | mov RB, [BASE+RA+4]
535 | mov [BASE-4], RB
536 |.endif
537 | add BASE, 8
538 | sub RD, 1
539 | jnz <1
540 |2:
541 | mov L:RB, SAVE_L
542 | mov L:RB->base, PC
543 |3:
544 | mov RD, MULTRES
545 | mov RA, SAVE_NRES // RA = wanted nresults+1
546 |4:
547 | cmp RA, RD
548 | jne >6 // More/less results wanted?
549 |5:
550 | sub BASE, 8
551 | mov L:RB->top, BASE
552 |
553 |->vm_leave_cp:
554 | mov RAa, SAVE_CFRAME // Restore previous C frame.
555 | mov L:RB->cframe, RAa
556 | xor eax, eax // Ok return status for vm_pcall.
557 |
558 |->vm_leave_unw:
559 | restoreregs
560 | ret
561 |
562 |6:
563 | jb >7 // Less results wanted?
564 | // More results wanted. Check stack size and fill up results with nil.
565 | cmp BASE, L:RB->maxstack
566 | ja >8
567 | mov dword [BASE-4], LJ_TNIL
568 | add BASE, 8
569 | add RD, 1
570 | jmp <4
571 |
572 |7: // Less results wanted.
573 | test RA, RA
574 | jz <5 // But check for LUA_MULTRET+1.
575 | sub RA, RD // Negative result!
576 | lea BASE, [BASE+RA*8] // Correct top.
577 | jmp <5
578 |
579 |8: // Corner case: need to grow stack for filling up results.
580 | // This can happen if:
581 | // - A C function grows the stack (a lot).
582 | // - The GC shrinks the stack in between.
583 | // - A return back from a lua_call() with (high) nresults adjustment.
584 | mov L:RB->top, BASE // Save current top held in BASE (yes).
585 | mov MULTRES, RD // Need to fill only remainder with nil.
586 | mov FCARG2, RA
587 | mov FCARG1, L:RB
588 | call extern lj_state_growstack@8 // (lua_State *L, int n)
589 | mov BASE, L:RB->top // Need the (realloced) L->top in BASE.
590 | jmp <3
591 |
592 |->vm_unwind_yield:
593 | mov al, LUA_YIELD
594 | jmp ->vm_unwind_c_eh
595 |
596 |->vm_unwind_c@8: // Unwind C stack, return from vm_pcall.
597 | // (void *cframe, int errcode)
598 |.if X64
599 | mov eax, CARG2d // Error return status for vm_pcall.
600 | mov rsp, CARG1
601 |.else
602 | mov eax, FCARG2 // Error return status for vm_pcall.
603 | mov esp, FCARG1
604 |.if WIN
605 | lea FCARG1, SEH_NEXT
606 | fs; mov [0], FCARG1
607 |.endif
608 |.endif
609 |->vm_unwind_c_eh: // Landing pad for external unwinder.
610 | mov L:RB, SAVE_L
611 | mov GL:RB, L:RB->glref
612 | mov dword GL:RB->vmstate, ~LJ_VMST_C
613 | jmp ->vm_leave_unw
614 |
615 |->vm_unwind_rethrow:
616 |.if X64 and not X64WIN
617 | mov FCARG1, SAVE_L
618 | mov FCARG2, eax
619 | restoreregs
620 | jmp extern lj_err_throw@8 // (lua_State *L, int errcode)
621 |.endif
622 |
623 |->vm_unwind_ff@4: // Unwind C stack, return from ff pcall.
624 | // (void *cframe)
625 |.if X64
626 | and CARG1, CFRAME_RAWMASK
627 | mov rsp, CARG1
628 |.else
629 | and FCARG1, CFRAME_RAWMASK
630 | mov esp, FCARG1
631 |.if WIN
632 | lea FCARG1, SEH_NEXT
633 | fs; mov [0], FCARG1
634 |.endif
635 |.endif
636 |->vm_unwind_ff_eh: // Landing pad for external unwinder.
637 | mov L:RB, SAVE_L
638 | mov RAa, -8 // Results start at BASE+RA = BASE-8.
639 | mov RD, 1+1 // Really 1+2 results, incr. later.
640 | mov BASE, L:RB->base
641 | mov DISPATCH, L:RB->glref // Setup pointer to dispatch table.
642 | add DISPATCH, GG_G2DISP
643 | mov PC, [BASE-4] // Fetch PC of previous frame.
644 | mov dword [BASE-4], LJ_TFALSE // Prepend false to error message.
645 | set_vmstate INTERP
646 | jmp ->vm_returnc // Increments RD/MULTRES and returns.
647 |
648 |.if WIN and not X64
649 |->vm_rtlunwind@16: // Thin layer around RtlUnwind.
650 | // (void *cframe, void *excptrec, void *unwinder, int errcode)
651 | mov [esp], FCARG1 // Return value for RtlUnwind.
652 | push FCARG2 // Exception record for RtlUnwind.
653 | push 0 // Ignored by RtlUnwind.
654 | push dword [FCARG1+CFRAME_OFS_SEH]
655 | call extern RtlUnwind@16 // Violates ABI (clobbers too much).
656 | mov FCARG1, eax
657 | mov FCARG2, [esp+4] // errcode (for vm_unwind_c).
658 | ret // Jump to unwinder.
659 |.endif
660 |
661 |//-----------------------------------------------------------------------
662 |//-- Grow stack for calls -----------------------------------------------
663 |//-----------------------------------------------------------------------
664 |
665 |->vm_growstack_c: // Grow stack for C function.
666 | mov FCARG2, LUA_MINSTACK
667 | jmp >2
668 |
669 |->vm_growstack_v: // Grow stack for vararg Lua function.
670 | sub RD, 8
671 | jmp >1
672 |
673 |->vm_growstack_f: // Grow stack for fixarg Lua function.
674 | // BASE = new base, RD = nargs+1, RB = L, PC = first PC
675 | lea RD, [BASE+NARGS:RD*8-8]
676 |1:
677 | movzx RA, byte [PC-4+PC2PROTO(framesize)]
678 | add PC, 4 // Must point after first instruction.
679 | mov L:RB->base, BASE
680 | mov L:RB->top, RD
681 | mov SAVE_PC, PC
682 | mov FCARG2, RA
683 |2:
684 | // RB = L, L->base = new base, L->top = top
685 | mov FCARG1, L:RB
686 | call extern lj_state_growstack@8 // (lua_State *L, int n)
687 | mov BASE, L:RB->base
688 | mov RD, L:RB->top
689 | mov LFUNC:RB, [BASE-8]
690 | sub RD, BASE
691 | shr RD, 3
692 | add NARGS:RD, 1
693 | // BASE = new base, RB = LFUNC, RD = nargs+1
694 | ins_callt // Just retry the call.
695 |
696 |//-----------------------------------------------------------------------
697 |//-- Entry points into the assembler VM ---------------------------------
698 |//-----------------------------------------------------------------------
699 |
700 |->vm_resume: // Setup C frame and resume thread.
701 | // (lua_State *L, TValue *base, int nres1 = 0, ptrdiff_t ef = 0)
702 | saveregs
703 |.if X64
704 | mov L:RB, CARG1d // Caveat: CARG1d may be RA.
705 | mov SAVE_L, CARG1d
706 | mov RA, CARG2d
707 |.else
708 | mov L:RB, SAVE_L
709 | mov RA, INARG_BASE // Caveat: overlaps SAVE_CFRAME!
710 |.endif
711 | mov PC, FRAME_CP
712 | xor RD, RD
713 | lea KBASEa, [esp+CFRAME_RESUME]
714 | mov DISPATCH, L:RB->glref // Setup pointer to dispatch table.
715 | add DISPATCH, GG_G2DISP
716 | mov SAVE_PC, RD // Any value outside of bytecode is ok.
717 | mov SAVE_CFRAME, RDa
718 |.if X64
719 | mov SAVE_NRES, RD
720 | mov SAVE_ERRF, RD
721 |.endif
722 | mov L:RB->cframe, KBASEa
723 | cmp byte L:RB->status, RDL
724 | je >2 // Initial resume (like a call).
725 |
726 | // Resume after yield (like a return).
727 | mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB
728 | set_vmstate INTERP
729 | mov byte L:RB->status, RDL
730 | mov BASE, L:RB->base
731 | mov RD, L:RB->top
732 | sub RD, RA
733 | shr RD, 3
734 | add RD, 1 // RD = nresults+1
735 | sub RA, BASE // RA = resultofs
736 | mov PC, [BASE-4]
737 | mov MULTRES, RD
738 | test PC, FRAME_TYPE
739 | jz ->BC_RET_Z
740 | jmp ->vm_return
741 |
742 |->vm_pcall: // Setup protected C frame and enter VM.
743 | // (lua_State *L, TValue *base, int nres1, ptrdiff_t ef)
744 | saveregs
745 | mov PC, FRAME_CP
746 |.if X64
747 | mov SAVE_ERRF, CARG4d
748 |.endif
749 | jmp >1
750 |
751 |->vm_call: // Setup C frame and enter VM.
752 | // (lua_State *L, TValue *base, int nres1)
753 | saveregs
754 | mov PC, FRAME_C
755 |
756 |1: // Entry point for vm_pcall above (PC = ftype).
757 |.if X64
758 | mov SAVE_NRES, CARG3d
759 | mov L:RB, CARG1d // Caveat: CARG1d may be RA.
760 | mov SAVE_L, CARG1d
761 | mov RA, CARG2d
762 |.else
763 | mov L:RB, SAVE_L
764 | mov RA, INARG_BASE // Caveat: overlaps SAVE_CFRAME!
765 |.endif
766 |
767 | mov DISPATCH, L:RB->glref // Setup pointer to dispatch table.
768 | mov KBASEa, L:RB->cframe // Add our C frame to cframe chain.
769 | mov SAVE_CFRAME, KBASEa
770 | mov SAVE_PC, L:RB // Any value outside of bytecode is ok.
771 | add DISPATCH, GG_G2DISP
772 |.if X64
773 | mov L:RB->cframe, rsp
774 |.else
775 | mov L:RB->cframe, esp
776 |.endif
777 |
778 |2: // Entry point for vm_resume/vm_cpcall (RA = base, RB = L, PC = ftype).
779 | mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB
780 | set_vmstate INTERP
781 | mov BASE, L:RB->base // BASE = old base (used in vmeta_call).
782 | add PC, RA
783 | sub PC, BASE // PC = frame delta + frame type
784 |
785 | mov RD, L:RB->top
786 | sub RD, RA
787 | shr NARGS:RD, 3
788 | add NARGS:RD, 1 // RD = nargs+1
789 |
790 |->vm_call_dispatch:
791 | mov LFUNC:RB, [RA-8]
792 | cmp dword [RA-4], LJ_TFUNC
793 | jne ->vmeta_call // Ensure KBASE defined and != BASE.
794 |
795 |->vm_call_dispatch_f:
796 | mov BASE, RA
797 | ins_call
798 | // BASE = new base, RB = func, RD = nargs+1, PC = caller PC
799 |
800 |->vm_cpcall: // Setup protected C frame, call C.
801 | // (lua_State *L, lua_CFunction func, void *ud, lua_CPFunction cp)
802 | saveregs
803 |.if X64
804 | mov L:RB, CARG1d // Caveat: CARG1d may be RA.
805 | mov SAVE_L, CARG1d
806 |.else
807 | mov L:RB, SAVE_L
808 | // Caveat: INARG_CP_* and SAVE_CFRAME/SAVE_NRES/SAVE_ERRF overlap!
809 | mov RC, INARG_CP_UD // Get args before they are overwritten.
810 | mov RA, INARG_CP_FUNC
811 | mov BASE, INARG_CP_CALL
812 |.endif
813 | mov SAVE_PC, L:RB // Any value outside of bytecode is ok.
814 |
815 | mov KBASE, L:RB->stack // Compute -savestack(L, L->top).
816 | sub KBASE, L:RB->top
817 | mov DISPATCH, L:RB->glref // Setup pointer to dispatch table.
818 | mov SAVE_ERRF, 0 // No error function.
819 | mov SAVE_NRES, KBASE // Neg. delta means cframe w/o frame.
820 | add DISPATCH, GG_G2DISP
821 | // Handler may change cframe_nres(L->cframe) or cframe_errfunc(L->cframe).
822 |
823 |.if X64
824 | mov KBASEa, L:RB->cframe // Add our C frame to cframe chain.
825 | mov SAVE_CFRAME, KBASEa
826 | mov L:RB->cframe, rsp
827 | mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB
828 |
829 | call CARG4 // (lua_State *L, lua_CFunction func, void *ud)
830 |.else
831 | mov ARG3, RC // Have to copy args downwards.
832 | mov ARG2, RA
833 | mov ARG1, L:RB
834 |
835 | mov KBASE, L:RB->cframe // Add our C frame to cframe chain.
836 | mov SAVE_CFRAME, KBASE
837 | mov L:RB->cframe, esp
838 | mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB
839 |
840 | call BASE // (lua_State *L, lua_CFunction func, void *ud)
841 |.endif
842 | // TValue * (new base) or NULL returned in eax (RC).
843 | test RC, RC
844 | jz ->vm_leave_cp // No base? Just remove C frame.
845 | mov RA, RC
846 | mov PC, FRAME_CP
847 | jmp <2 // Else continue with the call.
848 |
849 |//-----------------------------------------------------------------------
850 |//-- Metamethod handling ------------------------------------------------
851 |//-----------------------------------------------------------------------
852 |
853 |//-- Continuation dispatch ----------------------------------------------
854 |
855 |->cont_dispatch:
856 | // BASE = meta base, RA = resultofs, RD = nresults+1 (also in MULTRES)
857 | add RA, BASE
858 | and PC, -8
859 | mov RB, BASE
860 | sub BASE, PC // Restore caller BASE.
861 | mov dword [RA+RD*8-4], LJ_TNIL // Ensure one valid arg.
862 | mov RC, RA // ... in [RC]
863 | mov PC, [RB-12] // Restore PC from [cont|PC].
864 |.if X64
865 | movsxd RAa, dword [RB-16] // May be negative on WIN64 with debug.
866 |.if FFI
867 | cmp RA, 1
868 | jbe >1
869 |.endif
870 | lea KBASEa, qword [=>0]
871 | add RAa, KBASEa
872 |.else
873 | mov RA, dword [RB-16]
874 |.if FFI
875 | cmp RA, 1
876 | jbe >1
877 |.endif
878 |.endif
879 | mov LFUNC:KBASE, [BASE-8]
880 | mov KBASE, LFUNC:KBASE->pc
881 | mov KBASE, [KBASE+PC2PROTO(k)]
882 | // BASE = base, RC = result, RB = meta base
883 | jmp RAa // Jump to continuation.
884 |
885 |.if FFI
886 |1:
887 | je ->cont_ffi_callback // cont = 1: return from FFI callback.
888 | // cont = 0: Tail call from C function.
889 | sub RB, BASE
890 | shr RB, 3
891 | lea RD, [RB-1]
892 | jmp ->vm_call_tail
893 |.endif
894 |
895 |->cont_cat: // BASE = base, RC = result, RB = mbase
896 | movzx RA, PC_RB
897 | sub RB, 16
898 | lea RA, [BASE+RA*8]
899 | sub RA, RB
900 | je ->cont_ra
901 | neg RA
902 | shr RA, 3
903 |.if X64WIN
904 | mov CARG3d, RA
905 | mov L:CARG1d, SAVE_L
906 | mov L:CARG1d->base, BASE
907 | mov RCa, [RC]
908 | mov [RB], RCa
909 | mov CARG2d, RB
910 |.elif X64
911 | mov L:CARG1d, SAVE_L
912 | mov L:CARG1d->base, BASE
913 | mov CARG3d, RA
914 | mov RAa, [RC]
915 | mov [RB], RAa
916 | mov CARG2d, RB
917 |.else
918 | mov ARG3, RA
919 | mov RA, [RC+4]
920 | mov RC, [RC]
921 | mov [RB+4], RA
922 | mov [RB], RC
923 | mov ARG2, RB
924 |.endif
925 | jmp ->BC_CAT_Z
926 |
927 |//-- Table indexing metamethods -----------------------------------------
928 |
929 |->vmeta_tgets:
930 | mov TMP1, RC // RC = GCstr *
931 | mov TMP2, LJ_TSTR
932 | lea RCa, TMP1 // Store temp. TValue in TMP1/TMP2.
933 | cmp PC_OP, BC_GGET
934 | jne >1
935 | lea RA, [DISPATCH+DISPATCH_GL(tmptv)] // Store fn->l.env in g->tmptv.
936 | mov [RA], TAB:RB // RB = GCtab *
937 | mov dword [RA+4], LJ_TTAB
938 | mov RB, RA
939 | jmp >2
940 |
941 |->vmeta_tgetb:
942 | movzx RC, PC_RC
943 |.if DUALNUM
944 | mov TMP2, LJ_TISNUM
945 | mov TMP1, RC
946 |.else
947 | cvtsi2sd xmm0, RC
948 | movsd TMPQ, xmm0
949 |.endif
950 | lea RCa, TMPQ // Store temp. TValue in TMPQ.
951 | jmp >1
952 |
953 |->vmeta_tgetv:
954 | movzx RC, PC_RC // Reload TValue *k from RC.
955 | lea RC, [BASE+RC*8]
956 |1:
957 | movzx RB, PC_RB // Reload TValue *t from RB.
958 | lea RB, [BASE+RB*8]
959 |2:
960 |.if X64
961 | mov L:CARG1d, SAVE_L
962 | mov L:CARG1d->base, BASE // Caveat: CARG2d/CARG3d may be BASE.
963 | mov CARG2d, RB
964 | mov CARG3, RCa // May be 64 bit ptr to stack.
965 | mov L:RB, L:CARG1d
966 |.else
967 | mov ARG2, RB
968 | mov L:RB, SAVE_L
969 | mov ARG3, RC
970 | mov ARG1, L:RB
971 | mov L:RB->base, BASE
972 |.endif
973 | mov SAVE_PC, PC
974 | call extern lj_meta_tget // (lua_State *L, TValue *o, TValue *k)
975 | // TValue * (finished) or NULL (metamethod) returned in eax (RC).
976 | mov BASE, L:RB->base
977 | test RC, RC
978 | jz >3
979 |->cont_ra: // BASE = base, RC = result
980 | movzx RA, PC_RA
981 |.if X64
982 | mov RBa, [RC]
983 | mov [BASE+RA*8], RBa
984 |.else
985 | mov RB, [RC+4]
986 | mov RC, [RC]
987 | mov [BASE+RA*8+4], RB
988 | mov [BASE+RA*8], RC
989 |.endif
990 | ins_next
991 |
992 |3: // Call __index metamethod.
993 | // BASE = base, L->top = new base, stack = cont/func/t/k
994 | mov RA, L:RB->top
995 | mov [RA-12], PC // [cont|PC]
996 | lea PC, [RA+FRAME_CONT]
997 | sub PC, BASE
998 | mov LFUNC:RB, [RA-8] // Guaranteed to be a function here.
999 | mov NARGS:RD, 2+1 // 2 args for func(t, k).
1000 | jmp ->vm_call_dispatch_f
1001 |
1002 |->vmeta_tgetr:
1003 | mov FCARG1, TAB:RB
1004 | mov RB, BASE // Save BASE.
1005 | mov FCARG2, RC // Caveat: FCARG2 == BASE
1006 | call extern lj_tab_getinth@8 // (GCtab *t, int32_t key)
1007 | // cTValue * or NULL returned in eax (RC).
1008 | movzx RA, PC_RA
1009 | mov BASE, RB // Restore BASE.
1010 | test RC, RC
1011 | jnz ->BC_TGETR_Z
1012 | mov dword [BASE+RA*8+4], LJ_TNIL
1013 | jmp ->BC_TGETR2_Z
1014 |
1015 |//-----------------------------------------------------------------------
1016 |
1017 |->vmeta_tsets: // Table-set fallback with a string key; joins vmeta_tsetv at label 1 or 2.
1018 | mov TMP1, RC // RC = GCstr *
1019 | mov TMP2, LJ_TSTR
1020 | lea RCa, TMP1 // Store temp. TValue in TMP1/TMP2.
1021 | cmp PC_OP, BC_GSET // Is the current instruction a GSET?
1022 | jne >1 // No: the table TValue is reloaded from operand RB at label 1.
1023 | lea RA, [DISPATCH+DISPATCH_GL(tmptv)] // Store fn->l.env in g->tmptv.
1024 | mov [RA], TAB:RB // RB = GCtab *
1025 | mov dword [RA+4], LJ_TTAB
1026 | mov RB, RA // RB = address of g->tmptv, used as the table TValue.
1027 | jmp >2
1028 |
1029 |->vmeta_tsetb: // Table-set fallback where the key is the literal RC operand of the instruction.
1030 | movzx RC, PC_RC
1031 |.if DUALNUM
1032 | mov TMP2, LJ_TISNUM // Build the key as type tag LJ_TISNUM plus 32-bit value.
1033 | mov TMP1, RC
1034 |.else
1035 | cvtsi2sd xmm0, RC // Build the key as a double converted from the integer.
1036 | movsd TMPQ, xmm0
1037 |.endif
1038 | lea RCa, TMPQ // Store temp. TValue in TMPQ.
1039 | jmp >1 // Continue at label 1 of vmeta_tsetv (reloads the table operand).
1040 |
1041 |->vmeta_tsetv: // Table-set fallback with the key in a stack slot; resolved via lj_meta_tset.
1042 | movzx RC, PC_RC // Reload TValue *k from RC.
1043 | lea RC, [BASE+RC*8]
1044 |1: // Key pointer is ready in RC/RCa (also reached from vmeta_tsets/vmeta_tsetb).
1045 | movzx RB, PC_RB // Reload TValue *t from RB.
1046 | lea RB, [BASE+RB*8]
1047 |2: // RB = TValue *t, RC/RCa = TValue *k: set up the C call arguments.
1048 |.if X64
1049 | mov L:CARG1d, SAVE_L
1050 | mov L:CARG1d->base, BASE // Caveat: CARG2d/CARG3d may be BASE.
1051 | mov CARG2d, RB
1052 | mov CARG3, RCa // May be 64 bit ptr to stack.
1053 | mov L:RB, L:CARG1d
1054 |.else
1055 | mov ARG2, RB
1056 | mov L:RB, SAVE_L
1057 | mov ARG3, RC
1058 | mov ARG1, L:RB
1059 | mov L:RB->base, BASE
1060 |.endif
1061 | mov SAVE_PC, PC
1062 | call extern lj_meta_tset // (lua_State *L, TValue *o, TValue *k)
1063 | // TValue * (finished) or NULL (metamethod) returned in eax (RC).
1064 | mov BASE, L:RB->base // Refetch BASE from L->base after the call.
1065 | test RC, RC
1066 | jz >3 // NULL: a metamethod has to be called.
1067 | // NOBARRIER: lj_meta_tset ensures the table is not black.
1068 | movzx RA, PC_RA
1069 |.if X64
1070 | mov RBa, [BASE+RA*8] // Copy the value in slot RA to the returned TValue.
1071 | mov [RC], RBa
1072 |.else
1073 | mov RB, [BASE+RA*8+4] // Copy the value in slot RA to the returned TValue (two halves).
1074 | mov RA, [BASE+RA*8]
1075 | mov [RC+4], RB
1076 | mov [RC], RA
1077 |.endif
1078 |->cont_nop: // BASE = base, (RC = result)
1079 | ins_next
1080 |
1081 |3: // Call __newindex metamethod.
1082 | // BASE = base, L->top = new base, stack = cont/func/t/k/(v)
1083 | mov RA, L:RB->top // RA = new base.
1084 | mov [RA-12], PC // [cont|PC]
1085 | movzx RC, PC_RA // RC = index of the slot holding the value to store.
1086 | // Copy value to third argument.
1087 |.if X64
1088 | mov RBa, [BASE+RC*8]
1089 | mov [RA+16], RBa
1090 |.else
1091 | mov RB, [BASE+RC*8+4]
1092 | mov RC, [BASE+RC*8]
1093 | mov [RA+20], RB
1094 | mov [RA+16], RC
1095 |.endif
1096 | lea PC, [RA+FRAME_CONT]
1097 | sub PC, BASE // PC = new base + FRAME_CONT - old base.
1098 | mov LFUNC:RB, [RA-8] // Guaranteed to be a function here.
1099 | mov NARGS:RD, 3+1 // 3 args for func(t, k, v).
1100 | jmp ->vm_call_dispatch_f
1101 |
1102 |->vmeta_tsetr: // Table-set fallback: obtain the slot for integer key RC via lj_tab_setinth.
1103 |.if X64WIN
1104 | mov L:CARG1d, SAVE_L
1105 | mov CARG3d, RC
1106 | mov L:CARG1d->base, BASE
1107 | xchg CARG2d, TAB:RB // Caveat: CARG2d == BASE. The swap also leaves BASE saved in RB.
1108 |.elif X64
1109 | mov L:CARG1d, SAVE_L
1110 | mov CARG2d, TAB:RB
1111 | mov L:CARG1d->base, BASE
1112 | mov RB, BASE // Save BASE.
1113 | mov CARG3d, RC // Caveat: CARG3d == BASE.
1114 |.else
1115 | mov L:RA, SAVE_L
1116 | mov ARG2, TAB:RB
1117 | mov RB, BASE // Save BASE.
1118 | mov ARG3, RC
1119 | mov ARG1, L:RA
1120 | mov L:RA->base, BASE
1121 |.endif
1122 | mov SAVE_PC, PC
1123 | call extern lj_tab_setinth // (lua_State *L, GCtab *t, int32_t key)
1124 | // TValue * returned in eax (RC).
1125 | movzx RA, PC_RA
1126 | mov BASE, RB // Restore BASE.
1127 | jmp ->BC_TSETR_Z // Continue at BC_TSETR_Z with RC = returned slot pointer.
1128 |
1129 |//-- Comparison metamethods ---------------------------------------------
1130 |
1131 |->vmeta_comp: // Comparison fallback: lj_meta_comp yields 0/1 or a TValue * for a metamethod call.
1132 |.if X64
1133 | mov L:RB, SAVE_L
1134 | mov L:RB->base, BASE // Caveat: CARG2d/CARG3d == BASE.
1135 |.if X64WIN
1136 | lea CARG3d, [BASE+RD*8]
1137 | lea CARG2d, [BASE+RA*8] // Written last of the two, see the caveat above.
1138 |.else
1139 | lea CARG2d, [BASE+RA*8]
1140 | lea CARG3d, [BASE+RD*8] // Written last of the two, see the caveat above.
1141 |.endif
1142 | mov CARG1d, L:RB // Caveat: CARG1d/CARG4d == RA.
1143 | movzx CARG4d, PC_OP
1144 |.else
1145 | movzx RB, PC_OP
1146 | lea RD, [BASE+RD*8]
1147 | lea RA, [BASE+RA*8]
1148 | mov ARG4, RB
1149 | mov L:RB, SAVE_L
1150 | mov ARG3, RD
1151 | mov ARG2, RA
1152 | mov ARG1, L:RB
1153 | mov L:RB->base, BASE
1154 |.endif
1155 | mov SAVE_PC, PC
1156 | call extern lj_meta_comp // (lua_State *L, TValue *o1, *o2, int op)
1157 | // 0/1 or TValue * (metamethod) returned in eax (RC).
1158 |3: // Shared result handling; other helpers below jump back here.
1159 | mov BASE, L:RB->base
1160 | cmp RC, 1
1161 | ja ->vmeta_binop // Above 1 (unsigned): a TValue *, so call the metamethod.
1162 |4: // Entered with flags set: below = do not branch, otherwise branch.
1163 | lea PC, [PC+4] // lea leaves the flags of the preceding cmp intact.
1164 | jb >6
1165 |5: // Take the branch given by the RD operand.
1166 | movzx RD, PC_RD
1167 | branchPC RD
1168 |6:
1169 | ins_next
1170 |
1171 |->cont_condt: // BASE = base, RC = result
1172 | add PC, 4
1173 | cmp dword [RC+4], LJ_TISTRUECOND // Branch if result is true.
1174 | jb <5 // Type tag below LJ_TISTRUECOND: take the branch (label 5 of vmeta_comp).
1175 | jmp <6 // Otherwise continue with the next instruction.
1176 |
1177 |->cont_condf: // BASE = base, RC = result
1178 | cmp dword [RC+4], LJ_TISTRUECOND // Branch if result is false.
1179 | jmp <4 // Label 4 of vmeta_comp advances PC and tests these flags.
1180 |
1181 |->vmeta_equal: // Equality fallback: lj_meta_equal yields 0/1 or a TValue * for a metamethod call.
1182 | sub PC, 4 // Back up PC by 4; the shared tail at <3 adds 4 again.
1183 |.if X64WIN
1184 | mov CARG3d, RD
1185 | mov CARG4d, RB
1186 | mov L:RB, SAVE_L
1187 | mov L:RB->base, BASE // Caveat: CARG2d == BASE.
1188 | mov CARG2d, RA
1189 | mov CARG1d, L:RB // Caveat: CARG1d == RA.
1190 |.elif X64
1191 | mov CARG2d, RA
1192 | mov CARG4d, RB // Caveat: CARG4d == RA.
1193 | mov L:RB, SAVE_L
1194 | mov L:RB->base, BASE // Caveat: CARG3d == BASE.
1195 | mov CARG3d, RD
1196 | mov CARG1d, L:RB
1197 |.else
1198 | mov ARG4, RB
1199 | mov L:RB, SAVE_L
1200 | mov ARG3, RD
1201 | mov ARG2, RA
1202 | mov ARG1, L:RB
1203 | mov L:RB->base, BASE
1204 |.endif
1205 | mov SAVE_PC, PC
1206 | call extern lj_meta_equal // (lua_State *L, GCobj *o1, *o2, int ne)
1207 | // 0/1 or TValue * (metamethod) returned in eax (RC).
1208 | jmp <3 // Reuse the result handling of vmeta_comp.
1209 |
1210 |->vmeta_equal_cd: // Equality fallback via lj_meta_equal_cd; the body is only assembled with FFI.
1211 |.if FFI
1212 | sub PC, 4 // Back up PC by 4; the shared tail at <3 adds 4 again.
1213 | mov L:RB, SAVE_L
1214 | mov L:RB->base, BASE
1215 | mov FCARG1, L:RB
1216 | mov FCARG2, dword [PC-4] // Pass the 32-bit instruction word located just before PC.
1217 | mov SAVE_PC, PC
1218 | call extern lj_meta_equal_cd@8 // (lua_State *L, BCIns ins)
1219 | // 0/1 or TValue * (metamethod) returned in eax (RC).
1220 | jmp <3 // Reuse the result handling of vmeta_comp.
1221 |.endif
1222 |
1223 |->vmeta_istype: // Type-check fallback: passes RA and the RD operand to lj_meta_istype.
1224 |.if X64
1225 | mov L:RB, SAVE_L
1226 | mov L:RB->base, BASE // Caveat: CARG2d/CARG3d may be BASE.
1227 | mov CARG2d, RA
1228 | movzx CARG3d, PC_RD
1229 | mov L:CARG1d, L:RB
1230 |.else
1231 | movzx RD, PC_RD
1232 | mov ARG2, RA
1233 | mov L:RB, SAVE_L
1234 | mov ARG3, RD
1235 | mov ARG1, L:RB
1236 | mov L:RB->base, BASE
1237 |.endif
1238 | mov SAVE_PC, PC
1239 | call extern lj_meta_istype // (lua_State *L, BCReg ra, BCReg tp)
1240 | mov BASE, L:RB->base // Refetch BASE from L->base after the call.
1241 | jmp <6 // Continue with the next instruction (ins_next at label 6 above).
1242 |
1243 |//-- Arithmetic metamethods ---------------------------------------------
1244 |
1245 |->vmeta_arith_vno: // Like vmeta_arith_vn, but reloads operand RB first when DUALNUM is set.
1246 |.if DUALNUM
1247 | movzx RB, PC_RB
1248 |.endif
1249 |->vmeta_arith_vn: // Operands: stack slot RB and constant RC (addressed via KBASE).
1250 | lea RC, [KBASE+RC*8]
1251 | jmp >1 // Label 1 below converts RB into a stack slot pointer.
1252 |
1253 |->vmeta_arith_nvo: // Like vmeta_arith_nv, but reloads operand RC first when DUALNUM is set.
1254 |.if DUALNUM
1255 | movzx RC, PC_RC
1256 |.endif
1257 |->vmeta_arith_nv: // Same operand kinds as vmeta_arith_vn, but passed in swapped order.
1258 | lea RC, [KBASE+RC*8]
1259 | lea RB, [BASE+RB*8]
1260 | xchg RB, RC // RB = constant pointer, RC = stack slot pointer.
1261 | jmp >2
1262 |
1263 |->vmeta_unm: // Unary minus: the same slot (operand RD) is passed as both operands.
1264 | lea RC, [BASE+RD*8]
1265 | mov RB, RC
1266 | jmp >2
1267 |
1268 |->vmeta_arith_vvo: // Like vmeta_arith_vv, but reloads operand RB first when DUALNUM is set.
1269 |.if DUALNUM
1270 | movzx RB, PC_RB
1271 |.endif
1272 |->vmeta_arith_vv: // Both operands are stack slots (RB and RC).
1273 | lea RC, [BASE+RC*8]
1274 |1: // RC is already a TValue pointer; convert RB.
1275 | lea RB, [BASE+RB*8]
1276 |2: // RB and RC are TValue pointers; convert the destination slot RA.
1277 | lea RA, [BASE+RA*8]
1278 |.if X64WIN
1279 | mov CARG3d, RB
1280 | mov CARG4d, RC
1281 | movzx RC, PC_OP
1282 | mov ARG5d, RC
1283 | mov L:RB, SAVE_L
1284 | mov L:RB->base, BASE // Caveat: CARG2d == BASE.
1285 | mov CARG2d, RA
1286 | mov CARG1d, L:RB // Caveat: CARG1d == RA.
1287 |.elif X64
1288 | movzx CARG5d, PC_OP
1289 | mov CARG2d, RA
1290 | mov CARG4d, RC // Caveat: CARG4d == RA.
1291 | mov L:CARG1d, SAVE_L
1292 | mov L:CARG1d->base, BASE // Caveat: CARG3d == BASE.
1293 | mov CARG3d, RB
1294 | mov L:RB, L:CARG1d
1295 |.else
1296 | mov ARG3, RB
1297 | mov L:RB, SAVE_L
1298 | mov ARG4, RC
1299 | movzx RC, PC_OP
1300 | mov ARG2, RA
1301 | mov ARG5, RC
1302 | mov ARG1, L:RB
1303 | mov L:RB->base, BASE
1304 |.endif
1305 | mov SAVE_PC, PC
1306 | call extern lj_meta_arith // (lua_State *L, TValue *ra,*rb,*rc, BCReg op)
1307 | // NULL (finished) or TValue * (metamethod) returned in eax (RC).
1308 | mov BASE, L:RB->base // Refetch BASE from L->base after the call.
1309 | test RC, RC
1310 | jz ->cont_nop // NULL: nothing more to do, continue with the next instruction.
1311 |
1312 | // Call metamethod for binary op.
1313 |->vmeta_binop:
1314 | // BASE = base, RC = new base, stack = cont/func/o1/o2
1315 | mov RA, RC
1316 | sub RC, BASE // RC = new base - old base.
1317 | mov [RA-12], PC // [cont|PC]
1318 | lea PC, [RC+FRAME_CONT] // PC = delta + FRAME_CONT.
1319 | mov NARGS:RD, 2+1 // 2 args for func(o1, o2).
1320 | jmp ->vm_call_dispatch
1311 |
1312 | // Call metamethod for binary op.
1313 |->vmeta_binop:
1314 | // BASE = base, RC = new base, stack = cont/func/o1/o2
1315 | mov RA, RC
1316 | sub RC, BASE
1317 | mov [RA-12], PC // [cont|PC]
1318 | lea PC, [RC+FRAME_CONT]
1319 | mov NARGS:RD, 2+1 // 2 args for func(o1, o2).
1320 | jmp ->vm_call_dispatch
1321 |
1322 |->vmeta_len:
1323 | mov L:RB, SAVE_L
1324 | mov L:RB->base, BASE
1325 | lea FCARG2, [BASE+RD*8] // Caveat: FCARG2 == BASE
1326 | mov L:FCARG1, L:RB
1327 | mov SAVE_PC, PC
1328 | call extern lj_meta_len@8 // (lua_State *L, TValue *o)
1329 | // NULL (retry) or TValue * (metamethod) returned in eax (RC).
1330 | mov BASE, L:RB->base
1331 #if LJ_52
1332 | test RC, RC
1333 | jne ->vmeta_binop // Binop call for compatibility.
1334 | movzx RD, PC_RD
1335 | mov TAB:FCARG1, [BASE+RD*8]
1336 | jmp ->BC_LEN_Z
1337 #else
1338 | jmp ->vmeta_binop // Binop call for compatibility.
1339 #endif
1340 |
1341 |//-- Call metamethod ----------------------------------------------------
1342 |
1343 |->vmeta_call_ra: // Variant taking RA as a slot index: convert it to the new base pointer first.
1344 | lea RA, [BASE+RA*8+8]
1345 |->vmeta_call: // Resolve and call __call metamethod.
1346 | // BASE = old base, RA = new base, RC = nargs+1, PC = return
1347 | mov TMP2, RA // Save RA, RC for us.
1348 | mov TMP1, NARGS:RD
1349 | sub RA, 8 // RA = new base - 8, passed as TValue *func below.
1350 |.if X64
1351 | mov L:RB, SAVE_L
1352 | mov L:RB->base, BASE // Caveat: CARG2d/CARG3d may be BASE.
1353 | mov CARG2d, RA
1354 | lea CARG3d, [RA+NARGS:RD*8]
1355 | mov CARG1d, L:RB // Caveat: CARG1d may be RA.
1356 |.else
1357 | lea RC, [RA+NARGS:RD*8]
1358 | mov L:RB, SAVE_L
1359 | mov ARG2, RA
1360 | mov ARG3, RC
1361 | mov ARG1, L:RB
1362 | mov L:RB->base, BASE // This is the callers base!
1363 |.endif
1364 | mov SAVE_PC, PC
1365 | call extern lj_meta_call // (lua_State *L, TValue *func, TValue *top)
1366 | mov BASE, L:RB->base
1367 | mov RA, TMP2 // Restore the saved new base ...
1368 | mov NARGS:RD, TMP1 // ... and nargs+1.
1369 | mov LFUNC:RB, [RA-8]
1370 | add NARGS:RD, 1 // One more argument than before the call.
1371 | // This is fragile. L->base must not move, KBASE must always be defined.
1372 |.if x64 // NOTE(review): lowercase, unlike the X64 define used elsewhere; if DynASM treats it as undefined only the .else compare is assembled. Confirm intent.
1373 | cmp KBASEa, rdx // Continue with CALLT if flag set.
1374 |.else
1375 | cmp KBASE, BASE // Continue with CALLT if flag set.
1376 |.endif
1377 | je ->BC_CALLT_Z
1378 | mov BASE, RA
1379 | ins_call // Otherwise call resolved metamethod.
1380 |
1381 |//-- Argument coercion for 'for' statement ------------------------------
1382 |
1383 |->vmeta_for: // Let lj_meta_for process the slots at RA, then re-dispatch the previous instruction.
1384 | mov L:RB, SAVE_L
1385 | mov L:RB->base, BASE
1386 | mov FCARG2, RA // Caveat: FCARG2 == BASE
1387 | mov L:FCARG1, L:RB // Caveat: FCARG1 == RA
1388 | mov SAVE_PC, PC
1389 | call extern lj_meta_for@8 // (lua_State *L, TValue *base)
1390 | mov BASE, L:RB->base
1391 | mov RC, [PC-4] // Reload the 32-bit instruction word located just before PC.
1392 | movzx RA, RCH // RA = bits 8..15 of the instruction.
1393 | movzx OP, RCL // OP = low byte of the instruction.
1394 | shr RC, 16 // RC = upper 16 bits of the instruction.
1395 |.if X64
1396 | jmp aword [DISPATCH+OP*8+GG_DISP2STATIC] // Retry FORI or JFORI.
1397 |.else
1398 | jmp aword [DISPATCH+OP*4+GG_DISP2STATIC] // Retry FORI or JFORI.
1399 |.endif
1400 |
1401 |//-----------------------------------------------------------------------
1402 |//-- Fast functions -----------------------------------------------------
1403 |//-----------------------------------------------------------------------
1404 |
1405 |.macro .ffunc, name
1406 |->ff_ .. name: // Only emits the global label ff_<name>; no argument checks.
1407 |.endmacro
1408 |
1409 |.macro .ffunc_1, name
1410 |->ff_ .. name:
1411 | cmp NARGS:RD, 1+1; jb ->fff_fallback // Fallback with fewer than 1 argument (RD = nargs+1).
1412 |.endmacro
1413 |
1414 |.macro .ffunc_2, name
1415 |->ff_ .. name:
1416 | cmp NARGS:RD, 2+1; jb ->fff_fallback // Fallback with fewer than 2 arguments (RD = nargs+1).
1417 |.endmacro
1418 |
1419 |.macro .ffunc_nsse, name, op
1420 | .ffunc_1 name
1421 | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback // Fallback unless the type tag of arg 1 is below LJ_TISNUM.
1422 | op xmm0, qword [BASE] // Apply op to the first argument, result in xmm0.
1423 |.endmacro
1424 |
1425 |.macro .ffunc_nsse, name
1426 | .ffunc_nsse name, movsd // One-parameter form: plain load of the argument into xmm0.
1427 |.endmacro
1428 |
1429 |.macro .ffunc_nnsse, name
1430 | .ffunc_2 name
1431 | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback
1432 | cmp dword [BASE+12], LJ_TISNUM; jae ->fff_fallback
1433 | movsd xmm0, qword [BASE] // xmm0 = first argument.
1434 | movsd xmm1, qword [BASE+8] // xmm1 = second argument.
1435 |.endmacro
1436 |
1437 |.macro .ffunc_nnr, name
1438 | .ffunc_2 name
1439 | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback
1440 | cmp dword [BASE+12], LJ_TISNUM; jae ->fff_fallback
1441 | fld qword [BASE+8] // Push the second argument on the x87 stack first ...
1442 | fld qword [BASE] // ... so the first argument ends up on top.
1443 |.endmacro
1444 |
1445 |// Inlined GC threshold check. Caveat: uses label 1.
1446 |.macro ffgccheck
1447 | mov RB, [DISPATCH+DISPATCH_GL(gc.total)]
1448 | cmp RB, [DISPATCH+DISPATCH_GL(gc.threshold)]
1449 | jb >1 // gc.total below gc.threshold (unsigned): skip the step.
1450 | call ->fff_gcstep
1451 |1:
1452 |.endmacro
1453 |
1454 |//-- Base library: checks -----------------------------------------------
1455 |
1456 |.ffunc_1 assert // Fast path: first argument tests true, so all arguments are returned.
1457 | mov RB, [BASE+4]
1458 | cmp RB, LJ_TISTRUECOND; jae ->fff_fallback // Type tag not below LJ_TISTRUECOND: fallback.
1459 | mov PC, [BASE-4]
1460 | mov MULTRES, RD // Keep nargs+1; it is reloaded as the result count at label 2.
1461 | mov [BASE-4], RB // Copy the first argument down to BASE-8.
1462 | mov RB, [BASE]
1463 | mov [BASE-8], RB
1464 | sub RD, 2 // RD = number of remaining arguments.
1465 | jz >2
1466 | mov RA, BASE
1467 |1: // Move each remaining argument down by one slot (8 bytes).
1468 | add RA, 8
1469 |.if X64
1470 | mov RBa, [RA]
1471 | mov [RA-8], RBa
1472 |.else
1473 | mov RB, [RA+4]
1474 | mov [RA-4], RB
1475 | mov RB, [RA]
1476 | mov [RA-8], RB
1477 |.endif
1478 | sub RD, 1
1479 | jnz <1
1480 |2:
1481 | mov RD, MULTRES
1482 | jmp ->fff_res_ // MULTRES is already set, so enter after its store.
1483 |
1484 |.ffunc_1 type // Returns a string selected from this function's upvalues by the argument's type tag.
1485 | mov RB, [BASE+4]
1486 |.if X64
1487 | mov RA, RB
1488 | sar RA, 15
1489 | cmp RA, -2
1490 | je >3 // Tag >> 15 equals -2: handled as LJ_TLIGHTUD at label 3.
1491 |.endif
1492 | mov RC, ~LJ_TNUMX
1493 | not RB
1494 | cmp RC, RB
1495 | cmova RC, RB // RC = unsigned minimum of ~LJ_TNUMX and ~tag.
1496 |2:
1497 | mov CFUNC:RB, [BASE-8]
1498 | mov STR:RC, [CFUNC:RB+RC*8+((char *)(&((GCfuncC *)0)->upvalue))] // RC = string in upvalue number RC.
1499 | mov PC, [BASE-4]
1500 | mov dword [BASE-4], LJ_TSTR
1501 | mov [BASE-8], STR:RC
1502 | jmp ->fff_res1
1503 |.if X64
1504 |3:
1505 | mov RC, ~LJ_TLIGHTUD
1506 | jmp <2
1507 |.endif
1508 |
1509 |//-- Base library: getters and setters ---------------------------------
1510 |
1511 |.ffunc_1 getmetatable // Returns mt.__metatable if set and non-nil, else the metatable, else nil.
1512 | mov RB, [BASE+4]
1513 | mov PC, [BASE-4]
1514 | cmp RB, LJ_TTAB; jne >6
1515 |1: // Field metatable must be at same offset for GCtab and GCudata!
1516 | mov TAB:RB, [BASE]
1517 | mov TAB:RB, TAB:RB->metatable
1518 |2: // RB = metatable or 0.
1519 | test TAB:RB, TAB:RB
1520 | mov dword [BASE-4], LJ_TNIL // mov leaves the flags of the test intact.
1521 | jz ->fff_res1 // No metatable: return nil.
1522 | mov STR:RC, [DISPATCH+DISPATCH_GL(gcroot)+4*(GCROOT_MMNAME+MM_metatable)]
1523 | mov dword [BASE-4], LJ_TTAB // Store metatable as default result.
1524 | mov [BASE-8], TAB:RB
1525 | mov RA, TAB:RB->hmask // Locate the first hash node: (hmask & str->sid) * #NODE + node.
1526 | and RA, STR:RC->sid
1527 | imul RA, #NODE
1528 | add NODE:RA, TAB:RB->node
1529 |3: // Rearranged logic, because we expect _not_ to find the key.
1530 | cmp dword NODE:RA->key.it, LJ_TSTR
1531 | jne >4
1532 | cmp dword NODE:RA->key.gcr, STR:RC
1533 | je >5
1534 |4: // Follow the node's next link until it is 0.
1535 | mov NODE:RA, NODE:RA->next
1536 | test NODE:RA, NODE:RA
1537 | jnz <3
1538 | jmp ->fff_res1 // Not found, keep default result.
1539 |5: // Key found: RA = matching node.
1540 | mov RB, [RA+4]
1541 | cmp RB, LJ_TNIL; je ->fff_res1 // Ditto for nil value.
1542 | mov RC, [RA]
1543 | mov [BASE-4], RB // Return value of mt.__metatable.
1544 | mov [BASE-8], RC
1545 | jmp ->fff_res1
1546 |
1547 |6: // Argument is not a table.
1548 | cmp RB, LJ_TUDATA; je <1
1549 |.if X64
1550 | cmp RB, LJ_TNUMX; ja >8
1551 | cmp RB, LJ_TISNUM; jbe >7
1552 | mov RB, LJ_TLIGHTUD
1553 | jmp >8
1554 |7:
1555 |.else
1556 | cmp RB, LJ_TISNUM; ja >8
1557 |.endif
1558 | mov RB, LJ_TNUMX
1559 |8: // RB = type tag used to pick the base metatable.
1560 | not RB
1561 | mov TAB:RB, [DISPATCH+RB*4+DISPATCH_GL(gcroot[GCROOT_BASEMT])] // Index gcroot[GCROOT_BASEMT + ~tag].
1562 | jmp <2
1563 |
1564 |.ffunc_2 setmetatable // Inline only: table without a metatable gets a table as its metatable.
1565 | cmp dword [BASE+4], LJ_TTAB; jne ->fff_fallback
1566 | // Fast path: no mt for table yet and not clearing the mt.
1567 | mov TAB:RB, [BASE]
1568 | cmp dword TAB:RB->metatable, 0; jne ->fff_fallback
1569 | cmp dword [BASE+12], LJ_TTAB; jne ->fff_fallback
1570 | mov TAB:RC, [BASE+8]
1571 | mov TAB:RB->metatable, TAB:RC
1572 | mov PC, [BASE-4]
1573 | mov dword [BASE-4], LJ_TTAB // Return original table.
1574 | mov [BASE-8], TAB:RB
1575 | test byte TAB:RB->marked, LJ_GC_BLACK // isblack(table)
1576 | jz >1
1577 | // Possible write barrier. Table is black, but skip iswhite(mt) check.
1578 | barrierback TAB:RB, RC
1579 |1:
1580 | jmp ->fff_res1
1581 |
1582 |.ffunc_2 rawget // Table in arg 1, key in arg 2: returns the slot found by lj_tab_get.
1583 | cmp dword [BASE+4], LJ_TTAB; jne ->fff_fallback
1584 |.if X64WIN
1585 | mov RB, BASE // Save BASE.
1586 | lea CARG3d, [BASE+8]
1587 | mov CARG2d, [BASE] // Caveat: CARG2d == BASE.
1588 | mov CARG1d, SAVE_L
1589 |.elif X64
1590 | mov RB, BASE // Save BASE.
1591 | mov CARG2d, [BASE]
1592 | lea CARG3d, [BASE+8] // Caveat: CARG3d == BASE.
1593 | mov CARG1d, SAVE_L
1594 |.else
1595 | mov TAB:RD, [BASE]
1596 | mov L:RB, SAVE_L
1597 | mov ARG2, TAB:RD
1598 | mov ARG1, L:RB
1599 | mov RB, BASE // Save BASE.
1600 | add BASE, 8 // BASE temporarily points at the key; restored from RB after the call.
1601 | mov ARG3, BASE
1602 |.endif
1603 | call extern lj_tab_get // (lua_State *L, GCtab *t, cTValue *key)
1604 | // cTValue * returned in eax (RD).
1605 | mov BASE, RB // Restore BASE.
1606 | // Copy table slot.
1607 |.if X64
1608 | mov RBa, [RD]
1609 | mov PC, [BASE-4] // PC must be read before the 8-byte store below overwrites [BASE-4].
1610 | mov [BASE-8], RBa
1611 |.else
1612 | mov RB, [RD]
1613 | mov RD, [RD+4]
1614 | mov PC, [BASE-4] // PC must be read before [BASE-4] is overwritten below.
1615 | mov [BASE-8], RB
1616 | mov [BASE-4], RD
1617 |.endif
1618 | jmp ->fff_res1
1619 |
1620 |//-- Base library: conversions ------------------------------------------
1621 |
1622 |.ffunc tonumber
1623 | // Only handles the number case inline (without a base argument).
1624 | cmp NARGS:RD, 1+1; jne ->fff_fallback // Exactly one argument.
1625 | cmp dword [BASE+4], LJ_TISNUM
1626 |.if DUALNUM
1627 | jne >1
1628 | mov RB, dword [BASE]; jmp ->fff_resi // Tag == LJ_TISNUM: return the 32-bit integer as is.
1629 |1:
1630 | ja ->fff_fallback // Tag above LJ_TISNUM: fallback (flags still from the cmp above).
1631 |.else
1632 | jae ->fff_fallback
1633 |.endif
1634 | movsd xmm0, qword [BASE]; jmp ->fff_resxmm0 // Return the argument unchanged.
1635 |
1636 |.ffunc_1 tostring
1637 | // Only handles the string or number case inline.
1638 | mov PC, [BASE-4]
1639 | cmp dword [BASE+4], LJ_TSTR; jne >3
1640 | // A __tostring method in the string base metatable is ignored.
1641 | mov STR:RD, [BASE]
1642 |2: // RD = string to return.
1643 | mov dword [BASE-4], LJ_TSTR
1644 | mov [BASE-8], STR:RD
1645 | jmp ->fff_res1
1646 |3: // Handle numbers inline, unless a number base metatable is present.
1647 | cmp dword [BASE+4], LJ_TISNUM; ja ->fff_fallback
1648 | cmp dword [DISPATCH+DISPATCH_GL(gcroot[GCROOT_BASEMT_NUM])], 0
1649 | jne ->fff_fallback
1650 | ffgccheck // Caveat: uses label 1.
1651 | mov L:RB, SAVE_L
1652 | mov L:RB->base, BASE // Add frame since C call can throw.
1653 | mov SAVE_PC, PC // Redundant (but a defined value).
1654 |.if X64 and not X64WIN
1655 | mov FCARG2, BASE // Otherwise: FCARG2 == BASE
1656 |.endif
1657 | mov L:FCARG1, L:RB
1658 |.if DUALNUM
1659 | call extern lj_strfmt_number@8 // (lua_State *L, cTValue *o)
1660 |.else
1661 | call extern lj_strfmt_num@8 // (lua_State *L, lua_Number *np)
1662 |.endif
1663 | // GCstr returned in eax (RD).
1664 | mov BASE, L:RB->base
1665 | jmp <2 // Store the new string as the result.
1666 |
1667 |//-- Base library: iterators -------------------------------------------
1668 |
1669 |.ffunc_1 next // Table traversal step via lj_tab_next(t, key at BASE+8, output at BASE-8).
1670 | je >2 // Missing 2nd arg? (Flags are still those of the cmp inside .ffunc_1.)
1671 |1:
1672 | cmp dword [BASE+4], LJ_TTAB; jne ->fff_fallback
1673 | mov PC, [BASE-4]
1674 | mov RB, BASE // Save BASE.
1675 |.if X64WIN
1676 | mov CARG1d, [BASE]
1677 | lea CARG3d, [BASE-8]
1678 | lea CARG2d, [BASE+8] // Caveat: CARG2d == BASE.
1679 |.elif X64
1680 | mov CARG1d, [BASE]
1681 | lea CARG2d, [BASE+8]
1682 | lea CARG3d, [BASE-8] // Caveat: CARG3d == BASE.
1683 |.else
1684 | mov TAB:RD, [BASE]
1685 | mov ARG1, TAB:RD
1686 | add BASE, 8 // BASE is used as scratch here; it is restored from RB after the call.
1687 | mov ARG2, BASE
1688 | sub BASE, 8+8
1689 | mov ARG3, BASE
1690 |.endif
1691 | call extern lj_tab_next // (GCtab *t, cTValue *key, TValue *o)
1692 | // 1=found, 0=end, -1=error returned in eax (RD).
1693 | mov BASE, RB // Restore BASE.
1694 | test RD, RD; jg ->fff_res2 // Found key/value.
1695 | js ->fff_fallback_2 // Invalid key.
1696 | // End of traversal: return nil.
1697 | mov dword [BASE-4], LJ_TNIL
1698 | jmp ->fff_res1
1699 |2: // Set missing 2nd arg to nil.
1700 | mov dword [BASE+12], LJ_TNIL
1701 | jmp <1
1702 |
1703 |.ffunc_1 pairs // Inline for tables: returns upvalue[0], the table and nil.
1704 | mov TAB:RB, [BASE]
1705 | cmp dword [BASE+4], LJ_TTAB; jne ->fff_fallback
1706 #if LJ_52
1707 | cmp dword TAB:RB->metatable, 0; jne ->fff_fallback
1708 #endif
1709 | mov CFUNC:RB, [BASE-8]
1710 | mov CFUNC:RD, CFUNC:RB->upvalue[0]
1711 | mov PC, [BASE-4]
1712 | mov dword [BASE-4], LJ_TFUNC // First result: the function from upvalue[0].
1713 | mov [BASE-8], CFUNC:RD
1714 | mov dword [BASE+12], LJ_TNIL // Third result: nil. The table at BASE stays as the second.
1715 | mov RD, 1+3
1716 | jmp ->fff_res
1717 |
1718 |.ffunc_2 ipairs_aux // Iterator step: returns index+1 and t[index+1], or nothing if that is nil/absent.
1719 | cmp dword [BASE+4], LJ_TTAB; jne ->fff_fallback
1720 | cmp dword [BASE+12], LJ_TISNUM
1721 |.if DUALNUM
1722 | jne ->fff_fallback
1723 |.else
1724 | jae ->fff_fallback
1725 |.endif
1726 | mov PC, [BASE-4]
1727 |.if DUALNUM
1728 | mov RD, dword [BASE+8]
1729 | add RD, 1 // RD = index + 1, stored as the first result.
1730 | mov dword [BASE-4], LJ_TISNUM
1731 | mov dword [BASE-8], RD
1732 |.else
1733 | movsd xmm0, qword [BASE+8]
1734 | sseconst_1 xmm1, RBa // Presumably loads the constant 1.0 into xmm1 (macro not shown here).
1735 | addsd xmm0, xmm1
1736 | cvttsd2si RD, xmm0 // RD = incremented index as an integer.
1737 | movsd qword [BASE-8], xmm0 // First result: the incremented index.
1738 |.endif
1739 | mov TAB:RB, [BASE]
1740 | cmp RD, TAB:RB->asize; jae >2 // Not in array part?
1741 | shl RD, 3
1742 | add RD, TAB:RB->array // RD = address of array slot (index * 8 + array).
1743 |1: // RD = pointer to the slot.
1744 | cmp dword [RD+4], LJ_TNIL; je ->fff_res0
1745 | // Copy array slot.
1746 |.if X64
1747 | mov RBa, [RD]
1748 | mov [BASE], RBa
1749 |.else
1750 | mov RB, [RD]
1751 | mov RD, [RD+4]
1752 | mov [BASE], RB
1753 | mov [BASE+4], RD
1754 |.endif
1755 |->fff_res2: // Return two results.
1756 | mov RD, 1+2
1757 | jmp ->fff_res
1758 |2: // Check for empty hash part first. Otherwise call C function.
1759 | cmp dword TAB:RB->hmask, 0; je ->fff_res0
1760 | mov FCARG1, TAB:RB
1761 | mov RB, BASE // Save BASE.
1762 | mov FCARG2, RD // Caveat: FCARG2 == BASE
1763 | call extern lj_tab_getinth@8 // (GCtab *t, int32_t key)
1764 | // cTValue * or NULL returned in eax (RD).
1765 | mov BASE, RB
1766 | test RD, RD
1767 | jnz <1
1768 |->fff_res0: // Return no results.
1769 | mov RD, 1+0
1770 | jmp ->fff_res
1771 |
1772 |.ffunc_1 ipairs // Inline for tables: returns upvalue[0], the table and a zero index.
1773 | mov TAB:RB, [BASE]
1774 | cmp dword [BASE+4], LJ_TTAB; jne ->fff_fallback
1775 #if LJ_52
1776 | cmp dword TAB:RB->metatable, 0; jne ->fff_fallback
1777 #endif
1778 | mov CFUNC:RB, [BASE-8]
1779 | mov CFUNC:RD, CFUNC:RB->upvalue[0]
1780 | mov PC, [BASE-4]
1781 | mov dword [BASE-4], LJ_TFUNC // First result: the function from upvalue[0].
1782 | mov [BASE-8], CFUNC:RD
1783 |.if DUALNUM
1784 | mov dword [BASE+12], LJ_TISNUM // Third result: integer 0.
1785 | mov dword [BASE+8], 0
1786 |.else
1787 | xorps xmm0, xmm0 // Third result: all-zero bits, i.e. the double +0.
1788 | movsd qword [BASE+8], xmm0
1789 |.endif
1790 | mov RD, 1+3
1791 | jmp ->fff_res
1792 |
1793 |//-- Base library: catch errors ----------------------------------------
1794 |
1795 |.ffunc_1 pcall // Calls arg 1 with the remaining arguments using frame type FRAME_PCALL.
1796 | lea RA, [BASE+8] // RA = new base, one slot above the current base.
1797 | sub NARGS:RD, 1 // The called function itself is not an argument.
1798 | mov PC, 8+FRAME_PCALL // Frame delta of 8 combined with the frame type.
1799 |1: // Shared with xpcall (jmp <1).
1800 | movzx RB, byte [DISPATCH+DISPATCH_GL(hookmask)]
1801 | shr RB, HOOK_ACTIVE_SHIFT
1802 | and RB, 1 // RB = the single bit at HOOK_ACTIVE_SHIFT in hookmask.
1803 | add PC, RB // Remember active hook before pcall.
1804 | jmp ->vm_call_dispatch
1805 |
1806 |.ffunc_2 xpcall // Inline only if arg 2 is a function; swaps both args and continues in pcall.
1807 | cmp dword [BASE+12], LJ_TFUNC; jne ->fff_fallback
1808 | mov RB, [BASE+4] // Swap function and traceback.
1809 | mov [BASE+12], RB
1810 | mov dword [BASE+4], LJ_TFUNC
1811 | mov LFUNC:RB, [BASE]
1812 | mov PC, [BASE+8] // PC serves as scratch for the swap; it is set for real below.
1813 | mov [BASE+8], LFUNC:RB
1814 | mov [BASE], PC
1815 | lea RA, [BASE+16] // RA = new base, two slots above the current base.
1816 | sub NARGS:RD, 2
1817 | mov PC, 16+FRAME_PCALL // Frame delta of 16 combined with the frame type.
1818 | jmp <1 // Join pcall at its label 1.
1819 |
1820 |//-- Coroutine library --------------------------------------------------
1821 |
1822 |.macro coroutine_resume_wrap, resume // resume = 1: coroutine.resume, resume = 0: coroutine.wrap helper.
1823 |.if resume
1824 |.ffunc_1 coroutine_resume
1825 | mov L:RB, [BASE] // Coroutine = first argument (its type is checked further down).
1826 |.else
1827 |.ffunc coroutine_wrap_aux
1828 | mov CFUNC:RB, [BASE-8]
1829 | mov L:RB, CFUNC:RB->upvalue[0].gcr // Coroutine = upvalue[0] of this function.
1830 |.endif
1831 | mov PC, [BASE-4]
1832 | mov SAVE_PC, PC
1833 |.if X64
1834 | mov TMP1, L:RB // Keep the coroutine pointer; reloaded after the calls below.
1835 |.else
1836 | mov ARG1, L:RB // Keep the coroutine pointer; doubles as first argument of vm_resume.
1837 |.endif
1838 |.if resume
1839 | cmp dword [BASE+4], LJ_TTHREAD; jne ->fff_fallback
1840 |.endif
1841 | cmp aword L:RB->cframe, 0; jne ->fff_fallback
1842 | cmp byte L:RB->status, LUA_YIELD; ja ->fff_fallback
1843 | mov RA, L:RB->top // mov leaves the flags of the status compare intact.
1844 | je >1 // Skip the check if status == LUA_YIELD; else status is below it (i.e. 0).
1845 | cmp RA, L:RB->base // Check for presence of initial func.
1846 | je ->fff_fallback
1847 |1:
1848 |.if resume
1849 | lea PC, [RA+NARGS:RD*8-16] // Check stack space (-1-thread).
1850 |.else
1851 | lea PC, [RA+NARGS:RD*8-8] // Check stack space (-1).
1852 |.endif
1853 | cmp PC, L:RB->maxstack; ja ->fff_fallback
1854 | mov L:RB->top, PC // Coroutine top = old top + space for the arguments.
1855 |
1856 | mov L:RB, SAVE_L // From here on RB = the current (resuming) state.
1857 | mov L:RB->base, BASE
1858 |.if resume
1859 | add BASE, 8 // Keep resumed thread in stack for GC.
1860 |.endif
1861 | mov L:RB->top, BASE
1862 |.if resume
1863 | lea RB, [BASE+NARGS:RD*8-24] // RB = end of source for stack move.
1864 |.else
1865 | lea RB, [BASE+NARGS:RD*8-16] // RB = end of source for stack move.
1866 |.endif
1867 | sub RBa, PCa // Relative to PC.
1868 |
1869 | cmp PC, RA
1870 | je >3 // Nothing to move if the coroutine top did not change.
1871 |2: // Move args to coroutine.
1872 |.if X64
1873 | mov RCa, [PC+RB]
1874 | mov [PC-8], RCa
1875 |.else
1876 | mov RC, [PC+RB+4]
1877 | mov [PC-4], RC
1878 | mov RC, [PC+RB]
1879 | mov [PC-8], RC
1880 |.endif
1881 | sub PC, 8 // Walk downwards until the old coroutine top (RA) is reached.
1882 | cmp PC, RA
1883 | jne <2
1884 |3:
1885 |.if X64
1886 | mov CARG2d, RA
1887 | mov CARG1d, TMP1
1888 |.else
1889 | mov ARG2, RA
1890 | xor RA, RA
1891 | mov ARG4, RA
1892 | mov ARG3, RA
1893 |.endif
1894 | call ->vm_resume // (lua_State *L, TValue *base, 0, 0)
1895 |
1896 | mov L:RB, SAVE_L
1897 |.if X64
1898 | mov L:PC, TMP1
1899 |.else
1900 | mov L:PC, ARG1 // The callee doesn't modify SAVE_L.
1901 |.endif
1902 | mov BASE, L:RB->base
1903 | mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB
1904 | set_vmstate INTERP
1905 |
1906 | cmp eax, LUA_YIELD
1907 | ja >8 // Status above LUA_YIELD: error path at label 8.
1908 |4: // RB = current state, PC = coroutine state.
1909 | mov RA, L:PC->base
1910 | mov KBASE, L:PC->top
1911 | mov L:PC->top, RA // Clear coroutine stack.
1912 | mov PC, KBASE
1913 | sub PC, RA // PC = size of the results in bytes (top - base).
1914 | je >6 // No results?
1915 | lea RD, [BASE+PC]
1916 | shr PC, 3 // PC = number of results.
1917 | cmp RD, L:RB->maxstack
1918 | ja >9 // Need to grow stack?
1919 |
1920 | mov RB, BASE
1921 | sub RBa, RAa // RB = BASE - coroutine base, so [RA+RB] addresses the destination.
1922 |5: // Move results from coroutine.
1923 |.if X64
1924 | mov RDa, [RA]
1925 | mov [RA+RB], RDa
1926 |.else
1927 | mov RD, [RA]
1928 | mov [RA+RB], RD
1929 | mov RD, [RA+4]
1930 | mov [RA+RB+4], RD
1931 |.endif
1932 | add RA, 8
1933 | cmp RA, KBASE // KBASE still holds the old coroutine top.
1934 | jne <5
1935 |6:
1936 |.if resume
1937 | lea RD, [PC+2] // nresults+1 = 1 + true + results.
1938 | mov dword [BASE-4], LJ_TTRUE // Prepend true to results.
1939 |.else
1940 | lea RD, [PC+1] // nresults+1 = 1 + results.
1941 |.endif
1942 |7: // RD = nresults+1.
1943 | mov PC, SAVE_PC
1944 | mov MULTRES, RD
1945 |.if resume
1946 | mov RAa, -8 // Results start at BASE-8 (the prepended true/false).
1947 |.else
1948 | xor RA, RA // Results start at BASE.
1949 |.endif
1950 | test PC, FRAME_TYPE
1951 | jz ->BC_RET_Z
1952 | jmp ->vm_return
1953 |
1954 |8: // Coroutine returned with error (at co->top-1).
1955 |.if resume
1956 | mov dword [BASE-4], LJ_TFALSE // Prepend false to results.
1957 | mov RA, L:PC->top
1958 | sub RA, 8
1959 | mov L:PC->top, RA // Clear error from coroutine stack.
1960 | // Copy error message.
1961 |.if X64
1962 | mov RDa, [RA]
1963 | mov [BASE], RDa
1964 |.else
1965 | mov RD, [RA]
1966 | mov [BASE], RD
1967 | mov RD, [RA+4]
1968 | mov [BASE+4], RD
1969 |.endif
1970 | mov RD, 1+2 // nresults+1 = 1 + false + error.
1971 | jmp <7
1972 |.else
1973 | mov FCARG2, L:PC
1974 | mov FCARG1, L:RB
1975 | call extern lj_ffh_coroutine_wrap_err@8 // (lua_State *L, lua_State *co)
1976 | // Error function does not return.
1977 |.endif
1978 |
1979 |9: // Handle stack expansion on return from yield.
1980 |.if X64
1981 | mov L:RA, TMP1
1982 |.else
1983 | mov L:RA, ARG1 // The callee doesn't modify SAVE_L.
1984 |.endif
1985 | mov L:RA->top, KBASE // Undo coroutine stack clearing.
1986 | mov FCARG2, PC // n = number of results.
1987 | mov FCARG1, L:RB
1988 | call extern lj_state_growstack@8 // (lua_State *L, int n)
1989 |.if X64
1990 | mov L:PC, TMP1
1991 |.else
1992 | mov L:PC, ARG1
1993 |.endif
1994 | mov BASE, L:RB->base // Refetch BASE from L->base after growing the stack.
1995 | jmp <4 // Retry the stack move.
1996 |.endmacro
1997 |
1998 | coroutine_resume_wrap 1 // coroutine.resume
1999 | coroutine_resume_wrap 0 // coroutine.wrap
2000 |
2001 |.ffunc coroutine_yield // Inline only when CFRAME_RESUME is set in L->cframe; else fallback.
2002 | mov L:RB, SAVE_L
2003 | test aword L:RB->cframe, CFRAME_RESUME
2004 | jz ->fff_fallback
2005 | mov L:RB->base, BASE
2006 | lea RD, [BASE+NARGS:RD*8-8] // RD = BASE + nargs*8 (NARGS:RD holds nargs+1).
2007 | mov L:RB->top, RD
2008 | xor RD, RD
2009 | mov aword L:RB->cframe, RDa // Clear L->cframe.
2010 | mov al, LUA_YIELD
2011 | mov byte L:RB->status, al // al (low byte of eax) still holds LUA_YIELD at the jump.
2012 | jmp ->vm_leave_unw
2013 |
2014 |//-- Math library -------------------------------------------------------
2015 |
2016 |.if not DUALNUM
2017 |->fff_resi: // Dummy. Label only; falls through into fff_resn.
2018 |.endif
2019 |
2020 |->fff_resn: // Return the x87 top-of-stack value as the single result.
2021 | mov PC, [BASE-4]
2022 | fstp qword [BASE-8] // Store as a double at BASE-8 and pop the x87 stack.
2023 | jmp ->fff_res1
2024 |
2025 | .ffunc_1 math_abs // Also hosts the shared result exits fff_resi/fff_resxmm0/fff_res1/fff_res/fff_res_.
2026 |.if DUALNUM
2027 | cmp dword [BASE+4], LJ_TISNUM; jne >2
2028 | mov RB, dword [BASE]
2029 | cmp RB, 0; jns ->fff_resi // Non-negative integer: return it unchanged.
2030 | neg RB; js >1 // Still negative after negation: result does not fit, see label 1.
2031 |->fff_resbit:
2032 |->fff_resi: // Return the integer in RB as the single result.
2033 | mov PC, [BASE-4]
2034 | mov dword [BASE-4], LJ_TISNUM
2035 | mov dword [BASE-8], RB
2036 | jmp ->fff_res1
2037 |1: // Return the double 2^31 instead.
2038 | mov PC, [BASE-4]
2039 | mov dword [BASE-4], 0x41e00000 // 2^31.
2040 | mov dword [BASE-8], 0
2041 | jmp ->fff_res1
2042 |2:
2043 | ja ->fff_fallback // Tag above LJ_TISNUM (flags from the cmp at the top): fallback.
2044 |.else
2045 | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback
2046 |.endif
2047 | movsd xmm0, qword [BASE]
2048 | sseconst_abs xmm1, RDa // Presumably loads the mask that clears the sign bit (macro not shown here).
2049 | andps xmm0, xmm1
2050 |->fff_resxmm0: // Return the double in xmm0 as the single result.
2051 | mov PC, [BASE-4]
2052 | movsd qword [BASE-8], xmm0
2053 | // fallthrough
2054 |
2055 |->fff_res1: // Return one result, already stored at BASE-8.
2056 | mov RD, 1+1
2057 |->fff_res: // RD = nresults+1, results start at BASE-8, PC = saved PC.
2058 | mov MULTRES, RD
2059 |->fff_res_: // Same as fff_res, but MULTRES has already been set.
2060 | test PC, FRAME_TYPE
2061 | jnz >7
2062 |5:
2063 | cmp PC_RB, RDL // More results expected?
2064 | ja >6
2065 | // Adjust BASE. KBASE is assumed to be set for the calling frame.
2066 | movzx RA, PC_RA
2067 | not RAa // Note: ~RA = -(RA+1)
2068 | lea BASE, [BASE+RA*8] // base = base - (RA+1)*8
2069 | ins_next
2070 |
2071 |6: // Fill up results with nil.
2072 | mov dword [BASE+RD*8-12], LJ_TNIL // Type tag of the next result slot (results start at BASE-8).
2073 | add RD, 1
2074 | jmp <5
2075 |
2076 |7: // Non-standard return case.
2077 | mov RAa, -8 // Results start at BASE+RA = BASE-8.
2078 | jmp ->vm_return
2079 |
2080 |.if X64
2081 |.define fff_resfp, fff_resxmm0 // X64: results of external FP functions are taken from xmm0.
2082 |.else
2083 |.define fff_resfp, fff_resn // Otherwise: they are taken from the x87 stack top.
2084 |.endif
2085 |
2086 |.macro math_round, func // Defines ff_math_<func>, rounding through the helper vm_<func>_sse.
2087 | .ffunc math_ .. func
2088 |.if DUALNUM
2089 | cmp dword [BASE+4], LJ_TISNUM; jne >1
2090 | mov RB, dword [BASE]; jmp ->fff_resi // Integer argument: return it unchanged.
2091 |1:
2092 | ja ->fff_fallback
2093 |.else
2094 | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback
2095 |.endif
2096 | movsd xmm0, qword [BASE]
2097 | call ->vm_ .. func .. _sse
2098 |.if DUALNUM
2099 | cvttsd2si RB, xmm0
2100 | cmp RB, 0x80000000 // cvttsd2si yields 0x80000000 when the value does not fit in int32.
2101 | jne ->fff_resi // Any other value: return the result as an integer.
2102 | cvtsi2sd xmm1, RB // Convert back to check whether the result really is -2^31.
2103 | ucomisd xmm0, xmm1
2104 | jp ->fff_resxmm0 // Unordered compare (NaN): return the double.
2105 | je ->fff_resi
2106 |.endif
2107 | jmp ->fff_resxmm0
2108 |.endmacro
2109 |
2110 | math_round floor
2111 | math_round ceil
2112 |
2113 |.ffunc_nsse math_sqrt, sqrtsd; jmp ->fff_resxmm0 // xmm0 = sqrtsd of the number argument, returned as is.
2114 |
2115 |.ffunc math_log // Inline only for exactly one number argument: calls the external log.
2116 | cmp NARGS:RD, 1+1; jne ->fff_fallback // Exactly one argument.
2117 | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback
2118 | movsd xmm0, qword [BASE]
2119 |.if not X64
2120 | movsd FPARG1, xmm0 // Non-X64: the argument is additionally stored to FPARG1.
2121 |.endif
2122 | mov RB, BASE // Save BASE across the call.
2123 | call extern log
2124 | mov BASE, RB // Restore BASE.
2125 | jmp ->fff_resfp
2126 |
2127 |.macro math_extern, func // Defines ff_math_<func>: one number argument passed to the external <func>.
2128 | .ffunc_nsse math_ .. func
2129 |.if not X64
2130 | movsd FPARG1, xmm0 // Non-X64: the argument is additionally stored to FPARG1.
2131 |.endif
2132 | mov RB, BASE // Save BASE across the call.
2133 | call extern func
2134 | mov BASE, RB // Restore BASE.
2135 | jmp ->fff_resfp
2136 |.endmacro
2137 |
2138 |.macro math_extern2, func // Same, for external functions taking two number arguments.
2139 | .ffunc_nnsse math_ .. func
2140 |.if not X64
2141 | movsd FPARG1, xmm0 // Non-X64: both arguments are additionally stored to FPARG1/FPARG3.
2142 | movsd FPARG3, xmm1
2143 |.endif
2144 | mov RB, BASE // Save BASE across the call.
2145 | call extern func
2146 | mov BASE, RB // Restore BASE.
2147 | jmp ->fff_resfp
2148 |.endmacro
2149 |
2150 | math_extern log10
2151 | math_extern exp
2152 | math_extern sin
2153 | math_extern cos
2154 | math_extern tan
2155 | math_extern asin
2156 | math_extern acos
2157 | math_extern atan
2158 | math_extern sinh
2159 | math_extern cosh
2160 | math_extern tanh
2161 | math_extern2 pow
2162 | math_extern2 atan2
2163 | math_extern2 fmod
2164 |
2165 |.ffunc_nnr math_ldexp; fscale; fpop1; jmp ->fff_resn // fscale scales st0 (arg 1) by 2^st1 (arg 2); result returned via fff_resn.
2166 |
2167 |.ffunc_1 math_frexp // Returns mantissa (at BASE-8) and exponent (at BASE) of a number argument.
2168 | mov RB, [BASE+4]
2169 | cmp RB, LJ_TISNUM; jae ->fff_fallback
2170 | mov PC, [BASE-4]
2171 | mov RC, [BASE]
2172 | mov [BASE-4], RB; mov [BASE-8], RC // Copy the argument to the first result slot.
2173 | shl RB, 1; cmp RB, 0xffe00000; jae >3 // Drop the sign bit; all-ones exponent goes to label 3.
2174 | or RC, RB; jz >3 // All remaining bits zero goes to label 3 as well.
2175 | mov RC, 1022 // Value subtracted from the raw exponent below.
2176 | cmp RB, 0x00200000; jb >4 // Exponent field is zero: denormal, see label 4.
2177 |1: // RB = high word shifted left by 1, RC = exponent bias to subtract.
2178 | shr RB, 21; sub RB, RC // Extract and unbias exponent.
2179 | cvtsi2sd xmm0, RB
2180 | mov RB, [BASE-4]
2181 | and RB, 0x800fffff // Mask off exponent.
2182 | or RB, 0x3fe00000 // Put mantissa in range [0.5,1) or 0.
2183 | mov [BASE-4], RB
2184 |2: // xmm0 = exponent as a double: store it as the second result.
2185 | movsd qword [BASE], xmm0
2186 | mov RD, 1+2
2187 | jmp ->fff_res
2188 |3: // Return +-0, +-Inf, NaN unmodified and an exponent of 0.
2189 | xorps xmm0, xmm0; jmp <2
2190 |4: // Handle denormals by multiplying with 2^54 and adjusting the bias.
2191 | movsd xmm0, qword [BASE]
2192 | sseconst_hi xmm1, RBa, 43500000 // 2^54.
2193 | mulsd xmm0, xmm1
2194 | movsd qword [BASE-8], xmm0
2195 | mov RB, [BASE-4]; mov RC, 1076; shl RB, 1; jmp <1 // 1076 = 1022 + 54 compensates for the scaling.
2196 |
2197 |.ffunc_nsse math_modf // Fast function math_modf: writes the truncated value to [BASE-8] and the fraction to [BASE], returns 2 results.
2198 | mov RB, [BASE+4] // RB = hiword of arg 1.
2199 | mov PC, [BASE-4] // Fetch PC before [BASE-8]/[BASE-4] are overwritten with the first result.
2200 | shl RB, 1; cmp RB, 0xffe00000; je >4 // +-Inf?
2201 | movaps xmm4, xmm0 // Keep x in xmm4; vm_trunc_sse is documented to modify only xmm0-xmm3 and eax.
2202 | call ->vm_trunc_sse // xmm0 = trunc(x).
2203 | subsd xmm4, xmm0 // xmm4 = x - trunc(x).
2204 |1:
2205 | movsd qword [BASE-8], xmm0 // First result slot.
2206 | movsd qword [BASE], xmm4 // Second result slot.
2207 | mov RC, [BASE-4]; mov RB, [BASE+4] // Hiwords of both results.
2208 | xor RC, RB; js >3 // Need to adjust sign?
2209 |2:
2210 | mov RD, 1+2 // RD = number of results (2) + 1.
2211 | jmp ->fff_res
2212 |3:
2213 | xor RB, 0x80000000; mov [BASE+4], RB // Flip sign of fraction.
2214 | jmp <2
2215 |4:
2216 | xorps xmm4, xmm4; jmp <1 // Return +-Inf and +-0.
2217 |
2218 |.macro math_minmax, name, cmovop, sseop // Defines a fast function that folds all arguments with cmovop (integer path) or sseop (number path).
2219 | .ffunc_1 name
2220 | mov RA, 2 // RA = 1-based index of the next argument; argument i lives at [BASE+i*8-8].
2221 | cmp dword [BASE+4], LJ_TISNUM // Check the tag of arg 1.
2222 |.if DUALNUM
2223 | jne >4
2224 | mov RB, dword [BASE] // RB = integer accumulator, seeded with arg 1.
2225 |1: // Handle integers.
2226 | cmp RA, RD; jae ->fff_resi // All arguments consumed (RD = nargs+1): return the integer in RB.
2227 | cmp dword [BASE+RA*8-4], LJ_TISNUM; jne >3
2228 | cmp RB, dword [BASE+RA*8-8]
2229 | cmovop RB, dword [BASE+RA*8-8] // Take the argument if the condition encoded in cmovop holds.
2230 | add RA, 1
2231 | jmp <1
2232 |3:
2233 | ja ->fff_fallback // Uses the flags of the tag compare above: tag above LJ_TISNUM.
2234 | // Convert intermediate result to number and continue below.
2235 | cvtsi2sd xmm0, RB
2236 | jmp >6
2237 |4:
2238 | ja ->fff_fallback // Uses the flags of the tag compare of arg 1.
2239 |.else
2240 | jae ->fff_fallback
2241 |.endif
2242 |
2243 | movsd xmm0, qword [BASE] // xmm0 = number accumulator, seeded with arg 1.
2244 |5: // Handle numbers or integers.
2245 | cmp RA, RD; jae ->fff_resxmm0 // All arguments consumed: return the number in xmm0.
2246 | cmp dword [BASE+RA*8-4], LJ_TISNUM
2247 |.if DUALNUM
2248 | jb >6 // Tag below LJ_TISNUM: load the argument as a double.
2249 | ja ->fff_fallback
2250 | cvtsi2sd xmm1, dword [BASE+RA*8-8] // Tag equals LJ_TISNUM: convert the integer argument.
2251 | jmp >7
2252 |.else
2253 | jae ->fff_fallback
2254 |.endif
2255 |6:
2256 | movsd xmm1, qword [BASE+RA*8-8]
2257 |7:
2258 | sseop xmm0, xmm1 // Fold the argument into the accumulator.
2259 | add RA, 1
2260 | jmp <5
2261 |.endmacro
2262 |
2263 | math_minmax math_min, cmovg, minsd // min: replace the accumulator when it is greater than the argument.
2264 | math_minmax math_max, cmovl, maxsd // max: replace the accumulator when it is less than the argument.
2265 |
2266 |//-- String library -----------------------------------------------------
2267 |
2268 |.ffunc string_byte // Only handle the 1-arg case here.
2269 | cmp NARGS:RD, 1+1; jne ->fff_fallback // Exactly one argument, else use the fallback.
2270 | cmp dword [BASE+4], LJ_TSTR; jne ->fff_fallback // Arg 1 must be a string.
2271 | mov STR:RB, [BASE] // RB = string object.
2272 | mov PC, [BASE-4]
2273 | cmp dword STR:RB->len, 1
2274 | jb ->fff_res0 // Return no results for empty string.
2275 | movzx RB, byte STR:RB[1] // RB = first byte after the string header, zero-extended.
2276 |.if DUALNUM
2277 | jmp ->fff_resi // Return RB as an integer.
2278 |.else
2279 | cvtsi2sd xmm0, RB; jmp ->fff_resxmm0 // Convert to a double and return it in xmm0.
2280 |.endif
2281 |
2282 |.ffunc string_char // Only handle the 1-arg case here.
2283 | ffgccheck // NOTE(review): macro defined outside this chunk; presumably branches to fff_gcstep when a GC step is due -- confirm.
2284 | cmp NARGS:RD, 1+1; jne ->fff_fallback // *Exactly* 1 arg.
2285 | cmp dword [BASE+4], LJ_TISNUM
2286 |.if DUALNUM
2287 | jne ->fff_fallback
2288 | mov RB, dword [BASE]
2289 | cmp RB, 255; ja ->fff_fallback // Unsigned compare, so negative values fall back as well.
2290 | mov TMP2, RB // Character code goes to stack slot TMP2, which serves as the 1-byte source buffer.
2291 |.else
2292 | jae ->fff_fallback
2293 | cvttsd2si RB, qword [BASE] // Truncating conversion of the number argument.
2294 | cmp RB, 255; ja ->fff_fallback // Unsigned compare, so negative values fall back as well.
2295 | mov TMP2, RB // Character code goes to stack slot TMP2, which serves as the 1-byte source buffer.
2296 |.endif
2297 |.if X64
2298 | mov TMP3, 1 // Length 1 (x64: passed on via TMP3).
2299 |.else
2300 | mov ARG3, 1 // Length 1 (x86: stored directly as the third C argument).
2301 |.endif
2302 | lea RDa, TMP2 // Points to stack. Little-endian.
2303 |->fff_newstr: // Shared tail: create a string from the pointer in RD and the length in TMP3 (x64) or ARG3 (x86).
2304 | mov L:RB, SAVE_L
2305 | mov L:RB->base, BASE // Publish BASE to L->base before calling into C.
2306 |.if X64
2307 | mov CARG3d, TMP3 // Zero-extended to size_t.
2308 | mov CARG2, RDa // May be 64 bit ptr to stack.
2309 | mov CARG1d, L:RB
2310 |.else
2311 | mov ARG2, RD
2312 | mov ARG1, L:RB
2313 |.endif
2314 | mov SAVE_PC, PC
2315 | call extern lj_str_new // (lua_State *L, char *str, size_t l)
2316 |->fff_resstr: // Shared tail: return the string object in eax as the single result.
2317 | // GCstr * returned in eax (RD).
2318 | mov BASE, L:RB->base // Refetch BASE; it is not C callee-save.
2319 | mov PC, [BASE-4] // Fetch PC before the slot is overwritten with the result tag.
2320 | mov dword [BASE-4], LJ_TSTR // Result tag.
2321 | mov [BASE-8], STR:RD // Result value.
2322 | jmp ->fff_res1
2323 |
2324 |.ffunc string_sub // Fast function string_sub: computes pointer and length of the substring, then creates it via fff_newstr.
2325 | ffgccheck // NOTE(review): macro defined outside this chunk; presumably branches to fff_gcstep when a GC step is due -- confirm.
2326 | mov TMP2, -1 // Default end = -1, used when there is no third argument.
2327 | cmp NARGS:RD, 1+2; jb ->fff_fallback // Fewer than 2 arguments: fallback.
2328 | jna >1 // Exactly 2 arguments: keep the default end.
2329 | cmp dword [BASE+20], LJ_TISNUM // Tag of arg 3.
2330 |.if DUALNUM
2331 | jne ->fff_fallback
2332 | mov RB, dword [BASE+16]
2333 | mov TMP2, RB // end = arg 3.
2334 |.else
2335 | jae ->fff_fallback
2336 | cvttsd2si RB, qword [BASE+16]
2337 | mov TMP2, RB // end = arg 3, truncated to an integer.
2338 |.endif
2339 |1:
2340 | cmp dword [BASE+4], LJ_TSTR; jne ->fff_fallback // Arg 1 must be a string.
2341 | cmp dword [BASE+12], LJ_TISNUM // Tag of arg 2.
2342 |.if DUALNUM
2343 | jne ->fff_fallback
2344 |.else
2345 | jae ->fff_fallback
2346 |.endif
2347 | mov STR:RB, [BASE]
2348 | mov TMP3, STR:RB // Save the string pointer; RB is reused for the length.
2349 | mov RB, STR:RB->len // RB = len.
2350 |.if DUALNUM
2351 | mov RA, dword [BASE+8] // RA = start.
2352 |.else
2353 | cvttsd2si RA, qword [BASE+8] // RA = start, truncated to an integer.
2354 |.endif
2355 | mov RC, TMP2 // RC = end.
2356 | cmp RB, RC // len < end? (unsigned compare)
2357 | jb >5
2358 |2:
2359 | test RA, RA // start <= 0?
2360 | jle >7
2361 |3:
2362 | mov STR:RB, TMP3 // Reload the string pointer.
2363 | sub RC, RA // start > end?
2364 | jl ->fff_emptystr
2365 | lea RB, [STR:RB+RA+#STR-1] // RB = address of the character at 1-based index start (data follows the #STR-sized header).
2366 | add RC, 1 // RC = end - start + 1 = length.
2367 |4:
2368 |.if X64
2369 | mov TMP3, RC // Length for fff_newstr (x64).
2370 |.else
2371 | mov ARG3, RC // Length for fff_newstr (x86).
2372 |.endif
2373 | mov RD, RB // Pointer for fff_newstr.
2374 | jmp ->fff_newstr
2375 |
2376 |5: // Negative end or overflow.
2377 | jl >6 // Same flags as above, read as signed: len < end means a positive end beyond len.
2378 | lea RC, [RC+RB+1] // end = end+(len+1)
2379 | jmp <2
2380 |6: // Overflow.
2381 | mov RC, RB // end = len
2382 | jmp <2
2383 |
2384 |7: // Negative start or underflow.
2385 | je >8 // start == 0 (flags from the test above).
2386 | add RA, RB // start = start+(len+1)
2387 | add RA, 1
2388 | jg <3 // start > 0?
2389 |8: // Underflow.
2390 | mov RA, 1 // start = 1
2391 | jmp <3
2392 |
2393 |->fff_emptystr: // Range underflow.
2394 | xor RC, RC // Zero length. Any ptr in RB is ok.
2395 | jmp <4
2396 |
2397 |.macro ffstring_op, name // Defines fast function string_<name>: runs lj_buf_putstr_<name> on the global tmpbuf and returns the resulting string.
2398 | .ffunc_1 string_ .. name
2399 | ffgccheck // NOTE(review): macro defined outside this chunk; presumably branches to fff_gcstep when a GC step is due -- confirm.
2400 | cmp dword [BASE+4], LJ_TSTR; jne ->fff_fallback // Arg 1 must be a string.
2401 | mov L:RB, SAVE_L
2402 | lea SBUF:FCARG1, [DISPATCH+DISPATCH_GL(tmpbuf)] // FCARG1 = address of the tmpbuf field reached via DISPATCH.
2403 | mov L:RB->base, BASE // Publish BASE before it is clobbered by the next instruction.
2404 | mov STR:FCARG2, [BASE] // Caveat: FCARG2 == BASE
2405 | mov RCa, SBUF:FCARG1->b
2406 | mov SBUF:FCARG1->L, L:RB
2407 | mov SBUF:FCARG1->w, RCa // Set the buffer's w field to its b field (w = b).
2408 | mov SAVE_PC, PC
2409 | call extern lj_buf_putstr_ .. name .. @8
2410 | mov FCARG1, eax // The value returned in eax is the argument of the next call.
2411 | call extern lj_buf_tostr@4
2412 | jmp ->fff_resstr // fff_resstr expects the GCstr * in eax and L in RB.
2413 |.endmacro
2414 |
2415 |ffstring_op reverse
2416 |ffstring_op lower
2417 |ffstring_op upper
2418 |
2419 |//-- Bit library --------------------------------------------------------
2420 |
2421 |.macro .ffunc_bit, name, kind, fdef // Common prologue for bit.* fast functions: leaves arg 1 as a 32 bit integer in RB. fdef is the entry macro to use.
2422 | fdef name
2423 |.if kind == 2
2424 | sseconst_tobit xmm1, RBa // kind 2: load the tobit constant into xmm1 up front, so code following the macro can reuse it.
2425 |.endif
2426 | cmp dword [BASE+4], LJ_TISNUM
2427 |.if DUALNUM
2428 | jne >1
2429 | mov RB, dword [BASE] // Integer argument: use it directly.
2430 |.if kind > 0
2431 | jmp >2 // Continue with the code following the macro.
2432 |.else
2433 | jmp ->fff_resbit // kind 0: the integer is already the result.
2434 |.endif
2435 |1:
2436 | ja ->fff_fallback // Flags from the tag compare: tag above LJ_TISNUM.
2437 |.else
2438 | jae ->fff_fallback
2439 |.endif
2440 | movsd xmm0, qword [BASE]
2441 |.if kind < 2
2442 | sseconst_tobit xmm1, RBa // Load the tobit constant (clobbers RB, which is set again below).
2443 |.endif
2444 | addsd xmm0, xmm1 // Add the tobit constant ...
2445 | movd RB, xmm0 // ... and take the low 32 bits of the sum as the integer.
2446 |2:
2447 |.endmacro
2448 |
2449 |.macro .ffunc_bit, name, kind // Two-parameter form: uses .ffunc_1 as the entry macro.
2450 | .ffunc_bit name, kind, .ffunc_1
2451 |.endmacro
2452 |
2453 |.ffunc_bit bit_tobit, 0 // Fast function bit_tobit: the converted argument in RB is the result.
2454 | jmp ->fff_resbit
2455 |
2456 |.macro .ffunc_bit_op, name, ins // Defines a variadic bit op: folds arguments n..2 into RB (arg 1) using ins.
2457 | .ffunc_bit name, 2 // RB = arg 1 as integer; xmm1 = tobit constant (kind 2).
2458 | mov TMP2, NARGS:RD // Save for fallback.
2459 | lea RD, [BASE+NARGS:RD*8-16] // RD = address of the last argument (NARGS is nargs+1).
2460 |1:
2461 | cmp RD, BASE
2462 | jbe ->fff_resbit // Pointer has reached arg 1: done, result in RB.
2463 | cmp dword [RD+4], LJ_TISNUM
2464 |.if DUALNUM
2465 | jne >2
2466 | ins RB, dword [RD] // Integer argument: apply directly from memory.
2467 | sub RD, 8 // Step down to the previous argument.
2468 | jmp <1
2469 |2:
2470 | ja ->fff_fallback_bit_op // Flags from the tag compare: tag above LJ_TISNUM.
2471 |.else
2472 | jae ->fff_fallback_bit_op
2473 |.endif
2474 | movsd xmm0, qword [RD]
2475 | addsd xmm0, xmm1 // Same conversion as in .ffunc_bit: add the tobit constant ...
2476 | movd RA, xmm0 // ... and take the low 32 bits.
2477 | ins RB, RA
2478 | sub RD, 8 // Step down to the previous argument.
2479 | jmp <1
2480 |.endmacro
2481 |
2482 |.ffunc_bit_op bit_band, and
2483 |.ffunc_bit_op bit_bor, or
2484 |.ffunc_bit_op bit_bxor, xor
2485 |
2486 |.ffunc_bit bit_bswap, 1 // Fast function bit_bswap: byte-swap the converted argument in RB.
2487 | bswap RB
2488 | jmp ->fff_resbit
2489 |
2490 |.ffunc_bit bit_bnot, 1 // Fast function bit_bnot: bitwise complement of the converted argument in RB.
2491 | not RB
2492 |.if DUALNUM
2493 | jmp ->fff_resbit // DUALNUM: fff_resbit is not defined in this block.
2494 |.else
2495 |->fff_resbit: // Non-DUALNUM: shared tail for all bit.* functions; bit_bnot falls through into it.
2496 | cvtsi2sd xmm0, RB // Convert the integer result to a double.
2497 | jmp ->fff_resxmm0
2498 |.endif
2499 |
2500 |->fff_fallback_bit_op: // Fallback entry for .ffunc_bit_op, which overwrites RD while iterating.
2501 | mov NARGS:RD, TMP2 // Restore for fallback
2502 | jmp ->fff_fallback
2503 |
2504 |.macro .ffunc_bit_sh, name, ins // Defines a two-argument shift/rotate: RB = arg 1, shift count = low byte of arg 2.
2505 |.if DUALNUM
2506 | .ffunc_bit name, 1, .ffunc_2 // RB = arg 1 as integer, using .ffunc_2 as the entry macro.
2507 | // Note: no inline conversion from number for 2nd argument!
2508 | cmp dword [BASE+12], LJ_TISNUM; jne ->fff_fallback
2509 | mov RA, dword [BASE+8] // RA = shift count.
2510 |.else
2511 | .ffunc_nnsse name // Both arguments in xmm0/xmm1 (they are converted from there below).
2512 | sseconst_tobit xmm2, RBa
2513 | addsd xmm0, xmm2 // Convert both arguments by adding the tobit constant ...
2514 | addsd xmm1, xmm2
2515 | movd RB, xmm0 // ... and taking the low 32 bits of each sum.
2516 | movd RA, xmm1
2517 |.endif
2518 | ins RB, cl // Assumes RA is ecx.
2519 | jmp ->fff_resbit
2520 |.endmacro
2521 |
2522 |.ffunc_bit_sh bit_lshift, shl
2523 |.ffunc_bit_sh bit_rshift, shr
2524 |.ffunc_bit_sh bit_arshift, sar
2525 |.ffunc_bit_sh bit_rol, rol
2526 |.ffunc_bit_sh bit_ror, ror
2527 |
2528 |//-----------------------------------------------------------------------
2529 |
2530 |->fff_fallback_2: // Fallback entry that forces the argument count to 2.
2531 | mov NARGS:RD, 1+2 // Other args are ignored, anyway.
2532 | jmp ->fff_fallback
2533 |->fff_fallback_1: // Fallback entry that forces the argument count to 1.
2534 | mov NARGS:RD, 1+1 // Other args are ignored, anyway.
2535 |->fff_fallback: // Call fast function fallback handler.
2536 | // BASE = new base, RD = nargs+1
2537 | mov L:RB, SAVE_L
2538 | mov PC, [BASE-4] // Fallback may overwrite PC.
2539 | mov SAVE_PC, PC // Redundant (but a defined value).
2540 | mov L:RB->base, BASE
2541 | lea RD, [BASE+NARGS:RD*8-8] // RD = BASE + nargs*8, the slot after the last argument.
2542 | lea RA, [RD+8*LUA_MINSTACK] // Ensure enough space for handler.
2543 | mov L:RB->top, RD
2544 | mov CFUNC:RD, [BASE-8] // RD = function object stored at [BASE-8].
2545 | cmp RA, L:RB->maxstack
2546 | ja >5 // Need to grow stack.
2547 |.if X64
2548 | mov CARG1d, L:RB
2549 |.else
2550 | mov ARG1, L:RB
2551 |.endif
2552 | call aword CFUNC:RD->f // (lua_State *L)
2553 | mov BASE, L:RB->base // Refetch BASE; it is not C callee-save.
2554 | // Either throws an error, or recovers and returns -1, 0 or nresults+1.
2555 | test RD, RD; jg ->fff_res // Returned nresults+1?
2556 |1:
2557 | mov RA, L:RB->top
2558 | sub RA, BASE
2559 | shr RA, 3 // RA = (top - base) / 8 = number of slots.
2560 | test RD, RD // Set flags from the return value; the lea and mov below leave them intact for the jne.
2561 | lea NARGS:RD, [RA+1] // RD = nargs+1.
2562 | mov LFUNC:RB, [BASE-8]
2563 | jne ->vm_call_tail // Returned -1?
2564 | ins_callt // Returned 0: retry fast path.
2565 |
2566 |// Reconstruct previous base for vmeta_call during tailcall.
2567 |->vm_call_tail:
2568 | mov RA, BASE // Keep the current base in RA; BASE is rewound below.
2569 | test PC, FRAME_TYPE
2570 | jnz >3 // Frame type bits set in PC: use the delta encoded in PC instead.
2571 | movzx RB, PC_RA // RB = RA operand read via PC_RA.
2572 | not RBa // Note: ~RB = -(RB+1)
2573 | lea BASE, [BASE+RB*8] // base = base - (RB+1)*8
2574 | jmp ->vm_call_dispatch // Resolve again for tailcall.
2575 |3:
2576 | mov RB, PC
2577 | and RB, -8 // Clear the low 3 bits of PC to get the delta.
2578 | sub BASE, RB
2579 | jmp ->vm_call_dispatch // Resolve again for tailcall.
2580 |
2581 |5: // Grow stack for fallback handler.
2582 | mov FCARG2, LUA_MINSTACK
2583 | mov FCARG1, L:RB
2584 | call extern lj_state_growstack@8 // (lua_State *L, int n)
2585 | mov BASE, L:RB->base // Refetch BASE from L->base after the call.
2586 | xor RD, RD // Simulate a return 0.
2587 | jmp <1 // Dumb retry (goes through ff first).
2588 |
2589 |->fff_gcstep: // Call GC step function.
2590 | // BASE = new base, RD = nargs+1
2591 | pop RBa // Must keep stack at same level.
2592 | mov TMPa, RBa // Save return address
2593 | mov L:RB, SAVE_L
2594 | mov SAVE_PC, PC // Redundant (but a defined value).
2595 | mov L:RB->base, BASE
2596 | lea RD, [BASE+NARGS:RD*8-8] // RD = BASE + nargs*8, the slot after the last argument.
2597 | mov FCARG1, L:RB
2598 | mov L:RB->top, RD
2599 | call extern lj_gc_step@4 // (lua_State *L)
2600 | mov BASE, L:RB->base // Refetch BASE from L->base after the call.
2601 | mov RD, L:RB->top
2602 | sub RD, BASE
2603 | shr RD, 3
2604 | add NARGS:RD, 1 // Recompute RD = nargs+1 from top and base.
2605 | mov RBa, TMPa
2606 | push RBa // Restore return address.
2607 | ret
2608 |
2609 |//-----------------------------------------------------------------------
2610 |//-- Special dispatch targets -------------------------------------------
2611 |//-----------------------------------------------------------------------
2612 |
2613 |->vm_record: // Dispatch target for recording phase.
2614 |.if JIT
2615 | movzx RD, byte [DISPATCH+DISPATCH_GL(hookmask)] // RD = hookmask byte.
2616 | test RDL, HOOK_VMEVENT // No recording while in vmevent.
2617 | jnz >5 // Label 5 (further below) re-dispatches to the static instruction.
2618 | // Decrement the hookcount for consistency, but always do the call.
2619 | test RDL, HOOK_ACTIVE
2620 | jnz >1
2621 | test RDL, LUA_MASKLINE|LUA_MASKCOUNT
2622 | jz >1
2623 | dec dword [DISPATCH+DISPATCH_GL(hookcount)]
2624 | jmp >1 // Label 1 (further below) calls lj_dispatch_ins.
2625 |.endif
2626 |
2627 |->vm_rethook: // Dispatch target for return hooks.
2628 | movzx RD, byte [DISPATCH+DISPATCH_GL(hookmask)] // RD = hookmask byte.
2629 | test RDL, HOOK_ACTIVE // Hook already active?
2630 | jnz >5 // Yes: re-dispatch to the static instruction without calling out.
2631 | jmp >1 // No: call lj_dispatch_ins at label 1 below.
2632 |
2633 |->vm_inshook: // Dispatch target for instr/line hooks.
2634 | movzx RD, byte [DISPATCH+DISPATCH_GL(hookmask)] // RD = hookmask byte.
2635 | test RDL, HOOK_ACTIVE // Hook already active?
2636 | jnz >5
2637 |
2638 | test RDL, LUA_MASKLINE|LUA_MASKCOUNT
2639 | jz >5 // Neither line nor count hook set: just re-dispatch.
2640 | dec dword [DISPATCH+DISPATCH_GL(hookcount)]
2641 | jz >1 // Counter reached zero: call the dispatcher.
2642 | test RDL, LUA_MASKLINE
2643 | jz >5 // Counter not zero and no line hook: just re-dispatch.
2644 |1:
2645 | mov L:RB, SAVE_L
2646 | mov L:RB->base, BASE // Publish BASE before FCARG2 (same register) is loaded.
2647 | mov FCARG2, PC // Caveat: FCARG2 == BASE
2648 | mov FCARG1, L:RB
2649 | // SAVE_PC must hold the _previous_ PC. The callee updates it with PC.
2650 | call extern lj_dispatch_ins@8 // (lua_State *L, const BCIns *pc)
2651 |3:
2652 | mov BASE, L:RB->base // Refetch BASE from L->base.
2653 |4:
2654 | movzx RA, PC_RA // Re-decode the RA operand.
2655 |5:
2656 | movzx OP, PC_OP // Re-decode the opcode.
2657 | movzx RD, PC_RD // Re-decode the RD operand.
2658 |.if X64
2659 | jmp aword [DISPATCH+OP*8+GG_DISP2STATIC] // Re-dispatch to static ins.
2660 |.else
2661 | jmp aword [DISPATCH+OP*4+GG_DISP2STATIC] // Re-dispatch to static ins.
2662 |.endif
2663 |
2664 |->cont_hook: // Continue from hook yield.
2665 | add PC, 4 // Advance PC by one 4-byte instruction.
2666 | mov RA, [RB-24]
2667 | mov MULTRES, RA // Restore MULTRES for *M ins.
2668 | jmp <4
2669 |
2670 |->vm_hotloop: // Hot loop counter underflow.
2671 |.if JIT
2672 | mov LFUNC:RB, [BASE-8] // Same as curr_topL(L).
2673 | mov RB, LFUNC:RB->pc
2674 | movzx RD, byte [RB+PC2PROTO(framesize)] // RD = framesize byte, addressed relative to the function's pc.
2675 | lea RD, [BASE+RD*8] // RD = BASE + framesize*8.
2676 | mov L:RB, SAVE_L
2677 | mov L:RB->base, BASE
2678 | mov L:RB->top, RD
2679 | mov FCARG2, PC
2680 | lea FCARG1, [DISPATCH+GG_DISP2J] // FCARG1 = jit_State *, addressed relative to DISPATCH.
2681 | mov aword [DISPATCH+DISPATCH_J(L)], L:RBa // Store L into J->L.
2682 | mov SAVE_PC, PC
2683 | call extern lj_trace_hot@8 // (jit_State *J, const BCIns *pc)
2684 | jmp <3 // Refetch BASE and re-dispatch via the vm_inshook tail.
2685 |.endif
2686 |
2687 |->vm_callhook: // Dispatch target for call hooks.
2688 | mov SAVE_PC, PC
2689 |.if JIT
2690 | jmp >1 // Skip the hot call marker; without JIT this simply falls through.
2691 |.endif
2692 |
2693 |->vm_hotcall: // Hot call counter underflow.
2694 |.if JIT
2695 | mov SAVE_PC, PC
2696 | or PC, 1 // Marker for hot call.
2697 |1:
2698 |.endif
2699 | lea RD, [BASE+NARGS:RD*8-8] // RD = BASE + nargs*8, the slot after the last argument.
2700 | mov L:RB, SAVE_L
2701 | mov L:RB->base, BASE
2702 | mov L:RB->top, RD
2703 | mov FCARG2, PC
2704 | mov FCARG1, L:RB
2705 | call extern lj_dispatch_call@8 // (lua_State *L, const BCIns *pc)
2706 | // ASMFunction returned in eax/rax (RDa).
2707 | mov SAVE_PC, 0 // Invalidate for subsequent line hook.
2708 |.if JIT
2709 | and PC, -2 // Clear the hot call marker bit again.
2710 |.endif
2711 | mov BASE, L:RB->base // Refetch BASE from L->base.
2712 | mov RAa, RDa // Park the returned function address in RA while RD is recomputed.
2713 | mov RD, L:RB->top
2714 | sub RD, BASE
2715 | mov RBa, RAa // Move the function address to RB, freeing RA for the operand decode.
2716 | movzx RA, PC_RA
2717 | shr RD, 3
2718 | add NARGS:RD, 1 // RD = (top - base)/8 + 1 = nargs+1.
2719 | jmp RBa // Jump to the returned ASMFunction.
2720 |
2721 |->cont_stitch: // Trace stitching.
2722 |.if JIT
2723 | // BASE = base, RC = result, RB = mbase
2724 | mov TRACE:RA, [RB-24] // Save previous trace.
2725 | mov TMP1, TRACE:RA
2726 | mov TMP3, DISPATCH // Need one more register.
2727 | mov DISPATCH, MULTRES // DISPATCH temporarily holds MULTRES as the copy counter.
2728 | movzx RA, PC_RA
2729 | lea RA, [BASE+RA*8] // Call base.
2730 | sub DISPATCH, 1 // Counter minus one = number of results to move.
2731 | jz >2
2732 |1: // Move results down.
2733 |.if X64
2734 | mov RBa, [RC]
2735 | mov [RA], RBa
2736 |.else
2737 | mov RB, [RC]
2738 | mov [RA], RB
2739 | mov RB, [RC+4]
2740 | mov [RA+4], RB
2741 |.endif
2742 | add RC, 8
2743 | add RA, 8
2744 | sub DISPATCH, 1
2745 | jnz <1
2746 |2:
2747 | movzx RC, PC_RA
2748 | movzx RB, PC_RB
2749 | add RC, RB
2750 | lea RC, [BASE+RC*8-8] // RC = BASE + (RA operand + RB operand - 1)*8: end of the wanted results.
2751 |3:
2752 | cmp RC, RA
2753 | ja >9 // More results wanted?
2754 |
2755 | mov DISPATCH, TMP3 // Restore DISPATCH.
2756 | mov TRACE:RD, TMP1 // Get previous trace.
2757 | movzx RB, word TRACE:RD->traceno
2758 | movzx RD, word TRACE:RD->link
2759 | cmp RD, RB
2760 | je ->cont_nop // Blacklisted.
2761 | test RD, RD
2762 | jne =>BC_JLOOP // Jump to stitched trace.
2763 |
2764 | // Stitch a new trace to the previous trace.
2765 | mov [DISPATCH+DISPATCH_J(exitno)], RB // J->exitno = trace number of the previous trace.
2766 | mov L:RB, SAVE_L
2767 | mov L:RB->base, BASE
2768 | mov FCARG2, PC
2769 | lea FCARG1, [DISPATCH+GG_DISP2J] // FCARG1 = jit_State *, addressed relative to DISPATCH.
2770 | mov aword [DISPATCH+DISPATCH_J(L)], L:RBa // Store L into J->L.
2771 | call extern lj_dispatch_stitch@8 // (jit_State *J, const BCIns *pc)
2772 | mov BASE, L:RB->base // Refetch BASE from L->base.
2773 | jmp ->cont_nop
2774 |
2775 |9: // Fill up results with nil.
2776 | mov dword [RA+4], LJ_TNIL
2777 | add RA, 8
2778 | jmp <3
2779 |.endif
2780 |
2781 |->vm_profhook: // Dispatch target for profiler hook.
2782 #if LJ_HASPROFILE
2783 | mov L:RB, SAVE_L
2784 | mov L:RB->base, BASE // Publish BASE before FCARG2 (same register) is loaded.
2785 | mov FCARG2, PC // Caveat: FCARG2 == BASE
2786 | mov FCARG1, L:RB
2787 | call extern lj_dispatch_profile@8 // (lua_State *L, const BCIns *pc)
2788 | mov BASE, L:RB->base // Refetch BASE from L->base.
2789 | // HOOK_PROFILE is off again, so re-dispatch to dynamic instruction.
2790 | sub PC, 4 // Step PC back by one 4-byte instruction.
2791 | jmp ->cont_nop
2792 #endif
2793 |
2794 |//-----------------------------------------------------------------------
2795 |//-- Trace exit handler -------------------------------------------------
2796 |//-----------------------------------------------------------------------
2797 |
2798 |// Called from an exit stub with the exit number on the stack.
2799 |// The 16 bit exit number is stored with two (sign-extended) push imm8.
2800 |->vm_exit_handler:
2801 |.if JIT
2802 |.if X64
2803 | push r13; push r12
2804 | push r11; push r10; push r9; push r8
2805 | push rdi; push rsi; push rbp; lea rbp, [rsp+88]; push rbp // After 9 pushes (72 bytes), rsp+88 is 16 bytes above the entry rsp, i.e. just past the two exit number slots.
2806 | push rbx; push rdx; push rcx; push rax
2807 | movzx RC, byte [rbp-8] // Reconstruct exit number.
2808 | mov RCH, byte [rbp-16] // Second pushed byte becomes bits 8-15.
2809 | mov [rbp-8], r15; mov [rbp-16], r14 // Reuse the two exit number slots to store r15 and r14.
2810 |.else
2811 | push ebp; lea ebp, [esp+12]; push ebp // esp+12 is 8 bytes above the entry esp, i.e. just past the two exit number slots.
2812 | push ebx; push edx; push ecx; push eax
2813 | movzx RC, byte [ebp-4] // Reconstruct exit number.
2814 | mov RCH, byte [ebp-8] // Second pushed byte becomes bits 8-15.
2815 | mov [ebp-4], edi; mov [ebp-8], esi // Reuse the two exit number slots to store edi and esi.
2816 |.endif
2817 | // Caveat: DISPATCH is ebx.
2818 | mov DISPATCH, [ebp] // Load DISPATCH from the stack slot just above the exit number slots.
2819 | mov RA, [DISPATCH+DISPATCH_GL(vmstate)] // Get trace number.
2820 | set_vmstate EXIT
2821 | mov [DISPATCH+DISPATCH_J(exitno)], RC
2822 | mov [DISPATCH+DISPATCH_J(parent)], RA
2823 |.if X64
2824 |.if X64WIN
2825 | sub rsp, 16*8+4*8 // Room for SSE regs + save area.
2826 |.else
2827 | sub rsp, 16*8 // Room for SSE regs.
2828 |.endif
2829 | add rbp, -128 // rbp now equals the rsp value after the GPR pushes (14 pushes + 2 slots = 128 bytes).
2830 | movsd qword [rbp-8], xmm15; movsd qword [rbp-16], xmm14
2831 | movsd qword [rbp-24], xmm13; movsd qword [rbp-32], xmm12
2832 | movsd qword [rbp-40], xmm11; movsd qword [rbp-48], xmm10
2833 | movsd qword [rbp-56], xmm9; movsd qword [rbp-64], xmm8
2834 | movsd qword [rbp-72], xmm7; movsd qword [rbp-80], xmm6
2835 | movsd qword [rbp-88], xmm5; movsd qword [rbp-96], xmm4
2836 | movsd qword [rbp-104], xmm3; movsd qword [rbp-112], xmm2
2837 | movsd qword [rbp-120], xmm1; movsd qword [rbp-128], xmm0
2838 |.else
2839 | sub esp, 8*8+16 // Room for SSE regs + args.
2840 | movsd qword [ebp-40], xmm7; movsd qword [ebp-48], xmm6
2841 | movsd qword [ebp-56], xmm5; movsd qword [ebp-64], xmm4
2842 | movsd qword [ebp-72], xmm3; movsd qword [ebp-80], xmm2
2843 | movsd qword [ebp-88], xmm1; movsd qword [ebp-96], xmm0
2844 |.endif
2845 | // Caveat: RB is ebp.
2846 | mov L:RB, [DISPATCH+DISPATCH_GL(cur_L)]
2847 | mov BASE, [DISPATCH+DISPATCH_GL(jit_base)]
2848 | mov aword [DISPATCH+DISPATCH_J(L)], L:RBa // Store L into J->L.
2849 | mov L:RB->base, BASE
2850 |.if X64WIN
2851 | lea CARG2, [rsp+4*8] // Second argument: saved state, located above the 4*8 byte save area.
2852 |.elif X64
2853 | mov CARG2, rsp // Second argument: saved state starts at rsp.
2854 |.else
2855 | lea FCARG2, [esp+16] // Second argument: saved state, located above the 16 bytes reserved for args.
2856 |.endif
2857 | lea FCARG1, [DISPATCH+GG_DISP2J] // First argument: jit_State *, addressed relative to DISPATCH.
2858 | mov dword [DISPATCH+DISPATCH_GL(jit_base)], 0 // Clear jit_base before calling out.
2859 | call extern lj_trace_exit@8 // (jit_State *J, ExitState *ex)
2860 | // MULTRES or negated error code returned in eax (RD).
2861 | mov RAa, L:RB->cframe
2862 | and RAa, CFRAME_RAWMASK // Mask the cframe value to get the raw C frame pointer.
2863 |.if X64WIN
2864 | // Reposition stack later.
2865 |.elif X64
2866 | mov rsp, RAa // Reposition stack to C frame.
2867 |.else
2868 | mov esp, RAa // Reposition stack to C frame.
2869 |.endif
2870 | mov [RAa+CFRAME_OFS_L], L:RB // Set SAVE_L (on-trace resume/yield).
2871 | mov BASE, L:RB->base
2872 | mov PC, [RAa+CFRAME_OFS_PC] // Get SAVE_PC.
2873 |.if X64
2874 | jmp >1 // Enter vm_exit_interp at its label 1, skipping its stack adjustment.
2875 |.endif
2876 |.endif
2877 |->vm_exit_interp: // Resume the interpreter after a trace exit.
2878 | // RD = MULTRES or negated error code, BASE, PC and DISPATCH set.
2879 |.if JIT
2880 |.if X64
2881 | // Restore additional callee-save registers only used in compiled code.
2882 |.if X64WIN
2883 | lea RAa, [rsp+9*16+4*8] // Direct entry: compute the C frame address from rsp.
2884 |1: // Entry from vm_exit_handler, which has already loaded RAa with the C frame address.
2885 | movdqa xmm15, [RAa-9*16]
2886 | movdqa xmm14, [RAa-8*16]
2887 | movdqa xmm13, [RAa-7*16]
2888 | movdqa xmm12, [RAa-6*16]
2889 | movdqa xmm11, [RAa-5*16]
2890 | movdqa xmm10, [RAa-4*16]
2891 | movdqa xmm9, [RAa-3*16]
2892 | movdqa xmm8, [RAa-2*16]
2893 | movdqa xmm7, [RAa-1*16]
2894 | mov rsp, RAa // Reposition stack to C frame.
2895 | movdqa xmm6, [RAa]
2896 | mov r15, CSAVE_3
2897 | mov r14, CSAVE_4
2898 |.else
2899 | add rsp, 16 // Reposition stack to C frame.
2900 |1: // Entry from vm_exit_handler, which has already repositioned rsp.
2901 |.endif
2902 | mov r13, TMPa
2903 | mov r12, TMPQ
2904 |.endif
2905 | cmp RD, -LUA_ERRERR; jae >9 // Check for error from exit.
2906 | mov L:RB, SAVE_L
2907 | mov MULTRES, RD
2908 | mov LFUNC:KBASE, [BASE-8] // Reload KBASE: function object at [BASE-8] ...
2909 | mov KBASE, LFUNC:KBASE->pc // ... its pc ...
2910 | mov KBASE, [KBASE+PC2PROTO(k)] // ... and the k field addressed relative to that pc.
2911 | mov L:RB->base, BASE
2912 | mov dword [DISPATCH+DISPATCH_GL(jit_base)], 0
2913 | set_vmstate INTERP
2914 | // Modified copy of ins_next which handles function header dispatch, too.
2915 | mov RC, [PC] // Fetch the 32 bit instruction word.
2916 | movzx RA, RCH // Bits 8-15: RA operand.
2917 | movzx OP, RCL // Bits 0-7: opcode.
2918 | add PC, 4
2919 | shr RC, 16 // Bits 16-31: RD operand.
2920 | cmp MULTRES, -17 // Static dispatch?
2921 | je >5
2922 | cmp OP, BC_FUNCF // Function header?
2923 | jb >3
2924 | cmp OP, BC_FUNCC+2 // Fast function?
2925 | jae >4
2926 |2:
2927 | mov RC, MULTRES // RC/RD holds nres+1.
2928 |3:
2929 |.if X64
2930 | jmp aword [DISPATCH+OP*8] // Dispatch through the table at DISPATCH (8 byte entries).
2931 |.else
2932 | jmp aword [DISPATCH+OP*4] // Dispatch through the table at DISPATCH (4 byte entries).
2933 |.endif
2934 |
2935 |4: // Check frame below fast function.
2936 | mov RC, [BASE-4]
2937 | test RC, FRAME_TYPE
2938 | jnz <2 // Trace stitching continuation?
2939 | // Otherwise set KBASE for Lua function below fast function.
2940 | movzx RC, byte [RC-3] // Byte at offset 1 of the instruction preceding the PC held in RC.
2941 | not RCa // ~RC = -(RC+1), used as a negative slot index.
2942 | mov LFUNC:KBASE, [BASE+RC*8-8]
2943 | mov KBASE, LFUNC:KBASE->pc
2944 | mov KBASE, [KBASE+PC2PROTO(k)]
2945 | jmp <2
2946 |
2947 |5: // Dispatch to static entry of original ins replaced by BC_JLOOP.
2948 | mov RA, [DISPATCH+DISPATCH_J(trace)] // RA = J->trace array.
2949 | mov TRACE:RA, [RA+RD*4] // Index it with RD (4 byte entries).
2950 | mov RC, TRACE:RA->startins // Original instruction word saved in the trace.
2951 | movzx RA, RCH
2952 | movzx OP, RCL
2953 | shr RC, 16
2954 |.if X64
2955 | jmp aword [DISPATCH+OP*8+GG_DISP2STATIC]
2956 |.else
2957 | jmp aword [DISPATCH+OP*4+GG_DISP2STATIC]
2958 |.endif
2959 |
2960 |9: // Rethrow error from the right C frame.
2961 | mov FCARG2, RD
2962 | mov FCARG1, L:RB
2963 | neg FCARG2 // RD holds the negated error code; negate it back.
2964 | call extern lj_err_trace@8 // (lua_State *L, int errcode)
2965 |.endif
2966 |
2967 |//-----------------------------------------------------------------------
2968 |//-- Math helper functions ----------------------------------------------
2969 |//-----------------------------------------------------------------------
2970 |
2971 |// FP value rounding. Called by math.floor/math.ceil fast functions
2972 |// and from JIT code. arg/ret is xmm0. xmm0-xmm3 and RD (eax) modified.
2973 |.macro vm_round, name, mode, cond // mode: 0 = floor, 1 = ceil, 2 = trunc. cond enables the extra x86 stack-argument entry point.
2974 |->name:
2975 |.if not X64 and cond
2976 | movsd xmm0, qword [esp+4] // x86 entry: the argument is on the stack.
2977 | call ->name .. _sse
2978 | movsd qword [esp+4], xmm0 // Overwrite callee-owned arg.
2979 | fld qword [esp+4] // Return the result on the x87 stack.
2980 | ret
2981 |.endif
2982 |
2983 |->name .. _sse: // SSE entry: argument and result in xmm0.
2984 | sseconst_abs xmm2, RDa // xmm2 = mask that clears the sign bit.
2985 | sseconst_2p52 xmm3, RDa // xmm3 = 2^52.
2986 | movaps xmm1, xmm0
2987 | andpd xmm1, xmm2 // |x|
2988 | ucomisd xmm3, xmm1 // No truncation if 2^52 <= |x|.
2989 | jbe >1 // Also taken for NaN (unordered compare); xmm0 is returned unchanged.
2990 | andnpd xmm2, xmm0 // Isolate sign bit.
2991 |.if mode == 2 // trunc(x)?
2992 | movaps xmm0, xmm1 // xmm0 = |x|, kept for the comparison below.
2993 | addsd xmm1, xmm3 // (|x| + 2^52) - 2^52
2994 | subsd xmm1, xmm3
2995 | sseconst_1 xmm3, RDa
2996 | cmpsd xmm0, xmm1, 1 // |x| < result?
2997 | andpd xmm0, xmm3 // Turn the all-ones compare mask into 1.0 (or 0.0).
2998 | subsd xmm1, xmm0 // If yes, subtract 1.
2999 | orpd xmm1, xmm2 // Merge sign bit back in.
3000 |.else
3001 | addsd xmm1, xmm3 // (|x| + 2^52) - 2^52
3002 | subsd xmm1, xmm3
3003 | orpd xmm1, xmm2 // Merge sign bit back in.
3004 | sseconst_1 xmm3, RDa
3005 | .if mode == 1 // ceil(x)?
3006 | cmpsd xmm0, xmm1, 6 // x > result?
3007 | andpd xmm0, xmm3 // Turn the all-ones compare mask into 1.0 (or 0.0).
3008 | addsd xmm1, xmm0 // If yes, add 1.
3009 | orpd xmm1, xmm2 // Merge sign bit back in (again).
3010 | .else // floor(x)?
3011 | cmpsd xmm0, xmm1, 1 // x < result?
3012 | andpd xmm0, xmm3 // Turn the all-ones compare mask into 1.0 (or 0.0).
3013 | subsd xmm1, xmm0 // If yes, subtract 1.
3014 | .endif
3015 |.endif
3016 | movaps xmm0, xmm1 // Result goes back to xmm0.
3017 |1:
3018 | ret
3019 |.endmacro
3020 |
3021 | vm_round vm_floor, 0, 1 // floor: x86 stack-argument entry always emitted.
3022 | vm_round vm_ceil, 1, JIT // ceil: x86 stack-argument entry only emitted with JIT.
3023 | vm_round vm_trunc, 2, JIT // trunc: x86 stack-argument entry only emitted with JIT.
3024 |
3025 |// FP modulo x%y. Called by BC_MOD* and vm_arith.
3026 |->vm_mod:
3027 |// Args in xmm0/xmm1, return value in xmm0.
3028 |// Caveat: xmm0-xmm5 and RC (eax) modified!
3029 | movaps xmm5, xmm0 // Keep x in xmm5.
3030 | divsd xmm0, xmm1 // xmm0 = x/y.
3031 | sseconst_abs xmm2, RDa // xmm2 = mask that clears the sign bit.
3032 | sseconst_2p52 xmm3, RDa // xmm3 = 2^52.
3033 | movaps xmm4, xmm0
3034 | andpd xmm4, xmm2 // |x/y|
3035 | ucomisd xmm3, xmm4 // No truncation if 2^52 <= |x/y|.
3036 | jbe >1
3037 | andnpd xmm2, xmm0 // Isolate sign bit.
3038 | addsd xmm4, xmm3 // (|x/y| + 2^52) - 2^52
3039 | subsd xmm4, xmm3
3040 | orpd xmm4, xmm2 // Merge sign bit back in.
3041 | sseconst_1 xmm2, RDa
3042 | cmpsd xmm0, xmm4, 1 // x/y < result?
3043 | andpd xmm0, xmm2 // Turn the all-ones compare mask into 1.0 (or 0.0).
3044 | subsd xmm4, xmm0 // If yes, subtract 1.0.
3045 | movaps xmm0, xmm5 // xmm0 = x.
3046 | mulsd xmm1, xmm4 // xmm1 = y * floor(x/y).
3047 | subsd xmm0, xmm1 // Result: x - y*floor(x/y).
3048 | ret
3049 |1: // No rounding step taken: use the quotient as is.
3050 | mulsd xmm1, xmm0 // xmm1 = y * (x/y).
3051 | movaps xmm0, xmm5 // xmm0 = x.
3052 | subsd xmm0, xmm1 // Result: x - y*(x/y).
3053 | ret
3054 |
3055 |//-----------------------------------------------------------------------
3056 |//-- Miscellaneous functions --------------------------------------------
3057 |//-----------------------------------------------------------------------
3058 |
3059 |// int lj_vm_cpuid(uint32_t f, uint32_t res[4])
3060 |->vm_cpuid:
3061 |.if X64
3062 | mov eax, CARG1d // eax = function number f.
3063 | .if X64WIN; push rsi; mov rsi, CARG2; .endif // Windows x64: save rsi and load the result pointer into it, so the stores below can use rsi on both ABIs.
3064 | push rbx // cpuid overwrites ebx; save rbx.
3065 | xor ecx, ecx // Clear ecx before cpuid.
3066 | cpuid
3067 | mov [rsi], eax // res[0..3] = eax, ebx, ecx, edx.
3068 | mov [rsi+4], ebx
3069 | mov [rsi+8], ecx
3070 | mov [rsi+12], edx
3071 | pop rbx
3072 | .if X64WIN; pop rsi; .endif
3073 | ret
3074 |.else
3075 | pushfd
3076 | pop edx // edx = current flags.
3077 | mov ecx, edx // Keep a copy of the original flags for the comparison.
3078 | xor edx, 0x00200000 // Toggle ID bit in flags.
3079 | push edx
3080 | popfd // Try to write the toggled flags ...
3081 | pushfd
3082 | pop edx // ... and read them back.
3083 | xor eax, eax // Zero means no features supported.
3084 | cmp ecx, edx
3085 | jz >1 // No ID toggle means no CPUID support.
3086 | mov eax, [esp+4] // Argument 1 is function number.
3087 | push edi
3088 | push ebx // cpuid overwrites ebx; save it.
3089 | xor ecx, ecx // Clear ecx before cpuid.
3090 | cpuid
3091 | mov edi, [esp+16] // Argument 2 is result area.
3092 | mov [edi], eax // res[0..3] = eax, ebx, ecx, edx.
3093 | mov [edi+4], ebx
3094 | mov [edi+8], ecx
3095 | mov [edi+12], edx
3096 | pop ebx
3097 | pop edi
3098 |1:
3099 | ret
3100 |.endif
3101 |
3102 |.define NEXT_TAB, TAB:FCARG1 // Table argument of vm_next.
3103 |.define NEXT_IDX, FCARG2 // Index argument of vm_next.
3104 |.define NEXT_PTR, RCa // Pointer returned by vm_next (RC is eax).
3105 |.define NEXT_PTRd, RC // 32 bit view of NEXT_PTR.
3106 |.macro NEXT_RES_IDXL, op2; lea edx, [NEXT_IDX+op2]; .endmacro // Return index in edx = NEXT_IDX + op2, computed with lea.
3107 |.if X64
3108 |.define NEXT_TMP, CARG3d // Scratch register (32 bit view).
3109 |.define NEXT_TMPq, CARG3 // Scratch register (64 bit view).
3110 |.define NEXT_ASIZE, CARG4d // Holds the table's asize.
3111 |.macro NEXT_ENTER; .endmacro // x64: nothing to save on entry.
3112 |.macro NEXT_LEAVE; ret; .endmacro
3113 |.if X64WIN
3114 |.define NEXT_RES_PTR, [rsp+aword*5] // Stack location used by vm_next as the result area (Windows x64).
3115 |.macro NEXT_RES_IDX, op2; add NEXT_IDX, op2; .endmacro // Return index: add op2 to NEXT_IDX in place.
3116 |.else
3117 |.define NEXT_RES_PTR, [rsp+aword*1] // Stack location used by vm_next as the result area (other x64).
3118 |.macro NEXT_RES_IDX, op2; lea edx, [NEXT_IDX+op2]; .endmacro // Return index in edx = NEXT_IDX + op2.
3119 |.endif
3120 |.else
3121 |.define NEXT_ASIZE, esi // Holds the table's asize.
3122 |.define NEXT_TMP, edi // Scratch register.
3123 |.macro NEXT_ENTER; push esi; push edi; .endmacro // x86: save the two registers used as NEXT_ASIZE and NEXT_TMP.
3124 |.macro NEXT_LEAVE; pop edi; pop esi; ret; .endmacro
3125 |.define NEXT_RES_PTR, [esp+dword*3] // Stack location used by vm_next as the result area (x86, after the two pushes).
3126 |.macro NEXT_RES_IDX, op2; add NEXT_IDX, op2; .endmacro // Return index: add op2 to NEXT_IDX in place.
3127 |.endif
3128 |
3129 |// TValue *lj_vm_next(GCtab *t, uint32_t idx)
3130 |// Next idx returned in edx.
3131 |->vm_next:
3132 |.if JIT
3133 | NEXT_ENTER
3134 | mov NEXT_ASIZE, NEXT_TAB->asize
3135 |1: // Traverse array part.
3136 | cmp NEXT_IDX, NEXT_ASIZE; jae >5 // Index is past the array part: continue with the hash part.
3137 | mov NEXT_TMP, NEXT_TAB->array
3138 | cmp dword [NEXT_TMP+NEXT_IDX*8+4], LJ_TNIL; je >2 // Slot tag is nil: skip it.
3139 | lea NEXT_PTR, NEXT_RES_PTR // Returned pointer = result area on the stack.
3140 |.if X64
3141 | mov NEXT_TMPq, qword [NEXT_TMP+NEXT_IDX*8] // Copy the 8 byte array slot to the result area.
3142 | mov qword [NEXT_PTR], NEXT_TMPq
3143 |.else
3144 | mov NEXT_ASIZE, dword [NEXT_TMP+NEXT_IDX*8+4] // Copy the array slot as two 32 bit halves; NEXT_ASIZE is reused as scratch here.
3145 | mov NEXT_TMP, dword [NEXT_TMP+NEXT_IDX*8]
3146 | mov dword [NEXT_PTR+4], NEXT_ASIZE
3147 | mov dword [NEXT_PTR], NEXT_TMP
3148 |.endif
3149 |.if DUALNUM
3150 | mov dword [NEXT_PTR+dword*3], LJ_TISNUM // Second slot of the result area: the index as an integer.
3151 | mov dword [NEXT_PTR+dword*2], NEXT_IDX
3152 |.else
3153 | cvtsi2sd xmm0, NEXT_IDX // Second slot of the result area: the index as a double.
3154 | movsd qword [NEXT_PTR+dword*2], xmm0
3155 |.endif
3156 | NEXT_RES_IDX 1 // Returned index = idx + 1.
3157 | NEXT_LEAVE
3158 |2: // Skip holes in array part.
3159 | add NEXT_IDX, 1
3160 | jmp <1
3161 |
3162 |5: // Traverse hash part.
3163 | sub NEXT_IDX, NEXT_ASIZE // Index relative to the hash part.
3164 |6:
3165 | cmp NEXT_IDX, NEXT_TAB->hmask; ja >9 // Past the last node: end of iteration.
3166 | imul NEXT_PTRd, NEXT_IDX, #NODE // Byte offset of node[idx].
3167 | add NODE:NEXT_PTRd, dword NEXT_TAB->node // Returned pointer = address of node[idx].
3168 | cmp dword NODE:NEXT_PTR->val.it, LJ_TNIL; je >7 // Node value tag is nil: skip it.
3169 | NEXT_RES_IDXL NEXT_ASIZE+1 // Returned index = hash index + asize + 1.
3170 | NEXT_LEAVE
3171 |7: // Skip holes in hash part.
3172 | add NEXT_IDX, 1
3173 | jmp <6
3174 |
3175 |9: // End of iteration. Set the key to nil (not the value).
3176 | NEXT_RES_IDX NEXT_ASIZE // Returned index = hash index + asize.
3177 | lea NEXT_PTR, NEXT_RES_PTR // Returned pointer = result area on the stack.
3178 | mov dword [NEXT_PTR+dword*3], LJ_TNIL // Tag of the second slot (the key) = nil.
3179 | NEXT_LEAVE
3180 |.endif
3181 |
3182 |//-----------------------------------------------------------------------
3183 |//-- Assertions ---------------------------------------------------------
3184 |//-----------------------------------------------------------------------
3185 |
3186 |->assert_bad_for_arg_type: // Target that traps with int3.
3187 #ifdef LUA_USE_ASSERT
3188 | int3 // Only emitted when LUA_USE_ASSERT is defined.
3189 #endif
3190 | int3 // Always emitted, so the label is never followed directly by unrelated code.
3191 |
3192 |//-----------------------------------------------------------------------
3193 |//-- FFI helper functions -----------------------------------------------
3194 |//-----------------------------------------------------------------------
3195 |
3196 |// Handler for callback functions. Callback slot number in ah/al.
3197 |->vm_ffi_callback:
3198 |.if FFI
3199 |.type CTSTATE, CTState, PC // CTSTATE is a typed alias for the PC register within this handler.
3200 |.if not X64
3201 | sub esp, 16 // Leave room for SAVE_ERRF etc.
3202 |.endif
3203 | saveregs_ // ebp/rbp already saved. ebp now holds global_State *.
3204 | lea DISPATCH, [ebp+GG_G2DISP] // Derive DISPATCH from the global_State pointer.
3205 | mov CTSTATE, GL:ebp->ctype_state
3206 | movzx eax, ax // Zero-extend the 16 bit slot number.
3207 | mov CTSTATE->cb.slot, eax
3208 |.if X64
3209 | mov CTSTATE->cb.gpr[0], CARG1 // Save the incoming integer and FP argument registers into cb.gpr[] / cb.fpr[].
3210 | mov CTSTATE->cb.gpr[1], CARG2
3211 | mov CTSTATE->cb.gpr[2], CARG3
3212 | mov CTSTATE->cb.gpr[3], CARG4
3213 | movsd qword CTSTATE->cb.fpr[0], xmm0
3214 | movsd qword CTSTATE->cb.fpr[1], xmm1
3215 | movsd qword CTSTATE->cb.fpr[2], xmm2
3216 | movsd qword CTSTATE->cb.fpr[3], xmm3
3217 |.if X64WIN
3218 | lea rax, [rsp+CFRAME_SIZE+4*8] // rax = address for cb.stack (Windows x64: CFRAME_SIZE plus 4*8 bytes above rsp).
3219 |.else
3220 | lea rax, [rsp+CFRAME_SIZE] // rax = address for cb.stack (CFRAME_SIZE bytes above rsp).
3221 | mov CTSTATE->cb.gpr[4], CARG5 // Non-Windows x64 additionally saves CARG5/CARG6 and xmm4-xmm7.
3222 | mov CTSTATE->cb.gpr[5], CARG6
3223 | movsd qword CTSTATE->cb.fpr[4], xmm4
3224 | movsd qword CTSTATE->cb.fpr[5], xmm5
3225 | movsd qword CTSTATE->cb.fpr[6], xmm6
3226 | movsd qword CTSTATE->cb.fpr[7], xmm7
3227 |.endif
3228 | mov CTSTATE->cb.stack, rax
3229 | mov CARG2, rsp // Second argument of lj_ccallback_enter: the current stack pointer.
3230 |.else
3231 | lea eax, [esp+CFRAME_SIZE+16] // eax = address for cb.stack.
3232 | mov CTSTATE->cb.gpr[0], FCARG1 // Save the two fastcall argument registers.
3233 | mov CTSTATE->cb.gpr[1], FCARG2
3234 | mov CTSTATE->cb.stack, eax
3235 | mov FCARG1, [esp+CFRAME_SIZE+12] // Move around misplaced retaddr/ebp.
3236 | mov FCARG2, [esp+CFRAME_SIZE+8]
3237 | mov SAVE_RET, FCARG1
3238 | mov SAVE_R4, FCARG2
3239 | mov FCARG2, esp // Second argument of lj_ccallback_enter: the current stack pointer.
3240 |.endif
3241 | mov SAVE_PC, CTSTATE // Any value outside of bytecode is ok.
3242 | mov FCARG1, CTSTATE
3243 | call extern lj_ccallback_enter@8 // (CTState *cts, void *cf)
3244 | // lua_State * returned in eax (RD).
3245 | set_vmstate INTERP
3246 | mov BASE, L:RD->base
3247 | mov RD, L:RD->top
3248 | sub RD, BASE
3249 | mov LFUNC:RB, [BASE-8] // RB = function object at [BASE-8].
3250 | shr RD, 3
3251 | add RD, 1 // RD = (top - base)/8 + 1 = nargs+1.
3252 | ins_callt
3253 |.endif
3254 |
3255 |->cont_ffi_callback: // Return from FFI callback. Syncs L, calls lj_ccallback_leave and hands the converted result back in the native return registers.
3256 |.if FFI
3257 | mov L:RA, SAVE_L
3258 | mov CTSTATE, [DISPATCH+DISPATCH_GL(ctype_state)] // Reload CTState * from the global state.
3259 | mov aword CTSTATE->L, L:RAa // cts->L = L.
3260 | mov L:RA->base, BASE // Sync L->base and L->top (top is taken from RB).
3261 | mov L:RA->top, RB
3262 | mov FCARG1, CTSTATE // 1st argument (cts).
3263 | mov FCARG2, RC // 2nd argument (o).
3264 | call extern lj_ccallback_leave@8 // (CTState *cts, TValue *o)
3265 |.if X64
3266 | mov rax, CTSTATE->cb.gpr[0] // Both the integer and the FP return register are loaded.
3267 | movsd xmm0, qword CTSTATE->cb.fpr[0]
3268 | jmp ->vm_leave_unw
3269 |.else
3270 | mov L:RB, SAVE_L
3271 | mov eax, CTSTATE->cb.gpr[0]
3272 | mov edx, CTSTATE->cb.gpr[1]
3273 | cmp dword CTSTATE->cb.gpr[2], 1 // cb.gpr[2] selects the x87 result: 0 = none, 1 = float, above 1 = double.
3274 | jb >7
3275 | je >6
3276 | fld qword CTSTATE->cb.fpr[0].d
3277 | jmp >7
3278 |6:
3279 | fld dword CTSTATE->cb.fpr[0].f
3280 |7:
3281 | mov ecx, L:RB->top
3282 | movzx ecx, word [ecx+6] // Get stack adjustment and copy up.
3283 | mov SAVE_L, ecx // Must be one slot above SAVE_RET
3284 | restoreregs
3285 | pop ecx // Move return addr from SAVE_RET.
3286 | add esp, [esp] // Adjust stack.
3287 | add esp, 16 // Undoes the extra 16 bytes; vm_ffi_callback does a matching 'sub esp, 16' on x86.
3288 | push ecx // Put the return address back on top for the ret below.
3289 | ret
3290 |.endif
3291 |.endif
3292 |
3293 |->vm_ffi_call@4: // Call C function via FFI. Takes a CCallState *, copies its stack slots and register images into place, calls CCSTATE->func and stores the results back.
3294 | // Caveat: needs special frame unwinding, see below.
3295 |.if FFI
3296 |.if X64
3297 | .type CCSTATE, CCallState, rbx
3298 | push rbp; mov rbp, rsp; push rbx; mov CCSTATE, CARG1 // Frame pointer setup; rbx is saved at [rbp-8].
3299 |.else
3300 | .type CCSTATE, CCallState, ebx
3301 | push ebp; mov ebp, esp; push ebx; mov CCSTATE, FCARG1 // Frame pointer setup; ebx is saved at [ebp-4].
3302 |.endif
3303 |
3304 | // Readjust stack.
3305 |.if X64
3306 | mov eax, CCSTATE->spadj // 32 bit load; the value is then subtracted from rsp.
3307 | sub rsp, rax
3308 |.else
3309 | sub esp, CCSTATE->spadj
3310 |.if WIN
3311 | mov CCSTATE->spadj, esp // Remember esp; turned into a delta after the call.
3312 |.endif
3313 |.endif
3314 |
3315 | // Copy stack slots.
3316 | movzx ecx, byte CCSTATE->nsp
3317 | sub ecx, 1 // ecx = nsp - 1; negative means there is nothing to copy.
3318 | js >2
3319 |1:
3320 |.if X64
3321 | mov rax, [CCSTATE+rcx*8+offsetof(CCallState, stack)]
3322 | mov [rsp+rcx*8+CCALL_SPS_EXTRA*8], rax
3323 |.else
3324 | mov eax, [CCSTATE+ecx*4+offsetof(CCallState, stack)]
3325 | mov [esp+ecx*4], eax
3326 |.endif
3327 | sub ecx, 1 // Copies from the highest index down to index 0.
3328 | jns <1
3329 |2:
3330 |
3331 |.if X64
3332 | movzx eax, byte CCSTATE->nfpr // eax = nfpr, tested below to skip the xmm loads.
3333 | mov CARG1, CCSTATE->gpr[0]
3334 | mov CARG2, CCSTATE->gpr[1]
3335 | mov CARG3, CCSTATE->gpr[2]
3336 | mov CARG4, CCSTATE->gpr[3]
3337 |.if not X64WIN
3338 | mov CARG5, CCSTATE->gpr[4]
3339 | mov CARG6, CCSTATE->gpr[5]
3340 |.endif
3341 | test eax, eax; jz >5 // nfpr == 0: leave all xmm registers alone.
3342 | movaps xmm0, CCSTATE->fpr[0]
3343 | movaps xmm1, CCSTATE->fpr[1]
3344 | movaps xmm2, CCSTATE->fpr[2]
3345 | movaps xmm3, CCSTATE->fpr[3]
3346 |.if not X64WIN
3347 | cmp eax, 4; jbe >5 // nfpr <= 4: xmm4-xmm7 are not needed.
3348 | movaps xmm4, CCSTATE->fpr[4]
3349 | movaps xmm5, CCSTATE->fpr[5]
3350 | movaps xmm6, CCSTATE->fpr[6]
3351 | movaps xmm7, CCSTATE->fpr[7]
3352 |.endif
3353 |5:
3354 |.else
3355 | mov FCARG1, CCSTATE->gpr[0]
3356 | mov FCARG2, CCSTATE->gpr[1]
3357 |.endif
3358 |
3359 | call aword CCSTATE->func // Indirect call of the target function.
3360 |
3361 |.if X64
3362 | mov CCSTATE->gpr[0], rax // Store the return registers back into the call state.
3363 | movaps CCSTATE->fpr[0], xmm0
3364 |.if not X64WIN
3365 | mov CCSTATE->gpr[1], rdx
3366 | movaps CCSTATE->fpr[1], xmm1
3367 |.endif
3368 |.else
3369 | mov CCSTATE->gpr[0], eax
3370 | mov CCSTATE->gpr[1], edx
3371 | cmp byte CCSTATE->resx87, 1 // resx87: 0 = no x87 result, 1 = float, above 1 = double.
3372 | jb >7
3373 | je >6
3374 | fstp qword CCSTATE->fpr[0].d[0]
3375 | jmp >7
3376 |6:
3377 | fstp dword CCSTATE->fpr[0].f[0]
3378 |7:
3379 |.if WIN
3380 | sub CCSTATE->spadj, esp // spadj = esp saved before the call minus esp after the call.
3381 |.endif
3382 |.endif
3383 |
3384 |.if X64
3385 | mov rbx, [rbp-8]; leave; ret // Restore rbx via rbp, so this does not depend on the current rsp.
3386 |.else
3387 | mov ebx, [ebp-4]; leave; ret // Restore ebx via ebp, so this does not depend on the current esp.
3388 |.endif
3389 |.endif
3390 |// Note: vm_ffi_call must be the last function in this object file!
3391 |
3392 |//-----------------------------------------------------------------------
3393 }
3394
3395 /* Generate the code for a single instruction. */
3396 static void build_ins(BuildCtx *ctx, BCOp op, int defop)
3397 {
3398 int vk = 0;
3399 |// Note: aligning all instructions does not pay off.
3400 |=>defop:
3401
3402 switch (op) {
3403
3404 /* -- Comparison ops ---------------------------------------------------- */
3405
3406 /* Remember: all ops branch for a true comparison, fall through otherwise. */
3407
3408 |.macro jmp_comp, lt, ge, le, gt, target // Emits exactly one of the four given jump instructions to target, selected at build time by the C variable op.
3409 ||switch (op) {
3410 ||case BC_ISLT:
3411 | lt target
3412 ||break;
3413 ||case BC_ISGE:
3414 | ge target
3415 ||break;
3416 ||case BC_ISLE:
3417 | le target
3418 ||break;
3419 ||case BC_ISGT:
3420 | gt target
3421 ||break;
3422 ||default: break; /* Shut up GCC. */
3423 ||}
3424 |.endmacro
3425
3426 case BC_ISLT: case BC_ISGE: case BC_ISLE: case BC_ISGT:
3427 | // RA = src1, RD = src2, JMP with RD = target
3428 | ins_AD
3429 |.if DUALNUM
3430 | checkint RA, >7
3431 | checkint RD, >8
3432 | mov RB, dword [BASE+RA*8] // Both operands are integers: signed 32 bit compare.
3433 | add PC, 4
3434 | cmp RB, dword [BASE+RD*8]
3435 | jmp_comp jge, jl, jg, jle, >9 // Inverted conditions: jump to 9 (no branch) if the comparison is false.
3436 |6:
3437 | movzx RD, PC_RD // Comparison holds: reload the operand via PC_RD and branch.
3438 | branchPC RD
3439 |9:
3440 | ins_next
3441 |
3442 |7: // RA is not an integer.
3443 | ja ->vmeta_comp
3444 | // RA is a number.
3445 | cmp dword [BASE+RD*8+4], LJ_TISNUM; jb >1; jne ->vmeta_comp
3446 | // RA is a number, RD is an integer.
3447 | cvtsi2sd xmm0, dword [BASE+RD*8]
3448 | jmp >2
3449 |
3450 |8: // RA is an integer, RD is not an integer.
3451 | ja ->vmeta_comp
3452 | // RA is an integer, RD is a number.
3453 | cvtsi2sd xmm1, dword [BASE+RA*8]
3454 | movsd xmm0, qword [BASE+RD*8]
3455 | add PC, 4
3456 | ucomisd xmm0, xmm1 // Same operand order as at label 2: xmm0 = RD, other operand = RA.
3457 | jmp_comp jbe, ja, jb, jae, <9
3458 | jmp <6
3459 |.else
3460 | checknum RA, ->vmeta_comp
3461 | checknum RD, ->vmeta_comp
3462 |.endif
3463 |1:
3464 | movsd xmm0, qword [BASE+RD*8]
3465 |2:
3466 | add PC, 4
3467 | ucomisd xmm0, qword [BASE+RA*8] // Flags describe RD relative to RA, hence the mirrored conditions below.
3468 |3:
3469 | // Unordered: all of ZF CF PF set, ordered: PF clear.
3470 | // To preserve NaN semantics GE/GT branch on unordered, but LT/LE don't.
3471 |.if DUALNUM
3472 | jmp_comp jbe, ja, jb, jae, <9
3473 | jmp <6 // Reuse the branch code emitted at label 6 above.
3474 |.else
3475 | jmp_comp jbe, ja, jb, jae, >1
3476 | movzx RD, PC_RD
3477 | branchPC RD
3478 |1:
3479 | ins_next
3480 |.endif
3481 break;
3482
3483 case BC_ISEQV: case BC_ISNEV:
3484 vk = op == BC_ISEQV;
3485 | ins_AD // RA = src1, RD = src2, JMP with RD = target
3486 | mov RB, [BASE+RD*8+4] // RB = type tag of RD; still used at label 5.
3487 | add PC, 4
3488 |.if DUALNUM
3489 | cmp RB, LJ_TISNUM; jne >7
3490 | checkint RA, >8
3491 | mov RB, dword [BASE+RD*8] // Both are integers: compare the 32 bit values directly.
3492 | cmp RB, dword [BASE+RA*8]
3493 if (vk) {
3494 | jne >9
3495 } else {
3496 | je >9
3497 }
3498 | movzx RD, PC_RD
3499 | branchPC RD
3500 |9:
3501 | ins_next
3502 |
3503 |7: // RD is not an integer.
3504 | ja >5
3505 | // RD is a number.
3506 | cmp dword [BASE+RA*8+4], LJ_TISNUM; jb >1; jne >5
3507 | // RD is a number, RA is an integer.
3508 | cvtsi2sd xmm0, dword [BASE+RA*8]
3509 | jmp >2
3510 |
3511 |8: // RD is an integer, RA is not an integer.
3512 | ja >5
3513 | // RD is an integer, RA is a number.
3514 | cvtsi2sd xmm0, dword [BASE+RD*8]
3515 | ucomisd xmm0, qword [BASE+RA*8]
3516 | jmp >4
3517 |
3518 |.else
3519 | cmp RB, LJ_TISNUM; jae >5
3520 | checknum RA, >5
3521 |.endif
3522 |1:
3523 | movsd xmm0, qword [BASE+RA*8]
3524 |2:
3525 | ucomisd xmm0, qword [BASE+RD*8]
3526 |4:
3527 iseqne_fp: /* Shared FP tail: BC_ISEQN/BC_ISNEN enter here via goto with vk set. */
3528 if (vk) {
3529 | jp >2 // Unordered means not equal.
3530 | jne >2
3531 } else {
3532 | jp >2 // Unordered means not equal.
3533 | je >1
3534 }
3535 iseqne_end: /* Shared tail: entered via goto from iseqne_test in the BC_ISEQS/BC_ISNES case. */
3536 if (vk) {
3537 |1: // EQ: Branch to the target.
3538 | movzx RD, PC_RD
3539 | branchPC RD
3540 |2: // NE: Fallthrough to next instruction.
3541 |.if not FFI
3542 |3:
3543 |.endif
3544 } else {
3545 |.if not FFI
3546 |3:
3547 |.endif
3548 |2: // NE: Branch to the target.
3549 | movzx RD, PC_RD
3550 | branchPC RD
3551 |1: // EQ: Fallthrough to next instruction.
3552 }
3553 if (LJ_DUALNUM && (op == BC_ISEQV || op == BC_ISNEV ||
3554 op == BC_ISEQN || op == BC_ISNEN)) {
3555 | jmp <9 // Reuse the ins_next already emitted at label 9.
3556 } else {
3557 | ins_next
3558 }
3559 |
3560 if (op == BC_ISEQV || op == BC_ISNEV) {
3561 |5: // Either or both types are not numbers.
3562 |.if FFI
3563 | cmp RB, LJ_TCDATA; je ->vmeta_equal_cd
3564 | checktp RA, LJ_TCDATA; je ->vmeta_equal_cd
3565 |.endif
3566 | checktp RA, RB // Compare types.
3567 | jne <2 // Not the same type?
3568 | cmp RB, LJ_TISPRI
3569 | jae <1 // Same type and primitive type?
3570 |
3571 | // Same types and not a primitive type. Compare GCobj or pvalue.
3572 | mov RA, [BASE+RA*8]
3573 | mov RD, [BASE+RD*8]
3574 | cmp RA, RD
3575 | je <1 // Same GCobjs or pvalues?
3576 | cmp RB, LJ_TISTABUD
3577 | ja <2 // Different objects and not table/ud?
3578 |.if X64
3579 | cmp RB, LJ_TUDATA // And not 64 bit lightuserdata.
3580 | jb <2
3581 |.endif
3582 |
3583 | // Different tables or userdatas. Need to check __eq metamethod.
3584 | // Field metatable must be at same offset for GCtab and GCudata!
3585 | mov TAB:RB, TAB:RA->metatable
3586 | test TAB:RB, TAB:RB
3587 | jz <2 // No metatable?
3588 | test byte TAB:RB->nomm, 1<<MM_eq
3589 | jnz <2 // Or 'no __eq' flag set?
3590 if (vk) {
3591 | xor RB, RB // ne = 0
3592 } else {
3593 | mov RB, 1 // ne = 1
3594 }
3595 | jmp ->vmeta_equal // Handle __eq metamethod.
3596 } else {
3597 |.if FFI
3598 |3: // With FFI, label 3 is emitted here rather than in the shared tail.
3599 | cmp RB, LJ_TCDATA
3600 if (LJ_DUALNUM && vk) {
3601 | jne <9
3602 } else {
3603 | jne <2
3604 }
3605 | jmp ->vmeta_equal_cd
3606 |.endif
3607 }
3608 break;
3609 case BC_ISEQS: case BC_ISNES:
3610 vk = op == BC_ISEQS;
3611 | ins_AND // RA = src, RD = str const, JMP with RD = target
3612 | mov RB, [BASE+RA*8+4] // RB = type tag of RA.
3613 | add PC, 4
3614 | cmp RB, LJ_TSTR; jne >3 // Not a string: label 3 is emitted after the goto target iseqne_end.
3615 | mov RA, [BASE+RA*8]
3616 | cmp RA, [KBASE+RD*4] // Compares the two string object pointers only.
3617 iseqne_test: /* Shared: BC_ISEQP/BC_ISNEP jump here via goto when !LJ_HASFFI. */
3618 if (vk) {
3619 | jne >2
3620 } else {
3621 | je >1
3622 }
3623 goto iseqne_end;
3624 case BC_ISEQN: case BC_ISNEN:
3625 vk = op == BC_ISEQN;
3626 | ins_AD // RA = src, RD = num const, JMP with RD = target
3627 | mov RB, [BASE+RA*8+4] // RB = type tag of RA.
3628 | add PC, 4
3629 |.if DUALNUM
3630 | cmp RB, LJ_TISNUM; jne >7
3631 | cmp dword [KBASE+RD*8+4], LJ_TISNUM; jne >8
3632 | mov RB, dword [KBASE+RD*8] // Both are integers: compare the 32 bit values directly.
3633 | cmp RB, dword [BASE+RA*8]
3634 if (vk) {
3635 | jne >9
3636 } else {
3637 | je >9
3638 }
3639 | movzx RD, PC_RD
3640 | branchPC RD
3641 |9:
3642 | ins_next
3643 |
3644 |7: // RA is not an integer.
3645 | ja >3
3646 | // RA is a number.
3647 | cmp dword [KBASE+RD*8+4], LJ_TISNUM; jb >1
3648 | // RA is a number, RD is an integer.
3649 | cvtsi2sd xmm0, dword [KBASE+RD*8]
3650 | jmp >2
3651 |
3652 |8: // RA is an integer, RD is a number.
3653 | cvtsi2sd xmm0, dword [BASE+RA*8]
3654 | ucomisd xmm0, qword [KBASE+RD*8]
3655 | jmp >4
3656 |.else
3657 | cmp RB, LJ_TISNUM; jae >3 // Not a number: label 3 is emitted after the goto target iseqne_fp.
3658 |.endif
3659 |1:
3660 | movsd xmm0, qword [KBASE+RD*8]
3661 |2:
3662 | ucomisd xmm0, qword [BASE+RA*8]
3663 |4:
3664 goto iseqne_fp;
3665 case BC_ISEQP: case BC_ISNEP:
3666 vk = op == BC_ISEQP;
3667 | ins_AND // RA = src, RD = primitive type (~), JMP with RD = target
3668 | mov RB, [BASE+RA*8+4] // RB = type tag of RA.
3669 | add PC, 4
3670 | cmp RB, RD // Flags from this compare are used by either code path below.
3671 if (!LJ_HASFFI) goto iseqne_test;
3672 if (vk) {
3673 | jne >3
3674 | movzx RD, PC_RD
3675 | branchPC RD
3676 |2:
3677 | ins_next
3678 |3:
3679 | cmp RB, LJ_TCDATA; jne <2 // Tags differ: only cdata is handed to vmeta_equal_cd.
3680 | jmp ->vmeta_equal_cd
3681 } else {
3682 | je >2
3683 | cmp RB, LJ_TCDATA; je ->vmeta_equal_cd
3684 | movzx RD, PC_RD
3685 | branchPC RD
3686 |2:
3687 | ins_next
3688 }
3689 break;
3690
3691 /* -- Unary test and copy ops ------------------------------------------- */
3692
3693 case BC_ISTC: case BC_ISFC: case BC_IST: case BC_ISF:
3694 | ins_AD // RA = dst or unused, RD = src, JMP with RD = target
3695 | mov RB, [BASE+RD*8+4] // RB = type tag of RD.
3696 | add PC, 4
3697 | cmp RB, LJ_TISTRUECOND
3698 if (op == BC_IST || op == BC_ISTC) {
3699 | jae >1 // IST/ISTC skip the branch for tags >= LJ_TISTRUECOND (unsigned).
3700 } else {
3701 | jb >1 // ISF/ISFC skip the branch for tags < LJ_TISTRUECOND (unsigned).
3702 }
3703 if (op == BC_ISTC || op == BC_ISFC) {
3704 | mov [BASE+RA*8+4], RB // Copy variants: the slot is copied only when the branch is taken.
3705 | mov RB, [BASE+RD*8]
3706 | mov [BASE+RA*8], RB
3707 }
3708 | movzx RD, PC_RD
3709 | branchPC RD
3710 |1: // Fallthrough to the next instruction.
3711 | ins_next
3712 break;
3713
3714 case BC_ISTYPE:
3715 | ins_AD // RA = src, RD = -type
3716 | add RD, [BASE+RA*8+4] // RD holds -type, so the sum is zero iff the tag equals type.
3717 | jne ->vmeta_istype
3718 | ins_next
3719 break;
3720 case BC_ISNUM:
3721 | ins_AD // RA = src, RD = -(TISNUM-1)
3722 | checknum RA, ->vmeta_istype // RD is not used in this fast path.
3723 | ins_next
3724 break;
3725
3726 /* -- Unary ops --------------------------------------------------------- */
3727
3728 case BC_MOV:
3729 | ins_AD // RA = dst, RD = src
3730 |.if X64
3731 | mov RBa, [BASE+RD*8] // x64: copy the whole 8 byte slot with one load/store pair.
3732 | mov [BASE+RA*8], RBa
3733 |.else
3734 | mov RB, [BASE+RD*8+4] // x86: copy the tag word and the value word separately.
3735 | mov RD, [BASE+RD*8]
3736 | mov [BASE+RA*8+4], RB
3737 | mov [BASE+RA*8], RD
3738 |.endif
3739 | ins_next_
3740 break;
3741 case BC_NOT:
3742 | ins_AD // RA = dst, RD = src
3743 | xor RB, RB
3744 | checktp RD, LJ_TISTRUECOND // Presumably a tag compare whose carry flag feeds the adc below -- TODO confirm against the checktp macro.
3745 | adc RB, LJ_TTRUE // RB = LJ_TTRUE + carry.
3746 | mov [BASE+RA*8+4], RB // Only the tag word of the destination is written.
3747 | ins_next
3748 break;
3749 case BC_UNM:
3750 | ins_AD // RA = dst, RD = src
3751 |.if DUALNUM
3752 | checkint RD, >5
3753 | mov RB, [BASE+RD*8]
3754 | neg RB
3755 | jo >4 // Integer negation overflowed: store a double instead.
3756 | mov dword [BASE+RA*8+4], LJ_TISNUM
3757 | mov dword [BASE+RA*8], RB
3758 |9:
3759 | ins_next
3760 |4:
3761 | mov dword [BASE+RA*8+4], 0x41e00000 // 2^31.
3762 | mov dword [BASE+RA*8], 0 // Low word of the same double.
3763 | jmp <9
3764 |5:
3765 | ja ->vmeta_unm
3766 |.else
3767 | checknum RD, ->vmeta_unm
3768 |.endif
3769 | movsd xmm0, qword [BASE+RD*8]
3770 | sseconst_sign xmm1, RDa
3771 | xorps xmm0, xmm1 // XOR with the constant from sseconst_sign (presumably the sign-bit mask -- TODO confirm).
3772 | movsd qword [BASE+RA*8], xmm0
3773 |.if DUALNUM
3774 | jmp <9
3775 |.else
3776 | ins_next
3777 |.endif
3778 break;
3779 case BC_LEN:
3780 | ins_AD // RA = dst, RD = src
3781 | checkstr RD, >2
3782 | mov STR:RD, [BASE+RD*8]
3783 |.if DUALNUM
3784 | mov RD, dword STR:RD->len
3785 |1: // Store the length as an integer; the table path jumps back here with the length in RD.
3786 | mov dword [BASE+RA*8+4], LJ_TISNUM
3787 | mov dword [BASE+RA*8], RD
3788 |.else
3789 | xorps xmm0, xmm0
3790 | cvtsi2sd xmm0, dword STR:RD->len
3791 |1: // Store the length as a double; the table path jumps back here with the length in xmm0.
3792 | movsd qword [BASE+RA*8], xmm0
3793 |.endif
3794 | ins_next
3795 |2: // Not a string.
3796 | checktab RD, ->vmeta_len
3797 | mov TAB:FCARG1, [BASE+RD*8]
3798 #if LJ_52
3799 | mov TAB:RB, TAB:FCARG1->metatable
3800 | cmp TAB:RB, 0
3801 | jnz >9
3802 |3:
3803 #endif
3804 |->BC_LEN_Z:
3805 | mov RB, BASE // Save BASE.
3806 | call extern lj_tab_len@4 // (GCtab *t)
3807 | // Length of table returned in eax (RD).
3808 |.if DUALNUM
3809 | // Nothing to do.
3810 |.else
3811 | cvtsi2sd xmm0, RD
3812 |.endif
3813 | mov BASE, RB // Restore BASE.
3814 | movzx RA, PC_RA // Reload RA from the instruction after the call.
3815 | jmp <1
3816 #if LJ_52
3817 |9: // Check for __len.
3818 | test byte TAB:RB->nomm, 1<<MM_len
3819 | jnz <3
3820 | jmp ->vmeta_len // 'no __len' flag NOT set: check.
3821 #endif
3822 break;
3823
3824 /* -- Binary ops -------------------------------------------------------- */
3825
3826 |.macro ins_arithpre, sseins, ssereg // Type-checks both operands, loads the first into xmm0 and applies 'sseins ssereg, <second operand>'.
3827 | ins_ABC // vk computed below selects the operand form: 0 = VN, 1 = NV, otherwise VV.
3828 ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN);
3829 ||switch (vk) {
3830 ||case 0:
3831 | checknum RB, ->vmeta_arith_vn
3832 | .if DUALNUM
3833 | cmp dword [KBASE+RC*8+4], LJ_TISNUM; jae ->vmeta_arith_vn
3834 | .endif
3835 | movsd xmm0, qword [BASE+RB*8]
3836 | sseins ssereg, qword [KBASE+RC*8]
3837 || break;
3838 ||case 1:
3839 | checknum RB, ->vmeta_arith_nv
3840 | .if DUALNUM
3841 | cmp dword [KBASE+RC*8+4], LJ_TISNUM; jae ->vmeta_arith_nv
3842 | .endif
3843 | movsd xmm0, qword [KBASE+RC*8] // NV form: the constant is the first operand.
3844 | sseins ssereg, qword [BASE+RB*8]
3845 || break;
3846 ||default:
3847 | checknum RB, ->vmeta_arith_vv
3848 | checknum RC, ->vmeta_arith_vv
3849 | movsd xmm0, qword [BASE+RB*8]
3850 | sseins ssereg, qword [BASE+RC*8]
3851 || break;
3852 ||}
3853 |.endmacro
3854 |
3855 |.macro ins_arithdn, intins // Integer variant: performs intins on 32 bit operands, stores an integer result and dispatches; leaves via ->vmeta_arith_* on a failed type check or overflow.
3856 | ins_ABC
3857 ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN);
3858 ||switch (vk) {
3859 ||case 0:
3860 | checkint RB, ->vmeta_arith_vn
3861 | cmp dword [KBASE+RC*8+4], LJ_TISNUM; jne ->vmeta_arith_vn
3862 | mov RB, [BASE+RB*8]
3863 | intins RB, [KBASE+RC*8]; jo ->vmeta_arith_vno
3864 || break;
3865 ||case 1:
3866 | checkint RB, ->vmeta_arith_nv
3867 | cmp dword [KBASE+RC*8+4], LJ_TISNUM; jne ->vmeta_arith_nv
3868 | mov RC, [KBASE+RC*8] // NV form: the result is accumulated in RC instead of RB.
3869 | intins RC, [BASE+RB*8]; jo ->vmeta_arith_nvo
3870 || break;
3871 ||default:
3872 | checkint RB, ->vmeta_arith_vv
3873 | checkint RC, ->vmeta_arith_vv
3874 | mov RB, [BASE+RB*8]
3875 | intins RB, [BASE+RC*8]; jo ->vmeta_arith_vvo
3876 || break;
3877 ||}
3878 | mov dword [BASE+RA*8+4], LJ_TISNUM
3879 ||if (vk == 1) {
3880 | mov dword [BASE+RA*8], RC
3881 ||} else {
3882 | mov dword [BASE+RA*8], RB
3883 ||}
3884 | ins_next
3885 |.endmacro
3886 |
3887 |.macro ins_arithpost // Stores the double result from xmm0 into slot RA.
3888 | movsd qword [BASE+RA*8], xmm0
3889 |.endmacro
3890 |
3891 |.macro ins_arith, sseins // One-argument form: SSE-only arithmetic with result store and dispatch.
3892 | ins_arithpre sseins, xmm0
3893 | ins_arithpost
3894 | ins_next
3895 |.endmacro
3896 |
3897 |.macro ins_arith, intins, sseins // Two-argument form: integer path under DUALNUM, SSE path otherwise.
3898 |.if DUALNUM
3899 | ins_arithdn intins
3900 |.else
3901 | ins_arith, sseins
3902 |.endif
3903 |.endmacro
3904
3905 | // RA = dst, RB = src1 or num const, RC = src2 or num const
3906 case BC_ADDVN: case BC_ADDNV: case BC_ADDVV:
3907 | ins_arith add, addsd
3908 break;
3909 case BC_SUBVN: case BC_SUBNV: case BC_SUBVV:
3910 | ins_arith sub, subsd
3911 break;
3912 case BC_MULVN: case BC_MULNV: case BC_MULVV:
3913 | ins_arith imul, mulsd
3914 break;
3915 case BC_DIVVN: case BC_DIVNV: case BC_DIVVV:
3916 | ins_arith divsd // One-argument form: uses the SSE path even with DUALNUM.
3917 break;
3918 case BC_MODVN:
3919 | ins_arithpre movsd, xmm1 // First operand in xmm0, second operand in xmm1.
3920 |->BC_MODVN_Z:
3921 | call ->vm_mod
3922 | ins_arithpost
3923 | ins_next
3924 break;
3925 case BC_MODNV: case BC_MODVV:
3926 | ins_arithpre movsd, xmm1
3927 | jmp ->BC_MODVN_Z // Avoid 3 copies. It's slow anyway.
3928 break;
3929 case BC_POW:
3930 | ins_arithpre movsd, xmm1 // First operand in xmm0, second operand in xmm1.
3931 | mov RB, BASE // Save BASE across the call.
3932 |.if not X64
3933 | movsd FPARG1, xmm0 // x86: both doubles go into the FPARG slots.
3934 | movsd FPARG3, xmm1
3935 |.endif
3936 | call extern pow
3937 | movzx RA, PC_RA // Reload RA from the instruction after the call.
3938 | mov BASE, RB
3939 |.if X64
3940 | ins_arithpost
3941 |.else
3942 | fstp qword [BASE+RA*8] // x86: the result is popped from the x87 stack.
3943 |.endif
3944 | ins_next
3945 break;
3946
3947 case BC_CAT:
3948 | ins_ABC // RA = dst, RB = src_start, RC = src_end
3949 |.if X64
3950 | mov L:CARG1d, SAVE_L
3951 | mov L:CARG1d->base, BASE
3952 | lea CARG2d, [BASE+RC*8] // top = address of slot RC.
3953 | mov CARG3d, RC
3954 | sub CARG3d, RB // left = RC - RB.
3955 |->BC_CAT_Z:
3956 | mov L:RB, L:CARG1d // Keep L in RB so L->base can be reloaded after the call.
3957 |.else
3958 | lea RA, [BASE+RC*8] // top = address of slot RC.
3959 | sub RC, RB // left = RC - RB.
3960 | mov ARG2, RA
3961 | mov ARG3, RC
3962 |->BC_CAT_Z:
3963 | mov L:RB, SAVE_L
3964 | mov ARG1, L:RB
3965 | mov L:RB->base, BASE
3966 |.endif
3967 | mov SAVE_PC, PC
3968 | call extern lj_meta_cat // (lua_State *L, TValue *top, int left)
3969 | // NULL (finished) or TValue * (metamethod) returned in eax (RC).
3970 | mov BASE, L:RB->base
3971 | test RC, RC
3972 | jnz ->vmeta_binop
3973 | movzx RB, PC_RB // Copy result to Stk[RA] from Stk[RB].
3974 | movzx RA, PC_RA
3975 |.if X64
3976 | mov RCa, [BASE+RB*8]
3977 | mov [BASE+RA*8], RCa
3978 |.else
3979 | mov RC, [BASE+RB*8+4]
3980 | mov RB, [BASE+RB*8]
3981 | mov [BASE+RA*8+4], RC
3982 | mov [BASE+RA*8], RB
3983 |.endif
3984 | ins_next
3985 break;
3986
3987 /* -- Constant ops ------------------------------------------------------ */
3988
3989 case BC_KSTR:
3990 | ins_AND // RA = dst, RD = str const (~)
3991 | mov RD, [KBASE+RD*4] // Fetch the string constant (4 byte entries indexed off KBASE).
3992 | mov dword [BASE+RA*8+4], LJ_TSTR
3993 | mov [BASE+RA*8], RD
3994 | ins_next
3995 break;
3996 case BC_KCDATA:
3997 |.if FFI
3998 | ins_AND // RA = dst, RD = cdata const (~)
3999 | mov RD, [KBASE+RD*4] // Fetch the cdata constant (4 byte entries indexed off KBASE).
4000 | mov dword [BASE+RA*8+4], LJ_TCDATA
4001 | mov [BASE+RA*8], RD
4002 | ins_next
4003 |.endif
4004 break;
4005 case BC_KSHORT:
4006 | ins_AD // RA = dst, RD = signed int16 literal
4007 |.if DUALNUM
4008 | movsx RD, RDW // Sign-extend literal; stored as an integer below.
4009 | mov dword [BASE+RA*8+4], LJ_TISNUM
4010 | mov dword [BASE+RA*8], RD
4011 |.else
4012 | movsx RD, RDW // Sign-extend literal.
4013 | cvtsi2sd xmm0, RD // Stored as a double without DUALNUM.
4014 | movsd qword [BASE+RA*8], xmm0
4015 |.endif
4016 | ins_next
4017 break;
4018 case BC_KNUM:
4019 | ins_AD // RA = dst, RD = num const
4020 | movsd xmm0, qword [KBASE+RD*8] // Copy the 8 byte constant through xmm0.
4021 | movsd qword [BASE+RA*8], xmm0
4022 | ins_next
4023 break;
4024 case BC_KPRI:
4025 | ins_AND // RA = dst, RD = primitive type (~)
4026 | mov [BASE+RA*8+4], RD // Only the tag word is written; the value word is left as is.
4027 | ins_next
4028 break;
4029 case BC_KNIL:
4030 | ins_AD // RA = dst_start, RD = dst_end
4031 | lea RA, [BASE+RA*8+12] // RA = address of the tag word of slot dst_start+1.
4032 | lea RD, [BASE+RD*8+4] // RD = address of the tag word of slot dst_end.
4033 | mov RB, LJ_TNIL
4034 | mov [RA-8], RB // Sets minimum 2 slots.
4035 |1:
4036 | mov [RA], RB // Only tag words are written.
4037 | add RA, 8
4038 | cmp RA, RD
4039 | jbe <1
4040 | ins_next
4041 break;
4042
4043 /* -- Upvalue and function ops ------------------------------------------ */
4044
4045 case BC_UGET:
4046 | ins_AD // RA = dst, RD = upvalue #
4047 | mov LFUNC:RB, [BASE-8] // Current function object is in the slot below BASE.
4048 | mov UPVAL:RB, [LFUNC:RB+RD*4+offsetof(GCfuncL, uvptr)]
4049 | mov RB, UPVAL:RB->v // RB = pointer held in the upvalue's v field.
4050 |.if X64
4051 | mov RDa, [RB]
4052 | mov [BASE+RA*8], RDa
4053 |.else
4054 | mov RD, [RB+4]
4055 | mov RB, [RB]
4056 | mov [BASE+RA*8+4], RD
4057 | mov [BASE+RA*8], RB
4058 |.endif
4059 | ins_next
4060 break;
4061 case BC_USETV:
4062 #define TV2MARKOFS \
4063 ((int32_t)offsetof(GCupval, marked)-(int32_t)offsetof(GCupval, tv))
4064 | ins_AD // RA = upvalue #, RD = src
4065 | mov LFUNC:RB, [BASE-8]
4066 | mov UPVAL:RB, [LFUNC:RB+RA*4+offsetof(GCfuncL, uvptr)]
4067 | cmp byte UPVAL:RB->closed, 0 // Flags are consumed by the jz below; the movs in between do not change them.
4068 | mov RB, UPVAL:RB->v
4069 | mov RA, [BASE+RD*8]
4070 | mov RD, [BASE+RD*8+4]
4071 | mov [RB], RA
4072 | mov [RB+4], RD
4073 | jz >1 // closed == 0: skip the barrier check.
4074 | // Check barrier for closed upvalue.
4075 | test byte [RB+TV2MARKOFS], LJ_GC_BLACK // isblack(uv)
4076 | jnz >2
4077 |1:
4078 | ins_next
4079 |
4080 |2: // Upvalue is black. Check if new value is collectable and white.
4081 | sub RD, LJ_TISGCV
4082 | cmp RD, LJ_TNUMX - LJ_TISGCV // tvisgcv(v)
4083 | jbe <1
4084 | test byte GCOBJ:RA->gch.marked, LJ_GC_WHITES // iswhite(v)
4085 | jz <1
4086 | // Crossed a write barrier. Move the barrier forward.
4087 |.if X64 and not X64WIN
4088 | mov FCARG2, RB
4089 | mov RB, BASE // Save BASE.
4090 |.else
4091 | xchg FCARG2, RB // Save BASE (FCARG2 == BASE).
4092 |.endif
4093 | lea GL:FCARG1, [DISPATCH+GG_DISP2G]
4094 | call extern lj_gc_barrieruv@8 // (global_State *g, TValue *tv)
4095 | mov BASE, RB // Restore BASE.
4096 | jmp <1
4097 break;
4098 #undef TV2MARKOFS
4099 case BC_USETS:
4100 | ins_AND // RA = upvalue #, RD = str const (~)
4101 | mov LFUNC:RB, [BASE-8]
4102 | mov UPVAL:RB, [LFUNC:RB+RA*4+offsetof(GCfuncL, uvptr)]
4103 | mov GCOBJ:RA, [KBASE+RD*4]
4104 | mov RD, UPVAL:RB->v
4105 | mov [RD], GCOBJ:RA // Store the string pointer and the LJ_TSTR tag.
4106 | mov dword [RD+4], LJ_TSTR
4107 | test byte UPVAL:RB->marked, LJ_GC_BLACK // isblack(uv)
4108 | jnz >2
4109 |1:
4110 | ins_next
4111 |
4112 |2: // Check if string is white and ensure upvalue is closed.
4113 | test byte GCOBJ:RA->gch.marked, LJ_GC_WHITES // iswhite(str)
4114 | jz <1
4115 | cmp byte UPVAL:RB->closed, 0
4116 | jz <1
4117 | // Crossed a write barrier. Move the barrier forward.
4118 | mov RB, BASE // Save BASE (FCARG2 == BASE).
4119 | mov FCARG2, RD // 2nd argument (tv) = address that was just written.
4120 | lea GL:FCARG1, [DISPATCH+GG_DISP2G]
4121 | call extern lj_gc_barrieruv@8 // (global_State *g, TValue *tv)
4122 | mov BASE, RB // Restore BASE.
4123 | jmp <1
4124 break;
4125 case BC_USETN:
4126 | ins_AD // RA = upvalue #, RD = num const
4127 | mov LFUNC:RB, [BASE-8]
4128 | movsd xmm0, qword [KBASE+RD*8]
4129 | mov UPVAL:RB, [LFUNC:RB+RA*4+offsetof(GCfuncL, uvptr)]
4130 | mov RA, UPVAL:RB->v
4131 | movsd qword [RA], xmm0 // No GC barrier check in this case.
4132 | ins_next
4133 break;
4134 case BC_USETP:
4135 | ins_AND // RA = upvalue #, RD = primitive type (~)
4136 | mov LFUNC:RB, [BASE-8]
4137 | mov UPVAL:RB, [LFUNC:RB+RA*4+offsetof(GCfuncL, uvptr)]
4138 | mov RA, UPVAL:RB->v
4139 | mov [RA+4], RD // Only the tag word is written; no GC barrier check in this case.
4140 | ins_next
4141 break;
4142 case BC_UCLO:
4143 | ins_AD // RA = level, RD = target
4144 | branchPC RD // Do this first to free RD.
4145 | mov L:RB, SAVE_L
4146 | cmp dword L:RB->openupval, 0
4147 | je >1 // L->openupval == 0: skip the call.
4148 | mov L:RB->base, BASE
4149 | lea FCARG2, [BASE+RA*8] // Caveat: FCARG2 == BASE
4150 | mov L:FCARG1, L:RB // Caveat: FCARG1 == RA
4151 | call extern lj_func_closeuv@8 // (lua_State *L, TValue *level)
4152 | mov BASE, L:RB->base // Reload BASE after the call.
4153 |1:
4154 | ins_next
4155 break;
4156
4157 case BC_FNEW:
4158 | ins_AND // RA = dst, RD = proto const (~) (holding function prototype)
4159 |.if X64
4160 | mov L:RB, SAVE_L
4161 | mov L:RB->base, BASE // Caveat: CARG2d/CARG3d may be BASE.
4162 | mov CARG3d, [BASE-8] // parent = current function object.
4163 | mov CARG2d, [KBASE+RD*4] // Fetch GCproto *.
4164 | mov CARG1d, L:RB
4165 |.else
4166 | mov LFUNC:RA, [BASE-8] // parent = current function object.
4167 | mov PROTO:RD, [KBASE+RD*4] // Fetch GCproto *.
4168 | mov L:RB, SAVE_L
4169 | mov ARG3, LFUNC:RA
4170 | mov ARG2, PROTO:RD
4171 | mov ARG1, L:RB
4172 | mov L:RB->base, BASE
4173 |.endif
4174 | mov SAVE_PC, PC
4175 | // (lua_State *L, GCproto *pt, GCfuncL *parent)
4176 | call extern lj_func_newL_gc
4177 | // GCfuncL * returned in eax (RC).
4178 | mov BASE, L:RB->base
4179 | movzx RA, PC_RA // Reload RA from the instruction after the call.
4180 | mov [BASE+RA*8], LFUNC:RC
4181 | mov dword [BASE+RA*8+4], LJ_TFUNC
4182 | ins_next
4183 break;
4184
4185 /* -- Table ops --------------------------------------------------------- */
4186
4187 case BC_TNEW:
4188 | ins_AD // RA = dst, RD = hbits|asize
4189 | mov L:RB, SAVE_L
4190 | mov L:RB->base, BASE
4191 | mov RA, [DISPATCH+DISPATCH_GL(gc.total)]
4192 | cmp RA, [DISPATCH+DISPATCH_GL(gc.threshold)]
4193 | mov SAVE_PC, PC
4194 | jae >5 // gc.total >= gc.threshold: run a GC step first.
4195 |1:
4196 |.if X64
4197 | mov CARG3d, RD
4198 | and RD, 0x7ff // asize = low 11 bits.
4199 | shr CARG3d, 11 // hbits = remaining upper bits.
4200 |.else
4201 | mov RA, RD
4202 | and RD, 0x7ff // asize = low 11 bits.
4203 | shr RA, 11 // hbits = remaining upper bits.
4204 | mov ARG3, RA
4205 |.endif
4206 | cmp RD, 0x7ff
4207 | je >3
4208 |2:
4209 |.if X64
4210 | mov L:CARG1d, L:RB
4211 | mov CARG2d, RD
4212 |.else
4213 | mov ARG1, L:RB
4214 | mov ARG2, RD
4215 |.endif
4216 | call extern lj_tab_new // (lua_State *L, int32_t asize, uint32_t hbits)
4217 | // Table * returned in eax (RC).
4218 | mov BASE, L:RB->base
4219 | movzx RA, PC_RA // Reload RA from the instruction after the call.
4220 | mov [BASE+RA*8], TAB:RC
4221 | mov dword [BASE+RA*8+4], LJ_TTAB
4222 | ins_next
4223 |3: // Turn 0x7ff into 0x801.
4224 | mov RD, 0x801
4225 | jmp <2
4226 |5:
4227 | mov L:FCARG1, L:RB
4228 | call extern lj_gc_step_fixtop@4 // (lua_State *L)
4229 | movzx RD, PC_RD // Reload RD from the instruction after the call.
4230 | jmp <1
4231 break;
4232 case BC_TDUP:
4233 | ins_AND // RA = dst, RD = table const (~) (holding template table)
4234 | mov L:RB, SAVE_L
4235 | mov RA, [DISPATCH+DISPATCH_GL(gc.total)]
4236 | mov SAVE_PC, PC
4237 | cmp RA, [DISPATCH+DISPATCH_GL(gc.threshold)] // Flags survive the mov below and feed the jae.
4238 | mov L:RB->base, BASE
4239 | jae >3 // gc.total >= gc.threshold: run a GC step first.
4240 |2:
4241 | mov TAB:FCARG2, [KBASE+RD*4] // Caveat: FCARG2 == BASE
4242 | mov L:FCARG1, L:RB // Caveat: FCARG1 == RA
4243 | call extern lj_tab_dup@8 // (lua_State *L, Table *kt)
4244 | // Table * returned in eax (RC).
4245 | mov BASE, L:RB->base
4246 | movzx RA, PC_RA // Reload RA from the instruction after the call.
4247 | mov [BASE+RA*8], TAB:RC
4248 | mov dword [BASE+RA*8+4], LJ_TTAB
4249 | ins_next
4250 |3:
4251 | mov L:FCARG1, L:RB
4252 | call extern lj_gc_step_fixtop@4 // (lua_State *L)
4253 | movzx RD, PC_RD // Need to reload RD.
4254 | not RDa // Complement again, matching the (~) operand form noted for ins_AND above.
4255 | jmp <2
4256 break;
4257
4258 case BC_GGET:
4259 | ins_AND // RA = dst, RD = str const (~)
4260 | mov LFUNC:RB, [BASE-8]
4261 | mov TAB:RB, LFUNC:RB->env // Table = env field of the current function.
4262 | mov STR:RC, [KBASE+RD*4]
4263 | jmp ->BC_TGETS_Z // Shares the string-key lookup of BC_TGETS (RB = table, RC = string).
4264 break;
4265 case BC_GSET:
4266 | ins_AND // RA = src, RD = str const (~)
4267 | mov LFUNC:RB, [BASE-8]
4268 | mov TAB:RB, LFUNC:RB->env // Table = env field of the current function.
4269 | mov STR:RC, [KBASE+RD*4]
4270 | jmp ->BC_TSETS_Z // Shares the string-key store of BC_TSETS (RB = table, RC = string).
4271 break;
4272
4273 case BC_TGETV:
4274 | ins_ABC // RA = dst, RB = table, RC = key
4275 | checktab RB, ->vmeta_tgetv
4276 | mov TAB:RB, [BASE+RB*8]
4277 |
4278 | // Integer key?
4279 |.if DUALNUM
4280 | checkint RC, >5
4281 | mov RC, dword [BASE+RC*8]
4282 |.else
4283 | // Convert number to int and back and compare.
4284 | checknum RC, >5
4285 | movsd xmm0, qword [BASE+RC*8]
4286 | cvttsd2si RC, xmm0
4287 | cvtsi2sd xmm1, RC
4288 | ucomisd xmm0, xmm1
4289 | jne ->vmeta_tgetv // Generic numeric key? Use fallback.
4290 |.endif
4291 | cmp RC, TAB:RB->asize // Takes care of unordered, too.
4292 | jae ->vmeta_tgetv // Not in array part? Use fallback.
4293 | shl RC, 3 // RC = array + key*8, the address of the array slot.
4294 | add RC, TAB:RB->array
4295 | cmp dword [RC+4], LJ_TNIL // Avoid overwriting RB in fastpath.
4296 | je >2
4297 | // Get array slot.
4298 |.if X64
4299 | mov RBa, [RC]
4300 | mov [BASE+RA*8], RBa
4301 |.else
4302 | mov RB, [RC]
4303 | mov RC, [RC+4]
4304 | mov [BASE+RA*8], RB
4305 | mov [BASE+RA*8+4], RC
4306 |.endif
4307 |1:
4308 | ins_next
4309 |
4310 |2: // Check for __index if table value is nil.
4311 | cmp dword TAB:RB->metatable, 0 // Shouldn't overwrite RA for fastpath.
4312 | jz >3
4313 | mov TAB:RA, TAB:RB->metatable
4314 | test byte TAB:RA->nomm, 1<<MM_index
4315 | jz ->vmeta_tgetv // 'no __index' flag NOT set: check.
4316 | movzx RA, PC_RA // Restore RA.
4317 |3: // Result is nil: only the tag word is written.
4318 | mov dword [BASE+RA*8+4], LJ_TNIL
4319 | jmp <1
4320 |
4321 |5: // String key?
4322 | checkstr RC, ->vmeta_tgetv
4323 | mov STR:RC, [BASE+RC*8]
4324 | jmp ->BC_TGETS_Z
4325 break;
4326 case BC_TGETS:
4327 | ins_ABC // RA = dst, RB = table, RC = str const (~)
4328 | not RCa // Complement RC to get the index used for the KBASE load below.
4329 | mov STR:RC, [KBASE+RC*4]
4330 | checktab RB, ->vmeta_tgets
4331 | mov TAB:RB, [BASE+RB*8]
4332 |->BC_TGETS_Z: // RB = GCtab *, RC = GCstr *, refetches PC_RA.
4333 | mov RA, TAB:RB->hmask
4334 | and RA, STR:RC->sid // Node index = hmask & str->sid.
4335 | imul RA, #NODE // Scale by the node size (assumes DynASM's #NODE is the size of the NODE type -- TODO confirm).
4336 | add NODE:RA, TAB:RB->node
4337 |1:
4338 | cmp dword NODE:RA->key.it, LJ_TSTR
4339 | jne >4
4340 | cmp dword NODE:RA->key.gcr, STR:RC // Keys match only if they are the same string object.
4341 | jne >4
4342 | // Ok, key found. Assumes: offsetof(Node, val) == 0
4343 | cmp dword [RA+4], LJ_TNIL // Avoid overwriting RB in fastpath.
4344 | je >5 // Key found, but nil value?
4345 | movzx RC, PC_RA // RC now holds the destination slot number.
4346 | // Get node value.
4347 |.if X64
4348 | mov RBa, [RA]
4349 | mov [BASE+RC*8], RBa
4350 |.else
4351 | mov RB, [RA]
4352 | mov RA, [RA+4]
4353 | mov [BASE+RC*8], RB
4354 | mov [BASE+RC*8+4], RA
4355 |.endif
4356 |2:
4357 | ins_next
4358 |
4359 |3: // Store a nil result (tag word only).
4360 | movzx RC, PC_RA
4361 | mov dword [BASE+RC*8+4], LJ_TNIL
4362 | jmp <2
4363 |
4364 |4: // Follow hash chain.
4365 | mov NODE:RA, NODE:RA->next
4366 | test NODE:RA, NODE:RA
4367 | jnz <1
4368 | // End of hash chain: key not found, nil result.
4369 |
4370 |5: // Check for __index if table value is nil.
4371 | mov TAB:RA, TAB:RB->metatable
4372 | test TAB:RA, TAB:RA
4373 | jz <3 // No metatable: done.
4374 | test byte TAB:RA->nomm, 1<<MM_index
4375 | jnz <3 // 'no __index' flag set: done.
4376 | jmp ->vmeta_tgets // Caveat: preserve STR:RC.
4377 break;
4378 case BC_TGETB:
4379 | ins_ABC // RA = dst, RB = table, RC = byte literal
4380 | checktab RB, ->vmeta_tgetb
4381 | mov TAB:RB, [BASE+RB*8]
4382 | cmp RC, TAB:RB->asize // The literal is used directly as the array index.
4383 | jae ->vmeta_tgetb
4384 | shl RC, 3 // RC = array + index*8, the address of the array slot.
4385 | add RC, TAB:RB->array
4386 | cmp dword [RC+4], LJ_TNIL // Avoid overwriting RB in fastpath.
4387 | je >2
4388 | // Get array slot.
4389 |.if X64
4390 | mov RBa, [RC]
4391 | mov [BASE+RA*8], RBa
4392 |.else
4393 | mov RB, [RC]
4394 | mov RC, [RC+4]
4395 | mov [BASE+RA*8], RB
4396 | mov [BASE+RA*8+4], RC
4397 |.endif
4398 |1:
4399 | ins_next
4400 |
4401 |2: // Check for __index if table value is nil.
4402 | cmp dword TAB:RB->metatable, 0 // Shouldn't overwrite RA for fastpath.
4403 | jz >3
4404 | mov TAB:RA, TAB:RB->metatable
4405 | test byte TAB:RA->nomm, 1<<MM_index
4406 | jz ->vmeta_tgetb // 'no __index' flag NOT set: check.
4407 | movzx RA, PC_RA // Restore RA.
4408 |3: // Result is nil: only the tag word is written.
4409 | mov dword [BASE+RA*8+4], LJ_TNIL
4410 | jmp <1
4411 break;
4412 case BC_TGETR:
4413 | ins_ABC // RA = dst, RB = table, RC = key
4414 | mov TAB:RB, [BASE+RB*8] // No type check on the table operand in this op.
4415 |.if DUALNUM
4416 | mov RC, dword [BASE+RC*8] // No type check on the key either.
4417 |.else
4418 | cvttsd2si RC, qword [BASE+RC*8]
4419 |.endif
4420 | cmp RC, TAB:RB->asize
4421 | jae ->vmeta_tgetr // Not in array part? Use fallback.
4422 | shl RC, 3 // RC = array + key*8, the address of the array slot.
4423 | add RC, TAB:RB->array
4424 | // Get array slot.
4425 |->BC_TGETR_Z:
4426 |.if X64
4427 | mov RBa, [RC]
4428 | mov [BASE+RA*8], RBa
4429 |.else
4430 | mov RB, [RC]
4431 | mov RC, [RC+4]
4432 | mov [BASE+RA*8], RB
4433 | mov [BASE+RA*8+4], RC
4434 |.endif
4435 |->BC_TGETR2_Z:
4436 | ins_next
4437 break;
4438
4439 case BC_TSETV:
4440 | ins_ABC // RA = src, RB = table, RC = key
4441 | checktab RB, ->vmeta_tsetv
4442 | mov TAB:RB, [BASE+RB*8]
4443 |
4444 | // Integer key?
4445 |.if DUALNUM
4446 | checkint RC, >5
4447 | mov RC, dword [BASE+RC*8]
4448 |.else
4449 | // Convert number to int and back and compare.
4450 | checknum RC, >5
4451 | movsd xmm0, qword [BASE+RC*8]
4452 | cvttsd2si RC, xmm0
4453 | cvtsi2sd xmm1, RC
4454 | ucomisd xmm0, xmm1
4455 | jne ->vmeta_tsetv // Generic numeric key? Use fallback.
4456 |.endif
4457 | cmp RC, TAB:RB->asize // Takes care of unordered, too.
4458 | jae ->vmeta_tsetv // Not in array part? Use fallback.
4459 | shl RC, 3 // RC = array + key*8, the address of the array slot.
4460 | add RC, TAB:RB->array
4461 | cmp dword [RC+4], LJ_TNIL
4462 | je >3 // Previous value is nil?
4463 |1:
4464 | test byte TAB:RB->marked, LJ_GC_BLACK // isblack(table)
4465 | jnz >7
4466 |2: // Set array slot.
4467 |.if X64
4468 | mov RBa, [BASE+RA*8]
4469 | mov [RC], RBa
4470 |.else
4471 | mov RB, [BASE+RA*8+4]
4472 | mov RA, [BASE+RA*8]
4473 | mov [RC+4], RB
4474 | mov [RC], RA
4475 |.endif
4476 | ins_next
4477 |
4478 |3: // Check for __newindex if previous value is nil.
4479 | cmp dword TAB:RB->metatable, 0 // Shouldn't overwrite RA for fastpath.
4480 | jz <1
4481 | mov TAB:RA, TAB:RB->metatable
4482 | test byte TAB:RA->nomm, 1<<MM_newindex
4483 | jz ->vmeta_tsetv // 'no __newindex' flag NOT set: check.
4484 | movzx RA, PC_RA // Restore RA.
4485 | jmp <1
4486 |
4487 |5: // String key?
4488 | checkstr RC, ->vmeta_tsetv
4489 | mov STR:RC, [BASE+RC*8]
4490 | jmp ->BC_TSETS_Z
4491 |
4492 |7: // Possible table write barrier for the value. Skip valiswhite check.
4493 | barrierback TAB:RB, RA // RA is passed to the macro and reloaded right after it.
4494 | movzx RA, PC_RA // Restore RA.
4495 | jmp <2
4496 break;
4497 case BC_TSETS:
4498 | ins_ABC // RA = src, RB = table, RC = str const (~)
4499 | not RCa
4500 | mov STR:RC, [KBASE+RC*4]
4501 | checktab RB, ->vmeta_tsets
4502 | mov TAB:RB, [BASE+RB*8]
4503 |->BC_TSETS_Z: // RB = GCtab *, RC = GCstr *, refetches PC_RA.
4504 | mov RA, TAB:RB->hmask
4505 | and RA, STR:RC->sid
4506 | imul RA, #NODE
4507 | mov byte TAB:RB->nomm, 0 // Clear metamethod cache.
4508 | add NODE:RA, TAB:RB->node
4509 |1:
4510 | cmp dword NODE:RA->key.it, LJ_TSTR
4511 | jne >5
4512 | cmp dword NODE:RA->key.gcr, STR:RC
4513 | jne >5
4514 | // Ok, key found. Assumes: offsetof(Node, val) == 0
4515 | cmp dword [RA+4], LJ_TNIL
4516 | je >4 // Previous value is nil?
4517 |2:
4518 | test byte TAB:RB->marked, LJ_GC_BLACK // isblack(table)
4519 | jnz >7
4520 |3: // Set node value.
4521 | movzx RC, PC_RA
4522 |.if X64
4523 | mov RBa, [BASE+RC*8]
4524 | mov [RA], RBa
4525 |.else
4526 | mov RB, [BASE+RC*8+4]
4527 | mov RC, [BASE+RC*8]
4528 | mov [RA+4], RB
4529 | mov [RA], RC
4530 |.endif
4531 | ins_next
4532 |
4533 |4: // Check for __newindex if previous value is nil.
4534 | cmp dword TAB:RB->metatable, 0 // Shouldn't overwrite RA for fastpath.
4535 | jz <2
4536 | mov TMP1, RA // Save RA.
4537 | mov TAB:RA, TAB:RB->metatable
4538 | test byte TAB:RA->nomm, 1<<MM_newindex
4539 | jz ->vmeta_tsets // 'no __newindex' flag NOT set: check.
4540 | mov RA, TMP1 // Restore RA.
4541 | jmp <2
4542 |
4543 |5: // Follow hash chain.
4544 | mov NODE:RA, NODE:RA->next
4545 | test NODE:RA, NODE:RA
4546 | jnz <1
4547 | // End of hash chain: key not found, add a new one.
4548 |
4549 | // But check for __newindex first.
4550 | mov TAB:RA, TAB:RB->metatable
4551 | test TAB:RA, TAB:RA
4552 | jz >6 // No metatable: continue.
4553 | test byte TAB:RA->nomm, 1<<MM_newindex
4554 | jz ->vmeta_tsets // 'no __newindex' flag NOT set: check.
4555 |6:
4556 | mov TMP1, STR:RC
4557 | mov TMP2, LJ_TSTR
4558 | mov TMP3, TAB:RB // Save TAB:RB for us.
4559 |.if X64
4560 | mov L:CARG1d, SAVE_L
4561 | mov L:CARG1d->base, BASE
4562 | lea CARG3, TMP1
4563 | mov CARG2d, TAB:RB
4564 | mov L:RB, L:CARG1d
4565 |.else
4566 | lea RC, TMP1 // Store temp. TValue in TMP1/TMP2.
4567 | mov ARG2, TAB:RB
4568 | mov L:RB, SAVE_L
4569 | mov ARG3, RC
4570 | mov ARG1, L:RB
4571 | mov L:RB->base, BASE
4572 |.endif
4573 | mov SAVE_PC, PC
4574 | call extern lj_tab_newkey // (lua_State *L, GCtab *t, TValue *k)
4575 | // Handles write barrier for the new key. TValue * returned in eax (RC).
4576 | mov BASE, L:RB->base
4577 | mov TAB:RB, TMP3 // Need TAB:RB for barrier.
4578 | mov RA, eax
4579 | jmp <2 // Must check write barrier for value.
4580 |
4581 |7: // Possible table write barrier for the value. Skip valiswhite check.
4582 | barrierback TAB:RB, RC // Destroys STR:RC.
4583 | jmp <3
4584 break;
4585 case BC_TSETB:
4586 | ins_ABC // RA = src, RB = table, RC = byte literal
4587 | checktab RB, ->vmeta_tsetb
4588 | mov TAB:RB, [BASE+RB*8]
4589 | cmp RC, TAB:RB->asize
4590 | jae ->vmeta_tsetb
4591 | shl RC, 3
4592 | add RC, TAB:RB->array
4593 | cmp dword [RC+4], LJ_TNIL
4594 | je >3 // Previous value is nil?
4595 |1:
4596 | test byte TAB:RB->marked, LJ_GC_BLACK // isblack(table)
4597 | jnz >7
4598 |2: // Set array slot.
4599 |.if X64
4600 | mov RAa, [BASE+RA*8]
4601 | mov [RC], RAa
4602 |.else
4603 | mov RB, [BASE+RA*8+4]
4604 | mov RA, [BASE+RA*8]
4605 | mov [RC+4], RB
4606 | mov [RC], RA
4607 |.endif
4608 | ins_next
4609 |
4610 |3: // Check for __newindex if previous value is nil.
4611 | cmp dword TAB:RB->metatable, 0 // Shouldn't overwrite RA for fastpath.
4612 | jz <1
4613 | mov TAB:RA, TAB:RB->metatable
4614 | test byte TAB:RA->nomm, 1<<MM_newindex
4615 | jz ->vmeta_tsetb // 'no __newindex' flag NOT set: check.
4616 | movzx RA, PC_RA // Restore RA.
4617 | jmp <1
4618 |
4619 |7: // Possible table write barrier for the value. Skip valiswhite check.
4620 | barrierback TAB:RB, RA
4621 | movzx RA, PC_RA // Restore RA.
4622 | jmp <2
4623 break;
4624 case BC_TSETR:
4625 | ins_ABC // RA = src, RB = table, RC = key
4626 | mov TAB:RB, [BASE+RB*8]
4627 |.if DUALNUM
4628 | mov RC, dword [BASE+RC*8]
4629 |.else
4630 | cvttsd2si RC, qword [BASE+RC*8]
4631 |.endif
4632 | test byte TAB:RB->marked, LJ_GC_BLACK // isblack(table)
4633 | jnz >7
4634 |2:
4635 | cmp RC, TAB:RB->asize
4636 | jae ->vmeta_tsetr
4637 | shl RC, 3
4638 | add RC, TAB:RB->array
4639 | // Set array slot.
4640 |->BC_TSETR_Z:
4641 |.if X64
4642 | mov RBa, [BASE+RA*8]
4643 | mov [RC], RBa
4644 |.else
4645 | mov RB, [BASE+RA*8+4]
4646 | mov RA, [BASE+RA*8]
4647 | mov [RC+4], RB
4648 | mov [RC], RA
4649 |.endif
4650 | ins_next
4651 |
4652 |7: // Possible table write barrier for the value. Skip valiswhite check.
4653 | barrierback TAB:RB, RA
4654 | movzx RA, PC_RA // Restore RA.
4655 | jmp <2
4656 break;
4657
4658 case BC_TSETM:
4659 | ins_AD // RA = base (table at base-1), RD = num const (start index)
4660 | mov TMP1, KBASE // Need one more free register.
4661 | mov KBASE, dword [KBASE+RD*8] // Integer constant is in lo-word.
4662 |1:
4663 | lea RA, [BASE+RA*8]
4664 | mov TAB:RB, [RA-8] // Guaranteed to be a table.
4665 | test byte TAB:RB->marked, LJ_GC_BLACK // isblack(table)
4666 | jnz >7
4667 |2:
4668 | mov RD, MULTRES
4669 | sub RD, 1
4670 | jz >4 // Nothing to copy?
4671 | add RD, KBASE // Compute needed size.
4672 | cmp RD, TAB:RB->asize
4673 | ja >5 // Doesn't fit into array part?
4674 | sub RD, KBASE
4675 | shl KBASE, 3
4676 | add KBASE, TAB:RB->array
4677 |3: // Copy result slots to table.
4678 |.if X64
4679 | mov RBa, [RA]
4680 | add RA, 8
4681 | mov [KBASE], RBa
4682 |.else
4683 | mov RB, [RA]
4684 | mov [KBASE], RB
4685 | mov RB, [RA+4]
4686 | add RA, 8
4687 | mov [KBASE+4], RB
4688 |.endif
4689 | add KBASE, 8
4690 | sub RD, 1
4691 | jnz <3
4692 |4:
4693 | mov KBASE, TMP1
4694 | ins_next
4695 |
4696 |5: // Need to resize array part.
4697 |.if X64
4698 | mov L:CARG1d, SAVE_L
4699 | mov L:CARG1d->base, BASE // Caveat: CARG2d/CARG3d may be BASE.
4700 | mov CARG2d, TAB:RB
4701 | mov CARG3d, RD
4702 | mov L:RB, L:CARG1d
4703 |.else
4704 | mov ARG2, TAB:RB
4705 | mov L:RB, SAVE_L
4706 | mov L:RB->base, BASE
4707 | mov ARG3, RD
4708 | mov ARG1, L:RB
4709 |.endif
4710 | mov SAVE_PC, PC
4711 | call extern lj_tab_reasize // (lua_State *L, GCtab *t, int nasize)
4712 | mov BASE, L:RB->base
4713 | movzx RA, PC_RA // Restore RA.
4714 | jmp <1 // Retry.
4715 |
4716 |7: // Possible table write barrier for any value. Skip valiswhite check.
4717 | barrierback TAB:RB, RD
4718 | jmp <2
4719 break;
4720
4721 /* -- Calls and vararg handling ----------------------------------------- */
4722
4723 case BC_CALL: case BC_CALLM:
4724 | ins_A_C // RA = base, (RB = nresults+1,) RC = nargs+1 | extra_nargs
4725 if (op == BC_CALLM) {
4726 | add NARGS:RD, MULTRES
4727 }
4728 | cmp dword [BASE+RA*8+4], LJ_TFUNC
4729 | mov LFUNC:RB, [BASE+RA*8]
4730 | jne ->vmeta_call_ra
4731 | lea BASE, [BASE+RA*8+8]
4732 | ins_call
4733 break;
4734
4735 case BC_CALLMT:
4736 | ins_AD // RA = base, RD = extra_nargs
4737 | add NARGS:RD, MULTRES
4738 | // Fall through. Assumes BC_CALLT follows and ins_AD is a no-op.
4739 break;
4740 case BC_CALLT:
4741 | ins_AD // RA = base, RD = nargs+1
4742 | lea RA, [BASE+RA*8+8]
4743 | mov KBASE, BASE // Use KBASE for move + vmeta_call hint.
4744 | mov LFUNC:RB, [RA-8]
4745 | cmp dword [RA-4], LJ_TFUNC
4746 | jne ->vmeta_call
4747 |->BC_CALLT_Z:
4748 | mov PC, [BASE-4]
4749 | test PC, FRAME_TYPE
4750 | jnz >7
4751 |1:
4752 | mov [BASE-8], LFUNC:RB // Copy function down, reloaded below.
4753 | mov MULTRES, NARGS:RD
4754 | sub NARGS:RD, 1
4755 | jz >3
4756 |2: // Move args down.
4757 |.if X64
4758 | mov RBa, [RA]
4759 | add RA, 8
4760 | mov [KBASE], RBa
4761 |.else
4762 | mov RB, [RA]
4763 | mov [KBASE], RB
4764 | mov RB, [RA+4]
4765 | add RA, 8
4766 | mov [KBASE+4], RB
4767 |.endif
4768 | add KBASE, 8
4769 | sub NARGS:RD, 1
4770 | jnz <2
4771 |
4772 | mov LFUNC:RB, [BASE-8]
4773 |3:
4774 | mov NARGS:RD, MULTRES
4775 | cmp byte LFUNC:RB->ffid, 1 // (> FF_C) Calling a fast function?
4776 | ja >5
4777 |4:
4778 | ins_callt
4779 |
4780 |5: // Tailcall to a fast function.
4781 | test PC, FRAME_TYPE // Lua frame below?
4782 | jnz <4
4783 | movzx RA, PC_RA
4784 | not RAa
4785 | mov LFUNC:KBASE, [BASE+RA*8-8] // Need to prepare KBASE.
4786 | mov KBASE, LFUNC:KBASE->pc
4787 | mov KBASE, [KBASE+PC2PROTO(k)]
4788 | jmp <4
4789 |
4790 |7: // Tailcall from a vararg function.
4791 | sub PC, FRAME_VARG
4792 | test PC, FRAME_TYPEP
4793 | jnz >8 // Vararg frame below?
4794 | sub BASE, PC // Need to relocate BASE/KBASE down.
4795 | mov KBASE, BASE
4796 | mov PC, [BASE-4]
4797 | jmp <1
4798 |8:
4799 | add PC, FRAME_VARG
4800 | jmp <1
4801 break;
4802
4803 case BC_ITERC:
4804 | ins_A // RA = base, (RB = nresults+1,) RC = nargs+1 (2+1)
4805 | lea RA, [BASE+RA*8+8] // fb = base+1
4806 |.if X64
4807 | mov RBa, [RA-24] // Copy state. fb[0] = fb[-3].
4808 | mov RCa, [RA-16] // Copy control var. fb[1] = fb[-2].
4809 | mov [RA], RBa
4810 | mov [RA+8], RCa
4811 |.else
4812 | mov RB, [RA-24] // Copy state. fb[0] = fb[-3].
4813 | mov RC, [RA-20]
4814 | mov [RA], RB
4815 | mov [RA+4], RC
4816 | mov RB, [RA-16] // Copy control var. fb[1] = fb[-2].
4817 | mov RC, [RA-12]
4818 | mov [RA+8], RB
4819 | mov [RA+12], RC
4820 |.endif
4821 | mov LFUNC:RB, [RA-32] // Copy callable. fb[-1] = fb[-4]
4822 | mov RC, [RA-28]
4823 | mov [RA-8], LFUNC:RB
4824 | mov [RA-4], RC
4825 | cmp RC, LJ_TFUNC // Handle like a regular 2-arg call.
4826 | mov NARGS:RD, 2+1
4827 | jne ->vmeta_call
4828 | mov BASE, RA
4829 | ins_call
4830 break;
4831
4832 case BC_ITERN:
4833 |.if JIT
4834 | hotloop RB
4835 |.endif
4836 |->vm_IITERN:
4837 | ins_A // RA = base, (RB = nresults+1, RC = nargs+1 (2+1))
4838 | mov TMP1, KBASE // Need two more free registers.
4839 | mov TMP2, DISPATCH
4840 | mov TAB:RB, [BASE+RA*8-16]
4841 | mov RC, [BASE+RA*8-8] // Get index from control var.
4842 | mov DISPATCH, TAB:RB->asize
4843 | add PC, 4
4844 | mov KBASE, TAB:RB->array
4845 |1: // Traverse array part.
4846 | cmp RC, DISPATCH; jae >5 // Index points after array part?
4847 | cmp dword [KBASE+RC*8+4], LJ_TNIL; je >4
4848 |.if DUALNUM
4849 | mov dword [BASE+RA*8+4], LJ_TISNUM
4850 | mov dword [BASE+RA*8], RC
4851 |.else
4852 | cvtsi2sd xmm0, RC
4853 |.endif
4854 | // Copy array slot to returned value.
4855 |.if X64
4856 | mov RBa, [KBASE+RC*8]
4857 | mov [BASE+RA*8+8], RBa
4858 |.else
4859 | mov RB, [KBASE+RC*8+4]
4860 | mov [BASE+RA*8+12], RB
4861 | mov RB, [KBASE+RC*8]
4862 | mov [BASE+RA*8+8], RB
4863 |.endif
4864 | add RC, 1
4865 | // Return array index as a numeric key.
4866 |.if DUALNUM
4867 | // See above.
4868 |.else
4869 | movsd qword [BASE+RA*8], xmm0
4870 |.endif
4871 | mov [BASE+RA*8-8], RC // Update control var.
4872 |2:
4873 | movzx RD, PC_RD // Get target from ITERL.
4874 | branchPC RD
4875 |3:
4876 | mov DISPATCH, TMP2
4877 | mov KBASE, TMP1
4878 | ins_next
4879 |
4880 |4: // Skip holes in array part.
4881 | add RC, 1
4882 | jmp <1
4883 |
4884 |5: // Traverse hash part.
4885 | sub RC, DISPATCH
4886 |6:
4887 | cmp RC, TAB:RB->hmask; ja <3 // End of iteration? Branch to ITERL+1.
4888 | imul KBASE, RC, #NODE
4889 | add NODE:KBASE, TAB:RB->node
4890 | cmp dword NODE:KBASE->val.it, LJ_TNIL; je >7
4891 | lea DISPATCH, [RC+DISPATCH+1]
4892 | // Copy key and value from hash slot.
4893 |.if X64
4894 | mov RBa, NODE:KBASE->key
4895 | mov RCa, NODE:KBASE->val
4896 | mov [BASE+RA*8], RBa
4897 | mov [BASE+RA*8+8], RCa
4898 |.else
4899 | mov RB, NODE:KBASE->key.gcr
4900 | mov RC, NODE:KBASE->key.it
4901 | mov [BASE+RA*8], RB
4902 | mov [BASE+RA*8+4], RC
4903 | mov RB, NODE:KBASE->val.gcr
4904 | mov RC, NODE:KBASE->val.it
4905 | mov [BASE+RA*8+8], RB
4906 | mov [BASE+RA*8+12], RC
4907 |.endif
4908 | mov [BASE+RA*8-8], DISPATCH
4909 | jmp <2
4910 |
4911 |7: // Skip holes in hash part.
4912 | add RC, 1
4913 | jmp <6
4914 break;
4915
4916 case BC_ISNEXT:
4917 | ins_AD // RA = base, RD = target (points to ITERN)
4918 | cmp dword [BASE+RA*8-20], LJ_TFUNC; jne >5
4919 | mov CFUNC:RB, [BASE+RA*8-24]
4920 | cmp dword [BASE+RA*8-12], LJ_TTAB; jne >5
4921 | cmp dword [BASE+RA*8-4], LJ_TNIL; jne >5
4922 | cmp byte CFUNC:RB->ffid, FF_next_N; jne >5
4923 | branchPC RD
4924 | mov dword [BASE+RA*8-8], 0 // Initialize control var.
4925 | mov dword [BASE+RA*8-4], LJ_KEYINDEX
4926 |1:
4927 | ins_next
4928 |5: // Despecialize bytecode if any of the checks fail.
4929 | mov PC_OP, BC_JMP
4930 | branchPC RD
4931 |.if JIT
4932 | cmp byte [PC], BC_ITERN
4933 | jne >6
4934 |.endif
4935 | mov byte [PC], BC_ITERC
4936 | jmp <1
4937 |.if JIT
4938 |6: // Unpatch JLOOP.
4939 | mov RA, [DISPATCH+DISPATCH_J(trace)]
4940 | movzx RC, word [PC+2]
4941 | mov TRACE:RA, [RA+RC*4]
4942 | mov eax, TRACE:RA->startins
4943 | mov al, BC_ITERC
4944 | mov dword [PC], eax
4945 | jmp <1
4946 |.endif
4947 break;
4948
4949 case BC_VARG:
4950 | ins_ABC // RA = base, RB = nresults+1, RC = numparams
4951 | mov TMP1, KBASE // Need one more free register.
4952 | lea KBASE, [BASE+RC*8+(8+FRAME_VARG)]
4953 | lea RA, [BASE+RA*8]
4954 | sub KBASE, [BASE-4]
4955 | // Note: KBASE may now be even _above_ BASE if nargs was < numparams.
4956 | test RB, RB
4957 | jz >5 // Copy all varargs?
4958 | lea RB, [RA+RB*8-8]
4959 | cmp KBASE, BASE // No vararg slots?
4960 | jnb >2
4961 |1: // Copy vararg slots to destination slots.
4962 |.if X64
4963 | mov RCa, [KBASE-8]
4964 | add KBASE, 8
4965 | mov [RA], RCa
4966 |.else
4967 | mov RC, [KBASE-8]
4968 | mov [RA], RC
4969 | mov RC, [KBASE-4]
4970 | add KBASE, 8
4971 | mov [RA+4], RC
4972 |.endif
4973 | add RA, 8
4974 | cmp RA, RB // All destination slots filled?
4975 | jnb >3
4976 | cmp KBASE, BASE // No more vararg slots?
4977 | jb <1
4978 |2: // Fill up remainder with nil.
4979 | mov dword [RA+4], LJ_TNIL
4980 | add RA, 8
4981 | cmp RA, RB
4982 | jb <2
4983 |3:
4984 | mov KBASE, TMP1
4985 | ins_next
4986 |
4987 |5: // Copy all varargs.
4988 | mov MULTRES, 1 // MULTRES = 0+1
4989 | mov RC, BASE
4990 | sub RC, KBASE
4991 | jbe <3 // No vararg slots?
4992 | mov RB, RC
4993 | shr RB, 3
4994 | add RB, 1
4995 | mov MULTRES, RB // MULTRES = #varargs+1
4996 | mov L:RB, SAVE_L
4997 | add RC, RA
4998 | cmp RC, L:RB->maxstack
4999 | ja >7 // Need to grow stack?
5000 |6: // Copy all vararg slots.
5001 |.if X64
5002 | mov RCa, [KBASE-8]
5003 | add KBASE, 8
5004 | mov [RA], RCa
5005 |.else
5006 | mov RC, [KBASE-8]
5007 | mov [RA], RC
5008 | mov RC, [KBASE-4]
5009 | add KBASE, 8
5010 | mov [RA+4], RC
5011 |.endif
5012 | add RA, 8
5013 | cmp KBASE, BASE // No more vararg slots?
5014 | jb <6
5015 | jmp <3
5016 |
5017 |7: // Grow stack for varargs.
5018 | mov L:RB->base, BASE
5019 | mov L:RB->top, RA
5020 | mov SAVE_PC, PC
5021 | sub KBASE, BASE // Need delta, because BASE may change.
5022 | mov FCARG2, MULTRES
5023 | sub FCARG2, 1
5024 | mov FCARG1, L:RB
5025 | call extern lj_state_growstack@8 // (lua_State *L, int n)
5026 | mov BASE, L:RB->base
5027 | mov RA, L:RB->top
5028 | add KBASE, BASE
5029 | jmp <6
5030 break;
5031
5032 /* -- Returns ----------------------------------------------------------- */
5033
5034 case BC_RETM:
5035 | ins_AD // RA = results, RD = extra_nresults
5036 | add RD, MULTRES // MULTRES >=1, so RD >=1.
5037 | // Fall through. Assumes BC_RET follows and ins_AD is a no-op.
5038 break;
5039
5040 case BC_RET: case BC_RET0: case BC_RET1:
5041 | ins_AD // RA = results, RD = nresults+1
5042 if (op != BC_RET0) {
5043 | shl RA, 3
5044 }
5045 |1:
5046 | mov PC, [BASE-4]
5047 | mov MULTRES, RD // Save nresults+1.
5048 | test PC, FRAME_TYPE // Check frame type marker.
5049 | jnz >7 // Not returning to a fixarg Lua func?
5050 switch (op) {
5051 case BC_RET:
5052 |->BC_RET_Z:
5053 | mov KBASE, BASE // Use KBASE for result move.
5054 | sub RD, 1
5055 | jz >3
5056 |2: // Move results down.
5057 |.if X64
5058 | mov RBa, [KBASE+RA]
5059 | mov [KBASE-8], RBa
5060 |.else
5061 | mov RB, [KBASE+RA]
5062 | mov [KBASE-8], RB
5063 | mov RB, [KBASE+RA+4]
5064 | mov [KBASE-4], RB
5065 |.endif
5066 | add KBASE, 8
5067 | sub RD, 1
5068 | jnz <2
5069 |3:
5070 | mov RD, MULTRES // Note: MULTRES may be >255.
5071 | movzx RB, PC_RB // So cannot compare with RDL!
5072 |5:
5073 | cmp RB, RD // More results expected?
5074 | ja >6
5075 break;
5076 case BC_RET1:
5077 |.if X64
5078 | mov RBa, [BASE+RA]
5079 | mov [BASE-8], RBa
5080 |.else
5081 | mov RB, [BASE+RA+4]
5082 | mov [BASE-4], RB
5083 | mov RB, [BASE+RA]
5084 | mov [BASE-8], RB
5085 |.endif
5086 /* fallthrough */
5087 case BC_RET0:
5088 |5:
5089 | cmp PC_RB, RDL // More results expected?
5090 | ja >6
5091 default:
5092 break;
5093 }
5094 | movzx RA, PC_RA
5095 | not RAa // Note: ~RA = -(RA+1)
5096 | lea BASE, [BASE+RA*8] // base = base - (RA+1)*8
5097 | mov LFUNC:KBASE, [BASE-8]
5098 | mov KBASE, LFUNC:KBASE->pc
5099 | mov KBASE, [KBASE+PC2PROTO(k)]
5100 | ins_next
5101 |
5102 |6: // Fill up results with nil.
5103 if (op == BC_RET) {
5104 | mov dword [KBASE-4], LJ_TNIL // Note: relies on shifted base.
5105 | add KBASE, 8
5106 } else {
5107 | mov dword [BASE+RD*8-12], LJ_TNIL
5108 }
5109 | add RD, 1
5110 | jmp <5
5111 |
5112 |7: // Non-standard return case.
5113 | lea RB, [PC-FRAME_VARG]
5114 | test RB, FRAME_TYPEP
5115 | jnz ->vm_return
5116 | // Return from vararg function: relocate BASE down and RA up.
5117 | sub BASE, RB
5118 if (op != BC_RET0) {
5119 | add RA, RB
5120 }
5121 | jmp <1
5122 break;
5123
5124 /* -- Loops and branches ------------------------------------------------ */
5125
5126 |.define FOR_IDX, [RA]; .define FOR_TIDX, dword [RA+4]
5127 |.define FOR_STOP, [RA+8]; .define FOR_TSTOP, dword [RA+12]
5128 |.define FOR_STEP, [RA+16]; .define FOR_TSTEP, dword [RA+20]
5129 |.define FOR_EXT, [RA+24]; .define FOR_TEXT, dword [RA+28]
5130
5131 case BC_FORL:
5132 |.if JIT
5133 | hotloop RB
5134 |.endif
5135 | // Fall through. Assumes BC_IFORL follows and ins_AJ is a no-op.
5136 break;
5137
5138 case BC_JFORI:
5139 case BC_JFORL:
5140 #if !LJ_HASJIT
5141 break;
5142 #endif
5143 case BC_FORI:
5144 case BC_IFORL:
5145 vk = (op == BC_IFORL || op == BC_JFORL);
5146 | ins_AJ // RA = base, RD = target (after end of loop or start of loop)
5147 | lea RA, [BASE+RA*8]
5148 if (LJ_DUALNUM) {
5149 | cmp FOR_TIDX, LJ_TISNUM; jne >9
5150 if (!vk) {
5151 | cmp FOR_TSTOP, LJ_TISNUM; jne ->vmeta_for
5152 | cmp FOR_TSTEP, LJ_TISNUM; jne ->vmeta_for
5153 | mov RB, dword FOR_IDX
5154 | cmp dword FOR_STEP, 0; jl >5
5155 } else {
5156 #ifdef LUA_USE_ASSERT
5157 | cmp FOR_TSTOP, LJ_TISNUM; jne ->assert_bad_for_arg_type
5158 | cmp FOR_TSTEP, LJ_TISNUM; jne ->assert_bad_for_arg_type
5159 #endif
5160 | mov RB, dword FOR_STEP
5161 | test RB, RB; js >5
5162 | add RB, dword FOR_IDX; jo >1
5163 | mov dword FOR_IDX, RB
5164 }
5165 | cmp RB, dword FOR_STOP
5166 | mov FOR_TEXT, LJ_TISNUM
5167 | mov dword FOR_EXT, RB
5168 if (op == BC_FORI) {
5169 | jle >7
5170 |1:
5171 |6:
5172 | branchPC RD
5173 } else if (op == BC_JFORI) {
5174 | branchPC RD
5175 | movzx RD, PC_RD
5176 | jle =>BC_JLOOP
5177 |1:
5178 |6:
5179 } else if (op == BC_IFORL) {
5180 | jg >7
5181 |6:
5182 | branchPC RD
5183 |1:
5184 } else {
5185 | jle =>BC_JLOOP
5186 |1:
5187 |6:
5188 }
5189 |7:
5190 | ins_next
5191 |
5192 |5: // Invert check for negative step.
5193 if (vk) {
5194 | add RB, dword FOR_IDX; jo <1
5195 | mov dword FOR_IDX, RB
5196 }
5197 | cmp RB, dword FOR_STOP
5198 | mov FOR_TEXT, LJ_TISNUM
5199 | mov dword FOR_EXT, RB
5200 if (op == BC_FORI) {
5201 | jge <7
5202 } else if (op == BC_JFORI) {
5203 | branchPC RD
5204 | movzx RD, PC_RD
5205 | jge =>BC_JLOOP
5206 } else if (op == BC_IFORL) {
5207 | jl <7
5208 } else {
5209 | jge =>BC_JLOOP
5210 }
5211 | jmp <6
5212 |9: // Fallback to FP variant.
5213 } else if (!vk) {
5214 | cmp FOR_TIDX, LJ_TISNUM
5215 }
5216 if (!vk) {
5217 | jae ->vmeta_for
5218 | cmp FOR_TSTOP, LJ_TISNUM; jae ->vmeta_for
5219 } else {
5220 #ifdef LUA_USE_ASSERT
5221 | cmp FOR_TSTOP, LJ_TISNUM; jae ->assert_bad_for_arg_type
5222 | cmp FOR_TSTEP, LJ_TISNUM; jae ->assert_bad_for_arg_type
5223 #endif
5224 }
5225 | mov RB, FOR_TSTEP // Load type/hiword of for step.
5226 if (!vk) {
5227 | cmp RB, LJ_TISNUM; jae ->vmeta_for
5228 }
5229 | movsd xmm0, qword FOR_IDX
5230 | movsd xmm1, qword FOR_STOP
5231 if (vk) {
5232 | addsd xmm0, qword FOR_STEP
5233 | movsd qword FOR_IDX, xmm0
5234 | test RB, RB; js >3
5235 } else {
5236 | jl >3
5237 }
5238 | ucomisd xmm1, xmm0
5239 |1:
5240 | movsd qword FOR_EXT, xmm0
5241 if (op == BC_FORI) {
5242 |.if DUALNUM
5243 | jnb <7
5244 |.else
5245 | jnb >2
5246 | branchPC RD
5247 |.endif
5248 } else if (op == BC_JFORI) {
5249 | branchPC RD
5250 | movzx RD, PC_RD
5251 | jnb =>BC_JLOOP
5252 } else if (op == BC_IFORL) {
5253 |.if DUALNUM
5254 | jb <7
5255 |.else
5256 | jb >2
5257 | branchPC RD
5258 |.endif
5259 } else {
5260 | jnb =>BC_JLOOP
5261 }
5262 |.if DUALNUM
5263 | jmp <6
5264 |.else
5265 |2:
5266 | ins_next
5267 |.endif
5268 |
5269 |3: // Invert comparison if step is negative.
5270 | ucomisd xmm0, xmm1
5271 | jmp <1
5272 break;
5273
5274 case BC_ITERL:
5275 |.if JIT
5276 | hotloop RB
5277 |.endif
5278 | // Fall through. Assumes BC_IITERL follows and ins_AJ is a no-op.
5279 break;
5280
5281 case BC_JITERL:
5282 #if !LJ_HASJIT
5283 break;
5284 #endif
5285 case BC_IITERL:
5286 | ins_AJ // RA = base, RD = target
5287 | lea RA, [BASE+RA*8]
5288 | mov RB, [RA+4]
5289 | cmp RB, LJ_TNIL; je >1 // Stop if iterator returned nil.
5290 if (op == BC_JITERL) {
5291 | mov [RA-4], RB
5292 | mov RB, [RA]
5293 | mov [RA-8], RB
5294 | jmp =>BC_JLOOP
5295 } else {
5296 | branchPC RD // Otherwise save control var + branch.
5297 | mov RD, [RA]
5298 | mov [RA-4], RB
5299 | mov [RA-8], RD
5300 }
5301 |1:
5302 | ins_next
5303 break;
5304
5305 case BC_LOOP:
5306 | ins_A // RA = base, RD = target (loop extent)
5307 | // Note: RA/RD is only used by trace recorder to determine scope/extent
5308 | // This opcode does NOT jump; its only purpose is to detect a hot loop.
5309 |.if JIT
5310 | hotloop RB
5311 |.endif
5312 | // Fall through. Assumes BC_ILOOP follows and ins_A is a no-op.
5313 break;
5314
5315 case BC_ILOOP:
5316 | ins_A // RA = base, RD = target (loop extent)
5317 | ins_next
5318 break;
5319
5320 case BC_JLOOP:
5321 |.if JIT
5322 | ins_AD // RA = base (ignored), RD = traceno
5323 | mov RA, [DISPATCH+DISPATCH_J(trace)]
5324 | mov TRACE:RD, [RA+RD*4]
5325 | mov RDa, TRACE:RD->mcode
5326 | mov L:RB, SAVE_L
5327 | mov [DISPATCH+DISPATCH_GL(jit_base)], BASE
5328 | mov [DISPATCH+DISPATCH_GL(tmpbuf.L)], L:RB
5329 | // Save additional callee-save registers only used in compiled code.
5330 |.if X64WIN
5331 | mov TMPQ, r12
5332 | mov TMPa, r13
5333 | mov CSAVE_4, r14
5334 | mov CSAVE_3, r15
5335 | mov RAa, rsp
5336 | sub rsp, 9*16+4*8
5337 | movdqa [RAa], xmm6
5338 | movdqa [RAa-1*16], xmm7
5339 | movdqa [RAa-2*16], xmm8
5340 | movdqa [RAa-3*16], xmm9
5341 | movdqa [RAa-4*16], xmm10
5342 | movdqa [RAa-5*16], xmm11
5343 | movdqa [RAa-6*16], xmm12
5344 | movdqa [RAa-7*16], xmm13
5345 | movdqa [RAa-8*16], xmm14
5346 | movdqa [RAa-9*16], xmm15
5347 |.elif X64
5348 | mov TMPQ, r12
5349 | mov TMPa, r13
5350 | sub rsp, 16
5351 |.endif
5352 | jmp RDa
5353 |.endif
5354 break;
5355
5356 case BC_JMP:
5357 | ins_AJ // RA = unused, RD = target
5358 | branchPC RD
5359 | ins_next
5360 break;
5361
5362 /* -- Function headers -------------------------------------------------- */
5363
5364 /*
5365 ** Reminder: A function may be called with func/args above L->maxstack,
5366 ** i.e. occupying EXTRA_STACK slots. And vmeta_call may add one extra slot,
5367 ** too. This means all FUNC* ops (including fast functions) must check
5368 ** for stack overflow _before_ adding more slots!
5369 */
5370
5371 case BC_FUNCF:
5372 |.if JIT
5373 | hotcall RB
5374 |.endif
5375 case BC_FUNCV: /* NYI: compiled vararg functions. */
5376 | // Fall through. Assumes BC_IFUNCF/BC_IFUNCV follow and ins_AD is a no-op.
5377 break;
5378
5379 case BC_JFUNCF:
5380 #if !LJ_HASJIT
5381 break;
5382 #endif
5383 case BC_IFUNCF:
5384 | ins_AD // BASE = new base, RA = framesize, RD = nargs+1
5385 | mov KBASE, [PC-4+PC2PROTO(k)]
5386 | mov L:RB, SAVE_L
5387 | lea RA, [BASE+RA*8] // Top of frame.
5388 | cmp RA, L:RB->maxstack
5389 | ja ->vm_growstack_f
5390 | movzx RA, byte [PC-4+PC2PROTO(numparams)]
5391 | cmp NARGS:RD, RA // Check for missing parameters.
5392 | jbe >3
5393 |2:
5394 if (op == BC_JFUNCF) {
5395 | movzx RD, PC_RD
5396 | jmp =>BC_JLOOP
5397 } else {
5398 | ins_next
5399 }
5400 |
5401 |3: // Clear missing parameters.
5402 | mov dword [BASE+NARGS:RD*8-4], LJ_TNIL
5403 | add NARGS:RD, 1
5404 | cmp NARGS:RD, RA
5405 | jbe <3
5406 | jmp <2
5407 break;
5408
5409 case BC_JFUNCV:
5410 #if !LJ_HASJIT
5411 break;
5412 #endif
5413 | int3 // NYI: compiled vararg functions
5414 break; /* NYI: compiled vararg functions. */
5415
5416 case BC_IFUNCV:
5417 | ins_AD // BASE = new base, RA = framesize, RD = nargs+1
5418 | lea RB, [NARGS:RD*8+FRAME_VARG]
5419 | lea RD, [BASE+NARGS:RD*8]
5420 | mov LFUNC:KBASE, [BASE-8]
5421 | mov [RD-4], RB // Store delta + FRAME_VARG.
5422 | mov [RD-8], LFUNC:KBASE // Store copy of LFUNC.
5423 | mov L:RB, SAVE_L
5424 | lea RA, [RD+RA*8]
5425 | cmp RA, L:RB->maxstack
5426 | ja ->vm_growstack_v // Need to grow stack.
5427 | mov RA, BASE
5428 | mov BASE, RD
5429 | movzx RB, byte [PC-4+PC2PROTO(numparams)]
5430 | test RB, RB
5431 | jz >2
5432 |1: // Copy fixarg slots up to new frame.
5433 | add RA, 8
5434 | cmp RA, BASE
5435 | jnb >3 // Less args than parameters?
5436 | mov KBASE, [RA-8]
5437 | mov [RD], KBASE
5438 | mov KBASE, [RA-4]
5439 | mov [RD+4], KBASE
5440 | add RD, 8
5441 | mov dword [RA-4], LJ_TNIL // Clear old fixarg slot (help the GC).
5442 | sub RB, 1
5443 | jnz <1
5444 |2:
5445 if (op == BC_JFUNCV) {
5446 | movzx RD, PC_RD
5447 | jmp =>BC_JLOOP
5448 } else {
5449 | mov KBASE, [PC-4+PC2PROTO(k)]
5450 | ins_next
5451 }
5452 |
5453 |3: // Clear missing parameters.
5454 | mov dword [RD+4], LJ_TNIL
5455 | add RD, 8
5456 | sub RB, 1
5457 | jnz <3
5458 | jmp <2
5459 break;
5460
5461 case BC_FUNCC:
5462 case BC_FUNCCW:
5463 | ins_AD // BASE = new base, RA = ins RA|RD (unused), RD = nargs+1
5464 | mov CFUNC:RB, [BASE-8]
5465 | mov KBASEa, CFUNC:RB->f
5466 | mov L:RB, SAVE_L
5467 | lea RD, [BASE+NARGS:RD*8-8]
5468 | mov L:RB->base, BASE
5469 | lea RA, [RD+8*LUA_MINSTACK]
5470 | cmp RA, L:RB->maxstack
5471 | mov L:RB->top, RD
5472 if (op == BC_FUNCC) {
5473 |.if X64
5474 | mov CARG1d, L:RB // Caveat: CARG1d may be RA.
5475 |.else
5476 | mov ARG1, L:RB
5477 |.endif
5478 } else {
5479 |.if X64
5480 | mov CARG2, KBASEa
5481 | mov CARG1d, L:RB // Caveat: CARG1d may be RA.
5482 |.else
5483 | mov ARG2, KBASEa
5484 | mov ARG1, L:RB
5485 |.endif
5486 }
5487 | ja ->vm_growstack_c // Need to grow stack.
5488 | set_vmstate C
5489 if (op == BC_FUNCC) {
5490 | call KBASEa // (lua_State *L)
5491 } else {
5492 | // (lua_State *L, lua_CFunction f)
5493 | call aword [DISPATCH+DISPATCH_GL(wrapf)]
5494 }
5495 | // nresults returned in eax (RD).
5496 | mov BASE, L:RB->base
5497 | mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB
5498 | set_vmstate INTERP
5499 | lea RA, [BASE+RD*8]
5500 | neg RA
5501 | add RA, L:RB->top // RA = (L->top-(L->base+nresults))*8
5502 | mov PC, [BASE-4] // Fetch PC of caller.
5503 | jmp ->vm_returnc
5504 break;
5505
5506 /* ---------------------------------------------------------------------- */
5507
5508 default:
5509 fprintf(stderr, "Error: undefined opcode BC_%s\n", bc_names[op]);
5510 exit(2);
5511 break;
5512 }
5513 }
5514
/*
** Build the whole backend: first the subroutines, then one code body
** per bytecode opcode.
** Returns BC__MAX, i.e. the number of opcodes handed to build_ins().
** NOTE(review): how the caller uses the return value is not visible here.
*/
5515 static int build_backend(BuildCtx *ctx)
5516 {
5517 int op;
/* Reserve BC__MAX dynamic (=>) labels up front. The opcode bodies refer to
** opcode-indexed labels such as =>BC_JLOOP, so this must precede them. */
5518 dasm_growpc(Dst, BC__MAX);
/* Emitted before the section switch below, so presumably not into code_op. */
5519 build_subroutines(ctx);
/* Switch output to the code_op section declared at the top of the file. */
5520 |.code_op
/* One build_ins() call per opcode, in ascending opcode order. build_ins()
** itself aborts with exit(2) on an opcode it has no case for. */
5521 for (op = 0; op < BC__MAX; op++)
/* The opcode is passed twice (as BCOp and as int). NOTE(review): the meaning
** of the third parameter is not visible in this part of the file. */
5522 build_ins(ctx, (BCOp)op, op);
5523 return BC__MAX;
5524 }
5525
5526 /* Emit pseudo frame-info for all assembler functions. */
5527 static void emit_asm_debug(BuildCtx *ctx)
5528 {
5529 int fcofs = (int)((uint8_t *)ctx->glob[GLOB_vm_ffi_call] - ctx->code);
5530 #if LJ_64
5531 #define SZPTR "8"
5532 #define BSZPTR "3"
5533 #define REG_SP "0x7"
5534 #define REG_RA "0x10"
5535 #else
5536 #define SZPTR "4"
5537 #define BSZPTR "2"
5538 #define REG_SP "0x4"
5539 #define REG_RA "0x8"
5540 #endif
5541 switch (ctx->mode) {
5542 case BUILD_elfasm:
5543 fprintf(ctx->fp, "\t.section .debug_frame,\"\",@progbits\n");
5544 fprintf(ctx->fp,
5545 ".Lframe0:\n"
5546 "\t.long .LECIE0-.LSCIE0\n"
5547 ".LSCIE0:\n"
5548 "\t.long 0xffffffff\n"
5549 "\t.byte 0x1\n"
5550 "\t.string \"\"\n"
5551 "\t.uleb128 0x1\n"
5552 "\t.sleb128 -" SZPTR "\n"
5553 "\t.byte " REG_RA "\n"
5554 "\t.byte 0xc\n\t.uleb128 " REG_SP "\n\t.uleb128 " SZPTR "\n"
5555 "\t.byte 0x80+" REG_RA "\n\t.uleb128 0x1\n"
5556 "\t.align " SZPTR "\n"
5557 ".LECIE0:\n\n");
5558 fprintf(ctx->fp,
5559 ".LSFDE0:\n"
5560 "\t.long .LEFDE0-.LASFDE0\n"
5561 ".LASFDE0:\n"
5562 "\t.long .Lframe0\n"
5563 #if LJ_64
5564 "\t.quad .Lbegin\n"
5565 "\t.quad %d\n"
5566 "\t.byte 0xe\n\t.uleb128 %d\n" /* def_cfa_offset */
5567 "\t.byte 0x86\n\t.uleb128 0x2\n" /* offset rbp */
5568 "\t.byte 0x83\n\t.uleb128 0x3\n" /* offset rbx */
5569 "\t.byte 0x8f\n\t.uleb128 0x4\n" /* offset r15 */
5570 "\t.byte 0x8e\n\t.uleb128 0x5\n" /* offset r14 */
5571 #if LJ_NO_UNWIND
5572 "\t.byte 0x8d\n\t.uleb128 0x6\n" /* offset r13 */
5573 "\t.byte 0x8c\n\t.uleb128 0x7\n" /* offset r12 */
5574 #endif
5575 #else
5576 "\t.long .Lbegin\n"
5577 "\t.long %d\n"
5578 "\t.byte 0xe\n\t.uleb128 %d\n" /* def_cfa_offset */
5579 "\t.byte 0x85\n\t.uleb128 0x2\n" /* offset ebp */
5580 "\t.byte 0x87\n\t.uleb128 0x3\n" /* offset edi */
5581 "\t.byte 0x86\n\t.uleb128 0x4\n" /* offset esi */
5582 "\t.byte 0x83\n\t.uleb128 0x5\n" /* offset ebx */
5583 #endif
5584 "\t.align " SZPTR "\n"
5585 ".LEFDE0:\n\n", fcofs, CFRAME_SIZE);
5586 #if LJ_HASFFI
5587 fprintf(ctx->fp,
5588 ".LSFDE1:\n"
5589 "\t.long .LEFDE1-.LASFDE1\n"
5590 ".LASFDE1:\n"
5591 "\t.long .Lframe0\n"
5592 #if LJ_64
5593 "\t.quad lj_vm_ffi_call\n"
5594 "\t.quad %d\n"
5595 "\t.byte 0xe\n\t.uleb128 16\n" /* def_cfa_offset */
5596 "\t.byte 0x86\n\t.uleb128 0x2\n" /* offset rbp */
5597 "\t.byte 0xd\n\t.uleb128 0x6\n" /* def_cfa_register rbp */
5598 "\t.byte 0x83\n\t.uleb128 0x3\n" /* offset rbx */
5599 #else
5600 "\t.long lj_vm_ffi_call\n"
5601 "\t.long %d\n"
5602 "\t.byte 0xe\n\t.uleb128 8\n" /* def_cfa_offset */
5603 "\t.byte 0x85\n\t.uleb128 0x2\n" /* offset ebp */
5604 "\t.byte 0xd\n\t.uleb128 0x5\n" /* def_cfa_register ebp */
5605 "\t.byte 0x83\n\t.uleb128 0x3\n" /* offset ebx */
5606 #endif
5607 "\t.align " SZPTR "\n"
5608 ".LEFDE1:\n\n", (int)ctx->codesz - fcofs);
5609 #endif
5610 #if !LJ_NO_UNWIND
5611 #if LJ_TARGET_SOLARIS
5612 #if LJ_64
5613 fprintf(ctx->fp, "\t.section .eh_frame,\"a\",@unwind\n");
5614 #else
5615 fprintf(ctx->fp, "\t.section .eh_frame,\"aw\",@progbits\n");
5616 #endif
5617 #else
5618 fprintf(ctx->fp, "\t.section .eh_frame,\"a\",@progbits\n");
5619 #endif
5620 fprintf(ctx->fp,
5621 ".Lframe1:\n"
5622 "\t.long .LECIE1-.LSCIE1\n"
5623 ".LSCIE1:\n"
5624 "\t.long 0\n"
5625 "\t.byte 0x1\n"
5626 "\t.string \"zPR\"\n"
5627 "\t.uleb128 0x1\n"
5628 "\t.sleb128 -" SZPTR "\n"
5629 "\t.byte " REG_RA "\n"
5630 "\t.uleb128 6\n" /* augmentation length */
5631 "\t.byte 0x1b\n" /* pcrel|sdata4 */
5632 "\t.long lj_err_unwind_dwarf-.\n"
5633 "\t.byte 0x1b\n" /* pcrel|sdata4 */
5634 "\t.byte 0xc\n\t.uleb128 " REG_SP "\n\t.uleb128 " SZPTR "\n"
5635 "\t.byte 0x80+" REG_RA "\n\t.uleb128 0x1\n"
5636 "\t.align " SZPTR "\n"
5637 ".LECIE1:\n\n");
5638 fprintf(ctx->fp,
5639 ".LSFDE2:\n"
5640 "\t.long .LEFDE2-.LASFDE2\n"
5641 ".LASFDE2:\n"
5642 "\t.long .LASFDE2-.Lframe1\n"
5643 "\t.long .Lbegin-.\n"
5644 "\t.long %d\n"
5645 "\t.uleb128 0\n" /* augmentation length */
5646 "\t.byte 0xe\n\t.uleb128 %d\n" /* def_cfa_offset */
5647 #if LJ_64
5648 "\t.byte 0x86\n\t.uleb128 0x2\n" /* offset rbp */
5649 "\t.byte 0x83\n\t.uleb128 0x3\n" /* offset rbx */
5650 "\t.byte 0x8f\n\t.uleb128 0x4\n" /* offset r15 */
5651 "\t.byte 0x8e\n\t.uleb128 0x5\n" /* offset r14 */
5652 #else
5653 "\t.byte 0x85\n\t.uleb128 0x2\n" /* offset ebp */
5654 "\t.byte 0x87\n\t.uleb128 0x3\n" /* offset edi */
5655 "\t.byte 0x86\n\t.uleb128 0x4\n" /* offset esi */
5656 "\t.byte 0x83\n\t.uleb128 0x5\n" /* offset ebx */
5657 #endif
5658 "\t.align " SZPTR "\n"
5659 ".LEFDE2:\n\n", fcofs, CFRAME_SIZE);
5660 #if LJ_HASFFI
5661 fprintf(ctx->fp,
5662 ".Lframe2:\n"
5663 "\t.long .LECIE2-.LSCIE2\n"
5664 ".LSCIE2:\n"
5665 "\t.long 0\n"
5666 "\t.byte 0x1\n"
5667 "\t.string \"zR\"\n"
5668 "\t.uleb128 0x1\n"
5669 "\t.sleb128 -" SZPTR "\n"
5670 "\t.byte " REG_RA "\n"
5671 "\t.uleb128 1\n" /* augmentation length */
5672 "\t.byte 0x1b\n" /* pcrel|sdata4 */
5673 "\t.byte 0xc\n\t.uleb128 " REG_SP "\n\t.uleb128 " SZPTR "\n"
5674 "\t.byte 0x80+" REG_RA "\n\t.uleb128 0x1\n"
5675 "\t.align " SZPTR "\n"
5676 ".LECIE2:\n\n");
5677 fprintf(ctx->fp,
5678 ".LSFDE3:\n"
5679 "\t.long .LEFDE3-.LASFDE3\n"
5680 ".LASFDE3:\n"
5681 "\t.long .LASFDE3-.Lframe2\n"
5682 "\t.long lj_vm_ffi_call-.\n"
5683 "\t.long %d\n"
5684 "\t.uleb128 0\n" /* augmentation length */
5685 #if LJ_64
5686 "\t.byte 0xe\n\t.uleb128 16\n" /* def_cfa_offset */
5687 "\t.byte 0x86\n\t.uleb128 0x2\n" /* offset rbp */
5688 "\t.byte 0xd\n\t.uleb128 0x6\n" /* def_cfa_register rbp */
5689 "\t.byte 0x83\n\t.uleb128 0x3\n" /* offset rbx */
5690 #else
5691 "\t.byte 0xe\n\t.uleb128 8\n" /* def_cfa_offset */
5692 "\t.byte 0x85\n\t.uleb128 0x2\n" /* offset ebp */
5693 "\t.byte 0xd\n\t.uleb128 0x5\n" /* def_cfa_register ebp */
5694 "\t.byte 0x83\n\t.uleb128 0x3\n" /* offset ebx */
5695 #endif
5696 "\t.align " SZPTR "\n"
5697 ".LEFDE3:\n\n", (int)ctx->codesz - fcofs);
5698 #endif
5699 #endif
5700 break;
5701 #if !LJ_NO_UNWIND
5702 /* Mental note: never let Apple design an assembler.
5703 ** Or a linker. Or a plastic case. But I digress.
5704 */
5705 case BUILD_machasm: {
5706 #if LJ_HASFFI
5707 int fcsize = 0;
5708 #endif
5709 int i;
5710 fprintf(ctx->fp, "\t.section __TEXT,__eh_frame,coalesced,no_toc+strip_static_syms+live_support\n");
5711 fprintf(ctx->fp,
5712 "EH_frame1:\n"
5713 "\t.set L$set$x,LECIEX-LSCIEX\n"
5714 "\t.long L$set$x\n"
5715 "LSCIEX:\n"
5716 "\t.long 0\n"
5717 "\t.byte 0x1\n"
5718 "\t.ascii \"zPR\\0\"\n"
5719 "\t.byte 0x1\n"
5720 "\t.byte 128-" SZPTR "\n"
5721 "\t.byte " REG_RA "\n"
5722 "\t.byte 6\n" /* augmentation length */
5723 "\t.byte 0x9b\n" /* indirect|pcrel|sdata4 */
5724 #if LJ_64
5725 "\t.long _lj_err_unwind_dwarf+4@GOTPCREL\n"
5726 "\t.byte 0x1b\n" /* pcrel|sdata4 */
5727 "\t.byte 0xc\n\t.byte " REG_SP "\n\t.byte " SZPTR "\n"
5728 #else
5729 "\t.long L_lj_err_unwind_dwarf$non_lazy_ptr-.\n"
5730 "\t.byte 0x1b\n" /* pcrel|sdata4 */
5731 "\t.byte 0xc\n\t.byte 0x5\n\t.byte 0x4\n" /* esp=5 on 32 bit MACH-O. */
5732 #endif
5733 "\t.byte 0x80+" REG_RA "\n\t.byte 0x1\n"
5734 "\t.align " BSZPTR "\n"
5735 "LECIEX:\n\n");
5736 for (i = 0; i < ctx->nsym; i++) {
5737 const char *name = ctx->sym[i].name;
5738 int32_t size = ctx->sym[i+1].ofs - ctx->sym[i].ofs;
5739 if (size == 0) continue;
5740 #if LJ_HASFFI
5741 if (!strcmp(name, "_lj_vm_ffi_call")) { fcsize = size; continue; }
5742 #endif
5743 fprintf(ctx->fp,
5744 "%s.eh:\n"
5745 "LSFDE%d:\n"
5746 "\t.set L$set$%d,LEFDE%d-LASFDE%d\n"
5747 "\t.long L$set$%d\n"
5748 "LASFDE%d:\n"
5749 "\t.long LASFDE%d-EH_frame1\n"
5750 "\t.long %s-.\n"
5751 "\t.long %d\n"
5752 "\t.byte 0\n" /* augmentation length */
5753 "\t.byte 0xe\n\t.byte %d\n" /* def_cfa_offset */
5754 #if LJ_64
5755 "\t.byte 0x86\n\t.byte 0x2\n" /* offset rbp */
5756 "\t.byte 0x83\n\t.byte 0x3\n" /* offset rbx */
5757 "\t.byte 0x8f\n\t.byte 0x4\n" /* offset r15 */
5758 "\t.byte 0x8e\n\t.byte 0x5\n" /* offset r14 */
5759 #else
5760 "\t.byte 0x84\n\t.byte 0x2\n" /* offset ebp (4 for MACH-O)*/
5761 "\t.byte 0x87\n\t.byte 0x3\n" /* offset edi */
5762 "\t.byte 0x86\n\t.byte 0x4\n" /* offset esi */
5763 "\t.byte 0x83\n\t.byte 0x5\n" /* offset ebx */
5764 #endif
5765 "\t.align " BSZPTR "\n"
5766 "LEFDE%d:\n\n",
5767 name, i, i, i, i, i, i, i, name, size, CFRAME_SIZE, i);
5768 }
5769 #if LJ_HASFFI
5770 if (fcsize) {
5771 fprintf(ctx->fp,
5772 "EH_frame2:\n"
5773 "\t.set L$set$y,LECIEY-LSCIEY\n"
5774 "\t.long L$set$y\n"
5775 "LSCIEY:\n"
5776 "\t.long 0\n"
5777 "\t.byte 0x1\n"
5778 "\t.ascii \"zR\\0\"\n"
5779 "\t.byte 0x1\n"
5780 "\t.byte 128-" SZPTR "\n"
5781 "\t.byte " REG_RA "\n"
5782 "\t.byte 1\n" /* augmentation length */
5783 #if LJ_64
5784 "\t.byte 0x1b\n" /* pcrel|sdata4 */
5785 "\t.byte 0xc\n\t.byte " REG_SP "\n\t.byte " SZPTR "\n"
5786 #else
5787 "\t.byte 0x1b\n" /* pcrel|sdata4 */
5788 "\t.byte 0xc\n\t.byte 0x5\n\t.byte 0x4\n" /* esp=5 on 32 bit MACH. */
5789 #endif
5790 "\t.byte 0x80+" REG_RA "\n\t.byte 0x1\n"
5791 "\t.align " BSZPTR "\n"
5792 "LECIEY:\n\n");
5793 fprintf(ctx->fp,
5794 "_lj_vm_ffi_call.eh:\n"
5795 "LSFDEY:\n"
5796 "\t.set L$set$yy,LEFDEY-LASFDEY\n"
5797 "\t.long L$set$yy\n"
5798 "LASFDEY:\n"
5799 "\t.long LASFDEY-EH_frame2\n"
5800 "\t.long _lj_vm_ffi_call-.\n"
5801 "\t.long %d\n"
5802 "\t.byte 0\n" /* augmentation length */
5803 #if LJ_64
5804 "\t.byte 0xe\n\t.byte 16\n" /* def_cfa_offset */
5805 "\t.byte 0x86\n\t.byte 0x2\n" /* offset rbp */
5806 "\t.byte 0xd\n\t.byte 0x6\n" /* def_cfa_register rbp */
5807 "\t.byte 0x83\n\t.byte 0x3\n" /* offset rbx */
5808 #else
5809 "\t.byte 0xe\n\t.byte 8\n" /* def_cfa_offset */
5810 "\t.byte 0x84\n\t.byte 0x2\n" /* offset ebp (4 for MACH-O)*/
5811 "\t.byte 0xd\n\t.byte 0x4\n" /* def_cfa_register ebp */
5812 "\t.byte 0x83\n\t.byte 0x3\n" /* offset ebx */
5813 #endif
5814 "\t.align " BSZPTR "\n"
5815 "LEFDEY:\n\n", fcsize);
5816 }
5817 #endif
5818 #if !LJ_64
5819 fprintf(ctx->fp,
5820 "\t.non_lazy_symbol_pointer\n"
5821 "L_lj_err_unwind_dwarf$non_lazy_ptr:\n"
5822 ".indirect_symbol _lj_err_unwind_dwarf\n"
5823 ".long 0\n\n");
5824 fprintf(ctx->fp, "\t.section __IMPORT,__jump_table,symbol_stubs,pure_instructions+self_modifying_code,5\n");
5825 {
5826 const char *const *xn;
5827 for (xn = ctx->extnames; *xn; xn++)
5828 if (strncmp(*xn, LABEL_PREFIX, sizeof(LABEL_PREFIX)-1))
5829 fprintf(ctx->fp, "L_%s$stub:\n\t.indirect_symbol _%s\n\t.ascii \"\\364\\364\\364\\364\\364\"\n", *xn, *xn);
5830 }
5831 #endif
5832 fprintf(ctx->fp, ".subsections_via_symbols\n");
5833 }
5834 break;
5835 #endif
5836 default: /* Difficult for other modes. */
5837 break;
5838 }
5839 }
5840