Mercurial
comparison third_party/luajit/src/vm_x86.dasc @ 178:94705b5986b3
[ThirdParty] Added WRK and luajit for load testing.
| author | MrJuneJune <me@mrjunejune.com> |
|---|---|
| date | Thu, 22 Jan 2026 20:10:30 -0800 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 177:24fe8ff94056 | 178:94705b5986b3 |
|---|---|
| 1 |// Low-level VM code for x86 CPUs. | |
| 2 |// Bytecode interpreter, fast functions and helper functions. | |
| 3 |// Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h | |
| 4 | | |
| 5 |.if P64 | |
| 6 |.arch x64 | |
| 7 |.else | |
| 8 |.arch x86 | |
| 9 |.endif | |
| 10 |.section code_op, code_sub | |
| 11 | | |
| 12 |.actionlist build_actionlist | |
| 13 |.globals GLOB_ | |
| 14 |.globalnames globnames | |
| 15 |.externnames extnames | |
| 16 | | |
| 17 |//----------------------------------------------------------------------- | |
| 18 | | |
| 19 |.if P64 | |
| 20 |.define X64, 1 | |
| 21 |.if WIN | |
| 22 |.define X64WIN, 1 | |
| 23 |.endif | |
| 24 |.endif | |
| 25 | | |
| 26 |// Fixed register assignments for the interpreter. | |
| 27 |// This is very fragile and has many dependencies. Caveat emptor. | |
| |// BASE is edx on every target; KBASE, PC and DISPATCH differ per target below. | |
| |// Names with an 'a' suffix are the address-sized view of the same register. | |
| 28 |.define BASE, edx // Not C callee-save, refetched anyway. | |
| |// x86: address-sized aliases are the 32 bit registers themselves. | |
| 29 |.if not X64 | |
| 30 |.define KBASE, edi // Must be C callee-save. | |
| 31 |.define KBASEa, KBASE | |
| 32 |.define PC, esi // Must be C callee-save. | |
| 33 |.define PCa, PC | |
| 34 |.define DISPATCH, ebx // Must be C callee-save. | |
| |// x64/Windows: same 32 bit registers as x86, the 'a' aliases are the full 64 bit registers. | |
| 35 |.elif X64WIN | |
| 36 |.define KBASE, edi // Must be C callee-save. | |
| 37 |.define KBASEa, rdi | |
| 38 |.define PC, esi // Must be C callee-save. | |
| 39 |.define PCa, rsi | |
| 40 |.define DISPATCH, ebx // Must be C callee-save. | |
| |// x64/POSIX: KBASE, PC and DISPATCH use r15, rbx and r14 instead. | |
| 41 |.else | |
| 42 |.define KBASE, r15d // Must be C callee-save. | |
| 43 |.define KBASEa, r15 | |
| 44 |.define PC, ebx // Must be C callee-save. | |
| 45 |.define PCa, rbx | |
| 46 |.define DISPATCH, r14d // Must be C callee-save. | |
| 47 |.endif | |
| 48 | | |
| |// Operand registers. RA = ecx, RB = ebp, RC = eax on all targets. | |
| |// RAH/RAL and RCH/RCL/RCW are the byte/word sub-registers of RA and RC. | |
| 49 |.define RA, ecx | |
| 50 |.define RAH, ch | |
| 51 |.define RAL, cl | |
| 52 |.define RB, ebp // Must be ebp (C callee-save). | |
| 53 |.define RC, eax // Must be eax. | |
| 54 |.define RCW, ax | |
| 55 |.define RCH, ah | |
| 56 |.define RCL, al | |
| |// OP shares its register with RB, and RD/RDW/RDL share theirs with RC/RCW/RCL. | |
| |// Writing one name of a pair therefore overwrites the other. | |
| 57 |.define OP, RB | |
| 58 |.define RD, RC | |
| 59 |.define RDW, RCW | |
| 60 |.define RDL, RCL | |
| |// Address-sized views: full 64 bit registers on x64, plain aliases on x86. | |
| 61 |.if X64 | |
| 62 |.define RAa, rcx | |
| 63 |.define RBa, rbp | |
| 64 |.define RCa, rax | |
| 65 |.define RDa, rax | |
| 66 |.else | |
| 67 |.define RAa, RA | |
| 68 |.define RBa, RB | |
| 69 |.define RCa, RC | |
| 70 |.define RDa, RD | |
| 71 |.endif | |
| 72 | | |
| |// Argument registers for calls into C. | |
| |// FCARG1/FCARG2 exist on every target; CARGn/CARGnd exist only on x64. | |
| |// Note: on x86 and x64/Windows FCARG1 is ecx (= RA) and FCARG2 is edx (= BASE). | |
| 73 |.if not X64 | |
| 74 |.define FCARG1, ecx // x86 fastcall arguments. | |
| 75 |.define FCARG2, edx | |
| 76 |.elif X64WIN | |
| 77 |.define CARG1, rcx // x64/WIN64 C call arguments. | |
| 78 |.define CARG2, rdx | |
| 79 |.define CARG3, r8 | |
| 80 |.define CARG4, r9 | |
| |// The 'd' suffix names the 32 bit view of the same argument register. | |
| 81 |.define CARG1d, ecx | |
| 82 |.define CARG2d, edx | |
| 83 |.define CARG3d, r8d | |
| 84 |.define CARG4d, r9d | |
| 85 |.define FCARG1, CARG1d // Upwards compatible to x86 fastcall. | |
| 86 |.define FCARG2, CARG2d | |
| 87 |.else | |
| 88 |.define CARG1, rdi // x64/POSIX C call arguments. | |
| 89 |.define CARG2, rsi | |
| 90 |.define CARG3, rdx | |
| 91 |.define CARG4, rcx | |
| 92 |.define CARG5, r8 | |
| 93 |.define CARG6, r9 | |
| |// Here CARG3d is edx (= BASE) and CARG4d is ecx (= RA). | |
| 94 |.define CARG1d, edi | |
| 95 |.define CARG2d, esi | |
| 96 |.define CARG3d, edx | |
| 97 |.define CARG4d, ecx | |
| 98 |.define CARG5d, r8d | |
| 99 |.define CARG6d, r9d | |
| 100 |.define FCARG1, CARG1d // Simulate x86 fastcall. | |
| 101 |.define FCARG2, CARG2d | |
| 102 |.endif | |
| 103 | | |
| 104 |// Type definitions. Some of these are only used for documentation. | |
| |// Each .type binds a short name to a C type so that operands can be written | |
| |// as NAME:reg->field (e.g. L:RB->base, LFUNC:RB->pc) or NAME:reg (e.g. NARGS:RD). | |
| 105 |.type L, lua_State | |
| 106 |.type GL, global_State | |
| 107 |.type TVALUE, TValue | |
| 108 |.type GCOBJ, GCobj | |
| 109 |.type STR, GCstr | |
| 110 |.type TAB, GCtab | |
| 111 |.type LFUNC, GCfuncL | |
| 112 |.type CFUNC, GCfuncC | |
| 113 |.type PROTO, GCproto | |
| 114 |.type UPVAL, GCupval | |
| 115 |.type NODE, Node | |
| 116 |.type NARGS, int | |
| 117 |.type TRACE, GCtrace | |
| 118 |.type SBUF, SBuf | |
| 119 | | |
| 120 |// Stack layout while in interpreter. Must match with lj_frame.h. | |
| 121 |//----------------------------------------------------------------------- | |
| 122 |.if not X64 // x86 stack layout. | |
| 123 | | |
| 124 |.if WIN | |
| 125 | | |
| |// x86/Windows: 9 scratch slots below the saved registers and the 2-word SEH record. | |
| 126 |.define CFRAME_SPACE, aword*9 // Delta for esp (see <--). | |
| |// saveregs_: save edi/esi/ebx, then build a 2-word record on the stack | |
| |// (handler lj_err_unwind_win + previous fs:[0]), make fs:[0] point at it and | |
| |// reserve CFRAME_SPACE. ebp is not pushed here; the saveregs wrapper does that. | |
| 127 |.macro saveregs_ | |
| 128 | push edi; push esi; push ebx | |
| 129 | push extern lj_err_unwind_win | |
| 130 | fs; push dword [0] | |
| 131 | fs; mov [0], esp | |
| 132 | sub esp, CFRAME_SPACE | |
| 133 |.endmacro | |
| |// restoreregs: inverse of saveregs (not of saveregs_): it also pops ebp. | |
| |// Restores the previous fs:[0], drops the handler slot, then pops the saved registers. | |
| 134 |.macro restoreregs | |
| 135 | add esp, CFRAME_SPACE | |
| 136 | fs; pop dword [0] | |
| | // The value popped into edi is discarded; edi is reloaded by the next line. | |
| 137 | pop edi // Short for esp += 4. | |
| 138 | pop ebx; pop esi; pop edi; pop ebp | |
| 139 |.endmacro | |
| 140 | | |
| 141 |.else | |
| 142 | | |
| |// x86/non-Windows: 7 scratch slots below the saved registers, no SEH record. | |
| 143 |.define CFRAME_SPACE, aword*7 // Delta for esp (see <--). | |
| |// saveregs_: save edi/esi/ebx and reserve CFRAME_SPACE. | |
| |// ebp is not pushed here; the saveregs wrapper does that. | |
| 144 |.macro saveregs_ | |
| 145 | push edi; push esi; push ebx | |
| 146 | sub esp, CFRAME_SPACE | |
| 147 |.endmacro | |
| |// restoreregs: inverse of saveregs (not of saveregs_): it also pops ebp. | |
| 148 |.macro restoreregs | |
| 149 | add esp, CFRAME_SPACE | |
| 150 | pop ebx; pop esi; pop edi; pop ebp | |
| 151 |.endmacro | |
| 152 | | |
| 153 |.endif | |
| 154 | | |
| |// saveregs: full C-frame prologue for x86. Pushes ebp first, then expands the | |
| |// target-specific saveregs_ selected above. Undone by restoreregs. | |
| 155 |.macro saveregs | |
| 156 | push ebp; saveregs_ | |
| 157 |.endmacro | |
| 158 | | |
| |// Symbolic names for esp-relative stack slots, valid after saveregs has run. | |
| |// Slots ARG1..SAVE_PC have the same offsets in both variants. From SAVE_R1 | |
| |// upwards the WIN offsets are 4 slots higher: 2 extra scratch slots (UNUSED1/2) | |
| |// plus the 2-word SEH record (SEH_NEXT/SEH_FUNC) pushed by saveregs_. | |
| |// SAVE_R4..SAVE_R1 hold ebp, edi, esi, ebx (in push order). | |
| 159 |.if WIN | |
| 160 |.define SAVE_ERRF, aword [esp+aword*19] // vm_pcall/vm_cpcall only. | |
| 161 |.define SAVE_NRES, aword [esp+aword*18] | |
| 162 |.define SAVE_CFRAME, aword [esp+aword*17] | |
| 163 |.define SAVE_L, aword [esp+aword*16] | |
| 164 |//----- 16 byte aligned, ^^^ arguments from C caller | |
| 165 |.define SAVE_RET, aword [esp+aword*15] //<-- esp entering interpreter. | |
| 166 |.define SAVE_R4, aword [esp+aword*14] | |
| 167 |.define SAVE_R3, aword [esp+aword*13] | |
| 168 |.define SAVE_R2, aword [esp+aword*12] | |
| 169 |//----- 16 byte aligned | |
| 170 |.define SAVE_R1, aword [esp+aword*11] | |
| 171 |.define SEH_FUNC, aword [esp+aword*10] | |
| 172 |.define SEH_NEXT, aword [esp+aword*9] //<-- esp after register saves. | |
| 173 |.define UNUSED2, aword [esp+aword*8] | |
| 174 |//----- 16 byte aligned | |
| 175 |.define UNUSED1, aword [esp+aword*7] | |
| 176 |.define SAVE_PC, aword [esp+aword*6] | |
| 177 |.define TMP2, aword [esp+aword*5] | |
| 178 |.define TMP1, aword [esp+aword*4] | |
| 179 |//----- 16 byte aligned | |
| 180 |.define ARG4, aword [esp+aword*3] | |
| 181 |.define ARG3, aword [esp+aword*2] | |
| 182 |.define ARG2, aword [esp+aword*1] | |
| 183 |.define ARG1, aword [esp] //<-- esp while in interpreter. | |
| 184 |//----- 16 byte aligned, ^^^ arguments for C callee | |
| 185 |.else | |
| 186 |.define SAVE_ERRF, aword [esp+aword*15] // vm_pcall/vm_cpcall only. | |
| 187 |.define SAVE_NRES, aword [esp+aword*14] | |
| 188 |.define SAVE_CFRAME, aword [esp+aword*13] | |
| 189 |.define SAVE_L, aword [esp+aword*12] | |
| 190 |//----- 16 byte aligned, ^^^ arguments from C caller | |
| 191 |.define SAVE_RET, aword [esp+aword*11] //<-- esp entering interpreter. | |
| 192 |.define SAVE_R4, aword [esp+aword*10] | |
| 193 |.define SAVE_R3, aword [esp+aword*9] | |
| 194 |.define SAVE_R2, aword [esp+aword*8] | |
| 195 |//----- 16 byte aligned | |
| 196 |.define SAVE_R1, aword [esp+aword*7] //<-- esp after register saves. | |
| 197 |.define SAVE_PC, aword [esp+aword*6] | |
| 198 |.define TMP2, aword [esp+aword*5] | |
| 199 |.define TMP1, aword [esp+aword*4] | |
| 200 |//----- 16 byte aligned | |
| 201 |.define ARG4, aword [esp+aword*3] | |
| 202 |.define ARG3, aword [esp+aword*2] | |
| 203 |.define ARG2, aword [esp+aword*1] | |
| 204 |.define ARG1, aword [esp] //<-- esp while in interpreter. | |
| 205 |//----- 16 byte aligned, ^^^ arguments for C callee | |
| 206 |.endif | |
| 207 | | |
| |// Aliases that reuse the slots defined above. Several names deliberately share | |
| |// storage, so a write through one alias destroys the value seen through the other. | |
| 208 |// FPARGx overlaps ARGx and ARG(x+1) on x86. | |
| 209 |.define FPARG3, qword [esp+qword*1] | |
| 210 |.define FPARG1, qword [esp] | |
| 211 |// TMPQ overlaps TMP1/TMP2. ARG5/MULTRES overlap TMP1/TMP2 (and TMPQ). | |
| 212 |.define TMPQ, qword [esp+aword*4] | |
| 213 |.define TMP3, ARG4 | |
| 214 |.define ARG5, TMP1 | |
| 215 |.define TMPa, TMP1 | |
| 216 |.define MULTRES, TMP2 | |
| 217 | | |
| |// INARG_* give the C caller's stack arguments their meaning on entry; they | |
| |// occupy the same slots as the SAVE_* names they are defined as. | |
| 218 |// Arguments for vm_call and vm_pcall. | |
| 219 |.define INARG_BASE, SAVE_CFRAME // Overwritten by SAVE_CFRAME! | |
| 220 | | |
| 221 |// Arguments for vm_cpcall. | |
| 222 |.define INARG_CP_CALL, SAVE_ERRF | |
| 223 |.define INARG_CP_UD, SAVE_NRES | |
| 224 |.define INARG_CP_FUNC, SAVE_CFRAME | |
| 225 | | |
| 226 |//----------------------------------------------------------------------- | |
| 227 |.elif X64WIN // x64/Windows stack layout | |
| 228 | | |
| |// x64/Windows: 5 slots are reserved below the four saved registers. | |
| 229 |.define CFRAME_SPACE, aword*5 // Delta for rsp (see <--). | |
| |// saveregs_: save rdi/rsi/rbx and reserve CFRAME_SPACE (rbp is pushed by saveregs). | |
| 230 |.macro saveregs_ | |
| 231 | push rdi; push rsi; push rbx | |
| 232 | sub rsp, CFRAME_SPACE | |
| 233 |.endmacro | |
| |// saveregs: full prologue; pushes rbp first, then expands saveregs_. | |
| 234 |.macro saveregs | |
| 235 | push rbp; saveregs_ | |
| 236 |.endmacro | |
| |// restoreregs: exact inverse of saveregs. | |
| 237 |.macro restoreregs | |
| 238 | add rsp, CFRAME_SPACE | |
| 239 | pop rbx; pop rsi; pop rdi; pop rbp | |
| 240 |.endmacro | |
| 241 | | |
| |// Slots at rsp+aword*10 and above lie beyond the return address (SAVE_RET). | |
| |// dword*20 == aword*10, so TMP1/TMP2 share the first of those 8-byte slots. | |
| 242 |.define SAVE_CFRAME, aword [rsp+aword*13] | |
| 243 |.define SAVE_PC, dword [rsp+dword*25] | |
| 244 |.define SAVE_L, dword [rsp+dword*24] | |
| 245 |.define SAVE_ERRF, dword [rsp+dword*23] | |
| 246 |.define SAVE_NRES, dword [rsp+dword*22] | |
| 247 |.define TMP2, dword [rsp+dword*21] | |
| 248 |.define TMP1, dword [rsp+dword*20] | |
| 249 |//----- 16 byte aligned, ^^^ 32 byte register save area, owned by interpreter | |
| |// SAVE_R4..SAVE_R1 hold rbp, rdi, rsi, rbx (in push order). | |
| 250 |.define SAVE_RET, aword [rsp+aword*9] //<-- rsp entering interpreter. | |
| 251 |.define SAVE_R4, aword [rsp+aword*8] | |
| 252 |.define SAVE_R3, aword [rsp+aword*7] | |
| 253 |.define SAVE_R2, aword [rsp+aword*6] | |
| 254 |.define SAVE_R1, aword [rsp+aword*5] //<-- rsp after register saves. | |
| 255 |.define ARG5, aword [rsp+aword*4] | |
| 256 |.define CSAVE_4, aword [rsp+aword*3] | |
| 257 |.define CSAVE_3, aword [rsp+aword*2] | |
| 258 |.define CSAVE_2, aword [rsp+aword*1] | |
| 259 |.define CSAVE_1, aword [rsp] //<-- rsp while in interpreter. | |
| 260 |//----- 16 byte aligned, ^^^ 32 byte register save area, owned by callee | |
| 261 | | |
| 262 |// TMPQ overlaps TMP1/TMP2. MULTRES overlaps TMP2 (and TMPQ). | |
| 263 |.define TMPQ, qword [rsp+aword*10] | |
| 264 |.define MULTRES, TMP2 | |
| |// TMPa, ARG5d and TMP3 all refer to the ARG5 slot (full width or low dword). | |
| 265 |.define TMPa, ARG5 | |
| 266 |.define ARG5d, dword [rsp+aword*4] | |
| 267 |.define TMP3, ARG5d | |
| 268 | | |
| 269 |//----------------------------------------------------------------------- | |
| 270 |.else // x64/POSIX stack layout | |
| 271 | | |
| |// x64/POSIX: 5 slots are reserved below the saved registers. | |
| 272 |.define CFRAME_SPACE, aword*5 // Delta for rsp (see <--). | |
| |// saveregs_: save rbx/r15/r14 (plus r13/r12 when NO_UNWIND is set) and | |
| |// reserve CFRAME_SPACE. rbp is pushed by saveregs. | |
| 273 |.macro saveregs_ | |
| 274 | push rbx; push r15; push r14 | |
| 275 |.if NO_UNWIND | |
| 276 | push r13; push r12 | |
| 277 |.endif | |
| 278 | sub rsp, CFRAME_SPACE | |
| 279 |.endmacro | |
| |// saveregs: full prologue; pushes rbp first, then expands saveregs_. | |
| 280 |.macro saveregs | |
| 281 | push rbp; saveregs_ | |
| 282 |.endmacro | |
| |// restoreregs: exact inverse of saveregs, including the NO_UNWIND extras. | |
| 283 |.macro restoreregs | |
| 284 | add rsp, CFRAME_SPACE | |
| 285 |.if NO_UNWIND | |
| 286 | pop r12; pop r13 | |
| 287 |.endif | |
| 288 | pop r14; pop r15; pop rbx; pop rbp | |
| 289 |.endmacro | |
| 290 | | |
| 291 |//----- 16 byte aligned, | |
| |// With NO_UNWIND the two extra pushes shift SAVE_R1..SAVE_RET up by 2 slots; | |
| |// SAVE_RU1/SAVE_RU2 then hold r12/r13. SAVE_R4..SAVE_R1 hold rbp, rbx, r15, r14. | |
| 292 |.if NO_UNWIND | |
| 293 |.define SAVE_RET, aword [rsp+aword*11] //<-- rsp entering interpreter. | |
| 294 |.define SAVE_R4, aword [rsp+aword*10] | |
| 295 |.define SAVE_R3, aword [rsp+aword*9] | |
| 296 |.define SAVE_R2, aword [rsp+aword*8] | |
| 297 |.define SAVE_R1, aword [rsp+aword*7] | |
| 298 |.define SAVE_RU2, aword [rsp+aword*6] | |
| 299 |.define SAVE_RU1, aword [rsp+aword*5] //<-- rsp after register saves. | |
| 300 |.else | |
| 301 |.define SAVE_RET, aword [rsp+aword*9] //<-- rsp entering interpreter. | |
| 302 |.define SAVE_R4, aword [rsp+aword*8] | |
| 303 |.define SAVE_R3, aword [rsp+aword*7] | |
| 304 |.define SAVE_R2, aword [rsp+aword*6] | |
| 305 |.define SAVE_R1, aword [rsp+aword*5] //<-- rsp after register saves. | |
| 306 |.endif | |
| |// The 5 reserved slots: SAVE_CFRAME (slot 4), four dwords SAVE_PC/SAVE_L/ | |
| |// SAVE_ERRF/SAVE_NRES (slots 2-3), TMPa (slot 1) and TMP1/TMP2 (slot 0). | |
| 307 |.define SAVE_CFRAME, aword [rsp+aword*4] | |
| 308 |.define SAVE_PC, dword [rsp+dword*7] | |
| 309 |.define SAVE_L, dword [rsp+dword*6] | |
| 310 |.define SAVE_ERRF, dword [rsp+dword*5] | |
| 311 |.define SAVE_NRES, dword [rsp+dword*4] | |
| 312 |.define TMPa, aword [rsp+aword*1] | |
| 313 |.define TMP2, dword [rsp+dword*1] | |
| 314 |.define TMP1, dword [rsp] //<-- rsp while in interpreter. | |
| 315 |//----- 16 byte aligned | |
| 316 | | |
| 317 |// TMPQ overlaps TMP1/TMP2. MULTRES overlaps TMP2 (and TMPQ). | |
| 318 |.define TMPQ, qword [rsp] | |
| |// TMP3 is the low dword of the TMPa slot. | |
| 319 |.define TMP3, dword [rsp+aword*1] | |
| 320 |.define MULTRES, TMP2 | |
| 321 | | |
| 322 |.endif | |
| 323 | | |
| 324 |//----------------------------------------------------------------------- | |
| 325 | | |
| 326 |// Instruction headers. | |
| |// Per-format operand fixups, placed at the start of an instruction handler. | |
| |// On entry (see ins_NEXT) RA already holds operand A and RC/RD holds the upper | |
| |// 16 bits of the instruction word. ins_A/ins_AD/ins_AJ need nothing more. | |
| 327 |.macro ins_A; .endmacro | |
| 328 |.macro ins_AD; .endmacro | |
| 329 |.macro ins_AJ; .endmacro | |
| |// ins_ABC splits those 16 bits: high byte -> RB, low byte -> RC. | |
| |// ins_AB_ / ins_A_C extract only one of the two. | |
| 330 |.macro ins_ABC; movzx RB, RCH; movzx RC, RCL; .endmacro | |
| 331 |.macro ins_AB_; movzx RB, RCH; .endmacro | |
| 332 |.macro ins_A_C; movzx RC, RCL; .endmacro | |
| |// ins_AND replaces RD by its bitwise complement (address-sized). | |
| 333 |.macro ins_AND; not RDa; .endmacro | |
| 334 | | |
| 335 |// Instruction decode+dispatch. Carefully tuned (nope, lodsd is not faster). | |
| |// ins_NEXT: fetch the 32 bit instruction at [PC], decode it and jump to its handler. | |
| |// Out: OP (= RB) = byte 0 (opcode), RA = byte 1, RC/RD = upper 16 bits, | |
| |// PC advanced by 4 so it points past the fetched instruction. | |
| |// The jump goes through the table at DISPATCH, indexed by OP with pointer-sized entries. | |
| |// Clobbers RA, RB/OP, RC/RD and flags. | |
| 336 |.macro ins_NEXT | |
| 337 | mov RC, [PC] | |
| 338 | movzx RA, RCH | |
| 339 | movzx OP, RCL | |
| 340 | add PC, 4 | |
| 341 | shr RC, 16 | |
| 342 |.if X64 | |
| 343 | jmp aword [DISPATCH+OP*8] | |
| 344 |.else | |
| 345 | jmp aword [DISPATCH+OP*4] | |
| 346 |.endif | |
| 347 |.endmacro | |
| 348 | | |
| 349 |// Instruction footer. | |
| |// Compile-time choice of instruction footer. With the literal '.if 1' the first | |
| |// branch is always taken: ins_next and ins_next_ both expand to a full inline | |
| |// ins_NEXT. In the (currently disabled) alternative, ins_next is a jump to a | |
| |// single shared ->ins_next label which ins_next_ defines. | |
| 350 |.if 1 | |
| 351 | // Replicated dispatch. Less unpredictable branches, but higher I-Cache use. | |
| 352 | .define ins_next, ins_NEXT | |
| 353 | .define ins_next_, ins_NEXT | |
| 354 |.else | |
| 355 | // Common dispatch. Lower I-Cache use, only one (very) unpredictable branch. | |
| 356 | // Affects only certain kinds of benchmarks (and only with -j off). | |
| 357 | // Around 10%-30% slower on Core2, a lot more slower on P4. | |
| 358 | .macro ins_next | |
| 359 | jmp ->ins_next | |
| 360 | .endmacro | |
| 361 | .macro ins_next_ | |
| 362 | ->ins_next: | |
| 363 | ins_NEXT | |
| 364 | .endmacro | |
| 365 |.endif | |
| 366 | | |
| 367 |// Call decode and dispatch. | |
| |// ins_callt: enter the Lua function in RB without storing a return PC. | |
| |// Loads PC from LFUNC:RB->pc, decodes the first instruction (opcode -> OP, | |
| |// byte 1 -> RA), advances PC by 4 and jumps through the dispatch table. | |
| |// Unlike ins_NEXT the instruction word is read into RA, so RD (nargs+1) is | |
| |// left untouched. RB is overwritten because OP shares its register. | |
| 368 |.macro ins_callt | |
| 369 | // BASE = new base, RB = LFUNC, RD = nargs+1, [BASE-4] = PC | |
| 370 | mov PC, LFUNC:RB->pc | |
| 371 | mov RA, [PC] | |
| 372 | movzx OP, RAL | |
| 373 | movzx RA, RAH | |
| 374 | add PC, 4 | |
| 375 |.if X64 | |
| 376 | jmp aword [DISPATCH+OP*8] | |
| 377 |.else | |
| 378 | jmp aword [DISPATCH+OP*4] | |
| 379 |.endif | |
| 380 |.endmacro | |
| 381 | | |
| |// ins_call: like ins_callt, but first stores the current PC at [BASE-4] | |
| |// (the slot ins_callt expects to hold the caller's PC). | |
| 382 |.macro ins_call | |
| 383 | // BASE = new base, RB = LFUNC, RD = nargs+1 | |
| 384 | mov [BASE-4], PC | |
| 385 | ins_callt | |
| 386 |.endmacro | |
| 387 | | |
| 388 |//----------------------------------------------------------------------- | |
| 389 | | |
| 390 |// Macros to test operand types. | |
| |// checktp compares the dword at offset +4 of the 8-byte slot BASE[reg] with tp | |
| |// and only sets flags. The wrappers add the branch: | |
| |//   checknum  jumps to target if the dword is >= LJ_TISNUM (unsigned compare), | |
| |//   checkint/checkstr/checktab jump to target if it differs from the given tag. | |
| 391 |.macro checktp, reg, tp; cmp dword [BASE+reg*8+4], tp; .endmacro | |
| 392 |.macro checknum, reg, target; checktp reg, LJ_TISNUM; jae target; .endmacro | |
| 393 |.macro checkint, reg, target; checktp reg, LJ_TISNUM; jne target; .endmacro | |
| 394 |.macro checkstr, reg, target; checktp reg, LJ_TSTR; jne target; .endmacro | |
| 395 |.macro checktab, reg, target; checktp reg, LJ_TTAB; jne target; .endmacro | |
| 396 | | |
| 397 |// These operands must be used with movzx. | |
| |// Fields of the instruction just before PC (PC has already been advanced by 4): | |
| |// byte 0 = OP, byte 1 = RA, byte 2 = RC, byte 3 = RB, bytes 2-3 = RD. | |
| 398 |.define PC_OP, byte [PC-4] | |
| 399 |.define PC_RA, byte [PC-3] | |
| 400 |.define PC_RB, byte [PC-1] | |
| 401 |.define PC_RC, byte [PC-2] | |
| 402 |.define PC_RD, word [PC-2] | |
| 403 | | |
| |// branchPC: PC += (reg - BCBIAS_J) * 4. Uses lea, so flags are preserved. | |
| 404 |.macro branchPC, reg | |
| 405 | lea PC, [PC+reg*4-BCBIAS_J*4] | |
| 406 |.endmacro | |
| 407 | | |
| 408 |// Assumes DISPATCH is relative to GL. | |
| /* Byte displacement from DISPATCH to a field of global_State / jit_State. */ | |
| 409 #define DISPATCH_GL(field) (GG_DISP2G + (int)offsetof(global_State, field)) | |
| 410 #define DISPATCH_J(field) (GG_DISP2J + (int)offsetof(jit_State, field)) | |
| 411 | | |
| /* Offset of a GCproto field measured from the END of the GCproto struct */ | |
| /* (so the result is <= 0). NOTE(review): presumably applied to a pointer to */ | |
| /* the bytecode that directly follows the struct -- confirm against lj_obj.h. */ | |
| 412 #define PC2PROTO(field) ((int)offsetof(GCproto, field)-(int)sizeof(GCproto)) | |
| 413 | | |
| 414 |// Decrement hashed hotcount and trigger trace recorder if zero. | |
| |// hotloop: reg = (PC >> 1) & HOTCOUNT_PCMASK selects a 16 bit counter at | |
| |// DISPATCH+GG_DISP2HOT; HOTCOUNT_LOOP is subtracted from it. The branch to | |
| |// ->vm_hotloop is taken on unsigned borrow, i.e. when the counter was smaller | |
| |// than the amount subtracted. Clobbers reg and flags. | |
| 415 |.macro hotloop, reg | |
| 416 | mov reg, PC | |
| 417 | shr reg, 1 | |
| 418 | and reg, HOTCOUNT_PCMASK | |
| 419 | sub word [DISPATCH+reg+GG_DISP2HOT], HOTCOUNT_LOOP | |
| 420 | jb ->vm_hotloop | |
| 421 |.endmacro | |
| 422 | | |
| |// hotcall: same counter selection as hotloop ((PC >> 1) & HOTCOUNT_PCMASK), | |
| |// but subtracts HOTCOUNT_CALL and branches to ->vm_hotcall on unsigned borrow. | |
| |// Clobbers reg and flags. | |
| 423 |.macro hotcall, reg | |
| 424 | mov reg, PC | |
| 425 | shr reg, 1 | |
| 426 | and reg, HOTCOUNT_PCMASK | |
| 427 | sub word [DISPATCH+reg+GG_DISP2HOT], HOTCOUNT_CALL | |
| 428 | jb ->vm_hotcall | |
| 429 |.endmacro | |
| 430 | | |
| 431 |// Set current VM state. | |
| |// set_vmstate st: store ~LJ_VMST_<st> into the vmstate field of global_State. | |
| |// '..' pastes the argument onto the LJ_VMST_ prefix (e.g. set_vmstate C). | |
| 432 |.macro set_vmstate, st | |
| 433 | mov dword [DISPATCH+DISPATCH_GL(vmstate)], ~LJ_VMST_..st | |
| 434 |.endmacro | |
| 435 | | |
| 436 |// x87 compares. | |
| |// fcomparepp: fucomip compares st0 with st1, sets EFLAGS and pops once; | |
| |// the following fpop removes the second operand (two pops in total). | |
| 437 |.macro fcomparepp // Compare and pop st0 >< st1. | |
| 438 | fucomip st1 | |
| 439 | fpop | |
| 440 |.endmacro | |
| 441 | | |
| |// fpop1: store st0 into st1 and pop, i.e. drop the old st1 and keep the top value. | |
| 442 |.macro fpop1; fstp st1; .endmacro | |
| 443 | | |
| 444 |// Synthesize SSE FP constants. | |
| |// sseconst_abs: build the mask 0x7fffffffffffffff in reg. | |
| |// x64: load the constant into tmp and move it over. | |
| |// x86: set reg to all ones (pcmpeqd reg,reg), then shift each qword right by 1; | |
| |//      tmp is not used on this path. | |
| 445 |.macro sseconst_abs, reg, tmp // Synthesize abs mask. | |
| 446 |.if X64 | |
| 447 | mov64 tmp, U64x(7fffffff,ffffffff); movd reg, tmp | |
| 448 |.else | |
| 449 | pxor reg, reg; pcmpeqd reg, reg; psrlq reg, 1 | |
| 450 |.endif | |
| 451 |.endmacro | |
| 452 | | |
| |// sseconst_hi: build a 64 bit constant whose upper 32 bits are the hex digits | |
| |// 'val' and whose lower 32 bits are zero. 'val' is given without a 0x prefix. | |
| |// x86: movd puts val in dword 0 (rest zero); pshufd 0x51 then selects | |
| |//      source dwords {1,0,1,1} for destination dwords {0,1,2,3}, moving val to dword 1. | |
| 453 |.macro sseconst_hi, reg, tmp, val // Synthesize hi-32 bit const. | |
| 454 |.if X64 | |
| 455 | mov64 tmp, U64x(val,00000000); movd reg, tmp | |
| 456 |.else | |
| 457 | mov tmp, 0x .. val; movd reg, tmp; pshufd reg, reg, 0x51 | |
| 458 |.endif | |
| 459 |.endmacro | |
| 460 | | |
| |// Named constants built on sseconst_hi; each passes the high dword of the | |
| |// IEEE-754 double it represents. All clobber tmp. | |
| 461 |.macro sseconst_sign, reg, tmp // Synthesize sign mask. | |
| 462 | sseconst_hi reg, tmp, 80000000 | |
| 463 |.endmacro | |
| 464 |.macro sseconst_1, reg, tmp // Synthesize 1.0. | |
| 465 | sseconst_hi reg, tmp, 3ff00000 | |
| 466 |.endmacro | |
| 467 |.macro sseconst_2p52, reg, tmp // Synthesize 2^52. | |
| 468 | sseconst_hi reg, tmp, 43300000 | |
| 469 |.endmacro | |
| 470 |.macro sseconst_tobit, reg, tmp // Synthesize 2^52 + 2^51. | |
| 471 | sseconst_hi reg, tmp, 43380000 | |
| 472 |.endmacro | |
| 473 | | |
| 474 |// Move table write barrier back. Overwrites reg. | |
| |// barrierback tab, reg: clear the LJ_GC_BLACK bit in tab->marked and push tab | |
| |// onto the front of the gc.grayagain list (old head goes into tab->gclist). | |
| |// 'tab' must be a typed operand that supports ->marked and ->gclist. | |
| 475 |.macro barrierback, tab, reg | |
| 476 | and byte tab->marked, (uint8_t)~LJ_GC_BLACK // black2gray(tab) | |
| 477 | mov reg, [DISPATCH+DISPATCH_GL(gc.grayagain)] | |
| 478 | mov [DISPATCH+DISPATCH_GL(gc.grayagain)], tab | |
| 479 | mov tab->gclist, reg | |
| 480 |.endmacro | |
| 481 | | |
| 482 |//----------------------------------------------------------------------- | |
| 483 | |
| 484 /* Generate subroutines used by opcodes and other parts of the VM. */ | |
| 485 /* The .code_sub section should be last to help static branch prediction. */ | |
| 486 static void build_subroutines(BuildCtx *ctx) | |
| 487 { | |
| 488 |.code_sub | |
| 489 | | |
| 490 |//----------------------------------------------------------------------- | |
| 491 |//-- Return handling ---------------------------------------------------- | |
| 492 |//----------------------------------------------------------------------- | |
| 493 | | |
| |// vm_returnp: reached from vm_return when the frame type bits of PC are still | |
| |// non-zero after the FRAME_C test. If the FRAME_P bit is clear, control goes to | |
| |// ->cont_dispatch. Otherwise the frame is unlinked, LJ_TTRUE is written into the | |
| |// slot just below the results, and execution falls through into vm_returnc. | |
| 494 |->vm_returnp: | |
| 495 | test PC, FRAME_P | |
| 496 | jz ->cont_dispatch | |
| 497 | | |
| 498 | // Return from pcall or xpcall fast func. | |
| | // Clearing the low 3 bits of PC leaves the frame delta in bytes. | |
| 499 | and PC, -8 | |
| 500 | sub BASE, PC // Restore caller base. | |
| | // RA is re-expressed relative to the new BASE and moved down one slot (-8). | |
| 501 | lea RAa, [RA+PC-8] // Rebase RA and prepend one result. | |
| 502 | mov PC, [BASE-4] // Fetch PC of previous frame. | |
| 503 | // Prepending may overwrite the pcall frame, so do it at the end. | |
| 504 | mov dword [BASE+RA+4], LJ_TTRUE // Prepend true to results. | |
| 505 | | |
| |// vm_returnc: common return path; also entered by fall-through from vm_returnp. | |
| |// In: RD = nresults (one is added here), PC = frame link word of the frame. | |
| |// If the increment wraps RD to zero, control goes to ->vm_unwind_yield. | |
| |// Otherwise MULTRES = RD; a frame with no FRAME_TYPE bits set is handed to | |
| |// ->BC_RET_Z, anything else falls through into vm_return. | |
| 506 |->vm_returnc: | |
| 507 | add RD, 1 // RD = nresults+1 | |
| 508 | jz ->vm_unwind_yield | |
| 509 | mov MULTRES, RD | |
| 510 | test PC, FRAME_TYPE | |
| 511 | jz ->BC_RET_Z // Handle regular return to Lua. | |
| 512 | | |
| |// vm_return: return from a non-Lua frame. PC is XORed with FRAME_C; if any | |
| |// FRAME_TYPE bit remains set the frame is not a plain C frame and vm_returnp | |
| |// takes over. Otherwise the results are copied down to the frame start, the | |
| |// result count is adjusted to SAVE_NRES, L->base/L->top/L->cframe are updated | |
| |// and the routine returns to the C caller with eax = 0. | |
| |// Also defines ->vm_leave_cp (restore cframe, return 0) and ->vm_leave_unw | |
| |// (restoreregs + ret with eax as set by the caller of that label). | |
| 513 |->vm_return: | |
| 514 | // BASE = base, RA = resultofs, RD = nresults+1 (= MULTRES), PC = return | |
| 515 | xor PC, FRAME_C | |
| 516 | test PC, FRAME_TYPE | |
| 517 | jnz ->vm_returnp | |
| 518 | | |
| 519 | // Return to C. | |
| 520 | set_vmstate C | |
| | // PC = BASE - (PC & -8), computed as -((PC & -8) - BASE). | |
| 521 | and PC, -8 | |
| 522 | sub PC, BASE | |
| 523 | neg PC // Previous base = BASE - delta. | |
| 524 | | |
| | // RD becomes nresults; skip the copy loop if there are none. | |
| 525 | sub RD, 1 | |
| 526 | jz >2 | |
| 527 |1: // Move results down. | |
| | // Copies one 8-byte slot per iteration from [BASE+RA] to [BASE-8] | |
| | // (as one qword on x64, as two dwords on x86), then advances BASE. | |
| 528 |.if X64 | |
| 529 | mov RBa, [BASE+RA] | |
| 530 | mov [BASE-8], RBa | |
| 531 |.else | |
| 532 | mov RB, [BASE+RA] | |
| 533 | mov [BASE-8], RB | |
| 534 | mov RB, [BASE+RA+4] | |
| 535 | mov [BASE-4], RB | |
| 536 |.endif | |
| 537 | add BASE, 8 | |
| 538 | sub RD, 1 | |
| 539 | jnz <1 | |
| 540 |2: | |
| 541 | mov L:RB, SAVE_L | |
| 542 | mov L:RB->base, PC | |
| 543 |3: | |
| 544 | mov RD, MULTRES | |
| 545 | mov RA, SAVE_NRES // RA = wanted nresults+1 | |
| 546 |4: | |
| | // The flags of this compare are also consumed by the 'jb' at label 6. | |
| 547 | cmp RA, RD | |
| 548 | jne >6 // More/less results wanted? | |
| 549 |5: | |
| | // BASE is one slot past the last result plus 8 here; step back to get the top. | |
| 550 | sub BASE, 8 | |
| 551 | mov L:RB->top, BASE | |
| 552 | | |
| 553 |->vm_leave_cp: | |
| 554 | mov RAa, SAVE_CFRAME // Restore previous C frame. | |
| 555 | mov L:RB->cframe, RAa | |
| 556 | xor eax, eax // Ok return status for vm_pcall. | |
| 557 | | |
| 558 |->vm_leave_unw: | |
| 559 | restoreregs | |
| 560 | ret | |
| 561 | | |
| 562 |6: | |
| | // Unsigned RA < RD: fewer results wanted than available. | |
| 563 | jb >7 // Less results wanted? | |
| 564 | // More results wanted. Check stack size and fill up results with nil. | |
| 565 | cmp BASE, L:RB->maxstack | |
| 566 | ja >8 | |
| | // Append one nil (type word only), count it, and re-check at label 4. | |
| 567 | mov dword [BASE-4], LJ_TNIL | |
| 568 | add BASE, 8 | |
| 569 | add RD, 1 | |
| 570 | jmp <4 | |
| 571 | | |
| 572 |7: // Less results wanted. | |
| | // RA == 0 means "keep all results"; otherwise drop the surplus slots. | |
| 573 | test RA, RA | |
| 574 | jz <5 // But check for LUA_MULTRET+1. | |
| 575 | sub RA, RD // Negative result! | |
| 576 | lea BASE, [BASE+RA*8] // Correct top. | |
| 577 | jmp <5 | |
| 578 | | |
| 579 |8: // Corner case: need to grow stack for filling up results. | |
| 580 | // This can happen if: | |
| 581 | // - A C function grows the stack (a lot). | |
| 582 | // - The GC shrinks the stack in between. | |
| 583 | // - A return back from a lua_call() with (high) nresults adjustment. | |
| 584 | mov L:RB->top, BASE // Save current top held in BASE (yes). | |
| 585 | mov MULTRES, RD // Need to fill only remainder with nil. | |
| | // FCARG2 is loaded before FCARG1; on targets where FCARG1 is ecx (= RA) | |
| | // the opposite order would destroy RA first. | |
| 586 | mov FCARG2, RA | |
| 587 | mov FCARG1, L:RB | |
| 588 | call extern lj_state_growstack@8 // (lua_State *L, int n) | |
| 589 | mov BASE, L:RB->top // Need the (realloced) L->top in BASE. | |
| 590 | jmp <3 | |
| 591 | | |
| |// vm_unwind_yield: set al to LUA_YIELD and leave through ->vm_unwind_c_eh. | |
| |// Only al is written; the upper bits of eax keep their previous contents | |
| |// (the only visible caller, vm_returnc, arrives here with RD = eax = 0). | |
| 592 |->vm_unwind_yield: | |
| 593 | mov al, LUA_YIELD | |
| 594 | jmp ->vm_unwind_c_eh | |
| 595 | | |
| |// vm_unwind_c(cframe, errcode): abandon the current C stack by loading the | |
| |// stack pointer with cframe, then return from that frame with eax = errcode. | |
| |// On x86/Windows fs:[0] is additionally reset to the frame's SEH_NEXT slot. | |
| |// ->vm_unwind_c_eh is a second entry that expects sp and eax to be set up | |
| |// already: it marks the VM state as C and leaves via ->vm_leave_unw. | |
| 596 |->vm_unwind_c@8: // Unwind C stack, return from vm_pcall. | |
| 597 | // (void *cframe, int errcode) | |
| 598 |.if X64 | |
| 599 | mov eax, CARG2d // Error return status for vm_pcall. | |
| 600 | mov rsp, CARG1 | |
| 601 |.else | |
| 602 | mov eax, FCARG2 // Error return status for vm_pcall. | |
| 603 | mov esp, FCARG1 | |
| 604 |.if WIN | |
| | // SEH_NEXT is esp-relative, so this must come after esp was replaced. | |
| 605 | lea FCARG1, SEH_NEXT | |
| 606 | fs; mov [0], FCARG1 | |
| 607 |.endif | |
| 608 |.endif | |
| 609 |->vm_unwind_c_eh: // Landing pad for external unwinder. | |
| | // DISPATCH is not assumed valid here: vmstate is reached via L->glref. | |
| 610 | mov L:RB, SAVE_L | |
| 611 | mov GL:RB, L:RB->glref | |
| 612 | mov dword GL:RB->vmstate, ~LJ_VMST_C | |
| 613 | jmp ->vm_leave_unw | |
| 614 | | |
| |// vm_unwind_rethrow: on x64/POSIX, tear down this C frame and tail-jump to | |
| |// lj_err_throw(L, errcode) with errcode taken from eax. | |
| |// On every other target the body is empty and the label simply aliases the | |
| |// code that follows it. | |
| 615 |->vm_unwind_rethrow: | |
| 616 |.if X64 and not X64WIN | |
| 617 | mov FCARG1, SAVE_L | |
| 618 | mov FCARG2, eax | |
| 619 | restoreregs | |
| 620 | jmp extern lj_err_throw@8 // (lua_State *L, int errcode) | |
| 621 |.endif | |
| 622 | | |
| |// vm_unwind_ff(cframe): strip the tag bits from cframe (CFRAME_RAWMASK), load | |
| |// it into the stack pointer and resume in the interpreter as if the current | |
| |// frame had returned (false, <value at BASE-8>). | |
| |// ->vm_unwind_ff_eh is a second entry that expects sp to be set up already. | |
| |// Interpreter state (BASE, DISPATCH, PC) is rebuilt from L before jumping to | |
| |// ->vm_returnc. | |
| 623 |->vm_unwind_ff@4: // Unwind C stack, return from ff pcall. | |
| 624 | // (void *cframe) | |
| 625 |.if X64 | |
| 626 | and CARG1, CFRAME_RAWMASK | |
| 627 | mov rsp, CARG1 | |
| 628 |.else | |
| 629 | and FCARG1, CFRAME_RAWMASK | |
| 630 | mov esp, FCARG1 | |
| 631 |.if WIN | |
| | // SEH_NEXT is esp-relative, so this must come after esp was replaced. | |
| 632 | lea FCARG1, SEH_NEXT | |
| 633 | fs; mov [0], FCARG1 | |
| 634 |.endif | |
| 635 |.endif | |
| 636 |->vm_unwind_ff_eh: // Landing pad for external unwinder. | |
| 637 | mov L:RB, SAVE_L | |
| 638 | mov RAa, -8 // Results start at BASE+RA = BASE-8. | |
| 639 | mov RD, 1+1 // Really 1+2 results, incr. later. | |
| 640 | mov BASE, L:RB->base | |
| 641 | mov DISPATCH, L:RB->glref // Setup pointer to dispatch table. | |
| 642 | add DISPATCH, GG_G2DISP | |
| | // The PC must be read before its slot's neighbour word is overwritten below. | |
| 643 | mov PC, [BASE-4] // Fetch PC of previous frame. | |
| 644 | mov dword [BASE-4], LJ_TFALSE // Prepend false to error message. | |
| 645 | set_vmstate INTERP | |
| 646 | jmp ->vm_returnc // Increments RD/MULTRES and returns. | |
| 647 | | |
| |// vm_rtlunwind(cframe, excptrec, unwinder, errcode) -- x86/Windows only. | |
| |// cframe/excptrec arrive in FCARG1/FCARG2, unwinder/errcode on the stack. | |
| |// The routine never returns to its caller: its own return-address slot is | |
| |// reused as the 4th stack argument of RtlUnwind, and the final 'ret' pops the | |
| |// 'unwinder' argument as the jump target, with FCARG1 = RtlUnwind's result and | |
| |// FCARG2 = errcode. | |
| 648 |.if WIN and not X64 | |
| 649 |->vm_rtlunwind@16: // Thin layer around RtlUnwind. | |
| 650 | // (void *cframe, void *excptrec, void *unwinder, int errcode) | |
| | // [esp] is this routine's return address on entry; it is overwritten. | |
| 651 | mov [esp], FCARG1 // Return value for RtlUnwind. | |
| 652 | push FCARG2 // Exception record for RtlUnwind. | |
| 653 | push 0 // Ignored by RtlUnwind. | |
| 654 | push dword [FCARG1+CFRAME_OFS_SEH] | |
| | // The @16 callee removes all four argument words, so afterwards | |
| | // [esp] = unwinder and [esp+4] = errcode. | |
| 655 | call extern RtlUnwind@16 // Violates ABI (clobbers too much). | |
| 656 | mov FCARG1, eax | |
| 657 | mov FCARG2, [esp+4] // errcode (for vm_unwind_c). | |
| 658 | ret // Jump to unwinder. | |
| 659 |.endif | |
| 660 | | |
| 661 |//----------------------------------------------------------------------- | |
| 662 |//-- Grow stack for calls ----------------------------------------------- | |
| 663 |//----------------------------------------------------------------------- | |
| 664 | | |
| |// Three entry points that all end up calling lj_state_growstack(L, n) and | |
| |// then retry the call via ins_callt: | |
| |//   vm_growstack_c: n = LUA_MINSTACK; L->base/L->top must already be set. | |
| |//   vm_growstack_v: RD holds a top pointer that is 8 too high; fix it, join at 1. | |
| |//   vm_growstack_f: RD holds nargs+1; convert it to a top pointer, join at 1. | |
| |// From label 1 on: n = framesize byte read via PC2PROTO, and BASE/top/PC are | |
| |// saved into L and SAVE_PC before the call. | |
| 665 |->vm_growstack_c: // Grow stack for C function. | |
| 666 | mov FCARG2, LUA_MINSTACK | |
| 667 | jmp >2 | |
| 668 | | |
| 669 |->vm_growstack_v: // Grow stack for vararg Lua function. | |
| 670 | sub RD, 8 | |
| 671 | jmp >1 | |
| 672 | | |
| 673 |->vm_growstack_f: // Grow stack for fixarg Lua function. | |
| 674 | // BASE = new base, RD = nargs+1, RB = L, PC = first PC | |
| | // RD = BASE + (nargs+1)*8 - 8 = address just past the last argument. | |
| 675 | lea RD, [BASE+NARGS:RD*8-8] | |
| 676 |1: | |
| 677 | movzx RA, byte [PC-4+PC2PROTO(framesize)] | |
| 678 | add PC, 4 // Must point after first instruction. | |
| 679 | mov L:RB->base, BASE | |
| 680 | mov L:RB->top, RD | |
| 681 | mov SAVE_PC, PC | |
| 682 | mov FCARG2, RA | |
| 683 |2: | |
| 684 | // RB = L, L->base = new base, L->top = top | |
| 685 | mov FCARG1, L:RB | |
| 686 | call extern lj_state_growstack@8 // (lua_State *L, int n) | |
| | // Reload everything from L; RB (callee-saved) still holds L after the call. | |
| 687 | mov BASE, L:RB->base | |
| 688 | mov RD, L:RB->top | |
| 689 | mov LFUNC:RB, [BASE-8] | |
| | // RD = (top - base) / 8 + 1 = nargs+1 again. | |
| 690 | sub RD, BASE | |
| 691 | shr RD, 3 | |
| 692 | add NARGS:RD, 1 | |
| 693 | // BASE = new base, RB = LFUNC, RD = nargs+1 | |
| 694 | ins_callt // Just retry the call. | |
| 695 | | |
| 696 |//----------------------------------------------------------------------- | |
| 697 |//-- Entry points into the assembler VM --------------------------------- | |
| 698 |//----------------------------------------------------------------------- | |
| 699 | | |
| |// vm_resume(L, base, nres1 = 0, ef = 0): build a C frame whose cframe pointer | |
| |// is offset by CFRAME_RESUME, zero SAVE_PC/SAVE_CFRAME (and SAVE_NRES/SAVE_ERRF | |
| |// on x64), then either | |
| |//   - L->status == 0: jump forward to the shared call entry (label 2) with | |
| |//     RA = base, RB = L, PC = FRAME_CP, or | |
| |//   - otherwise: clear L->status and continue as a return of the values | |
| |//     between 'base' and L->top into the frame at L->base. | |
| 700 |->vm_resume: // Setup C frame and resume thread. | |
| 701 | // (lua_State *L, TValue *base, int nres1 = 0, ptrdiff_t ef = 0) | |
| 702 | saveregs | |
| 703 |.if X64 | |
| 704 | mov L:RB, CARG1d // Caveat: CARG1d may be RA. | |
| 705 | mov SAVE_L, CARG1d | |
| 706 | mov RA, CARG2d | |
| 707 |.else | |
| 708 | mov L:RB, SAVE_L | |
| 709 | mov RA, INARG_BASE // Caveat: overlaps SAVE_CFRAME! | |
| 710 |.endif | |
| 711 | mov PC, FRAME_CP | |
| | // RD = 0 is reused below as the zero for several stores and the status test. | |
| 712 | xor RD, RD | |
| 713 | lea KBASEa, [esp+CFRAME_RESUME] | |
| 714 | mov DISPATCH, L:RB->glref // Setup pointer to dispatch table. | |
| 715 | add DISPATCH, GG_G2DISP | |
| 716 | mov SAVE_PC, RD // Any value outside of bytecode is ok. | |
| 717 | mov SAVE_CFRAME, RDa | |
| 718 |.if X64 | |
| 719 | mov SAVE_NRES, RD | |
| 720 | mov SAVE_ERRF, RD | |
| 721 |.endif | |
| 722 | mov L:RB->cframe, KBASEa | |
| 723 | cmp byte L:RB->status, RDL | |
| 724 | je >2 // Initial resume (like a call). | |
| 725 | | |
| 726 | // Resume after yield (like a return). | |
| 727 | mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB | |
| 728 | set_vmstate INTERP | |
| 729 | mov byte L:RB->status, RDL | |
| 730 | mov BASE, L:RB->base | |
| | // RD = (L->top - base) / 8 + 1. | |
| 731 | mov RD, L:RB->top | |
| 732 | sub RD, RA | |
| 733 | shr RD, 3 | |
| 734 | add RD, 1 // RD = nresults+1 | |
| 735 | sub RA, BASE // RA = resultofs | |
| 736 | mov PC, [BASE-4] | |
| 737 | mov MULTRES, RD | |
| 738 | test PC, FRAME_TYPE | |
| 739 | jz ->BC_RET_Z | |
| 740 | jmp ->vm_return | |
| 741 | | |
| |// vm_pcall(L, base, nres1, ef) and vm_call(L, base, nres1): enter the VM from C. | |
| |// They differ only in the frame type put into PC (FRAME_CP vs FRAME_C) and in | |
| |// vm_pcall storing ef into SAVE_ERRF on x64 (on x86 the stack arguments are | |
| |// accessed in place through the SAVE_* slot names). Both continue at label 1. | |
| |// Label 2 is the common entry also used by vm_resume and vm_cpcall. | |
| |// ->vm_call_dispatch / ->vm_call_dispatch_f are the final steps: check that the | |
| |// slot below RA holds a function (else ->vmeta_call) and dispatch into it. | |
| 742 |->vm_pcall: // Setup protected C frame and enter VM. | |
| 743 | // (lua_State *L, TValue *base, int nres1, ptrdiff_t ef) | |
| 744 | saveregs | |
| 745 | mov PC, FRAME_CP | |
| 746 |.if X64 | |
| 747 | mov SAVE_ERRF, CARG4d | |
| 748 |.endif | |
| 749 | jmp >1 | |
| 750 | | |
| 751 |->vm_call: // Setup C frame and enter VM. | |
| 752 | // (lua_State *L, TValue *base, int nres1) | |
| 753 | saveregs | |
| 754 | mov PC, FRAME_C | |
| 755 | | |
| 756 |1: // Entry point for vm_pcall above (PC = ftype). | |
| 757 |.if X64 | |
| 758 | mov SAVE_NRES, CARG3d | |
| 759 | mov L:RB, CARG1d // Caveat: CARG1d may be RA. | |
| 760 | mov SAVE_L, CARG1d | |
| 761 | mov RA, CARG2d | |
| 762 |.else | |
| 763 | mov L:RB, SAVE_L | |
| 764 | mov RA, INARG_BASE // Caveat: overlaps SAVE_CFRAME! | |
| 765 |.endif | |
| 766 | | |
| | // Link this C frame in front of L->cframe: the old value goes to | |
| | // SAVE_CFRAME, the current stack pointer becomes the new L->cframe. | |
| 767 | mov DISPATCH, L:RB->glref // Setup pointer to dispatch table. | |
| 768 | mov KBASEa, L:RB->cframe // Add our C frame to cframe chain. | |
| 769 | mov SAVE_CFRAME, KBASEa | |
| 770 | mov SAVE_PC, L:RB // Any value outside of bytecode is ok. | |
| 771 | add DISPATCH, GG_G2DISP | |
| 772 |.if X64 | |
| 773 | mov L:RB->cframe, rsp | |
| 774 |.else | |
| 775 | mov L:RB->cframe, esp | |
| 776 |.endif | |
| 777 | | |
| 778 |2: // Entry point for vm_resume/vm_cpcall (RA = base, RB = L, PC = ftype). | |
| 779 | mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB | |
| 780 | set_vmstate INTERP | |
| 781 | mov BASE, L:RB->base // BASE = old base (used in vmeta_call). | |
| | // PC = ftype + (RA - BASE). | |
| 782 | add PC, RA | |
| 783 | sub PC, BASE // PC = frame delta + frame type | |
| 784 | | |
| | // RD = (L->top - RA) / 8 + 1. | |
| 785 | mov RD, L:RB->top | |
| 786 | sub RD, RA | |
| 787 | shr NARGS:RD, 3 | |
| 788 | add NARGS:RD, 1 // RD = nargs+1 | |
| 789 | | |
| 790 |->vm_call_dispatch: | |
| | // The slot below RA holds the callee: object pointer at -8, type tag at -4. | |
| 791 | mov LFUNC:RB, [RA-8] | |
| 792 | cmp dword [RA-4], LJ_TFUNC | |
| 793 | jne ->vmeta_call // Ensure KBASE defined and != BASE. | |
| 794 | | |
| 795 |->vm_call_dispatch_f: | |
| 796 | mov BASE, RA | |
| 797 | ins_call | |
| 798 | // BASE = new base, RB = func, RD = nargs+1, PC = caller PC | |
| 798 | // BASE = new base, RB = func, RD = nargs+1, PC = caller PC | |
| 799 | | |
| |// vm_cpcall(L, func, ud, cp): set up a protected C frame and call | |
| |// cp(L, func, ud) directly. SAVE_ERRF is set to 0 and SAVE_NRES to | |
| |// L->stack - L->top before the call. | |
| |// If cp returns NULL the frame is removed via ->vm_leave_cp (which returns 0); | |
| |// otherwise the returned pointer is used as the new base and execution joins | |
| |// the common call entry (label 2 above) with PC = FRAME_CP. | |
| 800 |->vm_cpcall: // Setup protected C frame, call C. | |
| 801 | // (lua_State *L, lua_CFunction func, void *ud, lua_CPFunction cp) | |
| 802 | saveregs | |
| 803 |.if X64 | |
| | // func/ud/cp stay in their argument registers (CARG2..CARG4) until the call. | |
| 804 | mov L:RB, CARG1d // Caveat: CARG1d may be RA. | |
| 805 | mov SAVE_L, CARG1d | |
| 806 |.else | |
| 807 | mov L:RB, SAVE_L | |
| 808 | // Caveat: INARG_CP_* and SAVE_CFRAME/SAVE_NRES/SAVE_ERRF overlap! | |
| 809 | mov RC, INARG_CP_UD // Get args before they are overwritten. | |
| 810 | mov RA, INARG_CP_FUNC | |
| 811 | mov BASE, INARG_CP_CALL | |
| 812 |.endif | |
| 813 | mov SAVE_PC, L:RB // Any value outside of bytecode is ok. | |
| 814 | | |
| 815 | mov KBASE, L:RB->stack // Compute -savestack(L, L->top). | |
| 816 | sub KBASE, L:RB->top | |
| 817 | mov DISPATCH, L:RB->glref // Setup pointer to dispatch table. | |
| 818 | mov SAVE_ERRF, 0 // No error function. | |
| 819 | mov SAVE_NRES, KBASE // Neg. delta means cframe w/o frame. | |
| 820 | add DISPATCH, GG_G2DISP | |
| 821 | // Handler may change cframe_nres(L->cframe) or cframe_errfunc(L->cframe). | |
| 822 | | |
| 823 |.if X64 | |
| 824 | mov KBASEa, L:RB->cframe // Add our C frame to cframe chain. | |
| 825 | mov SAVE_CFRAME, KBASEa | |
| 826 | mov L:RB->cframe, rsp | |
| 827 | mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB | |
| 828 | | |
| 829 | call CARG4 // (lua_State *L, lua_CFunction func, void *ud) | |
| 830 |.else | |
| | // x86 passes the three arguments on the stack in the ARG1..ARG3 slots. | |
| 831 | mov ARG3, RC // Have to copy args downwards. | |
| 832 | mov ARG2, RA | |
| 833 | mov ARG1, L:RB | |
| 834 | | |
| 835 | mov KBASE, L:RB->cframe // Add our C frame to cframe chain. | |
| 836 | mov SAVE_CFRAME, KBASE | |
| 837 | mov L:RB->cframe, esp | |
| 838 | mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB | |
| 839 | | |
| | // BASE (edx) holds the cp function pointer loaded from INARG_CP_CALL above. | |
| 840 | call BASE // (lua_State *L, lua_CFunction func, void *ud) | |
| 841 |.endif | |
| 842 | // TValue * (new base) or NULL returned in eax (RC). | |
| 843 | test RC, RC | |
| 844 | jz ->vm_leave_cp // No base? Just remove C frame. | |
| 845 | mov RA, RC | |
| 846 | mov PC, FRAME_CP | |
| 847 | jmp <2 // Else continue with the call. | |
| 848 | | |
| 849 |//----------------------------------------------------------------------- | |
| 850 |//-- Metamethod handling ------------------------------------------------ | |
| 851 |//----------------------------------------------------------------------- | |
| 852 | | |
| 853 |//-- Continuation dispatch ---------------------------------------------- | |
| 854 | | |
| 855 |->cont_dispatch: // Jump to the continuation recorded in the frame at RB. | |
| 856 | // BASE = meta base, RA = resultofs, RD = nresults+1 (also in MULTRES) | |
| 857 | add RA, BASE | |
| 858 | and PC, -8 // Clear the low 3 bits, leaving the frame delta. | |
| 859 | mov RB, BASE | |
| 860 | sub BASE, PC // Restore caller BASE. | |
| 861 | mov dword [RA+RD*8-4], LJ_TNIL // Ensure one valid arg. | |
| 862 | mov RC, RA // ... in [RC] | |
| 863 | mov PC, [RB-12] // Restore PC from [cont|PC]. | |
| 864 |.if X64 | |
| 865 | movsxd RAa, dword [RB-16] // May be negative on WIN64 with debug. | |
| 866 |.if FFI | |
| 867 | cmp RA, 1 | |
| 868 | jbe >1 // cont <= 1 is special-cased at 1: below. | |
| 869 |.endif | |
| 870 | lea KBASEa, qword [=>0] | |
| 871 | add RAa, KBASEa // On X64 the stored 32 bit cont is an offset from this address. | |
| 872 |.else | |
| 873 | mov RA, dword [RB-16] | |
| 874 |.if FFI | |
| 875 | cmp RA, 1 | |
| 876 | jbe >1 // cont <= 1 is special-cased at 1: below. | |
| 877 |.endif | |
| 878 |.endif | |
| 879 | mov LFUNC:KBASE, [BASE-8] // Function of the restored caller frame. | |
| 880 | mov KBASE, LFUNC:KBASE->pc | |
| 881 | mov KBASE, [KBASE+PC2PROTO(k)] // KBASE = k field of its prototype. | |
| 882 | // BASE = base, RC = result, RB = meta base | |
| 883 | jmp RAa // Jump to continuation. | |
| 884 | | |
| 885 |.if FFI | |
| 886 |1: | |
| 887 | je ->cont_ffi_callback // cont = 1: return from FFI callback. | |
| 888 | // cont = 0: Tail call from C function. | |
| 889 | sub RB, BASE | |
| 890 | shr RB, 3 | |
| 891 | lea RD, [RB-1] // RD = (meta base - base)/8 - 1. | |
| 892 | jmp ->vm_call_tail | |
| 893 |.endif | |
| 894 | | |
| 895 |->cont_cat: // BASE = base, RC = result, RB = mbase | |
| 896 | movzx RA, PC_RB | |
| 897 | sub RB, 16 | |
| 898 | lea RA, [BASE+RA*8] | |
| 899 | sub RA, RB // RA = &base[PC_RB] - (mbase - 16). | |
| 900 | je ->cont_ra // Equal addresses: store the result via cont_ra. | |
| 901 | neg RA | |
| 902 | shr RA, 3 // Byte distance to a count of 8 byte slots (3rd argument). | |
| 903 |.if X64WIN | |
| 904 | mov CARG3d, RA | |
| 905 | mov L:CARG1d, SAVE_L | |
| 906 | mov L:CARG1d->base, BASE | |
| 907 | mov RCa, [RC] | |
| 908 | mov [RB], RCa // Copy the result TValue to [RB]. | |
| 909 | mov CARG2d, RB | |
| 910 |.elif X64 | |
| 911 | mov L:CARG1d, SAVE_L | |
| 912 | mov L:CARG1d->base, BASE | |
| 913 | mov CARG3d, RA | |
| 914 | mov RAa, [RC] | |
| 915 | mov [RB], RAa // Copy the result TValue to [RB]. | |
| 916 | mov CARG2d, RB | |
| 917 |.else | |
| 918 | mov ARG3, RA | |
| 919 | mov RA, [RC+4] | |
| 920 | mov RC, [RC] | |
| 921 | mov [RB+4], RA // Copy the result TValue to [RB] in two halves. | |
| 922 | mov [RB], RC | |
| 923 | mov ARG2, RB | |
| 924 |.endif | |
| 925 | jmp ->BC_CAT_Z // Arguments are set up; continue in BC_CAT. | |
| 926 | | |
| 927 |//-- Table indexing metamethods ----------------------------------------- | |
| 928 | | |
| 929 |->vmeta_tgets: // Index fallback for a string key held in RC. | |
| 930 | mov TMP1, RC // RC = GCstr * | |
| 931 | mov TMP2, LJ_TSTR | |
| 932 | lea RCa, TMP1 // Store temp. TValue in TMP1/TMP2. | |
| 933 | cmp PC_OP, BC_GGET | |
| 934 | jne >1 // Not GGET: the table operand is reloaded from PC_RB at 1:. | |
| 935 | lea RA, [DISPATCH+DISPATCH_GL(tmptv)] // Store fn->l.env in g->tmptv. | |
| 936 | mov [RA], TAB:RB // RB = GCtab * | |
| 937 | mov dword [RA+4], LJ_TTAB | |
| 938 | mov RB, RA | |
| 939 | jmp >2 | |
| 940 | | |
| 941 |->vmeta_tgetb: // Index fallback for a byte key taken from PC_RC. | |
| 942 | movzx RC, PC_RC | |
| 943 |.if DUALNUM | |
| 944 | mov TMP2, LJ_TISNUM | |
| 945 | mov TMP1, RC | |
| 946 |.else | |
| 947 | cvtsi2sd xmm0, RC | |
| 948 | movsd TMPQ, xmm0 | |
| 949 |.endif | |
| 950 | lea RCa, TMPQ // Store temp. TValue in TMPQ. | |
| 951 | jmp >1 | |
| 952 | | |
| 953 |->vmeta_tgetv: // Index fallback for a key held in a stack slot. | |
| 954 | movzx RC, PC_RC // Reload TValue *k from RC. | |
| 955 | lea RC, [BASE+RC*8] | |
| 956 |1: | |
| 957 | movzx RB, PC_RB // Reload TValue *t from RB. | |
| 958 | lea RB, [BASE+RB*8] | |
| 959 |2: | |
| 960 |.if X64 | |
| 961 | mov L:CARG1d, SAVE_L | |
| 962 | mov L:CARG1d->base, BASE // Caveat: CARG2d/CARG3d may be BASE. | |
| 963 | mov CARG2d, RB | |
| 964 | mov CARG3, RCa // May be 64 bit ptr to stack. | |
| 965 | mov L:RB, L:CARG1d // Keep L in RB; it is read again after the call. | |
| 966 |.else | |
| 967 | mov ARG2, RB | |
| 968 | mov L:RB, SAVE_L | |
| 969 | mov ARG3, RC | |
| 970 | mov ARG1, L:RB | |
| 971 | mov L:RB->base, BASE | |
| 972 |.endif | |
| 973 | mov SAVE_PC, PC | |
| 974 | call extern lj_meta_tget // (lua_State *L, TValue *o, TValue *k) | |
| 975 | // TValue * (finished) or NULL (metamethod) returned in eax (RC). | |
| 976 | mov BASE, L:RB->base | |
| 977 | test RC, RC | |
| 978 | jz >3 | |
| 979 |->cont_ra: // BASE = base, RC = result | |
| 980 | movzx RA, PC_RA // Destination slot index. | |
| 981 |.if X64 | |
| 982 | mov RBa, [RC] | |
| 983 | mov [BASE+RA*8], RBa // Copy the 8 byte result into base[RA]. | |
| 984 |.else | |
| 985 | mov RB, [RC+4] | |
| 986 | mov RC, [RC] | |
| 987 | mov [BASE+RA*8+4], RB // Copy the result into base[RA] in two halves. | |
| 988 | mov [BASE+RA*8], RC | |
| 989 |.endif | |
| 990 | ins_next | |
| 991 | | |
| 992 |3: // Call __index metamethod. | |
| 993 | // BASE = base, L->top = new base, stack = cont/func/t/k | |
| 994 | mov RA, L:RB->top | |
| 995 | mov [RA-12], PC // [cont|PC] | |
| 996 | lea PC, [RA+FRAME_CONT] | |
| 997 | sub PC, BASE // PC = (new base - base) + FRAME_CONT. | |
| 998 | mov LFUNC:RB, [RA-8] // Guaranteed to be a function here. | |
| 999 | mov NARGS:RD, 2+1 // 2 args for func(t, k). | |
| 1000 | jmp ->vm_call_dispatch_f | |
| 1001 | | |
| 1002 |->vmeta_tgetr: // Integer key lookup through lj_tab_getinth. | |
| 1003 | mov FCARG1, TAB:RB | |
| 1004 | mov RB, BASE // Save BASE. | |
| 1005 | mov FCARG2, RC // Caveat: FCARG2 == BASE | |
| 1006 | call extern lj_tab_getinth@8 // (GCtab *t, int32_t key) | |
| 1007 | // cTValue * or NULL returned in eax (RC). | |
| 1008 | movzx RA, PC_RA | |
| 1009 | mov BASE, RB // Restore BASE. | |
| 1010 | test RC, RC | |
| 1011 | jnz ->BC_TGETR_Z // Non-NULL: continue in BC_TGETR with the slot in RC. | |
| 1012 | mov dword [BASE+RA*8+4], LJ_TNIL // NULL: the result is nil. | |
| 1013 | jmp ->BC_TGETR2_Z | |
| 1014 | | |
| 1015 |//----------------------------------------------------------------------- | |
| 1016 | | |
| 1017 |->vmeta_tsets: // Store fallback for a string key held in RC. | |
| 1018 | mov TMP1, RC // RC = GCstr * | |
| 1019 | mov TMP2, LJ_TSTR | |
| 1020 | lea RCa, TMP1 // Store temp. TValue in TMP1/TMP2. | |
| 1021 | cmp PC_OP, BC_GSET | |
| 1022 | jne >1 // Not GSET: the table operand is reloaded from PC_RB at 1:. | |
| 1023 | lea RA, [DISPATCH+DISPATCH_GL(tmptv)] // Store fn->l.env in g->tmptv. | |
| 1024 | mov [RA], TAB:RB // RB = GCtab * | |
| 1025 | mov dword [RA+4], LJ_TTAB | |
| 1026 | mov RB, RA | |
| 1027 | jmp >2 | |
| 1028 | | |
| 1029 |->vmeta_tsetb: // Store fallback for a byte key taken from PC_RC. | |
| 1030 | movzx RC, PC_RC | |
| 1031 |.if DUALNUM | |
| 1032 | mov TMP2, LJ_TISNUM | |
| 1033 | mov TMP1, RC | |
| 1034 |.else | |
| 1035 | cvtsi2sd xmm0, RC | |
| 1036 | movsd TMPQ, xmm0 | |
| 1037 |.endif | |
| 1038 | lea RCa, TMPQ // Store temp. TValue in TMPQ. | |
| 1039 | jmp >1 | |
| 1040 | | |
| 1041 |->vmeta_tsetv: // Store fallback for a key held in a stack slot. | |
| 1042 | movzx RC, PC_RC // Reload TValue *k from RC. | |
| 1043 | lea RC, [BASE+RC*8] | |
| 1044 |1: | |
| 1045 | movzx RB, PC_RB // Reload TValue *t from RB. | |
| 1046 | lea RB, [BASE+RB*8] | |
| 1047 |2: | |
| 1048 |.if X64 | |
| 1049 | mov L:CARG1d, SAVE_L | |
| 1050 | mov L:CARG1d->base, BASE // Caveat: CARG2d/CARG3d may be BASE. | |
| 1051 | mov CARG2d, RB | |
| 1052 | mov CARG3, RCa // May be 64 bit ptr to stack. | |
| 1053 | mov L:RB, L:CARG1d // Keep L in RB; it is read again after the call. | |
| 1054 |.else | |
| 1055 | mov ARG2, RB | |
| 1056 | mov L:RB, SAVE_L | |
| 1057 | mov ARG3, RC | |
| 1058 | mov ARG1, L:RB | |
| 1059 | mov L:RB->base, BASE | |
| 1060 |.endif | |
| 1061 | mov SAVE_PC, PC | |
| 1062 | call extern lj_meta_tset // (lua_State *L, TValue *o, TValue *k) | |
| 1063 | // TValue * (finished) or NULL (metamethod) returned in eax (RC). | |
| 1064 | mov BASE, L:RB->base | |
| 1065 | test RC, RC | |
| 1066 | jz >3 | |
| 1067 | // NOBARRIER: lj_meta_tset ensures the table is not black. | |
| 1068 | movzx RA, PC_RA // Slot index of the value to store. | |
| 1069 |.if X64 | |
| 1070 | mov RBa, [BASE+RA*8] | |
| 1071 | mov [RC], RBa // Copy base[RA] into the returned slot. | |
| 1072 |.else | |
| 1073 | mov RB, [BASE+RA*8+4] | |
| 1074 | mov RA, [BASE+RA*8] | |
| 1075 | mov [RC+4], RB // Copy base[RA] into the returned slot in two halves. | |
| 1076 | mov [RC], RA | |
| 1077 |.endif | |
| 1078 |->cont_nop: // BASE = base, (RC = result) | |
| 1079 | ins_next | |
| 1080 | | |
| 1081 |3: // Call __newindex metamethod. | |
| 1082 | // BASE = base, L->top = new base, stack = cont/func/t/k/(v) | |
| 1083 | mov RA, L:RB->top | |
| 1084 | mov [RA-12], PC // [cont|PC] | |
| 1085 | movzx RC, PC_RA | |
| 1086 | // Copy value to third argument. | |
| 1087 |.if X64 | |
| 1088 | mov RBa, [BASE+RC*8] | |
| 1089 | mov [RA+16], RBa | |
| 1090 |.else | |
| 1091 | mov RB, [BASE+RC*8+4] | |
| 1092 | mov RC, [BASE+RC*8] | |
| 1093 | mov [RA+20], RB | |
| 1094 | mov [RA+16], RC | |
| 1095 |.endif | |
| 1096 | lea PC, [RA+FRAME_CONT] | |
| 1097 | sub PC, BASE // PC = (new base - base) + FRAME_CONT. | |
| 1098 | mov LFUNC:RB, [RA-8] // Guaranteed to be a function here. | |
| 1099 | mov NARGS:RD, 3+1 // 3 args for func(t, k, v). | |
| 1100 | jmp ->vm_call_dispatch_f | |
| 1101 | | |
| 1102 |->vmeta_tsetr: // Integer key store through lj_tab_setinth. | |
| 1103 |.if X64WIN | |
| 1104 | mov L:CARG1d, SAVE_L | |
| 1105 | mov CARG3d, RC | |
| 1106 | mov L:CARG1d->base, BASE | |
| 1107 | xchg CARG2d, TAB:RB // Caveat: CARG2d == BASE. | |
| 1108 |.elif X64 | |
| 1109 | mov L:CARG1d, SAVE_L | |
| 1110 | mov CARG2d, TAB:RB | |
| 1111 | mov L:CARG1d->base, BASE | |
| 1112 | mov RB, BASE // Save BASE. | |
| 1113 | mov CARG3d, RC // Caveat: CARG3d == BASE. | |
| 1114 |.else | |
| 1115 | mov L:RA, SAVE_L | |
| 1116 | mov ARG2, TAB:RB | |
| 1117 | mov RB, BASE // Save BASE. | |
| 1118 | mov ARG3, RC | |
| 1119 | mov ARG1, L:RA | |
| 1120 | mov L:RA->base, BASE | |
| 1121 |.endif | |
| 1122 | mov SAVE_PC, PC | |
| 1123 | call extern lj_tab_setinth // (lua_State *L, GCtab *t, int32_t key) | |
| 1124 | // TValue * returned in eax (RC). | |
| 1125 | movzx RA, PC_RA | |
| 1126 | mov BASE, RB // Restore BASE. | |
| 1127 | jmp ->BC_TSETR_Z // Continue in BC_TSETR with the slot in RC. | |
| 1128 | | |
| 1129 |//-- Comparison metamethods --------------------------------------------- | |
| 1130 | | |
| 1131 |->vmeta_comp: // Comparison fallback through lj_meta_comp. | |
| 1132 |.if X64 | |
| 1133 | mov L:RB, SAVE_L | |
| 1134 | mov L:RB->base, BASE // Caveat: CARG2d/CARG3d == BASE. | |
| 1135 |.if X64WIN | |
| 1136 | lea CARG3d, [BASE+RD*8] | |
| 1137 | lea CARG2d, [BASE+RA*8] | |
| 1138 |.else | |
| 1139 | lea CARG2d, [BASE+RA*8] | |
| 1140 | lea CARG3d, [BASE+RD*8] | |
| 1141 |.endif | |
| 1142 | mov CARG1d, L:RB // Caveat: CARG1d/CARG4d == RA. | |
| 1143 | movzx CARG4d, PC_OP | |
| 1144 |.else | |
| 1145 | movzx RB, PC_OP | |
| 1146 | lea RD, [BASE+RD*8] | |
| 1147 | lea RA, [BASE+RA*8] | |
| 1148 | mov ARG4, RB | |
| 1149 | mov L:RB, SAVE_L | |
| 1150 | mov ARG3, RD | |
| 1151 | mov ARG2, RA | |
| 1152 | mov ARG1, L:RB | |
| 1153 | mov L:RB->base, BASE | |
| 1154 |.endif | |
| 1155 | mov SAVE_PC, PC | |
| 1156 | call extern lj_meta_comp // (lua_State *L, TValue *o1, *o2, int op) | |
| 1157 | // 0/1 or TValue * (metamethod) returned in eax (RC). | |
| 1158 |3: // Shared tail, also entered from vmeta_equal and vmeta_equal_cd. | |
| 1159 | mov BASE, L:RB->base | |
| 1160 | cmp RC, 1 | |
| 1161 | ja ->vmeta_binop // Above 1: a TValue * was returned, call the metamethod. | |
| 1162 |4: | |
| 1163 | lea PC, [PC+4] // lea leaves the flags of the preceding cmp intact for the jb. | |
| 1164 | jb >6 // Below: continue without taking the branch. | |
| 1165 |5: | |
| 1166 | movzx RD, PC_RD | |
| 1167 | branchPC RD | |
| 1168 |6: | |
| 1169 | ins_next | |
| 1170 | | |
| 1171 |->cont_condt: // BASE = base, RC = result | |
| 1172 | add PC, 4 | |
| 1173 | cmp dword [RC+4], LJ_TISTRUECOND // Branch if result is true. | |
| 1174 | jb <5 | |
| 1175 | jmp <6 | |
| 1176 | | |
| 1177 |->cont_condf: // BASE = base, RC = result | |
| 1178 | cmp dword [RC+4], LJ_TISTRUECOND // Branch if result is false. | |
| 1179 | jmp <4 // The flags set here are consumed by the jb at 4:. | |
| 1180 | | |
| 1181 |->vmeta_equal: // Equality fallback through lj_meta_equal. | |
| 1182 | sub PC, 4 // Step PC back by 4; the shared tail at 4: adds it again. | |
| 1183 |.if X64WIN | |
| 1184 | mov CARG3d, RD | |
| 1185 | mov CARG4d, RB | |
| 1186 | mov L:RB, SAVE_L | |
| 1187 | mov L:RB->base, BASE // Caveat: CARG2d == BASE. | |
| 1188 | mov CARG2d, RA | |
| 1189 | mov CARG1d, L:RB // Caveat: CARG1d == RA. | |
| 1190 |.elif X64 | |
| 1191 | mov CARG2d, RA | |
| 1192 | mov CARG4d, RB // Caveat: CARG4d == RA. | |
| 1193 | mov L:RB, SAVE_L | |
| 1194 | mov L:RB->base, BASE // Caveat: CARG3d == BASE. | |
| 1195 | mov CARG3d, RD | |
| 1196 | mov CARG1d, L:RB | |
| 1197 |.else | |
| 1198 | mov ARG4, RB | |
| 1199 | mov L:RB, SAVE_L | |
| 1200 | mov ARG3, RD | |
| 1201 | mov ARG2, RA | |
| 1202 | mov ARG1, L:RB | |
| 1203 | mov L:RB->base, BASE | |
| 1204 |.endif | |
| 1205 | mov SAVE_PC, PC | |
| 1206 | call extern lj_meta_equal // (lua_State *L, GCobj *o1, *o2, int ne) | |
| 1207 | // 0/1 or TValue * (metamethod) returned in eax (RC). | |
| 1208 | jmp <3 | |
| 1209 | | |
| 1210 |->vmeta_equal_cd: // Body exists only with FFI; otherwise this label is empty. | |
| 1211 |.if FFI | |
| 1212 | sub PC, 4 // Step PC back by 4; the shared tail at 4: adds it again. | |
| 1213 | mov L:RB, SAVE_L | |
| 1214 | mov L:RB->base, BASE | |
| 1215 | mov FCARG1, L:RB | |
| 1216 | mov FCARG2, dword [PC-4] // Instruction word just before the adjusted PC. | |
| 1217 | mov SAVE_PC, PC | |
| 1218 | call extern lj_meta_equal_cd@8 // (lua_State *L, BCIns ins) | |
| 1219 | // 0/1 or TValue * (metamethod) returned in eax (RC). | |
| 1220 | jmp <3 | |
| 1221 |.endif | |
| 1222 | | |
| 1223 |->vmeta_istype: // Type check fallback through lj_meta_istype. | |
| 1224 |.if X64 | |
| 1225 | mov L:RB, SAVE_L | |
| 1226 | mov L:RB->base, BASE // Caveat: CARG2d/CARG3d may be BASE. | |
| 1227 | mov CARG2d, RA | |
| 1228 | movzx CARG3d, PC_RD | |
| 1229 | mov L:CARG1d, L:RB | |
| 1230 |.else | |
| 1231 | movzx RD, PC_RD | |
| 1232 | mov ARG2, RA | |
| 1233 | mov L:RB, SAVE_L | |
| 1234 | mov ARG3, RD | |
| 1235 | mov ARG1, L:RB | |
| 1236 | mov L:RB->base, BASE | |
| 1237 |.endif | |
| 1238 | mov SAVE_PC, PC | |
| 1239 | call extern lj_meta_istype // (lua_State *L, BCReg ra, BCReg tp) | |
| 1240 | mov BASE, L:RB->base | |
| 1241 | jmp <6 // Continue with the next instruction. | |
| 1242 | | |
| 1243 |//-- Arithmetic metamethods --------------------------------------------- | |
| 1244 | | |
| 1245 |->vmeta_arith_vno: // As vmeta_arith_vn, but reloads RB from PC_RB with DUALNUM. | |
| 1246 |.if DUALNUM | |
| 1247 | movzx RB, PC_RB | |
| 1248 |.endif | |
| 1249 |->vmeta_arith_vn: // RB indexes a stack slot, RC indexes a KBASE entry. | |
| 1250 | lea RC, [KBASE+RC*8] | |
| 1251 | jmp >1 | |
| 1252 | | |
| 1253 |->vmeta_arith_nvo: // As vmeta_arith_nv, but reloads RC from PC_RC with DUALNUM. | |
| 1254 |.if DUALNUM | |
| 1255 | movzx RC, PC_RC | |
| 1256 |.endif | |
| 1257 |->vmeta_arith_nv: // Same operands as vn, passed in the opposite order. | |
| 1258 | lea RC, [KBASE+RC*8] | |
| 1259 | lea RB, [BASE+RB*8] | |
| 1260 | xchg RB, RC // RB = KBASE entry, RC = stack slot. | |
| 1261 | jmp >2 | |
| 1262 | | |
| 1263 |->vmeta_unm: // Unary case: both operand pointers are the slot at RD. | |
| 1264 | lea RC, [BASE+RD*8] | |
| 1265 | mov RB, RC | |
| 1266 | jmp >2 | |
| 1267 | | |
| 1268 |->vmeta_arith_vvo: // As vmeta_arith_vv, but reloads RB from PC_RB with DUALNUM. | |
| 1269 |.if DUALNUM | |
| 1270 | movzx RB, PC_RB | |
| 1271 |.endif | |
| 1272 |->vmeta_arith_vv: // RB and RC both index stack slots. | |
| 1273 | lea RC, [BASE+RC*8] | |
| 1274 |1: | |
| 1275 | lea RB, [BASE+RB*8] | |
| 1276 |2: | |
| 1277 | lea RA, [BASE+RA*8] // Pointer passed as the ra argument. | |
| 1278 |.if X64WIN | |
| 1279 | mov CARG3d, RB | |
| 1280 | mov CARG4d, RC | |
| 1281 | movzx RC, PC_OP | |
| 1282 | mov ARG5d, RC | |
| 1283 | mov L:RB, SAVE_L | |
| 1284 | mov L:RB->base, BASE // Caveat: CARG2d == BASE. | |
| 1285 | mov CARG2d, RA | |
| 1286 | mov CARG1d, L:RB // Caveat: CARG1d == RA. | |
| 1287 |.elif X64 | |
| 1288 | movzx CARG5d, PC_OP | |
| 1289 | mov CARG2d, RA | |
| 1290 | mov CARG4d, RC // Caveat: CARG4d == RA. | |
| 1291 | mov L:CARG1d, SAVE_L | |
| 1292 | mov L:CARG1d->base, BASE // Caveat: CARG3d == BASE. | |
| 1293 | mov CARG3d, RB | |
| 1294 | mov L:RB, L:CARG1d | |
| 1295 |.else | |
| 1296 | mov ARG3, RB | |
| 1297 | mov L:RB, SAVE_L | |
| 1298 | mov ARG4, RC | |
| 1299 | movzx RC, PC_OP | |
| 1300 | mov ARG2, RA | |
| 1301 | mov ARG5, RC | |
| 1302 | mov ARG1, L:RB | |
| 1303 | mov L:RB->base, BASE | |
| 1304 |.endif | |
| 1305 | mov SAVE_PC, PC | |
| 1306 | call extern lj_meta_arith // (lua_State *L, TValue *ra,*rb,*rc, BCReg op) | |
| 1307 | // NULL (finished) or TValue * (metamethod) returned in eax (RC). | |
| 1308 | mov BASE, L:RB->base | |
| 1309 | test RC, RC | |
| 1310 | jz ->cont_nop // NULL: done, continue with the next instruction. | |
| 1311 | | |
| 1312 | // Call metamethod for binary op. | |
| 1313 |->vmeta_binop: | |
| 1314 | // BASE = base, RC = new base, stack = cont/func/o1/o2 | |
| 1315 | mov RA, RC | |
| 1316 | sub RC, BASE | |
| 1317 | mov [RA-12], PC // [cont|PC] | |
| 1318 | lea PC, [RC+FRAME_CONT] // PC = (new base - base) + FRAME_CONT. | |
| 1319 | mov NARGS:RD, 2+1 // 2 args for func(o1, o2). | |
| 1320 | jmp ->vm_call_dispatch | |
| 1321 | | |
| 1322 |->vmeta_len: // Length fallback through lj_meta_len. | |
| 1323 | mov L:RB, SAVE_L | |
| 1324 | mov L:RB->base, BASE | |
| 1325 | lea FCARG2, [BASE+RD*8] // Caveat: FCARG2 == BASE | |
| 1326 | mov L:FCARG1, L:RB | |
| 1327 | mov SAVE_PC, PC | |
| 1328 | call extern lj_meta_len@8 // (lua_State *L, TValue *o) | |
| 1329 | // NULL (retry) or TValue * (metamethod) returned in eax (RC). | |
| 1330 | mov BASE, L:RB->base | |
| 1331 #if LJ_52 | |
| 1332 | test RC, RC | |
| 1333 | jne ->vmeta_binop // Binop call for compatibility. | |
| 1334 | movzx RD, PC_RD // NULL: reload the operand and retry in BC_LEN. | |
| 1335 | mov TAB:FCARG1, [BASE+RD*8] | |
| 1336 | jmp ->BC_LEN_Z | |
| 1337 #else | |
| 1338 | jmp ->vmeta_binop // Binop call for compatibility. | |
| 1339 #endif | |
| 1340 | | |
| 1341 |//-- Call metamethod ---------------------------------------------------- | |
| 1342 | | |
| 1343 |->vmeta_call_ra: // Entry with a slot index in RA; converts it to a new base pointer. | |
| 1344 | lea RA, [BASE+RA*8+8] | |
| 1345 |->vmeta_call: // Resolve and call __call metamethod. | |
| 1346 | // BASE = old base, RA = new base, RC = nargs+1, PC = return | |
| 1347 | mov TMP2, RA // Save RA, RC for us. | |
| 1348 | mov TMP1, NARGS:RD | |
| 1349 | sub RA, 8 // RA now points at the slot of the called value. | |
| 1350 |.if X64 | |
| 1351 | mov L:RB, SAVE_L | |
| 1352 | mov L:RB->base, BASE // Caveat: CARG2d/CARG3d may be BASE. | |
| 1353 | mov CARG2d, RA | |
| 1354 | lea CARG3d, [RA+NARGS:RD*8] | |
| 1355 | mov CARG1d, L:RB // Caveat: CARG1d may be RA. | |
| 1356 |.else | |
| 1357 | lea RC, [RA+NARGS:RD*8] | |
| 1358 | mov L:RB, SAVE_L | |
| 1359 | mov ARG2, RA | |
| 1360 | mov ARG3, RC | |
| 1361 | mov ARG1, L:RB | |
| 1362 | mov L:RB->base, BASE // This is the caller's base! | |
| 1363 |.endif | |
| 1364 | mov SAVE_PC, PC | |
| 1365 | call extern lj_meta_call // (lua_State *L, TValue *func, TValue *top) | |
| 1366 | mov BASE, L:RB->base | |
| 1367 | mov RA, TMP2 // Restore the new base and nargs+1 saved above. | |
| 1368 | mov NARGS:RD, TMP1 | |
| 1369 | mov LFUNC:RB, [RA-8] // Reload the function slot after the C call. | |
| 1370 | add NARGS:RD, 1 // The argument count grows by one. | |
| 1371 | // This is fragile. L->base must not move, KBASE must always be defined. | |
| 1372 |.if x64 // NOTE(review): lowercase x64; every other conditional here tests X64. Confirm which is intended. | |
| 1373 | cmp KBASEa, rdx // Continue with CALLT if flag set. | |
| 1374 |.else | |
| 1375 | cmp KBASE, BASE // Continue with CALLT if flag set. | |
| 1376 |.endif | |
| 1377 | je ->BC_CALLT_Z | |
| 1378 | mov BASE, RA | |
| 1379 | ins_call // Otherwise call resolved metamethod. | |
| 1380 | | |
| 1381 |//-- Argument coercion for 'for' statement ------------------------------ | |
| 1382 | | |
| 1383 |->vmeta_for: // Coerce 'for' loop arguments through lj_meta_for, then retry. | |
| 1384 | mov L:RB, SAVE_L | |
| 1385 | mov L:RB->base, BASE | |
| 1386 | mov FCARG2, RA // Caveat: FCARG2 == BASE | |
| 1387 | mov L:FCARG1, L:RB // Caveat: FCARG1 == RA | |
| 1388 | mov SAVE_PC, PC | |
| 1389 | call extern lj_meta_for@8 // (lua_State *L, TValue *base) | |
| 1390 | mov BASE, L:RB->base | |
| 1391 | mov RC, [PC-4] // Re-read the instruction word just before PC. | |
| 1392 | movzx RA, RCH // Decode it again: second byte to RA, | |
| 1393 | movzx OP, RCL // low byte to OP, | |
| 1394 | shr RC, 16 // upper 16 bits stay in RC. | |
| 1395 |.if X64 | |
| 1396 | jmp aword [DISPATCH+OP*8+GG_DISP2STATIC] // Retry FORI or JFORI. | |
| 1397 |.else | |
| 1398 | jmp aword [DISPATCH+OP*4+GG_DISP2STATIC] // Retry FORI or JFORI. | |
| 1399 |.endif | |
| 1400 | | |
| 1401 |//----------------------------------------------------------------------- | |
| 1402 |//-- Fast functions ----------------------------------------------------- | |
| 1403 |//----------------------------------------------------------------------- | |
| 1404 | | |
| 1405 |.macro .ffunc, name // Emit only the ff_<name> label; no argument check. | |
| 1406 |->ff_ .. name: | |
| 1407 |.endmacro | |
| 1408 | | |
| 1409 |.macro .ffunc_1, name // Label plus check for at least 1 argument (RD = nargs+1). | |
| 1410 |->ff_ .. name: | |
| 1411 | cmp NARGS:RD, 1+1; jb ->fff_fallback | |
| 1412 |.endmacro | |
| 1413 | | |
| 1414 |.macro .ffunc_2, name // Label plus check for at least 2 arguments. | |
| 1415 |->ff_ .. name: | |
| 1416 | cmp NARGS:RD, 2+1; jb ->fff_fallback | |
| 1417 |.endmacro | |
| 1418 | | |
| 1419 |.macro .ffunc_nsse, name, op // One number argument, loaded into xmm0 with op. | |
| 1420 | .ffunc_1 name | |
| 1421 | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback | |
| 1422 | op xmm0, qword [BASE] | |
| 1423 |.endmacro | |
| 1424 | | |
| 1425 |.macro .ffunc_nsse, name // One-parameter form: uses movsd as the load op. | |
| 1426 | .ffunc_nsse name, movsd | |
| 1427 |.endmacro | |
| 1428 | | |
| 1429 |.macro .ffunc_nnsse, name // Two number arguments, loaded into xmm0 and xmm1. | |
| 1430 | .ffunc_2 name | |
| 1431 | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback | |
| 1432 | cmp dword [BASE+12], LJ_TISNUM; jae ->fff_fallback | |
| 1433 | movsd xmm0, qword [BASE] | |
| 1434 | movsd xmm1, qword [BASE+8] | |
| 1435 |.endmacro | |
| 1436 | | |
| 1437 |.macro .ffunc_nnr, name // Two number arguments, pushed on the x87 stack. | |
| 1438 | .ffunc_2 name | |
| 1439 | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback | |
| 1440 | cmp dword [BASE+12], LJ_TISNUM; jae ->fff_fallback | |
| 1441 | fld qword [BASE+8] // Second argument is loaded first, | |
| 1442 | fld qword [BASE] // so the first argument ends up on top. | |
| 1443 |.endmacro | |
| 1444 | | |
| 1445 |// Inlined GC threshold check. Caveat: uses label 1. | |
| 1446 |.macro ffgccheck | |
| 1447 | mov RB, [DISPATCH+DISPATCH_GL(gc.total)] | |
| 1448 | cmp RB, [DISPATCH+DISPATCH_GL(gc.threshold)] | |
| 1449 | jb >1 // Below the threshold: skip the GC step. | |
| 1450 | call ->fff_gcstep | |
| 1451 |1: | |
| 1452 |.endmacro | |
| 1453 | | |
| 1454 |//-- Base library: checks ----------------------------------------------- | |
| 1455 | | |
| 1456 |.ffunc_1 assert // Fast path: first argument is true, all arguments are returned. | |
| 1457 | mov RB, [BASE+4] | |
| 1458 | cmp RB, LJ_TISTRUECOND; jae ->fff_fallback | |
| 1459 | mov PC, [BASE-4] | |
| 1460 | mov MULTRES, RD // Keep nargs+1; it is reloaded into RD at 2: below. | |
| 1461 | mov [BASE-4], RB // Copy the first argument down to the slot at BASE-8. | |
| 1462 | mov RB, [BASE] | |
| 1463 | mov [BASE-8], RB | |
| 1464 | sub RD, 2 | |
| 1465 | jz >2 // Only one argument: nothing else to move. | |
| 1466 | mov RA, BASE | |
| 1467 |1: // Move each remaining argument down by one 8 byte slot. | |
| 1468 | add RA, 8 | |
| 1469 |.if X64 | |
| 1470 | mov RBa, [RA] | |
| 1471 | mov [RA-8], RBa | |
| 1472 |.else | |
| 1473 | mov RB, [RA+4] | |
| 1474 | mov [RA-4], RB | |
| 1475 | mov RB, [RA] | |
| 1476 | mov [RA-8], RB | |
| 1477 |.endif | |
| 1478 | sub RD, 1 | |
| 1479 | jnz <1 | |
| 1480 |2: | |
| 1481 | mov RD, MULTRES | |
| 1482 | jmp ->fff_res_ | |
| 1483 | | |
| 1484 |.ffunc_1 type // Return a string selected by the argument's type tag. | |
| 1485 | mov RB, [BASE+4] // Type tag of the first argument. | |
| 1486 |.if X64 | |
| 1487 | mov RA, RB | |
| 1488 | sar RA, 15 | |
| 1489 | cmp RA, -2 | |
| 1490 | je >3 // This tag pattern is handled as LJ_TLIGHTUD at 3: below. | |
| 1491 |.endif | |
| 1492 | mov RC, ~LJ_TNUMX | |
| 1493 | not RB | |
| 1494 | cmp RC, RB | |
| 1495 | cmova RC, RB // RC = unsigned minimum of ~LJ_TNUMX and ~tag. | |
| 1496 |2: | |
| 1497 | mov CFUNC:RB, [BASE-8] | |
| 1498 | mov STR:RC, [CFUNC:RB+RC*8+((char *)(&((GCfuncC *)0)->upvalue))] // String from the C function's upvalue array, index RC. | |
| 1499 | mov PC, [BASE-4] | |
| 1500 | mov dword [BASE-4], LJ_TSTR | |
| 1501 | mov [BASE-8], STR:RC | |
| 1502 | jmp ->fff_res1 | |
| 1503 |.if X64 | |
| 1504 |3: | |
| 1505 | mov RC, ~LJ_TLIGHTUD | |
| 1506 | jmp <2 | |
| 1507 |.endif | |
| 1508 | | |
| 1509 |//-- Base library: getters and setters --------------------------------- | |
| 1510 | | |
| 1511 |.ffunc_1 getmetatable // Return the metatable, or its __metatable field if present. | |
| 1512 | mov RB, [BASE+4] | |
| 1513 | mov PC, [BASE-4] | |
| 1514 | cmp RB, LJ_TTAB; jne >6 | |
| 1515 |1: // Field metatable must be at same offset for GCtab and GCudata! | |
| 1516 | mov TAB:RB, [BASE] | |
| 1517 | mov TAB:RB, TAB:RB->metatable | |
| 1518 |2: | |
| 1519 | test TAB:RB, TAB:RB | |
| 1520 | mov dword [BASE-4], LJ_TNIL // mov does not alter the flags set by the test above. | |
| 1521 | jz ->fff_res1 // No metatable: return nil. | |
| 1522 | mov STR:RC, [DISPATCH+DISPATCH_GL(gcroot)+4*(GCROOT_MMNAME+MM_metatable)] | |
| 1523 | mov dword [BASE-4], LJ_TTAB // Store metatable as default result. | |
| 1524 | mov [BASE-8], TAB:RB | |
| 1525 | mov RA, TAB:RB->hmask | |
| 1526 | and RA, STR:RC->sid | |
| 1527 | imul RA, #NODE | |
| 1528 | add NODE:RA, TAB:RB->node // RA = first node of the chain for this key. | |
| 1529 |3: // Rearranged logic, because we expect _not_ to find the key. | |
| 1530 | cmp dword NODE:RA->key.it, LJ_TSTR | |
| 1531 | jne >4 | |
| 1532 | cmp dword NODE:RA->key.gcr, STR:RC | |
| 1533 | je >5 | |
| 1534 |4: | |
| 1535 | mov NODE:RA, NODE:RA->next | |
| 1536 | test NODE:RA, NODE:RA | |
| 1537 | jnz <3 | |
| 1538 | jmp ->fff_res1 // Not found, keep default result. | |
| 1539 |5: | |
| 1540 | mov RB, [RA+4] | |
| 1541 | cmp RB, LJ_TNIL; je ->fff_res1 // Ditto for nil value. | |
| 1542 | mov RC, [RA] | |
| 1543 | mov [BASE-4], RB // Return value of mt.__metatable. | |
| 1544 | mov [BASE-8], RC | |
| 1545 | jmp ->fff_res1 | |
| 1546 | | |
| 1547 |6: // Argument is not a table. | |
| 1548 | cmp RB, LJ_TUDATA; je <1 | |
| 1549 |.if X64 | |
| 1550 | cmp RB, LJ_TNUMX; ja >8 | |
| 1551 | cmp RB, LJ_TISNUM; jbe >7 | |
| 1552 | mov RB, LJ_TLIGHTUD | |
| 1553 | jmp >8 | |
| 1554 |7: | |
| 1555 |.else | |
| 1556 | cmp RB, LJ_TISNUM; ja >8 | |
| 1557 |.endif | |
| 1558 | mov RB, LJ_TNUMX | |
| 1559 |8: | |
| 1560 | not RB | |
| 1561 | mov TAB:RB, [DISPATCH+RB*4+DISPATCH_GL(gcroot[GCROOT_BASEMT])] // Base metatable indexed by ~tag. | |
| 1562 | jmp <2 | |
| 1563 | | |
| 1564 |.ffunc_2 setmetatable // Inline only: table without a metatable, new metatable is a table. | |
| 1565 | cmp dword [BASE+4], LJ_TTAB; jne ->fff_fallback | |
| 1566 | // Fast path: no mt for table yet and not clearing the mt. | |
| 1567 | mov TAB:RB, [BASE] | |
| 1568 | cmp dword TAB:RB->metatable, 0; jne ->fff_fallback | |
| 1569 | cmp dword [BASE+12], LJ_TTAB; jne ->fff_fallback | |
| 1570 | mov TAB:RC, [BASE+8] | |
| 1571 | mov TAB:RB->metatable, TAB:RC | |
| 1572 | mov PC, [BASE-4] | |
| 1573 | mov dword [BASE-4], LJ_TTAB // Return original table. | |
| 1574 | mov [BASE-8], TAB:RB | |
| 1575 | test byte TAB:RB->marked, LJ_GC_BLACK // isblack(table) | |
| 1576 | jz >1 | |
| 1577 | // Possible write barrier. Table is black, but skip iswhite(mt) check. | |
| 1578 | barrierback TAB:RB, RC | |
| 1579 |1: | |
| 1580 | jmp ->fff_res1 | |
| 1581 | | |
| 1582 |.ffunc_2 rawget // Look up the key with lj_tab_get and return the slot's value. | |
| 1583 | cmp dword [BASE+4], LJ_TTAB; jne ->fff_fallback | |
| 1584 |.if X64WIN | |
| 1585 | mov RB, BASE // Save BASE. | |
| 1586 | lea CARG3d, [BASE+8] | |
| 1587 | mov CARG2d, [BASE] // Caveat: CARG2d == BASE. | |
| 1588 | mov CARG1d, SAVE_L | |
| 1589 |.elif X64 | |
| 1590 | mov RB, BASE // Save BASE. | |
| 1591 | mov CARG2d, [BASE] | |
| 1592 | lea CARG3d, [BASE+8] // Caveat: CARG3d == BASE. | |
| 1593 | mov CARG1d, SAVE_L | |
| 1594 |.else | |
| 1595 | mov TAB:RD, [BASE] | |
| 1596 | mov L:RB, SAVE_L | |
| 1597 | mov ARG2, TAB:RD | |
| 1598 | mov ARG1, L:RB | |
| 1599 | mov RB, BASE // Save BASE. | |
| 1600 | add BASE, 8 // BASE temporarily points at the key argument. | |
| 1601 | mov ARG3, BASE | |
| 1602 |.endif | |
| 1603 | call extern lj_tab_get // (lua_State *L, GCtab *t, cTValue *key) | |
| 1604 | // cTValue * returned in eax (RD). | |
| 1605 | mov BASE, RB // Restore BASE. | |
| 1606 | // Copy table slot. | |
| 1607 |.if X64 | |
| 1608 | mov RBa, [RD] | |
| 1609 | mov PC, [BASE-4] | |
| 1610 | mov [BASE-8], RBa | |
| 1611 |.else | |
| 1612 | mov RB, [RD] | |
| 1613 | mov RD, [RD+4] | |
| 1614 | mov PC, [BASE-4] | |
| 1615 | mov [BASE-8], RB | |
| 1616 | mov [BASE-4], RD | |
| 1617 |.endif | |
| 1618 | jmp ->fff_res1 | |
| 1619 | | |
| 1620 |//-- Base library: conversions ------------------------------------------ | |
| 1621 | | |
| 1622 |.ffunc tonumber // Return the argument unchanged when it is already a number. | |
| 1623 | // Only handles the number case inline (without a base argument). | |
| 1624 | cmp NARGS:RD, 1+1; jne ->fff_fallback // Exactly one argument. | |
| 1625 | cmp dword [BASE+4], LJ_TISNUM | |
| 1626 |.if DUALNUM | |
| 1627 | jne >1 | |
| 1628 | mov RB, dword [BASE]; jmp ->fff_resi // Tag equals LJ_TISNUM: return the 32 bit value via fff_resi. | |
| 1629 |1: | |
| 1630 | ja ->fff_fallback // Tag above LJ_TISNUM: not handled inline. | |
| 1631 |.else | |
| 1632 | jae ->fff_fallback | |
| 1633 |.endif | |
| 1634 | movsd xmm0, qword [BASE]; jmp ->fff_resxmm0 | |
| 1635 | | |
| 1636 |.ffunc_1 tostring // Inline for strings and, without a number base metatable, numbers. | |
| 1637 | // Only handles the string or number case inline. | |
| 1638 | mov PC, [BASE-4] | |
| 1639 | cmp dword [BASE+4], LJ_TSTR; jne >3 | |
| 1640 | // A __tostring method in the string base metatable is ignored. | |
| 1641 | mov STR:RD, [BASE] | |
| 1642 |2: // Return the string in RD. | |
| 1643 | mov dword [BASE-4], LJ_TSTR | |
| 1644 | mov [BASE-8], STR:RD | |
| 1645 | jmp ->fff_res1 | |
| 1646 |3: // Handle numbers inline, unless a number base metatable is present. | |
| 1647 | cmp dword [BASE+4], LJ_TISNUM; ja ->fff_fallback | |
| 1648 | cmp dword [DISPATCH+DISPATCH_GL(gcroot[GCROOT_BASEMT_NUM])], 0 | |
| 1649 | jne ->fff_fallback | |
| 1650 | ffgccheck // Caveat: uses label 1. | |
| 1651 | mov L:RB, SAVE_L | |
| 1652 | mov L:RB->base, BASE // Add frame since C call can throw. | |
| 1653 | mov SAVE_PC, PC // Redundant (but a defined value). | |
| 1654 |.if X64 and not X64WIN | |
| 1655 | mov FCARG2, BASE // Otherwise: FCARG2 == BASE | |
| 1656 |.endif | |
| 1657 | mov L:FCARG1, L:RB | |
| 1658 |.if DUALNUM | |
| 1659 | call extern lj_strfmt_number@8 // (lua_State *L, cTValue *o) | |
| 1660 |.else | |
| 1661 | call extern lj_strfmt_num@8 // (lua_State *L, lua_Number *np) | |
| 1662 |.endif | |
| 1663 | // GCstr returned in eax (RD). | |
| 1664 | mov BASE, L:RB->base | |
| 1665 | jmp <2 | |
| 1666 | | |
| 1667 |//-- Base library: iterators ------------------------------------------- | |
| 1668 | | |
| 1669 |.ffunc_1 next // Table traversal step through lj_tab_next. | |
| 1670 | je >2 // Missing 2nd arg? | |
| 1671 |1: | |
| 1672 | cmp dword [BASE+4], LJ_TTAB; jne ->fff_fallback | |
| 1673 | mov PC, [BASE-4] | |
| 1674 | mov RB, BASE // Save BASE. | |
| 1675 |.if X64WIN | |
| 1676 | mov CARG1d, [BASE] | |
| 1677 | lea CARG3d, [BASE-8] | |
| 1678 | lea CARG2d, [BASE+8] // Caveat: CARG2d == BASE. | |
| 1679 |.elif X64 | |
| 1680 | mov CARG1d, [BASE] | |
| 1681 | lea CARG2d, [BASE+8] | |
| 1682 | lea CARG3d, [BASE-8] // Caveat: CARG3d == BASE. | |
| 1683 |.else | |
| 1684 | mov TAB:RD, [BASE] | |
| 1685 | mov ARG1, TAB:RD | |
| 1686 | add BASE, 8 // BASE+8: the key argument. | |
| 1687 | mov ARG2, BASE | |
| 1688 | sub BASE, 8+8 // Original BASE-8: the output slot. | |
| 1689 | mov ARG3, BASE | |
| 1690 |.endif | |
| 1691 | call extern lj_tab_next // (GCtab *t, cTValue *key, TValue *o) | |
| 1692 | // 1=found, 0=end, -1=error returned in eax (RD). | |
| 1693 | mov BASE, RB // Restore BASE. | |
| 1694 | test RD, RD; jg ->fff_res2 // Found key/value. | |
| 1695 | js ->fff_fallback_2 // Invalid key. | |
| 1696 | // End of traversal: return nil. | |
| 1697 | mov dword [BASE-4], LJ_TNIL | |
| 1698 | jmp ->fff_res1 | |
| 1699 |2: // Set missing 2nd arg to nil. | |
| 1700 | mov dword [BASE+12], LJ_TNIL | |
| 1701 | jmp <1 | |
| 1702 | | |
| 1703 |.ffunc_1 pairs // Return upvalue[0], the table argument and nil. | |
| 1704 | mov TAB:RB, [BASE] | |
| 1705 | cmp dword [BASE+4], LJ_TTAB; jne ->fff_fallback | |
| 1706 #if LJ_52 | |
| 1707 | cmp dword TAB:RB->metatable, 0; jne ->fff_fallback | |
| 1708 #endif | |
| 1709 | mov CFUNC:RB, [BASE-8] | |
| 1710 | mov CFUNC:RD, CFUNC:RB->upvalue[0] // Function returned as the first result. | |
| 1711 | mov PC, [BASE-4] | |
| 1712 | mov dword [BASE-4], LJ_TFUNC | |
| 1713 | mov [BASE-8], CFUNC:RD | |
| 1714 | mov dword [BASE+12], LJ_TNIL // Third result is nil. | |
| 1715 | mov RD, 1+3 // 3 results. | |
| 1716 | jmp ->fff_res | |
| 1717 | | |
| 1718 |.ffunc_2 ipairs_aux // Step for (table, index): return index+1 and t[index+1], or nothing. | |
| 1719 | cmp dword [BASE+4], LJ_TTAB; jne ->fff_fallback | |
| 1720 | cmp dword [BASE+12], LJ_TISNUM | |
| 1721 |.if DUALNUM | |
| 1722 | jne ->fff_fallback | |
| 1723 |.else | |
| 1724 | jae ->fff_fallback | |
| 1725 |.endif | |
| 1726 | mov PC, [BASE-4] | |
| 1727 |.if DUALNUM | |
| 1728 | mov RD, dword [BASE+8] | |
| 1729 | add RD, 1 | |
| 1730 | mov dword [BASE-4], LJ_TISNUM | |
| 1731 | mov dword [BASE-8], RD // First result: the incremented index. | |
| 1732 |.else | |
| 1733 | movsd xmm0, qword [BASE+8] | |
| 1734 | sseconst_1 xmm1, RBa | |
| 1735 | addsd xmm0, xmm1 | |
| 1736 | cvttsd2si RD, xmm0 // Integer copy of the incremented index, used for the lookup. | |
| 1737 | movsd qword [BASE-8], xmm0 // First result: the incremented index. | |
| 1738 |.endif | |
| 1739 | mov TAB:RB, [BASE] | |
| 1740 | cmp RD, TAB:RB->asize; jae >2 // Not in array part? | |
| 1741 | shl RD, 3 | |
| 1742 | add RD, TAB:RB->array // RD = address of the array slot. | |
| 1743 |1: | |
| 1744 | cmp dword [RD+4], LJ_TNIL; je ->fff_res0 | |
| 1745 | // Copy array slot. | |
| 1746 |.if X64 | |
| 1747 | mov RBa, [RD] | |
| 1748 | mov [BASE], RBa | |
| 1749 |.else | |
| 1750 | mov RB, [RD] | |
| 1751 | mov RD, [RD+4] | |
| 1752 | mov [BASE], RB | |
| 1753 | mov [BASE+4], RD | |
| 1754 |.endif | |
| 1755 |->fff_res2: | |
| 1756 | mov RD, 1+2 // 2 results. | |
| 1757 | jmp ->fff_res | |
| 1758 |2: // Check for empty hash part first. Otherwise call C function. | |
| 1759 | cmp dword TAB:RB->hmask, 0; je ->fff_res0 | |
| 1760 | mov FCARG1, TAB:RB | |
| 1761 | mov RB, BASE // Save BASE. | |
| 1762 | mov FCARG2, RD // Caveat: FCARG2 == BASE | |
| 1763 | call extern lj_tab_getinth@8 // (GCtab *t, int32_t key) | |
| 1764 | // cTValue * or NULL returned in eax (RD). | |
| 1765 | mov BASE, RB // Restore BASE. | |
| 1766 | test RD, RD | |
| 1767 | jnz <1 // Non-NULL: check and copy the slot at 1: above. | |
| 1768 |->fff_res0: | |
| 1769 | mov RD, 1+0 // 0 results. | |
| 1770 | jmp ->fff_res | |
| 1771 | | |
| 1772 |.ffunc_1 ipairs // Returns three values: the function in upvalue[0], the table, and 0. | |
| 1773 | mov TAB:RB, [BASE] | |
| 1774 | cmp dword [BASE+4], LJ_TTAB; jne ->fff_fallback // Arg must be a table. | |
| 1775 #if LJ_52 | |
| 1776 | cmp dword TAB:RB->metatable, 0; jne ->fff_fallback // Tables with a metatable take the fallback. | |
| 1777 #endif | |
| 1778 | mov CFUNC:RB, [BASE-8] // The function being called sits at BASE-8. | |
| 1779 | mov CFUNC:RD, CFUNC:RB->upvalue[0] | |
| 1780 | mov PC, [BASE-4] // Load before the tag store below overwrites this slot. | |
| 1781 | mov dword [BASE-4], LJ_TFUNC | |
| 1782 | mov [BASE-8], CFUNC:RD // Result 1: function from upvalue[0]. Result 2 (table) is already at BASE. | |
| 1783 |.if DUALNUM | |
| 1784 | mov dword [BASE+12], LJ_TISNUM | |
| 1785 | mov dword [BASE+8], 0 // Result 3: integer 0. | |
| 1786 |.else | |
| 1787 | xorps xmm0, xmm0 | |
| 1788 | movsd qword [BASE+8], xmm0 // Result 3: 0.0. | |
| 1789 |.endif | |
| 1790 | mov RD, 1+3 // nresults+1 = 1 + 3 results. | |
| 1791 | jmp ->fff_res | |
| 1792 | | |
| 1793 |//-- Base library: catch errors ---------------------------------------- | |
| 1794 | | |
| 1795 |.ffunc_1 pcall // Sets up a FRAME_PCALL frame word in PC and hands off to ->vm_call_dispatch. | |
| 1796 | lea RA, [BASE+8] // RA points one slot above BASE. | |
| 1797 | sub NARGS:RD, 1 // One argument fewer for the callee. | |
| 1798 | mov PC, 8+FRAME_PCALL // Frame size 8 combined with the pcall frame type. | |
| 1799 |1: // Shared tail; xpcall jumps here as well. | |
| 1800 | movzx RB, byte [DISPATCH+DISPATCH_GL(hookmask)] | |
| 1801 | shr RB, HOOK_ACTIVE_SHIFT | |
| 1802 | and RB, 1 // RB = 1 if the HOOK_ACTIVE bit is set, else 0. | |
| 1803 | add PC, RB // Remember active hook before pcall. | |
| 1804 | jmp ->vm_call_dispatch | |
| 1805 | | |
| 1806 |.ffunc_2 xpcall // Swaps the two argument slots, then shares the pcall tail. | |
| 1807 | cmp dword [BASE+12], LJ_TFUNC; jne ->fff_fallback // Second arg must be a function. | |
| 1808 | mov RB, [BASE+4] // Swap function and traceback. | |
| 1809 | mov [BASE+12], RB // Tag words are exchanged first ... | |
| 1810 | mov dword [BASE+4], LJ_TFUNC | |
| 1811 | mov LFUNC:RB, [BASE] // ... then the value words. | |
| 1812 | mov PC, [BASE+8] // PC is only a scratch register here; it is reloaded below. | |
| 1813 | mov [BASE+8], LFUNC:RB | |
| 1814 | mov [BASE], PC | |
| 1815 | lea RA, [BASE+16] // RA points two slots above BASE. | |
| 1816 | sub NARGS:RD, 2 // Two arguments fewer for the callee. | |
| 1817 | mov PC, 16+FRAME_PCALL // Frame size 16 combined with the pcall frame type. | |
| 1818 | jmp <1 // Continue at local label 1 inside pcall. | |
| 1819 | | |
| 1820 |//-- Coroutine library -------------------------------------------------- | |
| 1821 | | |
| 1822 |.macro coroutine_resume_wrap, resume // resume=1 emits coroutine.resume, resume=0 the coroutine.wrap helper. | |
| 1823 |.if resume | |
| 1824 |.ffunc_1 coroutine_resume | |
| 1825 | mov L:RB, [BASE] // Coroutine comes from the first argument. | |
| 1826 |.else | |
| 1827 |.ffunc coroutine_wrap_aux | |
| 1828 | mov CFUNC:RB, [BASE-8] | |
| 1829 | mov L:RB, CFUNC:RB->upvalue[0].gcr // Coroutine comes from upvalue[0] of the called function. | |
| 1830 |.endif | |
| 1831 | mov PC, [BASE-4] | |
| 1832 | mov SAVE_PC, PC | |
| 1833 |.if X64 | |
| 1834 | mov TMP1, L:RB // Keep the coroutine pointer for after the vm_resume call. | |
| 1835 |.else | |
| 1836 | mov ARG1, L:RB // Doubles as the first argument of vm_resume and as the saved coroutine. | |
| 1837 |.endif | |
| 1838 |.if resume | |
| 1839 | cmp dword [BASE+4], LJ_TTHREAD; jne ->fff_fallback | |
| 1840 |.endif | |
| 1841 | cmp aword L:RB->cframe, 0; jne ->fff_fallback // Fallback if the coroutine has a C frame set. | |
| 1842 | cmp byte L:RB->status, LUA_YIELD; ja ->fff_fallback // Status above LUA_YIELD: fallback. | |
| 1843 | mov RA, L:RB->top | |
| 1844 | je >1 // Status == LUA_YIELD (flags from the cmp above)? Otherwise status is below it (i.e. 0). | |
| 1845 | cmp RA, L:RB->base // Check for presence of initial func. | |
| 1846 | je ->fff_fallback | |
| 1847 |1: | |
| 1848 |.if resume | |
| 1849 | lea PC, [RA+NARGS:RD*8-16] // Check stack space (-1-thread). | |
| 1850 |.else | |
| 1851 | lea PC, [RA+NARGS:RD*8-8] // Check stack space (-1). | |
| 1852 |.endif | |
| 1853 | cmp PC, L:RB->maxstack; ja ->fff_fallback | |
| 1854 | mov L:RB->top, PC // Coroutine top now covers the arguments to be moved. | |
| 1855 | | |
| 1856 | mov L:RB, SAVE_L // Switch RB to the calling state. | |
| 1857 | mov L:RB->base, BASE | |
| 1858 |.if resume | |
| 1859 | add BASE, 8 // Keep resumed thread in stack for GC. | |
| 1860 |.endif | |
| 1861 | mov L:RB->top, BASE | |
| 1862 |.if resume | |
| 1863 | lea RB, [BASE+NARGS:RD*8-24] // RB = end of source for stack move. | |
| 1864 |.else | |
| 1865 | lea RB, [BASE+NARGS:RD*8-16] // RB = end of source for stack move. | |
| 1866 |.endif | |
| 1867 | sub RBa, PCa // Relative to PC. | |
| 1868 | | |
| 1869 | cmp PC, RA | |
| 1870 | je >3 // Nothing to move. | |
| 1871 |2: // Move args to coroutine. | |
| 1872 |.if X64 | |
| 1873 | mov RCa, [PC+RB] | |
| 1874 | mov [PC-8], RCa | |
| 1875 |.else | |
| 1876 | mov RC, [PC+RB+4] | |
| 1877 | mov [PC-4], RC | |
| 1878 | mov RC, [PC+RB] | |
| 1879 | mov [PC-8], RC | |
| 1880 |.endif | |
| 1881 | sub PC, 8 // Copy runs downwards from the new top to the old top (RA). | |
| 1882 | cmp PC, RA | |
| 1883 | jne <2 | |
| 1884 |3: | |
| 1885 |.if X64 | |
| 1886 | mov CARG2d, RA | |
| 1887 | mov CARG1d, TMP1 | |
| 1888 |.else | |
| 1889 | mov ARG2, RA | |
| 1890 | xor RA, RA | |
| 1891 | mov ARG4, RA | |
| 1892 | mov ARG3, RA | |
| 1893 |.endif | |
| 1894 | call ->vm_resume // (lua_State *L, TValue *base, 0, 0) | |
| 1895 | | |
| 1896 | mov L:RB, SAVE_L | |
| 1897 |.if X64 | |
| 1898 | mov L:PC, TMP1 | |
| 1899 |.else | |
| 1900 | mov L:PC, ARG1 // The callee doesn't modify SAVE_L. | |
| 1901 |.endif | |
| 1902 | mov BASE, L:RB->base | |
| 1903 | mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB | |
| 1904 | set_vmstate INTERP | |
| 1905 | | |
| 1906 | cmp eax, LUA_YIELD // eax = status returned by vm_resume. | |
| 1907 | ja >8 // Above LUA_YIELD: error path. | |
| 1908 |4: | |
| 1909 | mov RA, L:PC->base | |
| 1910 | mov KBASE, L:PC->top | |
| 1911 | mov L:PC->top, RA // Clear coroutine stack. | |
| 1912 | mov PC, KBASE | |
| 1913 | sub PC, RA // PC = size of the results in bytes. | |
| 1914 | je >6 // No results? | |
| 1915 | lea RD, [BASE+PC] | |
| 1916 | shr PC, 3 // PC = number of results. | |
| 1917 | cmp RD, L:RB->maxstack | |
| 1918 | ja >9 // Need to grow stack? | |
| 1919 | | |
| 1920 | mov RB, BASE | |
| 1921 | sub RBa, RAa // RB = destination offset relative to the source pointer RA. | |
| 1922 |5: // Move results from coroutine. | |
| 1923 |.if X64 | |
| 1924 | mov RDa, [RA] | |
| 1925 | mov [RA+RB], RDa | |
| 1926 |.else | |
| 1927 | mov RD, [RA] | |
| 1928 | mov [RA+RB], RD | |
| 1929 | mov RD, [RA+4] | |
| 1930 | mov [RA+RB+4], RD | |
| 1931 |.endif | |
| 1932 | add RA, 8 | |
| 1933 | cmp RA, KBASE | |
| 1934 | jne <5 | |
| 1935 |6: | |
| 1936 |.if resume | |
| 1937 | lea RD, [PC+2] // nresults+1 = 1 + true + results. | |
| 1938 | mov dword [BASE-4], LJ_TTRUE // Prepend true to results. | |
| 1939 |.else | |
| 1940 | lea RD, [PC+1] // nresults+1 = 1 + results. | |
| 1941 |.endif | |
| 1942 |7: | |
| 1943 | mov PC, SAVE_PC | |
| 1944 | mov MULTRES, RD | |
| 1945 |.if resume | |
| 1946 | mov RAa, -8 // Results start at BASE-8 (the prepended boolean). | |
| 1947 |.else | |
| 1948 | xor RA, RA // Results start at BASE. | |
| 1949 |.endif | |
| 1950 | test PC, FRAME_TYPE | |
| 1951 | jz ->BC_RET_Z | |
| 1952 | jmp ->vm_return | |
| 1953 | | |
| 1954 |8: // Coroutine returned with error (at co->top-1). | |
| 1955 |.if resume | |
| 1956 | mov dword [BASE-4], LJ_TFALSE // Prepend false to results. | |
| 1957 | mov RA, L:PC->top | |
| 1958 | sub RA, 8 | |
| 1959 | mov L:PC->top, RA // Clear error from coroutine stack. | |
| 1960 | // Copy error message. | |
| 1961 |.if X64 | |
| 1962 | mov RDa, [RA] | |
| 1963 | mov [BASE], RDa | |
| 1964 |.else | |
| 1965 | mov RD, [RA] | |
| 1966 | mov [BASE], RD | |
| 1967 | mov RD, [RA+4] | |
| 1968 | mov [BASE+4], RD | |
| 1969 |.endif | |
| 1970 | mov RD, 1+2 // nresults+1 = 1 + false + error. | |
| 1971 | jmp <7 | |
| 1972 |.else | |
| 1973 | mov FCARG2, L:PC | |
| 1974 | mov FCARG1, L:RB | |
| 1975 | call extern lj_ffh_coroutine_wrap_err@8 // (lua_State *L, lua_State *co) | |
| 1976 | // Error function does not return. | |
| 1977 |.endif | |
| 1978 | | |
| 1979 |9: // Handle stack expansion on return from yield. | |
| 1980 |.if X64 | |
| 1981 | mov L:RA, TMP1 | |
| 1982 |.else | |
| 1983 | mov L:RA, ARG1 // The callee doesn't modify SAVE_L. | |
| 1984 |.endif | |
| 1985 | mov L:RA->top, KBASE // Undo coroutine stack clearing. | |
| 1986 | mov FCARG2, PC // PC = number of result slots needed (shifted above). | |
| 1987 | mov FCARG1, L:RB | |
| 1988 | call extern lj_state_growstack@8 // (lua_State *L, int n) | |
| 1989 |.if X64 | |
| 1990 | mov L:PC, TMP1 | |
| 1991 |.else | |
| 1992 | mov L:PC, ARG1 | |
| 1993 |.endif | |
| 1994 | mov BASE, L:RB->base // Reload BASE from L after the growstack call. | |
| 1995 | jmp <4 // Retry the stack move. | |
| 1996 |.endmacro | |
| 1997 | | |
| 1998 | coroutine_resume_wrap 1 // coroutine.resume | |
| 1999 | coroutine_resume_wrap 0 // coroutine.wrap | |
| 2000 | | |
| 2001 |.ffunc coroutine_yield // Fast path: record base/top, mark the state as yielded and leave the VM. | |
| 2002 | mov L:RB, SAVE_L | |
| 2003 | test aword L:RB->cframe, CFRAME_RESUME | |
| 2004 | jz ->fff_fallback // CFRAME_RESUME bit not set in cframe: use the fallback. | |
| 2005 | mov L:RB->base, BASE | |
| 2006 | lea RD, [BASE+NARGS:RD*8-8] // RD = BASE + nargs*8 (NARGS holds nargs+1). | |
| 2007 | mov L:RB->top, RD | |
| 2008 | xor RD, RD | |
| 2009 | mov aword L:RB->cframe, RDa // Clear cframe. | |
| 2010 | mov al, LUA_YIELD // Low byte of the zeroed register now holds LUA_YIELD. | |
| 2011 | mov byte L:RB->status, al | |
| 2012 | jmp ->vm_leave_unw | |
| 2013 | | |
| 2014 |//-- Math library ------------------------------------------------------- | |
| 2015 | | |
| 2016 |.if not DUALNUM | |
| 2017 |->fff_resi: // Dummy. | |
| 2018 |.endif | |
| 2019 | | |
| 2020 |->fff_resn: // Return one number taken from the x87 stack top. | |
| 2021 | mov PC, [BASE-4] // Load PC before the store below overwrites [BASE-4]. | |
| 2022 | fstp qword [BASE-8] // Pop st0 into the result slot. | |
| 2023 | jmp ->fff_res1 | |
| 2024 | | |
| 2025 | .ffunc_1 math_abs // Also hosts the shared ->fff_resbit/->fff_resi (DUALNUM) and ->fff_resxmm0 exits. | |
| 2026 |.if DUALNUM | |
| 2027 | cmp dword [BASE+4], LJ_TISNUM; jne >2 | |
| 2028 | mov RB, dword [BASE] | |
| 2029 | cmp RB, 0; jns ->fff_resi // Non-negative integer: return it unchanged. | |
| 2030 | neg RB; js >1 // Still negative after neg: only for 0x80000000, handled at 1. | |
| 2031 |->fff_resbit: | |
| 2032 |->fff_resi: // Return the integer in RB as the single result. | |
| 2033 | mov PC, [BASE-4] | |
| 2034 | mov dword [BASE-4], LJ_TISNUM | |
| 2035 | mov dword [BASE-8], RB | |
| 2036 | jmp ->fff_res1 | |
| 2037 |1: | |
| 2038 | mov PC, [BASE-4] | |
| 2039 | mov dword [BASE-4], 0x41e00000 // 2^31. | |
| 2040 | mov dword [BASE-8], 0 // Low word of the double 2^31. | |
| 2041 | jmp ->fff_res1 | |
| 2042 |2: | |
| 2043 | ja ->fff_fallback // Flags still from the tag compare: tag above LJ_TISNUM is not a number. | |
| 2044 |.else | |
| 2045 | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback | |
| 2046 |.endif | |
| 2047 | movsd xmm0, qword [BASE] | |
| 2048 | sseconst_abs xmm1, RDa // Presumably loads a mask that clears the sign bit (macro defined elsewhere). | |
| 2049 | andps xmm0, xmm1 | |
| 2050 |->fff_resxmm0: // Return the double in xmm0 as the single result. | |
| 2051 | mov PC, [BASE-4] | |
| 2052 | movsd qword [BASE-8], xmm0 | |
| 2053 | // fallthrough | |
| 2054 | | |
| 2055 |->fff_res1: // Common exit with exactly one result (already stored at BASE-8). | |
| 2056 | mov RD, 1+1 | |
| 2057 |->fff_res: // Common exit: RD = nresults+1, PC = frame word loaded from [BASE-4]. | |
| 2058 | mov MULTRES, RD | |
| 2059 |->fff_res_: | |
| 2060 | test PC, FRAME_TYPE | |
| 2061 | jnz >7 // Any FRAME_TYPE bit set: take the non-standard return at 7. | |
| 2062 |5: | |
| 2063 | cmp PC_RB, RDL // More results expected? | |
| 2064 | ja >6 | |
| 2065 | // Adjust BASE. KBASE is assumed to be set for the calling frame. | |
| 2066 | movzx RA, PC_RA | |
| 2067 | not RAa // Note: ~RA = -(RA+1) | |
| 2068 | lea BASE, [BASE+RA*8] // base = base - (RA+1)*8 | |
| 2069 | ins_next | |
| 2070 | | |
| 2071 |6: // Fill up results with nil. | |
| 2072 | mov dword [BASE+RD*8-12], LJ_TNIL // Tag word of the next missing result slot. | |
| 2073 | add RD, 1 | |
| 2074 | jmp <5 | |
| 2075 | | |
| 2076 |7: // Non-standard return case. | |
| 2077 | mov RAa, -8 // Results start at BASE+RA = BASE-8. | |
| 2078 | jmp ->vm_return | |
| 2079 | | |
| 2080 |.if X64 | |
| 2081 |.define fff_resfp, fff_resxmm0 // x64: result of an external math call is taken from xmm0. | |
| 2082 |.else | |
| 2083 |.define fff_resfp, fff_resn // x86: result is taken from the x87 stack (see ->fff_resn). | |
| 2084 |.endif | |
| 2085 | | |
| 2086 |.macro math_round, func // Emits math_<func>, which calls the ->vm_<func>_sse helper. | |
| 2087 | .ffunc math_ .. func | |
| 2088 |.if DUALNUM | |
| 2089 | cmp dword [BASE+4], LJ_TISNUM; jne >1 | |
| 2090 | mov RB, dword [BASE]; jmp ->fff_resi // Integers are returned unchanged. | |
| 2091 |1: | |
| 2092 | ja ->fff_fallback // Flags still from the tag compare above. | |
| 2093 |.else | |
| 2094 | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback | |
| 2095 |.endif | |
| 2096 | movsd xmm0, qword [BASE] | |
| 2097 | call ->vm_ .. func .. _sse | |
| 2098 |.if DUALNUM | |
| 2099 | cvttsd2si RB, xmm0 | |
| 2100 | cmp RB, 0x80000000 // cvttsd2si yields 0x80000000 when the value does not fit (or is NaN). | |
| 2101 | jne ->fff_resi // Anything else fits an integer. | |
| 2102 | cvtsi2sd xmm1, RB // Convert back to tell a genuine -2^31 from the failure marker. | |
| 2103 | ucomisd xmm0, xmm1 | |
| 2104 | jp ->fff_resxmm0 // Unordered (NaN): return as a double. | |
| 2105 | je ->fff_resi // Round trip matches: return as an integer. | |
| 2106 |.endif | |
| 2107 | jmp ->fff_resxmm0 | |
| 2108 |.endmacro | |
| 2109 | | |
| 2110 | math_round floor | |
| 2111 | math_round ceil | |
| 2112 | | |
| 2113 |.ffunc_nsse math_sqrt, sqrtsd; jmp ->fff_resxmm0 // Presumably applies sqrtsd to the number argument; the result is returned from xmm0. | |
| 2114 | | |
| 2115 |.ffunc math_log // Fast path for the one-argument form only; calls the external log function. | |
| 2116 | cmp NARGS:RD, 1+1; jne ->fff_fallback // Exactly one argument. | |
| 2117 | cmp dword [BASE+4], LJ_TISNUM; jae ->fff_fallback // Tag must be below LJ_TISNUM. | |
| 2118 | movsd xmm0, qword [BASE] | |
| 2119 |.if not X64 | |
| 2120 | movsd FPARG1, xmm0 // x86: additionally store the argument into the FPARG1 slot. | |
| 2121 |.endif | |
| 2122 | mov RB, BASE // Save BASE in RB across the call. | |
| 2123 | call extern log | |
| 2124 | mov BASE, RB | |
| 2125 | jmp ->fff_resfp | |
| 2126 | | |
| 2127 |.macro math_extern, func // Emits math_<func> as a call to the external one-argument function <func>. | |
| 2128 | .ffunc_nsse math_ .. func | |
| 2129 |.if not X64 | |
| 2130 | movsd FPARG1, xmm0 // x86: additionally store the argument into the FPARG1 slot. | |
| 2131 |.endif | |
| 2132 | mov RB, BASE // Save BASE in RB across the call. | |
| 2133 | call extern func | |
| 2134 | mov BASE, RB | |
| 2135 | jmp ->fff_resfp | |
| 2136 |.endmacro | |
| 2137 | | |
| 2138 |.macro math_extern2, func // Emits math_<func> as a call to the external two-argument function <func>. | |
| 2139 | .ffunc_nnsse math_ .. func | |
| 2140 |.if not X64 | |
| 2141 | movsd FPARG1, xmm0 // x86: additionally store both arguments into the FPARG slots. | |
| 2142 | movsd FPARG3, xmm1 | |
| 2143 |.endif | |
| 2144 | mov RB, BASE // Save BASE in RB across the call. | |
| 2145 | call extern func | |
| 2146 | mov BASE, RB | |
| 2147 | jmp ->fff_resfp | |
| 2148 |.endmacro | |
| 2149 | | |
| 2150 | math_extern log10 // One-argument functions forwarded to the external function of the same name. | |
| 2151 | math_extern exp | |
| 2152 | math_extern sin | |
| 2153 | math_extern cos | |
| 2154 | math_extern tan | |
| 2155 | math_extern asin | |
| 2156 | math_extern acos | |
| 2157 | math_extern atan | |
| 2158 | math_extern sinh | |
| 2159 | math_extern cosh | |
| 2160 | math_extern tanh | |
| 2161 | math_extern2 pow // Two-argument functions. | |
| 2162 | math_extern2 atan2 | |
| 2163 | math_extern2 fmod | |
| 2164 | | |
| 2165 |.ffunc_nnr math_ldexp; fscale; fpop1; jmp ->fff_resn // x87 path: fscale computes the result; ->fff_resn stores st0 as the single result. | |
| 2166 | | |
| 2167 |.ffunc_1 math_frexp // Two results: mantissa at BASE-8, exponent at BASE. | |
| 2168 | mov RB, [BASE+4] | |
| 2169 | cmp RB, LJ_TISNUM; jae ->fff_fallback | |
| 2170 | mov PC, [BASE-4] | |
| 2171 | mov RC, [BASE] | |
| 2172 | mov [BASE-4], RB; mov [BASE-8], RC // Copy the argument into the first result slot. | |
| 2173 | shl RB, 1; cmp RB, 0xffe00000; jae >3 // Drop the sign bit; all-ones exponent field means Inf/NaN. | |
| 2174 | or RC, RB; jz >3 // All remaining bits zero: +-0. | |
| 2175 | mov RC, 1022 // Exponent adjustment for normal numbers. | |
| 2176 | cmp RB, 0x00200000; jb >4 // Exponent field zero: denormal. | |
| 2177 |1: | |
| 2178 | shr RB, 21; sub RB, RC // Extract and unbias exponent. | |
| 2179 | cvtsi2sd xmm0, RB | |
| 2180 | mov RB, [BASE-4] | |
| 2181 | and RB, 0x800fffff // Mask off exponent. | |
| 2182 | or RB, 0x3fe00000 // Put mantissa in range [0.5,1) or 0. | |
| 2183 | mov [BASE-4], RB | |
| 2184 |2: | |
| 2185 | movsd qword [BASE], xmm0 // Second result: the exponent as a double. | |
| 2186 | mov RD, 1+2 // nresults+1 = 1 + mantissa + exponent. | |
| 2187 | jmp ->fff_res | |
| 2188 |3: // Return +-0, +-Inf, NaN unmodified and an exponent of 0. | |
| 2189 | xorps xmm0, xmm0; jmp <2 | |
| 2190 |4: // Handle denormals by multiplying with 2^54 and adjusting the bias. | |
| 2191 | movsd xmm0, qword [BASE] | |
| 2192 | sseconst_hi xmm1, RBa, 43500000 // 2^54. | |
| 2193 | mulsd xmm0, xmm1 | |
| 2194 | movsd qword [BASE-8], xmm0 | |
| 2195 | mov RB, [BASE-4]; mov RC, 1076; shl RB, 1; jmp <1 // 1076 = 1022 + 54 compensates for the scaling. | |
| 2196 | | |
| 2197 |.ffunc_nsse math_modf // Two results: truncated value at BASE-8, remainder at BASE. | |
| 2198 | mov RB, [BASE+4] | |
| 2199 | mov PC, [BASE-4] | |
| 2200 | shl RB, 1; cmp RB, 0xffe00000; je >4 // +-Inf? | |
| 2201 | movaps xmm4, xmm0 // Keep a copy of the argument. | |
| 2202 | call ->vm_trunc_sse | |
| 2203 | subsd xmm4, xmm0 // xmm4 = argument - truncated value. | |
| 2204 |1: | |
| 2205 | movsd qword [BASE-8], xmm0 | |
| 2206 | movsd qword [BASE], xmm4 | |
| 2207 | mov RC, [BASE-4]; mov RB, [BASE+4] // High words of both results. | |
| 2208 | xor RC, RB; js >3 // Need to adjust sign? | |
| 2209 |2: | |
| 2210 | mov RD, 1+2 // nresults+1 = 1 + 2 results. | |
| 2211 | jmp ->fff_res | |
| 2212 |3: | |
| 2213 | xor RB, 0x80000000; mov [BASE+4], RB // Flip sign of fraction. | |
| 2214 | jmp <2 | |
| 2215 |4: | |
| 2216 | xorps xmm4, xmm4; jmp <1 // Return +-Inf and +-0. | |
| 2217 | | |
| 2218 |.macro math_minmax, name, cmovop, sseop // cmovop is used in the integer loop, sseop in the number loop. | |
| 2219 | .ffunc_1 name | |
| 2220 | mov RA, 2 // RA is compared against RD (nargs+1) and indexes the next argument. | |
| 2221 | cmp dword [BASE+4], LJ_TISNUM | |
| 2222 |.if DUALNUM | |
| 2223 | jne >4 | |
| 2224 | mov RB, dword [BASE] // Running integer result. | |
| 2225 |1: // Handle integers. | |
| 2226 | cmp RA, RD; jae ->fff_resi // All arguments consumed: return the integer in RB. | |
| 2227 | cmp dword [BASE+RA*8-4], LJ_TISNUM; jne >3 | |
| 2228 | cmp RB, dword [BASE+RA*8-8] | |
| 2229 | cmovop RB, dword [BASE+RA*8-8] // Conditionally replace the running result. | |
| 2230 | add RA, 1 | |
| 2231 | jmp <1 | |
| 2232 |3: | |
| 2233 | ja ->fff_fallback | |
| 2234 | // Convert intermediate result to number and continue below. | |
| 2235 | cvtsi2sd xmm0, RB | |
| 2236 | jmp >6 | |
| 2237 |4: | |
| 2238 | ja ->fff_fallback | |
| 2239 |.else | |
| 2240 | jae ->fff_fallback | |
| 2241 |.endif | |
| 2242 | | |
| 2243 | movsd xmm0, qword [BASE] // Running number result. | |
| 2244 |5: // Handle numbers or integers. | |
| 2245 | cmp RA, RD; jae ->fff_resxmm0 // All arguments consumed: return the double in xmm0. | |
| 2246 | cmp dword [BASE+RA*8-4], LJ_TISNUM | |
| 2247 |.if DUALNUM | |
| 2248 | jb >6 | |
| 2249 | ja ->fff_fallback | |
| 2250 | cvtsi2sd xmm1, dword [BASE+RA*8-8] // Integer argument: convert to double first. | |
| 2251 | jmp >7 | |
| 2252 |.else | |
| 2253 | jae ->fff_fallback | |
| 2254 |.endif | |
| 2255 |6: | |
| 2256 | movsd xmm1, qword [BASE+RA*8-8] | |
| 2257 |7: | |
| 2258 | sseop xmm0, xmm1 | |
| 2259 | add RA, 1 | |
| 2260 | jmp <5 | |
| 2261 |.endmacro | |
| 2262 | | |
| 2263 | math_minmax math_min, cmovg, minsd // cmovg: take the argument when the running value is greater. | |
| 2264 | math_minmax math_max, cmovl, maxsd // cmovl: take the argument when the running value is less. | |
| 2265 | | |
| 2266 |//-- String library ----------------------------------------------------- | |
| 2267 | | |
| 2268 |.ffunc string_byte // Only handle the 1-arg case here. | |
| 2269 | cmp NARGS:RD, 1+1; jne ->fff_fallback | |
| 2270 | cmp dword [BASE+4], LJ_TSTR; jne ->fff_fallback // Arg must be a string. | |
| 2271 | mov STR:RB, [BASE] | |
| 2272 | mov PC, [BASE-4] // Needed by both ->fff_res0 and the result exits. | |
| 2273 | cmp dword STR:RB->len, 1 | |
| 2274 | jb ->fff_res0 // Return no results for empty string. | |
| 2275 | movzx RB, byte STR:RB[1] // First byte located right after the string header. | |
| 2276 |.if DUALNUM | |
| 2277 | jmp ->fff_resi | |
| 2278 |.else | |
| 2279 | cvtsi2sd xmm0, RB; jmp ->fff_resxmm0 | |
| 2280 |.endif | |
| 2281 | | |
| 2282 |.ffunc string_char // Only handle the 1-arg case here. | |
| 2283 | ffgccheck | |
| 2284 | cmp NARGS:RD, 1+1; jne ->fff_fallback // *Exactly* 1 arg. | |
| 2285 | cmp dword [BASE+4], LJ_TISNUM | |
| 2286 |.if DUALNUM | |
| 2287 | jne ->fff_fallback | |
| 2288 | mov RB, dword [BASE] | |
| 2289 | cmp RB, 255; ja ->fff_fallback // Unsigned compare: rejects negative values too. | |
| 2290 | mov TMP2, RB | |
| 2291 |.else | |
| 2292 | jae ->fff_fallback | |
| 2293 | cvttsd2si RB, qword [BASE] | |
| 2294 | cmp RB, 255; ja ->fff_fallback // Unsigned compare: rejects negative values too. | |
| 2295 | mov TMP2, RB | |
| 2296 |.endif | |
| 2297 |.if X64 | |
| 2298 | mov TMP3, 1 // Length of the new string. | |
| 2299 |.else | |
| 2300 | mov ARG3, 1 // Length of the new string. | |
| 2301 |.endif | |
| 2302 | lea RDa, TMP2 // Points to stack. Little-endian. | |
| 2303 |->fff_newstr: // Shared entry: RD = data pointer, TMP3 (x64) or ARG3 (x86) = length. | |
| 2304 | mov L:RB, SAVE_L | |
| 2305 | mov L:RB->base, BASE | |
| 2306 |.if X64 | |
| 2307 | mov CARG3d, TMP3 // Zero-extended to size_t. | |
| 2308 | mov CARG2, RDa // May be 64 bit ptr to stack. | |
| 2309 | mov CARG1d, L:RB | |
| 2310 |.else | |
| 2311 | mov ARG2, RD | |
| 2312 | mov ARG1, L:RB | |
| 2313 |.endif | |
| 2314 | mov SAVE_PC, PC | |
| 2315 | call extern lj_str_new // (lua_State *L, char *str, size_t l) | |
| 2316 |->fff_resstr: // Shared exit: returns the string in RD as the single result. | |
| 2317 | // GCstr * returned in eax (RD). | |
| 2318 | mov BASE, L:RB->base | |
| 2319 | mov PC, [BASE-4] | |
| 2320 | mov dword [BASE-4], LJ_TSTR | |
| 2321 | mov [BASE-8], STR:RD | |
| 2322 | jmp ->fff_res1 | |
| 2323 | | |
| 2324 |.ffunc string_sub // Slots: 0 = string, 1 = start, 2 = optional end. | |
| 2325 | ffgccheck | |
| 2326 | mov TMP2, -1 // Default end = -1. | |
| 2327 | cmp NARGS:RD, 1+2; jb ->fff_fallback // Fewer than two args: fallback. | |
| 2328 | jna >1 // Exactly two args: keep the default end. | |
| 2329 | cmp dword [BASE+20], LJ_TISNUM | |
| 2330 |.if DUALNUM | |
| 2331 | jne ->fff_fallback | |
| 2332 | mov RB, dword [BASE+16] | |
| 2333 | mov TMP2, RB | |
| 2334 |.else | |
| 2335 | jae ->fff_fallback | |
| 2336 | cvttsd2si RB, qword [BASE+16] | |
| 2337 | mov TMP2, RB | |
| 2338 |.endif | |
| 2339 |1: | |
| 2340 | cmp dword [BASE+4], LJ_TSTR; jne ->fff_fallback | |
| 2341 | cmp dword [BASE+12], LJ_TISNUM | |
| 2342 |.if DUALNUM | |
| 2343 | jne ->fff_fallback | |
| 2344 |.else | |
| 2345 | jae ->fff_fallback | |
| 2346 |.endif | |
| 2347 | mov STR:RB, [BASE] | |
| 2348 | mov TMP3, STR:RB // Save the string pointer; RB is reused for the length. | |
| 2349 | mov RB, STR:RB->len | |
| 2350 |.if DUALNUM | |
| 2351 | mov RA, dword [BASE+8] | |
| 2352 |.else | |
| 2353 | cvttsd2si RA, qword [BASE+8] | |
| 2354 |.endif | |
| 2355 | mov RC, TMP2 // RA = start, RB = len, RC = end. | |
| 2356 | cmp RB, RC // len < end? (unsigned compare) | |
| 2357 | jb >5 | |
| 2358 |2: | |
| 2359 | test RA, RA // start <= 0? | |
| 2360 | jle >7 | |
| 2361 |3: | |
| 2362 | mov STR:RB, TMP3 | |
| 2363 | sub RC, RA // start > end? | |
| 2364 | jl ->fff_emptystr | |
| 2365 | lea RB, [STR:RB+RA+#STR-1] // Address of the byte at position start (1-based). | |
| 2366 | add RC, 1 // Length = end - start + 1. | |
| 2367 |4: | |
| 2368 |.if X64 | |
| 2369 | mov TMP3, RC | |
| 2370 |.else | |
| 2371 | mov ARG3, RC | |
| 2372 |.endif | |
| 2373 | mov RD, RB | |
| 2374 | jmp ->fff_newstr | |
| 2375 | | |
| 2376 |5: // Negative end or overflow. | |
| 2377 | jl >6 // Flags still from cmp RB, RC: signed len < end means a positive end beyond len. | |
| 2378 | lea RC, [RC+RB+1] // end = end+(len+1) | |
| 2379 | jmp <2 | |
| 2380 |6: // Overflow. | |
| 2381 | mov RC, RB // end = len | |
| 2382 | jmp <2 | |
| 2383 | | |
| 2384 |7: // Negative start or underflow. | |
| 2385 | je >8 // Flags still from test RA, RA: start == 0. | |
| 2386 | add RA, RB // start = start+(len+1) | |
| 2387 | add RA, 1 | |
| 2388 | jg <3 // start > 0? | |
| 2389 |8: // Underflow. | |
| 2390 | mov RA, 1 // start = 1 | |
| 2391 | jmp <3 | |
| 2392 | | |
| 2393 |->fff_emptystr: // Range underflow. | |
| 2394 | xor RC, RC // Zero length. Any ptr in RB is ok. | |
| 2395 | jmp <4 | |
| 2396 | | |
| 2397 |.macro ffstring_op, name // Emits string_<name> via lj_buf_putstr_<name> on the global tmpbuf. | |
| 2398 | .ffunc_1 string_ .. name | |
| 2399 | ffgccheck | |
| 2400 | cmp dword [BASE+4], LJ_TSTR; jne ->fff_fallback | |
| 2401 | mov L:RB, SAVE_L | |
| 2402 | lea SBUF:FCARG1, [DISPATCH+DISPATCH_GL(tmpbuf)] | |
| 2403 | mov L:RB->base, BASE // Must be stored before BASE is clobbered by FCARG2 below. | |
| 2404 | mov STR:FCARG2, [BASE] // Caveat: FCARG2 == BASE | |
| 2405 | mov RCa, SBUF:FCARG1->b | |
| 2406 | mov SBUF:FCARG1->L, L:RB | |
| 2407 | mov SBUF:FCARG1->w, RCa // Reset the buffer: write pointer = buffer start. | |
| 2408 | mov SAVE_PC, PC | |
| 2409 | call extern lj_buf_putstr_ .. name .. @8 | |
| 2410 | mov FCARG1, eax // Pass the returned value on to lj_buf_tostr. | |
| 2411 | call extern lj_buf_tostr@4 | |
| 2412 | jmp ->fff_resstr | |
| 2413 |.endmacro | |
| 2414 | | |
| 2415 |ffstring_op reverse | |
| 2416 |ffstring_op lower | |
| 2417 |ffstring_op upper | |
| 2418 | | |
| 2419 |//-- Bit library -------------------------------------------------------- | |
| 2420 | | |
| 2421 |.macro .ffunc_bit, name, kind, fdef // Prologue for bit ops: leaves the first arg as a 32 bit value in RB. | |
| 2422 | fdef name | |
| 2423 |.if kind == 2 | |
| 2424 | sseconst_tobit xmm1, RBa // kind 2: load the constant up front so xmm1 stays usable after the prologue. | |
| 2425 |.endif | |
| 2426 | cmp dword [BASE+4], LJ_TISNUM | |
| 2427 |.if DUALNUM | |
| 2428 | jne >1 | |
| 2429 | mov RB, dword [BASE] | |
| 2430 |.if kind > 0 | |
| 2431 | jmp >2 // kind > 0: continue with the code following the macro. | |
| 2432 |.else | |
| 2433 | jmp ->fff_resbit // kind 0: an integer argument is already the result. | |
| 2434 |.endif | |
| 2435 |1: | |
| 2436 | ja ->fff_fallback | |
| 2437 |.else | |
| 2438 | jae ->fff_fallback | |
| 2439 |.endif | |
| 2440 | movsd xmm0, qword [BASE] | |
| 2441 |.if kind < 2 | |
| 2442 | sseconst_tobit xmm1, RBa | |
| 2443 |.endif | |
| 2444 | addsd xmm0, xmm1 // Add the tobit constant ... | |
| 2445 | movd RB, xmm0 // ... and take the low 32 bits as the integer value. | |
| 2446 |2: | |
| 2447 |.endmacro | |
| 2448 | | |
| 2449 |.macro .ffunc_bit, name, kind // Two-parameter form: defaults fdef to .ffunc_1. | |
| 2450 | .ffunc_bit name, kind, .ffunc_1 | |
| 2451 |.endmacro | |
| 2452 | | |
| 2453 |.ffunc_bit bit_tobit, 0 // The prologue alone performs the conversion into RB. | |
| 2454 | jmp ->fff_resbit | |
| 2455 | | |
| 2456 |.macro .ffunc_bit_op, name, ins // Folds all arguments into RB with ins, walking from the last arg down. | |
| 2457 | .ffunc_bit name, 2 | |
| 2458 | mov TMP2, NARGS:RD // Save for fallback. | |
| 2459 | lea RD, [BASE+NARGS:RD*8-16] // RD = address of the last argument. | |
| 2460 |1: | |
| 2461 | cmp RD, BASE | |
| 2462 | jbe ->fff_resbit // Reached the first argument (already in RB): done. | |
| 2463 | cmp dword [RD+4], LJ_TISNUM | |
| 2464 |.if DUALNUM | |
| 2465 | jne >2 | |
| 2466 | ins RB, dword [RD] | |
| 2467 | sub RD, 8 | |
| 2468 | jmp <1 | |
| 2469 |2: | |
| 2470 | ja ->fff_fallback_bit_op | |
| 2471 |.else | |
| 2472 | jae ->fff_fallback_bit_op | |
| 2473 |.endif | |
| 2474 | movsd xmm0, qword [RD] | |
| 2475 | addsd xmm0, xmm1 // xmm1 still holds the tobit constant loaded by the kind 2 prologue. | |
| 2476 | movd RA, xmm0 | |
| 2477 | ins RB, RA | |
| 2478 | sub RD, 8 | |
| 2479 | jmp <1 | |
| 2480 |.endmacro | |
| 2481 | | |
| 2482 |.ffunc_bit_op bit_band, and | |
| 2483 |.ffunc_bit_op bit_bor, or | |
| 2484 |.ffunc_bit_op bit_bxor, xor | |
| 2485 | | |
| 2486 |.ffunc_bit bit_bswap, 1 // Byte-swap the 32 bit value in RB. | |
| 2487 | bswap RB | |
| 2488 | jmp ->fff_resbit | |
| 2489 | | |
| 2490 |.ffunc_bit bit_bnot, 1 // Bitwise complement of the 32 bit value in RB. | |
| 2491 | not RB | |
| 2492 |.if DUALNUM | |
| 2493 | jmp ->fff_resbit | |
| 2494 |.else | |
| 2495 |->fff_resbit: // Non-DUALNUM: bit results are converted to a double and returned from xmm0. | |
| 2496 | cvtsi2sd xmm0, RB | |
| 2497 | jmp ->fff_resxmm0 | |
| 2498 |.endif | |
| 2499 | | |
| 2500 |->fff_fallback_bit_op: // Fallback for bit_band/bit_bor/bit_bxor, which reuse RD as a pointer. | |
| 2501 | mov NARGS:RD, TMP2 // Restore for fallback | |
| 2502 | jmp ->fff_fallback | |
| 2503 | | |
| 2504 |.macro .ffunc_bit_sh, name, ins // Shift/rotate: value in RB, count in RA (used through cl). | |
| 2505 |.if DUALNUM | |
| 2506 | .ffunc_bit name, 1, .ffunc_2 | |
| 2507 | // Note: no inline conversion from number for 2nd argument! | |
| 2508 | cmp dword [BASE+12], LJ_TISNUM; jne ->fff_fallback | |
| 2509 | mov RA, dword [BASE+8] | |
| 2510 |.else | |
| 2511 | .ffunc_nnsse name | |
| 2512 | sseconst_tobit xmm2, RBa | |
| 2513 | addsd xmm0, xmm2 // Convert both arguments with the same tobit constant. | |
| 2514 | addsd xmm1, xmm2 | |
| 2515 | movd RB, xmm0 | |
| 2516 | movd RA, xmm1 | |
| 2517 |.endif | |
| 2518 | ins RB, cl // Assumes RA is ecx. | |
| 2519 | jmp ->fff_resbit | |
| 2520 |.endmacro | |
| 2521 | | |
| 2522 |.ffunc_bit_sh bit_lshift, shl | |
| 2523 |.ffunc_bit_sh bit_rshift, shr | |
| 2524 |.ffunc_bit_sh bit_arshift, sar | |
| 2525 |.ffunc_bit_sh bit_rol, rol | |
| 2526 |.ffunc_bit_sh bit_ror, ror | |
| 2527 | | |
| 2528 |//----------------------------------------------------------------------- | |
| 2529 | | |
| 2530 |->fff_fallback_2: // Entry that forces the argument count to two. | |
| 2531 | mov NARGS:RD, 1+2 // Other args are ignored, anyway. | |
| 2532 | jmp ->fff_fallback | |
| 2533 |->fff_fallback_1: // Entry that forces the argument count to one. | |
| 2534 | mov NARGS:RD, 1+1 // Other args are ignored, anyway. | |
| 2535 |->fff_fallback: // Call fast function fallback handler. | |
| 2536 | // BASE = new base, RD = nargs+1 | |
| 2537 | mov L:RB, SAVE_L | |
| 2538 | mov PC, [BASE-4] // Fallback may overwrite PC. | |
| 2539 | mov SAVE_PC, PC // Redundant (but a defined value). | |
| 2540 | mov L:RB->base, BASE | |
| 2541 | lea RD, [BASE+NARGS:RD*8-8] // RD = address just past the last argument. | |
| 2542 | lea RA, [RD+8*LUA_MINSTACK] // Ensure enough space for handler. | |
| 2543 | mov L:RB->top, RD | |
| 2544 | mov CFUNC:RD, [BASE-8] // The function being called. | |
| 2545 | cmp RA, L:RB->maxstack | |
| 2546 | ja >5 // Need to grow stack. | |
| 2547 |.if X64 | |
| 2548 | mov CARG1d, L:RB | |
| 2549 |.else | |
| 2550 | mov ARG1, L:RB | |
| 2551 |.endif | |
| 2552 | call aword CFUNC:RD->f // (lua_State *L) | |
| 2553 | mov BASE, L:RB->base | |
| 2554 | // Either throws an error, or recovers and returns -1, 0 or nresults+1. | |
| 2555 | test RD, RD; jg ->fff_res // Returned nresults+1? | |
| 2556 |1: | |
| 2557 | mov RA, L:RB->top | |
| 2558 | sub RA, BASE | |
| 2559 | shr RA, 3 // RA = number of slots between base and top. | |
| 2560 | test RD, RD // Flags survive the lea and mov below and are consumed by jne. | |
| 2561 | lea NARGS:RD, [RA+1] | |
| 2562 | mov LFUNC:RB, [BASE-8] | |
| 2563 | jne ->vm_call_tail // Returned -1? | |
| 2564 | ins_callt // Returned 0: retry fast path. | |
| 2565 | | |
| 2566 |// Reconstruct previous base for vmeta_call during tailcall. | |
| 2567 |->vm_call_tail: | |
| 2568 | mov RA, BASE | |
| 2569 | test PC, FRAME_TYPE | |
| 2570 | jnz >3 // Any FRAME_TYPE bit set: compute the delta from PC itself. | |
| 2571 | movzx RB, PC_RA | |
| 2572 | not RBa // Note: ~RB = -(RB+1) | |
| 2573 | lea BASE, [BASE+RB*8] // base = base - (RB+1)*8 | |
| 2574 | jmp ->vm_call_dispatch // Resolve again for tailcall. | |
| 2575 |3: | |
| 2576 | mov RB, PC | |
| 2577 | and RB, -8 // Strip the low three bits to get the frame delta in bytes. | |
| 2578 | sub BASE, RB | |
| 2579 | jmp ->vm_call_dispatch // Resolve again for tailcall. | |
| 2580 | | |
| 2581 |5: // Grow stack for fallback handler. | |
| 2582 | mov FCARG2, LUA_MINSTACK | |
| 2583 | mov FCARG1, L:RB | |
| 2584 | call extern lj_state_growstack@8 // (lua_State *L, int n) | |
| 2585 | mov BASE, L:RB->base | |
| 2586 | xor RD, RD // Simulate a return 0. | |
| 2587 | jmp <1 // Dumb retry (goes through ff first). | |
| 2588 | | |
| 2589 |->fff_gcstep: // Call GC step function. | |
| 2590 | // BASE = new base, RD = nargs+1 | |
| 2591 | pop RBa // Must keep stack at same level. | |
| 2592 | mov TMPa, RBa // Save return address | |
| 2593 | mov L:RB, SAVE_L | |
| 2594 | mov SAVE_PC, PC // Redundant (but a defined value). | |
| 2595 | mov L:RB->base, BASE | |
| 2596 | lea RD, [BASE+NARGS:RD*8-8] // RD = address just past the last argument. | |
| 2597 | mov FCARG1, L:RB | |
| 2598 | mov L:RB->top, RD | |
| 2599 | call extern lj_gc_step@4 // (lua_State *L) | |
| 2600 | mov BASE, L:RB->base // Reload BASE from L after the call. | |
| 2601 | mov RD, L:RB->top | |
| 2602 | sub RD, BASE | |
| 2603 | shr RD, 3 | |
| 2604 | add NARGS:RD, 1 // Recompute RD = nargs+1 from base and top. | |
| 2605 | mov RBa, TMPa | |
| 2606 | push RBa // Restore return address. | |
| 2607 | ret | |
| 2608 | | |
| 2609 |//----------------------------------------------------------------------- | |
| 2610 |//-- Special dispatch targets ------------------------------------------- | |
| 2611 |//----------------------------------------------------------------------- | |
| 2612 | | |
| 2613 |->vm_record: // Dispatch target for recording phase. | |
| 2614 |.if JIT | |
| 2615 | movzx RD, byte [DISPATCH+DISPATCH_GL(hookmask)] | |
| 2616 | test RDL, HOOK_VMEVENT // No recording while in vmevent. | |
| 2617 | jnz >5 | |
| 2618 | // Decrement the hookcount for consistency, but always do the call. | |
| 2619 | test RDL, HOOK_ACTIVE | |
| 2620 | jnz >1 | |
| 2621 | test RDL, LUA_MASKLINE|LUA_MASKCOUNT | |
| 2622 | jz >1 | |
| 2623 | dec dword [DISPATCH+DISPATCH_GL(hookcount)] | |
| 2624 | jmp >1 | |
| 2625 |.endif | |
| 2626 | | |
| 2627 |->vm_rethook: // Dispatch target for return hooks. | |
| 2628 | movzx RD, byte [DISPATCH+DISPATCH_GL(hookmask)] | |
| 2629 | test RDL, HOOK_ACTIVE // Hook already active? | |
| 2630 | jnz >5 | |
| 2631 | jmp >1 // Otherwise always call lj_dispatch_ins. | |
| 2632 | | |
| 2633 |->vm_inshook: // Dispatch target for instr/line hooks. | |
| 2634 | movzx RD, byte [DISPATCH+DISPATCH_GL(hookmask)] | |
| 2635 | test RDL, HOOK_ACTIVE // Hook already active? | |
| 2636 | jnz >5 | |
| 2637 | | |
| 2638 | test RDL, LUA_MASKLINE|LUA_MASKCOUNT | |
| 2639 | jz >5 // Neither line nor count hook set: nothing to do. | |
| 2640 | dec dword [DISPATCH+DISPATCH_GL(hookcount)] | |
| 2641 | jz >1 // Counter reached zero: call the hook dispatcher. | |
| 2642 | test RDL, LUA_MASKLINE | |
| 2643 | jz >5 // Count hook only and counter not yet zero: skip the call. | |
| 2644 |1: | |
| 2645 | mov L:RB, SAVE_L | |
| 2646 | mov L:RB->base, BASE | |
| 2647 | mov FCARG2, PC // Caveat: FCARG2 == BASE | |
| 2648 | mov FCARG1, L:RB | |
| 2649 | // SAVE_PC must hold the _previous_ PC. The callee updates it with PC. | |
| 2650 | call extern lj_dispatch_ins@8 // (lua_State *L, const BCIns *pc) | |
| 2651 |3: | |
| 2652 | mov BASE, L:RB->base // Reload BASE, which was clobbered as FCARG2. | |
| 2653 |4: | |
| 2654 | movzx RA, PC_RA // Re-decode the operands of the current instruction. | |
| 2655 |5: | |
| 2656 | movzx OP, PC_OP | |
| 2657 | movzx RD, PC_RD | |
| 2658 |.if X64 | |
| 2659 | jmp aword [DISPATCH+OP*8+GG_DISP2STATIC] // Re-dispatch to static ins. | |
| 2660 |.else | |
| 2661 | jmp aword [DISPATCH+OP*4+GG_DISP2STATIC] // Re-dispatch to static ins. | |
| 2662 |.endif | |
| 2663 | | |
| 2664 |->cont_hook: // Continue from hook yield. | |
| 2665 | add PC, 4 // Advance PC by one 4-byte instruction. | |
| 2666 | mov RA, [RB-24] // Value saved below the frame addressed by RB. | |
| 2667 | mov MULTRES, RA // Restore MULTRES for *M ins. | |
| 2668 | jmp <4 // Re-decode and re-dispatch via label 4 of the hook code above. | |
| 2669 | | |
| 2670 |->vm_hotloop: // Hot loop counter underflow. | |
| 2671 |.if JIT | |
| 2672 | mov LFUNC:RB, [BASE-8] // Same as curr_topL(L). | |
| 2673 | mov RB, LFUNC:RB->pc | |
| 2674 | movzx RD, byte [RB+PC2PROTO(framesize)] // Frame size of the current function's prototype. | |
| 2675 | lea RD, [BASE+RD*8] | |
| 2676 | mov L:RB, SAVE_L | |
| 2677 | mov L:RB->base, BASE | |
| 2678 | mov L:RB->top, RD // top = base + framesize slots. | |
| 2679 | mov FCARG2, PC | |
| 2680 | lea FCARG1, [DISPATCH+GG_DISP2J] // Address of the jit_State. | |
| 2681 | mov aword [DISPATCH+DISPATCH_J(L)], L:RBa | |
| 2682 | mov SAVE_PC, PC | |
| 2683 | call extern lj_trace_hot@8 // (jit_State *J, const BCIns *pc) | |
| 2684 | jmp <3 // Reload BASE and re-dispatch via label 3 of the hook code above. | |
| 2685 |.endif | |
| 2686 | | |
| 2687 |->vm_callhook: // Dispatch target for call hooks. | |
| 2688 | mov SAVE_PC, PC | |
| 2689 |.if JIT | |
| 2690 | jmp >1 // Skip the hot-call marker below. | |
| 2691 |.endif | |
| 2692 | | |
| 2693 |->vm_hotcall: // Hot call counter underflow. | |
| 2694 |.if JIT | |
| 2695 | mov SAVE_PC, PC | |
| 2696 | or PC, 1 // Marker for hot call. | |
| 2697 |1: | |
| 2698 |.endif | |
| 2699 | lea RD, [BASE+NARGS:RD*8-8] // RD = address just past the last argument. | |
| 2700 | mov L:RB, SAVE_L | |
| 2701 | mov L:RB->base, BASE | |
| 2702 | mov L:RB->top, RD | |
| 2703 | mov FCARG2, PC | |
| 2704 | mov FCARG1, L:RB | |
| 2705 | call extern lj_dispatch_call@8 // (lua_State *L, const BCIns *pc) | |
| 2706 | // ASMFunction returned in eax/rax (RDa). | |
| 2707 | mov SAVE_PC, 0 // Invalidate for subsequent line hook. | |
| 2708 |.if JIT | |
| 2709 | and PC, -2 // Clear the hot-call marker bit again. | |
| 2710 |.endif | |
| 2711 | mov BASE, L:RB->base | |
| 2712 | mov RAa, RDa // Park the returned target while RD is recomputed. | |
| 2713 | mov RD, L:RB->top | |
| 2714 | sub RD, BASE | |
| 2715 | mov RBa, RAa // Move the target to RB; RA is reloaded next. | |
| 2716 | movzx RA, PC_RA | |
| 2717 | shr RD, 3 | |
| 2718 | add NARGS:RD, 1 // RD = nargs+1 recomputed from base and top. | |
| 2719 | jmp RBa | |
| 2720 | | |
| 2721 |->cont_stitch: // Trace stitching. | |
| 2722 |.if JIT | |
| 2723 | // BASE = base, RC = result, RB = mbase | |
| 2724 | mov TRACE:RA, [RB-24] // Save previous trace. | |
| 2725 | mov TMP1, TRACE:RA | |
| 2726 | mov TMP3, DISPATCH // Need one more register. | |
| 2727 | mov DISPATCH, MULTRES // DISPATCH temporarily serves as the result counter. | |
| 2728 | movzx RA, PC_RA | |
| 2729 | lea RA, [BASE+RA*8] // Call base. | |
| 2730 | sub DISPATCH, 1 // MULTRES holds nresults+1. | |
| 2731 | jz >2 // No results to move. | |
| 2732 |1: // Move results down. | |
| 2733 |.if X64 | |
| 2734 | mov RBa, [RC] | |
| 2735 | mov [RA], RBa | |
| 2736 |.else | |
| 2737 | mov RB, [RC] | |
| 2738 | mov [RA], RB | |
| 2739 | mov RB, [RC+4] | |
| 2740 | mov [RA+4], RB | |
| 2741 |.endif | |
| 2742 | add RC, 8 | |
| 2743 | add RA, 8 | |
| 2744 | sub DISPATCH, 1 | |
| 2745 | jnz <1 | |
| 2746 |2: | |
| 2747 | movzx RC, PC_RA | |
| 2748 | movzx RB, PC_RB | |
| 2749 | add RC, RB | |
| 2750 | lea RC, [BASE+RC*8-8] // RC = end of the wanted results (RA + RB - 1 slots from BASE). | |
| 2751 |3: | |
| 2752 | cmp RC, RA | |
| 2753 | ja >9 // More results wanted? | |
| 2754 | | |
| 2755 | mov DISPATCH, TMP3 // Restore DISPATCH. | |
| 2756 | mov TRACE:RD, TMP1 // Get previous trace. | |
| 2757 | movzx RB, word TRACE:RD->traceno | |
| 2758 | movzx RD, word TRACE:RD->link | |
| 2759 | cmp RD, RB | |
| 2760 | je ->cont_nop // Blacklisted. | |
| 2761 | test RD, RD | |
| 2762 | jne =>BC_JLOOP // Jump to stitched trace. | |
| 2763 | | |
| 2764 | // Stitch a new trace to the previous trace. | |
| 2765 | mov [DISPATCH+DISPATCH_J(exitno)], RB // exitno = number of the previous trace. | |
| 2766 | mov L:RB, SAVE_L | |
| 2767 | mov L:RB->base, BASE | |
| 2768 | mov FCARG2, PC | |
| 2769 | lea FCARG1, [DISPATCH+GG_DISP2J] | |
| 2770 | mov aword [DISPATCH+DISPATCH_J(L)], L:RBa | |
| 2771 | call extern lj_dispatch_stitch@8 // (jit_State *J, const BCIns *pc) | |
| 2772 | mov BASE, L:RB->base | |
| 2773 | jmp ->cont_nop | |
| 2774 | | |
| 2775 |9: // Fill up results with nil. | |
| 2776 | mov dword [RA+4], LJ_TNIL | |
| 2777 | add RA, 8 | |
| 2778 | jmp <3 | |
| 2779 |.endif | |
| 2780 | | |
| 2781 |->vm_profhook: // Dispatch target for profiler hook. Calls lj_dispatch_profile, then subtracts 4 from PC and continues at cont_nop. | |
| 2782 #if LJ_HASPROFILE | |
| 2783 | mov L:RB, SAVE_L | |
| 2784 | mov L:RB->base, BASE | |
| 2785 | mov FCARG2, PC // Caveat: FCARG2 == BASE, so BASE is stored to L->base above and reloaded after the call. | |
| 2786 | mov FCARG1, L:RB | |
| 2787 | call extern lj_dispatch_profile@8 // (lua_State *L, const BCIns *pc) | |
| 2788 | mov BASE, L:RB->base | |
| 2789 | // HOOK_PROFILE is off again, so re-dispatch to dynamic instruction. | |
| 2790 | sub PC, 4 | |
| 2791 | jmp ->cont_nop | |
| 2792 #endif | |
| 2793 | | |
| 2794 |//----------------------------------------------------------------------- | |
| 2795 |//-- Trace exit handler ------------------------------------------------- | |
| 2796 |//----------------------------------------------------------------------- | |
| 2797 | | |
| 2798 |// Called from an exit stub with the exit number on the stack. | |
| 2799 |// The 16 bit exit number is stored with two (sign-extended) push imm8. The handler saves the GPRs and SSE registers on the stack, calls lj_trace_exit with a pointer to that save area and then continues in vm_exit_interp. | |
| 2800 |->vm_exit_handler: | |
| 2801 |.if JIT | |
| 2802 |.if X64 | |
| 2803 | push r13; push r12 | |
| 2804 | push r11; push r10; push r9; push r8 | |
| 2805 | push rdi; push rsi; push rbp; lea rbp, [rsp+88]; push rbp | |
| 2806 | push rbx; push rdx; push rcx; push rax | |
| 2807 | movzx RC, byte [rbp-8] // Reconstruct exit number (low byte here, high byte into RCH on the next line). | |
| 2808 | mov RCH, byte [rbp-16] | |
| 2809 | mov [rbp-8], r15; mov [rbp-16], r14 | |
| 2810 |.else | |
| 2811 | push ebp; lea ebp, [esp+12]; push ebp | |
| 2812 | push ebx; push edx; push ecx; push eax | |
| 2813 | movzx RC, byte [ebp-4] // Reconstruct exit number (low byte here, high byte into RCH on the next line). | |
| 2814 | mov RCH, byte [ebp-8] | |
| 2815 | mov [ebp-4], edi; mov [ebp-8], esi | |
| 2816 |.endif | |
| 2817 | // Caveat: DISPATCH is ebx. | |
| 2818 | mov DISPATCH, [ebp] | |
| 2819 | mov RA, [DISPATCH+DISPATCH_GL(vmstate)] // Get trace number (read from vmstate before set_vmstate overwrites it; stored as J->parent below). | |
| 2820 | set_vmstate EXIT | |
| 2821 | mov [DISPATCH+DISPATCH_J(exitno)], RC | |
| 2822 | mov [DISPATCH+DISPATCH_J(parent)], RA | |
| 2823 |.if X64 | |
| 2824 |.if X64WIN | |
| 2825 | sub rsp, 16*8+4*8 // Room for SSE regs + save area. | |
| 2826 |.else | |
| 2827 | sub rsp, 16*8 // Room for SSE regs. | |
| 2828 |.endif | |
| 2829 | add rbp, -128 | |
| 2830 | movsd qword [rbp-8], xmm15; movsd qword [rbp-16], xmm14 | |
| 2831 | movsd qword [rbp-24], xmm13; movsd qword [rbp-32], xmm12 | |
| 2832 | movsd qword [rbp-40], xmm11; movsd qword [rbp-48], xmm10 | |
| 2833 | movsd qword [rbp-56], xmm9; movsd qword [rbp-64], xmm8 | |
| 2834 | movsd qword [rbp-72], xmm7; movsd qword [rbp-80], xmm6 | |
| 2835 | movsd qword [rbp-88], xmm5; movsd qword [rbp-96], xmm4 | |
| 2836 | movsd qword [rbp-104], xmm3; movsd qword [rbp-112], xmm2 | |
| 2837 | movsd qword [rbp-120], xmm1; movsd qword [rbp-128], xmm0 | |
| 2838 |.else | |
| 2839 | sub esp, 8*8+16 // Room for SSE regs + args. | |
| 2840 | movsd qword [ebp-40], xmm7; movsd qword [ebp-48], xmm6 | |
| 2841 | movsd qword [ebp-56], xmm5; movsd qword [ebp-64], xmm4 | |
| 2842 | movsd qword [ebp-72], xmm3; movsd qword [ebp-80], xmm2 | |
| 2843 | movsd qword [ebp-88], xmm1; movsd qword [ebp-96], xmm0 | |
| 2844 |.endif | |
| 2845 | // Caveat: RB is ebp. | |
| 2846 | mov L:RB, [DISPATCH+DISPATCH_GL(cur_L)] | |
| 2847 | mov BASE, [DISPATCH+DISPATCH_GL(jit_base)] | |
| 2848 | mov aword [DISPATCH+DISPATCH_J(L)], L:RBa | |
| 2849 | mov L:RB->base, BASE | |
| 2850 |.if X64WIN | |
| 2851 | lea CARG2, [rsp+4*8] | |
| 2852 |.elif X64 | |
| 2853 | mov CARG2, rsp | |
| 2854 |.else | |
| 2855 | lea FCARG2, [esp+16] | |
| 2856 |.endif | |
| 2857 | lea FCARG1, [DISPATCH+GG_DISP2J] | |
| 2858 | mov dword [DISPATCH+DISPATCH_GL(jit_base)], 0 | |
| 2859 | call extern lj_trace_exit@8 // (jit_State *J, ExitState *ex) | |
| 2860 | // MULTRES or negated error code returned in eax (RD). | |
| 2861 | mov RAa, L:RB->cframe | |
| 2862 | and RAa, CFRAME_RAWMASK | |
| 2863 |.if X64WIN | |
| 2864 | // Reposition stack later. | |
| 2865 |.elif X64 | |
| 2866 | mov rsp, RAa // Reposition stack to C frame. | |
| 2867 |.else | |
| 2868 | mov esp, RAa // Reposition stack to C frame. | |
| 2869 |.endif | |
| 2870 | mov [RAa+CFRAME_OFS_L], L:RB // Set SAVE_L (on-trace resume/yield). | |
| 2871 | mov BASE, L:RB->base | |
| 2872 | mov PC, [RAa+CFRAME_OFS_PC] // Get SAVE_PC. | |
| 2873 |.if X64 | |
| 2874 | jmp >1 | |
| 2875 |.endif | |
| 2876 |.endif | |
| 2877 |->vm_exit_interp: | |
| 2878 | // RD = MULTRES or negated error code, BASE, PC and DISPATCH set. On X64 the exit handler above jumps to local label 1 below, bypassing the lea/add placed before that label. | |
| 2879 |.if JIT | |
| 2880 |.if X64 | |
| 2881 | // Restore additional callee-save registers only used in compiled code. | |
| 2882 |.if X64WIN | |
| 2883 | lea RAa, [rsp+9*16+4*8] | |
| 2884 |1: | |
| 2885 | movdqa xmm15, [RAa-9*16] | |
| 2886 | movdqa xmm14, [RAa-8*16] | |
| 2887 | movdqa xmm13, [RAa-7*16] | |
| 2888 | movdqa xmm12, [RAa-6*16] | |
| 2889 | movdqa xmm11, [RAa-5*16] | |
| 2890 | movdqa xmm10, [RAa-4*16] | |
| 2891 | movdqa xmm9, [RAa-3*16] | |
| 2892 | movdqa xmm8, [RAa-2*16] | |
| 2893 | movdqa xmm7, [RAa-1*16] | |
| 2894 | mov rsp, RAa // Reposition stack to C frame. | |
| 2895 | movdqa xmm6, [RAa] | |
| 2896 | mov r15, CSAVE_3 | |
| 2897 | mov r14, CSAVE_4 | |
| 2898 |.else | |
| 2899 | add rsp, 16 // Reposition stack to C frame. | |
| 2900 |1: | |
| 2901 |.endif | |
| 2902 | mov r13, TMPa | |
| 2903 | mov r12, TMPQ | |
| 2904 |.endif | |
| 2905 | cmp RD, -LUA_ERRERR; jae >9 // Check for error from exit (unsigned compare: RD >= -LUA_ERRERR goes to the rethrow path at 9). | |
| 2906 | mov L:RB, SAVE_L | |
| 2907 | mov MULTRES, RD | |
| 2908 | mov LFUNC:KBASE, [BASE-8] | |
| 2909 | mov KBASE, LFUNC:KBASE->pc | |
| 2910 | mov KBASE, [KBASE+PC2PROTO(k)] | |
| 2911 | mov L:RB->base, BASE | |
| 2912 | mov dword [DISPATCH+DISPATCH_GL(jit_base)], 0 | |
| 2913 | set_vmstate INTERP | |
| 2914 | // Modified copy of ins_next which handles function header dispatch, too. | |
| 2915 | mov RC, [PC] | |
| 2916 | movzx RA, RCH | |
| 2917 | movzx OP, RCL | |
| 2918 | add PC, 4 | |
| 2919 | shr RC, 16 | |
| 2920 | cmp MULTRES, -17 // Static dispatch? (MULTRES == -17 selects path 5.) | |
| 2921 | je >5 | |
| 2922 | cmp OP, BC_FUNCF // Function header? | |
| 2923 | jb >3 | |
| 2924 | cmp OP, BC_FUNCC+2 // Fast function? | |
| 2925 | jae >4 | |
| 2926 |2: | |
| 2927 | mov RC, MULTRES // RC/RD holds nres+1. | |
| 2928 |3: | |
| 2929 |.if X64 | |
| 2930 | jmp aword [DISPATCH+OP*8] | |
| 2931 |.else | |
| 2932 | jmp aword [DISPATCH+OP*4] | |
| 2933 |.endif | |
| 2934 | | |
| 2935 |4: // Check frame below fast function. | |
| 2936 | mov RC, [BASE-4] | |
| 2937 | test RC, FRAME_TYPE | |
| 2938 | jnz <2 // Trace stitching continuation? | |
| 2939 | // Otherwise set KBASE for Lua function below fast function. | |
| 2940 | movzx RC, byte [RC-3] | |
| 2941 | not RCa | |
| 2942 | mov LFUNC:KBASE, [BASE+RC*8-8] | |
| 2943 | mov KBASE, LFUNC:KBASE->pc | |
| 2944 | mov KBASE, [KBASE+PC2PROTO(k)] | |
| 2945 | jmp <2 | |
| 2946 | | |
| 2947 |5: // Dispatch to static entry of original ins replaced by BC_JLOOP. | |
| 2948 | mov RA, [DISPATCH+DISPATCH_J(trace)] | |
| 2949 | mov TRACE:RA, [RA+RD*4] | |
| 2950 | mov RC, TRACE:RA->startins | |
| 2951 | movzx RA, RCH | |
| 2952 | movzx OP, RCL | |
| 2953 | shr RC, 16 | |
| 2954 |.if X64 | |
| 2955 | jmp aword [DISPATCH+OP*8+GG_DISP2STATIC] | |
| 2956 |.else | |
| 2957 | jmp aword [DISPATCH+OP*4+GG_DISP2STATIC] | |
| 2958 |.endif | |
| 2959 | | |
| 2960 |9: // Rethrow error from the right C frame. | |
| 2961 | mov FCARG2, RD | |
| 2962 | mov FCARG1, L:RB | |
| 2963 | neg FCARG2 | |
| 2964 | call extern lj_err_trace@8 // (lua_State *L, int errcode) | |
| 2965 |.endif | |
| 2966 | | |
| 2967 |//----------------------------------------------------------------------- | |
| 2968 |//-- Math helper functions ---------------------------------------------- | |
| 2969 |//----------------------------------------------------------------------- | |
| 2970 | | |
| 2971 |// FP value rounding. Called by math.floor/math.ceil fast functions | |
| 2972 |// and from JIT code. arg/ret is xmm0. xmm0-xmm3 and RD (eax) modified. Macro parameters: mode 0 = floor, 1 = ceil, 2 = trunc; if cond is true on non-X64 builds, an extra entry point is emitted that takes the argument at [esp+4] and returns it on the x87 stack. | |
| 2973 |.macro vm_round, name, mode, cond | |
| 2974 |->name: | |
| 2975 |.if not X64 and cond | |
| 2976 | movsd xmm0, qword [esp+4] | |
| 2977 | call ->name .. _sse | |
| 2978 | movsd qword [esp+4], xmm0 // Overwrite callee-owned arg. | |
| 2979 | fld qword [esp+4] | |
| 2980 | ret | |
| 2981 |.endif | |
| 2982 | | |
| 2983 |->name .. _sse: | |
| 2984 | sseconst_abs xmm2, RDa | |
| 2985 | sseconst_2p52 xmm3, RDa | |
| 2986 | movaps xmm1, xmm0 | |
| 2987 | andpd xmm1, xmm2 // |x| | |
| 2988 | ucomisd xmm3, xmm1 // No truncation if 2^52 <= |x|. | |
| 2989 | jbe >1 | |
| 2990 | andnpd xmm2, xmm0 // Isolate sign bit. | |
| 2991 |.if mode == 2 // trunc(x)? | |
| 2992 | movaps xmm0, xmm1 | |
| 2993 | addsd xmm1, xmm3 // (|x| + 2^52) - 2^52 | |
| 2994 | subsd xmm1, xmm3 | |
| 2995 | sseconst_1 xmm3, RDa | |
| 2996 | cmpsd xmm0, xmm1, 1 // |x| < result? | |
| 2997 | andpd xmm0, xmm3 | |
| 2998 | subsd xmm1, xmm0 // If yes, subtract 1 (xmm0 is 1.0 or 0 after the andpd). | |
| 2999 | orpd xmm1, xmm2 // Merge sign bit back in. | |
| 3000 |.else | |
| 3001 | addsd xmm1, xmm3 // (|x| + 2^52) - 2^52 | |
| 3002 | subsd xmm1, xmm3 | |
| 3003 | orpd xmm1, xmm2 // Merge sign bit back in. | |
| 3004 | sseconst_1 xmm3, RDa | |
| 3005 | .if mode == 1 // ceil(x)? | |
| 3006 | cmpsd xmm0, xmm1, 6 // x > result? | |
| 3007 | andpd xmm0, xmm3 | |
| 3008 | addsd xmm1, xmm0 // If yes, add 1. | |
| 3009 | orpd xmm1, xmm2 // Merge sign bit back in (again). | |
| 3010 | .else // floor(x)? | |
| 3011 | cmpsd xmm0, xmm1, 1 // x < result? | |
| 3012 | andpd xmm0, xmm3 | |
| 3013 | subsd xmm1, xmm0 // If yes, subtract 1. | |
| 3014 | .endif | |
| 3015 |.endif | |
| 3016 | movaps xmm0, xmm1 | |
| 3017 |1: | |
| 3018 | ret | |
| 3019 |.endmacro | |
| 3020 | | |
| 3021 | vm_round vm_floor, 0, 1 | |
| 3022 | vm_round vm_ceil, 1, JIT | |
| 3023 | vm_round vm_trunc, 2, JIT | |
| 3024 | | |
| 3025 |// FP modulo x%y. Called by BC_MOD* and vm_arith. Computes x - floor(x/y)*y; when 2^52 <= |x/y| or the compare is unordered, the quotient is used without rounding (path 1). | |
| 3026 |->vm_mod: | |
| 3027 |// Args in xmm0/xmm1, return value in xmm0. | |
| 3028 |// Caveat: xmm0-xmm5 and RC (eax) modified! | |
| 3029 | movaps xmm5, xmm0 | |
| 3030 | divsd xmm0, xmm1 | |
| 3031 | sseconst_abs xmm2, RDa | |
| 3032 | sseconst_2p52 xmm3, RDa | |
| 3033 | movaps xmm4, xmm0 | |
| 3034 | andpd xmm4, xmm2 // |x/y| | |
| 3035 | ucomisd xmm3, xmm4 // No truncation if 2^52 <= |x/y|. | |
| 3036 | jbe >1 | |
| 3037 | andnpd xmm2, xmm0 // Isolate sign bit. | |
| 3038 | addsd xmm4, xmm3 // (|x/y| + 2^52) - 2^52 | |
| 3039 | subsd xmm4, xmm3 | |
| 3040 | orpd xmm4, xmm2 // Merge sign bit back in. | |
| 3041 | sseconst_1 xmm2, RDa | |
| 3042 | cmpsd xmm0, xmm4, 1 // x/y < result? | |
| 3043 | andpd xmm0, xmm2 | |
| 3044 | subsd xmm4, xmm0 // If yes, subtract 1.0. | |
| 3045 | movaps xmm0, xmm5 | |
| 3046 | mulsd xmm1, xmm4 | |
| 3047 | subsd xmm0, xmm1 | |
| 3048 | ret | |
| 3049 |1: | |
| 3050 | mulsd xmm1, xmm0 | |
| 3051 | movaps xmm0, xmm5 | |
| 3052 | subsd xmm0, xmm1 | |
| 3053 | ret | |
| 3054 | | |
| 3055 |//----------------------------------------------------------------------- | |
| 3056 |//-- Miscellaneous functions -------------------------------------------- | |
| 3057 |//----------------------------------------------------------------------- | |
| 3058 | | |
| 3059 |// int lj_vm_cpuid(uint32_t f, uint32_t res[4]). Executes CPUID for function f with ecx = 0 and stores eax, ebx, ecx, edx to res[0..3]. | |
| 3060 |->vm_cpuid: | |
| 3061 |.if X64 | |
| 3062 | mov eax, CARG1d | |
| 3063 | .if X64WIN; push rsi; mov rsi, CARG2; .endif | |
| 3064 | push rbx | |
| 3065 | xor ecx, ecx | |
| 3066 | cpuid | |
| 3067 | mov [rsi], eax | |
| 3068 | mov [rsi+4], ebx | |
| 3069 | mov [rsi+8], ecx | |
| 3070 | mov [rsi+12], edx | |
| 3071 | pop rbx | |
| 3072 | .if X64WIN; pop rsi; .endif | |
| 3073 | ret | |
| 3074 |.else | |
| 3075 | pushfd | |
| 3076 | pop edx | |
| 3077 | mov ecx, edx | |
| 3078 | xor edx, 0x00200000 // Toggle ID bit in flags. | |
| 3079 | push edx | |
| 3080 | popfd | |
| 3081 | pushfd | |
| 3082 | pop edx | |
| 3083 | xor eax, eax // Zero means no features supported. | |
| 3084 | cmp ecx, edx | |
| 3085 | jz >1 // No ID toggle means no CPUID support. | |
| 3086 | mov eax, [esp+4] // Argument 1 is function number. | |
| 3087 | push edi | |
| 3088 | push ebx | |
| 3089 | xor ecx, ecx | |
| 3090 | cpuid | |
| 3091 | mov edi, [esp+16] // Argument 2 is result area (offset 16 accounts for the two pushes above). | |
| 3092 | mov [edi], eax | |
| 3093 | mov [edi+4], ebx | |
| 3094 | mov [edi+8], ecx | |
| 3095 | mov [edi+12], edx | |
| 3096 | pop ebx | |
| 3097 | pop edi | |
| 3098 |1: | |
| 3099 | ret | |
| 3100 |.endif | |
| 3101 | | |
| 3102 |.define NEXT_TAB, TAB:FCARG1 | |
| 3103 |.define NEXT_IDX, FCARG2 | |
| 3104 |.define NEXT_PTR, RCa | |
| 3105 |.define NEXT_PTRd, RC | |
| 3106 |.macro NEXT_RES_IDXL, op2; lea edx, [NEXT_IDX+op2]; .endmacro | |
| 3107 |.if X64 | |
| 3108 |.define NEXT_TMP, CARG3d | |
| 3109 |.define NEXT_TMPq, CARG3 | |
| 3110 |.define NEXT_ASIZE, CARG4d | |
| 3111 |.macro NEXT_ENTER; .endmacro | |
| 3112 |.macro NEXT_LEAVE; ret; .endmacro | |
| 3113 |.if X64WIN | |
| 3114 |.define NEXT_RES_PTR, [rsp+aword*5] | |
| 3115 |.macro NEXT_RES_IDX, op2; add NEXT_IDX, op2; .endmacro | |
| 3116 |.else | |
| 3117 |.define NEXT_RES_PTR, [rsp+aword*1] | |
| 3118 |.macro NEXT_RES_IDX, op2; lea edx, [NEXT_IDX+op2]; .endmacro | |
| 3119 |.endif | |
| 3120 |.else | |
| 3121 |.define NEXT_ASIZE, esi | |
| 3122 |.define NEXT_TMP, edi | |
| 3123 |.macro NEXT_ENTER; push esi; push edi; .endmacro | |
| 3124 |.macro NEXT_LEAVE; pop edi; pop esi; ret; .endmacro | |
| 3125 |.define NEXT_RES_PTR, [esp+dword*3] | |
| 3126 |.macro NEXT_RES_IDX, op2; add NEXT_IDX, op2; .endmacro | |
| 3127 |.endif | |
| 3128 | | |
| 3129 |// TValue *lj_vm_next(GCtab *t, uint32_t idx) | |
| 3130 |// Next idx returned in edx. For an array-part hit the value and its key are copied to the slot addressed by NEXT_RES_PTR and NEXT_PTR points there; for a hash-part hit NEXT_PTR points at the node; at the end of iteration the key in the result slot is set to nil. | |
| 3131 |->vm_next: | |
| 3132 |.if JIT | |
| 3133 | NEXT_ENTER | |
| 3134 | mov NEXT_ASIZE, NEXT_TAB->asize | |
| 3135 |1: // Traverse array part. | |
| 3136 | cmp NEXT_IDX, NEXT_ASIZE; jae >5 | |
| 3137 | mov NEXT_TMP, NEXT_TAB->array | |
| 3138 | cmp dword [NEXT_TMP+NEXT_IDX*8+4], LJ_TNIL; je >2 | |
| 3139 | lea NEXT_PTR, NEXT_RES_PTR | |
| 3140 |.if X64 | |
| 3141 | mov NEXT_TMPq, qword [NEXT_TMP+NEXT_IDX*8] | |
| 3142 | mov qword [NEXT_PTR], NEXT_TMPq | |
| 3143 |.else | |
| 3144 | mov NEXT_ASIZE, dword [NEXT_TMP+NEXT_IDX*8+4] | |
| 3145 | mov NEXT_TMP, dword [NEXT_TMP+NEXT_IDX*8] | |
| 3146 | mov dword [NEXT_PTR+4], NEXT_ASIZE | |
| 3147 | mov dword [NEXT_PTR], NEXT_TMP | |
| 3148 |.endif | |
| 3149 |.if DUALNUM | |
| 3150 | mov dword [NEXT_PTR+dword*3], LJ_TISNUM | |
| 3151 | mov dword [NEXT_PTR+dword*2], NEXT_IDX | |
| 3152 |.else | |
| 3153 | cvtsi2sd xmm0, NEXT_IDX | |
| 3154 | movsd qword [NEXT_PTR+dword*2], xmm0 | |
| 3155 |.endif | |
| 3156 | NEXT_RES_IDX 1 | |
| 3157 | NEXT_LEAVE | |
| 3158 |2: // Skip holes in array part. | |
| 3159 | add NEXT_IDX, 1 | |
| 3160 | jmp <1 | |
| 3161 | | |
| 3162 |5: // Traverse hash part. NEXT_IDX becomes the node index (idx - asize). | |
| 3163 | sub NEXT_IDX, NEXT_ASIZE | |
| 3164 |6: | |
| 3165 | cmp NEXT_IDX, NEXT_TAB->hmask; ja >9 | |
| 3166 | imul NEXT_PTRd, NEXT_IDX, #NODE | |
| 3167 | add NODE:NEXT_PTRd, dword NEXT_TAB->node | |
| 3168 | cmp dword NODE:NEXT_PTR->val.it, LJ_TNIL; je >7 | |
| 3169 | NEXT_RES_IDXL NEXT_ASIZE+1 | |
| 3170 | NEXT_LEAVE | |
| 3171 |7: // Skip holes in hash part. | |
| 3172 | add NEXT_IDX, 1 | |
| 3173 | jmp <6 | |
| 3174 | | |
| 3175 |9: // End of iteration. Set the key to nil (not the value). | |
| 3176 | NEXT_RES_IDX NEXT_ASIZE | |
| 3177 | lea NEXT_PTR, NEXT_RES_PTR | |
| 3178 | mov dword [NEXT_PTR+dword*3], LJ_TNIL | |
| 3179 | NEXT_LEAVE | |
| 3180 |.endif | |
| 3181 | | |
| 3182 |//----------------------------------------------------------------------- | |
| 3183 |//-- Assertions --------------------------------------------------------- | |
| 3184 |//----------------------------------------------------------------------- | |
| 3185 | | |
| 3186 |->assert_bad_for_arg_type: // Traps with int3; an additional int3 is emitted first when LUA_USE_ASSERT is defined. | |
| 3187 #ifdef LUA_USE_ASSERT | |
| 3188 | int3 | |
| 3189 #endif | |
| 3190 | int3 | |
| 3191 | | |
| 3192 |//----------------------------------------------------------------------- | |
| 3193 |//-- FFI helper functions ----------------------------------------------- | |
| 3194 |//----------------------------------------------------------------------- | |
| 3195 | | |
| 3196 |// Handler for callback functions. Callback slot number in ah/al. Saves the incoming argument registers and the caller stack address into CTSTATE->cb, calls lj_ccallback_enter and finishes with ins_callt. | |
| 3197 |->vm_ffi_callback: | |
| 3198 |.if FFI | |
| 3199 |.type CTSTATE, CTState, PC | |
| 3200 |.if not X64 | |
| 3201 | sub esp, 16 // Leave room for SAVE_ERRF etc. | |
| 3202 |.endif | |
| 3203 | saveregs_ // ebp/rbp already saved. ebp now holds global_State *. | |
| 3204 | lea DISPATCH, [ebp+GG_G2DISP] | |
| 3205 | mov CTSTATE, GL:ebp->ctype_state | |
| 3206 | movzx eax, ax | |
| 3207 | mov CTSTATE->cb.slot, eax | |
| 3208 |.if X64 | |
| 3209 | mov CTSTATE->cb.gpr[0], CARG1 | |
| 3210 | mov CTSTATE->cb.gpr[1], CARG2 | |
| 3211 | mov CTSTATE->cb.gpr[2], CARG3 | |
| 3212 | mov CTSTATE->cb.gpr[3], CARG4 | |
| 3213 | movsd qword CTSTATE->cb.fpr[0], xmm0 | |
| 3214 | movsd qword CTSTATE->cb.fpr[1], xmm1 | |
| 3215 | movsd qword CTSTATE->cb.fpr[2], xmm2 | |
| 3216 | movsd qword CTSTATE->cb.fpr[3], xmm3 | |
| 3217 |.if X64WIN | |
| 3218 | lea rax, [rsp+CFRAME_SIZE+4*8] | |
| 3219 |.else | |
| 3220 | lea rax, [rsp+CFRAME_SIZE] | |
| 3221 | mov CTSTATE->cb.gpr[4], CARG5 | |
| 3222 | mov CTSTATE->cb.gpr[5], CARG6 | |
| 3223 | movsd qword CTSTATE->cb.fpr[4], xmm4 | |
| 3224 | movsd qword CTSTATE->cb.fpr[5], xmm5 | |
| 3225 | movsd qword CTSTATE->cb.fpr[6], xmm6 | |
| 3226 | movsd qword CTSTATE->cb.fpr[7], xmm7 | |
| 3227 |.endif | |
| 3228 | mov CTSTATE->cb.stack, rax | |
| 3229 | mov CARG2, rsp | |
| 3230 |.else | |
| 3231 | lea eax, [esp+CFRAME_SIZE+16] | |
| 3232 | mov CTSTATE->cb.gpr[0], FCARG1 | |
| 3233 | mov CTSTATE->cb.gpr[1], FCARG2 | |
| 3234 | mov CTSTATE->cb.stack, eax | |
| 3235 | mov FCARG1, [esp+CFRAME_SIZE+12] // Move around misplaced retaddr/ebp. | |
| 3236 | mov FCARG2, [esp+CFRAME_SIZE+8] | |
| 3237 | mov SAVE_RET, FCARG1 | |
| 3238 | mov SAVE_R4, FCARG2 | |
| 3239 | mov FCARG2, esp | |
| 3240 |.endif | |
| 3241 | mov SAVE_PC, CTSTATE // Any value outside of bytecode is ok. | |
| 3242 | mov FCARG1, CTSTATE | |
| 3243 | call extern lj_ccallback_enter@8 // (CTState *cts, void *cf) | |
| 3244 | // lua_State * returned in eax (RD). | |
| 3245 | set_vmstate INTERP | |
| 3246 | mov BASE, L:RD->base | |
| 3247 | mov RD, L:RD->top | |
| 3248 | sub RD, BASE | |
| 3249 | mov LFUNC:RB, [BASE-8] | |
| 3250 | shr RD, 3 | |
| 3251 | add RD, 1 | |
| 3252 | ins_callt | |
| 3253 |.endif | |
| 3254 | | |
| 3255 |->cont_ffi_callback: // Return from FFI callback. Calls lj_ccallback_leave, loads the C return value(s) from CTSTATE->cb and leaves the frame (X64: via vm_leave_unw; x86: restoreregs plus a manual stack adjustment). | |
| 3256 |.if FFI | |
| 3257 | mov L:RA, SAVE_L | |
| 3258 | mov CTSTATE, [DISPATCH+DISPATCH_GL(ctype_state)] | |
| 3259 | mov aword CTSTATE->L, L:RAa | |
| 3260 | mov L:RA->base, BASE | |
| 3261 | mov L:RA->top, RB | |
| 3262 | mov FCARG1, CTSTATE | |
| 3263 | mov FCARG2, RC | |
| 3264 | call extern lj_ccallback_leave@8 // (CTState *cts, TValue *o) | |
| 3265 |.if X64 | |
| 3266 | mov rax, CTSTATE->cb.gpr[0] | |
| 3267 | movsd xmm0, qword CTSTATE->cb.fpr[0] | |
| 3268 | jmp ->vm_leave_unw | |
| 3269 |.else | |
| 3270 | mov L:RB, SAVE_L | |
| 3271 | mov eax, CTSTATE->cb.gpr[0] | |
| 3272 | mov edx, CTSTATE->cb.gpr[1] | |
| 3273 | cmp dword CTSTATE->cb.gpr[2], 1 | |
| 3274 | jb >7 | |
| 3275 | je >6 | |
| 3276 | fld qword CTSTATE->cb.fpr[0].d | |
| 3277 | jmp >7 | |
| 3278 |6: | |
| 3279 | fld dword CTSTATE->cb.fpr[0].f | |
| 3280 |7: | |
| 3281 | mov ecx, L:RB->top | |
| 3282 | movzx ecx, word [ecx+6] // Get stack adjustment and copy up. | |
| 3283 | mov SAVE_L, ecx // Must be one slot above SAVE_RET | |
| 3284 | restoreregs | |
| 3285 | pop ecx // Move return addr from SAVE_RET. | |
| 3286 | add esp, [esp] // Adjust stack. | |
| 3287 | add esp, 16 | |
| 3288 | push ecx | |
| 3289 | ret | |
| 3290 |.endif | |
| 3291 |.endif | |
| 3292 | | |
| 3293 |->vm_ffi_call@4: // Call C function via FFI. Takes a CCallState pointer (CARG1/FCARG1), copies its stack slots and argument registers into place, calls CCSTATE->func and stores the result registers back into the CCallState. | |
| 3294 | // Caveat: needs special frame unwinding, see below. | |
| 3295 |.if FFI | |
| 3296 |.if X64 | |
| 3297 | .type CCSTATE, CCallState, rbx | |
| 3298 | push rbp; mov rbp, rsp; push rbx; mov CCSTATE, CARG1 | |
| 3299 |.else | |
| 3300 | .type CCSTATE, CCallState, ebx | |
| 3301 | push ebp; mov ebp, esp; push ebx; mov CCSTATE, FCARG1 | |
| 3302 |.endif | |
| 3303 | | |
| 3304 | // Readjust stack. | |
| 3305 |.if X64 | |
| 3306 | mov eax, CCSTATE->spadj | |
| 3307 | sub rsp, rax | |
| 3308 |.else | |
| 3309 | sub esp, CCSTATE->spadj | |
| 3310 |.if WIN | |
| 3311 | mov CCSTATE->spadj, esp | |
| 3312 |.endif | |
| 3313 |.endif | |
| 3314 | | |
| 3315 | // Copy stack slots. Loops from index nsp-1 down to 0; skipped entirely when nsp is 0. | |
| 3316 | movzx ecx, byte CCSTATE->nsp | |
| 3317 | sub ecx, 1 | |
| 3318 | js >2 | |
| 3319 |1: | |
| 3320 |.if X64 | |
| 3321 | mov rax, [CCSTATE+rcx*8+offsetof(CCallState, stack)] | |
| 3322 | mov [rsp+rcx*8+CCALL_SPS_EXTRA*8], rax | |
| 3323 |.else | |
| 3324 | mov eax, [CCSTATE+ecx*4+offsetof(CCallState, stack)] | |
| 3325 | mov [esp+ecx*4], eax | |
| 3326 |.endif | |
| 3327 | sub ecx, 1 | |
| 3328 | jns <1 | |
| 3329 |2: | |
| 3330 | | |
| 3331 |.if X64 | |
| 3332 | movzx eax, byte CCSTATE->nfpr | |
| 3333 | mov CARG1, CCSTATE->gpr[0] | |
| 3334 | mov CARG2, CCSTATE->gpr[1] | |
| 3335 | mov CARG3, CCSTATE->gpr[2] | |
| 3336 | mov CARG4, CCSTATE->gpr[3] | |
| 3337 |.if not X64WIN | |
| 3338 | mov CARG5, CCSTATE->gpr[4] | |
| 3339 | mov CARG6, CCSTATE->gpr[5] | |
| 3340 |.endif | |
| 3341 | test eax, eax; jz >5 | |
| 3342 | movaps xmm0, CCSTATE->fpr[0] | |
| 3343 | movaps xmm1, CCSTATE->fpr[1] | |
| 3344 | movaps xmm2, CCSTATE->fpr[2] | |
| 3345 | movaps xmm3, CCSTATE->fpr[3] | |
| 3346 |.if not X64WIN | |
| 3347 | cmp eax, 4; jbe >5 | |
| 3348 | movaps xmm4, CCSTATE->fpr[4] | |
| 3349 | movaps xmm5, CCSTATE->fpr[5] | |
| 3350 | movaps xmm6, CCSTATE->fpr[6] | |
| 3351 | movaps xmm7, CCSTATE->fpr[7] | |
| 3352 |.endif | |
| 3353 |5: | |
| 3354 |.else | |
| 3355 | mov FCARG1, CCSTATE->gpr[0] | |
| 3356 | mov FCARG2, CCSTATE->gpr[1] | |
| 3357 |.endif | |
| 3358 | | |
| 3359 | call aword CCSTATE->func | |
| 3360 | | |
| 3361 |.if X64 | |
| 3362 | mov CCSTATE->gpr[0], rax | |
| 3363 | movaps CCSTATE->fpr[0], xmm0 | |
| 3364 |.if not X64WIN | |
| 3365 | mov CCSTATE->gpr[1], rdx | |
| 3366 | movaps CCSTATE->fpr[1], xmm1 | |
| 3367 |.endif | |
| 3368 |.else | |
| 3369 | mov CCSTATE->gpr[0], eax | |
| 3370 | mov CCSTATE->gpr[1], edx | |
| 3371 | cmp byte CCSTATE->resx87, 1 | |
| 3372 | jb >7 | |
| 3373 | je >6 | |
| 3374 | fstp qword CCSTATE->fpr[0].d[0] | |
| 3375 | jmp >7 | |
| 3376 |6: | |
| 3377 | fstp dword CCSTATE->fpr[0].f[0] | |
| 3378 |7: | |
| 3379 |.if WIN | |
| 3380 | sub CCSTATE->spadj, esp | |
| 3381 |.endif | |
| 3382 |.endif | |
| 3383 | | |
| 3384 |.if X64 | |
| 3385 | mov rbx, [rbp-8]; leave; ret | |
| 3386 |.else | |
| 3387 | mov ebx, [ebp-4]; leave; ret | |
| 3388 |.endif | |
| 3389 |.endif | |
| 3390 |// Note: vm_ffi_call must be the last function in this object file! | |
| 3391 | | |
| 3392 |//----------------------------------------------------------------------- | |
| 3393 } | |
| 3394 | |
| 3395 /* Generate the code for a single instruction. */ | |
| 3396 static void build_ins(BuildCtx *ctx, BCOp op, int defop) | |
| 3397 { | |
| 3398 int vk = 0; | |
| 3399 |// Note: aligning all instructions does not pay off. | |
| 3400 |=>defop: | |
| 3401 | |
| 3402 switch (op) { | |
| 3403 | |
| 3404 /* -- Comparison ops ---------------------------------------------------- */ | |
| 3405 | |
| 3406 /* Remember: all ops branch for a true comparison, fall through otherwise. The jmp_comp macro below emits exactly one of its four jump arguments, chosen by the opcode being built (ISLT: 1st, ISGE: 2nd, ISLE: 3rd, ISGT: 4th), with 'target' as its operand. */ | |
| 3407 | |
| 3408 |.macro jmp_comp, lt, ge, le, gt, target | |
| 3409 ||switch (op) { | |
| 3410 ||case BC_ISLT: | |
| 3411 | lt target | |
| 3412 ||break; | |
| 3413 ||case BC_ISGE: | |
| 3414 | ge target | |
| 3415 ||break; | |
| 3416 ||case BC_ISLE: | |
| 3417 | le target | |
| 3418 ||break; | |
| 3419 ||case BC_ISGT: | |
| 3420 | gt target | |
| 3421 ||break; | |
| 3422 ||default: break; /* Shut up GCC. */ | |
| 3423 ||} | |
| 3424 |.endmacro | |
| 3425 | |
| 3426 case BC_ISLT: case BC_ISGE: case BC_ISLE: case BC_ISGT: | |
| 3427 | // RA = src1, RD = src2, JMP with RD = target | |
| 3428 | ins_AD | |
| 3429 |.if DUALNUM | |
| 3430 | checkint RA, >7 | |
| 3431 | checkint RD, >8 | |
| 3432 | mov RB, dword [BASE+RA*8] | |
| 3433 | add PC, 4 | |
| 3434 | cmp RB, dword [BASE+RD*8] | |
| 3435 | jmp_comp jge, jl, jg, jle, >9 | |
| 3436 |6: | |
| 3437 | movzx RD, PC_RD | |
| 3438 | branchPC RD | |
| 3439 |9: | |
| 3440 | ins_next | |
| 3441 | | |
| 3442 |7: // RA is not an integer. | |
| 3443 | ja ->vmeta_comp | |
| 3444 | // RA is a number. | |
| 3445 | cmp dword [BASE+RD*8+4], LJ_TISNUM; jb >1; jne ->vmeta_comp | |
| 3446 | // RA is a number, RD is an integer. | |
| 3447 | cvtsi2sd xmm0, dword [BASE+RD*8] | |
| 3448 | jmp >2 | |
| 3449 | | |
| 3450 |8: // RA is an integer, RD is not an integer. | |
| 3451 | ja ->vmeta_comp | |
| 3452 | // RA is an integer, RD is a number. | |
| 3453 | cvtsi2sd xmm1, dword [BASE+RA*8] | |
| 3454 | movsd xmm0, qword [BASE+RD*8] | |
| 3455 | add PC, 4 | |
| 3456 | ucomisd xmm0, xmm1 | |
| 3457 | jmp_comp jbe, ja, jb, jae, <9 | |
| 3458 | jmp <6 | |
| 3459 |.else | |
| 3460 | checknum RA, ->vmeta_comp | |
| 3461 | checknum RD, ->vmeta_comp | |
| 3462 |.endif | |
| 3463 |1: | |
| 3464 | movsd xmm0, qword [BASE+RD*8] | |
| 3465 |2: | |
| 3466 | add PC, 4 | |
| 3467 | ucomisd xmm0, qword [BASE+RA*8] | |
| 3468 |3: | |
| 3469 | // Unordered: all of ZF CF PF set, ordered: PF clear. | |
| 3470 | // To preserve NaN semantics GE/GT branch on unordered, but LT/LE don't. | |
| 3471 |.if DUALNUM | |
| 3472 | jmp_comp jbe, ja, jb, jae, <9 | |
| 3473 | jmp <6 | |
| 3474 |.else | |
| 3475 | jmp_comp jbe, ja, jb, jae, >1 | |
| 3476 | movzx RD, PC_RD | |
| 3477 | branchPC RD | |
| 3478 |1: | |
| 3479 | ins_next | |
| 3480 |.endif | |
| 3481 break; | |
| 3482 | |
| 3483 case BC_ISEQV: case BC_ISNEV: | |
| 3484 vk = op == BC_ISEQV; | |
| 3485 | ins_AD // RA = src1, RD = src2, JMP with RD = target | |
| 3486 | mov RB, [BASE+RD*8+4] | |
| 3487 | add PC, 4 | |
| 3488 |.if DUALNUM | |
| 3489 | cmp RB, LJ_TISNUM; jne >7 | |
| 3490 | checkint RA, >8 | |
| 3491 | mov RB, dword [BASE+RD*8] | |
| 3492 | cmp RB, dword [BASE+RA*8] | |
| 3493 if (vk) { | |
| 3494 | jne >9 | |
| 3495 } else { | |
| 3496 | je >9 | |
| 3497 } | |
| 3498 | movzx RD, PC_RD | |
| 3499 | branchPC RD | |
| 3500 |9: | |
| 3501 | ins_next | |
| 3502 | | |
| 3503 |7: // RD is not an integer. | |
| 3504 | ja >5 | |
| 3505 | // RD is a number. | |
| 3506 | cmp dword [BASE+RA*8+4], LJ_TISNUM; jb >1; jne >5 | |
| 3507 | // RD is a number, RA is an integer. | |
| 3508 | cvtsi2sd xmm0, dword [BASE+RA*8] | |
| 3509 | jmp >2 | |
| 3510 | | |
| 3511 |8: // RD is an integer, RA is not an integer. | |
| 3512 | ja >5 | |
| 3513 | // RD is an integer, RA is a number. | |
| 3514 | cvtsi2sd xmm0, dword [BASE+RD*8] | |
| 3515 | ucomisd xmm0, qword [BASE+RA*8] | |
| 3516 | jmp >4 | |
| 3517 | | |
| 3518 |.else | |
| 3519 | cmp RB, LJ_TISNUM; jae >5 | |
| 3520 | checknum RA, >5 | |
| 3521 |.endif | |
| 3522 |1: | |
| 3523 | movsd xmm0, qword [BASE+RA*8] | |
| 3524 |2: | |
| 3525 | ucomisd xmm0, qword [BASE+RD*8] | |
| 3526 |4: | |
| 3527 iseqne_fp: | |
| 3528 if (vk) { | |
| 3529 | jp >2 // Unordered means not equal. | |
| 3530 | jne >2 | |
| 3531 } else { | |
| 3532 | jp >2 // Unordered means not equal. | |
| 3533 | je >1 | |
| 3534 } | |
| 3535 iseqne_end: | |
| 3536 if (vk) { | |
| 3537 |1: // EQ: Branch to the target. | |
| 3538 | movzx RD, PC_RD | |
| 3539 | branchPC RD | |
| 3540 |2: // NE: Fallthrough to next instruction. | |
| 3541 |.if not FFI | |
| 3542 |3: | |
| 3543 |.endif | |
| 3544 } else { | |
| 3545 |.if not FFI | |
| 3546 |3: | |
| 3547 |.endif | |
| 3548 |2: // NE: Branch to the target. | |
| 3549 | movzx RD, PC_RD | |
| 3550 | branchPC RD | |
| 3551 |1: // EQ: Fallthrough to next instruction. | |
| 3552 } | |
| 3553 if (LJ_DUALNUM && (op == BC_ISEQV || op == BC_ISNEV || | |
| 3554 op == BC_ISEQN || op == BC_ISNEN)) { | |
| 3555 | jmp <9 | |
| 3556 } else { | |
| 3557 | ins_next | |
| 3558 } | |
| 3559 | | |
| 3560 if (op == BC_ISEQV || op == BC_ISNEV) { | |
| 3561 |5: // Either or both types are not numbers. | |
| 3562 |.if FFI | |
| 3563 | cmp RB, LJ_TCDATA; je ->vmeta_equal_cd | |
| 3564 | checktp RA, LJ_TCDATA; je ->vmeta_equal_cd | |
| 3565 |.endif | |
| 3566 | checktp RA, RB // Compare types. | |
| 3567 | jne <2 // Not the same type? | |
| 3568 | cmp RB, LJ_TISPRI | |
| 3569 | jae <1 // Same type and primitive type? | |
| 3570 | | |
| 3571 | // Same types and not a primitive type. Compare GCobj or pvalue. | |
| 3572 | mov RA, [BASE+RA*8] | |
| 3573 | mov RD, [BASE+RD*8] | |
| 3574 | cmp RA, RD | |
| 3575 | je <1 // Same GCobjs or pvalues? | |
| 3576 | cmp RB, LJ_TISTABUD | |
| 3577 | ja <2 // Different objects and not table/ud? | |
| 3578 |.if X64 | |
| 3579 | cmp RB, LJ_TUDATA // And not 64 bit lightuserdata. | |
| 3580 | jb <2 | |
| 3581 |.endif | |
| 3582 | | |
| 3583 | // Different tables or userdatas. Need to check __eq metamethod. | |
| 3584 | // Field metatable must be at same offset for GCtab and GCudata! | |
| 3585 | mov TAB:RB, TAB:RA->metatable | |
| 3586 | test TAB:RB, TAB:RB | |
| 3587 | jz <2 // No metatable? | |
| 3588 | test byte TAB:RB->nomm, 1<<MM_eq | |
| 3589 | jnz <2 // Or 'no __eq' flag set? | |
| 3590 if (vk) { | |
| 3591 | xor RB, RB // ne = 0 | |
| 3592 } else { | |
| 3593 | mov RB, 1 // ne = 1 | |
| 3594 } | |
| 3595 | jmp ->vmeta_equal // Handle __eq metamethod. | |
| 3596 } else { | |
| 3597 |.if FFI | |
| 3598 |3: | |
| 3599 | cmp RB, LJ_TCDATA | |
| 3600 if (LJ_DUALNUM && vk) { | |
| 3601 | jne <9 | |
| 3602 } else { | |
| 3603 | jne <2 | |
| 3604 } | |
| 3605 | jmp ->vmeta_equal_cd | |
| 3606 |.endif | |
| 3607 } | |
| 3608 break; | |
| 3609 case BC_ISEQS: case BC_ISNES: | |
| 3610 vk = op == BC_ISEQS; | |
| 3611 | ins_AND // RA = src, RD = str const, JMP with RD = target | |
| 3612 | mov RB, [BASE+RA*8+4] | |
| 3613 | add PC, 4 | |
| 3614 | cmp RB, LJ_TSTR; jne >3 | |
| 3615 | mov RA, [BASE+RA*8] | |
| 3616 | cmp RA, [KBASE+RD*4] | |
| 3617 iseqne_test: | |
| 3618 if (vk) { | |
| 3619 | jne >2 | |
| 3620 } else { | |
| 3621 | je >1 | |
| 3622 } | |
| 3623 goto iseqne_end; | |
| 3624 case BC_ISEQN: case BC_ISNEN: /* Compare slot RA with a number constant; vk is 1 for the EQ opcode, 0 for NE. */ | |
| 3625 vk = op == BC_ISEQN; | |
| 3626 | ins_AD // RA = src, RD = num const, JMP with RD = target | |
| 3627 | mov RB, [BASE+RA*8+4] // RB = upper dword (type tag) of slot RA. | |
| 3628 | add PC, 4 // Advance PC by 4 bytes. | |
| 3629 |.if DUALNUM | |
| 3630 | cmp RB, LJ_TISNUM; jne >7 // Slot is not an integer: label 7. | |
| 3631 | cmp dword [KBASE+RD*8+4], LJ_TISNUM; jne >8 // Constant is not an integer: label 8. | |
| 3632 | mov RB, dword [KBASE+RD*8] | |
| 3633 | cmp RB, dword [BASE+RA*8] // Both are integers: compare the 32 bit payloads. | |
| 3634 if (vk) { | |
| 3635 | jne >9 // EQ variant, values differ: skip the branch. | |
| 3636 } else { | |
| 3637 | je >9 // NE variant, values equal: skip the branch. | |
| 3638 } | |
| 3639 | movzx RD, PC_RD | |
| 3640 | branchPC RD | |
| 3641 |9: | |
| 3642 | ins_next | |
| 3643 | | |
| 3644 |7: // RA is not an integer. | |
| 3645 | ja >3 // Tag above LJ_TISNUM: label 3 (not defined in this case). | |
| 3646 | // RA is a number. | |
| 3647 | cmp dword [KBASE+RD*8+4], LJ_TISNUM; jb >1 | |
| 3648 | // RA is a number, RD is an integer. | |
| 3649 | cvtsi2sd xmm0, dword [KBASE+RD*8] | |
| 3650 | jmp >2 | |
| 3651 | | |
| 3652 |8: // RA is an integer, RD is a number. | |
| 3653 | cvtsi2sd xmm0, dword [BASE+RA*8] | |
| 3654 | ucomisd xmm0, qword [KBASE+RD*8] | |
| 3655 | jmp >4 | |
| 3656 |.else | |
| 3657 | cmp RB, LJ_TISNUM; jae >3 // Tag not below LJ_TISNUM: label 3 (not defined in this case). | |
| 3658 |.endif | |
| 3659 |1: | |
| 3660 | movsd xmm0, qword [KBASE+RD*8] // xmm0 = number constant. | |
| 3661 |2: | |
| 3662 | ucomisd xmm0, qword [BASE+RA*8] // Compare with the slot value; the flags are presumably evaluated at iseqne_fp. | |
| 3663 |4: | |
| 3664 goto iseqne_fp; /* iseqne_fp is defined outside this case. */ | |
| 3665 case BC_ISEQP: case BC_ISNEP: /* Compare the type tag of slot RA with a primitive type; vk is 1 for the EQ opcode, 0 for NE. */ | |
| 3666 vk = op == BC_ISEQP; | |
| 3667 | ins_AND // RA = src, RD = primitive type (~), JMP with RD = target | |
| 3668 | mov RB, [BASE+RA*8+4] // RB = upper dword (type tag) of slot RA. | |
| 3669 | add PC, 4 // Advance PC by 4 bytes. | |
| 3670 | cmp RB, RD // Compare the tag with the primitive type in RD. | |
| 3671 if (!LJ_HASFFI) goto iseqne_test; /* Without FFI, reuse the conditional-jump tail of BC_ISEQS/BC_ISNES. */ | |
| 3672 if (vk) { | |
| 3673 | jne >3 // Tags differ: check for cdata at 3. | |
| 3674 | movzx RD, PC_RD | |
| 3675 | branchPC RD | |
| 3676 |2: | |
| 3677 | ins_next | |
| 3678 |3: | |
| 3679 | cmp RB, LJ_TCDATA; jne <2 // Not cdata: continue with the next instruction. | |
| 3680 | jmp ->vmeta_equal_cd | |
| 3681 } else { | |
| 3682 | je >2 // Tags equal: the NE variant does not branch. | |
| 3683 | cmp RB, LJ_TCDATA; je ->vmeta_equal_cd | |
| 3684 | movzx RD, PC_RD | |
| 3685 | branchPC RD | |
| 3686 |2: | |
| 3687 | ins_next | |
| 3688 } | |
| 3689 break; | |
| 3690 | |
| 3691 /* -- Unary test and copy ops ------------------------------------------- */ | |
| 3692 | |
| 3693 case BC_ISTC: case BC_ISFC: case BC_IST: case BC_ISF: /* Truthiness test on slot RD; the ISTC/ISFC variants also copy slot RD to slot RA before branching. */ | |
| 3694 | ins_AD // RA = dst or unused, RD = src, JMP with RD = target | |
| 3695 | mov RB, [BASE+RD*8+4] // RB = upper dword (type tag) of slot RD. | |
| 3696 | add PC, 4 // Advance PC by 4 bytes. | |
| 3697 | cmp RB, LJ_TISTRUECOND | |
| 3698 if (op == BC_IST || op == BC_ISTC) { | |
| 3699 | jae >1 // Tag >= LJ_TISTRUECOND: IST/ISTC do not branch. | |
| 3700 } else { | |
| 3701 | jb >1 // Tag < LJ_TISTRUECOND: ISF/ISFC do not branch. | |
| 3702 } | |
| 3703 if (op == BC_ISTC || op == BC_ISFC) { | |
| 3704 | mov [BASE+RA*8+4], RB // Copy the type tag ... | |
| 3705 | mov RB, [BASE+RD*8] | |
| 3706 | mov [BASE+RA*8], RB // ... and the lower dword to slot RA. | |
| 3707 } | |
| 3708 | movzx RD, PC_RD | |
| 3709 | branchPC RD | |
| 3710 |1: // Fallthrough to the next instruction. | |
| 3711 | ins_next | |
| 3712 break; | |
| 3713 | |
| 3714 case BC_ISTYPE: /* Type check of slot RA; a mismatch goes to vmeta_istype. */ | |
| 3715 | ins_AD // RA = src, RD = -type | |
| 3716 | add RD, [BASE+RA*8+4] // RD is -type, so the sum is zero only for a matching type tag. | |
| 3717 | jne ->vmeta_istype | |
| 3718 | ins_next | |
| 3719 break; | |
| 3720 case BC_ISNUM: /* Number check of slot RA using checknum, with vmeta_istype as its branch target. */ | |
| 3721 | ins_AD // RA = src, RD = -(TISNUM-1) | |
| 3722 | checknum RA, ->vmeta_istype | |
| 3723 | ins_next | |
| 3724 break; | |
| 3725 | |
| 3726 /* -- Unary ops --------------------------------------------------------- */ | |
| 3727 | |
| 3728 case BC_MOV: /* Copy slot RD to slot RA. */ | |
| 3729 | ins_AD // RA = dst, RD = src | |
| 3730 |.if X64 | |
| 3731 | mov RBa, [BASE+RD*8] // x64: copy the slot with one load/store pair via RBa. | |
| 3732 | mov [BASE+RA*8], RBa | |
| 3733 |.else | |
| 3734 | mov RB, [BASE+RD*8+4] // Otherwise load the upper dword ... | |
| 3735 | mov RD, [BASE+RD*8] // ... and the lower dword separately. | |
| 3736 | mov [BASE+RA*8+4], RB | |
| 3737 | mov [BASE+RA*8], RD | |
| 3738 |.endif | |
| 3739 | ins_next_ | |
| 3740 break; | |
| 3741 case BC_NOT: /* Logical not of slot RD; only the type tag dword of slot RA is written. */ | |
| 3742 | ins_AD // RA = dst, RD = src | |
| 3743 | xor RB, RB // RB = 0. | |
| 3744 | checktp RD, LJ_TISTRUECOND | |
| 3745 | adc RB, LJ_TTRUE // RB = LJ_TTRUE + CF (CF presumably set by the compare inside checktp). | |
| 3746 | mov [BASE+RA*8+4], RB // Store the resulting type tag. | |
| 3747 | ins_next | |
| 3748 break; | |
| 3749 case BC_UNM: /* Unary minus of slot RD, result stored to slot RA. */ | |
| 3750 | ins_AD // RA = dst, RD = src | |
| 3751 |.if DUALNUM | |
| 3752 | checkint RD, >5 | |
| 3753 | mov RB, [BASE+RD*8] | |
| 3754 | neg RB | |
| 3755 | jo >4 // neg overflowed: store the result as a double at 4. | |
| 3756 | mov dword [BASE+RA*8+4], LJ_TISNUM | |
| 3757 | mov dword [BASE+RA*8], RB | |
| 3758 |9: | |
| 3759 | ins_next | |
| 3760 |4: // Integer overflow: the result is the double 2^31. | |
| 3761 | mov dword [BASE+RA*8+4], 0x41e00000 // 2^31. | |
| 3762 | mov dword [BASE+RA*8], 0 | |
| 3763 | jmp <9 | |
| 3764 |5: // Not an integer. | |
| 3765 | ja ->vmeta_unm // Presumably tag > LJ_TISNUM, i.e. not a number either: fallback. | |
| 3766 |.else | |
| 3767 | checknum RD, ->vmeta_unm | |
| 3768 |.endif | |
| 3769 | movsd xmm0, qword [BASE+RD*8] | |
| 3770 | sseconst_sign xmm1, RDa | |
| 3771 | xorps xmm0, xmm1 // XOR with the constant from sseconst_sign (presumably the sign-bit mask). | |
| 3772 | movsd qword [BASE+RA*8], xmm0 | |
| 3773 |.if DUALNUM | |
| 3774 | jmp <9 // Share the ins_next at label 9. | |
| 3775 |.else | |
| 3776 | ins_next | |
| 3777 |.endif | |
| 3778 break; | |
| 3779 case BC_LEN: /* Length of slot RD stored to slot RA: strings inline, tables via lj_tab_len. */ | |
| 3780 | ins_AD // RA = dst, RD = src | |
| 3781 | checkstr RD, >2 | |
| 3782 | mov STR:RD, [BASE+RD*8] | |
| 3783 |.if DUALNUM | |
| 3784 | mov RD, dword STR:RD->len | |
| 3785 |1: // Store the integer length in RD (also reached from the table path). | |
| 3786 | mov dword [BASE+RA*8+4], LJ_TISNUM | |
| 3787 | mov dword [BASE+RA*8], RD | |
| 3788 |.else | |
| 3789 | xorps xmm0, xmm0 // Zero xmm0 first (cvtsi2sd only writes the low qword). | |
| 3790 | cvtsi2sd xmm0, dword STR:RD->len | |
| 3791 |1: // Store the length in xmm0 (also reached from the table path). | |
| 3792 | movsd qword [BASE+RA*8], xmm0 | |
| 3793 |.endif | |
| 3794 | ins_next | |
| 3795 |2: // Not a string. | |
| 3796 | checktab RD, ->vmeta_len | |
| 3797 | mov TAB:FCARG1, [BASE+RD*8] | |
| 3798 #if LJ_52 | |
| 3799 | mov TAB:RB, TAB:FCARG1->metatable | |
| 3800 | cmp TAB:RB, 0 | |
| 3801 | jnz >9 // Table has a metatable: check for __len at 9. | |
| 3802 |3: | |
| 3803 #endif | |
| 3804 |->BC_LEN_Z: | |
| 3805 | mov RB, BASE // Save BASE. | |
| 3806 | call extern lj_tab_len@4 // (GCtab *t) | |
| 3807 | // Length of table returned in eax (RD). | |
| 3808 |.if DUALNUM | |
| 3809 | // Nothing to do. | |
| 3810 |.else | |
| 3811 | cvtsi2sd xmm0, RD | |
| 3812 |.endif | |
| 3813 | mov BASE, RB // Restore BASE. | |
| 3814 | movzx RA, PC_RA // Refetch RA before storing the result at label 1. | |
| 3815 | jmp <1 | |
| 3816 #if LJ_52 | |
| 3817 |9: // Check for __len. | |
| 3818 | test byte TAB:RB->nomm, 1<<MM_len | |
| 3819 | jnz <3 // 'no __len' flag set: use lj_tab_len. | |
| 3820 | jmp ->vmeta_len // 'no __len' flag NOT set: check. | |
| 3821 #endif | |
| 3822 break; | |
| 3823 | |
| 3824 /* -- Binary ops -------------------------------------------------------- */ | |
| 3825 | |
| 3826 |.macro ins_arithpre, sseins, ssereg // Check operand types, load the first operand into xmm0 and apply 'sseins ssereg, <second operand>'; vk selects the VN/NV/VV layout. | |
| 3827 | ins_ABC | |
| 3828 ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN); | |
| 3829 ||switch (vk) { | |
| 3830 ||case 0: | |
| 3831 | checknum RB, ->vmeta_arith_vn | |
| 3832 | .if DUALNUM | |
| 3833 | cmp dword [KBASE+RC*8+4], LJ_TISNUM; jae ->vmeta_arith_vn | |
| 3834 | .endif | |
| 3835 | movsd xmm0, qword [BASE+RB*8] // VN: first operand is slot RB ... | |
| 3836 | sseins ssereg, qword [KBASE+RC*8] // ... second operand is constant RC. | |
| 3837 || break; | |
| 3838 ||case 1: | |
| 3839 | checknum RB, ->vmeta_arith_nv | |
| 3840 | .if DUALNUM | |
| 3841 | cmp dword [KBASE+RC*8+4], LJ_TISNUM; jae ->vmeta_arith_nv | |
| 3842 | .endif | |
| 3843 | movsd xmm0, qword [KBASE+RC*8] // NV: first operand is constant RC ... | |
| 3844 | sseins ssereg, qword [BASE+RB*8] // ... second operand is slot RB. | |
| 3845 || break; | |
| 3846 ||default: | |
| 3847 | checknum RB, ->vmeta_arith_vv | |
| 3848 | checknum RC, ->vmeta_arith_vv | |
| 3849 | movsd xmm0, qword [BASE+RB*8] // VV: both operands are slots. | |
| 3850 | sseins ssereg, qword [BASE+RC*8] | |
| 3851 || break; | |
| 3852 ||} | |
| 3853 |.endmacro | |
| 3854 | | |
| 3855 |.macro ins_arithdn, intins // Integer arithmetic (used under DUALNUM): non-integer operands and overflow go to the vmeta_arith_* targets. | |
| 3856 | ins_ABC | |
| 3857 ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN); | |
| 3858 ||switch (vk) { | |
| 3859 ||case 0: | |
| 3860 | checkint RB, ->vmeta_arith_vn | |
| 3861 | cmp dword [KBASE+RC*8+4], LJ_TISNUM; jne ->vmeta_arith_vn | |
| 3862 | mov RB, [BASE+RB*8] // VN: RB = slot value, combined with constant RC. | |
| 3863 | intins RB, [KBASE+RC*8]; jo ->vmeta_arith_vno | |
| 3864 || break; | |
| 3865 ||case 1: | |
| 3866 | checkint RB, ->vmeta_arith_nv | |
| 3867 | cmp dword [KBASE+RC*8+4], LJ_TISNUM; jne ->vmeta_arith_nv | |
| 3868 | mov RC, [KBASE+RC*8] // NV: RC = constant, combined with slot RB. | |
| 3869 | intins RC, [BASE+RB*8]; jo ->vmeta_arith_nvo | |
| 3870 || break; | |
| 3871 ||default: | |
| 3872 | checkint RB, ->vmeta_arith_vv | |
| 3873 | checkint RC, ->vmeta_arith_vv | |
| 3874 | mov RB, [BASE+RB*8] // VV: RB = slot value, combined with slot RC. | |
| 3875 | intins RB, [BASE+RC*8]; jo ->vmeta_arith_vvo | |
| 3876 || break; | |
| 3877 ||} | |
| 3878 | mov dword [BASE+RA*8+4], LJ_TISNUM // Tag the result as an integer. | |
| 3879 ||if (vk == 1) { | |
| 3880 | mov dword [BASE+RA*8], RC // NV: the result was computed in RC. | |
| 3881 ||} else { | |
| 3882 | mov dword [BASE+RA*8], RB // VN/VV: the result was computed in RB. | |
| 3883 ||} | |
| 3884 | ins_next | |
| 3885 |.endmacro | |
| 3886 | | |
| 3887 |.macro ins_arithpost // Store the double in xmm0 to slot RA. | |
| 3888 | movsd qword [BASE+RA*8], xmm0 | |
| 3889 |.endmacro | |
| 3890 | | |
| 3891 |.macro ins_arith, sseins // One-argument form: SSE arithmetic in xmm0, store to slot RA, then dispatch. | |
| 3892 | ins_arithpre sseins, xmm0 | |
| 3893 | ins_arithpost | |
| 3894 | ins_next | |
| 3895 |.endmacro | |
| 3896 | | |
| 3897 |.macro ins_arith, intins, sseins // Two-argument form: integer path under DUALNUM, SSE path otherwise. | |
| 3898 |.if DUALNUM | |
| 3899 | ins_arithdn intins | |
| 3900 |.else | |
| 3901 | ins_arith, sseins | |
| 3902 |.endif | |
| 3903 |.endmacro | |
| 3904 | |
| 3905 | // RA = dst, RB = src1 or num const, RC = src2 or num const | |
| 3906 case BC_ADDVN: case BC_ADDNV: case BC_ADDVV: /* Addition: integer 'add' when DUALNUM is set, SSE 'addsd' when it is not. */ | |
| 3907 | ins_arith add, addsd | |
| 3908 break; | |
| 3909 case BC_SUBVN: case BC_SUBNV: case BC_SUBVV: /* Subtraction: integer 'sub' when DUALNUM is set, SSE 'subsd' when it is not. */ | |
| 3910 | ins_arith sub, subsd | |
| 3911 break; | |
| 3912 case BC_MULVN: case BC_MULNV: case BC_MULVV: /* Multiplication: integer 'imul' when DUALNUM is set, SSE 'mulsd' when it is not. */ | |
| 3913 | ins_arith imul, mulsd | |
| 3914 break; | |
| 3915 case BC_DIVVN: case BC_DIVNV: case BC_DIVVV: /* Division: one-argument ins_arith, i.e. always the SSE path with 'divsd'. */ | |
| 3916 | ins_arith divsd | |
| 3917 break; | |
| 3918 case BC_MODVN: /* Modulo (VN form); the shared tail BC_MODVN_Z calls vm_mod. */ | |
| 3919 | ins_arithpre movsd, xmm1 // xmm0 = first operand, xmm1 = second operand. | |
| 3920 |->BC_MODVN_Z: | |
| 3921 | call ->vm_mod | |
| 3922 | ins_arithpost // Store xmm0 to slot RA. | |
| 3923 | ins_next | |
| 3924 break; | |
| 3925 case BC_MODNV: case BC_MODVV: /* Modulo (NV/VV forms): load the operands, then reuse the BC_MODVN tail. */ | |
| 3926 | ins_arithpre movsd, xmm1 // xmm0 = first operand, xmm1 = second operand. | |
| 3927 | jmp ->BC_MODVN_Z // Avoid 3 copies. It's slow anyway. | |
| 3928 break; | |
| 3929 case BC_POW: /* Power: loads both operands and calls the external function pow. */ | |
| 3930 | ins_arithpre movsd, xmm1 // xmm0 = first operand, xmm1 = second operand. | |
| 3931 | mov RB, BASE // Keep BASE in RB across the call; it is copied back below. | |
| 3932 |.if not X64 | |
| 3933 | movsd FPARG1, xmm0 // Non-x64: pass the operands via FPARG1/FPARG3. | |
| 3934 | movsd FPARG3, xmm1 | |
| 3935 |.endif | |
| 3936 | call extern pow | |
| 3937 | movzx RA, PC_RA // Refetch RA after the call. | |
| 3938 | mov BASE, RB | |
| 3939 |.if X64 | |
| 3940 | ins_arithpost | |
| 3941 |.else | |
| 3942 | fstp qword [BASE+RA*8] // Non-x64: pop the result from the x87 stack into slot RA. | |
| 3943 |.endif | |
| 3944 | ins_next | |
| 3945 break; | |
| 3946 | |
| 3947 case BC_CAT: /* Concatenation via lj_meta_cat; on completion the result is copied from slot RB to slot RA. */ | |
| 3948 | ins_ABC // RA = dst, RB = src_start, RC = src_end | |
| 3949 |.if X64 | |
| 3950 | mov L:CARG1d, SAVE_L | |
| 3951 | mov L:CARG1d->base, BASE | |
| 3952 | lea CARG2d, [BASE+RC*8] // top = address of slot RC. | |
| 3953 | mov CARG3d, RC | |
| 3954 | sub CARG3d, RB // left = RC - RB. | |
| 3955 |->BC_CAT_Z: | |
| 3956 | mov L:RB, L:CARG1d | |
| 3957 |.else | |
| 3958 | lea RA, [BASE+RC*8] // top = address of slot RC. | |
| 3959 | sub RC, RB // left = RC - RB. | |
| 3960 | mov ARG2, RA | |
| 3961 | mov ARG3, RC | |
| 3962 |->BC_CAT_Z: | |
| 3963 | mov L:RB, SAVE_L | |
| 3964 | mov ARG1, L:RB | |
| 3965 | mov L:RB->base, BASE | |
| 3966 |.endif | |
| 3967 | mov SAVE_PC, PC | |
| 3968 | call extern lj_meta_cat // (lua_State *L, TValue *top, int left) | |
| 3969 | // NULL (finished) or TValue * (metamethod) returned in eax (RC). | |
| 3970 | mov BASE, L:RB->base // Reload BASE from L->base after the call. | |
| 3971 | test RC, RC | |
| 3972 | jnz ->vmeta_binop | |
| 3973 | movzx RB, PC_RB // Copy result to Stk[RA] from Stk[RB]. | |
| 3974 | movzx RA, PC_RA | |
| 3975 |.if X64 | |
| 3976 | mov RCa, [BASE+RB*8] | |
| 3977 | mov [BASE+RA*8], RCa | |
| 3978 |.else | |
| 3979 | mov RC, [BASE+RB*8+4] | |
| 3980 | mov RB, [BASE+RB*8] | |
| 3981 | mov [BASE+RA*8+4], RC | |
| 3982 | mov [BASE+RA*8], RB | |
| 3983 |.endif | |
| 3984 | ins_next | |
| 3985 break; | |
| 3986 | |
| 3987 /* -- Constant ops ------------------------------------------------------ */ | |
| 3988 | |
| 3989 case BC_KSTR: /* Load a string constant into slot RA. */ | |
| 3990 | ins_AND // RA = dst, RD = str const (~) | |
| 3991 | mov RD, [KBASE+RD*4] // Fetch the constant from [KBASE+RD*4]. | |
| 3992 | mov dword [BASE+RA*8+4], LJ_TSTR // Type tag. | |
| 3993 | mov [BASE+RA*8], RD // Lower dword = the fetched constant. | |
| 3994 | ins_next | |
| 3995 break; | |
| 3996 case BC_KCDATA: /* Load a cdata constant into slot RA; code is only emitted when FFI is set. */ | |
| 3997 |.if FFI | |
| 3998 | ins_AND // RA = dst, RD = cdata const (~) | |
| 3999 | mov RD, [KBASE+RD*4] // Fetch the constant from [KBASE+RD*4]. | |
| 4000 | mov dword [BASE+RA*8+4], LJ_TCDATA // Type tag. | |
| 4001 | mov [BASE+RA*8], RD // Lower dword = the fetched constant. | |
| 4002 | ins_next | |
| 4003 |.endif | |
| 4004 break; | |
| 4005 case BC_KSHORT: /* Load a signed 16 bit literal into slot RA: as an integer under DUALNUM, else as a double. */ | |
| 4006 | ins_AD // RA = dst, RD = signed int16 literal | |
| 4007 |.if DUALNUM | |
| 4008 | movsx RD, RDW // Sign-extend literal. | |
| 4009 | mov dword [BASE+RA*8+4], LJ_TISNUM | |
| 4010 | mov dword [BASE+RA*8], RD | |
| 4011 |.else | |
| 4012 | movsx RD, RDW // Sign-extend literal. | |
| 4013 | cvtsi2sd xmm0, RD // Convert the integer to a double. | |
| 4014 | movsd qword [BASE+RA*8], xmm0 | |
| 4015 |.endif | |
| 4016 | ins_next | |
| 4017 break; | |
| 4018 case BC_KNUM: /* Load a number constant into slot RA. */ | |
| 4019 | ins_AD // RA = dst, RD = num const | |
| 4020 | movsd xmm0, qword [KBASE+RD*8] // Copy the 8 byte constant through xmm0. | |
| 4021 | movsd qword [BASE+RA*8], xmm0 | |
| 4022 | ins_next | |
| 4023 break; | |
| 4024 case BC_KPRI: /* Store a primitive type into slot RA. */ | |
| 4025 | ins_AND // RA = dst, RD = primitive type (~) | |
| 4026 | mov [BASE+RA*8+4], RD // Only the type tag dword is written. | |
| 4027 | ins_next | |
| 4028 break; | |
| 4029 case BC_KNIL: /* Write the nil type tag to slots RA through RD. */ | |
| 4030 | ins_AD // RA = dst_start, RD = dst_end | |
| 4031 | lea RA, [BASE+RA*8+12] // RA = address of the tag dword of slot RA+1. | |
| 4032 | lea RD, [BASE+RD*8+4] // RD = address of the tag dword of the last slot. | |
| 4033 | mov RB, LJ_TNIL | |
| 4034 | mov [RA-8], RB // Sets minimum 2 slots. | |
| 4035 |1: | |
| 4036 | mov [RA], RB | |
| 4037 | add RA, 8 // Next slot's tag. | |
| 4038 | cmp RA, RD | |
| 4039 | jbe <1 // Loop while RA has not passed the last tag. | |
| 4040 | ins_next | |
| 4041 break; | |
| 4042 | |
| 4043 /* -- Upvalue and function ops ------------------------------------------ */ | |
| 4044 | |
| 4045 case BC_UGET: /* Copy the value of upvalue #RD into slot RA. */ | |
| 4046 | ins_AD // RA = dst, RD = upvalue # | |
| 4047 | mov LFUNC:RB, [BASE-8] // RB = function object read from [BASE-8]. | |
| 4048 | mov UPVAL:RB, [LFUNC:RB+RD*4+offsetof(GCfuncL, uvptr)] // RB = uvptr[RD]. | |
| 4049 | mov RB, UPVAL:RB->v // RB = pointer to the upvalue's value. | |
| 4050 |.if X64 | |
| 4051 | mov RDa, [RB] | |
| 4052 | mov [BASE+RA*8], RDa | |
| 4053 |.else | |
| 4054 | mov RD, [RB+4] | |
| 4055 | mov RB, [RB] | |
| 4056 | mov [BASE+RA*8+4], RD | |
| 4057 | mov [BASE+RA*8], RB | |
| 4058 |.endif | |
| 4059 | ins_next | |
| 4060 break; | |
| 4061 case BC_USETV: /* Store slot RD into upvalue #RA, moving the GC write barrier forward if needed. */ | |
| 4062 #define TV2MARKOFS \ | |
| 4063 ((int32_t)offsetof(GCupval, marked)-(int32_t)offsetof(GCupval, tv)) | |
| 4064 | ins_AD // RA = upvalue #, RD = src | |
| 4065 | mov LFUNC:RB, [BASE-8] // RB = function object read from [BASE-8]. | |
| 4066 | mov UPVAL:RB, [LFUNC:RB+RA*4+offsetof(GCfuncL, uvptr)] // RB = uvptr[RA]. | |
| 4067 | cmp byte UPVAL:RB->closed, 0 // Flags stay live across the movs below until 'jz >1'. | |
| 4068 | mov RB, UPVAL:RB->v | |
| 4069 | mov RA, [BASE+RD*8] | |
| 4070 | mov RD, [BASE+RD*8+4] | |
| 4071 | mov [RB], RA | |
| 4072 | mov [RB+4], RD | |
| 4073 | jz >1 // closed == 0: no barrier check. | |
| 4074 | // Check barrier for closed upvalue. | |
| 4075 | test byte [RB+TV2MARKOFS], LJ_GC_BLACK // isblack(uv) | |
| 4076 | jnz >2 | |
| 4077 |1: | |
| 4078 | ins_next | |
| 4079 | | |
| 4080 |2: // Upvalue is black. Check if new value is collectable and white. | |
| 4081 | sub RD, LJ_TISGCV | |
| 4082 | cmp RD, LJ_TNUMX - LJ_TISGCV // tvisgcv(v) | |
| 4083 | jbe <1 // Not a GC value: done. | |
| 4084 | test byte GCOBJ:RA->gch.marked, LJ_GC_WHITES // iswhite(v) | |
| 4085 | jz <1 // Not white: done. | |
| 4086 | // Crossed a write barrier. Move the barrier forward. | |
| 4087 |.if X64 and not X64WIN | |
| 4088 | mov FCARG2, RB | |
| 4089 | mov RB, BASE // Save BASE. | |
| 4090 |.else | |
| 4091 | xchg FCARG2, RB // Save BASE (FCARG2 == BASE). | |
| 4092 |.endif | |
| 4093 | lea GL:FCARG1, [DISPATCH+GG_DISP2G] | |
| 4094 | call extern lj_gc_barrieruv@8 // (global_State *g, TValue *tv) | |
| 4095 | mov BASE, RB // Restore BASE. | |
| 4096 | jmp <1 | |
| 4097 break; | |
| 4098 #undef TV2MARKOFS | |
| 4099 case BC_USETS: /* Store a string constant into upvalue #RA, moving the GC write barrier forward if needed. */ | |
| 4100 | ins_AND // RA = upvalue #, RD = str const (~) | |
| 4101 | mov LFUNC:RB, [BASE-8] // RB = function object read from [BASE-8]. | |
| 4102 | mov UPVAL:RB, [LFUNC:RB+RA*4+offsetof(GCfuncL, uvptr)] // RB = uvptr[RA]. | |
| 4103 | mov GCOBJ:RA, [KBASE+RD*4] // RA = the string constant. | |
| 4104 | mov RD, UPVAL:RB->v // RD = pointer to the upvalue's value. | |
| 4105 | mov [RD], GCOBJ:RA | |
| 4106 | mov dword [RD+4], LJ_TSTR | |
| 4107 | test byte UPVAL:RB->marked, LJ_GC_BLACK // isblack(uv) | |
| 4108 | jnz >2 | |
| 4109 |1: | |
| 4110 | ins_next | |
| 4111 | | |
| 4112 |2: // Check if string is white and ensure upvalue is closed. | |
| 4113 | test byte GCOBJ:RA->gch.marked, LJ_GC_WHITES // iswhite(str) | |
| 4114 | jz <1 // Not white: done. | |
| 4115 | cmp byte UPVAL:RB->closed, 0 | |
| 4116 | jz <1 // closed == 0: done. | |
| 4117 | // Crossed a write barrier. Move the barrier forward. | |
| 4118 | mov RB, BASE // Save BASE (FCARG2 == BASE). | |
| 4119 | mov FCARG2, RD | |
| 4120 | lea GL:FCARG1, [DISPATCH+GG_DISP2G] | |
| 4121 | call extern lj_gc_barrieruv@8 // (global_State *g, TValue *tv) | |
| 4122 | mov BASE, RB // Restore BASE. | |
| 4123 | jmp <1 | |
| 4124 break; | |
| 4125 case BC_USETN: /* Store a number constant into upvalue #RA; no barrier code is emitted here. */ | |
| 4126 | ins_AD // RA = upvalue #, RD = num const | |
| 4127 | mov LFUNC:RB, [BASE-8] // RB = function object read from [BASE-8]. | |
| 4128 | movsd xmm0, qword [KBASE+RD*8] // xmm0 = the number constant. | |
| 4129 | mov UPVAL:RB, [LFUNC:RB+RA*4+offsetof(GCfuncL, uvptr)] // RB = uvptr[RA]. | |
| 4130 | mov RA, UPVAL:RB->v // RA = pointer to the upvalue's value. | |
| 4131 | movsd qword [RA], xmm0 | |
| 4132 | ins_next | |
| 4133 break; | |
| 4134 case BC_USETP: /* Store a primitive type into upvalue #RA. */ | |
| 4135 | ins_AND // RA = upvalue #, RD = primitive type (~) | |
| 4136 | mov LFUNC:RB, [BASE-8] // RB = function object read from [BASE-8]. | |
| 4137 | mov UPVAL:RB, [LFUNC:RB+RA*4+offsetof(GCfuncL, uvptr)] // RB = uvptr[RA]. | |
| 4138 | mov RA, UPVAL:RB->v // RA = pointer to the upvalue's value. | |
| 4139 | mov [RA+4], RD // Only the type tag dword is written. | |
| 4140 | ins_next | |
| 4141 break; | |
| 4142 case BC_UCLO: /* Branch to RD and, if any upvalues are open, call lj_func_closeuv with the address of slot RA as level. */ | |
| 4143 | ins_AD // RA = level, RD = target | |
| 4144 | branchPC RD // Do this first to free RD. | |
| 4145 | mov L:RB, SAVE_L | |
| 4146 | cmp dword L:RB->openupval, 0 | |
| 4147 | je >1 // L->openupval is 0: skip the call. | |
| 4148 | mov L:RB->base, BASE | |
| 4149 | lea FCARG2, [BASE+RA*8] // Caveat: FCARG2 == BASE | |
| 4150 | mov L:FCARG1, L:RB // Caveat: FCARG1 == RA | |
| 4151 | call extern lj_func_closeuv@8 // (lua_State *L, TValue *level) | |
| 4152 | mov BASE, L:RB->base // Reload BASE from L->base after the call. | |
| 4153 |1: | |
| 4154 | ins_next | |
| 4155 break; | |
| 4156 | |
| 4157 case BC_FNEW: /* Create a function object from prototype constant RD via lj_func_newL_gc and store it in slot RA. */ | |
| 4158 | ins_AND // RA = dst, RD = proto const (~) (holding function prototype) | |
| 4159 |.if X64 | |
| 4160 | mov L:RB, SAVE_L | |
| 4161 | mov L:RB->base, BASE // Caveat: CARG2d/CARG3d may be BASE. | |
| 4162 | mov CARG3d, [BASE-8] // parent = function object read from [BASE-8]. | |
| 4163 | mov CARG2d, [KBASE+RD*4] // Fetch GCproto *. | |
| 4164 | mov CARG1d, L:RB | |
| 4165 |.else | |
| 4166 | mov LFUNC:RA, [BASE-8] // parent = function object read from [BASE-8]. | |
| 4167 | mov PROTO:RD, [KBASE+RD*4] // Fetch GCproto *. | |
| 4168 | mov L:RB, SAVE_L | |
| 4169 | mov ARG3, LFUNC:RA | |
| 4170 | mov ARG2, PROTO:RD | |
| 4171 | mov ARG1, L:RB | |
| 4172 | mov L:RB->base, BASE | |
| 4173 |.endif | |
| 4174 | mov SAVE_PC, PC | |
| 4175 | // (lua_State *L, GCproto *pt, GCfuncL *parent) | |
| 4176 | call extern lj_func_newL_gc | |
| 4177 | // GCfuncL * returned in eax (RC). | |
| 4178 | mov BASE, L:RB->base // Reload BASE from L->base after the call. | |
| 4179 | movzx RA, PC_RA // Refetch RA after the call. | |
| 4180 | mov [BASE+RA*8], LFUNC:RC | |
| 4181 | mov dword [BASE+RA*8+4], LJ_TFUNC | |
| 4182 | ins_next | |
| 4183 break; | |
| 4184 | |
| 4185 /* -- Table ops --------------------------------------------------------- */ | |
| 4186 | |
| 4187 case BC_TNEW: /* Create a table via lj_tab_new and store it in slot RA; runs lj_gc_step_fixtop first when gc.total >= gc.threshold. */ | |
| 4188 | ins_AD // RA = dst, RD = hbits|asize | |
| 4189 | mov L:RB, SAVE_L | |
| 4190 | mov L:RB->base, BASE | |
| 4191 | mov RA, [DISPATCH+DISPATCH_GL(gc.total)] | |
| 4192 | cmp RA, [DISPATCH+DISPATCH_GL(gc.threshold)] | |
| 4193 | mov SAVE_PC, PC // mov leaves the flags from the cmp intact. | |
| 4194 | jae >5 // gc.total >= gc.threshold: GC step at 5. | |
| 4195 |1: | |
| 4196 |.if X64 | |
| 4197 | mov CARG3d, RD | |
| 4198 | and RD, 0x7ff // RD = low 11 bits (passed as asize). | |
| 4199 | shr CARG3d, 11 // CARG3d = remaining upper bits (passed as hbits). | |
| 4200 |.else | |
| 4201 | mov RA, RD | |
| 4202 | and RD, 0x7ff // RD = low 11 bits (passed as asize). | |
| 4203 | shr RA, 11 // RA = remaining upper bits (passed as hbits). | |
| 4204 | mov ARG3, RA | |
| 4205 |.endif | |
| 4206 | cmp RD, 0x7ff | |
| 4207 | je >3 | |
| 4208 |2: | |
| 4209 |.if X64 | |
| 4210 | mov L:CARG1d, L:RB | |
| 4211 | mov CARG2d, RD | |
| 4212 |.else | |
| 4213 | mov ARG1, L:RB | |
| 4214 | mov ARG2, RD | |
| 4215 |.endif | |
| 4216 | call extern lj_tab_new // (lua_State *L, int32_t asize, uint32_t hbits) | |
| 4217 | // Table * returned in eax (RC). | |
| 4218 | mov BASE, L:RB->base // Reload BASE from L->base after the call. | |
| 4219 | movzx RA, PC_RA // Refetch RA after the call. | |
| 4220 | mov [BASE+RA*8], TAB:RC | |
| 4221 | mov dword [BASE+RA*8+4], LJ_TTAB | |
| 4222 | ins_next | |
| 4223 |3: // Turn 0x7ff into 0x801. | |
| 4224 | mov RD, 0x801 | |
| 4225 | jmp <2 | |
| 4226 |5: // GC step, then retry from label 1. | |
| 4227 | mov L:FCARG1, L:RB | |
| 4228 | call extern lj_gc_step_fixtop@4 // (lua_State *L) | |
| 4229 | movzx RD, PC_RD // Need to reload RD. | |
| 4230 | jmp <1 | |
| 4231 break; | |
| 4232 case BC_TDUP: /* Duplicate the template table constant via lj_tab_dup and store the copy in slot RA; runs lj_gc_step_fixtop first when gc.total >= gc.threshold. */ | |
| 4233 | ins_AND // RA = dst, RD = table const (~) (holding template table) | |
| 4234 | mov L:RB, SAVE_L | |
| 4235 | mov RA, [DISPATCH+DISPATCH_GL(gc.total)] | |
| 4236 | mov SAVE_PC, PC | |
| 4237 | cmp RA, [DISPATCH+DISPATCH_GL(gc.threshold)] | |
| 4238 | mov L:RB->base, BASE // mov leaves the flags from the cmp intact. | |
| 4239 | jae >3 // gc.total >= gc.threshold: GC step at 3. | |
| 4240 |2: | |
| 4241 | mov TAB:FCARG2, [KBASE+RD*4] // Caveat: FCARG2 == BASE | |
| 4242 | mov L:FCARG1, L:RB // Caveat: FCARG1 == RA | |
| 4243 | call extern lj_tab_dup@8 // (lua_State *L, Table *kt) | |
| 4244 | // Table * returned in eax (RC). | |
| 4245 | mov BASE, L:RB->base // Reload BASE from L->base after the call. | |
| 4246 | movzx RA, PC_RA // Refetch RA after the call. | |
| 4247 | mov [BASE+RA*8], TAB:RC | |
| 4248 | mov dword [BASE+RA*8+4], LJ_TTAB | |
| 4249 | ins_next | |
| 4250 |3: // GC step, then retry from label 2. | |
| 4251 | mov L:FCARG1, L:RB | |
| 4252 | call extern lj_gc_step_fixtop@4 // (lua_State *L) | |
| 4253 | movzx RD, PC_RD // Need to reload RD. | |
| 4254 | not RDa // Complement the reloaded operand again (it is a (~) operand). | |
| 4255 | jmp <2 | |
| 4256 break; | |
| 4257 | |
| 4258 case BC_GGET: /* Look up a string constant key in the current function's env table, using the BC_TGETS tail. */ | |
| 4259 | ins_AND // RA = dst, RD = str const (~) | |
| 4260 | mov LFUNC:RB, [BASE-8] // RB = function object read from [BASE-8]. | |
| 4261 | mov TAB:RB, LFUNC:RB->env // RB = its env table. | |
| 4262 | mov STR:RC, [KBASE+RD*4] // RC = the string constant. | |
| 4263 | jmp ->BC_TGETS_Z | |
| 4264 break; | |
| 4265 case BC_GSET: /* Store under a string constant key in the current function's env table, using the BC_TSETS tail. */ | |
| 4266 | ins_AND // RA = src, RD = str const (~) | |
| 4267 | mov LFUNC:RB, [BASE-8] // RB = function object read from [BASE-8]. | |
| 4268 | mov TAB:RB, LFUNC:RB->env // RB = its env table. | |
| 4269 | mov STR:RC, [KBASE+RD*4] // RC = the string constant. | |
| 4270 | jmp ->BC_TSETS_Z | |
| 4271 break; | |
| 4272 | |
| 4273 case BC_TGETV: /* Table get with a variable key: inline array-part lookup for integer keys, string keys via the BC_TGETS tail, otherwise vmeta_tgetv. */ | |
| 4274 | ins_ABC // RA = dst, RB = table, RC = key | |
| 4275 | checktab RB, ->vmeta_tgetv | |
| 4276 | mov TAB:RB, [BASE+RB*8] | |
| 4277 | | |
| 4278 | // Integer key? | |
| 4279 |.if DUALNUM | |
| 4280 | checkint RC, >5 | |
| 4281 | mov RC, dword [BASE+RC*8] | |
| 4282 |.else | |
| 4283 | // Convert number to int and back and compare. | |
| 4284 | checknum RC, >5 | |
| 4285 | movsd xmm0, qword [BASE+RC*8] | |
| 4286 | cvttsd2si RC, xmm0 | |
| 4287 | cvtsi2sd xmm1, RC | |
| 4288 | ucomisd xmm0, xmm1 | |
| 4289 | jne ->vmeta_tgetv // Generic numeric key? Use fallback. | |
| 4290 |.endif | |
| 4291 | cmp RC, TAB:RB->asize // Takes care of unordered, too. | |
| 4292 | jae ->vmeta_tgetv // Not in array part? Use fallback. | |
| 4293 | shl RC, 3 // RC = key * 8. | |
| 4294 | add RC, TAB:RB->array // RC = address of the array slot. | |
| 4295 | cmp dword [RC+4], LJ_TNIL // Avoid overwriting RB in fastpath. | |
| 4296 | je >2 // Slot holds nil: check for __index at 2. | |
| 4297 | // Get array slot. | |
| 4298 |.if X64 | |
| 4299 | mov RBa, [RC] | |
| 4300 | mov [BASE+RA*8], RBa | |
| 4301 |.else | |
| 4302 | mov RB, [RC] | |
| 4303 | mov RC, [RC+4] | |
| 4304 | mov [BASE+RA*8], RB | |
| 4305 | mov [BASE+RA*8+4], RC | |
| 4306 |.endif | |
| 4307 |1: | |
| 4308 | ins_next | |
| 4309 | | |
| 4310 |2: // Check for __index if table value is nil. | |
| 4311 | cmp dword TAB:RB->metatable, 0 // Shouldn't overwrite RA for fastpath. | |
| 4312 | jz >3 // No metatable: the result is nil. | |
| 4313 | mov TAB:RA, TAB:RB->metatable | |
| 4314 | test byte TAB:RA->nomm, 1<<MM_index | |
| 4315 | jz ->vmeta_tgetv // 'no __index' flag NOT set: check. | |
| 4316 | movzx RA, PC_RA // Restore RA. | |
| 4317 |3: | |
| 4318 | mov dword [BASE+RA*8+4], LJ_TNIL // Store nil (tag only) to slot RA. | |
| 4319 | jmp <1 | |
| 4320 | | |
| 4321 |5: // String key? | |
| 4322 | checkstr RC, ->vmeta_tgetv | |
| 4323 | mov STR:RC, [BASE+RC*8] | |
| 4324 | jmp ->BC_TGETS_Z | |
| 4325 break; | |
| 4326 case BC_TGETS: /* Table get with a string constant key: walks the hash chain inline; BC_TGETS_Z is also entered from BC_GGET and BC_TGETV. */ | |
| 4327 | ins_ABC // RA = dst, RB = table, RC = str const (~) | |
| 4328 | not RCa // Complement the (~) operand before indexing KBASE. | |
| 4329 | mov STR:RC, [KBASE+RC*4] | |
| 4330 | checktab RB, ->vmeta_tgets | |
| 4331 | mov TAB:RB, [BASE+RB*8] | |
| 4332 |->BC_TGETS_Z: // RB = GCtab *, RC = GCstr *, refetches PC_RA. | |
| 4333 | mov RA, TAB:RB->hmask | |
| 4334 | and RA, STR:RC->sid // RA = hmask & sid. | |
| 4335 | imul RA, #NODE // Multiply by #NODE to get a byte offset. | |
| 4336 | add NODE:RA, TAB:RB->node // RA = first node of the chain. | |
| 4337 |1: | |
| 4338 | cmp dword NODE:RA->key.it, LJ_TSTR | |
| 4339 | jne >4 // Key is not a string: next node. | |
| 4340 | cmp dword NODE:RA->key.gcr, STR:RC | |
| 4341 | jne >4 // Different string: next node. | |
| 4342 | // Ok, key found. Assumes: offsetof(Node, val) == 0 | |
| 4343 | cmp dword [RA+4], LJ_TNIL // Avoid overwriting RB in fastpath. | |
| 4344 | je >5 // Key found, but nil value? | |
| 4345 | movzx RC, PC_RA // RC = destination slot number (RA holds the node). | |
| 4346 | // Get node value. | |
| 4347 |.if X64 | |
| 4348 | mov RBa, [RA] | |
| 4349 | mov [BASE+RC*8], RBa | |
| 4350 |.else | |
| 4351 | mov RB, [RA] | |
| 4352 | mov RA, [RA+4] | |
| 4353 | mov [BASE+RC*8], RB | |
| 4354 | mov [BASE+RC*8+4], RA | |
| 4355 |.endif | |
| 4356 |2: | |
| 4357 | ins_next | |
| 4358 | | |
| 4359 |3: // Store nil (tag only) to the destination slot. | |
| 4360 | movzx RC, PC_RA | |
| 4361 | mov dword [BASE+RC*8+4], LJ_TNIL | |
| 4362 | jmp <2 | |
| 4363 | | |
| 4364 |4: // Follow hash chain. | |
| 4365 | mov NODE:RA, NODE:RA->next | |
| 4366 | test NODE:RA, NODE:RA | |
| 4367 | jnz <1 | |
| 4368 | // End of hash chain: key not found, nil result. | |
| 4369 | | |
| 4370 |5: // Check for __index if table value is nil. | |
| 4371 | mov TAB:RA, TAB:RB->metatable | |
| 4372 | test TAB:RA, TAB:RA | |
| 4373 | jz <3 // No metatable: done. | |
| 4374 | test byte TAB:RA->nomm, 1<<MM_index | |
| 4375 | jnz <3 // 'no __index' flag set: done. | |
| 4376 | jmp ->vmeta_tgets // Caveat: preserve STR:RC. | |
| 4377 break; | |
| 4378 case BC_TGETB: /* Table get with a byte literal key: array part only, otherwise vmeta_tgetb. */ | |
| 4379 | ins_ABC // RA = dst, RB = table, RC = byte literal | |
| 4380 | checktab RB, ->vmeta_tgetb | |
| 4381 | mov TAB:RB, [BASE+RB*8] | |
| 4382 | cmp RC, TAB:RB->asize | |
| 4383 | jae ->vmeta_tgetb // Key >= asize: use fallback. | |
| 4384 | shl RC, 3 // RC = key * 8. | |
| 4385 | add RC, TAB:RB->array // RC = address of the array slot. | |
| 4386 | cmp dword [RC+4], LJ_TNIL // Avoid overwriting RB in fastpath. | |
| 4387 | je >2 // Slot holds nil: check for __index at 2. | |
| 4388 | // Get array slot. | |
| 4389 |.if X64 | |
| 4390 | mov RBa, [RC] | |
| 4391 | mov [BASE+RA*8], RBa | |
| 4392 |.else | |
| 4393 | mov RB, [RC] | |
| 4394 | mov RC, [RC+4] | |
| 4395 | mov [BASE+RA*8], RB | |
| 4396 | mov [BASE+RA*8+4], RC | |
| 4397 |.endif | |
| 4398 |1: | |
| 4399 | ins_next | |
| 4400 | | |
| 4401 |2: // Check for __index if table value is nil. | |
| 4402 | cmp dword TAB:RB->metatable, 0 // Shouldn't overwrite RA for fastpath. | |
| 4403 | jz >3 // No metatable: the result is nil. | |
| 4404 | mov TAB:RA, TAB:RB->metatable | |
| 4405 | test byte TAB:RA->nomm, 1<<MM_index | |
| 4406 | jz ->vmeta_tgetb // 'no __index' flag NOT set: check. | |
| 4407 | movzx RA, PC_RA // Restore RA. | |
| 4408 |3: | |
| 4409 | mov dword [BASE+RA*8+4], LJ_TNIL // Store nil (tag only) to slot RA. | |
| 4410 | jmp <1 | |
| 4411 break; | |
| 4412 case BC_TGETR: /* Array-part table get; no table type check and no nil/metatable check are emitted here. */ | |
| 4413 | ins_ABC // RA = dst, RB = table, RC = key | |
| 4414 | mov TAB:RB, [BASE+RB*8] | |
| 4415 |.if DUALNUM | |
| 4416 | mov RC, dword [BASE+RC*8] // Key is read as a 32 bit integer. | |
| 4417 |.else | |
| 4418 | cvttsd2si RC, qword [BASE+RC*8] // Key is read as a double and truncated to an integer. | |
| 4419 |.endif | |
| 4420 | cmp RC, TAB:RB->asize | |
| 4421 | jae ->vmeta_tgetr // Not in array part? Use fallback. | |
| 4422 | shl RC, 3 // RC = key * 8. | |
| 4423 | add RC, TAB:RB->array // RC = address of the array slot. | |
| 4424 | // Get array slot. | |
| 4425 |->BC_TGETR_Z: | |
| 4426 |.if X64 | |
| 4427 | mov RBa, [RC] | |
| 4428 | mov [BASE+RA*8], RBa | |
| 4429 |.else | |
| 4430 | mov RB, [RC] | |
| 4431 | mov RC, [RC+4] | |
| 4432 | mov [BASE+RA*8], RB | |
| 4433 | mov [BASE+RA*8+4], RC | |
| 4434 |.endif | |
| 4435 |->BC_TGETR2_Z: | |
| 4436 | ins_next | |
| 4437 break; | |
| 4438 | |
| 4439 case BC_TSETV: /* Table set with a variable key: inline array-part store for integer keys, string keys via the BC_TSETS tail, otherwise vmeta_tsetv. */ | |
| 4440 | ins_ABC // RA = src, RB = table, RC = key | |
| 4441 | checktab RB, ->vmeta_tsetv | |
| 4442 | mov TAB:RB, [BASE+RB*8] | |
| 4443 | | |
| 4444 | // Integer key? | |
| 4445 |.if DUALNUM | |
| 4446 | checkint RC, >5 | |
| 4447 | mov RC, dword [BASE+RC*8] | |
| 4448 |.else | |
| 4449 | // Convert number to int and back and compare. | |
| 4450 | checknum RC, >5 | |
| 4451 | movsd xmm0, qword [BASE+RC*8] | |
| 4452 | cvttsd2si RC, xmm0 | |
| 4453 | cvtsi2sd xmm1, RC | |
| 4454 | ucomisd xmm0, xmm1 | |
| 4455 | jne ->vmeta_tsetv // Generic numeric key? Use fallback. | |
| 4456 |.endif | |
| 4457 | cmp RC, TAB:RB->asize // Takes care of unordered, too. | |
| 4458 | jae ->vmeta_tsetv // Key >= asize: use fallback. | |
| 4459 | shl RC, 3 // RC = key * 8. | |
| 4460 | add RC, TAB:RB->array // RC = address of the array slot. | |
| 4461 | cmp dword [RC+4], LJ_TNIL | |
| 4462 | je >3 // Previous value is nil? | |
| 4463 |1: | |
| 4464 | test byte TAB:RB->marked, LJ_GC_BLACK // isblack(table) | |
| 4465 | jnz >7 | |
| 4466 |2: // Set array slot. | |
| 4467 |.if X64 | |
| 4468 | mov RBa, [BASE+RA*8] | |
| 4469 | mov [RC], RBa | |
| 4470 |.else | |
| 4471 | mov RB, [BASE+RA*8+4] | |
| 4472 | mov RA, [BASE+RA*8] | |
| 4473 | mov [RC+4], RB | |
| 4474 | mov [RC], RA | |
| 4475 |.endif | |
| 4476 | ins_next | |
| 4477 | | |
| 4478 |3: // Check for __newindex if previous value is nil. | |
| 4479 | cmp dword TAB:RB->metatable, 0 // Shouldn't overwrite RA for fastpath. | |
| 4480 | jz <1 // No metatable: continue with the store. | |
| 4481 | mov TAB:RA, TAB:RB->metatable | |
| 4482 | test byte TAB:RA->nomm, 1<<MM_newindex | |
| 4483 | jz ->vmeta_tsetv // 'no __newindex' flag NOT set: check. | |
| 4484 | movzx RA, PC_RA // Restore RA. | |
| 4485 | jmp <1 | |
| 4486 | | |
| 4487 |5: // String key? | |
| 4488 | checkstr RC, ->vmeta_tsetv | |
| 4489 | mov STR:RC, [BASE+RC*8] | |
| 4490 | jmp ->BC_TSETS_Z | |
| 4491 | | |
| 4492 |7: // Possible table write barrier for the value. Skip valiswhite check. | |
| 4493 | barrierback TAB:RB, RA | |
| 4494 | movzx RA, PC_RA // Restore RA. | |
| 4495 | jmp <2 | |
| 4496 break; | |
| 4497 case BC_TSETS: /* Table set with a string constant key: walks the hash chain inline and calls lj_tab_newkey for a missing key; BC_TSETS_Z is also entered from BC_GSET and BC_TSETV. */ | |
| 4498 | ins_ABC // RA = src, RB = table, RC = str const (~) | |
| 4499 | not RCa // Complement the (~) operand before indexing KBASE. | |
| 4500 | mov STR:RC, [KBASE+RC*4] | |
| 4501 | checktab RB, ->vmeta_tsets | |
| 4502 | mov TAB:RB, [BASE+RB*8] | |
| 4503 |->BC_TSETS_Z: // RB = GCtab *, RC = GCstr *, refetches PC_RA. | |
| 4504 | mov RA, TAB:RB->hmask | |
| 4505 | and RA, STR:RC->sid // RA = hmask & sid. | |
| 4506 | imul RA, #NODE // Multiply by #NODE to get a byte offset. | |
| 4507 | mov byte TAB:RB->nomm, 0 // Clear metamethod cache. | |
| 4508 | add NODE:RA, TAB:RB->node // RA = first node of the chain. | |
| 4509 |1: | |
| 4510 | cmp dword NODE:RA->key.it, LJ_TSTR | |
| 4511 | jne >5 // Key is not a string: next node. | |
| 4512 | cmp dword NODE:RA->key.gcr, STR:RC | |
| 4513 | jne >5 // Different string: next node. | |
| 4514 | // Ok, key found. Assumes: offsetof(Node, val) == 0 | |
| 4515 | cmp dword [RA+4], LJ_TNIL | |
| 4516 | je >4 // Previous value is nil? | |
| 4517 |2: | |
| 4518 | test byte TAB:RB->marked, LJ_GC_BLACK // isblack(table) | |
| 4519 | jnz >7 | |
| 4520 |3: // Set node value. | |
| 4521 | movzx RC, PC_RA // RC = source slot number (RA holds the node). | |
| 4522 |.if X64 | |
| 4523 | mov RBa, [BASE+RC*8] | |
| 4524 | mov [RA], RBa | |
| 4525 |.else | |
| 4526 | mov RB, [BASE+RC*8+4] | |
| 4527 | mov RC, [BASE+RC*8] | |
| 4528 | mov [RA+4], RB | |
| 4529 | mov [RA], RC | |
| 4530 |.endif | |
| 4531 | ins_next | |
| 4532 | | |
| 4533 |4: // Check for __newindex if previous value is nil. | |
| 4534 | cmp dword TAB:RB->metatable, 0 // Shouldn't overwrite RA for fastpath. | |
| 4535 | jz <2 // No metatable: continue with the store. | |
| 4536 | mov TMP1, RA // Save RA. | |
| 4537 | mov TAB:RA, TAB:RB->metatable | |
| 4538 | test byte TAB:RA->nomm, 1<<MM_newindex | |
| 4539 | jz ->vmeta_tsets // 'no __newindex' flag NOT set: check. | |
| 4540 | mov RA, TMP1 // Restore RA. | |
| 4541 | jmp <2 | |
| 4542 | | |
| 4543 |5: // Follow hash chain. | |
| 4544 | mov NODE:RA, NODE:RA->next | |
| 4545 | test NODE:RA, NODE:RA | |
| 4546 | jnz <1 | |
| 4547 | // End of hash chain: key not found, add a new one. | |
| 4548 | | |
| 4549 | // But check for __newindex first. | |
| 4550 | mov TAB:RA, TAB:RB->metatable | |
| 4551 | test TAB:RA, TAB:RA | |
| 4552 | jz >6 // No metatable: continue. | |
| 4553 | test byte TAB:RA->nomm, 1<<MM_newindex | |
| 4554 | jz ->vmeta_tsets // 'no __newindex' flag NOT set: check. | |
| 4555 |6: | |
| 4556 | mov TMP1, STR:RC // Build the key in TMP1/TMP2: string reference ... | |
| 4557 | mov TMP2, LJ_TSTR // ... and string type tag. | |
| 4558 | mov TMP3, TAB:RB // Save TAB:RB for us. | |
| 4559 |.if X64 | |
| 4560 | mov L:CARG1d, SAVE_L | |
| 4561 | mov L:CARG1d->base, BASE | |
| 4562 | lea CARG3, TMP1 // k = address of the temporary key. | |
| 4563 | mov CARG2d, TAB:RB | |
| 4564 | mov L:RB, L:CARG1d | |
| 4565 |.else | |
| 4566 | lea RC, TMP1 // Store temp. TValue in TMP1/TMP2. | |
| 4567 | mov ARG2, TAB:RB | |
| 4568 | mov L:RB, SAVE_L | |
| 4569 | mov ARG3, RC | |
| 4570 | mov ARG1, L:RB | |
| 4571 | mov L:RB->base, BASE | |
| 4572 |.endif | |
| 4573 | mov SAVE_PC, PC | |
| 4574 | call extern lj_tab_newkey // (lua_State *L, GCtab *t, TValue *k) | |
| 4575 | // Handles write barrier for the new key. TValue * returned in eax (RC). | |
| 4576 | mov BASE, L:RB->base // Reload BASE from L->base after the call. | |
| 4577 | mov TAB:RB, TMP3 // Need TAB:RB for barrier. | |
| 4578 | mov RA, eax // RA = returned TValue *, used as the store target at label 3. | |
| 4579 | jmp <2 // Must check write barrier for value. | |
| 4580 | | |
| 4581 |7: // Possible table write barrier for the value. Skip valiswhite check. | |
| 4582 | barrierback TAB:RB, RC // Destroys STR:RC. | |
| 4583 | jmp <3 | |
| 4584 break; | |
| 4585 case BC_TSETB: /* Table set with a byte literal key: array part only, otherwise vmeta_tsetb. */ | |
| 4586 | ins_ABC // RA = src, RB = table, RC = byte literal | |
| 4587 | checktab RB, ->vmeta_tsetb | |
| 4588 | mov TAB:RB, [BASE+RB*8] | |
| 4589 | cmp RC, TAB:RB->asize | |
| 4590 | jae ->vmeta_tsetb // Key >= asize: use fallback. | |
| 4591 | shl RC, 3 // RC = key * 8. | |
| 4592 | add RC, TAB:RB->array // RC = address of the array slot. | |
| 4593 | cmp dword [RC+4], LJ_TNIL | |
| 4594 | je >3 // Previous value is nil? | |
| 4595 |1: | |
| 4596 | test byte TAB:RB->marked, LJ_GC_BLACK // isblack(table) | |
| 4597 | jnz >7 | |
| 4598 |2: // Set array slot. | |
| 4599 |.if X64 | |
| 4600 | mov RAa, [BASE+RA*8] | |
| 4601 | mov [RC], RAa | |
| 4602 |.else | |
| 4603 | mov RB, [BASE+RA*8+4] | |
| 4604 | mov RA, [BASE+RA*8] | |
| 4605 | mov [RC+4], RB | |
| 4606 | mov [RC], RA | |
| 4607 |.endif | |
| 4608 | ins_next | |
| 4609 | | |
| 4610 |3: // Check for __newindex if previous value is nil. | |
| 4611 | cmp dword TAB:RB->metatable, 0 // Shouldn't overwrite RA for fastpath. | |
| 4612 | jz <1 // No metatable: continue with the store. | |
| 4613 | mov TAB:RA, TAB:RB->metatable | |
| 4614 | test byte TAB:RA->nomm, 1<<MM_newindex | |
| 4615 | jz ->vmeta_tsetb // 'no __newindex' flag NOT set: check. | |
| 4616 | movzx RA, PC_RA // Restore RA. | |
| 4617 | jmp <1 | |
| 4618 | | |
| 4619 |7: // Possible table write barrier for the value. Skip valiswhite check. | |
| 4620 | barrierback TAB:RB, RA | |
| 4621 | movzx RA, PC_RA // Restore RA. | |
| 4622 | jmp <2 | |
| 4623 break; | |
| 4624 case BC_TSETR: /* Store stack slot RA into the array part of table RB at the integer key taken from slot RC; this handler emits no operand type checks and no nil/__newindex check. */ | |
| 4625 | ins_ABC // RA = src, RB = table, RC = key (loaded as a dword under DUALNUM, else truncated from a double) | |
| 4626 | mov TAB:RB, [BASE+RB*8] | |
| 4627 |.if DUALNUM | |
| 4628 | mov RC, dword [BASE+RC*8] | |
| 4629 |.else | |
| 4630 | cvttsd2si RC, qword [BASE+RC*8] | |
| 4631 |.endif | |
| 4632 | test byte TAB:RB->marked, LJ_GC_BLACK // isblack(table)? Then go through the barrier path at 7:. | |
| 4633 | jnz >7 | |
| 4634 |2: | |
| 4635 | cmp RC, TAB:RB->asize | |
| 4636 | jae ->vmeta_tsetr | |
| 4637 | shl RC, 3 | |
| 4638 | add RC, TAB:RB->array | |
| 4639 | // Set array slot at RC = array + key*8. Keys >= asize (unsigned compare) were sent to vmeta_tsetr above. | |
| 4640 |->BC_TSETR_Z: | |
| 4641 |.if X64 | |
| 4642 | mov RBa, [BASE+RA*8] | |
| 4643 | mov [RC], RBa | |
| 4644 |.else | |
| 4645 | mov RB, [BASE+RA*8+4] | |
| 4646 | mov RA, [BASE+RA*8] | |
| 4647 | mov [RC+4], RB | |
| 4648 | mov [RC], RA | |
| 4649 |.endif | |
| 4650 | ins_next | |
| 4651 | | |
| 4652 |7: // Possible table write barrier for the value. Skip valiswhite check. RA is handed to barrierback and reloaded afterwards. | |
| 4653 | barrierback TAB:RB, RA | |
| 4654 | movzx RA, PC_RA // Restore RA. | |
| 4655 | jmp <2 | |
| 4656 break; | |
| 4657 | |
| 4658 case BC_TSETM: | |
| 4659 | ins_AD // RA = base (table at base-1), RD = num const (start index) | |
| 4660 | mov TMP1, KBASE // Need one more free register. | |
| 4661 | mov KBASE, dword [KBASE+RD*8] // Integer constant is in lo-word. | |
| 4662 |1: | |
| 4663 | lea RA, [BASE+RA*8] | |
| 4664 | mov TAB:RB, [RA-8] // Guaranteed to be a table. | |
| 4665 | test byte TAB:RB->marked, LJ_GC_BLACK // isblack(table) | |
| 4666 | jnz >7 | |
| 4667 |2: | |
| 4668 | mov RD, MULTRES | |
| 4669 | sub RD, 1 | |
| 4670 | jz >4 // Nothing to copy? | |
| 4671 | add RD, KBASE // Compute needed size. | |
| 4672 | cmp RD, TAB:RB->asize | |
| 4673 | ja >5 // Doesn't fit into array part? | |
| 4674 | sub RD, KBASE | |
| 4675 | shl KBASE, 3 | |
| 4676 | add KBASE, TAB:RB->array | |
| 4677 |3: // Copy result slots to table. | |
| 4678 |.if X64 | |
| 4679 | mov RBa, [RA] | |
| 4680 | add RA, 8 | |
| 4681 | mov [KBASE], RBa | |
| 4682 |.else | |
| 4683 | mov RB, [RA] | |
| 4684 | mov [KBASE], RB | |
| 4685 | mov RB, [RA+4] | |
| 4686 | add RA, 8 | |
| 4687 | mov [KBASE+4], RB | |
| 4688 |.endif | |
| 4689 | add KBASE, 8 | |
| 4690 | sub RD, 1 | |
| 4691 | jnz <3 | |
| 4692 |4: | |
| 4693 | mov KBASE, TMP1 | |
| 4694 | ins_next | |
| 4695 | | |
| 4696 |5: // Need to resize array part. | |
| 4697 |.if X64 | |
| 4698 | mov L:CARG1d, SAVE_L | |
| 4699 | mov L:CARG1d->base, BASE // Caveat: CARG2d/CARG3d may be BASE. | |
| 4700 | mov CARG2d, TAB:RB | |
| 4701 | mov CARG3d, RD | |
| 4702 | mov L:RB, L:CARG1d | |
| 4703 |.else | |
| 4704 | mov ARG2, TAB:RB | |
| 4705 | mov L:RB, SAVE_L | |
| 4706 | mov L:RB->base, BASE | |
| 4707 | mov ARG3, RD | |
| 4708 | mov ARG1, L:RB | |
| 4709 |.endif | |
| 4710 | mov SAVE_PC, PC | |
| 4711 | call extern lj_tab_reasize // (lua_State *L, GCtab *t, int nasize) | |
| 4712 | mov BASE, L:RB->base | |
| 4713 | movzx RA, PC_RA // Restore RA. | |
| 4714 | jmp <1 // Retry. | |
| 4715 | | |
| 4716 |7: // Possible table write barrier for any value. Skip valiswhite check. | |
| 4717 | barrierback TAB:RB, RD | |
| 4718 | jmp <2 | |
| 4719 break; | |
| 4720 | |
| 4721 /* -- Calls and vararg handling ----------------------------------------- */ | |
| 4722 | |
| 4723 case BC_CALL: case BC_CALLM: /* Call the value in slot RA; a slot whose type tag is not LJ_TFUNC is diverted to vmeta_call_ra. */ | |
| 4724 | ins_A_C // RA = base, (RB = nresults+1,) RC = nargs+1 | extra_nargs | |
| 4725 if (op == BC_CALLM) { /* CALLM only: add MULTRES to the argument count operand. */ | |
| 4726 | add NARGS:RD, MULTRES | |
| 4727 } /* The LFUNC load below sits between the cmp and its jne; mov leaves the flags intact. */ | |
| 4728 | cmp dword [BASE+RA*8+4], LJ_TFUNC | |
| 4729 | mov LFUNC:RB, [BASE+RA*8] | |
| 4730 | jne ->vmeta_call_ra | |
| 4731 | lea BASE, [BASE+RA*8+8] | |
| 4732 | ins_call | |
| 4733 break; | |
| 4734 | |
| 4735 case BC_CALLMT: | |
| 4736 | ins_AD // RA = base, RD = extra_nargs | |
| 4737 | add NARGS:RD, MULTRES | |
| 4738 | // Fall through. Assumes BC_CALLT follows and ins_AD is a no-op. | |
| 4739 break; | |
| 4740 case BC_CALLT: | |
| 4741 | ins_AD // RA = base, RD = nargs+1 | |
| 4742 | lea RA, [BASE+RA*8+8] | |
| 4743 | mov KBASE, BASE // Use KBASE for move + vmeta_call hint. | |
| 4744 | mov LFUNC:RB, [RA-8] | |
| 4745 | cmp dword [RA-4], LJ_TFUNC | |
| 4746 | jne ->vmeta_call | |
| 4747 |->BC_CALLT_Z: | |
| 4748 | mov PC, [BASE-4] | |
| 4749 | test PC, FRAME_TYPE | |
| 4750 | jnz >7 | |
| 4751 |1: | |
| 4752 | mov [BASE-8], LFUNC:RB // Copy function down, reloaded below. | |
| 4753 | mov MULTRES, NARGS:RD | |
| 4754 | sub NARGS:RD, 1 | |
| 4755 | jz >3 | |
| 4756 |2: // Move args down. | |
| 4757 |.if X64 | |
| 4758 | mov RBa, [RA] | |
| 4759 | add RA, 8 | |
| 4760 | mov [KBASE], RBa | |
| 4761 |.else | |
| 4762 | mov RB, [RA] | |
| 4763 | mov [KBASE], RB | |
| 4764 | mov RB, [RA+4] | |
| 4765 | add RA, 8 | |
| 4766 | mov [KBASE+4], RB | |
| 4767 |.endif | |
| 4768 | add KBASE, 8 | |
| 4769 | sub NARGS:RD, 1 | |
| 4770 | jnz <2 | |
| 4771 | | |
| 4772 | mov LFUNC:RB, [BASE-8] | |
| 4773 |3: | |
| 4774 | mov NARGS:RD, MULTRES | |
| 4775 | cmp byte LFUNC:RB->ffid, 1 // (> FF_C) Calling a fast function? | |
| 4776 | ja >5 | |
| 4777 |4: | |
| 4778 | ins_callt | |
| 4779 | | |
| 4780 |5: // Tailcall to a fast function. | |
| 4781 | test PC, FRAME_TYPE // Lua frame below? | |
| 4782 | jnz <4 | |
| 4783 | movzx RA, PC_RA | |
| 4784 | not RAa | |
| 4785 | mov LFUNC:KBASE, [BASE+RA*8-8] // Need to prepare KBASE. | |
| 4786 | mov KBASE, LFUNC:KBASE->pc | |
| 4787 | mov KBASE, [KBASE+PC2PROTO(k)] | |
| 4788 | jmp <4 | |
| 4789 | | |
| 4790 |7: // Tailcall from a vararg function. | |
| 4791 | sub PC, FRAME_VARG | |
| 4792 | test PC, FRAME_TYPEP | |
| 4793 | jnz >8 // Vararg frame below? | |
| 4794 | sub BASE, PC // Need to relocate BASE/KBASE down. | |
| 4795 | mov KBASE, BASE | |
| 4796 | mov PC, [BASE-4] | |
| 4797 | jmp <1 | |
| 4798 |8: | |
| 4799 | add PC, FRAME_VARG | |
| 4800 | jmp <1 | |
| 4801 break; | |
| 4802 | |
| 4803 case BC_ITERC: | |
| 4804 | ins_A // RA = base, (RB = nresults+1,) RC = nargs+1 (2+1) | |
| 4805 | lea RA, [BASE+RA*8+8] // fb = base+1 | |
| 4806 |.if X64 | |
| 4807 | mov RBa, [RA-24] // Copy state. fb[0] = fb[-3]. | |
| 4808 | mov RCa, [RA-16] // Copy control var. fb[1] = fb[-2]. | |
| 4809 | mov [RA], RBa | |
| 4810 | mov [RA+8], RCa | |
| 4811 |.else | |
| 4812 | mov RB, [RA-24] // Copy state. fb[0] = fb[-3]. | |
| 4813 | mov RC, [RA-20] | |
| 4814 | mov [RA], RB | |
| 4815 | mov [RA+4], RC | |
| 4816 | mov RB, [RA-16] // Copy control var. fb[1] = fb[-2]. | |
| 4817 | mov RC, [RA-12] | |
| 4818 | mov [RA+8], RB | |
| 4819 | mov [RA+12], RC | |
| 4820 |.endif | |
| 4821 | mov LFUNC:RB, [RA-32] // Copy callable. fb[-1] = fb[-4] | |
| 4822 | mov RC, [RA-28] | |
| 4823 | mov [RA-8], LFUNC:RB | |
| 4824 | mov [RA-4], RC | |
| 4825 | cmp RC, LJ_TFUNC // Handle like a regular 2-arg call. | |
| 4826 | mov NARGS:RD, 2+1 | |
| 4827 | jne ->vmeta_call | |
| 4828 | mov BASE, RA | |
| 4829 | ins_call | |
| 4830 break; | |
| 4831 | |
| 4832 case BC_ITERN: | |
| 4833 |.if JIT | |
| 4834 | hotloop RB | |
| 4835 |.endif | |
| 4836 |->vm_IITERN: | |
| 4837 | ins_A // RA = base, (RB = nresults+1, RC = nargs+1 (2+1)) | |
| 4838 | mov TMP1, KBASE // Need two more free registers. | |
| 4839 | mov TMP2, DISPATCH | |
| 4840 | mov TAB:RB, [BASE+RA*8-16] | |
| 4841 | mov RC, [BASE+RA*8-8] // Get index from control var. | |
| 4842 | mov DISPATCH, TAB:RB->asize | |
| 4843 | add PC, 4 | |
| 4844 | mov KBASE, TAB:RB->array | |
| 4845 |1: // Traverse array part. | |
| 4846 | cmp RC, DISPATCH; jae >5 // Index points after array part? | |
| 4847 | cmp dword [KBASE+RC*8+4], LJ_TNIL; je >4 | |
| 4848 |.if DUALNUM | |
| 4849 | mov dword [BASE+RA*8+4], LJ_TISNUM | |
| 4850 | mov dword [BASE+RA*8], RC | |
| 4851 |.else | |
| 4852 | cvtsi2sd xmm0, RC | |
| 4853 |.endif | |
| 4854 | // Copy array slot to returned value. | |
| 4855 |.if X64 | |
| 4856 | mov RBa, [KBASE+RC*8] | |
| 4857 | mov [BASE+RA*8+8], RBa | |
| 4858 |.else | |
| 4859 | mov RB, [KBASE+RC*8+4] | |
| 4860 | mov [BASE+RA*8+12], RB | |
| 4861 | mov RB, [KBASE+RC*8] | |
| 4862 | mov [BASE+RA*8+8], RB | |
| 4863 |.endif | |
| 4864 | add RC, 1 | |
| 4865 | // Return array index as a numeric key. | |
| 4866 |.if DUALNUM | |
| 4867 | // See above. | |
| 4868 |.else | |
| 4869 | movsd qword [BASE+RA*8], xmm0 | |
| 4870 |.endif | |
| 4871 | mov [BASE+RA*8-8], RC // Update control var. | |
| 4872 |2: | |
| 4873 | movzx RD, PC_RD // Get target from ITERL. | |
| 4874 | branchPC RD | |
| 4875 |3: | |
| 4876 | mov DISPATCH, TMP2 | |
| 4877 | mov KBASE, TMP1 | |
| 4878 | ins_next | |
| 4879 | | |
| 4880 |4: // Skip holes in array part. | |
| 4881 | add RC, 1 | |
| 4882 | jmp <1 | |
| 4883 | | |
| 4884 |5: // Traverse hash part. | |
| 4885 | sub RC, DISPATCH | |
| 4886 |6: | |
| 4887 | cmp RC, TAB:RB->hmask; ja <3 // End of iteration? Branch to ITERL+1. | |
| 4888 | imul KBASE, RC, #NODE | |
| 4889 | add NODE:KBASE, TAB:RB->node | |
| 4890 | cmp dword NODE:KBASE->val.it, LJ_TNIL; je >7 | |
| 4891 | lea DISPATCH, [RC+DISPATCH+1] | |
| 4892 | // Copy key and value from hash slot. | |
| 4893 |.if X64 | |
| 4894 | mov RBa, NODE:KBASE->key | |
| 4895 | mov RCa, NODE:KBASE->val | |
| 4896 | mov [BASE+RA*8], RBa | |
| 4897 | mov [BASE+RA*8+8], RCa | |
| 4898 |.else | |
| 4899 | mov RB, NODE:KBASE->key.gcr | |
| 4900 | mov RC, NODE:KBASE->key.it | |
| 4901 | mov [BASE+RA*8], RB | |
| 4902 | mov [BASE+RA*8+4], RC | |
| 4903 | mov RB, NODE:KBASE->val.gcr | |
| 4904 | mov RC, NODE:KBASE->val.it | |
| 4905 | mov [BASE+RA*8+8], RB | |
| 4906 | mov [BASE+RA*8+12], RC | |
| 4907 |.endif | |
| 4908 | mov [BASE+RA*8-8], DISPATCH | |
| 4909 | jmp <2 | |
| 4910 | | |
| 4911 |7: // Skip holes in hash part. | |
| 4912 | add RC, 1 | |
| 4913 | jmp <6 | |
| 4914 break; | |
| 4915 | |
| 4916 case BC_ISNEXT: | |
| 4917 | ins_AD // RA = base, RD = target (points to ITERN) | |
| 4918 | cmp dword [BASE+RA*8-20], LJ_TFUNC; jne >5 | |
| 4919 | mov CFUNC:RB, [BASE+RA*8-24] | |
| 4920 | cmp dword [BASE+RA*8-12], LJ_TTAB; jne >5 | |
| 4921 | cmp dword [BASE+RA*8-4], LJ_TNIL; jne >5 | |
| 4922 | cmp byte CFUNC:RB->ffid, FF_next_N; jne >5 | |
| 4923 | branchPC RD | |
| 4924 | mov dword [BASE+RA*8-8], 0 // Initialize control var. | |
| 4925 | mov dword [BASE+RA*8-4], LJ_KEYINDEX | |
| 4926 |1: | |
| 4927 | ins_next | |
| 4928 |5: // Despecialize bytecode if any of the checks fail. | |
| 4929 | mov PC_OP, BC_JMP | |
| 4930 | branchPC RD | |
| 4931 |.if JIT | |
| 4932 | cmp byte [PC], BC_ITERN | |
| 4933 | jne >6 | |
| 4934 |.endif | |
| 4935 | mov byte [PC], BC_ITERC | |
| 4936 | jmp <1 | |
| 4937 |.if JIT | |
| 4938 |6: // Unpatch JLOOP. | |
| 4939 | mov RA, [DISPATCH+DISPATCH_J(trace)] | |
| 4940 | movzx RC, word [PC+2] | |
| 4941 | mov TRACE:RA, [RA+RC*4] | |
| 4942 | mov eax, TRACE:RA->startins | |
| 4943 | mov al, BC_ITERC | |
| 4944 | mov dword [PC], eax | |
| 4945 | jmp <1 | |
| 4946 |.endif | |
| 4947 break; | |
| 4948 | |
| 4949 case BC_VARG: | |
| 4950 | ins_ABC // RA = base, RB = nresults+1, RC = numparams | |
| 4951 | mov TMP1, KBASE // Need one more free register. | |
| 4952 | lea KBASE, [BASE+RC*8+(8+FRAME_VARG)] | |
| 4953 | lea RA, [BASE+RA*8] | |
| 4954 | sub KBASE, [BASE-4] | |
| 4955 | // Note: KBASE may now be even _above_ BASE if nargs was < numparams. | |
| 4956 | test RB, RB | |
| 4957 | jz >5 // Copy all varargs? | |
| 4958 | lea RB, [RA+RB*8-8] | |
| 4959 | cmp KBASE, BASE // No vararg slots? | |
| 4960 | jnb >2 | |
| 4961 |1: // Copy vararg slots to destination slots. | |
| 4962 |.if X64 | |
| 4963 | mov RCa, [KBASE-8] | |
| 4964 | add KBASE, 8 | |
| 4965 | mov [RA], RCa | |
| 4966 |.else | |
| 4967 | mov RC, [KBASE-8] | |
| 4968 | mov [RA], RC | |
| 4969 | mov RC, [KBASE-4] | |
| 4970 | add KBASE, 8 | |
| 4971 | mov [RA+4], RC | |
| 4972 |.endif | |
| 4973 | add RA, 8 | |
| 4974 | cmp RA, RB // All destination slots filled? | |
| 4975 | jnb >3 | |
| 4976 | cmp KBASE, BASE // No more vararg slots? | |
| 4977 | jb <1 | |
| 4978 |2: // Fill up remainder with nil. | |
| 4979 | mov dword [RA+4], LJ_TNIL | |
| 4980 | add RA, 8 | |
| 4981 | cmp RA, RB | |
| 4982 | jb <2 | |
| 4983 |3: | |
| 4984 | mov KBASE, TMP1 | |
| 4985 | ins_next | |
| 4986 | | |
| 4987 |5: // Copy all varargs. | |
| 4988 | mov MULTRES, 1 // MULTRES = 0+1 | |
| 4989 | mov RC, BASE | |
| 4990 | sub RC, KBASE | |
| 4991 | jbe <3 // No vararg slots? | |
| 4992 | mov RB, RC | |
| 4993 | shr RB, 3 | |
| 4994 | add RB, 1 | |
| 4995 | mov MULTRES, RB // MULTRES = #varargs+1 | |
| 4996 | mov L:RB, SAVE_L | |
| 4997 | add RC, RA | |
| 4998 | cmp RC, L:RB->maxstack | |
| 4999 | ja >7 // Need to grow stack? | |
| 5000 |6: // Copy all vararg slots. | |
| 5001 |.if X64 | |
| 5002 | mov RCa, [KBASE-8] | |
| 5003 | add KBASE, 8 | |
| 5004 | mov [RA], RCa | |
| 5005 |.else | |
| 5006 | mov RC, [KBASE-8] | |
| 5007 | mov [RA], RC | |
| 5008 | mov RC, [KBASE-4] | |
| 5009 | add KBASE, 8 | |
| 5010 | mov [RA+4], RC | |
| 5011 |.endif | |
| 5012 | add RA, 8 | |
| 5013 | cmp KBASE, BASE // No more vararg slots? | |
| 5014 | jb <6 | |
| 5015 | jmp <3 | |
| 5016 | | |
| 5017 |7: // Grow stack for varargs. | |
| 5018 | mov L:RB->base, BASE | |
| 5019 | mov L:RB->top, RA | |
| 5020 | mov SAVE_PC, PC | |
| 5021 | sub KBASE, BASE // Need delta, because BASE may change. | |
| 5022 | mov FCARG2, MULTRES | |
| 5023 | sub FCARG2, 1 | |
| 5024 | mov FCARG1, L:RB | |
| 5025 | call extern lj_state_growstack@8 // (lua_State *L, int n) | |
| 5026 | mov BASE, L:RB->base | |
| 5027 | mov RA, L:RB->top | |
| 5028 | add KBASE, BASE | |
| 5029 | jmp <6 | |
| 5030 break; | |
| 5031 | |
| 5032 /* -- Returns ----------------------------------------------------------- */ | |
| 5033 | |
| 5034 case BC_RETM: | |
| 5035 | ins_AD // RA = results, RD = extra_nresults | |
| 5036 | add RD, MULTRES // MULTRES >=1, so RD >=1. | |
| 5037 | // Fall through. Assumes BC_RET follows and ins_AD is a no-op. | |
| 5038 break; | |
| 5039 | |
| 5040 case BC_RET: case BC_RET0: case BC_RET1: | |
| 5041 | ins_AD // RA = results, RD = nresults+1 | |
| 5042 if (op != BC_RET0) { | |
| 5043 | shl RA, 3 | |
| 5044 } | |
| 5045 |1: | |
| 5046 | mov PC, [BASE-4] | |
| 5047 | mov MULTRES, RD // Save nresults+1. | |
| 5048 | test PC, FRAME_TYPE // Check frame type marker. | |
| 5049 | jnz >7 // Not returning to a fixarg Lua func? | |
| 5050 switch (op) { | |
| 5051 case BC_RET: | |
| 5052 |->BC_RET_Z: | |
| 5053 | mov KBASE, BASE // Use KBASE for result move. | |
| 5054 | sub RD, 1 | |
| 5055 | jz >3 | |
| 5056 |2: // Move results down. | |
| 5057 |.if X64 | |
| 5058 | mov RBa, [KBASE+RA] | |
| 5059 | mov [KBASE-8], RBa | |
| 5060 |.else | |
| 5061 | mov RB, [KBASE+RA] | |
| 5062 | mov [KBASE-8], RB | |
| 5063 | mov RB, [KBASE+RA+4] | |
| 5064 | mov [KBASE-4], RB | |
| 5065 |.endif | |
| 5066 | add KBASE, 8 | |
| 5067 | sub RD, 1 | |
| 5068 | jnz <2 | |
| 5069 |3: | |
| 5070 | mov RD, MULTRES // Note: MULTRES may be >255. | |
| 5071 | movzx RB, PC_RB // So cannot compare with RDL! | |
| 5072 |5: | |
| 5073 | cmp RB, RD // More results expected? | |
| 5074 | ja >6 | |
| 5075 break; | |
| 5076 case BC_RET1: | |
| 5077 |.if X64 | |
| 5078 | mov RBa, [BASE+RA] | |
| 5079 | mov [BASE-8], RBa | |
| 5080 |.else | |
| 5081 | mov RB, [BASE+RA+4] | |
| 5082 | mov [BASE-4], RB | |
| 5083 | mov RB, [BASE+RA] | |
| 5084 | mov [BASE-8], RB | |
| 5085 |.endif | |
| 5086 /* fallthrough */ | |
| 5087 case BC_RET0: | |
| 5088 |5: | |
| 5089 | cmp PC_RB, RDL // More results expected? | |
| 5090 | ja >6 | |
| 5091 default: | |
| 5092 break; | |
| 5093 } | |
| 5094 | movzx RA, PC_RA | |
| 5095 | not RAa // Note: ~RA = -(RA+1) | |
| 5096 | lea BASE, [BASE+RA*8] // base = base - (RA+1)*8 | |
| 5097 | mov LFUNC:KBASE, [BASE-8] | |
| 5098 | mov KBASE, LFUNC:KBASE->pc | |
| 5099 | mov KBASE, [KBASE+PC2PROTO(k)] | |
| 5100 | ins_next | |
| 5101 | | |
| 5102 |6: // Fill up results with nil. | |
| 5103 if (op == BC_RET) { | |
| 5104 | mov dword [KBASE-4], LJ_TNIL // Note: relies on shifted base. | |
| 5105 | add KBASE, 8 | |
| 5106 } else { | |
| 5107 | mov dword [BASE+RD*8-12], LJ_TNIL | |
| 5108 } | |
| 5109 | add RD, 1 | |
| 5110 | jmp <5 | |
| 5111 | | |
| 5112 |7: // Non-standard return case. | |
| 5113 | lea RB, [PC-FRAME_VARG] | |
| 5114 | test RB, FRAME_TYPEP | |
| 5115 | jnz ->vm_return | |
| 5116 | // Return from vararg function: relocate BASE down and RA up. | |
| 5117 | sub BASE, RB | |
| 5118 if (op != BC_RET0) { | |
| 5119 | add RA, RB | |
| 5120 } | |
| 5121 | jmp <1 | |
| 5122 break; | |
| 5123 | |
| 5124 /* -- Loops and branches ------------------------------------------------ */ | |
| 5125 | |
| 5126 |.define FOR_IDX, [RA]; .define FOR_TIDX, dword [RA+4] | |
| 5127 |.define FOR_STOP, [RA+8]; .define FOR_TSTOP, dword [RA+12] | |
| 5128 |.define FOR_STEP, [RA+16]; .define FOR_TSTEP, dword [RA+20] | |
| 5129 |.define FOR_EXT, [RA+24]; .define FOR_TEXT, dword [RA+28] | |
| 5130 | |
| 5131 case BC_FORL: | |
| 5132 |.if JIT | |
| 5133 | hotloop RB | |
| 5134 |.endif | |
| 5135 | // Fall through. Assumes BC_IFORL follows and ins_AJ is a no-op. | |
| 5136 break; | |
| 5137 | |
| 5138 case BC_JFORI: | |
| 5139 case BC_JFORL: | |
| 5140 #if !LJ_HASJIT | |
| 5141 break; | |
| 5142 #endif | |
| 5143 case BC_FORI: | |
| 5144 case BC_IFORL: | |
| 5145 vk = (op == BC_IFORL || op == BC_JFORL); | |
| 5146 | ins_AJ // RA = base, RD = target (after end of loop or start of loop) | |
| 5147 | lea RA, [BASE+RA*8] | |
| 5148 if (LJ_DUALNUM) { | |
| 5149 | cmp FOR_TIDX, LJ_TISNUM; jne >9 | |
| 5150 if (!vk) { | |
| 5151 | cmp FOR_TSTOP, LJ_TISNUM; jne ->vmeta_for | |
| 5152 | cmp FOR_TSTEP, LJ_TISNUM; jne ->vmeta_for | |
| 5153 | mov RB, dword FOR_IDX | |
| 5154 | cmp dword FOR_STEP, 0; jl >5 | |
| 5155 } else { | |
| 5156 #ifdef LUA_USE_ASSERT | |
| 5157 | cmp FOR_TSTOP, LJ_TISNUM; jne ->assert_bad_for_arg_type | |
| 5158 | cmp FOR_TSTEP, LJ_TISNUM; jne ->assert_bad_for_arg_type | |
| 5159 #endif | |
| 5160 | mov RB, dword FOR_STEP | |
| 5161 | test RB, RB; js >5 | |
| 5162 | add RB, dword FOR_IDX; jo >1 | |
| 5163 | mov dword FOR_IDX, RB | |
| 5164 } | |
| 5165 | cmp RB, dword FOR_STOP | |
| 5166 | mov FOR_TEXT, LJ_TISNUM | |
| 5167 | mov dword FOR_EXT, RB | |
| 5168 if (op == BC_FORI) { | |
| 5169 | jle >7 | |
| 5170 |1: | |
| 5171 |6: | |
| 5172 | branchPC RD | |
| 5173 } else if (op == BC_JFORI) { | |
| 5174 | branchPC RD | |
| 5175 | movzx RD, PC_RD | |
| 5176 | jle =>BC_JLOOP | |
| 5177 |1: | |
| 5178 |6: | |
| 5179 } else if (op == BC_IFORL) { | |
| 5180 | jg >7 | |
| 5181 |6: | |
| 5182 | branchPC RD | |
| 5183 |1: | |
| 5184 } else { | |
| 5185 | jle =>BC_JLOOP | |
| 5186 |1: | |
| 5187 |6: | |
| 5188 } | |
| 5189 |7: | |
| 5190 | ins_next | |
| 5191 | | |
| 5192 |5: // Invert check for negative step. | |
| 5193 if (vk) { | |
| 5194 | add RB, dword FOR_IDX; jo <1 | |
| 5195 | mov dword FOR_IDX, RB | |
| 5196 } | |
| 5197 | cmp RB, dword FOR_STOP | |
| 5198 | mov FOR_TEXT, LJ_TISNUM | |
| 5199 | mov dword FOR_EXT, RB | |
| 5200 if (op == BC_FORI) { | |
| 5201 | jge <7 | |
| 5202 } else if (op == BC_JFORI) { | |
| 5203 | branchPC RD | |
| 5204 | movzx RD, PC_RD | |
| 5205 | jge =>BC_JLOOP | |
| 5206 } else if (op == BC_IFORL) { | |
| 5207 | jl <7 | |
| 5208 } else { | |
| 5209 | jge =>BC_JLOOP | |
| 5210 } | |
| 5211 | jmp <6 | |
| 5212 |9: // Fallback to FP variant. | |
| 5213 } else if (!vk) { | |
| 5214 | cmp FOR_TIDX, LJ_TISNUM | |
| 5215 } | |
| 5216 if (!vk) { | |
| 5217 | jae ->vmeta_for | |
| 5218 | cmp FOR_TSTOP, LJ_TISNUM; jae ->vmeta_for | |
| 5219 } else { | |
| 5220 #ifdef LUA_USE_ASSERT | |
| 5221 | cmp FOR_TSTOP, LJ_TISNUM; jae ->assert_bad_for_arg_type | |
| 5222 | cmp FOR_TSTEP, LJ_TISNUM; jae ->assert_bad_for_arg_type | |
| 5223 #endif | |
| 5224 } | |
| 5225 | mov RB, FOR_TSTEP // Load type/hiword of for step. | |
| 5226 if (!vk) { | |
| 5227 | cmp RB, LJ_TISNUM; jae ->vmeta_for | |
| 5228 } | |
| 5229 | movsd xmm0, qword FOR_IDX | |
| 5230 | movsd xmm1, qword FOR_STOP | |
| 5231 if (vk) { | |
| 5232 | addsd xmm0, qword FOR_STEP | |
| 5233 | movsd qword FOR_IDX, xmm0 | |
| 5234 | test RB, RB; js >3 | |
| 5235 } else { | |
| 5236 | jl >3 | |
| 5237 } | |
| 5238 | ucomisd xmm1, xmm0 | |
| 5239 |1: | |
| 5240 | movsd qword FOR_EXT, xmm0 | |
| 5241 if (op == BC_FORI) { | |
| 5242 |.if DUALNUM | |
| 5243 | jnb <7 | |
| 5244 |.else | |
| 5245 | jnb >2 | |
| 5246 | branchPC RD | |
| 5247 |.endif | |
| 5248 } else if (op == BC_JFORI) { | |
| 5249 | branchPC RD | |
| 5250 | movzx RD, PC_RD | |
| 5251 | jnb =>BC_JLOOP | |
| 5252 } else if (op == BC_IFORL) { | |
| 5253 |.if DUALNUM | |
| 5254 | jb <7 | |
| 5255 |.else | |
| 5256 | jb >2 | |
| 5257 | branchPC RD | |
| 5258 |.endif | |
| 5259 } else { | |
| 5260 | jnb =>BC_JLOOP | |
| 5261 } | |
| 5262 |.if DUALNUM | |
| 5263 | jmp <6 | |
| 5264 |.else | |
| 5265 |2: | |
| 5266 | ins_next | |
| 5267 |.endif | |
| 5268 | | |
| 5269 |3: // Invert comparison if step is negative. | |
| 5270 | ucomisd xmm0, xmm1 | |
| 5271 | jmp <1 | |
| 5272 break; | |
| 5273 | |
| 5274 case BC_ITERL: | |
| 5275 |.if JIT | |
| 5276 | hotloop RB | |
| 5277 |.endif | |
| 5278 | // Fall through. Assumes BC_IITERL follows and ins_AJ is a no-op. | |
| 5279 break; | |
| 5280 | |
| 5281 case BC_JITERL: /* Shares the BC_IITERL body below when LJ_HASJIT is set. */ | |
| 5282 #if !LJ_HASJIT | |
| 5283 break; /* No JIT: BC_JITERL emits no code. */ | |
| 5284 #endif | |
| 5285 case BC_IITERL: /* Iterator loop back-edge: if slot RA is nil fall out of the loop, else copy slot RA into the slot below it (RA-8) and continue looping. */ | |
| 5286 | ins_AJ // RA = base, RD = target | |
| 5287 | lea RA, [BASE+RA*8] | |
| 5288 | mov RB, [RA+4] | |
| 5289 | cmp RB, LJ_TNIL; je >1 // Stop if iterator returned nil (RB = hi dword of slot RA). | |
| 5290 if (op == BC_JITERL) { /* JIT variant: save the control var, then continue at the BC_JLOOP handler. */ | |
| 5291 | mov [RA-4], RB | |
| 5292 | mov RB, [RA] | |
| 5293 | mov [RA-8], RB | |
| 5294 | jmp =>BC_JLOOP | |
| 5295 } else { /* Interpreter variant: branch via branchPC, using RD as scratch for the copy afterwards. */ | |
| 5296 | branchPC RD // Otherwise save control var + branch. | |
| 5297 | mov RD, [RA] | |
| 5298 | mov [RA-4], RB | |
| 5299 | mov [RA-8], RD | |
| 5300 } | |
| 5301 |1: | |
| 5302 | ins_next | |
| 5303 break; | |
| 5304 | |
| 5305 case BC_LOOP: | |
| 5306 | ins_A // RA = base, RD = target (loop extent) | |
| 5307 | // Note: RA/RD are only used by the trace recorder to determine scope/extent. | |
| 5308 | // This opcode does NOT jump; its only purpose is to detect a hot loop. | |
| 5309 |.if JIT | |
| 5310 | hotloop RB | |
| 5311 |.endif | |
| 5312 | // Fall through. Assumes BC_ILOOP follows and ins_A is a no-op. | |
| 5313 break; | |
| 5314 | |
| 5315 case BC_ILOOP: | |
| 5316 | ins_A // RA = base, RD = target (loop extent) | |
| 5317 | ins_next | |
| 5318 break; | |
| 5319 | |
| 5320 case BC_JLOOP: | |
| 5321 |.if JIT | |
| 5322 | ins_AD // RA = base (ignored), RD = traceno | |
| 5323 | mov RA, [DISPATCH+DISPATCH_J(trace)] | |
| 5324 | mov TRACE:RD, [RA+RD*4] | |
| 5325 | mov RDa, TRACE:RD->mcode | |
| 5326 | mov L:RB, SAVE_L | |
| 5327 | mov [DISPATCH+DISPATCH_GL(jit_base)], BASE | |
| 5328 | mov [DISPATCH+DISPATCH_GL(tmpbuf.L)], L:RB | |
| 5329 | // Save additional callee-save registers only used in compiled code. | |
| 5330 |.if X64WIN | |
| 5331 | mov TMPQ, r12 | |
| 5332 | mov TMPa, r13 | |
| 5333 | mov CSAVE_4, r14 | |
| 5334 | mov CSAVE_3, r15 | |
| 5335 | mov RAa, rsp | |
| 5336 | sub rsp, 9*16+4*8 | |
| 5337 | movdqa [RAa], xmm6 | |
| 5338 | movdqa [RAa-1*16], xmm7 | |
| 5339 | movdqa [RAa-2*16], xmm8 | |
| 5340 | movdqa [RAa-3*16], xmm9 | |
| 5341 | movdqa [RAa-4*16], xmm10 | |
| 5342 | movdqa [RAa-5*16], xmm11 | |
| 5343 | movdqa [RAa-6*16], xmm12 | |
| 5344 | movdqa [RAa-7*16], xmm13 | |
| 5345 | movdqa [RAa-8*16], xmm14 | |
| 5346 | movdqa [RAa-9*16], xmm15 | |
| 5347 |.elif X64 | |
| 5348 | mov TMPQ, r12 | |
| 5349 | mov TMPa, r13 | |
| 5350 | sub rsp, 16 | |
| 5351 |.endif | |
| 5352 | jmp RDa | |
| 5353 |.endif | |
| 5354 break; | |
| 5355 | |
| 5356 case BC_JMP: | |
| 5357 | ins_AJ // RA = unused, RD = target | |
| 5358 | branchPC RD | |
| 5359 | ins_next | |
| 5360 break; | |
| 5361 | |
| 5362 /* -- Function headers -------------------------------------------------- */ | |
| 5363 | |
| 5364 /* | |
| 5365 ** Reminder: A function may be called with func/args above L->maxstack, | |
| 5366 ** i.e. occupying EXTRA_STACK slots. And vmeta_call may add one extra slot, | |
| 5367 ** too. This means all FUNC* ops (including fast functions) must check | |
| 5368 ** for stack overflow _before_ adding more slots! | |
| 5369 */ | |
| 5370 | |
| 5371 case BC_FUNCF: | |
| 5372 |.if JIT | |
| 5373 | hotcall RB | |
| 5374 |.endif | |
| 5375 case BC_FUNCV: /* NYI: compiled vararg functions. */ | |
| 5376 | // Fall through. Assumes BC_IFUNCF/BC_IFUNCV follow and ins_AD is a no-op. | |
| 5377 break; | |
| 5378 | |
| 5379 case BC_JFUNCF: /* Shares the BC_IFUNCF body below when LJ_HASJIT is set. */ | |
| 5380 #if !LJ_HASJIT | |
| 5381 break; /* No JIT: BC_JFUNCF emits no code. */ | |
| 5382 #endif | |
| 5383 case BC_IFUNCF: /* Fixed-arg Lua function header: check stack space, then nil-fill the parameters the caller did not pass. */ | |
| 5384 | ins_AD // BASE = new base, RA = framesize, RD = nargs+1 | |
| 5385 | mov KBASE, [PC-4+PC2PROTO(k)] | |
| 5386 | mov L:RB, SAVE_L | |
| 5387 | lea RA, [BASE+RA*8] // Top of frame; if it lies above L->maxstack, vm_growstack_f is taken. | |
| 5388 | cmp RA, L:RB->maxstack | |
| 5389 | ja ->vm_growstack_f | |
| 5390 | movzx RA, byte [PC-4+PC2PROTO(numparams)] | |
| 5391 | cmp NARGS:RD, RA // Check for missing parameters: nargs+1 <= numparams means at least one is missing. | |
| 5392 | jbe >3 | |
| 5393 |2: | |
| 5394 if (op == BC_JFUNCF) { /* JIT variant: reload RD via PC_RD and continue at the BC_JLOOP handler. */ | |
| 5395 | movzx RD, PC_RD | |
| 5396 | jmp =>BC_JLOOP | |
| 5397 } else { | |
| 5398 | ins_next | |
| 5399 } | |
| 5400 | | |
| 5401 |3: // Clear missing parameters: write the nil tag to slot RD-1 (hi dword at +4), then advance until RD > numparams. | |
| 5402 | mov dword [BASE+NARGS:RD*8-4], LJ_TNIL | |
| 5403 | add NARGS:RD, 1 | |
| 5404 | cmp NARGS:RD, RA | |
| 5405 | jbe <3 | |
| 5406 | jmp <2 | |
| 5407 break; | |
| 5408 | |
| 5409 case BC_JFUNCV: | |
| 5410 #if !LJ_HASJIT | |
| 5411 break; | |
| 5412 #endif | |
| 5413 | int3 // NYI: compiled vararg functions | |
| 5414 break; /* NYI: compiled vararg functions. */ | |
| 5415 | |
| 5416 case BC_IFUNCV: | |
| 5417 | ins_AD // BASE = new base, RA = framesize, RD = nargs+1 | |
| 5418 | lea RB, [NARGS:RD*8+FRAME_VARG] | |
| 5419 | lea RD, [BASE+NARGS:RD*8] | |
| 5420 | mov LFUNC:KBASE, [BASE-8] | |
| 5421 | mov [RD-4], RB // Store delta + FRAME_VARG. | |
| 5422 | mov [RD-8], LFUNC:KBASE // Store copy of LFUNC. | |
| 5423 | mov L:RB, SAVE_L | |
| 5424 | lea RA, [RD+RA*8] | |
| 5425 | cmp RA, L:RB->maxstack | |
| 5426 | ja ->vm_growstack_v // Need to grow stack. | |
| 5427 | mov RA, BASE | |
| 5428 | mov BASE, RD | |
| 5429 | movzx RB, byte [PC-4+PC2PROTO(numparams)] | |
| 5430 | test RB, RB | |
| 5431 | jz >2 | |
| 5432 |1: // Copy fixarg slots up to new frame. | |
| 5433 | add RA, 8 | |
| 5434 | cmp RA, BASE | |
| 5435 | jnb >3 // Less args than parameters? | |
| 5436 | mov KBASE, [RA-8] | |
| 5437 | mov [RD], KBASE | |
| 5438 | mov KBASE, [RA-4] | |
| 5439 | mov [RD+4], KBASE | |
| 5440 | add RD, 8 | |
| 5441 | mov dword [RA-4], LJ_TNIL // Clear old fixarg slot (help the GC). | |
| 5442 | sub RB, 1 | |
| 5443 | jnz <1 | |
| 5444 |2: | |
| 5445 if (op == BC_JFUNCV) { | |
| 5446 | movzx RD, PC_RD | |
| 5447 | jmp =>BC_JLOOP | |
| 5448 } else { | |
| 5449 | mov KBASE, [PC-4+PC2PROTO(k)] | |
| 5450 | ins_next | |
| 5451 } | |
| 5452 | | |
| 5453 |3: // Clear missing parameters. | |
| 5454 | mov dword [RD+4], LJ_TNIL | |
| 5455 | add RD, 8 | |
| 5456 | sub RB, 1 | |
| 5457 | jnz <3 | |
| 5458 | jmp <2 | |
| 5459 break; | |
| 5460 | |
| 5461 case BC_FUNCC: | |
| 5462 case BC_FUNCCW: | |
| 5463 | ins_AD // BASE = new base, RA = ins RA|RD (unused), RD = nargs+1 | |
| 5464 | mov CFUNC:RB, [BASE-8] | |
| 5465 | mov KBASEa, CFUNC:RB->f | |
| 5466 | mov L:RB, SAVE_L | |
| 5467 | lea RD, [BASE+NARGS:RD*8-8] | |
| 5468 | mov L:RB->base, BASE | |
| 5469 | lea RA, [RD+8*LUA_MINSTACK] | |
| 5470 | cmp RA, L:RB->maxstack | |
| 5471 | mov L:RB->top, RD | |
| 5472 if (op == BC_FUNCC) { | |
| 5473 |.if X64 | |
| 5474 | mov CARG1d, L:RB // Caveat: CARG1d may be RA. | |
| 5475 |.else | |
| 5476 | mov ARG1, L:RB | |
| 5477 |.endif | |
| 5478 } else { | |
| 5479 |.if X64 | |
| 5480 | mov CARG2, KBASEa | |
| 5481 | mov CARG1d, L:RB // Caveat: CARG1d may be RA. | |
| 5482 |.else | |
| 5483 | mov ARG2, KBASEa | |
| 5484 | mov ARG1, L:RB | |
| 5485 |.endif | |
| 5486 } | |
| 5487 | ja ->vm_growstack_c // Need to grow stack. | |
| 5488 | set_vmstate C | |
| 5489 if (op == BC_FUNCC) { | |
| 5490 | call KBASEa // (lua_State *L) | |
| 5491 } else { | |
| 5492 | // (lua_State *L, lua_CFunction f) | |
| 5493 | call aword [DISPATCH+DISPATCH_GL(wrapf)] | |
| 5494 } | |
| 5495 | // nresults returned in eax (RD). | |
| 5496 | mov BASE, L:RB->base | |
| 5497 | mov [DISPATCH+DISPATCH_GL(cur_L)], L:RB | |
| 5498 | set_vmstate INTERP | |
| 5499 | lea RA, [BASE+RD*8] | |
| 5500 | neg RA | |
| 5501 | add RA, L:RB->top // RA = (L->top-(L->base+nresults))*8 | |
| 5502 | mov PC, [BASE-4] // Fetch PC of caller. | |
| 5503 | jmp ->vm_returnc | |
| 5504 break; | |
| 5505 | |
| 5506 /* ---------------------------------------------------------------------- */ | |
| 5507 | |
| 5508 default: | |
| 5509 fprintf(stderr, "Error: undefined opcode BC_%s\n", bc_names[op]); | |
| 5510 exit(2); | |
| 5511 break; | |
| 5512 } | |
| 5513 } | |
| 5514 | |
| 5515 static int build_backend(BuildCtx *ctx) /* Build the backend: subroutines first, then one build_ins() call per opcode 0..BC__MAX-1. Returns BC__MAX. */ | |
| 5516 { | |
| 5517 int op; | |
| 5518 dasm_growpc(Dst, BC__MAX); /* Make room for BC__MAX DynASM pc labels; presumably one per opcode for the =>BC_xxx targets (TODO confirm). */ | |
| 5519 build_subroutines(ctx); | |
| 5520 |.code_op | |
| 5521 for (op = 0; op < BC__MAX; op++) /* The .code_op directive above selects the code_op section declared at the top of the file. */ | |
| 5522 build_ins(ctx, (BCOp)op, op); | |
| 5523 return BC__MAX; | |
| 5524 } | |
| 5525 | |
| 5526 /* Emit pseudo frame-info for all assembler functions. Writes CIE/FDE unwind records as assembler text to ctx->fp; only the BUILD_elfasm and (unless LJ_NO_UNWIND is set) BUILD_machasm modes emit anything, all other modes fall through to the empty default case. */ | |
| 5527 static void emit_asm_debug(BuildCtx *ctx) | |
| 5528 { | |
| 5529 int fcofs = (int)((uint8_t *)ctx->glob[GLOB_vm_ffi_call] - ctx->code); /* Byte offset of the vm_ffi_call global from the start of the code; emitted as the length of the range that starts at .Lbegin. */ | |
| 5530 #if LJ_64 | |
| 5531 #define SZPTR "8" /* Pointer size in bytes, as a string so it can be pasted into the directives below. */ | |
| 5532 #define BSZPTR "3" /* log2 of SZPTR; used as the .align operand in the Mach-O output. */ | |
| 5533 #define REG_SP "0x7" /* Register number given to the 0xc (def_cfa) opcode; presumably the DWARF number of the stack pointer. */ | |
| 5534 #define REG_RA "0x10" /* Register number emitted in the CIE header and in the 0x80+REG_RA (offset) rule; presumably the return-address column. */ | |
| 5535 #else | |
| 5536 #define SZPTR "4" | |
| 5537 #define BSZPTR "2" | |
| 5538 #define REG_SP "0x4" | |
| 5539 #define REG_RA "0x8" | |
| 5540 #endif | |
| 5541 switch (ctx->mode) { | |
| 5542 case BUILD_elfasm: | |
| 5543 fprintf(ctx->fp, "\t.section .debug_frame,\"\",@progbits\n"); | |
| 5544 fprintf(ctx->fp, /* CIE (.Lframe0) referenced by the two .debug_frame FDEs below. */ | |
| 5545 ".Lframe0:\n" | |
| 5546 "\t.long .LECIE0-.LSCIE0\n" | |
| 5547 ".LSCIE0:\n" | |
| 5548 "\t.long 0xffffffff\n" | |
| 5549 "\t.byte 0x1\n" | |
| 5550 "\t.string \"\"\n" | |
| 5551 "\t.uleb128 0x1\n" | |
| 5552 "\t.sleb128 -" SZPTR "\n" | |
| 5553 "\t.byte " REG_RA "\n" | |
| 5554 "\t.byte 0xc\n\t.uleb128 " REG_SP "\n\t.uleb128 " SZPTR "\n" | |
| 5555 "\t.byte 0x80+" REG_RA "\n\t.uleb128 0x1\n" | |
| 5556 "\t.align " SZPTR "\n" | |
| 5557 ".LECIE0:\n\n"); | |
| 5558 fprintf(ctx->fp, /* FDE for the range starting at .Lbegin with length fcofs; CFA offset is CFRAME_SIZE. */ | |
| 5559 ".LSFDE0:\n" | |
| 5560 "\t.long .LEFDE0-.LASFDE0\n" | |
| 5561 ".LASFDE0:\n" | |
| 5562 "\t.long .Lframe0\n" | |
| 5563 #if LJ_64 | |
| 5564 "\t.quad .Lbegin\n" | |
| 5565 "\t.quad %d\n" | |
| 5566 "\t.byte 0xe\n\t.uleb128 %d\n" /* def_cfa_offset */ | |
| 5567 "\t.byte 0x86\n\t.uleb128 0x2\n" /* offset rbp */ | |
| 5568 "\t.byte 0x83\n\t.uleb128 0x3\n" /* offset rbx */ | |
| 5569 "\t.byte 0x8f\n\t.uleb128 0x4\n" /* offset r15 */ | |
| 5570 "\t.byte 0x8e\n\t.uleb128 0x5\n" /* offset r14 */ | |
| 5571 #if LJ_NO_UNWIND | |
| 5572 "\t.byte 0x8d\n\t.uleb128 0x6\n" /* offset r13 */ | |
| 5573 "\t.byte 0x8c\n\t.uleb128 0x7\n" /* offset r12 */ | |
| 5574 #endif | |
| 5575 #else | |
| 5576 "\t.long .Lbegin\n" | |
| 5577 "\t.long %d\n" | |
| 5578 "\t.byte 0xe\n\t.uleb128 %d\n" /* def_cfa_offset */ | |
| 5579 "\t.byte 0x85\n\t.uleb128 0x2\n" /* offset ebp */ | |
| 5580 "\t.byte 0x87\n\t.uleb128 0x3\n" /* offset edi */ | |
| 5581 "\t.byte 0x86\n\t.uleb128 0x4\n" /* offset esi */ | |
| 5582 "\t.byte 0x83\n\t.uleb128 0x5\n" /* offset ebx */ | |
| 5583 #endif | |
| 5584 "\t.align " SZPTR "\n" | |
| 5585 ".LEFDE0:\n\n", fcofs, CFRAME_SIZE); | |
| 5586 #if LJ_HASFFI | |
| 5587 fprintf(ctx->fp, /* Separate FDE for lj_vm_ffi_call; its length is the remainder of the code (codesz - fcofs). */ | |
| 5588 ".LSFDE1:\n" | |
| 5589 "\t.long .LEFDE1-.LASFDE1\n" | |
| 5590 ".LASFDE1:\n" | |
| 5591 "\t.long .Lframe0\n" | |
| 5592 #if LJ_64 | |
| 5593 "\t.quad lj_vm_ffi_call\n" | |
| 5594 "\t.quad %d\n" | |
| 5595 "\t.byte 0xe\n\t.uleb128 16\n" /* def_cfa_offset */ | |
| 5596 "\t.byte 0x86\n\t.uleb128 0x2\n" /* offset rbp */ | |
| 5597 "\t.byte 0xd\n\t.uleb128 0x6\n" /* def_cfa_register rbp */ | |
| 5598 "\t.byte 0x83\n\t.uleb128 0x3\n" /* offset rbx */ | |
| 5599 #else | |
| 5600 "\t.long lj_vm_ffi_call\n" | |
| 5601 "\t.long %d\n" | |
| 5602 "\t.byte 0xe\n\t.uleb128 8\n" /* def_cfa_offset */ | |
| 5603 "\t.byte 0x85\n\t.uleb128 0x2\n" /* offset ebp */ | |
| 5604 "\t.byte 0xd\n\t.uleb128 0x5\n" /* def_cfa_register ebp */ | |
| 5605 "\t.byte 0x83\n\t.uleb128 0x3\n" /* offset ebx */ | |
| 5606 #endif | |
| 5607 "\t.align " SZPTR "\n" | |
| 5608 ".LEFDE1:\n\n", (int)ctx->codesz - fcofs); | |
| 5609 #endif | |
| 5610 #if !LJ_NO_UNWIND | |
| 5611 #if LJ_TARGET_SOLARIS | |
| 5612 #if LJ_64 | |
| 5613 fprintf(ctx->fp, "\t.section .eh_frame,\"a\",@unwind\n"); | |
| 5614 #else | |
| 5615 fprintf(ctx->fp, "\t.section .eh_frame,\"aw\",@progbits\n"); | |
| 5616 #endif | |
| 5617 #else | |
| 5618 fprintf(ctx->fp, "\t.section .eh_frame,\"a\",@progbits\n"); | |
| 5619 #endif | |
| 5620 fprintf(ctx->fp, /* .eh_frame CIE (.Lframe1) with augmentation "zPR"; its augmentation data references lj_err_unwind_dwarf. */ | |
| 5621 ".Lframe1:\n" | |
| 5622 "\t.long .LECIE1-.LSCIE1\n" | |
| 5623 ".LSCIE1:\n" | |
| 5624 "\t.long 0\n" | |
| 5625 "\t.byte 0x1\n" | |
| 5626 "\t.string \"zPR\"\n" | |
| 5627 "\t.uleb128 0x1\n" | |
| 5628 "\t.sleb128 -" SZPTR "\n" | |
| 5629 "\t.byte " REG_RA "\n" | |
| 5630 "\t.uleb128 6\n" /* augmentation length */ | |
| 5631 "\t.byte 0x1b\n" /* pcrel|sdata4 */ | |
| 5632 "\t.long lj_err_unwind_dwarf-.\n" | |
| 5633 "\t.byte 0x1b\n" /* pcrel|sdata4 */ | |
| 5634 "\t.byte 0xc\n\t.uleb128 " REG_SP "\n\t.uleb128 " SZPTR "\n" | |
| 5635 "\t.byte 0x80+" REG_RA "\n\t.uleb128 0x1\n" | |
| 5636 "\t.align " SZPTR "\n" | |
| 5637 ".LECIE1:\n\n"); | |
| 5638 fprintf(ctx->fp, /* .eh_frame FDE for the range starting at .Lbegin with length fcofs; addresses are PC-relative here. */ | |
| 5639 ".LSFDE2:\n" | |
| 5640 "\t.long .LEFDE2-.LASFDE2\n" | |
| 5641 ".LASFDE2:\n" | |
| 5642 "\t.long .LASFDE2-.Lframe1\n" | |
| 5643 "\t.long .Lbegin-.\n" | |
| 5644 "\t.long %d\n" | |
| 5645 "\t.uleb128 0\n" /* augmentation length */ | |
| 5646 "\t.byte 0xe\n\t.uleb128 %d\n" /* def_cfa_offset */ | |
| 5647 #if LJ_64 | |
| 5648 "\t.byte 0x86\n\t.uleb128 0x2\n" /* offset rbp */ | |
| 5649 "\t.byte 0x83\n\t.uleb128 0x3\n" /* offset rbx */ | |
| 5650 "\t.byte 0x8f\n\t.uleb128 0x4\n" /* offset r15 */ | |
| 5651 "\t.byte 0x8e\n\t.uleb128 0x5\n" /* offset r14 */ | |
| 5652 #else | |
| 5653 "\t.byte 0x85\n\t.uleb128 0x2\n" /* offset ebp */ | |
| 5654 "\t.byte 0x87\n\t.uleb128 0x3\n" /* offset edi */ | |
| 5655 "\t.byte 0x86\n\t.uleb128 0x4\n" /* offset esi */ | |
| 5656 "\t.byte 0x83\n\t.uleb128 0x5\n" /* offset ebx */ | |
| 5657 #endif | |
| 5658 "\t.align " SZPTR "\n" | |
| 5659 ".LEFDE2:\n\n", fcofs, CFRAME_SIZE); | |
| 5660 #if LJ_HASFFI | |
| 5661 fprintf(ctx->fp, /* Second .eh_frame CIE (.Lframe2) with augmentation "zR": no lj_err_unwind_dwarf reference. Used only by the lj_vm_ffi_call FDE below. */ | |
| 5662 ".Lframe2:\n" | |
| 5663 "\t.long .LECIE2-.LSCIE2\n" | |
| 5664 ".LSCIE2:\n" | |
| 5665 "\t.long 0\n" | |
| 5666 "\t.byte 0x1\n" | |
| 5667 "\t.string \"zR\"\n" | |
| 5668 "\t.uleb128 0x1\n" | |
| 5669 "\t.sleb128 -" SZPTR "\n" | |
| 5670 "\t.byte " REG_RA "\n" | |
| 5671 "\t.uleb128 1\n" /* augmentation length */ | |
| 5672 "\t.byte 0x1b\n" /* pcrel|sdata4 */ | |
| 5673 "\t.byte 0xc\n\t.uleb128 " REG_SP "\n\t.uleb128 " SZPTR "\n" | |
| 5674 "\t.byte 0x80+" REG_RA "\n\t.uleb128 0x1\n" | |
| 5675 "\t.align " SZPTR "\n" | |
| 5676 ".LECIE2:\n\n"); | |
| 5677 fprintf(ctx->fp, /* .eh_frame FDE for lj_vm_ffi_call; length is codesz - fcofs. */ | |
| 5678 ".LSFDE3:\n" | |
| 5679 "\t.long .LEFDE3-.LASFDE3\n" | |
| 5680 ".LASFDE3:\n" | |
| 5681 "\t.long .LASFDE3-.Lframe2\n" | |
| 5682 "\t.long lj_vm_ffi_call-.\n" | |
| 5683 "\t.long %d\n" | |
| 5684 "\t.uleb128 0\n" /* augmentation length */ | |
| 5685 #if LJ_64 | |
| 5686 "\t.byte 0xe\n\t.uleb128 16\n" /* def_cfa_offset */ | |
| 5687 "\t.byte 0x86\n\t.uleb128 0x2\n" /* offset rbp */ | |
| 5688 "\t.byte 0xd\n\t.uleb128 0x6\n" /* def_cfa_register rbp */ | |
| 5689 "\t.byte 0x83\n\t.uleb128 0x3\n" /* offset rbx */ | |
| 5690 #else | |
| 5691 "\t.byte 0xe\n\t.uleb128 8\n" /* def_cfa_offset */ | |
| 5692 "\t.byte 0x85\n\t.uleb128 0x2\n" /* offset ebp */ | |
| 5693 "\t.byte 0xd\n\t.uleb128 0x5\n" /* def_cfa_register ebp */ | |
| 5694 "\t.byte 0x83\n\t.uleb128 0x3\n" /* offset ebx */ | |
| 5695 #endif | |
| 5696 "\t.align " SZPTR "\n" | |
| 5697 ".LEFDE3:\n\n", (int)ctx->codesz - fcofs); | |
| 5698 #endif | |
| 5699 #endif | |
| 5700 break; | |
| 5701 #if !LJ_NO_UNWIND | |
| 5702 /* Mental note: never let Apple design an assembler. | |
| 5703 ** Or a linker. Or a plastic case. But I digress. | |
| 5704 */ | |
| 5705 case BUILD_machasm: { | |
| 5706 #if LJ_HASFFI | |
| 5707 int fcsize = 0; /* Size of _lj_vm_ffi_call, recorded by the symbol loop; stays 0 if that symbol is not seen. */ | |
| 5708 #endif | |
| 5709 int i; | |
| 5710 fprintf(ctx->fp, "\t.section __TEXT,__eh_frame,coalesced,no_toc+strip_static_syms+live_support\n"); | |
| 5711 fprintf(ctx->fp, /* CIE (EH_frame1) shared by the per-symbol FDEs; all values are written as single .byte items here. */ | |
| 5712 "EH_frame1:\n" | |
| 5713 "\t.set L$set$x,LECIEX-LSCIEX\n" | |
| 5714 "\t.long L$set$x\n" | |
| 5715 "LSCIEX:\n" | |
| 5716 "\t.long 0\n" | |
| 5717 "\t.byte 0x1\n" | |
| 5718 "\t.ascii \"zPR\\0\"\n" | |
| 5719 "\t.byte 0x1\n" | |
| 5720 "\t.byte 128-" SZPTR "\n" | |
| 5721 "\t.byte " REG_RA "\n" | |
| 5722 "\t.byte 6\n" /* augmentation length */ | |
| 5723 "\t.byte 0x9b\n" /* indirect|pcrel|sdata4 */ | |
| 5724 #if LJ_64 | |
| 5725 "\t.long _lj_err_unwind_dwarf+4@GOTPCREL\n" | |
| 5726 "\t.byte 0x1b\n" /* pcrel|sdata4 */ | |
| 5727 "\t.byte 0xc\n\t.byte " REG_SP "\n\t.byte " SZPTR "\n" | |
| 5728 #else | |
| 5729 "\t.long L_lj_err_unwind_dwarf$non_lazy_ptr-.\n" | |
| 5730 "\t.byte 0x1b\n" /* pcrel|sdata4 */ | |
| 5731 "\t.byte 0xc\n\t.byte 0x5\n\t.byte 0x4\n" /* esp=5 on 32 bit MACH-O. */ | |
| 5732 #endif | |
| 5733 "\t.byte 0x80+" REG_RA "\n\t.byte 0x1\n" | |
| 5734 "\t.align " BSZPTR "\n" | |
| 5735 "LECIEX:\n\n"); | |
| 5736 for (i = 0; i < ctx->nsym; i++) { /* One FDE per symbol; labels are made unique with the symbol index i. */ | |
| 5737 const char *name = ctx->sym[i].name; | |
| 5738 int32_t size = ctx->sym[i+1].ofs - ctx->sym[i].ofs; /* Distance to the next symbol's offset. Reads sym[i+1] even for the last i, so presumably ctx->sym carries a trailing end entry -- TODO confirm. */ | |
| 5739 if (size == 0) continue; | |
| 5740 #if LJ_HASFFI | |
| 5741 if (!strcmp(name, "_lj_vm_ffi_call")) { fcsize = size; continue; } /* Handled after the loop with its own CIE/FDE; only remember the size here. */ | |
| 5742 #endif | |
| 5743 fprintf(ctx->fp, | |
| 5744 "%s.eh:\n" | |
| 5745 "LSFDE%d:\n" | |
| 5746 "\t.set L$set$%d,LEFDE%d-LASFDE%d\n" | |
| 5747 "\t.long L$set$%d\n" | |
| 5748 "LASFDE%d:\n" | |
| 5749 "\t.long LASFDE%d-EH_frame1\n" | |
| 5750 "\t.long %s-.\n" | |
| 5751 "\t.long %d\n" | |
| 5752 "\t.byte 0\n" /* augmentation length */ | |
| 5753 "\t.byte 0xe\n\t.byte %d\n" /* def_cfa_offset */ | |
| 5754 #if LJ_64 | |
| 5755 "\t.byte 0x86\n\t.byte 0x2\n" /* offset rbp */ | |
| 5756 "\t.byte 0x83\n\t.byte 0x3\n" /* offset rbx */ | |
| 5757 "\t.byte 0x8f\n\t.byte 0x4\n" /* offset r15 */ | |
| 5758 "\t.byte 0x8e\n\t.byte 0x5\n" /* offset r14 */ | |
| 5759 #else | |
| 5760 "\t.byte 0x84\n\t.byte 0x2\n" /* offset ebp (4 for MACH-O)*/ | |
| 5761 "\t.byte 0x87\n\t.byte 0x3\n" /* offset edi */ | |
| 5762 "\t.byte 0x86\n\t.byte 0x4\n" /* offset esi */ | |
| 5763 "\t.byte 0x83\n\t.byte 0x5\n" /* offset ebx */ | |
| 5764 #endif | |
| 5765 "\t.align " BSZPTR "\n" | |
| 5766 "LEFDE%d:\n\n", | |
| 5767 name, i, i, i, i, i, i, i, name, size, CFRAME_SIZE, i); /* Argument order must match the %s/%d sequence in the format above. */ | |
| 5768 } | |
| 5769 #if LJ_HASFFI | |
| 5770 if (fcsize) { /* Only when the loop above saw _lj_vm_ffi_call with a non-zero size. */ | |
| 5771 fprintf(ctx->fp, /* Second CIE (EH_frame2) with augmentation "zR": no lj_err_unwind_dwarf reference. */ | |
| 5772 "EH_frame2:\n" | |
| 5773 "\t.set L$set$y,LECIEY-LSCIEY\n" | |
| 5774 "\t.long L$set$y\n" | |
| 5775 "LSCIEY:\n" | |
| 5776 "\t.long 0\n" | |
| 5777 "\t.byte 0x1\n" | |
| 5778 "\t.ascii \"zR\\0\"\n" | |
| 5779 "\t.byte 0x1\n" | |
| 5780 "\t.byte 128-" SZPTR "\n" | |
| 5781 "\t.byte " REG_RA "\n" | |
| 5782 "\t.byte 1\n" /* augmentation length */ | |
| 5783 #if LJ_64 | |
| 5784 "\t.byte 0x1b\n" /* pcrel|sdata4 */ | |
| 5785 "\t.byte 0xc\n\t.byte " REG_SP "\n\t.byte " SZPTR "\n" | |
| 5786 #else | |
| 5787 "\t.byte 0x1b\n" /* pcrel|sdata4 */ | |
| 5788 "\t.byte 0xc\n\t.byte 0x5\n\t.byte 0x4\n" /* esp=5 on 32 bit MACH. */ | |
| 5789 #endif | |
| 5790 "\t.byte 0x80+" REG_RA "\n\t.byte 0x1\n" | |
| 5791 "\t.align " BSZPTR "\n" | |
| 5792 "LECIEY:\n\n"); | |
| 5793 fprintf(ctx->fp, /* FDE for _lj_vm_ffi_call, referencing EH_frame2; length is fcsize. */ | |
| 5794 "_lj_vm_ffi_call.eh:\n" | |
| 5795 "LSFDEY:\n" | |
| 5796 "\t.set L$set$yy,LEFDEY-LASFDEY\n" | |
| 5797 "\t.long L$set$yy\n" | |
| 5798 "LASFDEY:\n" | |
| 5799 "\t.long LASFDEY-EH_frame2\n" | |
| 5800 "\t.long _lj_vm_ffi_call-.\n" | |
| 5801 "\t.long %d\n" | |
| 5802 "\t.byte 0\n" /* augmentation length */ | |
| 5803 #if LJ_64 | |
| 5804 "\t.byte 0xe\n\t.byte 16\n" /* def_cfa_offset */ | |
| 5805 "\t.byte 0x86\n\t.byte 0x2\n" /* offset rbp */ | |
| 5806 "\t.byte 0xd\n\t.byte 0x6\n" /* def_cfa_register rbp */ | |
| 5807 "\t.byte 0x83\n\t.byte 0x3\n" /* offset rbx */ | |
| 5808 #else | |
| 5809 "\t.byte 0xe\n\t.byte 8\n" /* def_cfa_offset */ | |
| 5810 "\t.byte 0x84\n\t.byte 0x2\n" /* offset ebp (4 for MACH-O)*/ | |
| 5811 "\t.byte 0xd\n\t.byte 0x4\n" /* def_cfa_register ebp */ | |
| 5812 "\t.byte 0x83\n\t.byte 0x3\n" /* offset ebx */ | |
| 5813 #endif | |
| 5814 "\t.align " BSZPTR "\n" | |
| 5815 "LEFDEY:\n\n", fcsize); | |
| 5816 } | |
| 5817 #endif | |
| 5818 #if !LJ_64 | |
| 5819 fprintf(ctx->fp, /* 32 bit only: defines the non-lazy pointer that the EH_frame1 CIE above refers to. */ | |
| 5820 "\t.non_lazy_symbol_pointer\n" | |
| 5821 "L_lj_err_unwind_dwarf$non_lazy_ptr:\n" | |
| 5822 ".indirect_symbol _lj_err_unwind_dwarf\n" | |
| 5823 ".long 0\n\n"); | |
| 5824 fprintf(ctx->fp, "\t.section __IMPORT,__jump_table,symbol_stubs,pure_instructions+self_modifying_code,5\n"); | |
| 5825 { | |
| 5826 const char *const *xn; | |
| 5827 for (xn = ctx->extnames; *xn; xn++) /* Walk the NULL-terminated list of external names. */ | |
| 5828 if (strncmp(*xn, LABEL_PREFIX, sizeof(LABEL_PREFIX)-1)) /* Emit a stub only for names that do NOT start with LABEL_PREFIX. */ | |
| 5829 fprintf(ctx->fp, "L_%s$stub:\n\t.indirect_symbol _%s\n\t.ascii \"\\364\\364\\364\\364\\364\"\n", *xn, *xn); | |
| 5830 } | |
| 5831 #endif | |
| 5832 fprintf(ctx->fp, ".subsections_via_symbols\n"); | |
| 5833 } | |
| 5834 break; | |
| 5835 #endif | |
| 5836 default: /* Difficult for other modes. */ | |
| 5837 break; | |
| 5838 } | |
| 5839 } | |
| 5840 |