Mercurial
comparison third_party/luajit/src/lj_target_x86.h @ 186:8cf4ec5e2191 hg-web
Fixed merge conflict.
| author | MrJuneJune <me@mrjunejune.com> |
|---|---|
| date | Fri, 23 Jan 2026 22:38:59 -0800 |
| parents | 94705b5986b3 |
| children |
comparison
equal
deleted
inserted
replaced
| 176:fed99fc04e12 | 186:8cf4ec5e2191 |
|---|---|
| 1 /* | |
| 2 ** Definitions for x86 and x64 CPUs. | |
| 3 ** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h | |
| 4 */ | |
| 5 | |
| 6 #ifndef _LJ_TARGET_X86_H | |
| 7 #define _LJ_TARGET_X86_H | |
| 8 | |
| 9 /* -- Register IDs -------------------------------------------------------- */ | |
| 10 | |
| 11 #if LJ_64 | |
| 12 #define GPRDEF(_) \ | |
| 13 _(EAX) _(ECX) _(EDX) _(EBX) _(ESP) _(EBP) _(ESI) _(EDI) \ | |
| 14 _(R8D) _(R9D) _(R10D) _(R11D) _(R12D) _(R13D) _(R14D) _(R15D) | |
| 15 #define FPRDEF(_) \ | |
| 16 _(XMM0) _(XMM1) _(XMM2) _(XMM3) _(XMM4) _(XMM5) _(XMM6) _(XMM7) \ | |
| 17 _(XMM8) _(XMM9) _(XMM10) _(XMM11) _(XMM12) _(XMM13) _(XMM14) _(XMM15) | |
| 18 #else | |
| 19 #define GPRDEF(_) \ | |
| 20 _(EAX) _(ECX) _(EDX) _(EBX) _(ESP) _(EBP) _(ESI) _(EDI) | |
| 21 #define FPRDEF(_) \ | |
| 22 _(XMM0) _(XMM1) _(XMM2) _(XMM3) _(XMM4) _(XMM5) _(XMM6) _(XMM7) | |
| 23 #endif | |
| 24 #define VRIDDEF(_) \ | |
| 25 _(MRM) _(RIP) | |
| 26 | |
| 27 #define RIDENUM(name) RID_##name, /* Pastes name onto the RID_ prefix to form one enumerator, including its trailing comma. */ | |
| 28 | |
| 29 enum { | |
| 30 GPRDEF(RIDENUM) /* General-purpose registers (GPRs). */ | |
| 31 FPRDEF(RIDENUM) /* Floating-point registers (FPRs). */ | |
| 32 RID_MAX, | |
| 33 RID_MRM = RID_MAX, /* Pseudo-id for ModRM operand. */ | |
| 34 RID_RIP = RID_MAX+5, /* Pseudo-id for RIP (x64 only), rm bits = 5. */ | |
| 35 | |
| 36 /* Calling conventions. */ | |
| 37 RID_SP = RID_ESP, | |
| 38 RID_RET = RID_EAX, | |
| 39 #if LJ_64 | |
| 40 RID_FPRET = RID_XMM0, | |
| 41 #endif | |
| 42 RID_RETLO = RID_EAX, | |
| 43 RID_RETHI = RID_EDX, | |
| 44 | |
| 45 /* These definitions must match with the *.dasc file(s): */ | |
| 46 RID_BASE = RID_EDX, /* Interpreter BASE. */ | |
| 47 #if LJ_64 && !LJ_ABI_WIN | |
| 48 RID_LPC = RID_EBX, /* Interpreter PC. */ | |
| 49 RID_DISPATCH = RID_R14D, /* Interpreter DISPATCH table. */ | |
| 50 #else | |
| 51 RID_LPC = RID_ESI, /* Interpreter PC. */ | |
| 52 RID_DISPATCH = RID_EBX, /* Interpreter DISPATCH table. */ | |
| 53 #endif | |
| 54 | |
| 55 /* Register ranges [min, max) and number of registers. */ | |
| 56 RID_MIN_GPR = RID_EAX, | |
| 57 RID_MIN_FPR = RID_XMM0, | |
| 58 RID_MAX_GPR = RID_MIN_FPR, | |
| 59 RID_MAX_FPR = RID_MAX, | |
| 60 RID_NUM_GPR = RID_MAX_GPR - RID_MIN_GPR, | |
| 61 RID_NUM_FPR = RID_MAX_FPR - RID_MIN_FPR, | |
| 62 }; | |
| 63 | |
| 64 /* -- Register sets ------------------------------------------------------- */ | |
| 65 | |
| 66 /* Make use of all registers, except the stack pointer (and maybe DISPATCH). */ | |
| 67 #define RSET_GPR (RSET_RANGE(RID_MIN_GPR, RID_MAX_GPR) \ | |
| 68 - RID2RSET(RID_ESP) \ | |
| 69 - LJ_GC64*RID2RSET(RID_DISPATCH)) | |
| 70 #define RSET_FPR (RSET_RANGE(RID_MIN_FPR, RID_MAX_FPR)) | |
| 71 #define RSET_ALL (RSET_GPR|RSET_FPR) | |
| 72 #define RSET_INIT RSET_ALL | |
| 73 | |
| 74 #if LJ_64 | |
| 75 /* Note: this requires the use of FORCE_REX! */ | |
| 76 #define RSET_GPR8 RSET_GPR | |
| 77 #else | |
| 78 #define RSET_GPR8 (RSET_RANGE(RID_EAX, RID_EBX+1)) | |
| 79 #endif | |
| 80 | |
| 81 /* ABI-specific register sets. */ | |
| 82 #define RSET_ACD (RID2RSET(RID_EAX)|RID2RSET(RID_ECX)|RID2RSET(RID_EDX)) | |
| 83 #if LJ_64 | |
| 84 #if LJ_ABI_WIN | |
| 85 /* Windows x64 ABI. */ | |
| 86 #define RSET_SCRATCH \ | |
| 87 (RSET_ACD|RSET_RANGE(RID_R8D, RID_R11D+1)|RSET_RANGE(RID_XMM0, RID_XMM5+1)) | |
| 88 #define REGARG_GPRS \ | |
| 89 (RID_ECX|((RID_EDX|((RID_R8D|(RID_R9D<<5))<<5))<<5)) | |
| 90 #define REGARG_NUMGPR 4 | |
| 91 #define REGARG_NUMFPR 4 | |
| 92 #define REGARG_FIRSTFPR RID_XMM0 | |
| 93 #define REGARG_LASTFPR RID_XMM3 | |
| 94 #define STACKARG_OFS (4*8) | |
| 95 #else | |
| 96 /* The rest of the civilized x64 world has a common ABI. */ | |
| 97 #define RSET_SCRATCH \ | |
| 98 (RSET_ACD|RSET_RANGE(RID_ESI, RID_R11D+1)|RSET_FPR) | |
| 99 #define REGARG_GPRS \ | |
| 100 (RID_EDI|((RID_ESI|((RID_EDX|((RID_ECX|((RID_R8D|(RID_R9D \ | |
| 101 <<5))<<5))<<5))<<5))<<5)) | |
| 102 #define REGARG_NUMGPR 6 | |
| 103 #define REGARG_NUMFPR 8 | |
| 104 #define REGARG_FIRSTFPR RID_XMM0 | |
| 105 #define REGARG_LASTFPR RID_XMM7 | |
| 106 #define STACKARG_OFS 0 | |
| 107 #endif | |
| 108 #else | |
| 109 /* Common x86 ABI. */ | |
| 110 #define RSET_SCRATCH (RSET_ACD|RSET_FPR) | |
| 111 #define REGARG_GPRS (RID_ECX|(RID_EDX<<5)) /* Fastcall only. */ | |
| 112 #define REGARG_NUMGPR 2 /* Fastcall only. */ | |
| 113 #define REGARG_NUMFPR 0 | |
| 114 #define STACKARG_OFS 0 | |
| 115 #endif | |
| 116 | |
| 117 #if LJ_64 | |
| 118 /* Prefer the low 8 regs of each type to reduce REX prefixes. */ | |
| 119 #undef rset_picktop | |
| 120 #define rset_picktop(rs) (lj_fls(lj_bswap(rs)) ^ 0x18) | |
| 121 #endif | |
| 122 | |
| 123 /* -- Spill slots --------------------------------------------------------- */ | |
| 124 | |
| 125 /* Spill slots are 32 bits wide. An even/odd pair is used for FPRs. | |
| 126 ** | |
| 127 ** SPS_FIXED: Available fixed spill slots in interpreter frame. | |
| 128 ** This definition must match the *.dasc file(s). | |
| 129 ** | |
| 130 ** SPS_FIRST: First spill slot for general use. Reserve at least two 32-bit slots. | |
| 131 */ | |
| 132 #if LJ_64 | |
| 133 #if LJ_ABI_WIN | |
| 134 #define SPS_FIXED (4*2) | |
| 135 #define SPS_FIRST (4*2) /* Don't use callee register save area. */ | |
| 136 #else | |
| 137 #if LJ_GC64 | |
| 138 #define SPS_FIXED 2 | |
| 139 #else | |
| 140 #define SPS_FIXED 4 | |
| 141 #endif | |
| 142 #define SPS_FIRST 2 | |
| 143 #endif | |
| 144 #else | |
| 145 #define SPS_FIXED 6 | |
| 146 #define SPS_FIRST 2 | |
| 147 #endif | |
| 148 | |
| 149 #define SPOFS_TMP 0 | |
| 150 | |
| 151 #define sps_scale(slot) (4 * (int32_t)(slot)) /* Slot index times 4: spill slots are 32 bit (4 byte) units. */ | |
| 152 #define sps_align(slot) (((slot) - SPS_FIXED + 3) & ~3) /* Subtracts SPS_FIXED, then rounds up to a multiple of 4 slots. */ | |
| 153 | |
| 154 /* -- Exit state ---------------------------------------------------------- */ | |
| 155 | |
| 156 /* This definition must match with the *.dasc file(s). */ | |
| 157 typedef struct { | |
| 158 lua_Number fpr[RID_NUM_FPR]; /* Floating-point registers. */ | |
| 159 intptr_t gpr[RID_NUM_GPR]; /* General-purpose registers. */ | |
| 160 int32_t spill[256]; /* Spill slots. */ | |
| 161 } ExitState; | |
| 162 | |
| 163 /* Limited by the range of a short fwd jump (127): (2+2)*(32-1)-2 = 122. */ | |
| 164 #define EXITSTUB_SPACING (2+2) | |
| 165 #define EXITSTUBS_PER_GROUP 32 | |
| 166 | |
| 167 #define EXITTRACE_VMSTATE 1 /* g->vmstate has traceno on exit. */ | |
| 168 | |
| 169 /* -- x86 ModRM operand encoding ------------------------------------------ */ | |
| 170 | |
| 171 typedef enum { | |
| 172 XM_OFS0 = 0x00, XM_OFS8 = 0x40, XM_OFS32 = 0x80, XM_REG = 0xc0, | |
| 173 XM_SCALE1 = 0x00, XM_SCALE2 = 0x40, XM_SCALE4 = 0x80, XM_SCALE8 = 0xc0, | |
| 174 XM_MASK = 0xc0 | |
| 175 } x86Mode; | |
| 176 | |
| 177 /* Structure to hold variable ModRM operand. */ | |
| 178 typedef struct { | |
| 179 int32_t ofs; /* Offset. */ | |
| 180 uint8_t base; /* Base register or RID_NONE. */ | |
| 181 uint8_t idx; /* Index register or RID_NONE. */ | |
| 182 uint8_t scale; /* Index scale (XM_SCALE1 .. XM_SCALE8). */ | |
| 183 } x86ModRM; | |
| 184 | |
| 185 /* -- Opcodes ------------------------------------------------------------- */ | |
| 186 | |
| 187 /* Macros to construct variable-length x86 opcodes. -(len+1) is in LSB: 0xfe, 0xfd and 0xfc mark 1, 2 and 3 opcode bytes for the XO_ macros. The opcode bytes occupy the upper bytes in emission order, with the last opcode byte in bits 24-31. NOTE(review): 0x##o<<24 is evaluated as int before the uint32_t cast, so o >= 0x80 shifts into the sign bit -- confirm every supported compiler accepts this. */ | |
| 188 #define XO_(o) ((uint32_t)(0x0000fe + (0x##o<<24))) | |
| 189 #define XO_FPU(a,b) ((uint32_t)(0x00fd + (0x##a<<16)+(0x##b<<24))) | |
| 190 #define XO_0f(o) ((uint32_t)(0x0f00fd + (0x##o<<24))) | |
| 191 #define XO_66(o) ((uint32_t)(0x6600fd + (0x##o<<24))) | |
| 192 #define XO_660f(o) ((uint32_t)(0x0f66fc + (0x##o<<24))) | |
| 193 #define XO_f20f(o) ((uint32_t)(0x0ff2fc + (0x##o<<24))) | |
| 194 #define XO_f30f(o) ((uint32_t)(0x0ff3fc + (0x##o<<24))) | |
| 195 | |
| 196 #define XV_660f38(o) ((uint32_t)(0x79e2c4 + (0x##o<<24))) /* XV_ macros: the LSB is 0xc4 instead of a length marker; presumably the 3-byte VEX escape byte -- confirm against the emitter. */ | |
| 197 #define XV_f20f38(o) ((uint32_t)(0x7be2c4 + (0x##o<<24))) | |
| 198 #define XV_f20f3a(o) ((uint32_t)(0x7be3c4 + (0x##o<<24))) | |
| 199 #define XV_f30f38(o) ((uint32_t)(0x7ae2c4 + (0x##o<<24))) | |
| 200 | |
| 201 /* This list of x86 opcodes is not intended to be complete. Opcodes are only | |
| 202 ** included when needed. Take a look at DynASM or jit.dis_x86 to see the | |
| 203 ** whole mess. | |
| 204 */ | |
| 205 typedef enum { | |
| 206 /* Fixed length opcodes. XI_* prefix. */ | |
| 207 XI_O16 = 0x66, | |
| 208 XI_NOP = 0x90, | |
| 209 XI_XCHGa = 0x90, | |
| 210 XI_CALL = 0xe8, | |
| 211 XI_JMP = 0xe9, | |
| 212 XI_JMPs = 0xeb, | |
| 213 XI_PUSH = 0x50, /* Really 50+r. */ | |
| 214 XI_JCCs = 0x70, /* Really 7x. */ | |
| 215 XI_JCCn = 0x80, /* Really 0f8x. */ | |
| 216 XI_LEA = 0x8d, | |
| 217 XI_MOVrib = 0xb0, /* Really b0+r. */ | |
| 218 XI_MOVri = 0xb8, /* Really b8+r. */ | |
| 219 XI_ARITHib = 0x80, | |
| 220 XI_ARITHi = 0x81, | |
| 221 XI_ARITHi8 = 0x83, | |
| 222 XI_PUSHi8 = 0x6a, | |
| 223 XI_TESTb = 0x84, | |
| 224 XI_TEST = 0x85, | |
| 225 XI_INT3 = 0xcc, | |
| 226 XI_MOVmi = 0xc7, | |
| 227 XI_GROUP5 = 0xff, | |
| 228 | |
| 229 /* Note: little-endian byte-order! */ | |
| 230 XI_FLDZ = 0xeed9, | |
| 231 XI_FLD1 = 0xe8d9, | |
| 232 XI_FDUP = 0xc0d9, /* Really fld st0. */ | |
| 233 XI_FPOP = 0xd8dd, /* Really fstp st0. */ | |
| 234 XI_FPOP1 = 0xd9dd, /* Really fstp st1. */ | |
| 235 XI_FRNDINT = 0xfcd9, | |
| 236 XI_FSCALE = 0xfdd9, | |
| 237 XI_FYL2X = 0xf1d9, | |
| 238 | |
| 239 /* VEX-encoded instructions. XV_* prefix. */ | |
| 240 XV_RORX = XV_f20f3a(f0), | |
| 241 XV_SARX = XV_f30f38(f7), | |
| 242 XV_SHLX = XV_660f38(f7), | |
| 243 XV_SHRX = XV_f20f38(f7), | |
| 244 | |
| 245 /* Variable-length opcodes. XO_* prefix. */ | |
| 246 XO_OR = XO_(0b), | |
| 247 XO_MOV = XO_(8b), | |
| 248 XO_MOVto = XO_(89), | |
| 249 XO_MOVtow = XO_66(89), | |
| 250 XO_MOVtob = XO_(88), | |
| 251 XO_MOVmi = XO_(c7), | |
| 252 XO_MOVmib = XO_(c6), | |
| 253 XO_LEA = XO_(8d), | |
| 254 XO_ARITHib = XO_(80), | |
| 255 XO_ARITHi = XO_(81), | |
| 256 XO_ARITHi8 = XO_(83), | |
| 257 XO_ARITHiw8 = XO_66(83), | |
| 258 XO_SHIFTi = XO_(c1), | |
| 259 XO_SHIFT1 = XO_(d1), | |
| 260 XO_SHIFTcl = XO_(d3), | |
| 261 XO_IMUL = XO_0f(af), | |
| 262 XO_IMULi = XO_(69), | |
| 263 XO_IMULi8 = XO_(6b), | |
| 264 XO_CMP = XO_(3b), | |
| 265 XO_TESTb = XO_(84), | |
| 266 XO_TEST = XO_(85), | |
| 267 XO_GROUP3b = XO_(f6), | |
| 268 XO_GROUP3 = XO_(f7), | |
| 269 XO_GROUP5b = XO_(fe), | |
| 270 XO_GROUP5 = XO_(ff), | |
| 271 XO_MOVZXb = XO_0f(b6), | |
| 272 XO_MOVZXw = XO_0f(b7), | |
| 273 XO_MOVSXb = XO_0f(be), | |
| 274 XO_MOVSXw = XO_0f(bf), | |
| 275 XO_MOVSXd = XO_(63), | |
| 276 XO_BSWAP = XO_0f(c8), | |
| 277 XO_CMOV = XO_0f(40), | |
| 278 | |
| 279 XO_MOVSD = XO_f20f(10), | |
| 280 XO_MOVSDto = XO_f20f(11), | |
| 281 XO_MOVSS = XO_f30f(10), | |
| 282 XO_MOVSSto = XO_f30f(11), | |
| 283 XO_MOVLPD = XO_660f(12), | |
| 284 XO_MOVAPS = XO_0f(28), | |
| 285 XO_XORPS = XO_0f(57), | |
| 286 XO_ANDPS = XO_0f(54), | |
| 287 XO_ADDSD = XO_f20f(58), | |
| 288 XO_SUBSD = XO_f20f(5c), | |
| 289 XO_MULSD = XO_f20f(59), | |
| 290 XO_DIVSD = XO_f20f(5e), | |
| 291 XO_SQRTSD = XO_f20f(51), | |
| 292 XO_MINSD = XO_f20f(5d), | |
| 293 XO_MAXSD = XO_f20f(5f), | |
| 294 XO_ROUNDSD = 0x0b3a0ffc, /* Really 66 0f 3a 0b. See asm_fpmath. */ | |
| 295 XO_UCOMISD = XO_660f(2e), | |
| 296 XO_CVTSI2SD = XO_f20f(2a), | |
| 297 XO_CVTTSD2SI= XO_f20f(2c), | |
| 298 XO_CVTSI2SS = XO_f30f(2a), | |
| 299 XO_CVTTSS2SI= XO_f30f(2c), | |
| 300 XO_CVTSS2SD = XO_f30f(5a), | |
| 301 XO_CVTSD2SS = XO_f20f(5a), | |
| 302 XO_ADDSS = XO_f30f(58), | |
| 303 XO_MOVD = XO_660f(6e), | |
| 304 XO_MOVDto = XO_660f(7e), | |
| 305 | |
| 306 XO_FLDd = XO_(d9), XOg_FLDd = 0, | |
| 307 XO_FLDq = XO_(dd), XOg_FLDq = 0, | |
| 308 XO_FILDd = XO_(db), XOg_FILDd = 0, | |
| 309 XO_FILDq = XO_(df), XOg_FILDq = 5, | |
| 310 XO_FSTPd = XO_(d9), XOg_FSTPd = 3, | |
| 311 XO_FSTPq = XO_(dd), XOg_FSTPq = 3, | |
| 312 XO_FISTPq = XO_(df), XOg_FISTPq = 7, | |
| 313 XO_FISTTPq = XO_(dd), XOg_FISTTPq = 1, | |
| 314 XO_FADDq = XO_(dc), XOg_FADDq = 0, | |
| 315 XO_FLDCW = XO_(d9), XOg_FLDCW = 5, | |
| 316 XO_FNSTCW = XO_(d9), XOg_FNSTCW = 7 | |
| 317 } x86Op; | |
| 318 | |
| 319 /* x86 opcode groups. An x86Group packs three fields (see XG_): i8 in bits 16-23, i in bits 8-15 and g in the low bits. */ | |
| 320 typedef uint32_t x86Group; | |
| 321 | |
| 322 #define XG_(i8, i, g) ((x86Group)(((i8) << 16) + ((i) << 8) + (g))) | |
| 323 #define XG_ARITHi(g) XG_(XI_ARITHi8, XI_ARITHi, g) | |
| 324 #define XG_TOXOi(xg) ((x86Op)(0x000000fe + (((xg)<<16) & 0xff000000))) /* One-byte opcode (0xfe marker in LSB) taken from bits 8-15 of xg, i.e. the i field. */ | |
| 325 #define XG_TOXOi8(xg) ((x86Op)(0x000000fe + (((xg)<<8) & 0xff000000))) /* Same, but taken from bits 16-23 of xg, i.e. the i8 field. */ | |
| 326 | |
| 327 #define XO_ARITH(a) ((x86Op)(0x030000fe + ((a)<<27))) /* One-byte opcode 0x03 + 8*a. */ | |
| 328 #define XO_ARITHw(a) ((x86Op)(0x036600fd + ((a)<<27))) /* As XO_ARITH, but two bytes: 0x66 followed by 0x03 + 8*a. */ | |
| 329 | |
| 330 typedef enum { | |
| 331 XOg_ADD, XOg_OR, XOg_ADC, XOg_SBB, XOg_AND, XOg_SUB, XOg_XOR, XOg_CMP, | |
| 332 XOg_X_IMUL | |
| 333 } x86Arith; | |
| 334 | |
| 335 typedef enum { | |
| 336 XOg_ROL, XOg_ROR, XOg_RCL, XOg_RCR, XOg_SHL, XOg_SHR, XOg_SAL, XOg_SAR | |
| 337 } x86Shift; | |
| 338 | |
| 339 typedef enum { | |
| 340 XOg_TEST, XOg_TEST_, XOg_NOT, XOg_NEG, XOg_MUL, XOg_IMUL, XOg_DIV, XOg_IDIV | |
| 341 } x86Group3; | |
| 342 | |
| 343 typedef enum { | |
| 344 XOg_INC, XOg_DEC, XOg_CALL, XOg_CALLfar, XOg_JMP, XOg_JMPfar, XOg_PUSH | |
| 345 } x86Group5; | |
| 346 | |
| 347 /* x86 condition codes. */ | |
| 348 typedef enum { | |
| 349 CC_O, CC_NO, CC_B, CC_NB, CC_E, CC_NE, CC_BE, CC_NBE, | |
| 350 CC_S, CC_NS, CC_P, CC_NP, CC_L, CC_NL, CC_LE, CC_NLE, | |
| 351 CC_C = CC_B, CC_NAE = CC_C, CC_NC = CC_NB, CC_AE = CC_NB, | |
| 352 CC_Z = CC_E, CC_NZ = CC_NE, CC_NA = CC_BE, CC_A = CC_NBE, | |
| 353 CC_PE = CC_P, CC_PO = CC_NP, CC_NGE = CC_L, CC_GE = CC_NL, | |
| 354 CC_NG = CC_LE, CC_G = CC_NLE | |
| 355 } x86CC; | |
| 356 | |
| 357 #endif |