Mercurial
comparison third_party/luajit/src/lj_target.h @ 178:94705b5986b3
[ThirdParty] Added WRK and luajit for load testing.
| author | MrJuneJune <me@mrjunejune.com> |
|---|---|
| date | Thu, 22 Jan 2026 20:10:30 -0800 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 177:24fe8ff94056 | 178:94705b5986b3 |
|---|---|
| 1 /* | |
| 2 ** Definitions for target CPU. | |
| 3 ** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h | |
| 4 */ | |
| 5 | |
| 6 #ifndef _LJ_TARGET_H | |
| 7 #define _LJ_TARGET_H | |
| 8 | |
| 9 #include "lj_def.h" | |
| 10 #include "lj_arch.h" | |
| 11 | |
| 12 /* -- Registers and spill slots ------------------------------------------- */ | |
| 13 | |
| 14 /* Register type (uint8_t in ir->r). */ | |
| 15 typedef uint32_t Reg; | |
| 16 | |
| 17 /* The hi-bit is NOT set for an allocated register. This means the value | |
| 18 ** can be directly used without masking. The hi-bit is set for a register | |
| 19 ** allocation hint or for RID_INIT, RID_SINK or RID_SUNK. | |
| 20 */ | |
| 21 #define RID_NONE 0x80 | |
| 22 #define RID_MASK 0x7f | |
| 23 #define RID_INIT (RID_NONE|RID_MASK) | |
| 24 #define RID_SINK (RID_INIT-1) | |
| 25 #define RID_SUNK (RID_INIT-2) | |
| 26 | |
| 27 #define ra_noreg(r) ((r) & RID_NONE) | |
| 28 #define ra_hasreg(r) (!((r) & RID_NONE)) | |
| 29 | |
| 30 /* The ra_hashint() macro assumes a previous test for ra_noreg(). */ | |
| 31 #define ra_hashint(r) ((r) < RID_SUNK) | |
| 32 #define ra_gethint(r) ((Reg)((r) & RID_MASK)) | |
| 33 #define ra_sethint(rr, r) rr = (uint8_t)((r)|RID_NONE) | |
| 34 #define ra_samehint(r1, r2) (ra_gethint((r1)^(r2)) == 0) | |
| 35 | |
| 36 /* Spill slot 0 means no spill slot has been allocated. */ | |
| 37 #define SPS_NONE 0 | |
| 38 | |
| 39 #define ra_hasspill(s) ((s) != SPS_NONE) | |
| 40 | |
| 41 /* Combined register and spill slot (uint16_t in ir->prev). */ | |
| 42 typedef uint32_t RegSP; | |
| 43 | |
| 44 #define REGSP(r, s) ((r) + ((s) << 8)) | |
| 45 #define REGSP_HINT(r) ((r)|RID_NONE) | |
| 46 #define REGSP_INIT REGSP(RID_INIT, 0) | |
| 47 | |
| 48 #define regsp_reg(rs) ((rs) & 255) | |
| 49 #define regsp_spill(rs) ((rs) >> 8) | |
| 50 #define regsp_used(rs) \ | |
| 51 (((rs) & ~REGSP(RID_MASK, 0)) != REGSP(RID_NONE, 0)) | |
| 52 | |
| 53 /* -- Register sets ------------------------------------------------------- */ | |
| 54 | |
| 55 /* Bitset for registers. 32 registers suffice for most architectures. | |
| 56 ** Note that one set holds bits for both GPRs and FPRs. | |
| 57 */ | |
| 58 #if LJ_TARGET_PPC || LJ_TARGET_MIPS || LJ_TARGET_ARM64 | |
| 59 typedef uint64_t RegSet; | |
| 60 #else | |
| 61 typedef uint32_t RegSet; | |
| 62 #endif | |
| 63 | |
| 64 #define RID2RSET(r) (((RegSet)1) << (r)) | |
| 65 #define RSET_EMPTY ((RegSet)0) | |
| 66 #define RSET_RANGE(lo, hi) ((RID2RSET((hi)-(lo))-1) << (lo)) | |
| 67 | |
| 68 #define rset_test(rs, r) ((int)((rs) >> (r)) & 1) | |
| 69 #define rset_set(rs, r) (rs |= RID2RSET(r)) | |
| 70 #define rset_clear(rs, r) (rs &= ~RID2RSET(r)) | |
| 71 #define rset_exclude(rs, r) (rs & ~RID2RSET(r)) | |
| 72 #if LJ_TARGET_PPC || LJ_TARGET_MIPS || LJ_TARGET_ARM64 | |
| 73 #define rset_picktop(rs) ((Reg)(__builtin_clzll(rs)^63)) | |
| 74 #define rset_pickbot(rs) ((Reg)__builtin_ctzll(rs)) | |
| 75 #else | |
| 76 #define rset_picktop(rs) ((Reg)lj_fls(rs)) | |
| 77 #define rset_pickbot(rs) ((Reg)lj_ffs(rs)) | |
| 78 #endif | |
| 79 | |
| 80 /* -- Register allocation cost -------------------------------------------- */ | |
| 81 | |
| 82 /* The register allocation heuristic keeps track of the cost for allocating | |
| 83 ** a specific register: | |
| 84 ** | |
| 85 ** A free register (obviously) has a cost of 0 and a 1-bit in the free mask. | |
| 86 ** | |
| 87 ** An already allocated register has the (non-zero) IR reference in the lowest | |
| 88 ** bits and the result of a blended cost-model in the higher bits. | |
| 89 ** | |
| 90 ** The allocator first checks the free mask for a hit. Otherwise an (unrolled) | |
| 91 ** linear search for the minimum cost is used. The search doesn't need to | |
| 92 ** keep track of the position of the minimum, which makes it very fast. | |
| 93 ** The lowest bits of the minimum cost show the desired IR reference whose | |
| 94 ** register is the one to evict. | |
| 95 ** | |
| 96 ** Without the cost-model this degenerates to the standard heuristics for | |
| 97 ** (reverse) linear-scan register allocation. Since code generation is done | |
| 98 ** in reverse, a live interval extends from the last use to the first def. | |
| 99 ** For an SSA IR the IR reference is the first (and only) def and thus | |
| 100 ** trivially marks the end of the interval. The LSRA heuristics says to pick | |
| 101 ** the register whose live interval has the furthest extent, i.e. the lowest | |
| 102 ** IR reference in our case. | |
| 103 ** | |
| 104 ** A cost-model should take into account other factors, like spill-cost and | |
| 105 ** restore- or rematerialization-cost, which depend on the kind of instruction. | |
| 106 ** E.g. constants have zero spill costs, variant instructions have higher | |
| 107 ** costs than invariants and PHIs should preferably never be spilled. | |
| 108 ** | |
| 109 ** Here's a first cut at simple, but effective blended cost-model for R-LSRA: | |
| 110 ** - Due to careful design of the IR, constants already have lower IR | |
| 111 ** references than invariants and invariants have lower IR references | |
| 112 ** than variants. | |
| 113 ** - The cost in the upper 16 bits is the sum of the IR reference and a | |
| 114 ** weighted score. The score currently only takes into account whether | |
| 115 ** the IRT_ISPHI bit is set in the instruction type. | |
| 116 ** - The PHI weight is the minimum distance (in IR instructions) a PHI | |
| 117 ** reference has to be further apart from a non-PHI reference to be spilled. | |
| 118 ** - It should be a power of two (for speed) and must be between 2 and 32768. | |
| 119 ** Good values for the PHI weight seem to be between 40 and 150. | |
| 120 ** - Further study is required. | |
| 121 */ | |
| 122 #define REGCOST_PHI_WEIGHT 64 | |
| 123 | |
| 124 /* Cost for allocating a specific register. */ | |
| 125 typedef uint32_t RegCost; | |
| 126 | |
| 127 /* Note: assumes 16 bit IRRef1. */ | |
| 128 #define REGCOST(cost, ref) ((RegCost)(ref) + ((RegCost)(cost) << 16)) | |
| 129 #define regcost_ref(rc) ((IRRef1)(rc)) | |
| 130 | |
| 131 #define REGCOST_T(t) \ | |
| 132 ((RegCost)((t)&IRT_ISPHI) * (((RegCost)(REGCOST_PHI_WEIGHT)<<16)/IRT_ISPHI)) | |
| 133 #define REGCOST_REF_T(ref, t) (REGCOST((ref), (ref)) + REGCOST_T((t))) | |
| 134 | |
| 135 /* -- Target-specific definitions ----------------------------------------- */ | |
| 136 | |
| 137 #if LJ_TARGET_X86ORX64 | |
| 138 #include "lj_target_x86.h" | |
| 139 #elif LJ_TARGET_ARM | |
| 140 #include "lj_target_arm.h" | |
| 141 #elif LJ_TARGET_ARM64 | |
| 142 #include "lj_target_arm64.h" | |
| 143 #elif LJ_TARGET_PPC | |
| 144 #include "lj_target_ppc.h" | |
| 145 #elif LJ_TARGET_MIPS | |
| 146 #include "lj_target_mips.h" | |
| 147 #else | |
| 148 #error "Missing include for target CPU" | |
| 149 #endif | |
| 150 | |
| 151 #ifdef EXITSTUBS_PER_GROUP | |
| 152 /* Return the address of an exit stub. */ | |
| 153 static LJ_AINLINE char *exitstub_addr_(char **group, uint32_t exitno) | |
| 154 { | |
| 155 lj_assertX(group[exitno / EXITSTUBS_PER_GROUP] != NULL, | |
| 156 "exit stub group for exit %d uninitialized", exitno); | |
| 157 return (char *)group[exitno / EXITSTUBS_PER_GROUP] + | |
| 158 EXITSTUB_SPACING*(exitno % EXITSTUBS_PER_GROUP); | |
| 159 } | |
| 160 /* Avoid dependence on lj_jit.h if only including lj_target.h. */ | |
| 161 #define exitstub_addr(J, exitno) \ | |
| 162 ((MCode *)exitstub_addr_((char **)((J)->exitstubgroup), (exitno))) | |
| 163 #endif | |
| 164 | |
| 165 #endif |