Mercurial
comparison third_party/luajit/src/lj_str.c @ 178:94705b5986b3
[ThirdParty] Added WRK and luajit for load testing.
| author | MrJuneJune <me@mrjunejune.com> |
|---|---|
| date | Thu, 22 Jan 2026 20:10:30 -0800 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 177:24fe8ff94056 | 178:94705b5986b3 |
|---|---|
| 1 /* | |
| 2 ** String handling. | |
| 3 ** Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h | |
| 4 */ | |
| 5 | |
| 6 #define lj_str_c | |
| 7 #define LUA_CORE | |
| 8 | |
| 9 #include "lj_obj.h" | |
| 10 #include "lj_gc.h" | |
| 11 #include "lj_err.h" | |
| 12 #include "lj_str.h" | |
| 13 #include "lj_char.h" | |
| 14 #include "lj_prng.h" | |
| 15 | |
| 16 /* -- String helpers ------------------------------------------------------ */ | |
| 17 | |
| 18 /* Ordered compare of strings. Assumes string data is 4-byte aligned. */ | |
| 19 int32_t LJ_FASTCALL lj_str_cmp(GCstr *a, GCstr *b) | |
| 20 { | |
| 21 MSize i, n = a->len > b->len ? b->len : a->len; | |
| 22 for (i = 0; i < n; i += 4) { | |
| 23 /* Note: innocuous access up to end of string + 3. */ | |
| 24 uint32_t va = *(const uint32_t *)(strdata(a)+i); | |
| 25 uint32_t vb = *(const uint32_t *)(strdata(b)+i); | |
| 26 if (va != vb) { | |
| 27 #if LJ_LE | |
| 28 va = lj_bswap(va); vb = lj_bswap(vb); | |
| 29 #endif | |
| 30 i -= n; | |
| 31 if ((int32_t)i >= -3) { | |
| 32 va >>= 32+(i<<3); vb >>= 32+(i<<3); | |
| 33 if (va == vb) break; | |
| 34 } | |
| 35 return va < vb ? -1 : 1; | |
| 36 } | |
| 37 } | |
| 38 return (int32_t)(a->len - b->len); | |
| 39 } | |
| 40 | |
| 41 /* Find fixed string p inside string s. */ | |
| 42 const char *lj_str_find(const char *s, const char *p, MSize slen, MSize plen) | |
| 43 { | |
| 44 if (plen <= slen) { | |
| 45 if (plen == 0) { | |
| 46 return s; | |
| 47 } else { | |
| 48 int c = *(const uint8_t *)p++; | |
| 49 plen--; slen -= plen; | |
| 50 while (slen) { | |
| 51 const char *q = (const char *)memchr(s, c, slen); | |
| 52 if (!q) break; | |
| 53 if (memcmp(q+1, p, plen) == 0) return q; | |
| 54 q++; slen -= (MSize)(q-s); s = q; | |
| 55 } | |
| 56 } | |
| 57 } | |
| 58 return NULL; | |
| 59 } | |
| 60 | |
| 61 /* Check whether a string has a pattern matching character. */ | |
| 62 int lj_str_haspattern(GCstr *s) | |
| 63 { | |
| 64 const char *p = strdata(s), *q = p + s->len; | |
| 65 while (p < q) { | |
| 66 int c = *(const uint8_t *)p++; | |
| 67 if (lj_char_ispunct(c) && strchr("^$*+?.([%-", c)) | |
| 68 return 1; /* Found a pattern matching char. */ | |
| 69 } | |
| 70 return 0; /* No pattern matching chars found. */ | |
| 71 } | |
| 72 | |
| 73 /* -- String hashing ------------------------------------------------------ */ | |
| 74 | |
| 75 /* Keyed sparse ARX string hash. Constant time. */ | |
| 76 static StrHash hash_sparse(uint64_t seed, const char *str, MSize len) | |
| 77 { | |
| 78 /* Constants taken from lookup3 hash by Bob Jenkins. */ | |
| 79 StrHash a, b, h = len ^ (StrHash)seed; | |
| 80 if (len >= 4) { /* Caveat: unaligned access! */ | |
| 81 a = lj_getu32(str); | |
| 82 h ^= lj_getu32(str+len-4); | |
| 83 b = lj_getu32(str+(len>>1)-2); | |
| 84 h ^= b; h -= lj_rol(b, 14); | |
| 85 b += lj_getu32(str+(len>>2)-1); | |
| 86 } else { | |
| 87 a = *(const uint8_t *)str; | |
| 88 h ^= *(const uint8_t *)(str+len-1); | |
| 89 b = *(const uint8_t *)(str+(len>>1)); | |
| 90 h ^= b; h -= lj_rol(b, 14); | |
| 91 } | |
| 92 a ^= h; a -= lj_rol(h, 11); | |
| 93 b ^= a; b -= lj_rol(a, 25); | |
| 94 h ^= b; h -= lj_rol(b, 16); | |
| 95 return h; | |
| 96 } | |
| 97 | |
| 98 #if LUAJIT_SECURITY_STRHASH | |
| 99 /* Keyed dense ARX string hash. Linear time. */ | |
| 100 static LJ_NOINLINE StrHash hash_dense(uint64_t seed, StrHash h, | |
| 101 const char *str, MSize len) | |
| 102 { | |
| 103 StrHash b = lj_bswap(lj_rol(h ^ (StrHash)(seed >> 32), 4)); | |
| 104 if (len > 12) { | |
| 105 StrHash a = (StrHash)seed; | |
| 106 const char *pe = str+len-12, *p = pe, *q = str; | |
| 107 do { | |
| 108 a += lj_getu32(p); | |
| 109 b += lj_getu32(p+4); | |
| 110 h += lj_getu32(p+8); | |
| 111 p = q; q += 12; | |
| 112 h ^= b; h -= lj_rol(b, 14); | |
| 113 a ^= h; a -= lj_rol(h, 11); | |
| 114 b ^= a; b -= lj_rol(a, 25); | |
| 115 } while (p < pe); | |
| 116 h ^= b; h -= lj_rol(b, 16); | |
| 117 a ^= h; a -= lj_rol(h, 4); | |
| 118 b ^= a; b -= lj_rol(a, 14); | |
| 119 } | |
| 120 return b; | |
| 121 } | |
| 122 #endif | |
| 123 | |
| 124 /* -- String interning ---------------------------------------------------- */ | |
| 125 | |
| 126 #define LJ_STR_MAXCOLL 32 | |
| 127 | |
| 128 /* Resize the string interning hash table (grow and shrink). */ | |
| 129 void lj_str_resize(lua_State *L, MSize newmask) | |
| 130 { | |
| 131 global_State *g = G(L); | |
| 132 GCRef *newtab, *oldtab = g->str.tab; | |
| 133 MSize i; | |
| 134 | |
| 135 /* No resizing during GC traversal or if already too big. */ | |
| 136 if (g->gc.state == GCSsweepstring || newmask >= LJ_MAX_STRTAB-1) | |
| 137 return; | |
| 138 | |
| 139 newtab = lj_mem_newvec(L, newmask+1, GCRef); | |
| 140 memset(newtab, 0, (newmask+1)*sizeof(GCRef)); | |
| 141 | |
| 142 #if LUAJIT_SECURITY_STRHASH | |
| 143 /* Check which chains need secondary hashes. */ | |
| 144 if (g->str.second) { | |
| 145 int newsecond = 0; | |
| 146 /* Compute primary chain lengths. */ | |
| 147 for (i = g->str.mask; i != ~(MSize)0; i--) { | |
| 148 GCobj *o = (GCobj *)(gcrefu(oldtab[i]) & ~(uintptr_t)1); | |
| 149 while (o) { | |
| 150 GCstr *s = gco2str(o); | |
| 151 MSize hash = s->hashalg ? hash_sparse(g->str.seed, strdata(s), s->len) : | |
| 152 s->hash; | |
| 153 hash &= newmask; | |
| 154 setgcrefp(newtab[hash], gcrefu(newtab[hash]) + 1); | |
| 155 o = gcnext(o); | |
| 156 } | |
| 157 } | |
| 158 /* Mark secondary chains. */ | |
| 159 for (i = newmask; i != ~(MSize)0; i--) { | |
| 160 int secondary = gcrefu(newtab[i]) > LJ_STR_MAXCOLL; | |
| 161 newsecond |= secondary; | |
| 162 setgcrefp(newtab[i], secondary); | |
| 163 } | |
| 164 g->str.second = newsecond; | |
| 165 } | |
| 166 #endif | |
| 167 | |
| 168 /* Reinsert all strings from the old table into the new table. */ | |
| 169 for (i = g->str.mask; i != ~(MSize)0; i--) { | |
| 170 GCobj *o = (GCobj *)(gcrefu(oldtab[i]) & ~(uintptr_t)1); | |
| 171 while (o) { | |
| 172 GCobj *next = gcnext(o); | |
| 173 GCstr *s = gco2str(o); | |
| 174 MSize hash = s->hash; | |
| 175 #if LUAJIT_SECURITY_STRHASH | |
| 176 uintptr_t u; | |
| 177 if (LJ_LIKELY(!s->hashalg)) { /* String hashed with primary hash. */ | |
| 178 hash &= newmask; | |
| 179 u = gcrefu(newtab[hash]); | |
| 180 if (LJ_UNLIKELY(u & 1)) { /* Switch string to secondary hash. */ | |
| 181 s->hash = hash = hash_dense(g->str.seed, s->hash, strdata(s), s->len); | |
| 182 s->hashalg = 1; | |
| 183 hash &= newmask; | |
| 184 u = gcrefu(newtab[hash]); | |
| 185 } | |
| 186 } else { /* String hashed with secondary hash. */ | |
| 187 MSize shash = hash_sparse(g->str.seed, strdata(s), s->len); | |
| 188 u = gcrefu(newtab[shash & newmask]); | |
| 189 if (u & 1) { | |
| 190 hash &= newmask; | |
| 191 u = gcrefu(newtab[hash]); | |
| 192 } else { /* Revert string back to primary hash. */ | |
| 193 s->hash = shash; | |
| 194 s->hashalg = 0; | |
| 195 hash = (shash & newmask); | |
| 196 } | |
| 197 } | |
| 198 /* NOBARRIER: The string table is a GC root. */ | |
| 199 setgcrefp(o->gch.nextgc, (u & ~(uintptr_t)1)); | |
| 200 setgcrefp(newtab[hash], ((uintptr_t)o | (u & 1))); | |
| 201 #else | |
| 202 hash &= newmask; | |
| 203 /* NOBARRIER: The string table is a GC root. */ | |
| 204 setgcrefr(o->gch.nextgc, newtab[hash]); | |
| 205 setgcref(newtab[hash], o); | |
| 206 #endif | |
| 207 o = next; | |
| 208 } | |
| 209 } | |
| 210 | |
| 211 /* Free old table and replace with new table. */ | |
| 212 lj_str_freetab(g); | |
| 213 g->str.tab = newtab; | |
| 214 g->str.mask = newmask; | |
| 215 } | |
| 216 | |
| 217 #if LUAJIT_SECURITY_STRHASH | |
| 218 /* Rehash and rechain all strings in a chain. */ | |
| 219 static LJ_NOINLINE GCstr *lj_str_rehash_chain(lua_State *L, StrHash hashc, | |
| 220 const char *str, MSize len) | |
| 221 { | |
| 222 global_State *g = G(L); | |
| 223 int ow = g->gc.state == GCSsweepstring ? otherwhite(g) : 0; /* Sweeping? */ | |
| 224 GCRef *strtab = g->str.tab; | |
| 225 MSize strmask = g->str.mask; | |
| 226 GCobj *o = gcref(strtab[hashc & strmask]); | |
| 227 setgcrefp(strtab[hashc & strmask], (void *)((uintptr_t)1)); | |
| 228 g->str.second = 1; | |
| 229 while (o) { | |
| 230 uintptr_t u; | |
| 231 GCobj *next = gcnext(o); | |
| 232 GCstr *s = gco2str(o); | |
| 233 StrHash hash; | |
| 234 if (ow) { /* Must sweep while rechaining. */ | |
| 235 if (((o->gch.marked ^ LJ_GC_WHITES) & ow)) { /* String alive? */ | |
| 236 lj_assertG(!isdead(g, o) || (o->gch.marked & LJ_GC_FIXED), | |
| 237 "sweep of undead string"); | |
| 238 makewhite(g, o); | |
| 239 } else { /* Free dead string. */ | |
| 240 lj_assertG(isdead(g, o) || ow == LJ_GC_SFIXED, | |
| 241 "sweep of unlive string"); | |
| 242 lj_str_free(g, s); | |
| 243 o = next; | |
| 244 continue; | |
| 245 } | |
| 246 } | |
| 247 hash = s->hash; | |
| 248 if (!s->hashalg) { /* Rehash with secondary hash. */ | |
| 249 hash = hash_dense(g->str.seed, hash, strdata(s), s->len); | |
| 250 s->hash = hash; | |
| 251 s->hashalg = 1; | |
| 252 } | |
| 253 /* Rechain. */ | |
| 254 hash &= strmask; | |
| 255 u = gcrefu(strtab[hash]); | |
| 256 setgcrefp(o->gch.nextgc, (u & ~(uintptr_t)1)); | |
| 257 setgcrefp(strtab[hash], ((uintptr_t)o | (u & 1))); | |
| 258 o = next; | |
| 259 } | |
| 260 /* Try to insert the pending string again. */ | |
| 261 return lj_str_new(L, str, len); | |
| 262 } | |
| 263 #endif | |
| 264 | |
| 265 /* Reseed String ID from PRNG after random interval < 2^bits. */ | |
| 266 #if LUAJIT_SECURITY_STRID == 1 | |
| 267 #define STRID_RESEED_INTERVAL 8 | |
| 268 #elif LUAJIT_SECURITY_STRID == 2 | |
| 269 #define STRID_RESEED_INTERVAL 4 | |
| 270 #elif LUAJIT_SECURITY_STRID >= 3 | |
| 271 #define STRID_RESEED_INTERVAL 0 | |
| 272 #endif | |
| 273 | |
| 274 /* Allocate a new string and add to string interning table. */ | |
| 275 static GCstr *lj_str_alloc(lua_State *L, const char *str, MSize len, | |
| 276 StrHash hash, int hashalg) | |
| 277 { | |
| 278 GCstr *s = lj_mem_newt(L, lj_str_size(len), GCstr); | |
| 279 global_State *g = G(L); | |
| 280 uintptr_t u; | |
| 281 newwhite(g, s); | |
| 282 s->gct = ~LJ_TSTR; | |
| 283 s->len = len; | |
| 284 s->hash = hash; | |
| 285 #ifndef STRID_RESEED_INTERVAL | |
| 286 s->sid = g->str.id++; | |
| 287 #elif STRID_RESEED_INTERVAL | |
| 288 if (!g->str.idreseed--) { | |
| 289 uint64_t r = lj_prng_u64(&g->prng); | |
| 290 g->str.id = (StrID)r; | |
| 291 g->str.idreseed = (uint8_t)(r >> (64 - STRID_RESEED_INTERVAL)); | |
| 292 } | |
| 293 s->sid = g->str.id++; | |
| 294 #else | |
| 295 s->sid = (StrID)lj_prng_u64(&g->prng); | |
| 296 #endif | |
| 297 s->reserved = 0; | |
| 298 s->hashalg = (uint8_t)hashalg; | |
| 299 /* Clear last 4 bytes of allocated memory. Implies zero-termination, too. */ | |
| 300 *(uint32_t *)(strdatawr(s)+(len & ~(MSize)3)) = 0; | |
| 301 memcpy(strdatawr(s), str, len); | |
| 302 /* Add to string hash table. */ | |
| 303 hash &= g->str.mask; | |
| 304 u = gcrefu(g->str.tab[hash]); | |
| 305 setgcrefp(s->nextgc, (u & ~(uintptr_t)1)); | |
| 306 /* NOBARRIER: The string table is a GC root. */ | |
| 307 setgcrefp(g->str.tab[hash], ((uintptr_t)s | (u & 1))); | |
| 308 if (g->str.num++ > g->str.mask) /* Allow a 100% load factor. */ | |
| 309 lj_str_resize(L, (g->str.mask<<1)+1); /* Grow string table. */ | |
| 310 return s; /* Return newly interned string. */ | |
| 311 } | |
| 312 | |
| 313 /* Intern a string and return string object. */ | |
| 314 GCstr *lj_str_new(lua_State *L, const char *str, size_t lenx) | |
| 315 { | |
| 316 global_State *g = G(L); | |
| 317 if (lenx-1 < LJ_MAX_STR-1) { | |
| 318 MSize len = (MSize)lenx; | |
| 319 StrHash hash = hash_sparse(g->str.seed, str, len); | |
| 320 MSize coll = 0; | |
| 321 int hashalg = 0; | |
| 322 /* Check if the string has already been interned. */ | |
| 323 GCobj *o = gcref(g->str.tab[hash & g->str.mask]); | |
| 324 #if LUAJIT_SECURITY_STRHASH | |
| 325 if (LJ_UNLIKELY((uintptr_t)o & 1)) { /* Secondary hash for this chain? */ | |
| 326 hashalg = 1; | |
| 327 hash = hash_dense(g->str.seed, hash, str, len); | |
| 328 o = (GCobj *)(gcrefu(g->str.tab[hash & g->str.mask]) & ~(uintptr_t)1); | |
| 329 } | |
| 330 #endif | |
| 331 while (o != NULL) { | |
| 332 GCstr *sx = gco2str(o); | |
| 333 if (sx->hash == hash && sx->len == len) { | |
| 334 if (memcmp(str, strdata(sx), len) == 0) { | |
| 335 if (isdead(g, o)) flipwhite(o); /* Resurrect if dead. */ | |
| 336 return sx; /* Return existing string. */ | |
| 337 } | |
| 338 coll++; | |
| 339 } | |
| 340 coll++; | |
| 341 o = gcnext(o); | |
| 342 } | |
| 343 #if LUAJIT_SECURITY_STRHASH | |
| 344 /* Rehash chain if there are too many collisions. */ | |
| 345 if (LJ_UNLIKELY(coll > LJ_STR_MAXCOLL) && !hashalg) { | |
| 346 return lj_str_rehash_chain(L, hash, str, len); | |
| 347 } | |
| 348 #endif | |
| 349 /* Otherwise allocate a new string. */ | |
| 350 return lj_str_alloc(L, str, len, hash, hashalg); | |
| 351 } else { | |
| 352 if (lenx) | |
| 353 lj_err_msg(L, LJ_ERR_STROV); | |
| 354 return &g->strempty; | |
| 355 } | |
| 356 } | |
| 357 | |
| 358 void LJ_FASTCALL lj_str_free(global_State *g, GCstr *s) | |
| 359 { | |
| 360 g->str.num--; | |
| 361 lj_mem_free(g, s, lj_str_size(s->len)); | |
| 362 } | |
| 363 | |
| 364 void LJ_FASTCALL lj_str_init(lua_State *L) | |
| 365 { | |
| 366 global_State *g = G(L); | |
| 367 g->str.seed = lj_prng_u64(&g->prng); | |
| 368 lj_str_resize(L, LJ_MIN_STRTAB-1); | |
| 369 } | |
| 370 |