Mercurial
comparison third_party/luajit/dynasm/dasm_x86.lua @ 178:94705b5986b3
[ThirdParty] Added WRK and luajit for load testing.
| author | MrJuneJune <me@mrjunejune.com> |
|---|---|
| date | Thu, 22 Jan 2026 20:10:30 -0800 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 177:24fe8ff94056 | 178:94705b5986b3 |
|---|---|
| 1 ------------------------------------------------------------------------------ | |
| 2 -- DynASM x86/x64 module. | |
| 3 -- | |
| 4 -- Copyright (C) 2005-2023 Mike Pall. All rights reserved. | |
| 5 -- See dynasm.lua for full copyright notice. | |
| 6 ------------------------------------------------------------------------------ | |
| 7 | |
| 8 local x64 = x64 | |
| 9 | |
| 10 -- Module information: | |
| 11 local _info = { | |
| 12 arch = x64 and "x64" or "x86", | |
| 13 description = "DynASM x86/x64 module", | |
| 14 version = "1.5.0", | |
| 15 vernum = 10500, | |
| 16 release = "2021-05-02", | |
| 17 author = "Mike Pall", | |
| 18 license = "MIT", | |
| 19 } | |
| 20 | |
| 21 -- Exported glue functions for the arch-specific module. | |
| 22 local _M = { _info = _info } | |
| 23 | |
| 24 -- Cache library functions. | |
| 25 local type, tonumber, pairs, ipairs = type, tonumber, pairs, ipairs | |
| 26 local assert, unpack, setmetatable = assert, unpack or table.unpack, setmetatable | |
| 27 local _s = string | |
| 28 local sub, format, byte, char = _s.sub, _s.format, _s.byte, _s.char | |
| 29 local find, match, gmatch, gsub = _s.find, _s.match, _s.gmatch, _s.gsub | |
| 30 local concat, sort, remove = table.concat, table.sort, table.remove | |
| 31 local bit = bit or require("bit") | |
| 32 local band, bxor, shl, shr = bit.band, bit.bxor, bit.lshift, bit.rshift | |
| 33 | |
| 34 -- Inherited tables and callbacks. | |
| 35 local g_opt, g_arch | |
| 36 local wline, werror, wfatal, wwarn | |
| 37 | |
| 38 -- Action name list. | |
| 39 -- CHECK: Keep this in sync with the C code! | |
| 40 local action_names = { | |
| 41 -- int arg, 1 buffer pos: | |
| 42 "DISP", "IMM_S", "IMM_B", "IMM_W", "IMM_D", "IMM_WB", "IMM_DB", | |
| 43 -- action arg (1 byte), int arg, 1 buffer pos (reg/num): | |
| 44 "VREG", "SPACE", | |
| 45 -- ptrdiff_t arg, 1 buffer pos (address): !x64 | |
| 46 "SETLABEL", "REL_A", | |
| 47 -- action arg (1 byte) or int arg, 2 buffer pos (link, offset): | |
| 48 "REL_LG", "REL_PC", | |
| 49 -- action arg (1 byte) or int arg, 1 buffer pos (link): | |
| 50 "IMM_LG", "IMM_PC", | |
| 51 -- action arg (1 byte) or int arg, 1 buffer pos (offset): | |
| 52 "LABEL_LG", "LABEL_PC", | |
| 53 -- action arg (1 byte), 1 buffer pos (offset): | |
| 54 "ALIGN", | |
| 55 -- action args (2 bytes), no buffer pos. | |
| 56 "EXTERN", | |
| 57 -- action arg (1 byte), no buffer pos. | |
| 58 "ESC", | |
| 59 -- no action arg, no buffer pos. | |
| 60 "MARK", | |
| 61 -- action arg (1 byte), no buffer pos, terminal action: | |
| 62 "SECTION", | |
| 63 -- no args, no buffer pos, terminal action: | |
| 64 "STOP" | |
| 65 } | |
| 66 | |
| 67 -- Maximum number of section buffer positions for dasm_put(). | |
| 68 -- CHECK: Keep this in sync with the C code! | |
| 69 local maxsecpos = 25 -- Keep this low, to avoid excessively long C lines. | |
| 70 | |
| 71 -- Action name -> action number (dynamically generated below). | |
| 72 local map_action = {} | |
| 73 -- First action number. Everything below does not need to be escaped. | |
| 74 local actfirst = 256-#action_names | |
| 75 | |
| 76 -- Action list buffer and string (only used to remove dupes). | |
| 77 local actlist = {} | |
| 78 local actstr = "" | |
| 79 | |
| 80 -- Argument list for next dasm_put(). Start with offset 0 into action list. | |
| 81 local actargs = { 0 } | |
| 82 | |
| 83 -- Current number of section buffer positions for dasm_put(). | |
| 84 local secpos = 1 | |
| 85 | |
| 86 -- VREG kind encodings, pre-shifted by 5 bits. | |
| 87 local map_vreg = { | |
| 88 ["modrm.rm.m"] = 0x00, | |
| 89 ["modrm.rm.r"] = 0x20, | |
| 90 ["opcode"] = 0x20, | |
| 91 ["sib.base"] = 0x20, | |
| 92 ["sib.index"] = 0x40, | |
| 93 ["modrm.reg"] = 0x80, | |
| 94 ["vex.v"] = 0xa0, | |
| 95 ["imm.hi"] = 0xc0, | |
| 96 } | |
| 97 | |
| 98 -- Current number of VREG actions contributing to REX/VEX shrinkage. | |
| 99 local vreg_shrink_count = 0 | |
| 100 | |
| 101 ------------------------------------------------------------------------------ | |
| 102 | |
| 103 -- Compute action numbers for action names. | |
| 104 for n,name in ipairs(action_names) do | |
| 105 local num = actfirst + n - 1 | |
| 106 map_action[name] = num | |
| 107 end | |
| 108 | |
| 109 -- Dump action names and numbers: writes a heading, then one line per action_names entry with its map_action number in hex and in decimal, then an empty line, all through out:write(). | |
| 110 local function dumpactions(out) | |
| 111 out:write("DynASM encoding engine action codes:\n") | |
| 112 for n,name in ipairs(action_names) do | |
| 113 local num = map_action[name] | |
| 114 out:write(format(" %-10s %02X %d\n", name, num, num)) | |
| 115 end | |
| 116 out:write("\n") | |
| 117 end | |
| 118 | |
| 119 -- Write action list buffer as a huge static C array of unsigned char called name. The declared size is #actlist, or 1 when actlist is empty (255 is then emitted as the only element). Output lines are broken once they reach 75 characters. | |
| 120 local function writeactions(out, name) | |
| 121 local nn = #actlist | |
| 122 local last = actlist[nn] or 255 | |
| 123 actlist[nn] = nil -- Remove last byte (it is written separately below so that no comma follows it). | |
| 124 if nn == 0 then nn = 1 end | |
| 125 out:write("static const unsigned char ", name, "[", nn, "] = {\n") | |
| 126 local s = " " | |
| 127 for n,b in ipairs(actlist) do | |
| 128 s = s..b.."," | |
| 129 if #s >= 75 then | |
| 130 assert(out:write(s, "\n")) | |
| 131 s = " " | |
| 132 end | |
| 133 end | |
| 134 out:write(s, last, "\n};\n\n") -- Add last byte back (to the output only; actlist itself stays one entry shorter). | |
| 135 end | |
| 136 | |
| 137 ------------------------------------------------------------------------------ | |
| 138 | |
| 139 -- Add byte to action list. Asserts that n is an integer in 0..255 and appends it to actlist as is (no escaping; see wputb for the escaping variant). | |
| 140 local function wputxb(n) | |
| 141 assert(n >= 0 and n <= 255 and n % 1 == 0, "byte out of range") | |
| 142 actlist[#actlist+1] = n | |
| 143 end | |
| 144 | |
| 145 -- Add action to list with optional arg. Advance buffer pos, too. The action name must be a key of map_action (asserted). When a is given it is appended to actargs; when a or num is given, secpos grows by num (default 1). | |
| 146 local function waction(action, a, num) | |
| 147 wputxb(assert(map_action[action], "bad action name `"..action.."'")) | |
| 148 if a then actargs[#actargs+1] = a end | |
| 149 if a or num then secpos = secpos + (num or 1) end | |
| 150 end | |
| 151 | |
| 152 -- Optionally add a VREG action (no-op when vreg is nil). Emits VREG with vreg as its argument, then one byte made of the map_vreg code for kind plus psz (default 0). A kind code below sk (default 0) bumps vreg_shrink_count; unless defer is set, the pending count times 8 is added to the byte and the counter is reset. An unknown kind fails the assert, which reports the offending kind. | |
| 153 local function wvreg(kind, vreg, psz, sk, defer) | |
| 154 if not vreg then return end | |
| 155 waction("VREG", vreg) | |
| 156 local b = assert(map_vreg[kind], "bad vreg kind `"..kind.."'") | |
| 157 if b < (sk or 0) then | |
| 158 vreg_shrink_count = vreg_shrink_count + 1 | |
| 159 end | |
| 160 if not defer then | |
| 161 b = b + vreg_shrink_count * 8 | |
| 162 vreg_shrink_count = 0 | |
| 163 end | |
| 164 wputxb(b + (psz or 0)) | |
| 165 end | |
| 166 | |
| 167 -- Add call to embedded DynASM C code: passes the line dasm_<func>(Dst, <args joined with ", ">); to wline, with true as the second argument. | |
| 168 local function wcall(func, args) | |
| 169 wline(format("dasm_%s(Dst, %s);", func, concat(args, ", ")), true) | |
| 170 end | |
| 171 | |
| 172 -- Delete duplicate action list chunks. A tad slow, but so what. The actlist bytes after offset are turned into a string and looked up in actstr with a plain (non-pattern) find; on a hit the chunk is dropped and actargs[1] is redirected to the earlier copy, otherwise the chunk is appended to actstr. | |
| 173 local function dedupechunk(offset) | |
| 174 local al, as = actlist, actstr | |
| 175 local chunk = char(unpack(al, offset+1, #al)) | |
| 176 local orig = find(as, chunk, 1, true) | |
| 177 if orig then | |
| 178 actargs[1] = orig-1 -- Replace with original offset. | |
| 179 for i=offset+1,#al do al[i] = nil end -- Kill dupe. | |
| 180 else | |
| 181 actstr = as..chunk | |
| 182 end | |
| 183 end | |
| 184 | |
| 185 -- Flush action list (intervening C code or buffer pos overflow). Pass a true term when the list already ends in a terminal action, so that no STOP is appended. | |
| 186 local function wflush(term) | |
| 187 local offset = actargs[1] | |
| 188 if #actlist == offset then return end -- Nothing to flush. | |
| 189 if not term then waction("STOP") end -- Terminate action list. | |
| 190 dedupechunk(offset) | |
| 191 wcall("put", actargs) -- Add call to dasm_put(). | |
| 192 actargs = { #actlist } -- Actionlist offset is 1st arg to next dasm_put(). | |
| 193 secpos = 1 -- The actionlist offset occupies a buffer position, too. | |
| 194 end | |
| 195 | |
| 196 -- Put escaped byte. Values at or above actfirst would be read as action numbers, so an ESC action is emitted in front of them. | |
| 197 local function wputb(n) | |
| 198 if n >= actfirst then waction("ESC") end -- Need to escape byte. | |
| 199 wputxb(n) | |
| 200 end | |
| 201 | |
| 202 ------------------------------------------------------------------------------ | |
| 203 | |
| 204 -- Global label name -> global label number. With auto assignment on 1st use. Names must match ^[%a_][%w_@]*$; numbers are handed out sequentially starting at 10, and a number above 246 is reported through werror. | |
| 205 local next_global = 10 | |
| 206 local map_global = setmetatable({}, { __index = function(t, name) | |
| 207 if not match(name, "^[%a_][%w_@]*$") then werror("bad global label") end | |
| 208 local n = next_global | |
| 209 if n > 246 then werror("too many global labels") end | |
| 210 next_global = n + 1 | |
| 211 t[name] = n | |
| 212 return n | |
| 213 end}) | |
| 214 | |
| 215 -- Dump global labels: inverts map_global into a number-indexed table and writes the names in number order (10 .. next_global-1) to out. The lvl parameter is not used in the body. | |
| 216 local function dumpglobals(out, lvl) | |
| 217 local t = {} | |
| 218 for name, n in pairs(map_global) do t[n] = name end | |
| 219 out:write("Global labels:\n") | |
| 220 for i=10,next_global-1 do | |
| 221 out:write(format(" %s\n", t[i])) | |
| 222 end | |
| 223 out:write("\n") | |
| 224 end | |
| 225 | |
| 226 -- Write global label enum: one enumerator per global label in number order, each made of prefix plus the label name with everything from the first @ on removed, followed by a final <prefix>_MAX entry. | |
| 227 local function writeglobals(out, prefix) | |
| 228 local t = {} | |
| 229 for name, n in pairs(map_global) do t[n] = name end | |
| 230 out:write("enum {\n") | |
| 231 for i=10,next_global-1 do | |
| 232 out:write(" ", prefix, gsub(t[i], "@.*", ""), ",\n") | |
| 233 end | |
| 234 out:write(" ", prefix, "_MAX\n};\n") | |
| 235 end | |
| 236 | |
| 237 -- Write global label names as a C array of strings called name, in label number order and ending with a null pointer entry. The loop variable name in the first loop shadows the parameter only inside that loop. | |
| 238 local function writeglobalnames(out, name) | |
| 239 local t = {} | |
| 240 for name, n in pairs(map_global) do t[n] = name end | |
| 241 out:write("static const char *const ", name, "[] = {\n") | |
| 242 for i=10,next_global-1 do | |
| 243 out:write(" \"", t[i], "\",\n") | |
| 244 end | |
| 245 out:write(" (const char *)0\n};\n") | |
| 246 end | |
| 247 | |
| 248 ------------------------------------------------------------------------------ | |
| 249 | |
| 250 -- Extern label name -> extern label number. With auto assignment on 1st use. Numbers are negative and count down from -1; a number below -256 is reported through werror. | |
| 251 local next_extern = -1 | |
| 252 local map_extern = setmetatable({}, { __index = function(t, name) | |
| 253 -- No restrictions on the name for now. | |
| 254 local n = next_extern | |
| 255 if n < -256 then werror("too many extern labels") end | |
| 256 next_extern = n - 1 | |
| 257 t[name] = n | |
| 258 return n | |
| 259 end}) | |
| 260 | |
| 261 -- Dump extern labels: inverts map_extern (indexing by the negated number) and writes the names in order of assignment to out. The lvl parameter is not used in the body. | |
| 262 local function dumpexterns(out, lvl) | |
| 263 local t = {} | |
| 264 for name, n in pairs(map_extern) do t[-n] = name end | |
| 265 out:write("Extern labels:\n") | |
| 266 for i=1,-next_extern-1 do | |
| 267 out:write(format(" %s\n", t[i])) | |
| 268 end | |
| 269 out:write("\n") | |
| 270 end | |
| 271 | |
| 272 -- Write extern label names as a C array of strings called name, in order of assignment and ending with a null pointer entry. The loop variable name in the first loop shadows the parameter only inside that loop. | |
| 273 local function writeexternnames(out, name) | |
| 274 local t = {} | |
| 275 for name, n in pairs(map_extern) do t[-n] = name end | |
| 276 out:write("static const char *const ", name, "[] = {\n") | |
| 277 for i=1,-next_extern-1 do | |
| 278 out:write(" \"", t[i], "\",\n") | |
| 279 end | |
| 280 out:write(" (const char *)0\n};\n") | |
| 281 end | |
| 282 | |
| 283 ------------------------------------------------------------------------------ | |
| 284 | |
| 285 -- Arch-specific maps. | |
| 286 local map_archdef = {} -- Ext. register name -> int. name. | |
| 287 local map_reg_rev = {} -- Int. register name -> ext. name. | |
| 288 local map_reg_num = {} -- Int. register name -> register number. | |
| 289 local map_reg_opsize = {} -- Int. register name -> operand size. | |
| 290 local map_reg_valid_base = {} -- Int. register name -> valid base register? | |
| 291 local map_reg_valid_index = {} -- Int. register name -> valid index register? | |
| 292 local map_reg_needrex = {} -- Int. register name -> need rex vs. no rex. | |
| 293 local reg_list = {} -- Canonical list of int. register names. | |
| 294 | |
| 295 local map_type = {} -- Type name -> { ctype, reg } | |
| 296 local ctypenum = 0 -- Type number (for _PTx macros). | |
| 297 | |
| 298 local addrsize = x64 and "q" or "d" -- Size for address operands. | |
| 299 | |
| 300 -- Helper function to fill the register maps for one operand size sz. Registers the class name cl as a variable register (number -1), then the optional list of conventional names (numbered from 0), then generated numbered names (0..15 on x64 unless sz is "f", otherwise 0..7), and finally appends an empty separator entry to reg_list. Sizes equal to addrsize or "d" are also marked as valid base and index registers. | |
| 301 local function mkrmap(sz, cl, names) | |
| 302 local cname = format("@%s", sz) | |
| 303 reg_list[#reg_list+1] = cname | |
| 304 map_archdef[cl] = cname | |
| 305 map_reg_rev[cname] = cl | |
| 306 map_reg_num[cname] = -1 | |
| 307 map_reg_opsize[cname] = sz | |
| 308 if sz == addrsize or sz == "d" then | |
| 309 map_reg_valid_base[cname] = true | |
| 310 map_reg_valid_index[cname] = true | |
| 311 end | |
| 312 if names then | |
| 313 for n,name in ipairs(names) do | |
| 314 local iname = format("@%s%x", sz, n-1) | |
| 315 reg_list[#reg_list+1] = iname | |
| 316 map_archdef[name] = iname | |
| 317 map_reg_rev[iname] = name | |
| 318 map_reg_num[iname] = n-1 | |
| 319 map_reg_opsize[iname] = sz | |
| 320 if sz == "b" and n > 4 then map_reg_needrex[iname] = false end | |
| 321 if sz == addrsize or sz == "d" then | |
| 322 map_reg_valid_base[iname] = true | |
| 323 map_reg_valid_index[iname] = true | |
| 324 end | |
| 325 end | |
| 326 end | |
| 327 for i=0,(x64 and sz ~= "f") and 15 or 7 do | |
| 328 local needrex = sz == "b" and i > 3 | |
| 329 local iname = format("@%s%x%s", sz, i, needrex and "R" or "") | |
| 330 if needrex then map_reg_needrex[iname] = true end | |
| 331 local name | |
| 332 if sz == "o" or sz == "y" then name = format("%s%d", cl, i) | |
| 333 elseif sz == "f" then name = format("st%d", i) | |
| 334 else name = format("r%d%s", i, sz == addrsize and "" or sz) end | |
| 335 map_archdef[name] = iname | |
| 336 if not map_reg_rev[iname] then | |
| 337 reg_list[#reg_list+1] = iname | |
| 338 map_reg_rev[iname] = name | |
| 339 map_reg_num[iname] = i | |
| 340 map_reg_opsize[iname] = sz | |
| 341 if sz == addrsize or sz == "d" then | |
| 342 map_reg_valid_base[iname] = true | |
| 343 map_reg_valid_index[iname] = true | |
| 344 end | |
| 345 end | |
| 346 end | |
| 347 reg_list[#reg_list+1] = "" | |
| 348 end | |
| 349 | |
| 350 -- Integer registers (qword, dword, word and byte sized). The qword set is only created on x64. Afterwards esp (and rsp on x64) is marked as invalid for use as an index register, the variable byte register class Rb is flagged as needing REX on x64, and Ra is aliased to the address-sized register class. | |
| 351 if x64 then | |
| 352 mkrmap("q", "Rq", {"rax", "rcx", "rdx", "rbx", "rsp", "rbp", "rsi", "rdi"}) | |
| 353 end | |
| 354 mkrmap("d", "Rd", {"eax", "ecx", "edx", "ebx", "esp", "ebp", "esi", "edi"}) | |
| 355 mkrmap("w", "Rw", {"ax", "cx", "dx", "bx", "sp", "bp", "si", "di"}) | |
| 356 mkrmap("b", "Rb", {"al", "cl", "dl", "bl", "ah", "ch", "dh", "bh"}) | |
| 357 map_reg_valid_index[map_archdef.esp] = false | |
| 358 if x64 then map_reg_valid_index[map_archdef.rsp] = false end | |
| 359 if x64 then map_reg_needrex[map_archdef.Rb] = true end | |
| 360 map_archdef["Ra"] = "@"..addrsize | |
| 361 | |
| 362 -- FP registers (internally tword sized, but use "f" as operand size). | |
| 363 mkrmap("f", "Rf") | |
| 364 | |
| 365 -- SSE registers (oword sized, but qword and dword accessible). | |
| 366 mkrmap("o", "xmm") | |
| 367 | |
| 368 -- AVX registers (yword sized, but oword, qword and dword accessible). | |
| 369 mkrmap("y", "ymm") | |
| 370 | |
| 371 -- Operand size prefixes to codes. The aword prefix maps to whatever addrsize is for the target ("q" on x64, "d" otherwise). | |
| 372 local map_opsize = { | |
| 373 byte = "b", word = "w", dword = "d", qword = "q", oword = "o", yword = "y", | |
| 374 tword = "t", aword = addrsize, | |
| 375 } | |
| 376 | |
| 377 -- Operand size code to number. NOTE(review): the values look like the operand size in bytes (e.g. t = 10 for tword) -- confirm. | |
| 378 local map_opsizenum = { | |
| 379 b = 1, w = 2, d = 4, q = 8, o = 16, y = 32, t = 10, | |
| 380 } | |
| 381 | |
| 382 -- Operand size code to name (used for display, e.g. by dumpregs). | |
| 383 local map_opsizename = { | |
| 384 b = "byte", w = "word", d = "dword", q = "qword", o = "oword", y = "yword", | |
| 385 t = "tword", f = "fpword", | |
| 386 } | |
| 387 | |
| 388 -- Valid index register scale factors (scale as written in the source -> 2 bit scale code). | |
| 389 local map_xsc = { | |
| 390 ["1"] = 0, ["2"] = 1, ["4"] = 2, ["8"] = 3, | |
| 391 } | |
| 392 | |
| 393 -- Condition codes: condition name -> condition number 0..15. Several names are aliases for the same number (e.g. b, c and nae are all 2). | |
| 394 local map_cc = { | |
| 395 o = 0, no = 1, b = 2, nb = 3, e = 4, ne = 5, be = 6, nbe = 7, | |
| 396 s = 8, ns = 9, p = 10, np = 11, l = 12, nl = 13, le = 14, nle = 15, | |
| 397 c = 2, nae = 2, nc = 3, ae = 3, z = 4, nz = 5, na = 6, a = 7, | |
| 398 pe = 10, po = 11, nge = 12, ge = 13, ng = 14, g = 15, | |
| 399 } | |
| 400 | |
| 401 | |
| 402 -- Reverse defines for registers: every @-prefixed alphanumeric token in s is looked up in map_reg_rev and replaced by the external register name; tokens without an entry stay unchanged. Returns the results of gsub (the new string and the number of matches). | |
| 403 function _M.revdef(s) | |
| 404 return gsub(s, "@%w+", map_reg_rev) | |
| 405 end | |
| 406 | |
| 407 -- Dump register names and numbers: walks reg_list in order, writing an empty line for each "" separator entry and otherwise the external name, the operand size name and the register number; negative numbers are shown as (variable). | |
| 408 local function dumpregs(out) | |
| 409 out:write("Register names, sizes and internal numbers:\n") | |
| 410 for _,reg in ipairs(reg_list) do | |
| 411 if reg == "" then | |
| 412 out:write("\n") | |
| 413 else | |
| 414 local name = map_reg_rev[reg] | |
| 415 local num = map_reg_num[reg] | |
| 416 local opsize = map_opsizename[map_reg_opsize[reg]] | |
| 417 out:write(format(" %-5s %-8s %s\n", name, opsize, | |
| 418 num < 0 and "(variable)" or num)) | |
| 419 end | |
| 420 end | |
| 421 end | |
| 422 | |
| 423 ------------------------------------------------------------------------------ | |
| 424 | |
| 425 -- Put action for label arg (IMM_LG, IMM_PC, REL_LG, REL_PC). Numeric labels: a negative number is an extern (EXTERN action, then a byte that is 0 for the IMM_ prefix and 1 otherwise, then -imm-1); any other number emits <aprefix>LG followed by the label number as a byte. Non-numeric labels emit <aprefix>PC with imm as the action argument. num is passed to waction as the buffer position count. | |
| 426 local function wputlabel(aprefix, imm, num) | |
| 427 if type(imm) == "number" then | |
| 428 if imm < 0 then | |
| 429 waction("EXTERN") | |
| 430 wputxb(aprefix == "IMM_" and 0 or 1) | |
| 431 imm = -imm-1 | |
| 432 else | |
| 433 waction(aprefix.."LG", nil, num); | |
| 434 end | |
| 435 wputxb(imm) | |
| 436 else | |
| 437 waction(aprefix.."PC", imm, num) | |
| 438 end | |
| 439 end | |
| 440 | |
| 441 -- Put signed byte or arg. A number must lie in -128..127 (else werror) and is emitted as its two's complement byte; anything else is emitted as an IMM_S action with n as argument. | |
| 442 local function wputsbarg(n) | |
| 443 if type(n) == "number" then | |
| 444 if n < -128 or n > 127 then | |
| 445 werror("signed immediate byte out of range") | |
| 446 end | |
| 447 if n < 0 then n = n + 256 end | |
| 448 wputb(n) | |
| 449 else waction("IMM_S", n) end | |
| 450 end | |
| 451 | |
| 452 -- Put unsigned byte or arg. A number must lie in 0..255 (else werror); anything else is emitted as an IMM_B action with n as argument. | |
| 453 local function wputbarg(n) | |
| 454 if type(n) == "number" then | |
| 455 if n < 0 or n > 255 then | |
| 456 werror("unsigned immediate byte out of range") | |
| 457 end | |
| 458 wputb(n) | |
| 459 else waction("IMM_B", n) end | |
| 460 end | |
| 461 | |
| 462 -- Put unsigned word or arg. A number must have no bits set above bit 15 (else werror) and is emitted low byte first; anything else is emitted as an IMM_W action with n as argument. | |
| 463 local function wputwarg(n) | |
| 464 if type(n) == "number" then | |
| 465 if shr(n, 16) ~= 0 then | |
| 466 werror("unsigned immediate word out of range") | |
| 467 end | |
| 468 wputb(band(n, 255)); wputb(shr(n, 8)); | |
| 469 else waction("IMM_W", n) end | |
| 470 end | |
| 471 | |
| 472 -- Put signed or unsigned dword or arg. A number is emitted as four bytes, low byte first; a table is treated as a label reference (its first element goes to wputlabel with the IMM_ prefix); anything else is emitted as an IMM_D action with n as argument. | |
| 473 local function wputdarg(n) | |
| 474 local tn = type(n) | |
| 475 if tn == "number" then | |
| 476 wputb(band(n, 255)) | |
| 477 wputb(band(shr(n, 8), 255)) | |
| 478 wputb(band(shr(n, 16), 255)) | |
| 479 wputb(shr(n, 24)) | |
| 480 elseif tn == "table" then | |
| 481 wputlabel("IMM_", n[1], 1) | |
| 482 else | |
| 483 waction("IMM_D", n) | |
| 484 end | |
| 485 end | |
| 486 | |
| 487 -- Put signed or unsigned qword or arg. A number is emitted as four bytes, low byte first, followed by four sign bytes (255 for negative numbers, else 0); anything else is emitted as two IMM_D actions carrying C expressions for the low and the high 32 bits of n. | |
| 488 local function wputqarg(n) | |
| 489 local tn = type(n) | |
| 490 if tn == "number" then -- This is only used for numbers from -2^31..2^32-1. | |
| 491 wputb(band(n, 255)) | |
| 492 wputb(band(shr(n, 8), 255)) | |
| 493 wputb(band(shr(n, 16), 255)) | |
| 494 wputb(shr(n, 24)) | |
| 495 local sign = n < 0 and 255 or 0 | |
| 496 wputb(sign); wputb(sign); wputb(sign); wputb(sign) | |
| 497 else | |
| 498 waction("IMM_D", format("(unsigned int)(%s)", n)) | |
| 499 waction("IMM_D", format("(unsigned int)((unsigned long long)(%s)>>32)", n)) | |
| 500 end | |
| 501 end | |
| 502 | |
| 503 -- Put operand-size dependent number or arg (defaults to dword). Dispatch on sz: nil, "d" and "q" all go to wputdarg, "w" to wputwarg, "b" to wputbarg, "s" to wputsbarg; any other size is reported through werror. | |
| 504 local function wputszarg(sz, n) | |
| 505 if not sz or sz == "d" or sz == "q" then wputdarg(n) | |
| 506 elseif sz == "w" then wputwarg(n) | |
| 507 elseif sz == "b" then wputbarg(n) | |
| 508 elseif sz == "s" then wputsbarg(n) | |
| 509 else werror("bad operand size") end | |
| 510 end | |
| 511 | |
| 512 -- Put multi-byte opcode with operand-size dependent modifications. With a vex table, a 0xc5 or 0xc4 prefix sequence is emitted first and op must be below 256; otherwise a REX byte (64 + low 4 bits of rex) is emitted where needed, and a non-zero rex outside x64 is an error. sz "w" adds a 102 (0x66) prefix byte and sz "b" decrements the last opcode byte. Returns psz and sk. NOTE(review): these presumably feed wputmrmsib and wvreg -- the call site is not visible in this chunk. | |
| 513 local function wputop(sz, op, rex, vex, vregr, vregxb) | |
| 514 local psz, sk = 0, nil | |
| 515 if vex then | |
| 516 local tail | |
| 517 if vex.m == 1 and band(rex, 11) == 0 then | |
| 518 if x64 and vregxb then | |
| 519 sk = map_vreg["modrm.reg"] | |
| 520 else | |
| 521 wputb(0xc5) | |
| 522 tail = shl(bxor(band(rex, 4), 4), 5) | |
| 523 psz = 3 | |
| 524 end | |
| 525 end | |
| 526 if not tail then | |
| 527 wputb(0xc4) | |
| 528 wputb(shl(bxor(band(rex, 7), 7), 5) + vex.m) | |
| 529 tail = shl(band(rex, 8), 4) | |
| 530 psz = 4 | |
| 531 end | |
| 532 local reg, vreg = 0, nil | |
| 533 if vex.v then | |
| 534 reg = vex.v.reg | |
| 535 if not reg then werror("bad vex operand") end | |
| 536 if reg < 0 then reg = 0; vreg = vex.v.vreg end | |
| 537 end | |
| 538 if sz == "y" or vex.l then tail = tail + 4 end | |
| 539 wputb(tail + shl(bxor(reg, 15), 3) + vex.p) | |
| 540 wvreg("vex.v", vreg) | |
| 541 rex = 0 | |
| 542 if op >= 256 then werror("bad vex opcode") end | |
| 543 else | |
| 544 if rex ~= 0 then | |
| 545 if not x64 then werror("bad operand size") end | |
| 546 elseif (vregr or vregxb) and x64 then | |
| 547 rex = 0x10 | |
| 548 sk = map_vreg["vex.v"] | |
| 549 end | |
| 550 end | |
| 551 local r | |
| 552 if sz == "w" then wputb(102) end | |
| 553 -- Needs >32 bit numbers, but only for crc32 eax, word [ebx] | |
| 554 if op >= 4294967296 then r = op%4294967296 wputb((op-r)/4294967296) op = r end | |
| 555 if op >= 16777216 then wputb(shr(op, 24)); op = band(op, 0xffffff) end | |
| 556 if op >= 65536 then | |
| 557 if rex ~= 0 then | |
| 558 local opc3 = band(op, 0xffff00) | |
| 559 if opc3 == 0x0f3a00 or opc3 == 0x0f3800 then | |
| 560 wputb(64 + band(rex, 15)); rex = 0; psz = 2 | |
| 561 end | |
| 562 end | |
| 563 wputb(shr(op, 16)); op = band(op, 0xffff); psz = psz + 1 | |
| 564 end | |
| 565 if op >= 256 then | |
| 566 local b = shr(op, 8) | |
| 567 if b == 15 and rex ~= 0 then wputb(64 + band(rex, 15)); rex = 0; psz = 2 end | |
| 568 wputb(b); op = band(op, 255); psz = psz + 1 | |
| 569 end | |
| 570 if rex ~= 0 then wputb(64 + band(rex, 15)); psz = 2 end | |
| 571 if sz == "b" then op = op - 1 end | |
| 572 wputb(op) | |
| 573 return psz, sk | |
| 574 end | |
| 575 | |
| 576 -- Put ModRM or SIB formatted byte: m in bits 6-7, the low 3 bits of s in bits 3-5 and the low 3 bits of rm in bits 0-2. Asserts m < 4, s < 16 and rm < 16. The vs and vrm parameters are not used in the body. | |
| 577 local function wputmodrm(m, s, rm, vs, vrm) | |
| 578 assert(m < 4 and s < 16 and rm < 16, "bad modrm operands") | |
| 579 wputb(shl(m, 6) + shl(band(s, 7), 3) + band(rm, 7)) | |
| 580 end | |
| 581 | |
| 582 -- Put ModRM/SIB plus optional displacement. t is an operand table from which mode, reg, xreg, xsc, disp, vreg and vxreg are read; negative reg/xreg values mean a variable register and are encoded as 0 plus a VREG action. imark "I" requests a MARK action in the memory forms. s is the value for the reg field (negative is treated as 0) and vsreg its optional variable register expression. psz and sk are passed on to wvreg. | |
| 583 local function wputmrmsib(t, imark, s, vsreg, psz, sk) | |
| 584 local vreg, vxreg | |
| 585 local reg, xreg = t.reg, t.xreg | |
| 586 if reg and reg < 0 then reg = 0; vreg = t.vreg end | |
| 587 if xreg and xreg < 0 then xreg = 0; vxreg = t.vxreg end | |
| 588 if s < 0 then s = 0 end | |
| 589 | |
| 590 -- Register mode. | |
| 591 if sub(t.mode, 1, 1) == "r" then | |
| 592 wputmodrm(3, s, reg) | |
| 593 wvreg("modrm.reg", vsreg, psz+1, sk, vreg) | |
| 594 wvreg("modrm.rm.r", vreg, psz+1, sk) | |
| 595 return | |
| 596 end | |
| 597 | |
| 598 local disp = t.disp | |
| 599 local tdisp = type(disp) | |
| 600 -- No base register? | |
| 601 if not reg then | |
| 602 local riprel = false | |
| 603 if xreg then | |
| 604 -- Indexed mode with index register only. | |
| 605 -- [xreg*xsc+disp] -> (0, s, esp) (xsc, xreg, ebp) | |
| 606 wputmodrm(0, s, 4) | |
| 607 if imark == "I" then waction("MARK") end | |
| 608 wvreg("modrm.reg", vsreg, psz+1, sk, vxreg) | |
| 609 wputmodrm(t.xsc, xreg, 5) | |
| 610 wvreg("sib.index", vxreg, psz+2, sk) | |
| 611 else | |
| 612 -- Pure 32 bit displacement. | |
| 613 if x64 and tdisp ~= "table" then | |
| 614 wputmodrm(0, s, 4) -- [disp] -> (0, s, esp) (0, esp, ebp) | |
| 615 wvreg("modrm.reg", vsreg, psz+1, sk) | |
| 616 if imark == "I" then waction("MARK") end | |
| 617 wputmodrm(0, 4, 5) | |
| 618 else | |
| 619 riprel = x64 | |
| 620 wputmodrm(0, s, 5) -- [disp|rip-label] -> (0, s, ebp) | |
| 621 wvreg("modrm.reg", vsreg, psz+1, sk) | |
| 622 if imark == "I" then waction("MARK") end | |
| 623 end | |
| 624 end | |
| 625 if riprel then -- Emit rip-relative displacement. | |
| 626 if match("UWSiI", imark) then | |
| 627 werror("NYI: rip-relative displacement followed by immediate") | |
| 628 end | |
| 629 -- The previous byte in the action buffer cannot be 0xe9 or 0x80-0x8f. | |
| 630 wputlabel("REL_", disp[1], 2) | |
| 631 else | |
| 632 wputdarg(disp) | |
| 633 end | |
| 634 return | |
| 635 end | |
| 636 | |
| 637 local m | |
| 638 if tdisp == "number" then -- Check displacement size at assembly time. | |
| 639 if disp == 0 and band(reg, 7) ~= 5 then -- [ebp] -> [ebp+0] (in SIB, too) | |
| 640 if not vreg then m = 0 end -- Force DISP to allow [Rd(5)] -> [ebp+0] | |
| 641 elseif disp >= -128 and disp <= 127 then m = 1 | |
| 642 else m = 2 end | |
| 643 elseif tdisp == "table" then | |
| 644 m = 2 | |
| 645 end | |
| 646 | |
| 647 -- Index register present or esp as base register: need SIB encoding. | |
| 648 if xreg or band(reg, 7) == 4 then | |
| 649 wputmodrm(m or 2, s, 4) -- ModRM. | |
| 650 if m == nil or imark == "I" then waction("MARK") end | |
| 651 wvreg("modrm.reg", vsreg, psz+1, sk, vxreg or vreg) | |
| 652 wputmodrm(t.xsc or 0, xreg or 4, reg) -- SIB. | |
| 653 wvreg("sib.index", vxreg, psz+2, sk, vreg) | |
| 654 wvreg("sib.base", vreg, psz+2, sk) | |
| 655 else | |
| 656 wputmodrm(m or 2, s, reg) -- ModRM. | |
| 657 if (imark == "I" and (m == 1 or m == 2)) or | |
| 658 (m == nil and (vsreg or vreg)) then waction("MARK") end | |
| 659 wvreg("modrm.reg", vsreg, psz+1, sk, vreg) | |
| 660 wvreg("modrm.rm.m", vreg, psz+1, sk) | |
| 661 end | |
| 662 | |
| 663 -- Put displacement (m == 0 emits none; m == nil defers the size decision to a DISP action). | |
| 664 if m == 1 then wputsbarg(disp) | |
| 665 elseif m == 2 then wputdarg(disp) | |
| 666 elseif m == nil then waction("DISP", disp) end | |
| 667 end | |
| 668 | |
| 669 ------------------------------------------------------------------------------ | |
| 670 | |
| 671 -- Return human-readable operand mode string: op, a space, then for each entry of args the first character of its mode followed by its opsize (or ? when opsize is unset), joined with commas. | |
| 672 local function opmodestr(op, args) | |
| 673 local m = {} | |
| 674 for i=1,#args do | |
| 675 local a = args[i] | |
| 676 m[#m+1] = sub(a.mode, 1, 1)..(a.opsize or "?") | |
| 677 end | |
| 678 return op.." "..concat(m, ",") | |
| 679 end | |
| 680 | |
| 681 -- Convert number to valid integer or nil. Returns nil when expr is not numeric. A non-integral number is reported through werror. With isqword set, a number outside -2^31..2^31-1 yields nil; without it, a number outside -2^31..2^32-1 is reported through werror. | |
| 682 local function toint(expr, isqword) | |
| 683 local n = tonumber(expr) | |
| 684 if n then | |
| 685 if n % 1 ~= 0 then | |
| 686 werror("not an integer number `"..expr.."'") | |
| 687 elseif isqword then | |
| 688 if n < -2147483648 or n > 2147483647 then | |
| 689 n = nil -- Handle it as an expression to avoid precision loss. | |
| 690 end | |
| 691 elseif n < -2147483648 or n > 4294967295 then | |
| 692 werror("bad integer number `"..expr.."'") | |
| 693 end | |
| 694 return n | |
| 695 end | |
| 696 end | |
| 697 | |
| 698 -- Parse immediate expression. Returns a mode string and a value: "iPJ" with a ptrdiff_t cast for &expr, "iJ" for label references (pc label expression, global label number, local label number, or extern label number), and "iI" with the unchanged expression for everything else. | |
| 699 local function immexpr(expr) | |
| 700 -- &expr (pointer) | |
| 701 if sub(expr, 1, 1) == "&" then | |
| 702 return "iPJ", format("(ptrdiff_t)(%s)", sub(expr,2)) | |
| 703 end | |
| 704 | |
| 705 local prefix = sub(expr, 1, 2) | |
| 706 -- =>expr (pc label reference) | |
| 707 if prefix == "=>" then | |
| 708 return "iJ", sub(expr, 3) | |
| 709 end | |
| 710 -- ->name (global label reference) | |
| 711 if prefix == "->" then | |
| 712 return "iJ", map_global[sub(expr, 3)] | |
| 713 end | |
| 714 | |
| 715 -- [<>][1-9] (local label reference) | |
| 716 local dir, lnum = match(expr, "^([<>])([1-9])$") | |
| 717 if dir then -- Fwd: 247-255, Bkwd: 1-9. | |
| 718 return "iJ", lnum + (dir == ">" and 246 or 0) | |
| 719 end | |
| 720 | |
| 721 local extname = match(expr, "^extern%s+(%S+)$") | |
| 722 if extname then | |
| 723 return "iJ", map_extern[extname] | |
| 724 end | |
| 725 | |
| 726 -- expr (interpreted as immediate) | |
| 727 return "iI", expr | |
| 728 end | |
| 729 | |
| 730 -- Parse displacement expression: +-num, +-expr, +-opsize*num. Returns a number (for an empty string, a plain integer, or opsize*num), a one-element table holding a label reference, or an expression string. An expression without a leading sign is reported through werror, as is a negated label reference. | |
| 731 local function dispexpr(expr) | |
| 732 local disp = expr == "" and 0 or toint(expr) | |
| 733 if disp then return disp end | |
| 734 local c, dispt = match(expr, "^([+-])%s*(.+)$") | |
| 735 if c == "+" then | |
| 736 expr = dispt | |
| 737 elseif not c then | |
| 738 werror("bad displacement expression `"..expr.."'") | |
| 739 end | |
| 740 local opsize, tailops = match(dispt, "^(%w+)%s*%*%s*(.+)$") | |
| 741 local ops, imm = map_opsize[opsize], toint(tailops) | |
| 742 if ops and imm then | |
| 743 if c == "-" then imm = -imm end | |
| 744 return imm*map_opsizenum[ops] | |
| 745 end | |
| 746 local mode, iexpr = immexpr(dispt) | |
| 747 if mode == "iJ" then | |
| 748 if c == "-" then werror("cannot invert label reference") end | |
| 749 return { iexpr } | |
| 750 end | |
| 751 return expr -- Need to return original signed expression. | |
| 752 end | |
| 753 | |
| 754 -- Parse register or type expression. Returns nothing for a nil expr. For a name found in map_type (optionally written as type:@reg to override the register) returns the internal register name, its number and the map_type entry; a type without a usable register or with an invalid base register is reported through werror. For anything else returns expr and its map_reg_num entry (nil when expr is not a register). | |
| 755 local function rtexpr(expr) | |
| 756 if not expr then return end | |
| 757 local tname, ovreg = match(expr, "^([%w_]+):(@[%w_]+)$") | |
| 758 local tp = map_type[tname or expr] | |
| 759 if tp then | |
| 760 local reg = ovreg or tp.reg | |
| 761 local rnum = map_reg_num[reg] | |
| 762 if not rnum then | |
| 763 werror("type `"..(tname or expr).."' needs a register override") | |
| 764 end | |
| 765 if not map_reg_valid_base[reg] then | |
| 766 werror("bad base register override `"..(map_reg_rev[reg] or reg).."'") | |
| 767 end | |
| 768 return reg, rnum, tp | |
| 769 end | |
| 770 return expr, map_reg_num[expr] | |
| 771 end | |
| 772 | |
| 773 -- Parse operand and return { mode, opsize, reg, xreg, xsc, disp, imm }. | |
| 774 local function parseoperand(param, isqword) | |
| 775 local t = {} | |
| 776 | |
| 777 local expr = param | |
| 778 local opsize, tailops = match(param, "^(%w+)%s*(.+)$") | |
| 779 if opsize then | |
| 780 t.opsize = map_opsize[opsize] | |
| 781 if t.opsize then expr = tailops end | |
| 782 end | |
| 783 | |
| 784 local br = match(expr, "^%[%s*(.-)%s*%]$") | |
| 785 repeat | |
| 786 if br then | |
| 787 t.mode = "xm" | |
| 788 | |
| 789 -- [disp] | |
| 790 t.disp = toint(br) | |
| 791 if t.disp then | |
| 792 t.mode = x64 and "xm" or "xmO" | |
| 793 break | |
| 794 end | |
| 795 | |
| 796 -- [reg...] | |
| 797 local tp | |
| 798 local reg, tailr = match(br, "^([@%w_:]+)%s*(.*)$") | |
| 799 reg, t.reg, tp = rtexpr(reg) | |
| 800 if not t.reg then | |
| 801 -- [expr] | |
| 802 t.mode = x64 and "xm" or "xmO" | |
| 803 t.disp = dispexpr("+"..br) | |
| 804 break | |
| 805 end | |
| 806 | |
| 807 if t.reg == -1 then | |
| 808 t.vreg, tailr = match(tailr, "^(%b())(.*)$") | |
| 809 if not t.vreg then werror("bad variable register expression") end | |
| 810 end | |
| 811 | |
| 812 -- [xreg*xsc] or [xreg*xsc+-disp] or [xreg*xsc+-expr] | |
| 813 local xsc, tailsc = match(tailr, "^%*%s*([1248])%s*(.*)$") | |
| 814 if xsc then | |
| 815 if not map_reg_valid_index[reg] then | |
| 816 werror("bad index register `"..map_reg_rev[reg].."'") | |
| 817 end | |
| 818 t.xsc = map_xsc[xsc] | |
| 819 t.xreg = t.reg | |
| 820 t.vxreg = t.vreg | |
| 821 t.reg = nil | |
| 822 t.vreg = nil | |
| 823 t.disp = dispexpr(tailsc) | |
| 824 break | |
| 825 end | |
| 826 if not map_reg_valid_base[reg] then | |
| 827 werror("bad base register `"..map_reg_rev[reg].."'") | |
| 828 end | |
| 829 | |
| 830 -- [reg] or [reg+-disp] | |
| 831 t.disp = toint(tailr) or (tailr == "" and 0) | |
| 832 if t.disp then break end | |
| 833 | |
| 834 -- [reg+xreg...] | |
| 835 local xreg, tailx = match(tailr, "^%+%s*([@%w_:]+)%s*(.*)$") | |
| 836 xreg, t.xreg, tp = rtexpr(xreg) | |
| 837 if not t.xreg then | |
| 838 -- [reg+-expr] | |
| 839 t.disp = dispexpr(tailr) | |
| 840 break | |
| 841 end | |
| 842 if not map_reg_valid_index[xreg] then | |
| 843 werror("bad index register `"..map_reg_rev[xreg].."'") | |
| 844 end | |
| 845 | |
| 846 if t.xreg == -1 then | |
| 847 t.vxreg, tailx = match(tailx, "^(%b())(.*)$") | |
| 848 if not t.vxreg then werror("bad variable register expression") end | |
| 849 end | |
| 850 | |
| 851 -- [reg+xreg*xsc...] | |
| 852 local xsc, tailsc = match(tailx, "^%*%s*([1248])%s*(.*)$") | |
| 853 if xsc then | |
| 854 t.xsc = map_xsc[xsc] | |
| 855 tailx = tailsc | |
| 856 end | |
| 857 | |
| 858 -- [...] or [...+-disp] or [...+-expr] | |
| 859 t.disp = dispexpr(tailx) | |
| 860 else | |
| 861 -- imm or opsize*imm | |
| 862 local imm = toint(expr, isqword) | |
| 863 if not imm and sub(expr, 1, 1) == "*" and t.opsize then | |
| 864 imm = toint(sub(expr, 2)) | |
| 865 if imm then | |
| 866 imm = imm * map_opsizenum[t.opsize] | |
| 867 t.opsize = nil | |
| 868 end | |
| 869 end | |
| 870 if imm then | |
| 871 if t.opsize then werror("bad operand size override") end | |
| 872 local m = "i" | |
| 873 if imm == 1 then m = m.."1" end | |
| 874 if imm >= 4294967168 and imm <= 4294967295 then imm = imm-4294967296 end | |
| 875 if imm >= -128 and imm <= 127 then m = m.."S" end | |
| 876 t.imm = imm | |
| 877 t.mode = m | |
| 878 break | |
| 879 end | |
| 880 | |
| 881 local tp | |
| 882 local reg, tailr = match(expr, "^([@%w_:]+)%s*(.*)$") | |
| 883 reg, t.reg, tp = rtexpr(reg) | |
| 884 if t.reg then | |
| 885 if t.reg == -1 then | |
| 886 t.vreg, tailr = match(tailr, "^(%b())(.*)$") | |
| 887 if not t.vreg then werror("bad variable register expression") end | |
| 888 end | |
| 889 -- reg | |
| 890 if tailr == "" then | |
| 891 if t.opsize then werror("bad operand size override") end | |
| 892 t.opsize = map_reg_opsize[reg] | |
| 893 if t.opsize == "f" then | |
| 894 t.mode = t.reg == 0 and "fF" or "f" | |
| 895 else | |
| 896 if reg == "@w4" or (x64 and reg == "@d4") then | |
| 897 wwarn("bad idea, try again with `"..(x64 and "rsp'" or "esp'")) | |
| 898 end | |
| 899 t.mode = t.reg == 0 and "rmR" or (reg == "@b1" and "rmC" or "rm") | |
| 900 end | |
| 901 t.needrex = map_reg_needrex[reg] | |
| 902 break | |
| 903 end | |
| 904 | |
| 905 -- type[idx], type[idx].field, type->field -> [reg+offset_expr] | |
| 906 if not tp then werror("bad operand `"..param.."'") end | |
| 907 t.mode = "xm" | |
| 908 t.disp = format(tp.ctypefmt, tailr) | |
| 909 else | |
| 910 t.mode, t.imm = immexpr(expr) | |
| 911 if sub(t.mode, -1) == "J" then | |
| 912 if t.opsize and t.opsize ~= addrsize then | |
| 913 werror("bad operand size override") | |
| 914 end | |
| 915 t.opsize = addrsize | |
| 916 end | |
| 917 end | |
| 918 end | |
| 919 until true | |
| 920 return t | |
| 921 end | |
| 922 | |
| 923 ------------------------------------------------------------------------------ | |
| 924 -- x86 Template String Description | |
| 925 -- =============================== | |
| 926 -- | |
| 927 -- Each template string is a list of [match:]pattern pairs, | |
| 928 -- separated by "|". The first match wins. No match means a | |
| 929 -- bad or unsupported combination of operand modes or sizes. | |
| 930 -- | |
| 931 -- The match part and the ":" is omitted if the operation has | |
| 932 -- no operands. Otherwise the first N characters are matched | |
| 933 -- against the mode strings of each of the N operands. | |
| 934 -- | |
| 935 -- The mode string for each operand type is (see parseoperand()): | |
| 936 -- Integer register: "rm", +"R" for eax, ax, al, +"C" for cl | |
| 937 -- FP register: "f", +"F" for st0 | |
| 938 -- Index operand: "xm", +"O" for [disp] (pure offset) | |
| 939 -- Immediate: "i", +"S" for signed 8 bit, +"1" for 1, | |
| 940 -- +"I" for arg, +"P" for pointer | |
| 941 -- Any: +"J" for valid jump targets | |
| 942 -- | |
| 943 -- So a match character "m" (mixed) matches both an integer register | |
| 944 -- and an index operand (to be encoded with the ModRM/SIB scheme). | |
| 945 -- But "r" matches only a register and "x" only an index operand | |
| 946 -- (e.g. for FP memory access operations). | |
| 947 -- | |
| 948 -- The operand size match string starts right after the mode match | |
| 949 -- characters and ends before the ":". "dwb" or "qdwb" is assumed, if empty. | |
| 950 -- The effective data size of the operation is matched against this list. | |
| 951 -- | |
| 952 -- If only the regular "b", "w", "d", "q", "t" operand sizes are | |
| 953 -- present, then all operands must be the same size. Unspecified sizes | |
| 954 -- are ignored, but at least one operand must have a size or the pattern | |
| 955 -- won't match (use the "byte", "word", "dword", "qword", "tword" | |
| 956 -- operand size overrides. E.g.: mov dword [eax], 1). | |
| 957 -- | |
| 958 -- If the list has a "1" or "2" prefix, the operand size is taken | |
| 959 -- from the respective operand and any other operand sizes are ignored. | |
| 960 -- If the list contains only ".", all operand sizes are ignored. | |
| 961 -- If the list has a "/" prefix, the concatenated (mixed) operand sizes | |
| 962 -- are compared to the match. | |
| 963 -- | |
| 964 -- E.g. "rrdw" matches for either two dword registers or two word | |
| 965 -- registers. "Fx2dq" matches an st0 operand plus an index operand | |
| 966 -- pointing to a dword (float) or qword (double). | |
| 967 -- | |
| 968 -- Every character after the ":" is part of the pattern string: | |
| 969 -- Hex chars are accumulated to form the opcode (left to right). | |
| 970 -- "n" disables the standard opcode mods | |
| 971 -- (otherwise: -1 for "b", o16 prefix for "w", rex.w for "q") | |
| 972 -- "X" Force REX.W. | |
| 973 -- "r"/"R" adds the reg. number from the 1st/2nd operand to the opcode. | |
| 974 -- "m"/"M" generates ModRM/SIB from the 1st/2nd operand. | |
| 975 -- The spare 3 bits are either filled with the last hex digit or | |
| 976 -- the result from a previous "r"/"R". The opcode is restored. | |
| 977 -- "u" Use VEX encoding, vvvv unused. | |
| 978 -- "v"/"V" Use VEX encoding, vvvv from 1st/2nd operand (the operand is | |
| 979 -- removed from the list used by future characters). | |
| 980 -- "w" Use VEX encoding, vvvv from 3rd operand. | |
| 981 -- "L" Force VEX.L | |
| 982 -- | |
| 983 -- All of the following characters force a flush of the opcode: | |
| 984 -- "o"/"O" stores a pure 32 bit disp (offset) from the 1st/2nd operand. | |
| 985 -- "s" stores a 4 bit immediate from the last register operand, | |
| 986 -- followed by 4 zero bits. | |
| 987 -- "S" stores a signed 8 bit immediate from the last operand. | |
| 988 -- "U" stores an unsigned 8 bit immediate from the last operand. | |
| 989 -- "W" stores an unsigned 16 bit immediate from the last operand. | |
| 990 -- "i" stores an operand sized immediate from the last operand. | |
| 991 -- "I" ditto, but generates an action code to optionally modify | |
| 992 -- the opcode (+2) for a signed 8 bit immediate. | |
| 993 -- "J" generates one of the REL action codes from the last operand. | |
| 994 -- | |
| 995 ------------------------------------------------------------------------------ | |
| 996 | |
| 997 -- Template strings for x86 instructions. Ordered by first opcode byte. | |
| 998 -- Unimplemented opcodes (deliberate omissions) are marked with *. | |
| 999 local map_op = { | |
| 1000 -- 00-05: add... | |
| 1001 -- 06: *push es | |
| 1002 -- 07: *pop es | |
| 1003 -- 08-0D: or... | |
| 1004 -- 0E: *push cs | |
| 1005 -- 0F: two byte opcode prefix | |
| 1006 -- 10-15: adc... | |
| 1007 -- 16: *push ss | |
| 1008 -- 17: *pop ss | |
| 1009 -- 18-1D: sbb... | |
| 1010 -- 1E: *push ds | |
| 1011 -- 1F: *pop ds | |
| 1012 -- 20-25: and... | |
| 1013 es_0 = "26", | |
| 1014 -- 27: *daa | |
| 1015 -- 28-2D: sub... | |
| 1016 cs_0 = "2E", | |
| 1017 -- 2F: *das | |
| 1018 -- 30-35: xor... | |
| 1019 ss_0 = "36", | |
| 1020 -- 37: *aaa | |
| 1021 -- 38-3D: cmp... | |
| 1022 ds_0 = "3E", | |
| 1023 -- 3F: *aas | |
| 1024 inc_1 = x64 and "m:FF0m" or "rdw:40r|m:FF0m", | |
| 1025 dec_1 = x64 and "m:FF1m" or "rdw:48r|m:FF1m", | |
| 1026 push_1 = (x64 and "rq:n50r|rw:50r|mq:nFF6m|mw:FF6m" or | |
| 1027 "rdw:50r|mdw:FF6m").."|S.:6AS|ib:n6Ai|i.:68i", | |
| 1028 pop_1 = x64 and "rq:n58r|rw:58r|mq:n8F0m|mw:8F0m" or "rdw:58r|mdw:8F0m", | |
| 1029 -- 60: *pusha, *pushad, *pushaw | |
| 1030 -- 61: *popa, *popad, *popaw | |
| 1031 -- 62: *bound rdw,x | |
| 1032 -- 63: x86: *arpl mw,rw | |
| 1033 movsxd_2 = x64 and "rm/qd:63rM", | |
| 1034 fs_0 = "64", | |
| 1035 gs_0 = "65", | |
| 1036 o16_0 = "66", | |
| 1037 a16_0 = not x64 and "67" or nil, | |
| 1038 a32_0 = x64 and "67", | |
| 1039 -- 68: push idw | |
| 1040 -- 69: imul rdw,mdw,idw | |
| 1041 -- 6A: push ib | |
| 1042 -- 6B: imul rdw,mdw,S | |
| 1043 -- 6C: *insb | |
| 1044 -- 6D: *insd, *insw | |
| 1045 -- 6E: *outsb | |
| 1046 -- 6F: *outsd, *outsw | |
| 1047 -- 70-7F: jcc lb | |
| 1048 -- 80: add... mb,i | |
| 1049 -- 81: add... mdw,i | |
| 1050 -- 82: *undefined | |
| 1051 -- 83: add... mdw,S | |
| 1052 test_2 = "mr:85Rm|rm:85rM|Ri:A9ri|mi:F70mi", | |
| 1053 -- 86: xchg rb,mb | |
| 1054 -- 87: xchg rdw,mdw | |
| 1055 -- 88: mov mb,r | |
| 1056 -- 89: mov mdw,r | |
| 1057 -- 8A: mov r,mb | |
| 1058 -- 8B: mov r,mdw | |
| 1059 -- 8C: *mov mdw,seg | |
| 1060 lea_2 = "rx1dq:8DrM", | |
| 1061 -- 8E: *mov seg,mdw | |
| 1062 -- 8F: pop mdw | |
| 1063 nop_0 = "90", | |
| 1064 xchg_2 = "Rrqdw:90R|rRqdw:90r|rm:87rM|mr:87Rm", | |
| 1065 cbw_0 = "6698", | |
| 1066 cwde_0 = "98", | |
| 1067 cdqe_0 = "4898", | |
| 1068 cwd_0 = "6699", | |
| 1069 cdq_0 = "99", | |
| 1070 cqo_0 = "4899", | |
| 1071 -- 9A: *call iw:idw | |
| 1072 wait_0 = "9B", | |
| 1073 fwait_0 = "9B", | |
| 1074 pushf_0 = "9C", | |
| 1075 pushfd_0 = not x64 and "9C", | |
| 1076 pushfq_0 = x64 and "9C", | |
| 1077 popf_0 = "9D", | |
| 1078 popfd_0 = not x64 and "9D", | |
| 1079 popfq_0 = x64 and "9D", | |
| 1080 sahf_0 = "9E", | |
| 1081 lahf_0 = "9F", | |
| 1082 mov_2 = "OR:A3o|RO:A1O|mr:89Rm|rm:8BrM|rib:nB0ri|ridw:B8ri|mi:C70mi", | |
| 1083 movsb_0 = "A4", | |
| 1084 movsw_0 = "66A5", | |
| 1085 movsd_0 = "A5", | |
| 1086 cmpsb_0 = "A6", | |
| 1087 cmpsw_0 = "66A7", | |
| 1088 cmpsd_0 = "A7", | |
| 1089 -- A8: test Rb,i | |
| 1090 -- A9: test Rdw,i | |
| 1091 stosb_0 = "AA", | |
| 1092 stosw_0 = "66AB", | |
| 1093 stosd_0 = "AB", | |
| 1094 lodsb_0 = "AC", | |
| 1095 lodsw_0 = "66AD", | |
| 1096 lodsd_0 = "AD", | |
| 1097 scasb_0 = "AE", | |
| 1098 scasw_0 = "66AF", | |
| 1099 scasd_0 = "AF", | |
| 1100 -- B0-B7: mov rb,i | |
| 1101 -- B8-BF: mov rdw,i | |
| 1102 -- C0: rol... mb,i | |
| 1103 -- C1: rol... mdw,i | |
| 1104 ret_1 = "i.:nC2W", | |
| 1105 ret_0 = "C3", | |
| 1106 -- C4: *les rdw,mq | |
| 1107 -- C5: *lds rdw,mq | |
| 1108 -- C6: mov mb,i | |
| 1109 -- C7: mov mdw,i | |
| 1110 -- C8: *enter iw,ib | |
| 1111 leave_0 = "C9", | |
| 1112 -- CA: *retf iw | |
| 1113 -- CB: *retf | |
| 1114 int3_0 = "CC", | |
| 1115 int_1 = "i.:nCDU", | |
| 1116 into_0 = "CE", | |
| 1117 -- CF: *iret | |
| 1118 -- D0: rol... mb,1 | |
| 1119 -- D1: rol... mdw,1 | |
| 1120 -- D2: rol... mb,cl | |
| 1121 -- D3: rol... mdw,cl | |
| 1122 -- D4: *aam ib | |
| 1123 -- D5: *aad ib | |
| 1124 -- D6: *salc | |
| 1125 -- D7: *xlat | |
| 1126 -- D8-DF: floating point ops | |
| 1127 -- E0: *loopne | |
| 1128 -- E1: *loope | |
| 1129 -- E2: *loop | |
| 1130 -- E3: *jcxz, *jecxz | |
| 1131 -- E4: *in Rb,ib | |
| 1132 -- E5: *in Rdw,ib | |
| 1133 -- E6: *out ib,Rb | |
| 1134 -- E7: *out ib,Rdw | |
| 1135 call_1 = x64 and "mq:nFF2m|J.:E8nJ" or "md:FF2m|J.:E8J", | |
| 1136 jmp_1 = x64 and "mq:nFF4m|J.:E9nJ" or "md:FF4m|J.:E9J", -- short: EB | |
| 1137 -- EA: *jmp iw:idw | |
| 1138 -- EB: jmp ib | |
| 1139 -- EC: *in Rb,dx | |
| 1140 -- ED: *in Rdw,dx | |
| 1141 -- EE: *out dx,Rb | |
| 1142 -- EF: *out dx,Rdw | |
| 1143 lock_0 = "F0", | |
| 1144 int1_0 = "F1", | |
| 1145 repne_0 = "F2", | |
| 1146 repnz_0 = "F2", | |
| 1147 rep_0 = "F3", | |
| 1148 repe_0 = "F3", | |
| 1149 repz_0 = "F3", | |
| 1150 -- F4: *hlt | |
| 1151 cmc_0 = "F5", | |
| 1152 -- F6: test... mb,i; div... mb | |
| 1153 -- F7: test... mdw,i; div... mdw | |
| 1154 clc_0 = "F8", | |
| 1155 stc_0 = "F9", | |
| 1156 -- FA: *cli | |
| 1157 cld_0 = "FC", | |
| 1158 std_0 = "FD", | |
| 1159 -- FE: inc... mb | |
| 1160 -- FF: inc... mdw | |
| 1161 | |
| 1162 -- misc ops | |
| 1163 not_1 = "m:F72m", | |
| 1164 neg_1 = "m:F73m", | |
| 1165 mul_1 = "m:F74m", | |
| 1166 imul_1 = "m:F75m", | |
| 1167 div_1 = "m:F76m", | |
| 1168 idiv_1 = "m:F77m", | |
| 1169 | |
| 1170 imul_2 = "rmqdw:0FAFrM|rIqdw:69rmI|rSqdw:6BrmS|riqdw:69rmi", | |
| 1171 imul_3 = "rmIqdw:69rMI|rmSqdw:6BrMS|rmiqdw:69rMi", | |
| 1172 | |
| 1173 movzx_2 = "rm/db:0FB6rM|rm/qb:|rm/wb:0FB6rM|rm/dw:0FB7rM|rm/qw:", | |
| 1174 movsx_2 = "rm/db:0FBErM|rm/qb:|rm/wb:0FBErM|rm/dw:0FBFrM|rm/qw:", | |
| 1175 | |
| 1176 bswap_1 = "rqd:0FC8r", | |
| 1177 bsf_2 = "rmqdw:0FBCrM", | |
| 1178 bsr_2 = "rmqdw:0FBDrM", | |
| 1179 bt_2 = "mrqdw:0FA3Rm|miqdw:0FBA4mU", | |
| 1180 btc_2 = "mrqdw:0FBBRm|miqdw:0FBA7mU", | |
| 1181 btr_2 = "mrqdw:0FB3Rm|miqdw:0FBA6mU", | |
| 1182 bts_2 = "mrqdw:0FABRm|miqdw:0FBA5mU", | |
| 1183 | |
| 1184 shld_3 = "mriqdw:0FA4RmU|mrC/qq:0FA5Rm|mrC/dd:|mrC/ww:", | |
| 1185 shrd_3 = "mriqdw:0FACRmU|mrC/qq:0FADRm|mrC/dd:|mrC/ww:", | |
| 1186 | |
| 1187 rdtsc_0 = "0F31", -- P1+ | |
| 1188 rdpmc_0 = "0F33", -- P6+ | |
| 1189 cpuid_0 = "0FA2", -- P1+ | |
| 1190 | |
| 1191 -- floating point ops | |
| 1192 fst_1 = "ff:DDD0r|xd:D92m|xq:nDD2m", | |
| 1193 fstp_1 = "ff:DDD8r|xd:D93m|xq:nDD3m|xt:DB7m", | |
| 1194 fld_1 = "ff:D9C0r|xd:D90m|xq:nDD0m|xt:DB5m", | |
| 1195 | |
| 1196 fpop_0 = "DDD8", -- Alias for fstp st0. | |
| 1197 | |
| 1198 fist_1 = "xw:nDF2m|xd:DB2m", | |
| 1199 fistp_1 = "xw:nDF3m|xd:DB3m|xq:nDF7m", | |
| 1200 fild_1 = "xw:nDF0m|xd:DB0m|xq:nDF5m", | |
| 1201 | |
| 1202 fxch_0 = "D9C9", | |
| 1203 fxch_1 = "ff:D9C8r", | |
| 1204 fxch_2 = "fFf:D9C8r|Fff:D9C8R", | |
| 1205 | |
| 1206 fucom_1 = "ff:DDE0r", | |
| 1207 fucom_2 = "Fff:DDE0R", | |
| 1208 fucomp_1 = "ff:DDE8r", | |
| 1209 fucomp_2 = "Fff:DDE8R", | |
| 1210 fucomi_1 = "ff:DBE8r", -- P6+ | |
| 1211 fucomi_2 = "Fff:DBE8R", -- P6+ | |
| 1212 fucomip_1 = "ff:DFE8r", -- P6+ | |
| 1213 fucomip_2 = "Fff:DFE8R", -- P6+ | |
| 1214 fcomi_1 = "ff:DBF0r", -- P6+ | |
| 1215 fcomi_2 = "Fff:DBF0R", -- P6+ | |
| 1216 fcomip_1 = "ff:DFF0r", -- P6+ | |
| 1217 fcomip_2 = "Fff:DFF0R", -- P6+ | |
| 1218 fucompp_0 = "DAE9", | |
| 1219 fcompp_0 = "DED9", | |
| 1220 | |
| 1221 fldenv_1 = "x.:D94m", | |
| 1222 fnstenv_1 = "x.:D96m", | |
| 1223 fstenv_1 = "x.:9BD96m", | |
| 1224 fldcw_1 = "xw:nD95m", | |
| 1225 fstcw_1 = "xw:n9BD97m", | |
| 1226 fnstcw_1 = "xw:nD97m", | |
| 1227 fstsw_1 = "Rw:n9BDFE0|xw:n9BDD7m", | |
| 1228 fnstsw_1 = "Rw:nDFE0|xw:nDD7m", | |
| 1229 fclex_0 = "9BDBE2", | |
| 1230 fnclex_0 = "DBE2", | |
| 1231 | |
| 1232 fnop_0 = "D9D0", | |
| 1233 -- D9D1-D9DF: unassigned | |
| 1234 | |
| 1235 fchs_0 = "D9E0", | |
| 1236 fabs_0 = "D9E1", | |
| 1237 -- D9E2: unassigned | |
| 1238 -- D9E3: unassigned | |
| 1239 ftst_0 = "D9E4", | |
| 1240 fxam_0 = "D9E5", | |
| 1241 -- D9E6: unassigned | |
| 1242 -- D9E7: unassigned | |
| 1243 fld1_0 = "D9E8", | |
| 1244 fldl2t_0 = "D9E9", | |
| 1245 fldl2e_0 = "D9EA", | |
| 1246 fldpi_0 = "D9EB", | |
| 1247 fldlg2_0 = "D9EC", | |
| 1248 fldln2_0 = "D9ED", | |
| 1249 fldz_0 = "D9EE", | |
| 1250 -- D9EF: unassigned | |
| 1251 | |
| 1252 f2xm1_0 = "D9F0", | |
| 1253 fyl2x_0 = "D9F1", | |
| 1254 fptan_0 = "D9F2", | |
| 1255 fpatan_0 = "D9F3", | |
| 1256 fxtract_0 = "D9F4", | |
| 1257 fprem1_0 = "D9F5", | |
| 1258 fdecstp_0 = "D9F6", | |
| 1259 fincstp_0 = "D9F7", | |
| 1260 fprem_0 = "D9F8", | |
| 1261 fyl2xp1_0 = "D9F9", | |
| 1262 fsqrt_0 = "D9FA", | |
| 1263 fsincos_0 = "D9FB", | |
| 1264 frndint_0 = "D9FC", | |
| 1265 fscale_0 = "D9FD", | |
| 1266 fsin_0 = "D9FE", | |
| 1267 fcos_0 = "D9FF", | |
| 1268 | |
| 1269 -- SSE, SSE2 | |
| 1270 andnpd_2 = "rmo:660F55rM", | |
| 1271 andnps_2 = "rmo:0F55rM", | |
| 1272 andpd_2 = "rmo:660F54rM", | |
| 1273 andps_2 = "rmo:0F54rM", | |
| 1274 clflush_1 = "x.:0FAE7m", | |
| 1275 cmppd_3 = "rmio:660FC2rMU", | |
| 1276 cmpps_3 = "rmio:0FC2rMU", | |
| 1277 cmpsd_3 = "rrio:F20FC2rMU|rxi/oq:", | |
| 1278 cmpss_3 = "rrio:F30FC2rMU|rxi/od:", | |
| 1279 comisd_2 = "rro:660F2FrM|rx/oq:", | |
| 1280 comiss_2 = "rro:0F2FrM|rx/od:", | |
| 1281 cvtdq2pd_2 = "rro:F30FE6rM|rx/oq:", | |
| 1282 cvtdq2ps_2 = "rmo:0F5BrM", | |
| 1283 cvtpd2dq_2 = "rmo:F20FE6rM", | |
| 1284 cvtpd2ps_2 = "rmo:660F5ArM", | |
| 1285 cvtpi2pd_2 = "rx/oq:660F2ArM", | |
| 1286 cvtpi2ps_2 = "rx/oq:0F2ArM", | |
| 1287 cvtps2dq_2 = "rmo:660F5BrM", | |
| 1288 cvtps2pd_2 = "rro:0F5ArM|rx/oq:", | |
| 1289 cvtsd2si_2 = "rr/do:F20F2DrM|rr/qo:|rx/dq:|rxq:", | |
| 1290 cvtsd2ss_2 = "rro:F20F5ArM|rx/oq:", | |
| 1291 cvtsi2sd_2 = "rm/od:F20F2ArM|rm/oq:F20F2ArXM", | |
| 1292 cvtsi2ss_2 = "rm/od:F30F2ArM|rm/oq:F30F2ArXM", | |
| 1293 cvtss2sd_2 = "rro:F30F5ArM|rx/od:", | |
| 1294 cvtss2si_2 = "rr/do:F30F2DrM|rr/qo:|rxd:|rx/qd:", | |
| 1295 cvttpd2dq_2 = "rmo:660FE6rM", | |
| 1296 cvttps2dq_2 = "rmo:F30F5BrM", | |
| 1297 cvttsd2si_2 = "rr/do:F20F2CrM|rr/qo:|rx/dq:|rxq:", | |
| 1298 cvttss2si_2 = "rr/do:F30F2CrM|rr/qo:|rxd:|rx/qd:", | |
| 1299 fxsave_1 = "x.:0FAE0m", | |
| 1300 fxrstor_1 = "x.:0FAE1m", | |
| 1301 ldmxcsr_1 = "xd:0FAE2m", | |
| 1302 lfence_0 = "0FAEE8", | |
| 1303 maskmovdqu_2 = "rro:660FF7rM", | |
| 1304 mfence_0 = "0FAEF0", | |
| 1305 movapd_2 = "rmo:660F28rM|mro:660F29Rm", | |
| 1306 movaps_2 = "rmo:0F28rM|mro:0F29Rm", | |
| 1307 movd_2 = "rm/od:660F6ErM|rm/oq:660F6ErXM|mr/do:660F7ERm|mr/qo:", | |
| 1308 movdqa_2 = "rmo:660F6FrM|mro:660F7FRm", | |
| 1309 movdqu_2 = "rmo:F30F6FrM|mro:F30F7FRm", | |
| 1310 movhlps_2 = "rro:0F12rM", | |
| 1311 movhpd_2 = "rx/oq:660F16rM|xr/qo:n660F17Rm", | |
| 1312 movhps_2 = "rx/oq:0F16rM|xr/qo:n0F17Rm", | |
| 1313 movlhps_2 = "rro:0F16rM", | |
| 1314 movlpd_2 = "rx/oq:660F12rM|xr/qo:n660F13Rm", | |
| 1315 movlps_2 = "rx/oq:0F12rM|xr/qo:n0F13Rm", | |
| 1316 movmskpd_2 = "rr/do:660F50rM", | |
| 1317 movmskps_2 = "rr/do:0F50rM", | |
| 1318 movntdq_2 = "xro:660FE7Rm", | |
| 1319 movnti_2 = "xrqd:0FC3Rm", | |
| 1320 movntpd_2 = "xro:660F2BRm", | |
| 1321 movntps_2 = "xro:0F2BRm", | |
| 1322 movq_2 = "rro:F30F7ErM|rx/oq:|xr/qo:n660FD6Rm", | |
| 1323 movsd_2 = "rro:F20F10rM|rx/oq:|xr/qo:nF20F11Rm", | |
| 1324 movss_2 = "rro:F30F10rM|rx/od:|xr/do:F30F11Rm", | |
| 1325 movupd_2 = "rmo:660F10rM|mro:660F11Rm", | |
| 1326 movups_2 = "rmo:0F10rM|mro:0F11Rm", | |
| 1327 orpd_2 = "rmo:660F56rM", | |
| 1328 orps_2 = "rmo:0F56rM", | |
| 1329 pause_0 = "F390", | |
| 1330 pextrw_3 = "rri/do:660FC5rMU|xri/wo:660F3A15nRmU", -- Mem op: SSE4.1 only. | |
| 1331 pinsrw_3 = "rri/od:660FC4rMU|rxi/ow:", | |
| 1332 pmovmskb_2 = "rr/do:660FD7rM", | |
| 1333 prefetchnta_1 = "xb:n0F180m", | |
| 1334 prefetcht0_1 = "xb:n0F181m", | |
| 1335 prefetcht1_1 = "xb:n0F182m", | |
| 1336 prefetcht2_1 = "xb:n0F183m", | |
| 1337 pshufd_3 = "rmio:660F70rMU", | |
| 1338 pshufhw_3 = "rmio:F30F70rMU", | |
| 1339 pshuflw_3 = "rmio:F20F70rMU", | |
| 1340 pslld_2 = "rmo:660FF2rM|rio:660F726mU", | |
| 1341 pslldq_2 = "rio:660F737mU", | |
| 1342 psllq_2 = "rmo:660FF3rM|rio:660F736mU", | |
| 1343 psllw_2 = "rmo:660FF1rM|rio:660F716mU", | |
| 1344 psrad_2 = "rmo:660FE2rM|rio:660F724mU", | |
| 1345 psraw_2 = "rmo:660FE1rM|rio:660F714mU", | |
| 1346 psrld_2 = "rmo:660FD2rM|rio:660F722mU", | |
| 1347 psrldq_2 = "rio:660F733mU", | |
| 1348 psrlq_2 = "rmo:660FD3rM|rio:660F732mU", | |
| 1349 psrlw_2 = "rmo:660FD1rM|rio:660F712mU", | |
| 1350 rcpps_2 = "rmo:0F53rM", | |
| 1351 rcpss_2 = "rro:F30F53rM|rx/od:", | |
| 1352 rsqrtps_2 = "rmo:0F52rM", | |
| 1353 rsqrtss_2 = "rmo:F30F52rM", | |
| 1354 sfence_0 = "0FAEF8", | |
| 1355 shufpd_3 = "rmio:660FC6rMU", | |
| 1356 shufps_3 = "rmio:0FC6rMU", | |
| 1357 stmxcsr_1 = "xd:0FAE3m", | |
| 1358 ucomisd_2 = "rro:660F2ErM|rx/oq:", | |
| 1359 ucomiss_2 = "rro:0F2ErM|rx/od:", | |
| 1360 unpckhpd_2 = "rmo:660F15rM", | |
| 1361 unpckhps_2 = "rmo:0F15rM", | |
| 1362 unpcklpd_2 = "rmo:660F14rM", | |
| 1363 unpcklps_2 = "rmo:0F14rM", | |
| 1364 xorpd_2 = "rmo:660F57rM", | |
| 1365 xorps_2 = "rmo:0F57rM", | |
| 1366 | |
| 1367 -- SSE3 ops | |
| 1368 fisttp_1 = "xw:nDF1m|xd:DB1m|xq:nDD1m", | |
| 1369 addsubpd_2 = "rmo:660FD0rM", | |
| 1370 addsubps_2 = "rmo:F20FD0rM", | |
| 1371 haddpd_2 = "rmo:660F7CrM", | |
| 1372 haddps_2 = "rmo:F20F7CrM", | |
| 1373 hsubpd_2 = "rmo:660F7DrM", | |
| 1374 hsubps_2 = "rmo:F20F7DrM", | |
| 1375 lddqu_2 = "rxo:F20FF0rM", | |
| 1376 movddup_2 = "rmo:F20F12rM", | |
| 1377 movshdup_2 = "rmo:F30F16rM", | |
| 1378 movsldup_2 = "rmo:F30F12rM", | |
| 1379 | |
| 1380 -- SSSE3 ops | |
| 1381 pabsb_2 = "rmo:660F381CrM", | |
| 1382 pabsd_2 = "rmo:660F381ErM", | |
| 1383 pabsw_2 = "rmo:660F381DrM", | |
| 1384 palignr_3 = "rmio:660F3A0FrMU", | |
| 1385 phaddd_2 = "rmo:660F3802rM", | |
| 1386 phaddsw_2 = "rmo:660F3803rM", | |
| 1387 phaddw_2 = "rmo:660F3801rM", | |
| 1388 phsubd_2 = "rmo:660F3806rM", | |
| 1389 phsubsw_2 = "rmo:660F3807rM", | |
| 1390 phsubw_2 = "rmo:660F3805rM", | |
| 1391 pmaddubsw_2 = "rmo:660F3804rM", | |
| 1392 pmulhrsw_2 = "rmo:660F380BrM", | |
| 1393 pshufb_2 = "rmo:660F3800rM", | |
| 1394 psignb_2 = "rmo:660F3808rM", | |
| 1395 psignd_2 = "rmo:660F380ArM", | |
| 1396 psignw_2 = "rmo:660F3809rM", | |
| 1397 | |
| 1398 -- SSE4.1 ops | |
| 1399 blendpd_3 = "rmio:660F3A0DrMU", | |
| 1400 blendps_3 = "rmio:660F3A0CrMU", | |
| 1401 blendvpd_3 = "rmRo:660F3815rM", | |
| 1402 blendvps_3 = "rmRo:660F3814rM", | |
| 1403 dppd_3 = "rmio:660F3A41rMU", | |
| 1404 dpps_3 = "rmio:660F3A40rMU", | |
| 1405 extractps_3 = "mri/do:660F3A17RmU|rri/qo:660F3A17RXmU", | |
| 1406 insertps_3 = "rrio:660F3A41rMU|rxi/od:", | |
| 1407 movntdqa_2 = "rxo:660F382ArM", | |
| 1408 mpsadbw_3 = "rmio:660F3A42rMU", | |
| 1409 packusdw_2 = "rmo:660F382BrM", | |
| 1410 pblendvb_3 = "rmRo:660F3810rM", | |
| 1411 pblendw_3 = "rmio:660F3A0ErMU", | |
| 1412 pcmpeqq_2 = "rmo:660F3829rM", | |
| 1413 pextrb_3 = "rri/do:660F3A14nRmU|rri/qo:|xri/bo:", | |
| 1414 pextrd_3 = "mri/do:660F3A16RmU", | |
| 1415 pextrq_3 = "mri/qo:660F3A16RmU", | |
| 1416 -- pextrw is SSE2, mem operand is SSE4.1 only | |
| 1417 phminposuw_2 = "rmo:660F3841rM", | |
| 1418 pinsrb_3 = "rri/od:660F3A20nrMU|rxi/ob:", | |
| 1419 pinsrd_3 = "rmi/od:660F3A22rMU", | |
| 1420 pinsrq_3 = "rmi/oq:660F3A22rXMU", | |
| 1421 pmaxsb_2 = "rmo:660F383CrM", | |
| 1422 pmaxsd_2 = "rmo:660F383DrM", | |
| 1423 pmaxud_2 = "rmo:660F383FrM", | |
| 1424 pmaxuw_2 = "rmo:660F383ErM", | |
| 1425 pminsb_2 = "rmo:660F3838rM", | |
| 1426 pminsd_2 = "rmo:660F3839rM", | |
| 1427 pminud_2 = "rmo:660F383BrM", | |
| 1428 pminuw_2 = "rmo:660F383ArM", | |
| 1429 pmovsxbd_2 = "rro:660F3821rM|rx/od:", | |
| 1430 pmovsxbq_2 = "rro:660F3822rM|rx/ow:", | |
| 1431 pmovsxbw_2 = "rro:660F3820rM|rx/oq:", | |
| 1432 pmovsxdq_2 = "rro:660F3825rM|rx/oq:", | |
| 1433 pmovsxwd_2 = "rro:660F3823rM|rx/oq:", | |
| 1434 pmovsxwq_2 = "rro:660F3824rM|rx/od:", | |
| 1435 pmovzxbd_2 = "rro:660F3831rM|rx/od:", | |
| 1436 pmovzxbq_2 = "rro:660F3832rM|rx/ow:", | |
| 1437 pmovzxbw_2 = "rro:660F3830rM|rx/oq:", | |
| 1438 pmovzxdq_2 = "rro:660F3835rM|rx/oq:", | |
| 1439 pmovzxwd_2 = "rro:660F3833rM|rx/oq:", | |
| 1440 pmovzxwq_2 = "rro:660F3834rM|rx/od:", | |
| 1441 pmuldq_2 = "rmo:660F3828rM", | |
| 1442 pmulld_2 = "rmo:660F3840rM", | |
| 1443 ptest_2 = "rmo:660F3817rM", | |
| 1444 roundpd_3 = "rmio:660F3A09rMU", | |
| 1445 roundps_3 = "rmio:660F3A08rMU", | |
| 1446 roundsd_3 = "rrio:660F3A0BrMU|rxi/oq:", | |
| 1447 roundss_3 = "rrio:660F3A0ArMU|rxi/od:", | |
| 1448 | |
| 1449 -- SSE4.2 ops | |
| 1450 crc32_2 = "rmqd:F20F38F1rM|rm/dw:66F20F38F1rM|rm/db:F20F38F0rM|rm/qb:", | |
| 1451 pcmpestri_3 = "rmio:660F3A61rMU", | |
| 1452 pcmpestrm_3 = "rmio:660F3A60rMU", | |
| 1453 pcmpgtq_2 = "rmo:660F3837rM", | |
| 1454 pcmpistri_3 = "rmio:660F3A63rMU", | |
| 1455 pcmpistrm_3 = "rmio:660F3A62rMU", | |
| 1456 popcnt_2 = "rmqdw:F30FB8rM", | |
| 1457 | |
| 1458 -- SSE4a | |
| 1459 extrq_2 = "rro:660F79rM", | |
| 1460 extrq_3 = "riio:660F780mUU", | |
| 1461 insertq_2 = "rro:F20F79rM", | |
| 1462 insertq_4 = "rriio:F20F78rMUU", | |
| 1463 lzcnt_2 = "rmqdw:F30FBDrM", | |
| 1464 movntsd_2 = "xr/qo:nF20F2BRm", | |
| 1465 movntss_2 = "xr/do:F30F2BRm", | |
| 1466 -- popcnt is also in SSE4.2 | |
| 1467 | |
| 1468 -- AES-NI | |
| 1469 aesdec_2 = "rmo:660F38DErM", | |
| 1470 aesdeclast_2 = "rmo:660F38DFrM", | |
| 1471 aesenc_2 = "rmo:660F38DCrM", | |
| 1472 aesenclast_2 = "rmo:660F38DDrM", | |
| 1473 aesimc_2 = "rmo:660F38DBrM", | |
| 1474 aeskeygenassist_3 = "rmio:660F3ADFrMU", | |
| 1475 pclmulqdq_3 = "rmio:660F3A44rMU", | |
| 1476 | |
| 1477 -- AVX FP ops | |
| 1478 vaddsubpd_3 = "rrmoy:660FVD0rM", | |
| 1479 vaddsubps_3 = "rrmoy:F20FVD0rM", | |
| 1480 vandpd_3 = "rrmoy:660FV54rM", | |
| 1481 vandps_3 = "rrmoy:0FV54rM", | |
| 1482 vandnpd_3 = "rrmoy:660FV55rM", | |
| 1483 vandnps_3 = "rrmoy:0FV55rM", | |
| 1484 vblendpd_4 = "rrmioy:660F3AV0DrMU", | |
| 1485 vblendps_4 = "rrmioy:660F3AV0CrMU", | |
| 1486 vblendvpd_4 = "rrmroy:660F3AV4BrMs", | |
| 1487 vblendvps_4 = "rrmroy:660F3AV4ArMs", | |
| 1488 vbroadcastf128_2 = "rx/yo:660F38u1ArM", | |
| 1489 vcmppd_4 = "rrmioy:660FVC2rMU", | |
| 1490 vcmpps_4 = "rrmioy:0FVC2rMU", | |
| 1491 vcmpsd_4 = "rrrio:F20FVC2rMU|rrxi/ooq:", | |
| 1492 vcmpss_4 = "rrrio:F30FVC2rMU|rrxi/ood:", | |
| 1493 vcomisd_2 = "rro:660Fu2FrM|rx/oq:", | |
| 1494 vcomiss_2 = "rro:0Fu2FrM|rx/od:", | |
| 1495 vcvtdq2pd_2 = "rro:F30FuE6rM|rx/oq:|rm/yo:", | |
| 1496 vcvtdq2ps_2 = "rmoy:0Fu5BrM", | |
| 1497 vcvtpd2dq_2 = "rmoy:F20FuE6rM", | |
| 1498 vcvtpd2ps_2 = "rmoy:660Fu5ArM", | |
| 1499 vcvtps2dq_2 = "rmoy:660Fu5BrM", | |
| 1500 vcvtps2pd_2 = "rro:0Fu5ArM|rx/oq:|rm/yo:", | |
| 1501 vcvtsd2si_2 = "rr/do:F20Fu2DrM|rx/dq:|rr/qo:|rxq:", | |
| 1502 vcvtsd2ss_3 = "rrro:F20FV5ArM|rrx/ooq:", | |
| 1503 vcvtsi2sd_3 = "rrm/ood:F20FV2ArM|rrm/ooq:F20FVX2ArM", | |
| 1504 vcvtsi2ss_3 = "rrm/ood:F30FV2ArM|rrm/ooq:F30FVX2ArM", | |
| 1505 vcvtss2sd_3 = "rrro:F30FV5ArM|rrx/ood:", | |
| 1506 vcvtss2si_2 = "rr/do:F30Fu2DrM|rxd:|rr/qo:|rx/qd:", | |
| 1507 vcvttpd2dq_2 = "rmo:660FuE6rM|rm/oy:660FuLE6rM", | |
| 1508 vcvttps2dq_2 = "rmoy:F30Fu5BrM", | |
| 1509 vcvttsd2si_2 = "rr/do:F20Fu2CrM|rx/dq:|rr/qo:|rxq:", | |
| 1510 vcvttss2si_2 = "rr/do:F30Fu2CrM|rxd:|rr/qo:|rx/qd:", | |
| 1511 vdppd_4 = "rrmio:660F3AV41rMU", | |
| 1512 vdpps_4 = "rrmioy:660F3AV40rMU", | |
| 1513 vextractf128_3 = "mri/oy:660F3AuL19RmU", | |
| 1514 vextractps_3 = "mri/do:660F3Au17RmU", | |
| 1515 vhaddpd_3 = "rrmoy:660FV7CrM", | |
| 1516 vhaddps_3 = "rrmoy:F20FV7CrM", | |
| 1517 vhsubpd_3 = "rrmoy:660FV7DrM", | |
| 1518 vhsubps_3 = "rrmoy:F20FV7DrM", | |
| 1519 vinsertf128_4 = "rrmi/yyo:660F3AV18rMU", | |
| 1520 vinsertps_4 = "rrrio:660F3AV21rMU|rrxi/ood:", | |
| 1521 vldmxcsr_1 = "xd:0FuAE2m", | |
| 1522 vmaskmovps_3 = "rrxoy:660F38V2CrM|xrroy:660F38V2ERm", | |
| 1523 vmaskmovpd_3 = "rrxoy:660F38V2DrM|xrroy:660F38V2FRm", | |
| 1524 vmovapd_2 = "rmoy:660Fu28rM|mroy:660Fu29Rm", | |
| 1525 vmovaps_2 = "rmoy:0Fu28rM|mroy:0Fu29Rm", | |
| 1526 vmovd_2 = "rm/od:660Fu6ErM|rm/oq:660FuX6ErM|mr/do:660Fu7ERm|mr/qo:", | |
| 1527 vmovq_2 = "rro:F30Fu7ErM|rx/oq:|xr/qo:660FuD6Rm", | |
| 1528 vmovddup_2 = "rmy:F20Fu12rM|rro:|rx/oq:", | |
| 1529 vmovhlps_3 = "rrro:0FV12rM", | |
| 1530 vmovhpd_2 = "xr/qo:660Fu17Rm", | |
| 1531 vmovhpd_3 = "rrx/ooq:660FV16rM", | |
| 1532 vmovhps_2 = "xr/qo:0Fu17Rm", | |
| 1533 vmovhps_3 = "rrx/ooq:0FV16rM", | |
| 1534 vmovlhps_3 = "rrro:0FV16rM", | |
| 1535 vmovlpd_2 = "xr/qo:660Fu13Rm", | |
| 1536 vmovlpd_3 = "rrx/ooq:660FV12rM", | |
| 1537 vmovlps_2 = "xr/qo:0Fu13Rm", | |
| 1538 vmovlps_3 = "rrx/ooq:0FV12rM", | |
| 1539 vmovmskpd_2 = "rr/do:660Fu50rM|rr/dy:660FuL50rM", | |
| 1540 vmovmskps_2 = "rr/do:0Fu50rM|rr/dy:0FuL50rM", | |
| 1541 vmovntpd_2 = "xroy:660Fu2BRm", | |
| 1542 vmovntps_2 = "xroy:0Fu2BRm", | |
| 1543 vmovsd_2 = "rx/oq:F20Fu10rM|xr/qo:F20Fu11Rm", | |
| 1544 vmovsd_3 = "rrro:F20FV10rM", | |
| 1545 vmovshdup_2 = "rmoy:F30Fu16rM", | |
| 1546 vmovsldup_2 = "rmoy:F30Fu12rM", | |
| 1547 vmovss_2 = "rx/od:F30Fu10rM|xr/do:F30Fu11Rm", | |
| 1548 vmovss_3 = "rrro:F30FV10rM", | |
| 1549 vmovupd_2 = "rmoy:660Fu10rM|mroy:660Fu11Rm", | |
| 1550 vmovups_2 = "rmoy:0Fu10rM|mroy:0Fu11Rm", | |
| 1551 vorpd_3 = "rrmoy:660FV56rM", | |
| 1552 vorps_3 = "rrmoy:0FV56rM", | |
| 1553 vpermilpd_3 = "rrmoy:660F38V0DrM|rmioy:660F3Au05rMU", | |
| 1554 vpermilps_3 = "rrmoy:660F38V0CrM|rmioy:660F3Au04rMU", | |
| 1555 vperm2f128_4 = "rrmiy:660F3AV06rMU", | |
| 1556 vptestpd_2 = "rmoy:660F38u0FrM", | |
| 1557 vptestps_2 = "rmoy:660F38u0ErM", | |
| 1558 vrcpps_2 = "rmoy:0Fu53rM", | |
| 1559 vrcpss_3 = "rrro:F30FV53rM|rrx/ood:", | |
| 1560 vrsqrtps_2 = "rmoy:0Fu52rM", | |
| 1561 vrsqrtss_3 = "rrro:F30FV52rM|rrx/ood:", | |
| 1562 vroundpd_3 = "rmioy:660F3Au09rMU", | |
| 1563 vroundps_3 = "rmioy:660F3Au08rMU", | |
| 1564 vroundsd_4 = "rrrio:660F3AV0BrMU|rrxi/ooq:", | |
| 1565 vroundss_4 = "rrrio:660F3AV0ArMU|rrxi/ood:", | |
| 1566 vshufpd_4 = "rrmioy:660FVC6rMU", | |
| 1567 vshufps_4 = "rrmioy:0FVC6rMU", | |
| 1568 vsqrtps_2 = "rmoy:0Fu51rM", | |
| 1569 vsqrtss_2 = "rro:F30Fu51rM|rx/od:", | |
| 1570 vsqrtpd_2 = "rmoy:660Fu51rM", | |
| 1571 vsqrtsd_2 = "rro:F20Fu51rM|rx/oq:", | |
| 1572 vstmxcsr_1 = "xd:0FuAE3m", | |
| 1573 vucomisd_2 = "rro:660Fu2ErM|rx/oq:", | |
| 1574 vucomiss_2 = "rro:0Fu2ErM|rx/od:", | |
| 1575 vunpckhpd_3 = "rrmoy:660FV15rM", | |
| 1576 vunpckhps_3 = "rrmoy:0FV15rM", | |
| 1577 vunpcklpd_3 = "rrmoy:660FV14rM", | |
| 1578 vunpcklps_3 = "rrmoy:0FV14rM", | |
| 1579 vxorpd_3 = "rrmoy:660FV57rM", | |
| 1580 vxorps_3 = "rrmoy:0FV57rM", | |
| 1581 vzeroall_0 = "0FuL77", | |
| 1582 vzeroupper_0 = "0Fu77", | |
| 1583 | |
| 1584 -- AVX2 FP ops | |
| 1585 vbroadcastss_2 = "rx/od:660F38u18rM|rx/yd:|rro:|rr/yo:", | |
| 1586 vbroadcastsd_2 = "rx/yq:660F38u19rM|rr/yo:", | |
| 1587 -- *vgather* (!vsib) | |
| 1588 vpermpd_3 = "rmiy:660F3AuX01rMU", | |
| 1589 vpermps_3 = "rrmy:660F38V16rM", | |
| 1590 | |
| 1591 -- AVX, AVX2 integer ops | |
| 1592 -- In general, xmm requires AVX, ymm requires AVX2. | |
| 1593 vaesdec_3 = "rrmo:660F38VDErM", | |
| 1594 vaesdeclast_3 = "rrmo:660F38VDFrM", | |
| 1595 vaesenc_3 = "rrmo:660F38VDCrM", | |
| 1596 vaesenclast_3 = "rrmo:660F38VDDrM", | |
| 1597 vaesimc_2 = "rmo:660F38uDBrM", | |
| 1598 vaeskeygenassist_3 = "rmio:660F3AuDFrMU", | |
| 1599 vlddqu_2 = "rxoy:F20FuF0rM", | |
| 1600 vmaskmovdqu_2 = "rro:660FuF7rM", | |
| 1601 vmovdqa_2 = "rmoy:660Fu6FrM|mroy:660Fu7FRm", | |
| 1602 vmovdqu_2 = "rmoy:F30Fu6FrM|mroy:F30Fu7FRm", | |
| 1603 vmovntdq_2 = "xroy:660FuE7Rm", | |
| 1604 vmovntdqa_2 = "rxoy:660F38u2ArM", | |
| 1605 vmpsadbw_4 = "rrmioy:660F3AV42rMU", | |
| 1606 vpabsb_2 = "rmoy:660F38u1CrM", | |
| 1607 vpabsd_2 = "rmoy:660F38u1ErM", | |
| 1608 vpabsw_2 = "rmoy:660F38u1DrM", | |
| 1609 vpackusdw_3 = "rrmoy:660F38V2BrM", | |
| 1610 vpalignr_4 = "rrmioy:660F3AV0FrMU", | |
| 1611 vpblendvb_4 = "rrmroy:660F3AV4CrMs", | |
| 1612 vpblendw_4 = "rrmioy:660F3AV0ErMU", | |
| 1613 vpclmulqdq_4 = "rrmio:660F3AV44rMU", | |
| 1614 vpcmpeqq_3 = "rrmoy:660F38V29rM", | |
| 1615 vpcmpestri_3 = "rmio:660F3Au61rMU", | |
| 1616 vpcmpestrm_3 = "rmio:660F3Au60rMU", | |
| 1617 vpcmpgtq_3 = "rrmoy:660F38V37rM", | |
| 1618 vpcmpistri_3 = "rmio:660F3Au63rMU", | |
| 1619 vpcmpistrm_3 = "rmio:660F3Au62rMU", | |
| 1620 vpextrb_3 = "rri/do:660F3Au14nRmU|rri/qo:|xri/bo:", | |
| 1621 vpextrw_3 = "rri/do:660FuC5rMU|xri/wo:660F3Au15nRmU", | |
| 1622 vpextrd_3 = "mri/do:660F3Au16RmU", | |
| 1623 vpextrq_3 = "mri/qo:660F3Au16RmU", | |
| 1624 vphaddw_3 = "rrmoy:660F38V01rM", | |
| 1625 vphaddd_3 = "rrmoy:660F38V02rM", | |
| 1626 vphaddsw_3 = "rrmoy:660F38V03rM", | |
| 1627 vphminposuw_2 = "rmo:660F38u41rM", | |
| 1628 vphsubw_3 = "rrmoy:660F38V05rM", | |
| 1629 vphsubd_3 = "rrmoy:660F38V06rM", | |
| 1630 vphsubsw_3 = "rrmoy:660F38V07rM", | |
| 1631 vpinsrb_4 = "rrri/ood:660F3AV20rMU|rrxi/oob:", | |
| 1632 vpinsrw_4 = "rrri/ood:660FVC4rMU|rrxi/oow:", | |
| 1633 vpinsrd_4 = "rrmi/ood:660F3AV22rMU", | |
| 1634 vpinsrq_4 = "rrmi/ooq:660F3AVX22rMU", | |
| 1635 vpmaddubsw_3 = "rrmoy:660F38V04rM", | |
| 1636 vpmaxsb_3 = "rrmoy:660F38V3CrM", | |
| 1637 vpmaxsd_3 = "rrmoy:660F38V3DrM", | |
| 1638 vpmaxuw_3 = "rrmoy:660F38V3ErM", | |
| 1639 vpmaxud_3 = "rrmoy:660F38V3FrM", | |
| 1640 vpminsb_3 = "rrmoy:660F38V38rM", | |
| 1641 vpminsd_3 = "rrmoy:660F38V39rM", | |
| 1642 vpminuw_3 = "rrmoy:660F38V3ArM", | |
| 1643 vpminud_3 = "rrmoy:660F38V3BrM", | |
| 1644 vpmovmskb_2 = "rr/do:660FuD7rM|rr/dy:660FuLD7rM", | |
| 1645 vpmovsxbw_2 = "rroy:660F38u20rM|rx/oq:|rx/yo:", | |
| 1646 vpmovsxbd_2 = "rroy:660F38u21rM|rx/od:|rx/yq:", | |
| 1647 vpmovsxbq_2 = "rroy:660F38u22rM|rx/ow:|rx/yd:", | |
| 1648 vpmovsxwd_2 = "rroy:660F38u23rM|rx/oq:|rx/yo:", | |
| 1649 vpmovsxwq_2 = "rroy:660F38u24rM|rx/od:|rx/yq:", | |
| 1650 vpmovsxdq_2 = "rroy:660F38u25rM|rx/oq:|rx/yo:", | |
| 1651 vpmovzxbw_2 = "rroy:660F38u30rM|rx/oq:|rx/yo:", | |
| 1652 vpmovzxbd_2 = "rroy:660F38u31rM|rx/od:|rx/yq:", | |
| 1653 vpmovzxbq_2 = "rroy:660F38u32rM|rx/ow:|rx/yd:", | |
| 1654 vpmovzxwd_2 = "rroy:660F38u33rM|rx/oq:|rx/yo:", | |
| 1655 vpmovzxwq_2 = "rroy:660F38u34rM|rx/od:|rx/yq:", | |
| 1656 vpmovzxdq_2 = "rroy:660F38u35rM|rx/oq:|rx/yo:", | |
| 1657 vpmuldq_3 = "rrmoy:660F38V28rM", | |
| 1658 vpmulhrsw_3 = "rrmoy:660F38V0BrM", | |
| 1659 vpmulld_3 = "rrmoy:660F38V40rM", | |
| 1660 vpshufb_3 = "rrmoy:660F38V00rM", | |
| 1661 vpshufd_3 = "rmioy:660Fu70rMU", | |
| 1662 vpshufhw_3 = "rmioy:F30Fu70rMU", | |
| 1663 vpshuflw_3 = "rmioy:F20Fu70rMU", | |
| 1664 vpsignb_3 = "rrmoy:660F38V08rM", | |
| 1665 vpsignw_3 = "rrmoy:660F38V09rM", | |
| 1666 vpsignd_3 = "rrmoy:660F38V0ArM", | |
| 1667 vpslldq_3 = "rrioy:660Fv737mU", | |
| 1668 vpsllw_3 = "rrmoy:660FVF1rM|rrioy:660Fv716mU", | |
| 1669 vpslld_3 = "rrmoy:660FVF2rM|rrioy:660Fv726mU", | |
| 1670 vpsllq_3 = "rrmoy:660FVF3rM|rrioy:660Fv736mU", | |
| 1671 vpsraw_3 = "rrmoy:660FVE1rM|rrioy:660Fv714mU", | |
| 1672 vpsrad_3 = "rrmoy:660FVE2rM|rrioy:660Fv724mU", | |
| 1673 vpsrldq_3 = "rrioy:660Fv733mU", | |
| 1674 vpsrlw_3 = "rrmoy:660FVD1rM|rrioy:660Fv712mU", | |
| 1675 vpsrld_3 = "rrmoy:660FVD2rM|rrioy:660Fv722mU", | |
| 1676 vpsrlq_3 = "rrmoy:660FVD3rM|rrioy:660Fv732mU", | |
| 1677 vptest_2 = "rmoy:660F38u17rM", | |
| 1678 | |
| 1679 -- AVX2 integer ops | |
| 1680 vbroadcasti128_2 = "rx/yo:660F38u5ArM", | |
| 1681 vinserti128_4 = "rrmi/yyo:660F3AV38rMU", | |
| 1682 vextracti128_3 = "mri/oy:660F3AuL39RmU", | |
| 1683 vpblendd_4 = "rrmioy:660F3AV02rMU", | |
| 1684 vpbroadcastb_2 = "rro:660F38u78rM|rx/ob:|rr/yo:|rx/yb:", | |
| 1685 vpbroadcastw_2 = "rro:660F38u79rM|rx/ow:|rr/yo:|rx/yw:", | |
| 1686 vpbroadcastd_2 = "rro:660F38u58rM|rx/od:|rr/yo:|rx/yd:", | |
| 1687 vpbroadcastq_2 = "rro:660F38u59rM|rx/oq:|rr/yo:|rx/yq:", | |
| 1688 vpermd_3 = "rrmy:660F38V36rM", | |
| 1689 vpermq_3 = "rmiy:660F3AuX00rMU", | |
| 1690 -- *vpgather* (!vsib) | |
| 1691 vperm2i128_4 = "rrmiy:660F3AV46rMU", | |
| 1692 vpmaskmovd_3 = "rrxoy:660F38V8CrM|xrroy:660F38V8ERm", | |
| 1693 vpmaskmovq_3 = "rrxoy:660F38VX8CrM|xrroy:660F38VX8ERm", | |
| 1694 vpsllvd_3 = "rrmoy:660F38V47rM", | |
| 1695 vpsllvq_3 = "rrmoy:660F38VX47rM", | |
| 1696 vpsravd_3 = "rrmoy:660F38V46rM", | |
| 1697 vpsrlvd_3 = "rrmoy:660F38V45rM", | |
| 1698 vpsrlvq_3 = "rrmoy:660F38VX45rM", | |
| 1699 | |
| 1700 -- Intel ADX | |
| 1701 adcx_2 = "rmqd:660F38F6rM", | |
| 1702 adox_2 = "rmqd:F30F38F6rM", | |
| 1703 | |
| 1704 -- BMI1 | |
| 1705 andn_3 = "rrmqd:0F38VF2rM", | |
| 1706 bextr_3 = "rmrqd:0F38wF7rM", | |
| 1707 blsi_2 = "rmqd:0F38vF33m", | |
| 1708 blsmsk_2 = "rmqd:0F38vF32m", | |
| 1709 blsr_2 = "rmqd:0F38vF31m", | |
| 1710 tzcnt_2 = "rmqdw:F30FBCrM", | |
| 1711 | |
| 1712 -- BMI2 | |
| 1713 bzhi_3 = "rmrqd:0F38wF5rM", | |
| 1714 mulx_3 = "rrmqd:F20F38VF6rM", | |
| 1715 pdep_3 = "rrmqd:F20F38VF5rM", | |
| 1716 pext_3 = "rrmqd:F30F38VF5rM", | |
| 1717 rorx_3 = "rmSqd:F20F3AuF0rMS", | |
| 1718 sarx_3 = "rmrqd:F30F38wF7rM", | |
| 1719 shrx_3 = "rmrqd:F20F38wF7rM", | |
| 1720 shlx_3 = "rmrqd:660F38wF7rM", | |
| 1721 | |
| 1722 -- FMA3 | |
| 1723 vfmaddsub132pd_3 = "rrmoy:660F38VX96rM", | |
| 1724 vfmaddsub132ps_3 = "rrmoy:660F38V96rM", | |
| 1725 vfmaddsub213pd_3 = "rrmoy:660F38VXA6rM", | |
| 1726 vfmaddsub213ps_3 = "rrmoy:660F38VA6rM", | |
| 1727 vfmaddsub231pd_3 = "rrmoy:660F38VXB6rM", | |
| 1728 vfmaddsub231ps_3 = "rrmoy:660F38VB6rM", | |
| 1729 | |
| 1730 vfmsubadd132pd_3 = "rrmoy:660F38VX97rM", | |
| 1731 vfmsubadd132ps_3 = "rrmoy:660F38V97rM", | |
| 1732 vfmsubadd213pd_3 = "rrmoy:660F38VXA7rM", | |
| 1733 vfmsubadd213ps_3 = "rrmoy:660F38VA7rM", | |
| 1734 vfmsubadd231pd_3 = "rrmoy:660F38VXB7rM", | |
| 1735 vfmsubadd231ps_3 = "rrmoy:660F38VB7rM", | |
| 1736 | |
| 1737 vfmadd132pd_3 = "rrmoy:660F38VX98rM", | |
| 1738 vfmadd132ps_3 = "rrmoy:660F38V98rM", | |
| 1739 vfmadd132sd_3 = "rrro:660F38VX99rM|rrx/ooq:", | |
| 1740 vfmadd132ss_3 = "rrro:660F38V99rM|rrx/ood:", | |
| 1741 vfmadd213pd_3 = "rrmoy:660F38VXA8rM", | |
| 1742 vfmadd213ps_3 = "rrmoy:660F38VA8rM", | |
| 1743 vfmadd213sd_3 = "rrro:660F38VXA9rM|rrx/ooq:", | |
| 1744 vfmadd213ss_3 = "rrro:660F38VA9rM|rrx/ood:", | |
| 1745 vfmadd231pd_3 = "rrmoy:660F38VXB8rM", | |
| 1746 vfmadd231ps_3 = "rrmoy:660F38VB8rM", | |
| 1747 vfmadd231sd_3 = "rrro:660F38VXB9rM|rrx/ooq:", | |
| 1748 vfmadd231ss_3 = "rrro:660F38VB9rM|rrx/ood:", | |
| 1749 | |
| 1750 vfmsub132pd_3 = "rrmoy:660F38VX9ArM", | |
| 1751 vfmsub132ps_3 = "rrmoy:660F38V9ArM", | |
| 1752 vfmsub132sd_3 = "rrro:660F38VX9BrM|rrx/ooq:", | |
| 1753 vfmsub132ss_3 = "rrro:660F38V9BrM|rrx/ood:", | |
| 1754 vfmsub213pd_3 = "rrmoy:660F38VXAArM", | |
| 1755 vfmsub213ps_3 = "rrmoy:660F38VAArM", | |
| 1756 vfmsub213sd_3 = "rrro:660F38VXABrM|rrx/ooq:", | |
| 1757 vfmsub213ss_3 = "rrro:660F38VABrM|rrx/ood:", | |
| 1758 vfmsub231pd_3 = "rrmoy:660F38VXBArM", | |
| 1759 vfmsub231ps_3 = "rrmoy:660F38VBArM", | |
| 1760 vfmsub231sd_3 = "rrro:660F38VXBBrM|rrx/ooq:", | |
| 1761 vfmsub231ss_3 = "rrro:660F38VBBrM|rrx/ood:", | |
| 1762 | |
| 1763 vfnmadd132pd_3 = "rrmoy:660F38VX9CrM", | |
| 1764 vfnmadd132ps_3 = "rrmoy:660F38V9CrM", | |
| 1765 vfnmadd132sd_3 = "rrro:660F38VX9DrM|rrx/ooq:", | |
| 1766 vfnmadd132ss_3 = "rrro:660F38V9DrM|rrx/ood:", | |
| 1767 vfnmadd213pd_3 = "rrmoy:660F38VXACrM", | |
| 1768 vfnmadd213ps_3 = "rrmoy:660F38VACrM", | |
| 1769 vfnmadd213sd_3 = "rrro:660F38VXADrM|rrx/ooq:", | |
| 1770 vfnmadd213ss_3 = "rrro:660F38VADrM|rrx/ood:", | |
| 1771 vfnmadd231pd_3 = "rrmoy:660F38VXBCrM", | |
| 1772 vfnmadd231ps_3 = "rrmoy:660F38VBCrM", | |
| 1773 vfnmadd231sd_3 = "rrro:660F38VXBDrM|rrx/ooq:", | |
| 1774 vfnmadd231ss_3 = "rrro:660F38VBDrM|rrx/ood:", | |
| 1775 | |
| 1776 vfnmsub132pd_3 = "rrmoy:660F38VX9ErM", | |
| 1777 vfnmsub132ps_3 = "rrmoy:660F38V9ErM", | |
| 1778 vfnmsub132sd_3 = "rrro:660F38VX9FrM|rrx/ooq:", | |
| 1779 vfnmsub132ss_3 = "rrro:660F38V9FrM|rrx/ood:", | |
| 1780 vfnmsub213pd_3 = "rrmoy:660F38VXAErM", | |
| 1781 vfnmsub213ps_3 = "rrmoy:660F38VAErM", | |
| 1782 vfnmsub213sd_3 = "rrro:660F38VXAFrM|rrx/ooq:", | |
| 1783 vfnmsub213ss_3 = "rrro:660F38VAFrM|rrx/ood:", | |
| 1784 vfnmsub231pd_3 = "rrmoy:660F38VXBErM", | |
| 1785 vfnmsub231ps_3 = "rrmoy:660F38VBErM", | |
| 1786 vfnmsub231sd_3 = "rrro:660F38VXBFrM|rrx/ooq:", | |
| 1787 vfnmsub231ss_3 = "rrro:660F38VBFrM|rrx/ood:", | |
| 1788 } | |
| 1789 | |
| 1790 ------------------------------------------------------------------------------ | |
| 1791 | |
-- Arithmetic ops.
-- The position of a mnemonic in the list (minus one) is its operation
-- number in the range 0..7, which gets merged into every encoding below.
do
  local arith_ops = { "add", "or", "adc", "sbb", "and", "sub", "xor", "cmp" }
  for idx = 1, #arith_ops do
    local opnum = idx - 1
    local base = shl(opnum, 3)  -- Operation number moved up by 3 bits.
    map_op[arith_ops[idx].."_2"] = format(
      "mr:%02XRm|rm:%02XrM|mI1qdw:81%XmI|mS1qdw:83%XmS|Ri1qdwb:%02Xri|mi1qdwb:81%Xmi",
      base+1, base+3, opnum, opnum, base+5, opnum)
  end
end
| 1800 | |
-- Shift ops.
-- Every mnemonic contributes one hex digit which is inserted into all three
-- encodings (second operand 1, cl or an immediate). sal reuses the digit
-- of shl.
do
  local shift_digits = {
    rol = 0, ror = 1, rcl = 2, rcr = 3,
    shl = 4, sal = 4, shr = 5, sar = 7,
  }
  for mnemonic, digit in pairs(shift_digits) do
    local d = format("%X", digit)
    map_op[mnemonic.."_2"] = "m1:D1"..d.."m|mC1qdwb:D3"..d.."m|mi:C1"..d.."mU"
  end
end
| 1806 | |
-- Conditional ops.
-- Create the jcc, setcc and cmovcc templates for each entry of map_cc.
for cond, code in pairs(map_cc) do
  local hex = format("%X", code)
  map_op["j"..cond.."_1"] = "J.:n0F8"..hex.."J"  -- short: 7%X
  map_op["set"..cond.."_1"] = "mb:n0F9"..hex.."2m"
  map_op["cmov"..cond.."_2"] = "rmqdw:0F4"..hex.."rM"  -- P6+
end
| 1813 | |
-- FP arithmetic ops.
-- The position of a name in the list (minus one) is its operation number n.
do
  local fp_ops = { "add", "mul", "com", "comp", "sub", "subr", "div", "divr" }
  for idx, name in ipairs(fp_ops) do
    local n = idx - 1
    local nc = 0xc0 + shl(n, 3)
    -- Opcode for the reversed operand order: identical for n < 4, otherwise
    -- exchanged with the neighbouring op (sub <-> subr, div <-> divr).
    local nr = nc
    if n >= 4 then
      if n % 2 == 0 then nr = nc + 8 else nr = nc - 8 end
    end
    local fn = "f"..name
    map_op[fn.."_1"] = format("ff:D8%02Xr|xd:D8%Xm|xq:nDC%Xm", nc, n, n)
    if n == 2 or n == 3 then
      -- com/comp: no reversed form and no popping variants are generated.
      map_op[fn.."_2"] = format("Fff:D8%02XR|Fx2d:D8%XM|Fx2q:nDC%XM", nc, n, n)
    else
      map_op[fn.."_2"] = format("Fff:D8%02XR|fFf:DC%02Xr|Fx2d:D8%XM|Fx2q:nDC%XM", nc, nr, n, n)
      map_op[fn.."p_1"] = format("ff:DE%02Xr", nr)
      map_op[fn.."p_2"] = format("fFf:DE%02Xr", nr)
    end
    map_op["fi"..name.."_1"] = format("xd:DA%Xm|xw:nDE%Xm", n, n)
  end
end
| 1830 | |
-- FP conditional moves.
-- The position of a condition in the list (minus one) is its number n:
-- the low two bits of n select the opcode row, n >= 4 adds 0x100.
do
  local fcmov_cc = { "b", "e", "be", "u", "nb", "ne", "nbe", "nu" }
  for idx, cc in ipairs(fcmov_cc) do
    local n = idx - 1
    local nc = 0xdac0 + shl(n % 4, 3)
    if n >= 4 then nc = nc + 0x100 end
    local hex = format("%04X", nc)
    map_op["fcmov"..cc.."_1"] = "ff:"..hex.."r"  -- P6+
    map_op["fcmov"..cc.."_2"] = "Fff:"..hex.."R"  -- P6+
  end
end
| 1837 | |
-- SSE / AVX FP arithmetic ops.
-- Each entry gives the low hex digit of the 0F5x opcode for that operation.
do
  local sse_fp_ops = {
    sqrt = 1, add = 8, mul = 9, sub = 12, min = 13, div = 14, max = 15,
  }
  for name, n in pairs(sse_fp_ops) do
    local tail = format("%X", n).."rM"
    map_op[name.."ps_2"] = "rmo:0F5"..tail
    map_op[name.."ss_2"] = "rro:F30F5"..tail.."|rx/od:"
    map_op[name.."pd_2"] = "rmo:660F5"..tail
    map_op[name.."sd_2"] = "rro:F20F5"..tail.."|rx/oq:"
    -- sqrt (n == 1) does not get a three-operand v* form from this loop.
    if n ~= 1 then
      local vname = "v"..name
      map_op[vname.."ps_3"] = "rrmoy:0FV5"..tail
      map_op[vname.."ss_3"] = "rrro:F30FV5"..tail.."|rrx/ood:"
      map_op[vname.."pd_3"] = "rrmoy:660FV5"..tail
      map_op[vname.."sd_3"] = "rrro:F20FV5"..tail.."|rrx/ooq:"
    end
  end
end
| 1852 | |
-- SSE2 / AVX / AVX2 integer arithmetic ops (66 0F leaf).
-- Maps each mnemonic to its opcode byte; both the two-operand form and the
-- three-operand v* form are derived from the same byte.
do
  local int_ops = {
    paddb = 0xFC, paddw = 0xFD, paddd = 0xFE, paddq = 0xD4,
    paddsb = 0xEC, paddsw = 0xED,
    packssdw = 0x6B, packsswb = 0x63, packuswb = 0x67,
    paddusb = 0xDC, paddusw = 0xDD,
    pand = 0xDB, pandn = 0xDF,
    pavgb = 0xE0, pavgw = 0xE3,
    pcmpeqb = 0x74, pcmpeqd = 0x76, pcmpeqw = 0x75,
    pcmpgtb = 0x64, pcmpgtd = 0x66, pcmpgtw = 0x65,
    pmaddwd = 0xF5,
    pmaxsw = 0xEE, pmaxub = 0xDE,
    pminsw = 0xEA, pminub = 0xDA,
    pmulhuw = 0xE4, pmulhw = 0xE5, pmullw = 0xD5, pmuludq = 0xF4,
    por = 0xEB,
    psadbw = 0xF6,
    psubb = 0xF8, psubw = 0xF9, psubd = 0xFA, psubq = 0xFB,
    psubsb = 0xE8, psubsw = 0xE9,
    psubusb = 0xD8, psubusw = 0xD9,
    punpckhbw = 0x68, punpckhwd = 0x69, punpckhdq = 0x6A,
    punpckhqdq = 0x6D,
    punpcklbw = 0x60, punpcklwd = 0x61, punpckldq = 0x62,
    punpcklqdq = 0x6C,
    pxor = 0xEF,
  }
  for mnemonic, opbyte in pairs(int_ops) do
    local tail = format("%02X", opbyte).."rM"
    map_op[mnemonic.."_2"] = "rmo:660F"..tail
    map_op["v"..mnemonic.."_3"] = "rrmoy:660FV"..tail
  end
end
| 1874 | |
| 1875 ------------------------------------------------------------------------------ | |
| 1876 | |
-- Pattern characters which request VEX encoding. The value is the index of
-- the operand that is removed from the argument list and stored in the VEX
-- descriptor; `false' means no operand is removed.
local map_vexarg = { u = false, v = 1, V = 2, w = 3 }

-- Process pattern string.
--
-- Walks the pattern one character at a time, accumulating hex digits into
-- an opcode and emitting it together with ModRM/SIB, displacement and
-- immediate operands through the wput*/waction helpers.
--
--   pat      Pattern part of a template (the text after the `:').
--   args     List of parsed operands. An operand is removed from this list
--            when a `v', `V' or `w' pattern character is processed.
--   sz       Operand size; passed to wputop unless the pattern has an `n'.
--   op       Opcode name, only used for error messages.
--   needrex  If true, 16 is added to the rex value passed to wputop.
--
-- Returns nothing.
local function dopattern(pat, args, sz, op, needrex)
  local digit, addin, vex
  local opcode = 0
  local szov = sz
  local narg = 1
  local rex = 0

  -- Limit number of section buffer positions used by a single dasm_put().
  -- A single opcode needs a maximum of 6 positions.
  if secpos+6 > maxsecpos then wflush() end

  -- Process each character.
  -- A `|' is appended so the final flush/break branch always runs.
  for c in gmatch(pat.."|", ".") do
    if match(c, "%x") then  -- Hex digit.
      -- Convert the ASCII character to its value 0..15:
      -- lowercase a-f need -39, uppercase A-F need -7 after subtracting "0".
      digit = byte(c) - 48
      if digit > 48 then digit = digit - 39
      elseif digit > 16 then digit = digit - 7 end
      opcode = opcode*16 + digit
      addin = nil
    elseif c == "n" then  -- Disable operand size mods for opcode.
      szov = nil
    elseif c == "X" then  -- Force REX.W.
      rex = 8
    elseif c == "L" then  -- Force VEX.L.
      -- Requires that a VEX character (u/v/V/w) came earlier in the pattern,
      -- otherwise vex is still nil here.
      vex.l = true
    elseif c == "r" then  -- Merge 1st operand regno. into opcode.
      addin = args[1]; opcode = opcode + (addin.reg % 8)
      if narg < 2 then narg = 2 end
    elseif c == "R" then  -- Merge 2nd operand regno. into opcode.
      addin = args[2]; opcode = opcode + (addin.reg % 8)
      narg = 3
    elseif c == "m" or c == "M" then  -- Encode ModRM/SIB.
      -- s is the spare field value: either the register merged by a
      -- preceding `r'/`R' or the last hex digit of the pattern.
      local s
      if addin then
        s = addin.reg
        opcode = opcode - band(s, 7)  -- Undo regno opcode merge.
      else
        s = band(opcode, 15)  -- Undo last digit.
        opcode = shr(opcode, 4)
      end
      -- `m' encodes the 1st operand, `M' the 2nd operand.
      local nn = c == "m" and 1 or 2
      local t = args[nn]
      if narg <= nn then narg = nn + 1 end
      -- Collect the rex bits: 8 for size "q", 1/2/4 for register numbers
      -- above 7 in t.reg/t.xreg/spare, 16 if the caller asked for needrex.
      if szov == "q" and rex == 0 then rex = rex + 8 end
      if t.reg and t.reg > 7 then rex = rex + 1 end
      if t.xreg and t.xreg > 7 then rex = rex + 2 end
      if s > 7 then rex = rex + 4 end
      if needrex then rex = rex + 16 end
      local psz, sk = wputop(szov, opcode, rex, vex, s < 0, t.vreg or t.vxreg)
      opcode = nil
      local imark = sub(pat, -1)  -- Force a mark (ugly).
      -- Put ModRM/SIB with regno/last digit as spare.
      wputmrmsib(t, imark, s, addin and addin.vreg, psz, sk)
      addin = nil
    elseif map_vexarg[c] ~= nil then  -- Encode using VEX prefix
      -- The accumulated opcode must end in 0F, 0F38 or 0F3A; strip those
      -- bytes and remember which of the three it was in m (1, 2 or 3).
      local b = band(opcode, 255); opcode = shr(opcode, 8)
      local m = 1
      if b == 0x38 then m = 2
      elseif b == 0x3a then m = 3 end
      if m ~= 1 then b = band(opcode, 255); opcode = shr(opcode, 8) end
      if b ~= 0x0f then
        werror("expected `0F', `0F38', or `0F3A' to precede `"..c..
          "' in pattern `"..pat.."' for `"..op.."'")
      end
      -- Remove the VEX-encoded operand from args (shifts later operands).
      local v = map_vexarg[c]
      if v then v = remove(args, v) end
      -- An optional preceding 66/F3/F2 byte is stripped and recorded in p
      -- as 1/2/3.
      b = band(opcode, 255)
      local p = 0
      if b == 0x66 then p = 1
      elseif b == 0xf3 then p = 2
      elseif b == 0xf2 then p = 3 end
      if p ~= 0 then opcode = shr(opcode, 8) end
      -- Anything left over is emitted as-is before the VEX-encoded part.
      if opcode ~= 0 then wputop(nil, opcode, 0); opcode = 0 end
      vex = { m = m, p = p, v = v }
    else
      if opcode then  -- Flush opcode.
        if szov == "q" and rex == 0 then rex = rex + 8 end
        if needrex then rex = rex + 16 end
        if addin and addin.reg == -1 then
          -- Register number -1: undo the merge of (-1 % 8) == 7 and emit
          -- the register through wvreg using addin.vreg instead.
          local psz, sk = wputop(szov, opcode - 7, rex, vex, true)
          wvreg("opcode", addin.vreg, psz, sk)
        else
          if addin and addin.reg > 7 then rex = rex + 1 end
          wputop(szov, opcode, rex, vex)
        end
        opcode = nil
      end
      if c == "|" then break end
      if c == "o" then  -- Offset (pure 32 bit displacement).
        wputdarg(args[1].disp); if narg < 2 then narg = 2 end
      elseif c == "O" then
        -- Same as `o', but takes the displacement of the 2nd operand.
        wputdarg(args[2].disp); narg = 3
      else
        -- Anything else is an immediate operand.
        local a = args[narg]
        narg = narg + 1
        local mode, imm = a.mode, a.imm
        -- Label operands are only accepted for `J' on x64 and for
        -- `i', `I' or `J' otherwise.
        if mode == "iJ" and not match(x64 and "J" or "iIJ", c) then
          werror("bad operand size for label")
        end
        if c == "S" then
          wputsbarg(imm)
        elseif c == "U" then
          wputbarg(imm)
        elseif c == "W" then
          wputwarg(imm)
        elseif c == "i" or c == "I" then
          if mode == "iJ" then
            wputlabel("IMM_", imm, 1)
          elseif mode == "iI" and c == "I" then
            waction(sz == "w" and "IMM_WB" or "IMM_DB", imm)
          else
            wputszarg(sz, imm)
          end
        elseif c == "J" then
          if mode == "iPJ" then
            waction("REL_A", imm)  -- !x64 (secpos)
          else
            wputlabel("REL_", imm, 2)
          end
        elseif c == "s" then
          -- Register operand placed in the upper 4 bits of a byte; a
          -- register number below 0 is emitted via wvreg instead.
          local reg = a.reg
          if reg < 0 then
            wputb(0)
            wvreg("imm.hi", a.vreg)
          else
            wputb(shl(reg, 4))
          end
        else
          werror("bad char `"..c.."' in pattern `"..pat.."' for `"..op.."'")
        end
      end
    end
  end
end
| 2015 | |
| 2016 ------------------------------------------------------------------------------ | |
| 2017 | |
-- Mapping of operand modes to short names. Suppress output with '#'.
local map_modename = {
  r = "reg", R = "eax", C = "cl",
  x = "mem", m = "mrm", i = "imm",
  f = "stx", F = "st0", J = "lbl",
  ["1"] = "1",
  I = "#", S = "#", O = "#",
}

-- Return a table/string showing all possible operand modes.
-- Yields "" for opcodes without parameters; otherwise a list with one
-- "name, name, ..." string per template alternative. Alternatives which
-- contain a suppressed ('#') mode are left out.
local function templatehelp(template, nparams)
  if nparams == 0 then return "" end
  local help = {}
  for alt in gmatch(template, "[^%|]+") do
    local line = map_modename[sub(alt, 1, 1)]
    for modechar in gmatch(sub(alt, 2, nparams), ".") do
      line = line..", "..map_modename[modechar]
    end
    if not match(line, "#") then
      help[#help+1] = line
    end
  end
  return help
end
| 2038 | |
-- Match operand modes against mode match part of template.
-- The i-th character of tm is used as a pattern for the mode string of the
-- i-th operand. Returns true if all operands match, nothing otherwise.
local function matchtm(tm, args)
  for idx, operand in ipairs(args) do
    local modepat = sub(tm, idx, idx)
    if not match(operand.mode, modepat) then
      return
    end
  end
  return true
end
| 2046 | |
-- Handle opcodes defined with template strings.
--
--   params    List of operand strings plus the opcode name in params.op.
--             If nil/false, a help table/string is returned instead.
--   template  Template string: `match:pattern' alternatives separated by
--             `|'. An empty pattern reuses the previous alternative's one.
--   nparams   Number of parameters (only used for the help output).
--
-- The first alternative whose modes and sizes fit the operands is handed
-- to dopattern(). If none fits, an error is raised via werror().
map_op[".template__"] = function(params, template, nparams)
  if not params then return templatehelp(template, nparams) end
  local args = {}

  -- Zero-operand opcodes have no match part.
  if #params == 0 then
    dopattern(template, args, "d", params.op, nil)
    return
  end

  -- Determine common operand size (coerce undefined size) or flag as mixed.
  -- Also collect the needrex flag and reject operands which disagree on it.
  local sz, szmix, needrex
  for i,p in ipairs(params) do
    args[i] = parseoperand(p)
    local nsz = args[i].opsize
    if nsz then
      if sz and sz ~= nsz then szmix = true else sz = nsz end
    end
    local nrex = args[i].needrex
    if nrex ~= nil then
      if needrex == nil then
        needrex = nrex
      elseif needrex ~= nrex then
        werror("bad mix of byte-addressable registers")
      end
    end
  end

  -- Try all match:pattern pairs (separated by '|').
  local gotmatch, lastpat
  for tm in gmatch(template, "[^%|]+") do
    -- Split off size match (starts after mode match) and pattern string.
    local szm, pat = match(tm, "^(.-):(.*)$", #args+1)
    if pat == "" then pat = lastpat else lastpat = pat end
    if matchtm(tm, args) then
      local prefix = sub(szm, 1, 1)
      if prefix == "/" then  -- Exactly match leading operand sizes.
        -- Compare from the last size character backwards; character i
        -- belongs to operand i-1. Reaching i == 1 means all sizes matched.
        for i = #szm,1,-1 do
          if i == 1 then
            dopattern(pat, args, sz, params.op, needrex)  -- Process pattern.
            return
          elseif args[i-1].opsize ~= sub(szm, i, i) then
            break
          end
        end
      else  -- Match common operand size.
        local szp = sz
        if szm == "" then szm = x64 and "qdwb" or "dwb" end  -- Default sizes.
        -- A `1' or `2' prefix takes the size from that operand only and
        -- clears the mixed-size flag (also for later alternatives).
        if prefix == "1" then szp = args[1].opsize; szmix = nil
        elseif prefix == "2" then szp = args[2].opsize; szmix = nil end
        -- A `.' prefix accepts any size; an unknown size (nil) is matched
        -- as "#", which fails unless the size match itself contains "#".
        if not szmix and (prefix == "." or match(szm, szp or "#")) then
          dopattern(pat, args, szp, params.op, needrex)  -- Process pattern.
          return
        end
      end
      -- Modes matched but sizes did not: remember this for the error text.
      gotmatch = true
    end
  end

  local msg = "bad operand mode"
  if gotmatch then
    if szmix then
      msg = "mixed operand size"
    else
      msg = sz and "bad operand size" or "missing operand size"
    end
  end

  werror(msg.." in `"..opmodestr(params.op, args).."'")
end
| 2118 | |
| 2119 ------------------------------------------------------------------------------ | |
| 2120 | |
-- x64-specific opcode for 64 bit immediates and displacements.
if x64 then
  -- mov64 pseudo-opcode. Three operand forms are accepted:
  --   [disp], reg   opcode 0xa3; reg must have operand mode "rmR".
  --   reg, [disp]   opcode 0xa1; reg must have operand mode "rmR".
  --   reg, imm      opcode 0xb8 + register number; reg must be 64 bit.
  -- The expression (disp or imm) is emitted as two 32 bit IMM_D actions,
  -- low half first. Called without params it returns the help list.
  function map_op.mov64_2(params)
    if not params then return { "reg, imm", "reg, [disp]", "[disp], reg" } end
    if secpos+2 > maxsecpos then wflush() end
    local opcode, sz, rex, vreg
    -- op64 is the text of the 64 bit expression (contents of [...] or imm).
    local op64 = match(params[1], "^%[%s*(.-)%s*%]$")
    if op64 then
      -- Form: [disp], reg
      local a = parseoperand(params[2])
      if a.mode ~= "rmR" then werror("bad operand mode") end
      sz = a.opsize
      rex = sz == "q" and 8 or 0
      opcode = 0xa3
    else
      op64 = match(params[2], "^%[%s*(.-)%s*%]$")
      local a = parseoperand(params[1])
      if op64 then
        -- Form: reg, [disp]
        if a.mode ~= "rmR" then werror("bad operand mode") end
        sz = a.opsize
        rex = sz == "q" and 8 or 0
        opcode = 0xa1
      else
        -- Form: reg, imm (sz intentionally stays nil here).
        if sub(a.mode, 1, 1) ~= "r" or a.opsize ~= "q" then
          werror("bad operand mode")
        end
        op64 = params[2]
        if a.reg == -1 then
          -- Register number is supplied through a.vreg; it is emitted by
          -- the wvreg() call below instead of being merged into the opcode.
          vreg = a.vreg
          opcode = 0xb8
        else
          opcode = 0xb8 + band(a.reg, 7)
        end
        -- 8 forces 64 bit operand size, +1 for register numbers above 7.
        rex = a.reg > 7 and 9 or 8
      end
    end
    local psz, sk = wputop(sz, opcode, rex, nil, vreg)
    wvreg("opcode", vreg, psz, sk)
    waction("IMM_D", format("(unsigned int)(%s)", op64))
    waction("IMM_D", format("(unsigned int)((%s)>>32)", op64))
  end
end
| 2162 | |
| 2163 ------------------------------------------------------------------------------ | |
| 2164 | |
-- Pseudo-opcodes for data storage.
-- The element size is derived from the 2nd character of the opcode name
-- (the one after the dot): "l" is treated as "d" and "a" as the address
-- size. Every parameter must be an immediate of a compatible size.
local function op_data(params)
  if not params then return "imm..." end
  local size = sub(params.op, 2, 2)
  if size == "a" then
    size = addrsize
  elseif size == "l" then
    size = "d"
  end
  for _, text in ipairs(params) do
    local operand = parseoperand(text, size == "q")
    local opsize = operand.opsize
    if sub(operand.mode, 1, 1) ~= "i" or (opsize and opsize ~= size) then
      werror("bad mode or size in `"..text.."'")
    end
    if operand.mode == "iJ" then
      wputlabel("IMM_", operand.imm, 1)
    elseif size == "q" then
      wputqarg(operand.imm)
    else
      wputszarg(size, operand.imm)
    end
    -- Keep room in the section buffer for the next element.
    if secpos+2 > maxsecpos then wflush() end
  end
end

-- All data pseudo-opcodes share the same handler.
for _, opname in ipairs{ ".byte", ".sbyte", ".word", ".dword", ".qword",
                         ".aword", ".long", ".quad", ".addr" } do
  map_op[opname.."_*"] = op_data
end
| 2195 | |
| 2196 ------------------------------------------------------------------------------ | |
| 2197 | |
-- Pseudo-opcode to mark the position where the action list is to be emitted.
-- The parameter is passed on to writeactions() unchecked.
map_op[".actionlist_1"] = function(params)
  if params then
    local cvar = params[1]  -- No syntax check. You get to keep the pieces.
    wline(function(out)
      writeactions(out, cvar)
    end)
    return
  end
  return "cvar"
end
| 2204 | |
-- Pseudo-opcode to mark the position where the global enum is to be emitted.
-- The parameter is passed on to writeglobals() unchecked.
map_op[".globals_1"] = function(params)
  if params then
    local enumprefix = params[1]  -- No syntax check. You get to keep the pieces.
    wline(function(out)
      writeglobals(out, enumprefix)
    end)
    return
  end
  return "prefix"
end
| 2211 | |
-- Pseudo-opcode to mark the position where the global names are to be emitted.
-- The parameter is passed on to writeglobalnames() unchecked.
map_op[".globalnames_1"] = function(params)
  if params then
    local cvar = params[1]  -- No syntax check. You get to keep the pieces.
    wline(function(out)
      writeglobalnames(out, cvar)
    end)
    return
  end
  return "cvar"
end
| 2218 | |
-- Pseudo-opcode to mark the position where the extern names are to be emitted.
-- The parameter is passed on to writeexternnames() unchecked.
map_op[".externnames_1"] = function(params)
  if params then
    local cvar = params[1]  -- No syntax check. You get to keep the pieces.
    wline(function(out)
      writeexternnames(out, cvar)
    end)
    return
  end
  return "cvar"
end
| 2225 | |
| 2226 ------------------------------------------------------------------------------ | |
| 2227 | |
-- Label pseudo-opcode (converted from trailing colon form).
-- Accepts a local label (1..9), a global label (->global) or a PC label
-- (=>pcexpr), optionally followed by an address to assign to the label.
map_op[".label_2"] = function(params)
  if not params then return "[1-9] | ->global | =>pcexpr [, addr]" end
  if secpos+2 > maxsecpos then wflush() end

  local label = parseoperand(params[1])
  local mode, imm = label.mode, label.imm
  local is_label_mode = (mode == "iJ")
  if type(imm) == "number" and (is_label_mode or (imm >= 1 and imm <= 9)) then
    -- Local label (1: ... 9:) or global label (->global:).
    waction("LABEL_LG", nil, 1)
    wputxb(imm)
  elseif is_label_mode then
    -- PC label (=>pcexpr:).
    waction("LABEL_PC", imm)
  else
    werror("bad label definition")
  end

  -- SETLABEL must immediately follow LABEL_LG/LABEL_PC.
  local addr = params[2]
  if not addr then return end
  local target = parseoperand(addr)
  if target.mode ~= "iPJ" then
    werror("bad label assignment")
  else
    waction("SETLABEL", target.imm)
  end
end
map_op[".label_1"] = map_op[".label_2"]
| 2256 | |
| 2257 ------------------------------------------------------------------------------ | |
| 2258 | |
-- Alignment pseudo-opcode.
-- The parameter is either a number or an operand size name.
map_op[".align_1"] = function(params)
  if not params then return "numpow2" end
  if secpos+1 > maxsecpos then wflush() end
  local arg = params[1]
  local align = tonumber(arg) or map_opsizenum[map_opsize[arg]]
  -- Must be a power of 2 in the range (2 ... 256): halve up to eight times
  -- and accept as soon as the remainder is exactly 1.
  local rest = align
  local tries = align and 8 or 0
  while tries > 0 do
    rest = rest / 2
    if rest == 1 then
      waction("ALIGN", nil, 1)
      wputxb(align-1)  -- Action byte is 2**n-1.
      return
    end
    tries = tries - 1
  end
  werror("bad alignment")
end
| 2278 | |
-- Spacing pseudo-opcode.
-- Emits a SPACE action for the given count, followed by the filler byte
-- (0 unless a filler in the range 0..255 is given).
map_op[".space_2"] = function(params)
  if not params then return "num [, filler]" end
  if secpos+1 > maxsecpos then wflush() end
  waction("SPACE", params[1])
  local filler = params[2]
  if filler then
    local value = tonumber(filler)
    if value == nil or value < 0 or value > 255 then
      werror("bad filler")
    end
    filler = value
  end
  wputxb(filler or 0)
end
map_op[".space_1"] = map_op[".space_2"]
| 2292 | |
| 2293 ------------------------------------------------------------------------------ | |
| 2294 | |
-- Pseudo-opcode for (primitive) type definitions (map to C types).
-- Registers a new type name for a C type with an optional base register
-- and emits the matching DtXX() shortcut macro.
map_op[".type_3"] = function(params, nparams)
  if not params then
    if nparams == 2 then return "name, ctype" end
    return "name, ctype, reg"
  end

  local name = params[1]
  local ctype = params[2]
  local reg = params[3]

  -- Validate the arguments.
  if not match(name, "^[%a_][%w_]*$") then
    werror("bad type name `"..name.."'")
  end
  if map_type[name] then
    werror("duplicate type `"..name.."'")
  end
  if reg and not map_reg_valid_base[reg] then
    werror("bad base register `"..(map_reg_rev[reg] or reg).."'")
  end

  -- Add #type to defines. A bit unclean to put it in map_archdef.
  map_archdef["#"..name] = "sizeof("..ctype..")"

  -- Add new type and emit shortcut define.
  local num = ctypenum + 1
  local macro = "Dt"..format("%X", num)
  map_type[name] = {
    ctype = ctype,
    ctypefmt = macro.."(%s)",
    reg = reg,
  }
  wline(macro.."(_V) (int)(ptrdiff_t)&((("..ctype.." *)0)_V)" == nil and "" or
        format("#define Dt%X(_V) (int)(ptrdiff_t)&(((%s *)0)_V)", num, ctype))
  -- Only commit the new type number after everything above succeeded.
  ctypenum = num
end
map_op[".type_2"] = map_op[".type_3"]
| 2324 | |
-- Dump type definitions.
-- Writes one line per defined type (sorted by name) with its C type and
-- the name of its base register, if any. lvl is not used.
local function dumptypes(out, lvl)
  local names = {}
  for typename in pairs(map_type) do
    names[#names+1] = typename
  end
  sort(names)
  out:write("Type definitions:\n")
  for idx = 1, #names do
    local typename = names[idx]
    local tp = map_type[typename]
    local regname = ""
    if tp.reg then
      regname = map_reg_rev[tp.reg] or ""
    end
    out:write(format("  %-20s %-20s %s\n", typename, tp.ctype, regname))
  end
  out:write("\n")
end
| 2338 | |
| 2339 ------------------------------------------------------------------------------ | |
| 2340 | |
-- Set the current section.
-- Emits a SECTION action followed by the section number and flushes.
_M.section = function(num)
  waction("SECTION")
  wputxb(num)
  -- SECTION is a terminal action.
  wflush(true)
end
| 2347 | |
| 2348 ------------------------------------------------------------------------------ | |
| 2349 | |
-- Dump architecture description.
-- Writes a version banner, then the register and action dumps.
function _M.dumparch(out)
  local banner = format("DynASM %s version %s, released %s\n\n",
    _info.arch, _info.version, _info.release)
  out:write(banner)
  dumpregs(out)
  dumpactions(out)
end
| 2357 | |
-- Dump all user defined elements.
-- Order: types, globals, externs.
_M.dumpdef = function(out, lvl)
  dumptypes(out, lvl)
  dumpglobals(out, lvl)
  dumpexterns(out, lvl)
end
| 2364 | |
| 2365 ------------------------------------------------------------------------------ | |
| 2366 | |
-- Pass callbacks from/to the DynASM core.
-- Stores the four output/diagnostic callbacks and hands back wflush.
function _M.passcb(wl, we, wf, ww)
  wline = wl
  werror = we
  wfatal = wf
  wwarn = ww
  return wflush
end
| 2372 | |
-- Setup the arch-specific module.
-- Remembers the architecture name and the option table.
function _M.setup(arch, opt)
  g_arch = arch
  g_opt = opt
end
| 2377 | |
-- Merge the core maps and the arch-specific maps.
-- Lookups missing in map_op fall back to map_coreop, lookups missing in
-- map_def fall back to map_archdef.
function _M.mergemaps(map_coreop, map_def)
  local op_meta = { __index = map_coreop }
  local def_meta = { __index = map_archdef }
  setmetatable(map_op, op_meta)
  setmetatable(map_def, def_meta)
  return map_op, map_def
end
| 2384 | |
| 2385 return _M | |
| 2386 | |
| 2387 ------------------------------------------------------------------------------ | |
| 2388 |