comparison third_party/luajit/dynasm/dasm_x86.lua @ 178:94705b5986b3

[ThirdParty] Added WRK and luajit for load testing.
author MrJuneJune <me@mrjunejune.com>
date Thu, 22 Jan 2026 20:10:30 -0800
parents
children
comparison
equal deleted inserted replaced
177:24fe8ff94056 178:94705b5986b3
1 ------------------------------------------------------------------------------
2 -- DynASM x86/x64 module.
3 --
4 -- Copyright (C) 2005-2023 Mike Pall. All rights reserved.
5 -- See dynasm.lua for full copyright notice.
6 ------------------------------------------------------------------------------
7
-- Capture the `x64` value visible when this chunk is loaded. A truthy value
-- selects the x64 flavour of this module, anything else selects plain x86.
local x64 = x64

-- Module information:
local _info = {
  arch = x64 and "x64" or "x86", -- Reported architecture follows the x64 flag.
  description = "DynASM x86/x64 module",
  version = "1.5.0",
  vernum = 10500,
  release = "2021-05-02",
  author = "Mike Pall",
  license = "MIT",
}

-- Exported glue functions for the arch-specific module.
-- The info table is published as the `_info` field of the module table.
local _M = { _info = _info }
23
24 -- Cache library functions.
25 local type, tonumber, pairs, ipairs = type, tonumber, pairs, ipairs
26 local assert, unpack, setmetatable = assert, unpack or table.unpack, setmetatable
27 local _s = string
28 local sub, format, byte, char = _s.sub, _s.format, _s.byte, _s.char
29 local find, match, gmatch, gsub = _s.find, _s.match, _s.gmatch, _s.gsub
30 local concat, sort, remove = table.concat, table.sort, table.remove
31 local bit = bit or require("bit")
32 local band, bxor, shl, shr = bit.band, bit.bxor, bit.lshift, bit.rshift
33
-- Inherited tables and callbacks.
-- NOTE(review): these are only declared here; presumably they are assigned by
-- setup code outside this excerpt -- confirm.
local g_opt, g_arch
local wline, werror, wfatal, wwarn

-- Action name list.
-- CHECK: Keep this in sync with the C code!
-- The order is significant: action numbers are assigned consecutively in
-- list order, ending at 255 for the last entry.
local action_names = {
  -- int arg, 1 buffer pos:
  "DISP", "IMM_S", "IMM_B", "IMM_W", "IMM_D", "IMM_WB", "IMM_DB",
  -- action arg (1 byte), int arg, 1 buffer pos (reg/num):
  "VREG", "SPACE",
  -- ptrdiff_t arg, 1 buffer pos (address): !x64
  "SETLABEL", "REL_A",
  -- action arg (1 byte) or int arg, 2 buffer pos (link, offset):
  "REL_LG", "REL_PC",
  -- action arg (1 byte) or int arg, 1 buffer pos (link):
  "IMM_LG", "IMM_PC",
  -- action arg (1 byte) or int arg, 1 buffer pos (offset):
  "LABEL_LG", "LABEL_PC",
  -- action arg (1 byte), 1 buffer pos (offset):
  "ALIGN",
  -- action args (2 bytes), no buffer pos.
  "EXTERN",
  -- action arg (1 byte), no buffer pos.
  "ESC",
  -- no action arg, no buffer pos.
  "MARK",
  -- action arg (1 byte), no buffer pos, terminal action:
  "SECTION",
  -- no args, no buffer pos, terminal action:
  "STOP"
}

-- Maximum number of section buffer positions for dasm_put().
-- CHECK: Keep this in sync with the C code!
local maxsecpos = 25 -- Keep this low, to avoid excessively long C lines.

-- Action name -> action number (dynamically generated below).
local map_action = {}
-- First action number. Everything below does not need to be escaped.
-- Computed so that the action codes fill the top of the 0..255 byte range.
local actfirst = 256-#action_names

-- Action list buffer and string (only used to remove dupes).
-- actlist holds one number per byte; actstr is the same data as a string so
-- that duplicate chunks can be located with a plain string search.
local actlist = {}
local actstr = ""

-- Argument list for next dasm_put(). Start with offset 0 into action list.
local actargs = { 0 }

-- Current number of section buffer positions for dasm_put().
local secpos = 1
85
-- VREG kind encodings, pre-shifted by 5 bits.
-- Every value is a multiple of 0x20, which leaves the low bits of the emitted
-- byte free for the extra values wvreg() adds to it.
local map_vreg = {
  ["modrm.rm.m"] = 0x00,
  ["modrm.rm.r"] = 0x20,
  ["opcode"] = 0x20,
  ["sib.base"] = 0x20,
  ["sib.index"] = 0x40,
  ["modrm.reg"] = 0x80,
  ["vex.v"] = 0xa0,
  ["imm.hi"] = 0xc0,
}

-- Current number of VREG actions contributing to REX/VEX shrinkage.
-- Accumulated across deferred wvreg() calls and reset by the next
-- non-deferred one.
local vreg_shrink_count = 0
100
101 ------------------------------------------------------------------------------
102
-- Assign consecutive action numbers, starting at actfirst, in list order.
for idx = 1, #action_names do
  map_action[action_names[idx]] = actfirst + idx - 1
end
108
-- Print every action name with its code in hex and decimal.
-- out: any object with a file-like write() method.
local function dumpactions(out)
  out:write("DynASM encoding engine action codes:\n")
  for idx = 1, #action_names do
    local label = action_names[idx]
    local code = map_action[label]
    out:write(format(" %-10s %02X %d\n", label, code, code))
  end
  out:write("\n")
end
118
-- Emit the action list as a static C array of unsigned chars.
-- out:  any object with a file-like write() method.
-- name: C identifier used for the array.
-- The final byte is taken out of actlist so it can be written without a
-- trailing comma; an empty list is emitted as the single byte 255.
local function writeactions(out, name)
  local total = #actlist
  local final = actlist[total] or 255
  actlist[total] = nil -- Drop the last byte from the buffer.
  out:write("static const unsigned char ", name, "[",
            total == 0 and 1 or total, "] = {\n")
  local line = " "
  for idx = 1, total - 1 do
    line = line..actlist[idx]..","
    -- Break the C source line once it gets long.
    if #line >= 75 then
      assert(out:write(line, "\n"))
      line = " "
    end
  end
  out:write(line, final, "\n};\n\n") -- The saved last byte closes the list.
end
136
137 ------------------------------------------------------------------------------
138
-- Append one raw byte to the action list.
-- n must be an integer in 0..255, otherwise an assertion fails.
local function wputxb(n)
  local is_byte = n >= 0 and n <= 255 and n % 1 == 0
  assert(is_byte, "byte out of range")
  actlist[#actlist+1] = n
end
144
-- Append an action code to the action list.
-- action: name from action_names (asserts on an unknown name).
-- a:      optional argument queued for the next dasm_put() call.
-- num:    number of buffer positions to account for; when an argument is
--         given and num is omitted, one position is counted.
local function waction(action, a, num)
  local code = map_action[action]
  assert(code, "bad action name `"..action.."'")
  wputxb(code)
  if a then
    actargs[#actargs+1] = a
    secpos = secpos + (num or 1)
  elseif num then
    secpos = secpos + num
  end
end
151
-- Optionally add a VREG action.
-- kind:  key into map_vreg selecting the encoding of the byte that follows
--        the action code.
-- vreg:  register expression queued as the action's argument; a nil/false
--        value turns the whole call into a no-op.
-- psz:   added to the encoded kind byte (defaults to 0).
-- sk:    kinds whose encoding is below this threshold bump
--        vreg_shrink_count (defaults to 0, i.e. never).
-- defer: when truthy, leave the accumulated shrink count for a later VREG
--        instead of folding it into this byte and resetting it.
local function wvreg(kind, vreg, psz, sk, defer)
  if not vreg then return end
  waction("VREG", vreg)
  -- Name the offending kind in the message; the register expression says
  -- nothing about which lookup failed.
  local b = assert(map_vreg[kind], "bad vreg kind `"..kind.."'")
  if b < (sk or 0) then
    vreg_shrink_count = vreg_shrink_count + 1
  end
  if not defer then
    b = b + vreg_shrink_count * 8
    vreg_shrink_count = 0
  end
  wputxb(b + (psz or 0))
end
166
-- Emit a line calling dasm_<func>(Dst, ...) with the given argument list.
local function wcall(func, args)
  local arglist = concat(args, ", ")
  wline(format("dasm_%s(Dst, %s);", func, arglist), true)
end
171
-- Drop the pending action list chunk if an identical byte sequence already
-- exists. offset is the number of bytes preceding the chunk in actlist.
-- A linear search over everything emitted so far: slow but simple.
local function dedupechunk(offset)
  local first, last = offset + 1, #actlist
  local chunk = char(unpack(actlist, first, last))
  local pos = find(actstr, chunk, 1, true) -- Plain search, no patterns.
  if not pos then
    -- First occurrence: remember it for future lookups.
    actstr = actstr..chunk
    return
  end
  actargs[1] = pos - 1 -- Point dasm_put() at the earlier copy.
  for idx = last, first, -1 do actlist[idx] = nil end -- Discard the dupe.
end
184
-- Flush the pending action list chunk as a dasm_put() call.
-- term: truthy if the chunk already ends with a terminal action, so no STOP
--       needs to be appended.
local function wflush(term)
  local offset = actargs[1]
  if #actlist ~= offset then -- Anything pending?
    if not term then waction("STOP") end
    dedupechunk(offset)
    wcall("put", actargs)
    -- The next chunk starts where the list currently ends; that offset is
    -- the first dasm_put() argument and takes up a buffer position itself.
    actargs = { #actlist }
    secpos = 1
  end
end
195
-- Append a code byte, preceded by an ESC action if its value collides with
-- the action number range.
local function wputb(n)
  local collides = n >= actfirst
  if collides then
    waction("ESC")
  end
  wputxb(n)
end
201
202 ------------------------------------------------------------------------------
203
-- Global label name -> global label number.
-- Unknown names get the next free number (starting at 10) on first lookup.
local next_global = 10
local map_global = setmetatable({}, {
  __index = function(tab, label)
    local valid = match(label, "^[%a_][%w_@]*$")
    if not valid then werror("bad global label") end
    local num = next_global
    if num > 246 then werror("too many global labels") end
    tab[label] = num
    next_global = num + 1
    return num
  end,
})
214
-- List all global labels in order of assignment.
-- out: any object with a file-like write() method. lvl is not used.
local function dumpglobals(out, lvl)
  local by_num = {}
  for label, num in pairs(map_global) do
    by_num[num] = label
  end
  out:write("Global labels:\n")
  for num = 10, next_global - 1 do
    out:write(format(" %s\n", by_num[num]))
  end
  out:write("\n")
end
225
-- Emit a C enum with one entry per global label, followed by <prefix>_MAX.
-- Anything from the first "@" onwards is cut from the label name.
local function writeglobals(out, prefix)
  local by_num = {}
  for label, num in pairs(map_global) do
    by_num[num] = label
  end
  out:write("enum {\n")
  for num = 10, next_global - 1 do
    -- Parentheses keep only the substituted string, not gsub's match count.
    local ident = (gsub(by_num[num], "@.*", ""))
    out:write(" ", prefix, ident, ",\n")
  end
  out:write(" ", prefix, "_MAX\n};\n")
end
236
-- Emit a null-terminated C array holding the global label names.
-- name: C identifier used for the array.
local function writeglobalnames(out, name)
  local by_num = {}
  for label, num in pairs(map_global) do
    by_num[num] = label
  end
  out:write("static const char *const ", name, "[] = {\n")
  for num = 10, next_global - 1 do
    out:write(" \"", by_num[num], "\",\n")
  end
  out:write(" (const char *)0\n};\n")
end
247
248 ------------------------------------------------------------------------------
249
-- Extern label name -> extern label number.
-- Unknown names get the next free negative number (-1, -2, ...) on first
-- lookup. The name itself is not validated.
local next_extern = -1
local map_extern = setmetatable({}, {
  __index = function(tab, label)
    local num = next_extern
    if num < -256 then werror("too many extern labels") end
    tab[label] = num
    next_extern = num - 1
    return num
  end,
})
260
-- List all extern labels in order of assignment.
-- out: any object with a file-like write() method. lvl is not used.
local function dumpexterns(out, lvl)
  local by_num = {}
  for label, num in pairs(map_extern) do
    by_num[-num] = label -- Numbers are negative; index by magnitude.
  end
  out:write("Extern labels:\n")
  for idx = 1, -next_extern - 1 do
    out:write(format(" %s\n", by_num[idx]))
  end
  out:write("\n")
end
271
-- Emit a null-terminated C array holding the extern label names.
-- name: C identifier used for the array.
local function writeexternnames(out, name)
  local by_num = {}
  for label, num in pairs(map_extern) do
    by_num[-num] = label -- Numbers are negative; index by magnitude.
  end
  out:write("static const char *const ", name, "[] = {\n")
  for idx = 1, -next_extern - 1 do
    out:write(" \"", by_num[idx], "\",\n")
  end
  out:write(" (const char *)0\n};\n")
end
282
283 ------------------------------------------------------------------------------
284
-- Arch-specific maps.
-- "Int. name" is the internal "@<size><num>" form built by mkrmap();
-- "ext. name" is the register name as written in the assembler source.
local map_archdef = {} -- Ext. register name -> int. name.
local map_reg_rev = {} -- Int. register name -> ext. name.
local map_reg_num = {} -- Int. register name -> register number.
local map_reg_opsize = {} -- Int. register name -> operand size.
local map_reg_valid_base = {} -- Int. register name -> valid base register?
local map_reg_valid_index = {} -- Int. register name -> valid index register?
local map_reg_needrex = {} -- Int. register name -> need rex vs. no rex.
local reg_list = {} -- Canonical list of int. register names.

local map_type = {} -- Type name -> { ctype, reg }
local ctypenum = 0 -- Type number (for _PTx macros).

-- Address operands are qword sized on x64 and dword sized on x86.
local addrsize = x64 and "q" or "d" -- Size for address operands.
299
-- Helper to fill the register maps for one register class.
-- sz:    operand size code of the class ("q", "d", "w", "b", "f", "o", "y").
-- cl:    external name of the variable-register class (e.g. "Rd", "xmm").
-- names: optional list of traditional names for registers 0..#names-1.
local function mkrmap(sz, cl, names)
  -- Only address-sized and dword registers may serve as base/index.
  local addressable = (sz == addrsize or sz == "d")

  -- Record one internal register name in all per-register maps.
  local function register(iname, extname, num)
    reg_list[#reg_list+1] = iname
    map_reg_rev[iname] = extname
    map_reg_num[iname] = num
    map_reg_opsize[iname] = sz
    if addressable then
      map_reg_valid_base[iname] = true
      map_reg_valid_index[iname] = true
    end
  end

  -- The class itself stands for a variable register (number -1).
  local cname = format("@%s", sz)
  map_archdef[cl] = cname
  register(cname, cl, -1)

  -- Traditionally named registers.
  if names then
    for idx = 1, #names do
      local num = idx - 1
      local iname = format("@%s%x", sz, num)
      map_archdef[names[idx]] = iname
      register(iname, names[idx], num)
      -- Byte registers 4..7 from the name list are flagged with false.
      if sz == "b" and idx > 4 then map_reg_needrex[iname] = false end
    end
  end

  -- Numbered registers: 16 on x64 (except for the FP stack), 8 otherwise.
  local last = (x64 and sz ~= "f") and 15 or 7
  for num = 0, last do
    -- Numbered byte registers above 3 get a distinct "R" suffixed name.
    local needrex = sz == "b" and num > 3
    local iname = format("@%s%x%s", sz, num, needrex and "R" or "")
    if needrex then map_reg_needrex[iname] = true end
    local extname
    if sz == "f" then
      extname = format("st%d", num)
    elseif sz == "o" or sz == "y" then
      extname = format("%s%d", cl, num)
    else
      extname = format("r%d%s", num, sz == addrsize and "" or sz)
    end
    map_archdef[extname] = iname
    -- Keep the traditional name as the canonical one if already present.
    if not map_reg_rev[iname] then register(iname, extname, num) end
  end

  reg_list[#reg_list+1] = "" -- Empty entry separates classes in reg_list.
end
349
-- Integer registers (qword, dword, word and byte sized).
-- The qword class only exists when targeting x64.
if x64 then
  mkrmap("q", "Rq", {"rax", "rcx", "rdx", "rbx", "rsp", "rbp", "rsi", "rdi"})
end
mkrmap("d", "Rd", {"eax", "ecx", "edx", "ebx", "esp", "ebp", "esi", "edi"})
mkrmap("w", "Rw", {"ax", "cx", "dx", "bx", "sp", "bp", "si", "di"})
mkrmap("b", "Rb", {"al", "cl", "dl", "bl", "ah", "ch", "dh", "bh"})
-- The stack pointer is marked as not usable as an index register.
map_reg_valid_index[map_archdef.esp] = false
if x64 then map_reg_valid_index[map_archdef.rsp] = false end
-- On x64 the variable byte register class is flagged as needing REX.
if x64 then map_reg_needrex[map_archdef.Rb] = true end
-- "Ra" is an alias for the address-sized variable register class.
map_archdef["Ra"] = "@"..addrsize

-- FP registers (internally tword sized, but use "f" as operand size).
mkrmap("f", "Rf")

-- SSE registers (oword sized, but qword and dword accessible).
mkrmap("o", "xmm")

-- AVX registers (yword sized, but oword, qword and dword accessible).
mkrmap("y", "ymm")
370
-- Operand size prefixes to codes.
-- "aword" resolves to the address size of the target.
local map_opsize = {
  byte = "b", word = "w", dword = "d", qword = "q", oword = "o", yword = "y",
  tword = "t", aword = addrsize,
}

-- Operand size code to number.
-- The numbers are used as multipliers for "opsize*num" expressions.
local map_opsizenum = {
  b = 1, w = 2, d = 4, q = 8, o = 16, y = 32, t = 10,
}

-- Operand size code to name.
local map_opsizename = {
  b = "byte", w = "word", d = "dword", q = "qword", o = "oword", y = "yword",
  t = "tword", f = "fpword",
}

-- Valid index register scale factors.
-- Keys are the scale as written in the source (a string), values the
-- number passed on as the scale field of the SIB byte.
local map_xsc = {
  ["1"] = 0, ["2"] = 1, ["4"] = 2, ["8"] = 3,
}

-- Condition codes.
-- The first two rows list the codes 0..15 once each; the remaining rows are
-- alias mnemonics mapping onto the same codes.
local map_cc = {
  o = 0, no = 1, b = 2, nb = 3, e = 4, ne = 5, be = 6, nbe = 7,
  s = 8, ns = 9, p = 10, np = 11, l = 12, nl = 13, le = 14, nle = 15,
  c = 2, nae = 2, nc = 3, ae = 3, z = 4, nz = 5, na = 6, a = 7,
  pe = 10, po = 11, nge = 12, ge = 13, ng = 14, g = 15,
}
400
401
-- Reverse defines for registers: replace every internal "@..." register
-- name in s that has an entry in map_reg_rev by its external name.
-- Returns the resulting string and the number of matches, as gsub does.
function _M.revdef(s)
  local text, count = gsub(s, "@%w+", map_reg_rev)
  return text, count
end
406
-- Print all registers with their size names and internal numbers.
-- Empty entries in reg_list produce a blank separator line.
local function dumpregs(out)
  out:write("Register names, sizes and internal numbers:\n")
  for idx = 1, #reg_list do
    local iname = reg_list[idx]
    if iname ~= "" then
      local num = map_reg_num[iname]
      local shown = num
      -- Negative numbers denote variable register classes.
      if num < 0 then shown = "(variable)" end
      out:write(format(" %-5s %-8s %s\n", map_reg_rev[iname],
                       map_opsizename[map_reg_opsize[iname]], shown))
    else
      out:write("\n")
    end
  end
end
422
423 ------------------------------------------------------------------------------
424
-- Put action for label arg (IMM_LG, IMM_PC, REL_LG, REL_PC).
-- aprefix: "IMM_" or "REL_".
-- imm:     a number (non-negative: local/global label, negative: extern
--          label) or anything else, which is passed on as a PC label arg.
-- num:     buffer position count handed to waction().
local function wputlabel(aprefix, imm, num)
  if type(imm) ~= "number" then
    waction(aprefix.."PC", imm, num)
    return
  end
  if imm < 0 then
    -- Extern label: action, kind byte (0 for IMM_, 1 otherwise), index.
    waction("EXTERN")
    if aprefix == "IMM_" then wputxb(0) else wputxb(1) end
    wputxb(-imm-1)
  else
    waction(aprefix.."LG", nil, num)
    wputxb(imm)
  end
end
440
-- Put signed byte or arg. Numbers are range-checked and emitted directly;
-- anything else becomes an IMM_S action argument.
local function wputsbarg(n)
  if type(n) ~= "number" then
    waction("IMM_S", n)
    return
  end
  if n < -128 or n > 127 then
    werror("signed immediate byte out of range")
  end
  -- Negative values are stored as their 8 bit two's complement.
  if n < 0 then n = n + 256 end
  wputb(n)
end
451
-- Put unsigned byte or arg. Numbers are range-checked and emitted directly;
-- anything else becomes an IMM_B action argument.
local function wputbarg(n)
  if type(n) ~= "number" then
    waction("IMM_B", n)
    return
  end
  if n < 0 or n > 255 then
    werror("unsigned immediate byte out of range")
  end
  wputb(n)
end
461
-- Put unsigned word or arg. Numbers are emitted low byte first; anything
-- else becomes an IMM_W action argument.
local function wputwarg(n)
  if type(n) ~= "number" then
    waction("IMM_W", n)
    return
  end
  -- Any bit above the low 16 means the value does not fit.
  if shr(n, 16) ~= 0 then
    werror("unsigned immediate word out of range")
  end
  wputb(band(n, 255))
  wputb(shr(n, 8))
end
471
-- Put signed or unsigned dword or arg.
-- A number is emitted as four bytes, low byte first; a table is treated as
-- a label reference (its first element); anything else becomes an IMM_D
-- action argument.
local function wputdarg(n)
  local kind = type(n)
  if kind == "table" then
    wputlabel("IMM_", n[1], 1)
  elseif kind ~= "number" then
    waction("IMM_D", n)
  else
    local b0 = band(n, 255)
    local b1 = band(shr(n, 8), 255)
    local b2 = band(shr(n, 16), 255)
    local b3 = shr(n, 24)
    wputb(b0); wputb(b1); wputb(b2); wputb(b3)
  end
end
486
-- Put signed or unsigned qword or arg.
-- A number is emitted as eight bytes, low byte first, with the upper dword
-- filled from the sign. Anything else is split into two IMM_D action
-- arguments holding the low and high 32 bits of the C expression.
local function wputqarg(n)
  if type(n) ~= "number" then
    local lo = format("(unsigned int)(%s)", n)
    local hi = format("(unsigned int)((unsigned long long)(%s)>>32)", n)
    waction("IMM_D", lo)
    waction("IMM_D", hi)
    return
  end
  -- This is only used for numbers from -2^31..2^32-1.
  local b0 = band(n, 255)
  local b1 = band(shr(n, 8), 255)
  local b2 = band(shr(n, 16), 255)
  local b3 = shr(n, 24)
  wputb(b0); wputb(b1); wputb(b2); wputb(b3)
  local fill = n < 0 and 255 or 0
  for _ = 1, 4 do wputb(fill) end
end
502
-- Put operand-size dependent number or arg.
-- A missing size, "d" and "q" all store a dword; "w" a word, "b" an
-- unsigned byte and "s" a signed byte. Any other size is an error.
local function wputszarg(sz, n)
  if sz == "w" then
    wputwarg(n)
  elseif sz == "b" then
    wputbarg(n)
  elseif sz == "s" then
    wputsbarg(n)
  elseif not sz or sz == "d" or sz == "q" then
    wputdarg(n)
  else
    werror("bad operand size")
  end
end
511
-- Put multi-byte opcode with operand-size dependent modifications.
-- sz:     operand size code; "w" adds a prefix byte, "b" decrements the
--         final opcode byte, "y" sets the VEX length bit.
-- op:     opcode as a number, most significant byte emitted first.
-- rex:    REX bits collected so far (0 if none).
-- vex:    VEX description table (fields m, p, l, v are read) or nil.
-- vregr, vregxb: truthy if variable registers are involved; on x64 they
--         force a prefix to be emitted that a VREG action may adjust later.
-- Returns psz and sk, which callers pass on to wvreg().
local function wputop(sz, op, rex, vex, vregr, vregxb)
  local psz, sk = 0, nil
  if vex then
    local tail
    -- The short 0xc5 form is only tried for map 1 with none of the rex
    -- bits in mask 11 set.
    if vex.m == 1 and band(rex, 11) == 0 then
      if x64 and vregxb then
        -- Fall through to the long form, but record a shrink threshold.
        sk = map_vreg["modrm.reg"]
      else
        wputb(0xc5)
        tail = shl(bxor(band(rex, 4), 4), 5)
        psz = 3
      end
    end
    if not tail then
      -- Long 0xc4 form: the low three rex bits are stored inverted.
      wputb(0xc4)
      wputb(shl(bxor(band(rex, 7), 7), 5) + vex.m)
      tail = shl(band(rex, 8), 4)
      psz = 4
    end
    local reg, vreg = 0, nil
    if vex.v then
      reg = vex.v.reg
      if not reg then werror("bad vex operand") end
      -- Negative register number: variable register, patched via VREG.
      if reg < 0 then reg = 0; vreg = vex.v.vreg end
    end
    if sz == "y" or vex.l then tail = tail + 4 end
    -- The register number is stored inverted.
    wputb(tail + shl(bxor(reg, 15), 3) + vex.p)
    wvreg("vex.v", vreg)
    rex = 0 -- All rex bits have been folded into the VEX prefix.
    if op >= 256 then werror("bad vex opcode") end
  else
    if rex ~= 0 then
      if not x64 then werror("bad operand size") end
    elseif (vregr or vregxb) and x64 then
      -- Force a prefix byte so there is something for VREG to patch.
      rex = 0x10
      sk = map_vreg["vex.v"]
    end
  end
  local r
  if sz == "w" then wputb(102) end -- 102 = 0x66.
  -- Needs >32 bit numbers, but only for crc32 eax, word [ebx]
  if op >= 4294967296 then r = op%4294967296 wputb((op-r)/4294967296) op = r end
  if op >= 16777216 then wputb(shr(op, 24)); op = band(op, 0xffffff) end
  if op >= 65536 then
    if rex ~= 0 then
      -- For 0F 3A xx and 0F 38 xx opcodes the REX byte goes before the 0F.
      local opc3 = band(op, 0xffff00)
      if opc3 == 0x0f3a00 or opc3 == 0x0f3800 then
        wputb(64 + band(rex, 15)); rex = 0; psz = 2
      end
    end
    wputb(shr(op, 16)); op = band(op, 0xffff); psz = psz + 1
  end
  if op >= 256 then
    local b = shr(op, 8)
    -- Likewise, the REX byte goes before a 0F escape byte.
    if b == 15 and rex ~= 0 then wputb(64 + band(rex, 15)); rex = 0; psz = 2 end
    wputb(b); op = band(op, 255); psz = psz + 1
  end
  -- REX not emitted yet: it goes right before the final opcode byte.
  if rex ~= 0 then wputb(64 + band(rex, 15)); psz = 2 end
  if sz == "b" then op = op - 1 end
  wputb(op)
  return psz, sk
end
575
-- Put ModRM or SIB formatted byte: m in bits 6-7, the low three bits of s in
-- bits 3-5 and the low three bits of rm in bits 0-2.
-- vs and vrm are accepted but not used.
local function wputmodrm(m, s, rm, vs, vrm)
  assert(m < 4 and s < 16 and rm < 16, "bad modrm operands")
  local hi = shl(m, 6)
  local mid = shl(band(s, 7), 3)
  wputb(hi + mid + band(rm, 7))
end
581
-- Put ModRM/SIB plus optional displacement.
-- t:       parsed operand (fields mode, reg, vreg, xreg, vxreg, xsc and disp
--          are read).
-- imark:   pattern character; "I" requests a MARK action after the ModRM
--          byte.
-- s:       value for the middle three bits of the ModRM byte (negative
--          values are treated as 0).
-- vsreg:   variable register expression for that field, if any.
-- psz, sk: values returned by wputop(), passed on to wvreg().
local function wputmrmsib(t, imark, s, vsreg, psz, sk)
  local vreg, vxreg
  local reg, xreg = t.reg, t.xreg
  -- Negative register numbers denote variable registers: encode 0 and let a
  -- VREG action supply the real number.
  if reg and reg < 0 then reg = 0; vreg = t.vreg end
  if xreg and xreg < 0 then xreg = 0; vxreg = t.vxreg end
  if s < 0 then s = 0 end

  -- Register mode.
  if sub(t.mode, 1, 1) == "r" then
    wputmodrm(3, s, reg)
    wvreg("modrm.reg", vsreg, psz+1, sk, vreg)
    wvreg("modrm.rm.r", vreg, psz+1, sk)
    return
  end

  local disp = t.disp
  local tdisp = type(disp)
  -- No base register?
  if not reg then
    local riprel = false
    if xreg then
      -- Indexed mode with index register only.
      -- [xreg*xsc+disp] -> (0, s, esp) (xsc, xreg, ebp)
      wputmodrm(0, s, 4)
      if imark == "I" then waction("MARK") end
      wvreg("modrm.reg", vsreg, psz+1, sk, vxreg)
      wputmodrm(t.xsc, xreg, 5)
      wvreg("sib.index", vxreg, psz+2, sk)
    else
      -- Pure 32 bit displacement.
      if x64 and tdisp ~= "table" then
        wputmodrm(0, s, 4) -- [disp] -> (0, s, esp) (0, esp, ebp)
        wvreg("modrm.reg", vsreg, psz+1, sk)
        if imark == "I" then waction("MARK") end
        wputmodrm(0, 4, 5)
      else
        -- On x64 a label displacement (table) is emitted rip-relative.
        riprel = x64
        wputmodrm(0, s, 5) -- [disp|rip-label] -> (0, s, ebp)
        wvreg("modrm.reg", vsreg, psz+1, sk)
        if imark == "I" then waction("MARK") end
      end
    end
    if riprel then -- Emit rip-relative displacement.
      if match("UWSiI", imark) then
        werror("NYI: rip-relative displacement followed by immediate")
      end
      -- The previous byte in the action buffer cannot be 0xe9 or 0x80-0x8f.
      wputlabel("REL_", disp[1], 2)
    else
      wputdarg(disp)
    end
    return
  end

  -- m is the ModRM mode for the displacement: 0 = none, 1 = signed byte,
  -- 2 = dword. It stays nil if the size is only known at runtime.
  local m
  if tdisp == "number" then -- Check displacement size at assembly time.
    if disp == 0 and band(reg, 7) ~= 5 then -- [ebp] -> [ebp+0] (in SIB, too)
      if not vreg then m = 0 end -- Force DISP to allow [Rd(5)] -> [ebp+0]
    elseif disp >= -128 and disp <= 127 then m = 1
    else m = 2 end
  elseif tdisp == "table" then
    m = 2
  end

  -- Index register present or esp as base register: need SIB encoding.
  if xreg or band(reg, 7) == 4 then
    wputmodrm(m or 2, s, 4) -- ModRM.
    if m == nil or imark == "I" then waction("MARK") end
    wvreg("modrm.reg", vsreg, psz+1, sk, vxreg or vreg)
    wputmodrm(t.xsc or 0, xreg or 4, reg) -- SIB.
    wvreg("sib.index", vxreg, psz+2, sk, vreg)
    wvreg("sib.base", vreg, psz+2, sk)
  else
    wputmodrm(m or 2, s, reg) -- ModRM.
    if (imark == "I" and (m == 1 or m == 2)) or
       (m == nil and (vsreg or vreg)) then waction("MARK") end
    wvreg("modrm.reg", vsreg, psz+1, sk, vreg)
    wvreg("modrm.rm.m", vreg, psz+1, sk)
  end

  -- Put displacement.
  if m == 1 then wputsbarg(disp)
  elseif m == 2 then wputdarg(disp)
  elseif m == nil then waction("DISP", disp) end -- Size decided via DISP.
end
668
669 ------------------------------------------------------------------------------
670
-- Return human-readable operand mode string: the op name followed by, for
-- each operand, the first mode character plus its size ("?" if unknown).
local function opmodestr(op, args)
  local parts = {}
  for idx = 1, #args do
    local arg = args[idx]
    parts[idx] = sub(arg.mode, 1, 1)..(arg.opsize or "?")
  end
  return op.." "..concat(parts, ",")
end
680
-- Convert number to valid integer or nil.
-- Returns nothing if expr is not a number at all. Fractional values and
-- values outside -2^31..2^32-1 are reported via werror(). With isqword set,
-- values outside the signed 32 bit range yield nil instead.
local function toint(expr, isqword)
  local n = tonumber(expr)
  if not n then return end
  local too_small = n < -2147483648
  if n % 1 ~= 0 then
    werror("not an integer number `"..expr.."'")
  elseif isqword then
    -- Handle it as an expression to avoid precision loss.
    if too_small or n > 2147483647 then return nil end
  elseif too_small or n > 4294967295 then
    werror("bad integer number `"..expr.."'")
  end
  return n
end
697
-- Parse immediate expression.
-- Returns the operand mode string and the immediate: a C expression string
-- or, for label references, the label number / pc label expression.
local function immexpr(expr)
  -- &expr (pointer)
  if sub(expr, 1, 1) == "&" then
    local target = sub(expr, 2)
    return "iPJ", format("(ptrdiff_t)(%s)", target)
  end

  local head, rest = sub(expr, 1, 2), sub(expr, 3)
  if head == "=>" then
    -- =>expr (pc label reference)
    return "iJ", rest
  elseif head == "->" then
    -- ->name (global label reference)
    return "iJ", map_global[rest]
  end

  -- [<>][1-9] (local label reference)
  local dir, lnum = match(expr, "^([<>])([1-9])$")
  if dir then -- Fwd: 247-255, Bkwd: 1-9.
    local base = 0
    if dir == ">" then base = 246 end
    return "iJ", lnum + base
  end

  -- extern name (extern label reference)
  local extname = match(expr, "^extern%s+(%S+)$")
  if extname then return "iJ", map_extern[extname] end

  -- expr (interpreted as immediate)
  return "iI", expr
end
729
-- Parse displacement expression: +-num, +-expr, +-opsize*num
-- Returns a number if the displacement is known now, a one-element table
-- for a label reference, or an expression string otherwise.
local function dispexpr(expr)
  if expr == "" then return 0 end
  local disp = toint(expr)
  if disp then return disp end

  local sign, body = match(expr, "^([+-])%s*(.+)$")
  if not sign then
    werror("bad displacement expression `"..expr.."'")
  elseif sign == "+" then
    expr = body -- A leading plus is dropped from the returned expression.
  end

  -- +-opsize*num
  local szname, factor = match(body, "^(%w+)%s*%*%s*(.+)$")
  local ops, imm = map_opsize[szname], toint(factor)
  if ops and imm then
    if sign == "-" then imm = -imm end
    return imm*map_opsizenum[ops]
  end

  -- Label reference?
  local mode, iexpr = immexpr(body)
  if mode == "iJ" then
    if sign == "-" then werror("cannot invert label reference") end
    return { iexpr }
  end
  return expr -- Need to return original signed expression.
end
753
-- Parse register or type expression.
-- For a known type name (optionally "type:@reg" with a register override)
-- returns the register, its number and the type entry. Otherwise returns
-- expr and its register number, which is nil if expr is not a register.
local function rtexpr(expr)
  if not expr then return end
  local tname, ovreg = match(expr, "^([%w_]+):(@[%w_]+)$")
  local key = tname or expr
  local tp = map_type[key]
  if not tp then
    return expr, map_reg_num[expr]
  end
  local reg = ovreg or tp.reg
  local rnum = map_reg_num[reg]
  if not rnum then
    werror("type `"..key.."' needs a register override")
  end
  if not map_reg_valid_base[reg] then
    werror("bad base register override `"..(map_reg_rev[reg] or reg).."'")
  end
  return reg, rnum, tp
end
772
-- Parse operand and return { mode, opsize, reg, xreg, xsc, disp, imm }.
-- param:   operand text, optionally preceded by an operand size name.
-- isqword: passed on to toint() for immediates, see there.
-- Besides the fields listed above, vreg/vxreg (variable register
-- expressions) and needrex may be set on the result, too.
local function parseoperand(param, isqword)
  local t = {}

  -- Split off a leading operand size override, if the first word is one.
  local expr = param
  local opsize, tailops = match(param, "^(%w+)%s*(.+)$")
  if opsize then
    t.opsize = map_opsize[opsize]
    if t.opsize then expr = tailops end
  end

  local br = match(expr, "^%[%s*(.-)%s*%]$")
  -- The loop body runs exactly once; it only exists so that `break` can be
  -- used to leave as soon as the operand is fully parsed.
  repeat
    if br then
      t.mode = "xm"

      -- [disp]
      t.disp = toint(br)
      if t.disp then
        t.mode = x64 and "xm" or "xmO"
        break
      end

      -- [reg...]
      local tp
      local reg, tailr = match(br, "^([@%w_:]+)%s*(.*)$")
      reg, t.reg, tp = rtexpr(reg)
      if not t.reg then
        -- [expr]
        t.mode = x64 and "xm" or "xmO"
        t.disp = dispexpr("+"..br)
        break
      end

      -- Variable register: the number is given as a parenthesized expression.
      if t.reg == -1 then
        t.vreg, tailr = match(tailr, "^(%b())(.*)$")
        if not t.vreg then werror("bad variable register expression") end
      end

      -- [xreg*xsc] or [xreg*xsc+-disp] or [xreg*xsc+-expr]
      local xsc, tailsc = match(tailr, "^%*%s*([1248])%s*(.*)$")
      if xsc then
        if not map_reg_valid_index[reg] then
          werror("bad index register `"..map_reg_rev[reg].."'")
        end
        -- The register parsed first turns out to be the index, not the base.
        t.xsc = map_xsc[xsc]
        t.xreg = t.reg
        t.vxreg = t.vreg
        t.reg = nil
        t.vreg = nil
        t.disp = dispexpr(tailsc)
        break
      end
      if not map_reg_valid_base[reg] then
        werror("bad base register `"..map_reg_rev[reg].."'")
      end

      -- [reg] or [reg+-disp]
      t.disp = toint(tailr) or (tailr == "" and 0)
      if t.disp then break end

      -- [reg+xreg...]
      local xreg, tailx = match(tailr, "^%+%s*([@%w_:]+)%s*(.*)$")
      xreg, t.xreg, tp = rtexpr(xreg)
      if not t.xreg then
        -- [reg+-expr]
        t.disp = dispexpr(tailr)
        break
      end
      if not map_reg_valid_index[xreg] then
        werror("bad index register `"..map_reg_rev[xreg].."'")
      end

      if t.xreg == -1 then
        t.vxreg, tailx = match(tailx, "^(%b())(.*)$")
        if not t.vxreg then werror("bad variable register expression") end
      end

      -- [reg+xreg*xsc...]
      local xsc, tailsc = match(tailx, "^%*%s*([1248])%s*(.*)$")
      if xsc then
        t.xsc = map_xsc[xsc]
        tailx = tailsc
      end

      -- [...] or [...+-disp] or [...+-expr]
      t.disp = dispexpr(tailx)
    else
      -- imm or opsize*imm
      local imm = toint(expr, isqword)
      if not imm and sub(expr, 1, 1) == "*" and t.opsize then
        imm = toint(sub(expr, 2))
        if imm then
          -- The size name was a multiplier here, not a size override.
          imm = imm * map_opsizenum[t.opsize]
          t.opsize = nil
        end
      end
      if imm then
        if t.opsize then werror("bad operand size override") end
        local m = "i"
        if imm == 1 then m = m.."1" end
        -- Map 0xffffff80..0xffffffff to -128..-1, so these values qualify
        -- as signed bytes below.
        if imm >= 4294967168 and imm <= 4294967295 then imm = imm-4294967296 end
        if imm >= -128 and imm <= 127 then m = m.."S" end
        t.imm = imm
        t.mode = m
        break
      end

      local tp
      local reg, tailr = match(expr, "^([@%w_:]+)%s*(.*)$")
      reg, t.reg, tp = rtexpr(reg)
      if t.reg then
        if t.reg == -1 then
          t.vreg, tailr = match(tailr, "^(%b())(.*)$")
          if not t.vreg then werror("bad variable register expression") end
        end
        -- reg
        if tailr == "" then
          if t.opsize then werror("bad operand size override") end
          t.opsize = map_reg_opsize[reg]
          if t.opsize == "f" then
            -- FP register number 0 gets the extra "F" mode character.
            t.mode = t.reg == 0 and "fF" or "f"
          else
            -- Warn about register 4 in word size (and dword size on x64).
            if reg == "@w4" or (x64 and reg == "@d4") then
              wwarn("bad idea, try again with `"..(x64 and "rsp'" or "esp'"))
            end
            -- Register number 0 adds "R", the byte register "@b1" adds "C".
            t.mode = t.reg == 0 and "rmR" or (reg == "@b1" and "rmC" or "rm")
          end
          t.needrex = map_reg_needrex[reg]
          break
        end

        -- type[idx], type[idx].field, type->field -> [reg+offset_expr]
        if not tp then werror("bad operand `"..param.."'") end
        t.mode = "xm"
        t.disp = format(tp.ctypefmt, tailr)
      else
        t.mode, t.imm = immexpr(expr)
        -- Jump targets are always address sized.
        if sub(t.mode, -1) == "J" then
          if t.opsize and t.opsize ~= addrsize then
            werror("bad operand size override")
          end
          t.opsize = addrsize
        end
      end
    end
  until true
  return t
end
922
923 ------------------------------------------------------------------------------
924 -- x86 Template String Description
925 -- ===============================
926 --
927 -- Each template string is a list of [match:]pattern pairs,
928 -- separated by "|". The first match wins. No match means a
929 -- bad or unsupported combination of operand modes or sizes.
930 --
931 -- The match part and the ":" is omitted if the operation has
932 -- no operands. Otherwise the first N characters are matched
933 -- against the mode strings of each of the N operands.
934 --
935 -- The mode string for each operand type is (see parseoperand()):
936 -- Integer register: "rm", +"R" for eax, ax, al, +"C" for cl
937 -- FP register: "f", +"F" for st0
938 -- Index operand: "xm", +"O" for [disp] (pure offset)
939 -- Immediate: "i", +"S" for signed 8 bit, +"1" for 1,
940 -- +"I" for arg, +"P" for pointer
941 -- Any: +"J" for valid jump targets
942 --
943 -- So a match character "m" (mixed) matches both an integer register
944 -- and an index operand (to be encoded with the ModRM/SIB scheme).
945 -- But "r" matches only a register and "x" only an index operand
946 -- (e.g. for FP memory access operations).
947 --
948 -- The operand size match string starts right after the mode match
949 -- characters and ends before the ":". "dwb" or "qdwb" is assumed, if empty.
950 -- The effective data size of the operation is matched against this list.
951 --
952 -- If only the regular "b", "w", "d", "q", "t" operand sizes are
953 -- present, then all operands must be the same size. Unspecified sizes
954 -- are ignored, but at least one operand must have a size or the pattern
955 -- won't match (use the "byte", "word", "dword", "qword", "tword"
956 -- operand size overrides. E.g.: mov dword [eax], 1).
957 --
958 -- If the list has a "1" or "2" prefix, the operand size is taken
959 -- from the respective operand and any other operand sizes are ignored.
960 -- If the list contains only ".", all operand sizes are ignored.
961 -- If the list has a "/" prefix, the concatenated (mixed) operand sizes
962 -- are compared to the match.
963 --
964 -- E.g. "rrdw" matches for either two dword registers or two word
965 -- registers. "Fx2dq" matches an st0 operand plus an index operand
966 -- pointing to a dword (float) or qword (double).
967 --
968 -- Every character after the ":" is part of the pattern string:
969 -- Hex chars are accumulated to form the opcode (left to right).
970 -- "n" disables the standard opcode mods
971 -- (otherwise: -1 for "b", o16 prefix for "w", rex.w for "q")
972 -- "X" Force REX.W.
973 -- "r"/"R" adds the reg. number from the 1st/2nd operand to the opcode.
974 -- "m"/"M" generates ModRM/SIB from the 1st/2nd operand.
975 -- The spare 3 bits are either filled with the last hex digit or
976 -- the result from a previous "r"/"R". The opcode is restored.
977 -- "u" Use VEX encoding, vvvv unused.
978 -- "v"/"V" Use VEX encoding, vvvv from 1st/2nd operand (the operand is
979 -- removed from the list used by future characters).
980 -- "w" Use VEX encoding, vvvv from 3rd operand.
981 -- "L" Force VEX.L
982 --
983 -- All of the following characters force a flush of the opcode:
984 -- "o"/"O" stores a pure 32 bit disp (offset) from the 1st/2nd operand.
985 -- "s" stores a 4 bit immediate from the last register operand,
986 -- followed by 4 zero bits.
987 -- "S" stores a signed 8 bit immediate from the last operand.
988 -- "U" stores an unsigned 8 bit immediate from the last operand.
989 -- "W" stores an unsigned 16 bit immediate from the last operand.
990 -- "i" stores an operand sized immediate from the last operand.
991 -- "I" ditto, but generates an action code to optionally modify
992 -- the opcode (+2) for a signed 8 bit immediate.
993 -- "J" generates one of the REL action codes from the last operand.
994 --
995 ------------------------------------------------------------------------------
996
997 -- Template strings for x86 instructions. Ordered by first opcode byte.
998 -- Unimplemented opcodes (deliberate omissions) are marked with *.
999 local map_op = {
1000 -- 00-05: add...
1001 -- 06: *push es
1002 -- 07: *pop es
1003 -- 08-0D: or...
1004 -- 0E: *push cs
1005 -- 0F: two byte opcode prefix
1006 -- 10-15: adc...
1007 -- 16: *push ss
1008 -- 17: *pop ss
1009 -- 18-1D: sbb...
1010 -- 1E: *push ds
1011 -- 1F: *pop ds
1012 -- 20-25: and...
1013 es_0 = "26",
1014 -- 27: *daa
1015 -- 28-2D: sub...
1016 cs_0 = "2E",
1017 -- 2F: *das
1018 -- 30-35: xor...
1019 ss_0 = "36",
1020 -- 37: *aaa
1021 -- 38-3D: cmp...
1022 ds_0 = "3E",
1023 -- 3F: *aas
1024 inc_1 = x64 and "m:FF0m" or "rdw:40r|m:FF0m",
1025 dec_1 = x64 and "m:FF1m" or "rdw:48r|m:FF1m",
1026 push_1 = (x64 and "rq:n50r|rw:50r|mq:nFF6m|mw:FF6m" or
1027 "rdw:50r|mdw:FF6m").."|S.:6AS|ib:n6Ai|i.:68i",
1028 pop_1 = x64 and "rq:n58r|rw:58r|mq:n8F0m|mw:8F0m" or "rdw:58r|mdw:8F0m",
1029 -- 60: *pusha, *pushad, *pushaw
1030 -- 61: *popa, *popad, *popaw
1031 -- 62: *bound rdw,x
1032 -- 63: x86: *arpl mw,rw
1033 movsxd_2 = x64 and "rm/qd:63rM",
1034 fs_0 = "64",
1035 gs_0 = "65",
1036 o16_0 = "66",
1037 a16_0 = not x64 and "67" or nil,
1038 a32_0 = x64 and "67",
1039 -- 68: push idw
1040 -- 69: imul rdw,mdw,idw
1041 -- 6A: push ib
1042 -- 6B: imul rdw,mdw,S
1043 -- 6C: *insb
1044 -- 6D: *insd, *insw
1045 -- 6E: *outsb
1046 -- 6F: *outsd, *outsw
1047 -- 70-7F: jcc lb
1048 -- 80: add... mb,i
1049 -- 81: add... mdw,i
1050 -- 82: *undefined
1051 -- 83: add... mdw,S
1052 test_2 = "mr:85Rm|rm:85rM|Ri:A9ri|mi:F70mi",
1053 -- 86: xchg rb,mb
1054 -- 87: xchg rdw,mdw
1055 -- 88: mov mb,r
1056 -- 89: mov mdw,r
1057 -- 8A: mov r,mb
1058 -- 8B: mov r,mdw
1059 -- 8C: *mov mdw,seg
1060 lea_2 = "rx1dq:8DrM",
1061 -- 8E: *mov seg,mdw
1062 -- 8F: pop mdw
1063 nop_0 = "90",
1064 xchg_2 = "Rrqdw:90R|rRqdw:90r|rm:87rM|mr:87Rm",
1065 cbw_0 = "6698",
1066 cwde_0 = "98",
1067 cdqe_0 = "4898",
1068 cwd_0 = "6699",
1069 cdq_0 = "99",
1070 cqo_0 = "4899",
1071 -- 9A: *call iw:idw
1072 wait_0 = "9B",
1073 fwait_0 = "9B",
1074 pushf_0 = "9C",
1075 pushfd_0 = not x64 and "9C",
1076 pushfq_0 = x64 and "9C",
1077 popf_0 = "9D",
1078 popfd_0 = not x64 and "9D",
1079 popfq_0 = x64 and "9D",
1080 sahf_0 = "9E",
1081 lahf_0 = "9F",
1082 mov_2 = "OR:A3o|RO:A1O|mr:89Rm|rm:8BrM|rib:nB0ri|ridw:B8ri|mi:C70mi",
1083 movsb_0 = "A4",
1084 movsw_0 = "66A5",
1085 movsd_0 = "A5",
1086 cmpsb_0 = "A6",
1087 cmpsw_0 = "66A7",
1088 cmpsd_0 = "A7",
1089 -- A8: test Rb,i
1090 -- A9: test Rdw,i
1091 stosb_0 = "AA",
1092 stosw_0 = "66AB",
1093 stosd_0 = "AB",
1094 lodsb_0 = "AC",
1095 lodsw_0 = "66AD",
1096 lodsd_0 = "AD",
1097 scasb_0 = "AE",
1098 scasw_0 = "66AF",
1099 scasd_0 = "AF",
1100 -- B0-B7: mov rb,i
1101 -- B8-BF: mov rdw,i
1102 -- C0: rol... mb,i
1103 -- C1: rol... mdw,i
1104 ret_1 = "i.:nC2W",
1105 ret_0 = "C3",
1106 -- C4: *les rdw,mq
1107 -- C5: *lds rdw,mq
1108 -- C6: mov mb,i
1109 -- C7: mov mdw,i
1110 -- C8: *enter iw,ib
1111 leave_0 = "C9",
1112 -- CA: *retf iw
1113 -- CB: *retf
1114 int3_0 = "CC",
1115 int_1 = "i.:nCDU",
1116 into_0 = "CE",
1117 -- CF: *iret
1118 -- D0: rol... mb,1
1119 -- D1: rol... mdw,1
1120 -- D2: rol... mb,cl
1121 -- D3: rol... mdw,cl
1122 -- D4: *aam ib
1123 -- D5: *aad ib
1124 -- D6: *salc
1125 -- D7: *xlat
1126 -- D8-DF: floating point ops
1127 -- E0: *loopne
1128 -- E1: *loope
1129 -- E2: *loop
1130 -- E3: *jcxz, *jecxz
1131 -- E4: *in Rb,ib
1132 -- E5: *in Rdw,ib
1133 -- E6: *out ib,Rb
1134 -- E7: *out ib,Rdw
1135 call_1 = x64 and "mq:nFF2m|J.:E8nJ" or "md:FF2m|J.:E8J",
1136 jmp_1 = x64 and "mq:nFF4m|J.:E9nJ" or "md:FF4m|J.:E9J", -- short: EB
1137 -- EA: *jmp iw:idw
1138 -- EB: jmp ib
1139 -- EC: *in Rb,dx
1140 -- ED: *in Rdw,dx
1141 -- EE: *out dx,Rb
1142 -- EF: *out dx,Rdw
1143 lock_0 = "F0",
1144 int1_0 = "F1",
1145 repne_0 = "F2",
1146 repnz_0 = "F2",
1147 rep_0 = "F3",
1148 repe_0 = "F3",
1149 repz_0 = "F3",
1150 -- F4: *hlt
1151 cmc_0 = "F5",
1152 -- F6: test... mb,i; div... mb
1153 -- F7: test... mdw,i; div... mdw
1154 clc_0 = "F8",
1155 stc_0 = "F9",
1156 -- FA: *cli
1157 cld_0 = "FC",
1158 std_0 = "FD",
1159 -- FE: inc... mb
1160 -- FF: inc... mdw
1161
1162 -- misc ops
1163 not_1 = "m:F72m",
1164 neg_1 = "m:F73m",
1165 mul_1 = "m:F74m",
1166 imul_1 = "m:F75m",
1167 div_1 = "m:F76m",
1168 idiv_1 = "m:F77m",
1169
1170 imul_2 = "rmqdw:0FAFrM|rIqdw:69rmI|rSqdw:6BrmS|riqdw:69rmi",
1171 imul_3 = "rmIqdw:69rMI|rmSqdw:6BrMS|rmiqdw:69rMi",
1172
1173 movzx_2 = "rm/db:0FB6rM|rm/qb:|rm/wb:0FB6rM|rm/dw:0FB7rM|rm/qw:",
1174 movsx_2 = "rm/db:0FBErM|rm/qb:|rm/wb:0FBErM|rm/dw:0FBFrM|rm/qw:",
1175
1176 bswap_1 = "rqd:0FC8r",
1177 bsf_2 = "rmqdw:0FBCrM",
1178 bsr_2 = "rmqdw:0FBDrM",
1179 bt_2 = "mrqdw:0FA3Rm|miqdw:0FBA4mU",
1180 btc_2 = "mrqdw:0FBBRm|miqdw:0FBA7mU",
1181 btr_2 = "mrqdw:0FB3Rm|miqdw:0FBA6mU",
1182 bts_2 = "mrqdw:0FABRm|miqdw:0FBA5mU",
1183
1184 shld_3 = "mriqdw:0FA4RmU|mrC/qq:0FA5Rm|mrC/dd:|mrC/ww:",
1185 shrd_3 = "mriqdw:0FACRmU|mrC/qq:0FADRm|mrC/dd:|mrC/ww:",
1186
1187 rdtsc_0 = "0F31", -- P1+
1188 rdpmc_0 = "0F33", -- P6+
1189 cpuid_0 = "0FA2", -- P1+
1190
1191 -- floating point ops
1192 fst_1 = "ff:DDD0r|xd:D92m|xq:nDD2m",
1193 fstp_1 = "ff:DDD8r|xd:D93m|xq:nDD3m|xt:DB7m",
1194 fld_1 = "ff:D9C0r|xd:D90m|xq:nDD0m|xt:DB5m",
1195
1196 fpop_0 = "DDD8", -- Alias for fstp st0.
1197
1198 fist_1 = "xw:nDF2m|xd:DB2m",
1199 fistp_1 = "xw:nDF3m|xd:DB3m|xq:nDF7m",
1200 fild_1 = "xw:nDF0m|xd:DB0m|xq:nDF5m",
1201
1202 fxch_0 = "D9C9",
1203 fxch_1 = "ff:D9C8r",
1204 fxch_2 = "fFf:D9C8r|Fff:D9C8R",
1205
1206 fucom_1 = "ff:DDE0r",
1207 fucom_2 = "Fff:DDE0R",
1208 fucomp_1 = "ff:DDE8r",
1209 fucomp_2 = "Fff:DDE8R",
1210 fucomi_1 = "ff:DBE8r", -- P6+
1211 fucomi_2 = "Fff:DBE8R", -- P6+
1212 fucomip_1 = "ff:DFE8r", -- P6+
1213 fucomip_2 = "Fff:DFE8R", -- P6+
1214 fcomi_1 = "ff:DBF0r", -- P6+
1215 fcomi_2 = "Fff:DBF0R", -- P6+
1216 fcomip_1 = "ff:DFF0r", -- P6+
1217 fcomip_2 = "Fff:DFF0R", -- P6+
1218 fucompp_0 = "DAE9",
1219 fcompp_0 = "DED9",
1220
1221 fldenv_1 = "x.:D94m",
1222 fnstenv_1 = "x.:D96m",
1223 fstenv_1 = "x.:9BD96m",
1224 fldcw_1 = "xw:nD95m",
1225 fstcw_1 = "xw:n9BD97m",
1226 fnstcw_1 = "xw:nD97m",
1227 fstsw_1 = "Rw:n9BDFE0|xw:n9BDD7m",
1228 fnstsw_1 = "Rw:nDFE0|xw:nDD7m",
1229 fclex_0 = "9BDBE2",
1230 fnclex_0 = "DBE2",
1231
1232 fnop_0 = "D9D0",
1233 -- D9D1-D9DF: unassigned
1234
1235 fchs_0 = "D9E0",
1236 fabs_0 = "D9E1",
1237 -- D9E2: unassigned
1238 -- D9E3: unassigned
1239 ftst_0 = "D9E4",
1240 fxam_0 = "D9E5",
1241 -- D9E6: unassigned
1242 -- D9E7: unassigned
1243 fld1_0 = "D9E8",
1244 fldl2t_0 = "D9E9",
1245 fldl2e_0 = "D9EA",
1246 fldpi_0 = "D9EB",
1247 fldlg2_0 = "D9EC",
1248 fldln2_0 = "D9ED",
1249 fldz_0 = "D9EE",
1250 -- D9EF: unassigned
1251
1252 f2xm1_0 = "D9F0",
1253 fyl2x_0 = "D9F1",
1254 fptan_0 = "D9F2",
1255 fpatan_0 = "D9F3",
1256 fxtract_0 = "D9F4",
1257 fprem1_0 = "D9F5",
1258 fdecstp_0 = "D9F6",
1259 fincstp_0 = "D9F7",
1260 fprem_0 = "D9F8",
1261 fyl2xp1_0 = "D9F9",
1262 fsqrt_0 = "D9FA",
1263 fsincos_0 = "D9FB",
1264 frndint_0 = "D9FC",
1265 fscale_0 = "D9FD",
1266 fsin_0 = "D9FE",
1267 fcos_0 = "D9FF",
1268
1269 -- SSE, SSE2
1270 andnpd_2 = "rmo:660F55rM",
1271 andnps_2 = "rmo:0F55rM",
1272 andpd_2 = "rmo:660F54rM",
1273 andps_2 = "rmo:0F54rM",
1274 clflush_1 = "x.:0FAE7m",
1275 cmppd_3 = "rmio:660FC2rMU",
1276 cmpps_3 = "rmio:0FC2rMU",
1277 cmpsd_3 = "rrio:F20FC2rMU|rxi/oq:",
1278 cmpss_3 = "rrio:F30FC2rMU|rxi/od:",
1279 comisd_2 = "rro:660F2FrM|rx/oq:",
1280 comiss_2 = "rro:0F2FrM|rx/od:",
1281 cvtdq2pd_2 = "rro:F30FE6rM|rx/oq:",
1282 cvtdq2ps_2 = "rmo:0F5BrM",
1283 cvtpd2dq_2 = "rmo:F20FE6rM",
1284 cvtpd2ps_2 = "rmo:660F5ArM",
1285 cvtpi2pd_2 = "rx/oq:660F2ArM",
1286 cvtpi2ps_2 = "rx/oq:0F2ArM",
1287 cvtps2dq_2 = "rmo:660F5BrM",
1288 cvtps2pd_2 = "rro:0F5ArM|rx/oq:",
1289 cvtsd2si_2 = "rr/do:F20F2DrM|rr/qo:|rx/dq:|rxq:",
1290 cvtsd2ss_2 = "rro:F20F5ArM|rx/oq:",
1291 cvtsi2sd_2 = "rm/od:F20F2ArM|rm/oq:F20F2ArXM",
1292 cvtsi2ss_2 = "rm/od:F30F2ArM|rm/oq:F30F2ArXM",
1293 cvtss2sd_2 = "rro:F30F5ArM|rx/od:",
1294 cvtss2si_2 = "rr/do:F30F2DrM|rr/qo:|rxd:|rx/qd:",
1295 cvttpd2dq_2 = "rmo:660FE6rM",
1296 cvttps2dq_2 = "rmo:F30F5BrM",
1297 cvttsd2si_2 = "rr/do:F20F2CrM|rr/qo:|rx/dq:|rxq:",
1298 cvttss2si_2 = "rr/do:F30F2CrM|rr/qo:|rxd:|rx/qd:",
1299 fxsave_1 = "x.:0FAE0m",
1300 fxrstor_1 = "x.:0FAE1m",
1301 ldmxcsr_1 = "xd:0FAE2m",
1302 lfence_0 = "0FAEE8",
1303 maskmovdqu_2 = "rro:660FF7rM",
1304 mfence_0 = "0FAEF0",
1305 movapd_2 = "rmo:660F28rM|mro:660F29Rm",
1306 movaps_2 = "rmo:0F28rM|mro:0F29Rm",
1307 movd_2 = "rm/od:660F6ErM|rm/oq:660F6ErXM|mr/do:660F7ERm|mr/qo:",
1308 movdqa_2 = "rmo:660F6FrM|mro:660F7FRm",
1309 movdqu_2 = "rmo:F30F6FrM|mro:F30F7FRm",
1310 movhlps_2 = "rro:0F12rM",
1311 movhpd_2 = "rx/oq:660F16rM|xr/qo:n660F17Rm",
1312 movhps_2 = "rx/oq:0F16rM|xr/qo:n0F17Rm",
1313 movlhps_2 = "rro:0F16rM",
1314 movlpd_2 = "rx/oq:660F12rM|xr/qo:n660F13Rm",
1315 movlps_2 = "rx/oq:0F12rM|xr/qo:n0F13Rm",
1316 movmskpd_2 = "rr/do:660F50rM",
1317 movmskps_2 = "rr/do:0F50rM",
1318 movntdq_2 = "xro:660FE7Rm",
1319 movnti_2 = "xrqd:0FC3Rm",
1320 movntpd_2 = "xro:660F2BRm",
1321 movntps_2 = "xro:0F2BRm",
1322 movq_2 = "rro:F30F7ErM|rx/oq:|xr/qo:n660FD6Rm",
1323 movsd_2 = "rro:F20F10rM|rx/oq:|xr/qo:nF20F11Rm",
1324 movss_2 = "rro:F30F10rM|rx/od:|xr/do:F30F11Rm",
1325 movupd_2 = "rmo:660F10rM|mro:660F11Rm",
1326 movups_2 = "rmo:0F10rM|mro:0F11Rm",
1327 orpd_2 = "rmo:660F56rM",
1328 orps_2 = "rmo:0F56rM",
1329 pause_0 = "F390",
1330 pextrw_3 = "rri/do:660FC5rMU|xri/wo:660F3A15nRmU", -- Mem op: SSE4.1 only.
1331 pinsrw_3 = "rri/od:660FC4rMU|rxi/ow:",
1332 pmovmskb_2 = "rr/do:660FD7rM",
1333 prefetchnta_1 = "xb:n0F180m",
1334 prefetcht0_1 = "xb:n0F181m",
1335 prefetcht1_1 = "xb:n0F182m",
1336 prefetcht2_1 = "xb:n0F183m",
1337 pshufd_3 = "rmio:660F70rMU",
1338 pshufhw_3 = "rmio:F30F70rMU",
1339 pshuflw_3 = "rmio:F20F70rMU",
1340 pslld_2 = "rmo:660FF2rM|rio:660F726mU",
1341 pslldq_2 = "rio:660F737mU",
1342 psllq_2 = "rmo:660FF3rM|rio:660F736mU",
1343 psllw_2 = "rmo:660FF1rM|rio:660F716mU",
1344 psrad_2 = "rmo:660FE2rM|rio:660F724mU",
1345 psraw_2 = "rmo:660FE1rM|rio:660F714mU",
1346 psrld_2 = "rmo:660FD2rM|rio:660F722mU",
1347 psrldq_2 = "rio:660F733mU",
1348 psrlq_2 = "rmo:660FD3rM|rio:660F732mU",
1349 psrlw_2 = "rmo:660FD1rM|rio:660F712mU",
1350 rcpps_2 = "rmo:0F53rM",
1351 rcpss_2 = "rro:F30F53rM|rx/od:",
1352 rsqrtps_2 = "rmo:0F52rM",
1353 rsqrtss_2 = "rmo:F30F52rM",
1354 sfence_0 = "0FAEF8",
1355 shufpd_3 = "rmio:660FC6rMU",
1356 shufps_3 = "rmio:0FC6rMU",
1357 stmxcsr_1 = "xd:0FAE3m",
1358 ucomisd_2 = "rro:660F2ErM|rx/oq:",
1359 ucomiss_2 = "rro:0F2ErM|rx/od:",
1360 unpckhpd_2 = "rmo:660F15rM",
1361 unpckhps_2 = "rmo:0F15rM",
1362 unpcklpd_2 = "rmo:660F14rM",
1363 unpcklps_2 = "rmo:0F14rM",
1364 xorpd_2 = "rmo:660F57rM",
1365 xorps_2 = "rmo:0F57rM",
1366
1367 -- SSE3 ops
1368 fisttp_1 = "xw:nDF1m|xd:DB1m|xq:nDD1m",
1369 addsubpd_2 = "rmo:660FD0rM",
1370 addsubps_2 = "rmo:F20FD0rM",
1371 haddpd_2 = "rmo:660F7CrM",
1372 haddps_2 = "rmo:F20F7CrM",
1373 hsubpd_2 = "rmo:660F7DrM",
1374 hsubps_2 = "rmo:F20F7DrM",
1375 lddqu_2 = "rxo:F20FF0rM",
1376 movddup_2 = "rmo:F20F12rM",
1377 movshdup_2 = "rmo:F30F16rM",
1378 movsldup_2 = "rmo:F30F12rM",
1379
1380 -- SSSE3 ops
1381 pabsb_2 = "rmo:660F381CrM",
1382 pabsd_2 = "rmo:660F381ErM",
1383 pabsw_2 = "rmo:660F381DrM",
1384 palignr_3 = "rmio:660F3A0FrMU",
1385 phaddd_2 = "rmo:660F3802rM",
1386 phaddsw_2 = "rmo:660F3803rM",
1387 phaddw_2 = "rmo:660F3801rM",
1388 phsubd_2 = "rmo:660F3806rM",
1389 phsubsw_2 = "rmo:660F3807rM",
1390 phsubw_2 = "rmo:660F3805rM",
1391 pmaddubsw_2 = "rmo:660F3804rM",
1392 pmulhrsw_2 = "rmo:660F380BrM",
1393 pshufb_2 = "rmo:660F3800rM",
1394 psignb_2 = "rmo:660F3808rM",
1395 psignd_2 = "rmo:660F380ArM",
1396 psignw_2 = "rmo:660F3809rM",
1397
1398 -- SSE4.1 ops
1399 blendpd_3 = "rmio:660F3A0DrMU",
1400 blendps_3 = "rmio:660F3A0CrMU",
1401 blendvpd_3 = "rmRo:660F3815rM",
1402 blendvps_3 = "rmRo:660F3814rM",
1403 dppd_3 = "rmio:660F3A41rMU",
1404 dpps_3 = "rmio:660F3A40rMU",
1405 extractps_3 = "mri/do:660F3A17RmU|rri/qo:660F3A17RXmU",
-- insertps is 66 0F 3A 21 /r ib (the same opcode byte vinsertps_4 uses);
-- 66 0F 3A 41 is dppd. The second alternative reuses this pattern for a
-- dword memory source.
insertps_3 = "rrio:660F3A21rMU|rxi/od:",
1407 movntdqa_2 = "rxo:660F382ArM",
1408 mpsadbw_3 = "rmio:660F3A42rMU",
1409 packusdw_2 = "rmo:660F382BrM",
1410 pblendvb_3 = "rmRo:660F3810rM",
1411 pblendw_3 = "rmio:660F3A0ErMU",
1412 pcmpeqq_2 = "rmo:660F3829rM",
1413 pextrb_3 = "rri/do:660F3A14nRmU|rri/qo:|xri/bo:",
1414 pextrd_3 = "mri/do:660F3A16RmU",
1415 pextrq_3 = "mri/qo:660F3A16RmU",
1416 -- pextrw is SSE2, mem operand is SSE4.1 only
1417 phminposuw_2 = "rmo:660F3841rM",
1418 pinsrb_3 = "rri/od:660F3A20nrMU|rxi/ob:",
1419 pinsrd_3 = "rmi/od:660F3A22rMU",
1420 pinsrq_3 = "rmi/oq:660F3A22rXMU",
1421 pmaxsb_2 = "rmo:660F383CrM",
1422 pmaxsd_2 = "rmo:660F383DrM",
1423 pmaxud_2 = "rmo:660F383FrM",
1424 pmaxuw_2 = "rmo:660F383ErM",
1425 pminsb_2 = "rmo:660F3838rM",
1426 pminsd_2 = "rmo:660F3839rM",
1427 pminud_2 = "rmo:660F383BrM",
1428 pminuw_2 = "rmo:660F383ArM",
1429 pmovsxbd_2 = "rro:660F3821rM|rx/od:",
1430 pmovsxbq_2 = "rro:660F3822rM|rx/ow:",
1431 pmovsxbw_2 = "rro:660F3820rM|rx/oq:",
1432 pmovsxdq_2 = "rro:660F3825rM|rx/oq:",
1433 pmovsxwd_2 = "rro:660F3823rM|rx/oq:",
1434 pmovsxwq_2 = "rro:660F3824rM|rx/od:",
1435 pmovzxbd_2 = "rro:660F3831rM|rx/od:",
1436 pmovzxbq_2 = "rro:660F3832rM|rx/ow:",
1437 pmovzxbw_2 = "rro:660F3830rM|rx/oq:",
1438 pmovzxdq_2 = "rro:660F3835rM|rx/oq:",
1439 pmovzxwd_2 = "rro:660F3833rM|rx/oq:",
1440 pmovzxwq_2 = "rro:660F3834rM|rx/od:",
1441 pmuldq_2 = "rmo:660F3828rM",
1442 pmulld_2 = "rmo:660F3840rM",
1443 ptest_2 = "rmo:660F3817rM",
1444 roundpd_3 = "rmio:660F3A09rMU",
1445 roundps_3 = "rmio:660F3A08rMU",
1446 roundsd_3 = "rrio:660F3A0BrMU|rxi/oq:",
1447 roundss_3 = "rrio:660F3A0ArMU|rxi/od:",
1448
1449 -- SSE4.2 ops
1450 crc32_2 = "rmqd:F20F38F1rM|rm/dw:66F20F38F1rM|rm/db:F20F38F0rM|rm/qb:",
1451 pcmpestri_3 = "rmio:660F3A61rMU",
1452 pcmpestrm_3 = "rmio:660F3A60rMU",
1453 pcmpgtq_2 = "rmo:660F3837rM",
1454 pcmpistri_3 = "rmio:660F3A63rMU",
1455 pcmpistrm_3 = "rmio:660F3A62rMU",
1456 popcnt_2 = "rmqdw:F30FB8rM",
1457
1458 -- SSE4a
1459 extrq_2 = "rro:660F79rM",
1460 extrq_3 = "riio:660F780mUU",
1461 insertq_2 = "rro:F20F79rM",
1462 insertq_4 = "rriio:F20F78rMUU",
1463 lzcnt_2 = "rmqdw:F30FBDrM",
1464 movntsd_2 = "xr/qo:nF20F2BRm",
1465 movntss_2 = "xr/do:F30F2BRm",
1466 -- popcnt is also in SSE4.2
1467
1468 -- AES-NI
1469 aesdec_2 = "rmo:660F38DErM",
1470 aesdeclast_2 = "rmo:660F38DFrM",
1471 aesenc_2 = "rmo:660F38DCrM",
1472 aesenclast_2 = "rmo:660F38DDrM",
1473 aesimc_2 = "rmo:660F38DBrM",
1474 aeskeygenassist_3 = "rmio:660F3ADFrMU",
1475 pclmulqdq_3 = "rmio:660F3A44rMU",
1476
1477 -- AVX FP ops
1478 vaddsubpd_3 = "rrmoy:660FVD0rM",
1479 vaddsubps_3 = "rrmoy:F20FVD0rM",
1480 vandpd_3 = "rrmoy:660FV54rM",
1481 vandps_3 = "rrmoy:0FV54rM",
1482 vandnpd_3 = "rrmoy:660FV55rM",
1483 vandnps_3 = "rrmoy:0FV55rM",
1484 vblendpd_4 = "rrmioy:660F3AV0DrMU",
1485 vblendps_4 = "rrmioy:660F3AV0CrMU",
1486 vblendvpd_4 = "rrmroy:660F3AV4BrMs",
1487 vblendvps_4 = "rrmroy:660F3AV4ArMs",
1488 vbroadcastf128_2 = "rx/yo:660F38u1ArM",
1489 vcmppd_4 = "rrmioy:660FVC2rMU",
1490 vcmpps_4 = "rrmioy:0FVC2rMU",
1491 vcmpsd_4 = "rrrio:F20FVC2rMU|rrxi/ooq:",
1492 vcmpss_4 = "rrrio:F30FVC2rMU|rrxi/ood:",
1493 vcomisd_2 = "rro:660Fu2FrM|rx/oq:",
1494 vcomiss_2 = "rro:0Fu2FrM|rx/od:",
1495 vcvtdq2pd_2 = "rro:F30FuE6rM|rx/oq:|rm/yo:",
1496 vcvtdq2ps_2 = "rmoy:0Fu5BrM",
1497 vcvtpd2dq_2 = "rmoy:F20FuE6rM",
1498 vcvtpd2ps_2 = "rmoy:660Fu5ArM",
1499 vcvtps2dq_2 = "rmoy:660Fu5BrM",
1500 vcvtps2pd_2 = "rro:0Fu5ArM|rx/oq:|rm/yo:",
1501 vcvtsd2si_2 = "rr/do:F20Fu2DrM|rx/dq:|rr/qo:|rxq:",
1502 vcvtsd2ss_3 = "rrro:F20FV5ArM|rrx/ooq:",
1503 vcvtsi2sd_3 = "rrm/ood:F20FV2ArM|rrm/ooq:F20FVX2ArM",
1504 vcvtsi2ss_3 = "rrm/ood:F30FV2ArM|rrm/ooq:F30FVX2ArM",
1505 vcvtss2sd_3 = "rrro:F30FV5ArM|rrx/ood:",
1506 vcvtss2si_2 = "rr/do:F30Fu2DrM|rxd:|rr/qo:|rx/qd:",
1507 vcvttpd2dq_2 = "rmo:660FuE6rM|rm/oy:660FuLE6rM",
1508 vcvttps2dq_2 = "rmoy:F30Fu5BrM",
1509 vcvttsd2si_2 = "rr/do:F20Fu2CrM|rx/dq:|rr/qo:|rxq:",
1510 vcvttss2si_2 = "rr/do:F30Fu2CrM|rxd:|rr/qo:|rx/qd:",
1511 vdppd_4 = "rrmio:660F3AV41rMU",
1512 vdpps_4 = "rrmioy:660F3AV40rMU",
1513 vextractf128_3 = "mri/oy:660F3AuL19RmU",
1514 vextractps_3 = "mri/do:660F3Au17RmU",
1515 vhaddpd_3 = "rrmoy:660FV7CrM",
1516 vhaddps_3 = "rrmoy:F20FV7CrM",
1517 vhsubpd_3 = "rrmoy:660FV7DrM",
1518 vhsubps_3 = "rrmoy:F20FV7DrM",
1519 vinsertf128_4 = "rrmi/yyo:660F3AV18rMU",
1520 vinsertps_4 = "rrrio:660F3AV21rMU|rrxi/ood:",
1521 vldmxcsr_1 = "xd:0FuAE2m",
1522 vmaskmovps_3 = "rrxoy:660F38V2CrM|xrroy:660F38V2ERm",
1523 vmaskmovpd_3 = "rrxoy:660F38V2DrM|xrroy:660F38V2FRm",
1524 vmovapd_2 = "rmoy:660Fu28rM|mroy:660Fu29Rm",
1525 vmovaps_2 = "rmoy:0Fu28rM|mroy:0Fu29Rm",
1526 vmovd_2 = "rm/od:660Fu6ErM|rm/oq:660FuX6ErM|mr/do:660Fu7ERm|mr/qo:",
1527 vmovq_2 = "rro:F30Fu7ErM|rx/oq:|xr/qo:660FuD6Rm",
1528 vmovddup_2 = "rmy:F20Fu12rM|rro:|rx/oq:",
1529 vmovhlps_3 = "rrro:0FV12rM",
1530 vmovhpd_2 = "xr/qo:660Fu17Rm",
1531 vmovhpd_3 = "rrx/ooq:660FV16rM",
1532 vmovhps_2 = "xr/qo:0Fu17Rm",
1533 vmovhps_3 = "rrx/ooq:0FV16rM",
1534 vmovlhps_3 = "rrro:0FV16rM",
1535 vmovlpd_2 = "xr/qo:660Fu13Rm",
1536 vmovlpd_3 = "rrx/ooq:660FV12rM",
1537 vmovlps_2 = "xr/qo:0Fu13Rm",
1538 vmovlps_3 = "rrx/ooq:0FV12rM",
1539 vmovmskpd_2 = "rr/do:660Fu50rM|rr/dy:660FuL50rM",
1540 vmovmskps_2 = "rr/do:0Fu50rM|rr/dy:0FuL50rM",
1541 vmovntpd_2 = "xroy:660Fu2BRm",
1542 vmovntps_2 = "xroy:0Fu2BRm",
1543 vmovsd_2 = "rx/oq:F20Fu10rM|xr/qo:F20Fu11Rm",
1544 vmovsd_3 = "rrro:F20FV10rM",
1545 vmovshdup_2 = "rmoy:F30Fu16rM",
1546 vmovsldup_2 = "rmoy:F30Fu12rM",
1547 vmovss_2 = "rx/od:F30Fu10rM|xr/do:F30Fu11Rm",
1548 vmovss_3 = "rrro:F30FV10rM",
1549 vmovupd_2 = "rmoy:660Fu10rM|mroy:660Fu11Rm",
1550 vmovups_2 = "rmoy:0Fu10rM|mroy:0Fu11Rm",
1551 vorpd_3 = "rrmoy:660FV56rM",
1552 vorps_3 = "rrmoy:0FV56rM",
1553 vpermilpd_3 = "rrmoy:660F38V0DrM|rmioy:660F3Au05rMU",
1554 vpermilps_3 = "rrmoy:660F38V0CrM|rmioy:660F3Au04rMU",
1555 vperm2f128_4 = "rrmiy:660F3AV06rMU",
1556 vptestpd_2 = "rmoy:660F38u0FrM",
1557 vptestps_2 = "rmoy:660F38u0ErM",
1558 vrcpps_2 = "rmoy:0Fu53rM",
1559 vrcpss_3 = "rrro:F30FV53rM|rrx/ood:",
1560 vrsqrtps_2 = "rmoy:0Fu52rM",
1561 vrsqrtss_3 = "rrro:F30FV52rM|rrx/ood:",
1562 vroundpd_3 = "rmioy:660F3Au09rMU",
1563 vroundps_3 = "rmioy:660F3Au08rMU",
1564 vroundsd_4 = "rrrio:660F3AV0BrMU|rrxi/ooq:",
1565 vroundss_4 = "rrrio:660F3AV0ArMU|rrxi/ood:",
1566 vshufpd_4 = "rrmioy:660FVC6rMU",
1567 vshufps_4 = "rrmioy:0FVC6rMU",
1568 vsqrtps_2 = "rmoy:0Fu51rM",
1569 vsqrtss_2 = "rro:F30Fu51rM|rx/od:",
1570 vsqrtpd_2 = "rmoy:660Fu51rM",
1571 vsqrtsd_2 = "rro:F20Fu51rM|rx/oq:",
1572 vstmxcsr_1 = "xd:0FuAE3m",
1573 vucomisd_2 = "rro:660Fu2ErM|rx/oq:",
1574 vucomiss_2 = "rro:0Fu2ErM|rx/od:",
1575 vunpckhpd_3 = "rrmoy:660FV15rM",
1576 vunpckhps_3 = "rrmoy:0FV15rM",
1577 vunpcklpd_3 = "rrmoy:660FV14rM",
1578 vunpcklps_3 = "rrmoy:0FV14rM",
1579 vxorpd_3 = "rrmoy:660FV57rM",
1580 vxorps_3 = "rrmoy:0FV57rM",
1581 vzeroall_0 = "0FuL77",
1582 vzeroupper_0 = "0Fu77",
1583
1584 -- AVX2 FP ops
1585 vbroadcastss_2 = "rx/od:660F38u18rM|rx/yd:|rro:|rr/yo:",
1586 vbroadcastsd_2 = "rx/yq:660F38u19rM|rr/yo:",
1587 -- *vgather* (!vsib)
1588 vpermpd_3 = "rmiy:660F3AuX01rMU",
1589 vpermps_3 = "rrmy:660F38V16rM",
1590
1591 -- AVX, AVX2 integer ops
1592 -- In general, xmm requires AVX, ymm requires AVX2.
1593 vaesdec_3 = "rrmo:660F38VDErM",
1594 vaesdeclast_3 = "rrmo:660F38VDFrM",
1595 vaesenc_3 = "rrmo:660F38VDCrM",
1596 vaesenclast_3 = "rrmo:660F38VDDrM",
1597 vaesimc_2 = "rmo:660F38uDBrM",
1598 vaeskeygenassist_3 = "rmio:660F3AuDFrMU",
1599 vlddqu_2 = "rxoy:F20FuF0rM",
1600 vmaskmovdqu_2 = "rro:660FuF7rM",
1601 vmovdqa_2 = "rmoy:660Fu6FrM|mroy:660Fu7FRm",
1602 vmovdqu_2 = "rmoy:F30Fu6FrM|mroy:F30Fu7FRm",
1603 vmovntdq_2 = "xroy:660FuE7Rm",
1604 vmovntdqa_2 = "rxoy:660F38u2ArM",
1605 vmpsadbw_4 = "rrmioy:660F3AV42rMU",
1606 vpabsb_2 = "rmoy:660F38u1CrM",
1607 vpabsd_2 = "rmoy:660F38u1ErM",
1608 vpabsw_2 = "rmoy:660F38u1DrM",
1609 vpackusdw_3 = "rrmoy:660F38V2BrM",
1610 vpalignr_4 = "rrmioy:660F3AV0FrMU",
1611 vpblendvb_4 = "rrmroy:660F3AV4CrMs",
1612 vpblendw_4 = "rrmioy:660F3AV0ErMU",
1613 vpclmulqdq_4 = "rrmio:660F3AV44rMU",
1614 vpcmpeqq_3 = "rrmoy:660F38V29rM",
1615 vpcmpestri_3 = "rmio:660F3Au61rMU",
1616 vpcmpestrm_3 = "rmio:660F3Au60rMU",
1617 vpcmpgtq_3 = "rrmoy:660F38V37rM",
1618 vpcmpistri_3 = "rmio:660F3Au63rMU",
1619 vpcmpistrm_3 = "rmio:660F3Au62rMU",
1620 vpextrb_3 = "rri/do:660F3Au14nRmU|rri/qo:|xri/bo:",
1621 vpextrw_3 = "rri/do:660FuC5rMU|xri/wo:660F3Au15nRmU",
1622 vpextrd_3 = "mri/do:660F3Au16RmU",
1623 vpextrq_3 = "mri/qo:660F3Au16RmU",
1624 vphaddw_3 = "rrmoy:660F38V01rM",
1625 vphaddd_3 = "rrmoy:660F38V02rM",
1626 vphaddsw_3 = "rrmoy:660F38V03rM",
1627 vphminposuw_2 = "rmo:660F38u41rM",
1628 vphsubw_3 = "rrmoy:660F38V05rM",
1629 vphsubd_3 = "rrmoy:660F38V06rM",
1630 vphsubsw_3 = "rrmoy:660F38V07rM",
1631 vpinsrb_4 = "rrri/ood:660F3AV20rMU|rrxi/oob:",
1632 vpinsrw_4 = "rrri/ood:660FVC4rMU|rrxi/oow:",
1633 vpinsrd_4 = "rrmi/ood:660F3AV22rMU",
1634 vpinsrq_4 = "rrmi/ooq:660F3AVX22rMU",
1635 vpmaddubsw_3 = "rrmoy:660F38V04rM",
1636 vpmaxsb_3 = "rrmoy:660F38V3CrM",
1637 vpmaxsd_3 = "rrmoy:660F38V3DrM",
1638 vpmaxuw_3 = "rrmoy:660F38V3ErM",
1639 vpmaxud_3 = "rrmoy:660F38V3FrM",
1640 vpminsb_3 = "rrmoy:660F38V38rM",
1641 vpminsd_3 = "rrmoy:660F38V39rM",
1642 vpminuw_3 = "rrmoy:660F38V3ArM",
1643 vpminud_3 = "rrmoy:660F38V3BrM",
1644 vpmovmskb_2 = "rr/do:660FuD7rM|rr/dy:660FuLD7rM",
1645 vpmovsxbw_2 = "rroy:660F38u20rM|rx/oq:|rx/yo:",
1646 vpmovsxbd_2 = "rroy:660F38u21rM|rx/od:|rx/yq:",
1647 vpmovsxbq_2 = "rroy:660F38u22rM|rx/ow:|rx/yd:",
1648 vpmovsxwd_2 = "rroy:660F38u23rM|rx/oq:|rx/yo:",
1649 vpmovsxwq_2 = "rroy:660F38u24rM|rx/od:|rx/yq:",
1650 vpmovsxdq_2 = "rroy:660F38u25rM|rx/oq:|rx/yo:",
1651 vpmovzxbw_2 = "rroy:660F38u30rM|rx/oq:|rx/yo:",
1652 vpmovzxbd_2 = "rroy:660F38u31rM|rx/od:|rx/yq:",
1653 vpmovzxbq_2 = "rroy:660F38u32rM|rx/ow:|rx/yd:",
1654 vpmovzxwd_2 = "rroy:660F38u33rM|rx/oq:|rx/yo:",
1655 vpmovzxwq_2 = "rroy:660F38u34rM|rx/od:|rx/yq:",
1656 vpmovzxdq_2 = "rroy:660F38u35rM|rx/oq:|rx/yo:",
1657 vpmuldq_3 = "rrmoy:660F38V28rM",
1658 vpmulhrsw_3 = "rrmoy:660F38V0BrM",
1659 vpmulld_3 = "rrmoy:660F38V40rM",
1660 vpshufb_3 = "rrmoy:660F38V00rM",
1661 vpshufd_3 = "rmioy:660Fu70rMU",
1662 vpshufhw_3 = "rmioy:F30Fu70rMU",
1663 vpshuflw_3 = "rmioy:F20Fu70rMU",
1664 vpsignb_3 = "rrmoy:660F38V08rM",
1665 vpsignw_3 = "rrmoy:660F38V09rM",
1666 vpsignd_3 = "rrmoy:660F38V0ArM",
1667 vpslldq_3 = "rrioy:660Fv737mU",
1668 vpsllw_3 = "rrmoy:660FVF1rM|rrioy:660Fv716mU",
1669 vpslld_3 = "rrmoy:660FVF2rM|rrioy:660Fv726mU",
1670 vpsllq_3 = "rrmoy:660FVF3rM|rrioy:660Fv736mU",
1671 vpsraw_3 = "rrmoy:660FVE1rM|rrioy:660Fv714mU",
1672 vpsrad_3 = "rrmoy:660FVE2rM|rrioy:660Fv724mU",
1673 vpsrldq_3 = "rrioy:660Fv733mU",
1674 vpsrlw_3 = "rrmoy:660FVD1rM|rrioy:660Fv712mU",
1675 vpsrld_3 = "rrmoy:660FVD2rM|rrioy:660Fv722mU",
1676 vpsrlq_3 = "rrmoy:660FVD3rM|rrioy:660Fv732mU",
1677 vptest_2 = "rmoy:660F38u17rM",
1678
1679 -- AVX2 integer ops
1680 vbroadcasti128_2 = "rx/yo:660F38u5ArM",
1681 vinserti128_4 = "rrmi/yyo:660F3AV38rMU",
1682 vextracti128_3 = "mri/oy:660F3AuL39RmU",
1683 vpblendd_4 = "rrmioy:660F3AV02rMU",
1684 vpbroadcastb_2 = "rro:660F38u78rM|rx/ob:|rr/yo:|rx/yb:",
1685 vpbroadcastw_2 = "rro:660F38u79rM|rx/ow:|rr/yo:|rx/yw:",
1686 vpbroadcastd_2 = "rro:660F38u58rM|rx/od:|rr/yo:|rx/yd:",
1687 vpbroadcastq_2 = "rro:660F38u59rM|rx/oq:|rr/yo:|rx/yq:",
1688 vpermd_3 = "rrmy:660F38V36rM",
1689 vpermq_3 = "rmiy:660F3AuX00rMU",
1690 -- *vpgather* (!vsib)
1691 vperm2i128_4 = "rrmiy:660F3AV46rMU",
1692 vpmaskmovd_3 = "rrxoy:660F38V8CrM|xrroy:660F38V8ERm",
1693 vpmaskmovq_3 = "rrxoy:660F38VX8CrM|xrroy:660F38VX8ERm",
1694 vpsllvd_3 = "rrmoy:660F38V47rM",
1695 vpsllvq_3 = "rrmoy:660F38VX47rM",
1696 vpsravd_3 = "rrmoy:660F38V46rM",
1697 vpsrlvd_3 = "rrmoy:660F38V45rM",
1698 vpsrlvq_3 = "rrmoy:660F38VX45rM",
1699
1700 -- Intel ADX
1701 adcx_2 = "rmqd:660F38F6rM",
1702 adox_2 = "rmqd:F30F38F6rM",
1703
1704 -- BMI1
1705 andn_3 = "rrmqd:0F38VF2rM",
1706 bextr_3 = "rmrqd:0F38wF7rM",
1707 blsi_2 = "rmqd:0F38vF33m",
1708 blsmsk_2 = "rmqd:0F38vF32m",
1709 blsr_2 = "rmqd:0F38vF31m",
1710 tzcnt_2 = "rmqdw:F30FBCrM",
1711
1712 -- BMI2
1713 bzhi_3 = "rmrqd:0F38wF5rM",
1714 mulx_3 = "rrmqd:F20F38VF6rM",
1715 pdep_3 = "rrmqd:F20F38VF5rM",
1716 pext_3 = "rrmqd:F30F38VF5rM",
1717 rorx_3 = "rmSqd:F20F3AuF0rMS",
1718 sarx_3 = "rmrqd:F30F38wF7rM",
1719 shrx_3 = "rmrqd:F20F38wF7rM",
1720 shlx_3 = "rmrqd:660F38wF7rM",
1721
1722 -- FMA3
1723 vfmaddsub132pd_3 = "rrmoy:660F38VX96rM",
1724 vfmaddsub132ps_3 = "rrmoy:660F38V96rM",
1725 vfmaddsub213pd_3 = "rrmoy:660F38VXA6rM",
1726 vfmaddsub213ps_3 = "rrmoy:660F38VA6rM",
1727 vfmaddsub231pd_3 = "rrmoy:660F38VXB6rM",
1728 vfmaddsub231ps_3 = "rrmoy:660F38VB6rM",
1729
1730 vfmsubadd132pd_3 = "rrmoy:660F38VX97rM",
1731 vfmsubadd132ps_3 = "rrmoy:660F38V97rM",
1732 vfmsubadd213pd_3 = "rrmoy:660F38VXA7rM",
1733 vfmsubadd213ps_3 = "rrmoy:660F38VA7rM",
1734 vfmsubadd231pd_3 = "rrmoy:660F38VXB7rM",
1735 vfmsubadd231ps_3 = "rrmoy:660F38VB7rM",
1736
1737 vfmadd132pd_3 = "rrmoy:660F38VX98rM",
1738 vfmadd132ps_3 = "rrmoy:660F38V98rM",
1739 vfmadd132sd_3 = "rrro:660F38VX99rM|rrx/ooq:",
1740 vfmadd132ss_3 = "rrro:660F38V99rM|rrx/ood:",
1741 vfmadd213pd_3 = "rrmoy:660F38VXA8rM",
1742 vfmadd213ps_3 = "rrmoy:660F38VA8rM",
1743 vfmadd213sd_3 = "rrro:660F38VXA9rM|rrx/ooq:",
1744 vfmadd213ss_3 = "rrro:660F38VA9rM|rrx/ood:",
1745 vfmadd231pd_3 = "rrmoy:660F38VXB8rM",
1746 vfmadd231ps_3 = "rrmoy:660F38VB8rM",
1747 vfmadd231sd_3 = "rrro:660F38VXB9rM|rrx/ooq:",
1748 vfmadd231ss_3 = "rrro:660F38VB9rM|rrx/ood:",
1749
1750 vfmsub132pd_3 = "rrmoy:660F38VX9ArM",
1751 vfmsub132ps_3 = "rrmoy:660F38V9ArM",
1752 vfmsub132sd_3 = "rrro:660F38VX9BrM|rrx/ooq:",
1753 vfmsub132ss_3 = "rrro:660F38V9BrM|rrx/ood:",
1754 vfmsub213pd_3 = "rrmoy:660F38VXAArM",
1755 vfmsub213ps_3 = "rrmoy:660F38VAArM",
1756 vfmsub213sd_3 = "rrro:660F38VXABrM|rrx/ooq:",
1757 vfmsub213ss_3 = "rrro:660F38VABrM|rrx/ood:",
1758 vfmsub231pd_3 = "rrmoy:660F38VXBArM",
1759 vfmsub231ps_3 = "rrmoy:660F38VBArM",
1760 vfmsub231sd_3 = "rrro:660F38VXBBrM|rrx/ooq:",
1761 vfmsub231ss_3 = "rrro:660F38VBBrM|rrx/ood:",
1762
1763 vfnmadd132pd_3 = "rrmoy:660F38VX9CrM",
1764 vfnmadd132ps_3 = "rrmoy:660F38V9CrM",
1765 vfnmadd132sd_3 = "rrro:660F38VX9DrM|rrx/ooq:",
1766 vfnmadd132ss_3 = "rrro:660F38V9DrM|rrx/ood:",
1767 vfnmadd213pd_3 = "rrmoy:660F38VXACrM",
1768 vfnmadd213ps_3 = "rrmoy:660F38VACrM",
1769 vfnmadd213sd_3 = "rrro:660F38VXADrM|rrx/ooq:",
1770 vfnmadd213ss_3 = "rrro:660F38VADrM|rrx/ood:",
1771 vfnmadd231pd_3 = "rrmoy:660F38VXBCrM",
1772 vfnmadd231ps_3 = "rrmoy:660F38VBCrM",
1773 vfnmadd231sd_3 = "rrro:660F38VXBDrM|rrx/ooq:",
1774 vfnmadd231ss_3 = "rrro:660F38VBDrM|rrx/ood:",
1775
1776 vfnmsub132pd_3 = "rrmoy:660F38VX9ErM",
1777 vfnmsub132ps_3 = "rrmoy:660F38V9ErM",
1778 vfnmsub132sd_3 = "rrro:660F38VX9FrM|rrx/ooq:",
1779 vfnmsub132ss_3 = "rrro:660F38V9FrM|rrx/ood:",
1780 vfnmsub213pd_3 = "rrmoy:660F38VXAErM",
1781 vfnmsub213ps_3 = "rrmoy:660F38VAErM",
1782 vfnmsub213sd_3 = "rrro:660F38VXAFrM|rrx/ooq:",
1783 vfnmsub213ss_3 = "rrro:660F38VAFrM|rrx/ood:",
1784 vfnmsub231pd_3 = "rrmoy:660F38VXBErM",
1785 vfnmsub231ps_3 = "rrmoy:660F38VBErM",
1786 vfnmsub231sd_3 = "rrro:660F38VXBFrM|rrx/ooq:",
1787 vfnmsub231ss_3 = "rrro:660F38VBFrM|rrx/ood:",
1788 }
1789
1790 ------------------------------------------------------------------------------
1791
-- Integer ALU ops: add, or, adc, sbb, and, sub, xor, cmp.
-- The position of each mnemonic (0..7) selects its opcode row (position * 8)
-- and is also the digit placed before "m" in the 81/83 immediate forms.
do
  local alu_names = { "add", "or", "adc", "sbb", "and", "sub", "xor", "cmp" }
  local alu_template =
    "mr:%02XRm|rm:%02XrM|mI1qdw:81%XmI|mS1qdw:83%XmS|Ri1qdwb:%02Xri|mi1qdwb:81%Xmi"
  for pos = 1, #alu_names do
    local ext = pos - 1   -- Operation index 0..7.
    local row = ext * 8   -- First opcode of this operation's row.
    map_op[alu_names[pos].."_2"] =
      format(alu_template, row + 1, row + 3, ext, ext, row + 5, ext)
  end
end
1800
-- Shift and rotate ops. Each value is the hex digit placed before "m" in
-- the D1/D3/C1 patterns; "sal" shares its digit with "shl".
do
  local shift_digits = {
    rol = 0, ror = 1, rcl = 2, rcr = 3,
    shl = 4, sal = 4, shr = 5, sar = 7,
  }
  local shift_template = "m1:D1%Xm|mC1qdwb:D3%Xm|mi:C1%XmU"
  for mnemonic, digit in pairs(shift_digits) do
    map_op[mnemonic.."_2"] = format(shift_template, digit, digit, digit)
  end
end
1806
-- Condition-code dependent ops: a jcc, setcc and cmovcc template is
-- registered for every entry of map_cc.
do
  -- Each form is { mnemonic prefix, operand-count suffix, template format }.
  local cc_forms = {
    { "j",    "_1", "J.:n0F8%XJ" },     -- short: 7%X
    { "set",  "_1", "mb:n0F9%X2m" },
    { "cmov", "_2", "rmqdw:0F4%XrM" },  -- P6+
  }
  for cond, code in pairs(map_cc) do
    for i = 1, #cc_forms do
      local form = cc_forms[i]
      map_op[form[1]..cond..form[2]] = format(form[3], code)
    end
  end
end
1813
-- x87 arithmetic ops: fadd, fmul, fcom, fcomp, fsub, fsubr, fdiv, fdivr,
-- together with their popping ("p") and integer-operand ("fi") variants.
do
  local fp_names = { "add", "mul", "com", "comp", "sub", "subr", "div", "divr" }
  for pos = 1, #fp_names do
    local name = fp_names[pos]
    local ext = pos - 1          -- Operation index 0..7.
    local fwd = 0xc0 + ext * 8   -- Second byte of the D8 register form.
    -- Second byte of the DC/DE register forms: identical to fwd for the
    -- indexes 0..3, exchanged within the sub/subr and div/divr pairs.
    local rev = fwd
    if ext >= 4 then
      if ext % 2 == 0 then rev = fwd + 8 else rev = fwd - 8 end
    end
    local stem = "f"..name

    map_op[stem.."_1"] = format("ff:D8%02Xr|xd:D8%Xm|xq:nDC%Xm", fwd, ext, ext)
    if ext == 2 or ext == 3 then
      -- com/comp: no fFf form and no popping variants are generated here.
      map_op[stem.."_2"] = format("Fff:D8%02XR|Fx2d:D8%XM|Fx2q:nDC%XM", fwd, ext, ext)
    else
      map_op[stem.."_2"] = format("Fff:D8%02XR|fFf:DC%02Xr|Fx2d:D8%XM|Fx2q:nDC%XM", fwd, rev, ext, ext)
      map_op[stem.."p_1"] = format("ff:DE%02Xr", rev)
      map_op[stem.."p_2"] = format("fFf:DE%02Xr", rev)
    end
    map_op["fi"..name.."_1"] = format("xd:DA%Xm|xw:nDE%Xm", ext, ext)
  end
end
1830
-- x87 conditional moves (P6+).
do
  local fcmov_conds = { "b", "e", "be", "u", "nb", "ne", "nbe", "nu" }
  for pos = 1, #fcmov_conds do
    local idx = pos - 1
    -- Indexes 0..3 start at DAC0, the n-prefixed conditions 4..7 at DBC0;
    -- the low two index bits step through the rows in units of 8.
    local base = idx < 4 and 0xdac0 or 0xdbc0
    local opword = base + (idx % 4) * 8
    local stem = "fcmov"..fcmov_conds[pos]
    map_op[stem.."_1"] = format("ff:%04Xr", opword)
    map_op[stem.."_2"] = format("Fff:%04XR", opword)
  end
end
1837
-- SSE / AVX FP arithmetic ops. Each value is the low hex digit of the
-- 0F5x opcode shared by the ps/ss/pd/sd forms of the mnemonic.
do
  local fp_digits = {
    sqrt = 1, add = 8, mul = 9,
    sub = 12, min = 13, div = 14, max = 15,
  }
  -- Each form is { name suffix, template format }.
  local sse_forms = {
    { "ps_2", "rmo:0F5%XrM" },
    { "ss_2", "rro:F30F5%XrM|rx/od:" },
    { "pd_2", "rmo:660F5%XrM" },
    { "sd_2", "rro:F20F5%XrM|rx/oq:" },
  }
  local avx_forms = {
    { "ps_3", "rrmoy:0FV5%XrM" },
    { "ss_3", "rrro:F30FV5%XrM|rrx/ood:" },
    { "pd_3", "rrmoy:660FV5%XrM" },
    { "sd_3", "rrro:F20FV5%XrM|rrx/ooq:" },
  }
  for stem, digit in pairs(fp_digits) do
    for i = 1, #sse_forms do
      local form = sse_forms[i]
      map_op[stem..form[1]] = format(form[2], digit)
    end
    -- No three-operand AVX forms are generated for sqrt; map_op lists
    -- vsqrt* entries explicitly.
    if stem ~= "sqrt" then
      for i = 1, #avx_forms do
        local form = avx_forms[i]
        map_op["v"..stem..form[1]] = format(form[2], digit)
      end
    end
  end
end
1852
-- SSE2 / AVX / AVX2 integer arithmetic ops (66 0F leaf).
-- Every mnemonic gets a two-operand SSE2 template and a three-operand
-- "v"-prefixed VEX template built from the same opcode byte.
do
  local int_opcodes = {
    -- Pack and unpack.
    packsswb = 0x63, packssdw = 0x6B, packuswb = 0x67,
    punpcklbw = 0x60, punpcklwd = 0x61, punpckldq = 0x62, punpcklqdq = 0x6C,
    punpckhbw = 0x68, punpckhwd = 0x69, punpckhdq = 0x6A, punpckhqdq = 0x6D,
    -- Compare.
    pcmpgtb = 0x64, pcmpgtw = 0x65, pcmpgtd = 0x66,
    pcmpeqb = 0x74, pcmpeqw = 0x75, pcmpeqd = 0x76,
    -- Add.
    paddb = 0xFC, paddw = 0xFD, paddd = 0xFE, paddq = 0xD4,
    paddsb = 0xEC, paddsw = 0xED, paddusb = 0xDC, paddusw = 0xDD,
    -- Subtract.
    psubb = 0xF8, psubw = 0xF9, psubd = 0xFA, psubq = 0xFB,
    psubsb = 0xE8, psubsw = 0xE9, psubusb = 0xD8, psubusw = 0xD9,
    -- Multiply.
    pmullw = 0xD5, pmulhw = 0xE5, pmulhuw = 0xE4, pmuludq = 0xF4,
    pmaddwd = 0xF5,
    -- Min, max, average, sum of absolute differences.
    pminub = 0xDA, pmaxub = 0xDE, pminsw = 0xEA, pmaxsw = 0xEE,
    pavgb = 0xE0, pavgw = 0xE3, psadbw = 0xF6,
    -- Logical.
    pand = 0xDB, pandn = 0xDF, por = 0xEB, pxor = 0xEF,
  }
  for mnemonic, opbyte in pairs(int_opcodes) do
    map_op[mnemonic.."_2"] = format("rmo:660F%02XrM", opbyte)
    map_op["v"..mnemonic.."_3"] = format("rrmoy:660FV%02XrM", opbyte)
  end
end
1874
1875 ------------------------------------------------------------------------------
1876
-- Pattern characters that switch the opcode to VEX encoding. The value is the
-- index of the operand that dopattern() removes from args and stores in the
-- `v' field of the vex table; `false' means no operand is removed.
1877 local map_vexarg = { u = false, v = 1, V = 2, w = 3 }
1878
1879 -- Process pattern string.
-- Emits the encoding of one instruction by interpreting `pat' char by char.
--   pat:     pattern string; a "|" terminator is appended internally.
--   args:    array of parsed operands (fields read here: reg, xreg, vreg,
--            vxreg, disp, mode, imm). A VEX pattern char may remove an entry.
--   sz:      operand size, passed on to wputop() and wputszarg().
--   op:      opcode name, only used in error messages.
--   needrex: if truthy, 16 is added to the REX flags given to wputop().
-- Returns nothing; all output goes through the wput*/waction helpers.
1880 local function dopattern(pat, args, sz, op, needrex)
-- digit: value of the last hex char. addin: operand whose register number was
-- merged into the opcode by `r'/`R'. vex: table created by a VEX pattern char.
1881 local digit, addin, vex
1882 local opcode = 0
-- szov is the size handed to wputop(); the `n' pattern char clears it.
1883 local szov = sz
-- narg is the index of the next operand consumed as an immediate.
1884 local narg = 1
1885 local rex = 0
1886
1887 -- Limit number of section buffer positions used by a single dasm_put().
1888 -- A single opcode needs a maximum of 6 positions.
1889 if secpos+6 > maxsecpos then wflush() end
1890
1891 -- Process each character.
1892 for c in gmatch(pat.."|", ".") do
1893 if match(c, "%x") then -- Hex digit.
-- '0'-'9' yield 0-9 directly. Lower case letters (byte-48 > 48) need -39,
-- upper case letters (byte-48 > 16) need -7 to end up as 10-15.
1894 digit = byte(c) - 48
1895 if digit > 48 then digit = digit - 39
1896 elseif digit > 16 then digit = digit - 7 end
1897 opcode = opcode*16 + digit
1898 addin = nil
1899 elseif c == "n" then -- Disable operand size mods for opcode.
1900 szov = nil
1901 elseif c == "X" then -- Force REX.W.
1902 rex = 8
1903 elseif c == "L" then -- Force VEX.L.
-- vex is only created by a VEX pattern char below, so that char has to
-- precede `L' in the pattern.
1904 vex.l = true
1905 elseif c == "r" then -- Merge 1st operand regno. into opcode.
1906 addin = args[1]; opcode = opcode + (addin.reg % 8)
-- Immediates are taken from the operands after the ones used so far.
1907 if narg < 2 then narg = 2 end
1908 elseif c == "R" then -- Merge 2nd operand regno. into opcode.
1909 addin = args[2]; opcode = opcode + (addin.reg % 8)
1910 narg = 3
1911 elseif c == "m" or c == "M" then -- Encode ModRM/SIB.
-- s becomes the spare field of the ModRM byte: either the register number
-- merged by a preceding `r'/`R' or the last hex digit of the opcode.
1912 local s
1913 if addin then
1914 s = addin.reg
1915 opcode = opcode - band(s, 7) -- Undo regno opcode merge.
1916 else
1917 s = band(opcode, 15) -- Undo last digit.
1918 opcode = shr(opcode, 4)
1919 end
-- `m' takes the 1st operand as the r/m operand, `M' the 2nd.
1920 local nn = c == "m" and 1 or 2
1921 local t = args[nn]
1922 if narg <= nn then narg = nn + 1 end
-- Accumulate the REX flags for wputop(): +8 for 64 bit operand size, +1 if
-- the r/m operand's reg is above 7, +2 if its xreg is above 7, +4 if the
-- spare register number is above 7, +16 if the caller passed needrex.
1923 if szov == "q" and rex == 0 then rex = rex + 8 end
1924 if t.reg and t.reg > 7 then rex = rex + 1 end
1925 if t.xreg and t.xreg > 7 then rex = rex + 2 end
1926 if s > 7 then rex = rex + 4 end
1927 if needrex then rex = rex + 16 end
1928 local psz, sk = wputop(szov, opcode, rex, vex, s < 0, t.vreg or t.vxreg)
-- The opcode has been emitted; nil keeps the flush code below from emitting
-- it a second time.
1929 opcode = nil
1930 local imark = sub(pat, -1) -- Force a mark (ugly).
1931 -- Put ModRM/SIB with regno/last digit as spare.
1932 wputmrmsib(t, imark, s, addin and addin.vreg, psz, sk)
1933 addin = nil
1934 elseif map_vexarg[c] ~= nil then -- Encode using VEX prefix
-- The low byte(s) of the opcode accumulated so far must be 0F, 0F38 or 0F3A.
-- They select m = 1, 2 or 3 and are stripped from the opcode.
1935 local b = band(opcode, 255); opcode = shr(opcode, 8)
1936 local m = 1
1937 if b == 0x38 then m = 2
1938 elseif b == 0x3a then m = 3 end
1939 if m ~= 1 then b = band(opcode, 255); opcode = shr(opcode, 8) end
1940 if b ~= 0x0f then
1941 werror("expected `0F', `0F38', or `0F3A' to precede `"..c..
1942 "' in pattern `"..pat.."' for `"..op.."'")
1943 end
-- For `v', `V' and `w' the operand at the mapped index is removed from args
-- and kept in v; the remaining operands shift down by one.
1944 local v = map_vexarg[c]
1945 if v then v = remove(args, v) end
-- An optional 66, F3 or F2 byte in front selects p = 1, 2 or 3 and is
-- stripped as well.
1946 b = band(opcode, 255)
1947 local p = 0
1948 if b == 0x66 then p = 1
1949 elseif b == 0xf3 then p = 2
1950 elseif b == 0xf2 then p = 3 end
1951 if p ~= 0 then opcode = shr(opcode, 8) end
-- Whatever opcode bytes are left are emitted right away.
1952 if opcode ~= 0 then wputop(nil, opcode, 0); opcode = 0 end
1953 vex = { m = m, p = p, v = v }
1954 else
-- Any other char: flush a still pending opcode first, then treat the char as
-- the pattern terminator, a displacement or an immediate operand.
1955 if opcode then -- Flush opcode.
1956 if szov == "q" and rex == 0 then rex = rex + 8 end
1957 if needrex then rex = rex + 16 end
1958 if addin and addin.reg == -1 then
-- A register number of -1 added (-1 % 8) == 7 to the opcode when it was
-- merged; undo that and hand the register to wvreg() instead.
1959 local psz, sk = wputop(szov, opcode - 7, rex, vex, true)
1960 wvreg("opcode", addin.vreg, psz, sk)
1961 else
1962 if addin and addin.reg > 7 then rex = rex + 1 end
1963 wputop(szov, opcode, rex, vex)
1964 end
1965 opcode = nil
1966 end
1967 if c == "|" then break end
1968 if c == "o" then -- Offset (pure 32 bit displacement).
1969 wputdarg(args[1].disp); if narg < 2 then narg = 2 end
1970 elseif c == "O" then
-- Same as `o', but the displacement is taken from the 2nd operand.
1971 wputdarg(args[2].disp); narg = 3
1972 else
1973 -- Anything else is an immediate operand.
1974 local a = args[narg]
1975 narg = narg + 1
1976 local mode, imm = a.mode, a.imm
-- A label operand (mode "iJ") is only accepted for `J' on x64 and for
-- `i', `I' or `J' otherwise.
1977 if mode == "iJ" and not match(x64 and "J" or "iIJ", c) then
1978 werror("bad operand size for label")
1979 end
1980 if c == "S" then
1981 wputsbarg(imm)
1982 elseif c == "U" then
1983 wputbarg(imm)
1984 elseif c == "W" then
1985 wputwarg(imm)
1986 elseif c == "i" or c == "I" then
1987 if mode == "iJ" then
1988 wputlabel("IMM_", imm, 1)
1989 elseif mode == "iI" and c == "I" then
1990 waction(sz == "w" and "IMM_WB" or "IMM_DB", imm)
1991 else
1992 wputszarg(sz, imm)
1993 end
1994 elseif c == "J" then
1995 if mode == "iPJ" then
1996 waction("REL_A", imm) -- !x64 (secpos)
1997 else
1998 wputlabel("REL_", imm, 2)
1999 end
2000 elseif c == "s" then
-- Register operand encoded in the upper 4 bits of an immediate byte. For a
-- negative register number a zero byte is emitted and wvreg() is called.
2001 local reg = a.reg
2002 if reg < 0 then
2003 wputb(0)
2004 wvreg("imm.hi", a.vreg)
2005 else
2006 wputb(shl(reg, 4))
2007 end
2008 else
2009 werror("bad char `"..c.."' in pattern `"..pat.."' for `"..op.."'")
2010 end
2011 end
2012 end
2013 end
2014 end
2015
2016 ------------------------------------------------------------------------------
2017
-- Short display names for the operand mode characters used in templates.
-- A mode mapped to '#' suppresses the whole alternative in the help output.
local map_modename = {
  -- Registers and memory.
  r = "reg",
  R = "eax",
  C = "cl",
  x = "mem",
  m = "mrm",
  -- x87 stack registers.
  f = "stx",
  F = "st0",
  -- Immediates and labels.
  i = "imm",
  J = "lbl",
  ["1"] = "1",
  -- Suppressed modes.
  I = "#",
  S = "#",
  O = "#",
}
2024
-- Build the help text for a template.
-- Returns "" for opcodes without operands, otherwise an array with one
-- "mode, mode, ..." string per template alternative. Alternatives whose
-- description contains a '#' (suppressed mode) are left out.
local function templatehelp(template, nparams)
  if nparams == 0 then return "" end
  -- Used for the 2nd and later operands: prepend a separator to the name.
  local function append_mode(ch)
    return ", "..map_modename[ch]
  end
  local lines = {}
  for alt in gmatch(template, "[^%|]+") do
    local first = map_modename[sub(alt, 1, 1)]
    local rest = gsub(sub(alt, 2, nparams), ".", append_mode)
    local desc = first..rest
    if not match(desc, "#") then lines[#lines+1] = desc end
  end
  return lines
end
2038
-- Check the operand modes against the mode match part of a template.
-- The i-th character of `tm' is used as a pattern that must match the mode
-- string of the i-th operand. Returns true if all operands match and nothing
-- (nil) otherwise.
local function matchtm(tm, args)
  for pos = 1, #args do
    local wanted = sub(tm, pos, pos)
    if match(args[pos].mode, wanted) == nil then return end
  end
  return true
end
2046
-- Handle opcodes defined with template strings.
-- A template is a list of alternatives separated by '|'. Each alternative
-- consists of a mode match (one char per operand), a size match and, after
-- the ':', the pattern string. An empty pattern reuses the pattern of the
-- previous alternative. Without params the help text is returned instead.
map_op[".template__"] = function(params, template, nparams)
  if not params then return templatehelp(template, nparams) end
  local args = {}

  -- Without operands the whole template is the pattern.
  if #params == 0 then
    dopattern(template, args, "d", params.op, nil)
    return
  end

  -- Parse the operands. Track the common operand size (or note that the
  -- sizes are mixed) and the REX requirement of byte registers.
  local sz, szmix, needrex
  for idx, param in ipairs(params) do
    local arg = parseoperand(param)
    args[idx] = arg
    local argsz = arg.opsize
    if argsz then
      if sz and sz ~= argsz then
        szmix = true
      else
        sz = argsz
      end
    end
    local argrex = arg.needrex
    if argrex ~= nil then
      if needrex == nil then
        needrex = argrex
      elseif needrex ~= argrex then
        werror("bad mix of byte-addressable registers")
      end
    end
  end

  -- Try the alternatives in order; the first full match is assembled.
  local nargs = #args
  local gotmatch, lastpat
  for tm in gmatch(template, "[^%|]+") do
    -- The size match starts right after the mode match.
    local szm, pat = match(tm, "^(.-):(.*)$", nargs + 1)
    if pat ~= "" then lastpat = pat else pat = lastpat end
    if matchtm(tm, args) then
      local prefix = sub(szm, 1, 1)
      if prefix == "/" then
        -- Chars 2..n of the size match must equal the sizes of operands
        -- 1..n-1 exactly. Checked from the last one down to the first.
        local exact = true
        for i = #szm, 2, -1 do
          if args[i-1].opsize ~= sub(szm, i, i) then
            exact = false
            break
          end
        end
        if exact then
          dopattern(pat, args, sz, params.op, needrex)
          return
        end
      else
        -- Match against the common operand size.
        local szp = sz
        if szm == "" then szm = x64 and "qdwb" or "dwb" end -- Default sizes.
        -- A '1' or '2' prefix takes the size from that operand and clears
        -- the mixed size flag (this also affects later alternatives).
        local which = (prefix == "1" and 1) or (prefix == "2" and 2)
        if which then
          szp = args[which].opsize
          szmix = nil
        end
        if not szmix then
          -- A '.' prefix accepts any size. Otherwise the size must occur
          -- in the size match; an unknown size is looked up as '#'.
          if prefix == "." or match(szm, szp or "#") then
            dopattern(pat, args, szp, params.op, needrex)
            return
          end
        end
      end
      gotmatch = true
    end
  end

  -- Nothing matched: pick the most specific error message.
  local msg
  if not gotmatch then
    msg = "bad operand mode"
  elseif szmix then
    msg = "mixed operand size"
  elseif sz then
    msg = "bad operand size"
  else
    msg = "missing operand size"
  end

  werror(msg.." in `"..opmodestr(params.op, args).."'")
end
2118
2119 ------------------------------------------------------------------------------
2120
-- x64-specific opcode for 64 bit immediates and displacements.
if x64 then
  -- mov64 reg, imm | reg, [disp] | [disp], reg
  -- Emits the opcode followed by the 64 bit value as two IMM_D actions: the
  -- expression cast to unsigned int first, then the expression shifted right
  -- by 32. Called without params it returns the operand forms for help.
  function map_op.mov64_2(params)
    if not params then return { "reg, imm", "reg, [disp]", "[disp], reg" } end
    if secpos+2 > maxsecpos then wflush() end
    local opcode, sz, rex, vreg
    -- op64 is the C expression for the 64 bit value. It is either the text
    -- inside the brackets of a "[disp]" operand or the immediate operand.
    local op64 = match(params[1], "^%[%s*(.-)%s*%]$")
    if op64 then
      -- [disp], reg: the register operand must have mode "rmR".
      local a = parseoperand(params[2])
      if a.mode ~= "rmR" then werror("bad operand mode") end
      sz = a.opsize
      rex = sz == "q" and 8 or 0
      opcode = 0xa3
    else
      op64 = match(params[2], "^%[%s*(.-)%s*%]$")
      local a = parseoperand(params[1])
      if op64 then
        -- reg, [disp]: the register operand must have mode "rmR".
        if a.mode ~= "rmR" then werror("bad operand mode") end
        sz = a.opsize
        rex = sz == "q" and 8 or 0
        opcode = 0xa1
      else
        -- reg, imm: only 64 bit registers are accepted.
        if sub(a.mode, 1, 1) ~= "r" or a.opsize ~= "q" then
          werror("bad operand mode")
        end
        op64 = params[2]
        if a.reg == -1 then
          -- Register number of -1: leave the opcode unmerged and pass the
          -- operand's vreg on to wputop()/wvreg() below.
          vreg = a.vreg
          opcode = 0xb8
        else
          opcode = 0xb8 + band(a.reg, 7)
        end
        -- REX flags: 8 always, plus 1 for register numbers above 7.
        rex = a.reg > 7 and 9 or 8
      end
    end
    local psz, sk = wputop(sz, opcode, rex, nil, vreg)
    wvreg("opcode", vreg, psz, sk)
    waction("IMM_D", format("(unsigned int)(%s)", op64))
    waction("IMM_D", format("(unsigned int)((%s)>>32)", op64))
  end
end
2162
2163 ------------------------------------------------------------------------------
2164
-- Pseudo-opcodes for data storage.
-- The element size is taken from the 2nd character of the opcode name
-- ('l' is treated as 'd', 'a' as the current address size). Every operand
-- must be an immediate whose explicit size, if any, agrees with it.
-- Without params the help text is returned.
local function op_data(params)
  if not params then return "imm..." end
  local sz = sub(params.op, 2, 2)
  if sz == "l" then
    sz = "d"
  elseif sz == "a" then
    sz = addrsize
  end
  local is64 = sz == "q"
  for _, param in ipairs(params) do
    local opnd = parseoperand(param, is64)
    local mode, opsize = opnd.mode, opnd.opsize
    local is_imm = sub(mode, 1, 1) == "i"
    local size_ok = not opsize or opsize == sz
    if not (is_imm and size_ok) then
      werror("bad mode or size in `"..param.."'")
    end
    if mode == "iJ" then
      wputlabel("IMM_", opnd.imm, 1)
    elseif is64 then
      wputqarg(opnd.imm)
    else
      wputszarg(sz, opnd.imm)
    end
    -- Flush after each element if the section buffer is getting full.
    if secpos+2 > maxsecpos then wflush() end
  end
end
2185
-- All data storage pseudo-opcodes share the op_data handler.
for _, name in ipairs({
  "byte", "sbyte", "word", "dword", "qword", "aword", "long", "quad", "addr",
}) do
  map_op["."..name.."_*"] = op_data
end
2195
2196 ------------------------------------------------------------------------------
2197
-- Pseudo-opcode to mark the position where the action list is to be emitted.
-- The output line is a function, which calls writeactions() when invoked.
map_op[".actionlist_1"] = function(params)
  if params then
    local cvar = params[1] -- No syntax check. You get to keep the pieces.
    wline(function(out) writeactions(out, cvar) end)
    return
  end
  return "cvar"
end
2204
-- Pseudo-opcode to mark the position where the global enum is to be emitted.
-- The output line is a function, which calls writeglobals() when invoked.
map_op[".globals_1"] = function(params)
  if params then
    local enum_prefix = params[1] -- No syntax check. You get to keep the pieces.
    wline(function(out) writeglobals(out, enum_prefix) end)
    return
  end
  return "prefix"
end
2211
-- Pseudo-opcode to mark the position where the global names are to be emitted.
-- The output line is a function, which calls writeglobalnames() when invoked.
map_op[".globalnames_1"] = function(params)
  if params then
    local cvar = params[1] -- No syntax check. You get to keep the pieces.
    wline(function(out) writeglobalnames(out, cvar) end)
    return
  end
  return "cvar"
end
2218
-- Pseudo-opcode to mark the position where the extern names are to be emitted.
-- The output line is a function, which calls writeexternnames() when invoked.
map_op[".externnames_1"] = function(params)
  if params then
    local cvar = params[1] -- No syntax check. You get to keep the pieces.
    wline(function(out) writeexternnames(out, cvar) end)
    return
  end
  return "cvar"
end
2225
2226 ------------------------------------------------------------------------------
2227
-- Label pseudo-opcode (converted from trailing colon form).
-- The 1st operand defines the label, the optional 2nd operand assigns an
-- address to it. Without params the help text is returned.
map_op[".label_2"] = function(params)
  if not params then return "[1-9] | ->global | =>pcexpr [, addr]" end
  if secpos+2 > maxsecpos then wflush() end
  local label = parseoperand(params[1])
  local mode, imm = label.mode, label.imm
  local is_label_mode = (mode == "iJ")
  if type(imm) == "number" and (is_label_mode or (imm >= 1 and imm <= 9)) then
    -- Local label (1: ... 9:) or global label (->global:).
    waction("LABEL_LG", nil, 1)
    wputxb(imm)
  elseif is_label_mode then
    -- PC label (=>pcexpr:).
    waction("LABEL_PC", imm)
  else
    werror("bad label definition")
  end
  -- SETLABEL must immediately follow LABEL_LG/LABEL_PC.
  local addr = params[2]
  if not addr then return end
  local target = parseoperand(addr)
  if target.mode ~= "iPJ" then
    werror("bad label assignment")
  else
    waction("SETLABEL", target.imm)
  end
end
map_op[".label_1"] = map_op[".label_2"]
2256
2257 ------------------------------------------------------------------------------
2258
-- Alignment pseudo-opcode.
-- The operand is either a number or an operand size name. Without params
-- the help text is returned.
map_op[".align_1"] = function(params)
  if not params then return "numpow2" end
  if secpos+1 > maxsecpos then wflush() end
  local arg = params[1]
  local align = tonumber(arg) or map_opsizenum[map_opsize[arg]]
  if align then
    -- Must be a power of 2 in the range (2 ... 256): halve up to 8 times
    -- and accept if the remainder hits exactly 1.
    local rest, halvings = align, 0
    while halvings < 8 do
      rest = rest / 2
      halvings = halvings + 1
      if rest == 1 then
        waction("ALIGN", nil, 1)
        wputxb(align-1) -- Action byte is 2**n-1.
        return
      end
    end
  end
  werror("bad alignment")
end
2278
-- Spacing pseudo-opcode.
-- Emits a SPACE action for the 1st operand, followed by the filler byte.
-- The optional filler must be a number from 0 to 255 and defaults to 0.
-- Without params the help text is returned.
map_op[".space_2"] = function(params)
  if not params then return "num [, filler]" end
  if secpos+1 > maxsecpos then wflush() end
  waction("SPACE", params[1])
  local filler = params[2]
  if filler then
    filler = tonumber(filler)
    if not filler or filler < 0 or filler > 255 then
      werror("bad filler")
    end
  end
  if not filler then filler = 0 end
  wputxb(filler)
end
map_op[".space_1"] = map_op[".space_2"]
2292
2293 ------------------------------------------------------------------------------
2294
-- Pseudo-opcode for (primitive) type definitions (map to C types).
-- Operands: type name, C type and an optional base register. Registers the
-- type, a "#name" define for its size and emits a DtN() shortcut macro.
-- Without params the help text for the 2 or 3 operand form is returned.
map_op[".type_3"] = function(params, nparams)
  if not params then
    if nparams == 2 then return "name, ctype" end
    return "name, ctype, reg"
  end
  local name, ctype, reg = params[1], params[2], params[3]
  if not match(name, "^[%a_][%w_]*$") then
    werror("bad type name `"..name.."'")
  end
  if map_type[name] then
    werror("duplicate type `"..name.."'")
  end
  if reg and not map_reg_valid_base[reg] then
    werror("bad base register `"..(map_reg_rev[reg] or reg).."'")
  end
  -- Add #type to defines. A bit unclean to put it in map_archdef.
  map_archdef["#"..name] = "sizeof("..ctype..")"
  -- Add new type and emit shortcut define.
  local num = ctypenum + 1
  local entry = { ctype = ctype, reg = reg }
  entry.ctypefmt = format("Dt%X(%%s)", num)
  map_type[name] = entry
  wline(format("#define Dt%X(_V) (int)(ptrdiff_t)&(((%s *)0)_V)", num, ctype))
  -- The counter is only advanced after the define has been written.
  ctypenum = num
end
map_op[".type_2"] = map_op[".type_3"]
2324
-- Dump type definitions.
-- Writes one line per defined type to `out', sorted by type name, with the
-- C type and the name of the base register (empty if there is none).
-- The `lvl' argument is not used here.
local function dumptypes(out, lvl)
  local names = {}
  for name in pairs(map_type) do names[#names+1] = name end
  sort(names)
  out:write("Type definitions:\n")
  for idx = 1, #names do
    local name = names[idx]
    local tp = map_type[name]
    local regname = ""
    if tp.reg then regname = map_reg_rev[tp.reg] or "" end
    out:write(format(" %-20s %-20s %s\n", name, tp.ctype, regname))
  end
  out:write("\n")
end
2338
2339 ------------------------------------------------------------------------------
2340
-- Set the current section.
-- Emits a SECTION action with the section number as its argument byte and
-- then flushes, passing true to wflush().
_M.section = function(num)
  waction("SECTION")
  wputxb(num)
  -- SECTION is a terminal action.
  wflush(true)
end
2347
2348 ------------------------------------------------------------------------------
2349
-- Dump architecture description.
-- Writes a version banner to `out', followed by the register and action
-- dumps.
_M.dumparch = function(out)
  local banner = format("DynASM %s version %s, released %s\n\n",
    _info.arch, _info.version, _info.release)
  out:write(banner)
  dumpregs(out)
  dumpactions(out)
end
2357
-- Dump all user defined elements.
-- Types, globals and externs are dumped in this order, each to `out' with
-- the same `lvl' argument.
_M.dumpdef = function(out, lvl)
  dumptypes(out, lvl)
  dumpglobals(out, lvl)
  dumpexterns(out, lvl)
end
2364
2365 ------------------------------------------------------------------------------
2366
-- Pass callbacks from/to the DynASM core.
-- Stores the four output/diagnostic callbacks in the module-level variables
-- and hands back the module's flush function.
_M.passcb = function(wl, we, wf, ww)
  wline = wl
  werror = we
  wfatal = wf
  wwarn = ww
  return wflush
end
2372
-- Setup the arch-specific module.
-- Remembers the architecture and the options in module-level variables.
_M.setup = function(arch, opt)
  g_arch = arch
  g_opt = opt
end
2377
-- Merge the core maps and the arch-specific maps.
-- Lookups missing from map_op fall back to the core opcode map and lookups
-- missing from map_def fall back to map_archdef. Returns both primary maps.
_M.mergemaps = function(map_coreop, map_def)
  local op_fallback = { __index = map_coreop }
  setmetatable(map_op, op_fallback)
  local def_fallback = { __index = map_archdef }
  setmetatable(map_def, def_fallback)
  return map_op, map_def
end
2384
2385 return _M
2386
2387 ------------------------------------------------------------------------------
2388