[PATCH v1 4/4] x86-disas: add x86-mini disassembler implementation


From: Michael Clark
Subject: [PATCH v1 4/4] x86-disas: add x86-mini disassembler implementation
Date: Fri, 24 Jan 2025 13:10:32 +1300

The x86-mini library is a lightweight x86 encoder, decoder, and
disassembler that uses extensions to the Intel instruction set
metadata to encode modern VEX/EVEX instructions and legacy
instructions with a parameterized LEX (legacy extension) format.

This patch adds the x86-tablegen.py script, the disassembler,
a print_insn_x86 implementation, and host and target CPU stubs.
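
For reference, the host-side hook-up added to disas/disas-host.c by
this patch reduces to the following (the 64-bit branch uses
x86_modes_64 instead):

    s->info.private_data = x86_ctx_create(x86_modes_32);
    s->info.print_insn = print_insn_x86;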

Signed-off-by: Michael Clark <michael@anarch128.org>
---
 disas/disas-host.c      |    5 +
 disas/meson.build       |   97 ++
 disas/x86-core.c        | 2689 +++++++++++++++++++++++++++++++++++++++
 disas/x86-disas.c       |   94 ++
 disas/x86.h             | 1969 ++++++++++++++++++++++++++++
 include/disas/dis-asm.h |    1 +
 target/i386/cpu.c       |    7 +
 7 files changed, 4862 insertions(+)
 create mode 100644 disas/x86-core.c
 create mode 100644 disas/x86-disas.c
 create mode 100644 disas/x86.h

diff --git a/disas/disas-host.c b/disas/disas-host.c
index 8146fafe804c..e05dec5c76f6 100644
--- a/disas/disas-host.c
+++ b/disas/disas-host.c
@@ -7,6 +7,7 @@
 #include "disas/disas.h"
 #include "disas/capstone.h"
 #include "disas-internal.h"
+#include "x86.h"
 
 
 /*
@@ -50,12 +51,16 @@ static void initialize_debug_host(CPUDebug *s)
     s->info.cap_mode = CS_MODE_32;
     s->info.cap_insn_unit = 1;
     s->info.cap_insn_split = 8;
+    s->info.private_data = x86_ctx_create(x86_modes_32);
+    s->info.print_insn = print_insn_x86;
 #elif defined(__x86_64__)
     s->info.mach = bfd_mach_x86_64;
     s->info.cap_arch = CS_ARCH_X86;
     s->info.cap_mode = CS_MODE_64;
     s->info.cap_insn_unit = 1;
     s->info.cap_insn_split = 8;
+    s->info.private_data = x86_ctx_create(x86_modes_64);
+    s->info.print_insn = print_insn_x86;
 #elif defined(_ARCH_PPC)
     s->info.cap_arch = CS_ARCH_PPC;
 # ifdef _ARCH_PPC64
diff --git a/disas/meson.build b/disas/meson.build
index bbfa11978352..dca926a00987 100644
--- a/disas/meson.build
+++ b/disas/meson.build
@@ -9,6 +9,103 @@ common_ss.add(when: 'CONFIG_RISCV_DIS', if_true: files(
     'riscv-xthead.c',
     'riscv-xventana.c'
 ))
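+# x86 instruction set metadata in CSV form, consumed by x86-tablegen.py
+# to generate the opcode enum and table includes below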
+x86_data = files(
+    'x86-data/x86_adx.csv',
+    'x86-data/x86_aes.csv',
+    'x86-data/x86_aeskle.csv',
+    'x86-data/x86_aesklewide.csv',
+    'x86-data/x86_avx2.csv',
+    'x86-data/x86_avx5124fmaps.csv',
+    'x86-data/x86_avx5124vnniw.csv',
+    'x86-data/x86_avx512b.csv',
+    'x86-data/x86_avx512bf16.csv',
+    'x86-data/x86_avx512bitalg.csv',
+    'x86-data/x86_avx512bw.csv',
+    'x86-data/x86_avx512cd.csv',
+    'x86-data/x86_avx512d.csv',
+    'x86-data/x86_avx512dq.csv',
+    'x86-data/x86_avx512er.csv',
+    'x86-data/x86_avx512f.csv',
+    'x86-data/x86_avx512fp16.csv',
+    'x86-data/x86_avx512ifma.csv',
+    'x86-data/x86_avx512pf.csv',
+    'x86-data/x86_avx512vbmi2.csv',
+    'x86-data/x86_avx512vbmi.csv',
+    'x86-data/x86_avx512vl.csv',
+    'x86-data/x86_avx512vnni.csv',
+    'x86-data/x86_avx512vp2intersect.csv',
+    'x86-data/x86_avx512vpopcntdq.csv',
+    'x86-data/x86_avx.csv',
+    'x86-data/x86_avxneconvert.csv',
+    'x86-data/x86_avxvnni.csv',
+    'x86-data/x86_avxvnniint8.csv',
+    'x86-data/x86_base.csv',
+    'x86-data/x86_bmi1.csv',
+    'x86-data/x86_bmi2.csv',
+    'x86-data/x86_cet.csv',
+    'x86-data/x86_cldemote.csv',
+    'x86-data/x86_clwb.csv',
+    'x86-data/x86_enqcmd.csv',
+    'x86-data/x86_f16c.csv',
+    'x86-data/x86_fma.csv',
+    'x86-data/x86_fsgsbase.csv',
+    'x86-data/x86_gfni.csv',
+    'x86-data/x86_hreset.csv',
+    'x86-data/x86_invpcid.csv',
+    'x86-data/x86_lzcnt.csv',
+    'x86-data/x86_mmx.csv',
+    'x86-data/x86_movdir64b.csv',
+    'x86-data/x86_movdiri.csv',
+    'x86-data/x86_mpx.csv',
+    'x86-data/x86_msrlist.csv',
+    'x86-data/x86_ospke.csv',
+    'x86-data/x86_pclmulqdq.csv',
+    'x86-data/x86_pconfig.csv',
+    'x86-data/x86_prefetchw.csv',
+    'x86-data/x86_raoint.csv',
+    'x86-data/x86_rdpid.csv',
+    'x86-data/x86_rdrand.csv',
+    'x86-data/x86_rdseed.csv',
+    'x86-data/x86_rtm.csv',
+    'x86-data/x86_serialize.csv',
+    'x86-data/x86_sha.csv',
+    'x86-data/x86_smap.csv',
+    'x86-data/x86_sse2.csv',
+    'x86-data/x86_sse3.csv',
+    'x86-data/x86_sse4_1.csv',
+    'x86-data/x86_sse4_2.csv',
+    'x86-data/x86_sse4_3.csv',
+    'x86-data/x86_sse.csv',
+    'x86-data/x86_ssse3.csv',
+    'x86-data/x86_uintr.csv',
+    'x86-data/x86_vaes.csv',
+    'x86-data/x86_vmx.csv',
+    'x86-data/x86_waitpkg.csv',
+    'x86-data/x86_wbnoinvd.csv',
+    'x86-data/x86_x87.csv',
+    'x86-data/x86_xsaveopt.csv'
+)
+x86_tablegen_py = find_program('../scripts/x86-tablegen.py')
+x86_gen_enums_inc = custom_target(
+    'x86-enums.inc',
+    output: 'x86-enums.inc',
+    depend_files: x86_data,
+    command: [x86_tablegen_py, '--print-opcode-enums',
+              '--output-file', '@OUTPUT@', x86_data]
+)
+x86_gen_tables_inc = custom_target(
+    'x86-tables.inc',
+    output: 'x86-tables.inc',
+    depend_files: x86_data,
+    command: [x86_tablegen_py, '--print-opcode-tables',
+              '--output-file', '@OUTPUT@', x86_data]
+)
+common_ss.add(when: 'CONFIG_I386_DIS', if_true: x86_gen_enums_inc)
+common_ss.add(when: 'CONFIG_I386_DIS', if_true: x86_gen_tables_inc)
+common_ss.add(when: 'CONFIG_I386_DIS', if_true: files(
+    'x86-core.c',
+    'x86-disas.c'
+))
 common_ss.add(when: 'CONFIG_SH4_DIS', if_true: files('sh4.c'))
 common_ss.add(when: 'CONFIG_SPARC_DIS', if_true: files('sparc.c'))
 common_ss.add(when: 'CONFIG_XTENSA_DIS', if_true: files('xtensa.c'))
diff --git a/disas/x86-core.c b/disas/x86-core.c
new file mode 100644
index 000000000000..d2704bf839f4
--- /dev/null
+++ b/disas/x86-core.c
@@ -0,0 +1,2689 @@
+/*
+ * Copyright (c) 2024-2025 Michael Clark
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <limits.h>
+
+#include "disas/x86.h"
+#include "disas/x86-tables.inc"
+
+#define array_size(arr) (sizeof(arr) / sizeof(arr[0]))
+
+typedef struct x86_table_col x86_table_col;
+typedef struct x86_map_str x86_map_str;
+typedef struct x86_operands x86_operands;
+typedef struct x86_arg x86_arg;
+typedef struct x86_opc_prefix x86_opc_prefix;
+typedef struct x86_opr_formatter x86_opr_formatter;
+typedef struct x86_opr_formats x86_opr_formats;
+
+typedef size_t (*x86_opr_str_fn)(char *buf, size_t buflen, x86_codec *c,
+    x86_arg a);
+typedef size_t (*x86_opr_addr_fn)(char *buf, size_t buflen, x86_codec *c,
+    x86_arg a, size_t pc_offset, x86_fmt_symbol sym_cb);
+
+enum
+{
+    x86_state_top,
+    x86_state_segment,
+    x86_state_legacy,
+    x86_state_map_0f,
+    x86_state_lex_opcode,
+    x86_state_rex_opcode,
+    x86_state_vex_opcode,
+    x86_state_done,
+};
+
+struct x86_table_col
+{
+    int width;
+    char *data;
+};
+
+struct x86_map_str
+{
+    uint ord;
+    const char *str;
+};
+
+struct x86_opr_formatter
+{
+    x86_opr_str_fn fmt_const;
+    x86_opr_str_fn fmt_imm;
+    x86_opr_str_fn fmt_reg;
+    x86_opr_str_fn fmt_mrm;
+    x86_opr_str_fn fmt_vec;
+    x86_opr_str_fn fmt_opb;
+    x86_opr_str_fn fmt_is4;
+    x86_opr_str_fn fmt_ime;
+    x86_opr_addr_fn fmt_rel;
+};
+
+struct x86_opr_formats
+{
+    const char *ptr_rip;
+    const char *ptr_rip_disp;
+    const char *ptr_reg;
+    const char *ptr_reg_disp;
+    const char *ptr_reg_sreg;
+    const char *ptr_reg_sreg_disp;
+    const char *ptr_reg_reg;
+    const char *ptr_reg_reg_disp;
+    const char *ptr_sreg;
+    const char *ptr_disp;
+    const char *ptr_imm64;
+    const char *ptr_imm32;
+    const char *imm64;
+    const char *imm32;
+    const char *reg;
+};
+
+struct x86_operands
+{
+    ullong mod : 3;
+    ullong rm  : 3;
+    ullong r   : 5;
+    ullong x   : 5;
+    ullong b   : 5;
+    ullong s   : 2;
+    ullong v   : 5;
+    ullong k   : 5;
+    ullong l   : 2;
+    ullong w   : 1;
+    ullong brd : 1;
+    ullong osz : 1;
+};
+
+struct x86_arg
+{
+    uint enc;
+    uint opr;
+    uint ord;
+    x86_operands q;
+};
+
+struct x86_opc_prefix
+{
+    uint pfx;
+    uint pfx_w;
+    uint pfx_o;
+    uint modfun;
+    uint modreg;
+    uint modmem;
+};
+
+x86_opr_formatter x86_format_intel_hex;
+x86_opr_formatter x86_format_intel_dec;
+x86_opr_formats x86_opr_formats_intel_hex;
+x86_opr_formats x86_opr_formats_intel_dec;
+
+static uint debug;
+
+void x86_set_debug(uint d) { debug = d; }
+
+x86_map_str x86_mode_names[] =
+{
+    { x86_modes_64,             "64"               },
+    { x86_modes_32,             "32"               },
+    { x86_modes_16,             "16"               },
+};
+
+x86_map_str x86_map_names[] =
+{
+    { x86_map_map6,            "map6"              },
+    { x86_map_map5,            "map5"              },
+    { x86_map_map4,            "map4"              },
+    { x86_map_0f3a,            "0f3a"              },
+    { x86_map_0f38,            "0f38"              },
+    { x86_map_0f,              "0f"                },
+};
+
+x86_map_str x86_ord_names[] =
+{
+    { x86_ord_rflags,           "rflags"           },
+    { x86_ord_mxcsr,            "mxcsr"            },
+    { x86_ord_xmm0_7,           "xmm0_7"           },
+    { x86_ord_xmm0,             "xmm0"             },
+    { x86_ord_seg,              "seg"              },
+    { x86_ord_stx,              "stx"              },
+    { x86_ord_st0,              "st0"              },
+    { x86_ord_rdi,              "rdi"              },
+    { x86_ord_rsi,              "rsi"              },
+    { x86_ord_rbp,              "rbp"              },
+    { x86_ord_rsp,              "rsp"              },
+    { x86_ord_rbx,              "rbx"              },
+    { x86_ord_rdx,              "rdx"              },
+    { x86_ord_rcx,              "rcx"              },
+    { x86_ord_rax,              "rax"              },
+    { x86_ord_one,              "one"              },
+    { x86_ord_ime,              "ime"              },
+    { x86_ord_is4,              "is4"              },
+    { x86_ord_sib,              "sib"              },
+    { x86_ord_vec,              "vec"              },
+    { x86_ord_opr,              "opr"              },
+    { x86_ord_mrm,              "mrm"              },
+    { x86_ord_reg,              "reg"              },
+    { x86_ord_imm,              "imm"              },
+    { x86_ord_rwi,              "rwi"              },
+    { x86_ord_wi,               "wi"               },
+    { x86_ord_ri,               "ri"               },
+    { x86_ord_rw,               "rw"               },
+    { x86_ord_i,                "i"                },
+    { x86_ord_r,                "r"                },
+    { x86_ord_w,                "w"                },
+    { 0,                        NULL               },
+};
+
+x86_map_str x86_opr_names[] =
+{
+    { x86_opr_bnd_mem,          "bnd/mem"          },
+    { x86_opr_k_m64,            "k/m64"            },
+    { x86_opr_k_m32,            "k/m32"            },
+    { x86_opr_k_m16,            "k/m16"            },
+    { x86_opr_k_m8,             "k/m8"             },
+    { x86_opr_vm64z,            "vm64z"            },
+    { x86_opr_vm64y,            "vm64y"            },
+    { x86_opr_vm64x,            "vm64x"            },
+    { x86_opr_vm32z,            "vm32z"            },
+    { x86_opr_vm32y,            "vm32y"            },
+    { x86_opr_vm32x,            "vm32x"            },
+    { x86_opr_rw_mw,            "rw/mw"            },
+    { x86_opr_r64_m64,          "r64/m64"          },
+    { x86_opr_r64_m32,          "r64/m32"          },
+    { x86_opr_r64_m16,          "r64/m16"          },
+    { x86_opr_r64_m8,           "r64/m8"           },
+    { x86_opr_r32_m32,          "r32/m32"          },
+    { x86_opr_r32_m16,          "r32/m16"          },
+    { x86_opr_r32_m8,           "r32/m8"           },
+    { x86_opr_r16_m16,          "r16/m16"          },
+    { x86_opr_r16_m8,           "r16/m8"           },
+    { x86_opr_r8_m8,            "r8/m8"            },
+    { x86_opr_zmm_m512_m64bcst, "zmm/m512/m64bcst" },
+    { x86_opr_zmm_m512_m32bcst, "zmm/m512/m32bcst" },
+    { x86_opr_zmm_m512_m16bcst, "zmm/m512/m16bcst" },
+    { x86_opr_ymm_m256_m64bcst, "ymm/m256/m64bcst" },
+    { x86_opr_ymm_m256_m32bcst, "ymm/m256/m32bcst" },
+    { x86_opr_ymm_m256_m16bcst, "ymm/m256/m16bcst" },
+    { x86_opr_xmm_m128_m64bcst, "xmm/m128/m64bcst" },
+    { x86_opr_xmm_m128_m32bcst, "xmm/m128/m32bcst" },
+    { x86_opr_xmm_m128_m16bcst, "xmm/m128/m16bcst" },
+    { x86_opr_xmm_m64_m32bcst,  "xmm/m64/m32bcst"  },
+    { x86_opr_xmm_m64_m16bcst,  "xmm/m64/m16bcst"  },
+    { x86_opr_xmm_m32_m16bcst,  "xmm/m32/m16bcst"  },
+    { x86_opr_zmm_m512,         "zmm/m512"         },
+    { x86_opr_ymm_m256,         "ymm/m256"         },
+    { x86_opr_xmm_m128,         "xmm/m128"         },
+    { x86_opr_xmm_m64,          "xmm/m64"          },
+    { x86_opr_xmm_m32,          "xmm/m32"          },
+    { x86_opr_xmm_m16,          "xmm/m16"          },
+    { x86_opr_xmm_m8,           "xmm/m8"           },
+    { x86_opr_mm_m64,           "mm/m64"           },
+    { x86_opr_mm_m32,           "mm/m32"           },
+    { x86_opr_mp,               "mp"               },
+    { x86_opr_mw,               "mw"               },
+    { x86_opr_vm64,             "vm64"             },
+    { x86_opr_vm32,             "vm32"             },
+    { x86_opr_r_m64,            "r/m64"            },
+    { x86_opr_r_m32,            "r/m32"            },
+    { x86_opr_r_m16,            "r/m16"            },
+    { x86_opr_r_m8,             "r/m8"             },
+    { x86_opr_m64bcst,          "m64bcst"          },
+    { x86_opr_m32bcst,          "m32bcst"          },
+    { x86_opr_m16bcst,          "m16bcst"          },
+    { x86_opr_mib,              "mib"              },
+    { x86_opr_m384,             "m384"             },
+    { x86_opr_m80,              "m80"              },
+    { x86_opr_m512,             "m512"             },
+    { x86_opr_m256,             "m256"             },
+    { x86_opr_m128,             "m128"             },
+    { x86_opr_m64,              "m64"              },
+    { x86_opr_m32,              "m32"              },
+    { x86_opr_m16,              "m16"              },
+    { x86_opr_m8,               "m8"               },
+    { x86_opr_seg_gs,           "gs"               },
+    { x86_opr_seg_fs,           "fs"               },
+    { x86_opr_seg_ds,           "ds"               },
+    { x86_opr_seg_ss,           "ss"               },
+    { x86_opr_seg_cs,           "cs"               },
+    { x86_opr_seg_es,           "es"               },
+    { x86_opr_reg_xmm0_7,       "xmm0_7"           },
+    { x86_opr_reg_xmm0,         "xmm0"             },
+    { x86_opr_reg_pdi,          "pdi"              },
+    { x86_opr_reg_psi,          "psi"              },
+    { x86_opr_reg_pb,           "pb"               },
+    { x86_opr_reg_pd,           "pd"               },
+    { x86_opr_reg_pc,           "pc"               },
+    { x86_opr_reg_pa,           "pa"               },
+    { x86_opr_reg_bw,           "bw"               },
+    { x86_opr_reg_dw,           "dw"               },
+    { x86_opr_reg_cw,           "cw"               },
+    { x86_opr_reg_aw,           "aw"               },
+    { x86_opr_reg_rbx,          "rbx"              },
+    { x86_opr_reg_rdx,          "rdx"              },
+    { x86_opr_reg_rcx,          "rcx"              },
+    { x86_opr_reg_rax,          "rax"              },
+    { x86_opr_reg_ebx,          "ebx"              },
+    { x86_opr_reg_edx,          "edx"              },
+    { x86_opr_reg_ecx,          "ecx"              },
+    { x86_opr_reg_eax,          "eax"              },
+    { x86_opr_reg_bx,           "bx"               },
+    { x86_opr_reg_dx,           "dx"               },
+    { x86_opr_reg_cx,           "cx"               },
+    { x86_opr_reg_ax,           "ax"               },
+    { x86_opr_reg_cl,           "cl"               },
+    { x86_opr_reg_al,           "al"               },
+    { x86_opr_reg_st0,          "st0"              },
+    { x86_opr_reg_v0,           "v0"               },
+    { x86_opr_reg_ah,           "ah"               },
+    { x86_opr_reg_di,           "di"               },
+    { x86_opr_reg_si,           "si"               },
+    { x86_opr_reg_d,            "d"                },
+    { x86_opr_reg_c,            "c"                },
+    { x86_opr_reg_a,            "a"                },
+    { x86_opr_memfar16_64,      "memfar16:64"      },
+    { x86_opr_memfar16_32,      "memfar16:32"      },
+    { x86_opr_memfar16_16,      "memfar16:16"      },
+    { x86_opr_far16_32,         "far16:32"         },
+    { x86_opr_far16_16,         "far16:16"         },
+    { x86_opr_relw,             "relw"             },
+    { x86_opr_rel8,             "rel8"             },
+    { x86_opr_moffs,            "moffs"            },
+    { x86_opr_1,                "1"                },
+    { x86_opr_zmm,              "zmm"              },
+    { x86_opr_ymm,              "ymm"              },
+    { x86_opr_xmm,              "xmm"              },
+    { x86_opr_mm,               "mm"               },
+    { x86_opr_ra,               "ra"               },
+    { x86_opr_rw,               "rw"               },
+    { x86_opr_r64,              "r64"              },
+    { x86_opr_r32,              "r32"              },
+    { x86_opr_r16,              "r16"              },
+    { x86_opr_r8,               "r8"               },
+    { x86_opr_iw,               "iw"               },
+    { x86_opr_iwd,              "iwd"              },
+    { x86_opr_i64,              "i64"              },
+    { x86_opr_i32,              "i32"              },
+    { x86_opr_i16,              "i16"              },
+    { x86_opr_ib,               "ib"               },
+    { x86_opr_bnd,              "bnd"              },
+    { x86_opr_dreg,             "dreg"             },
+    { x86_opr_creg,             "creg"             },
+    { x86_opr_seg,              "seg"              },
+    { x86_opr_k,                "k"                },
+    { x86_opr_st,               "st"               },
+    { x86_opr_mmx,              "mmx"              },
+    { x86_opr_vec,              "vec"              },
+    { x86_opr_reg,              "reg"              },
+    { x86_opr_imm,              "imm"              },
+    { x86_opr_bcst,             "bcst"             },
+    { x86_opr_mem,              "mem"              },
+    { x86_opr_flag_er,          "{er}"             },
+    { x86_opr_flag_k,           "{k}"              },
+    { x86_opr_flag_sae,         "{sae}"            },
+    { x86_opr_flag_z,           "{z}"              },
+    { x86_opr_flag_rs2,         "{rs2}"            },
+    { x86_opr_flag_rs4,         "{rs4}"            },
+    { x86_opr_f64x8,            "/f64x8"           },
+    { x86_opr_f64x4,            "/f64x4"           },
+    { x86_opr_f64x2,            "/f64x2"           },
+    { x86_opr_f64x1,            "/f64x1"           },
+    { x86_opr_f32x16,           "/f32x16"          },
+    { x86_opr_f32x8,            "/f32x8"           },
+    { x86_opr_f32x4,            "/f32x4"           },
+    { x86_opr_f32x2,            "/f32x2"           },
+    { x86_opr_f32x1,            "/f32x1"           },
+    { x86_opr_f16x32,           "/f16x32"          },
+    { x86_opr_f16x16,           "/f16x16"          },
+    { x86_opr_f16x8,            "/f16x8"           },
+    { x86_opr_f16x4,            "/f16x4"           },
+    { x86_opr_f16x2,            "/f16x2"           },
+    { x86_opr_f16x1,            "/f16x1"           },
+    { x86_opr_f8x64,            "/f8x64"           },
+    { x86_opr_f8x32,            "/f8x32"           },
+    { x86_opr_f8x16,            "/f8x16"           },
+    { x86_opr_f8x8,             "/f8x8"            },
+    { x86_opr_f8x4,             "/f8x4"            },
+    { x86_opr_f8x2,             "/f8x2"            },
+    { x86_opr_f8x1,             "/f8x1"            },
+    { x86_opr_i512x1,           "/i512x1"          },
+    { x86_opr_i256x2,           "/i256x2"          },
+    { x86_opr_i256x1,           "/i256x1"          },
+    { x86_opr_i128x4,           "/i128x4"          },
+    { x86_opr_i128x2,           "/i128x2"          },
+    { x86_opr_i128x1,           "/i128x1"          },
+    { x86_opr_i64x8,            "/i64x8"           },
+    { x86_opr_i64x4,            "/i64x4"           },
+    { x86_opr_i64x2,            "/i64x2"           },
+    { x86_opr_i64x1,            "/i64x1"           },
+    { x86_opr_i32x16,           "/i32x16"          },
+    { x86_opr_i32x8,            "/i32x8"           },
+    { x86_opr_i32x4,            "/i32x4"           },
+    { x86_opr_i32x2,            "/i32x2"           },
+    { x86_opr_i32x1,            "/i32x1"           },
+    { x86_opr_i16x32,           "/i16x32"          },
+    { x86_opr_i16x16,           "/i16x16"          },
+    { x86_opr_i16x8,            "/i16x8"           },
+    { x86_opr_i16x4,            "/i16x4"           },
+    { x86_opr_i16x2,            "/i16x2"           },
+    { x86_opr_i16x1,            "/i16x1"           },
+    { x86_opr_i8x64,            "/i8x64"           },
+    { x86_opr_i8x32,            "/i8x32"           },
+    { x86_opr_i8x16,            "/i8x16"           },
+    { x86_opr_i8x8,             "/i8x8"            },
+    { x86_opr_i8x4,             "/i8x4"            },
+    { x86_opr_i8x2,             "/i8x2"            },
+    { x86_opr_i8x1,             "/i8x1"            },
+    { 0,                        NULL               },
+};
+
+x86_map_str x86_enc_names[] =
+{
+    { x86_enc_r_norexb,         " .norexb"         },
+    { x86_enc_r_lock,           " .lock"           },
+    { x86_enc_r_rep,            " .rep"            },
+    { x86_enc_s_a64,            " .a64"            },
+    { x86_enc_s_a32,            " .a32"            },
+    { x86_enc_s_a16,            " .a16"            },
+    { x86_enc_s_o64,            " .o64"            },
+    { x86_enc_s_o32,            " .o32"            },
+    { x86_enc_s_o16,            " .o16"            },
+    { x86_enc_j_i16,            " i16"             },
+    { x86_enc_j_ib,             " ib"              },
+    { x86_enc_i_i64,            " i64"             },
+    { x86_enc_i_i32,            " i32"             },
+    { x86_enc_i_i16,            " i16"             },
+    { x86_enc_i_iwd,            " iwd"             },
+    { x86_enc_i_iw,             " iw"              },
+    { x86_enc_i_ib,             " ib"              },
+    { x86_enc_f_opcode_r,       ""                 },
+    { x86_enc_f_opcode,         ""                 },
+    { x86_enc_f_modrm_n,        ""                 },
+    { x86_enc_f_modrm_r,        ""                 },
+    { x86_enc_o_opcode_r,       ""                 },
+    { x86_enc_t_evex,           ".evex"            },
+    { x86_enc_t_vex,            ".vex"             },
+    { x86_enc_t_lex,            ".lex"             },
+    { x86_enc_l_lig,            ".lig"             },
+    { x86_enc_l_512,            ".512"             },
+    { x86_enc_l_256,            ".256"             },
+    { x86_enc_l_128,            ".128"             },
+    { x86_enc_l_l1,             ".l1"              },
+    { x86_enc_l_l0,             ".l0"              },
+    { x86_enc_l_lz,             ".lz"              },
+    { x86_enc_p_rexw,           ".w"               },
+    { x86_enc_p_9b,             ".9b"              },
+    { x86_enc_p_f2,             ".f2"              },
+    { x86_enc_p_f3,             ".f3"              },
+    { x86_enc_p_66,             ".66"              },
+    { x86_enc_m_map6,           ".map6"            },
+    { x86_enc_m_map5,           ".map5"            },
+    { x86_enc_m_map4,           ".map4"            },
+    { x86_enc_m_0f3a,           ".0f3a"            },
+    { x86_enc_m_0f38,           ".0f38"            },
+    { x86_enc_m_0f,             ".0f"              },
+    { x86_enc_w_wig,            ".wig"             },
+    { x86_enc_w_ww,             ".ww"              },
+    { x86_enc_w_wx,             ".wx"              },
+    { x86_enc_w_wn,             ".wn"              },
+    { x86_enc_w_wb,             ".wb"              },
+    { x86_enc_w_w1,             ".w1"              },
+    { x86_enc_w_w0,             ".w0"              },
+    { 0,                        NULL               },
+};
+
+static size_t x86_name_map(x86_map_str *p, char *buf, size_t len, uint ord,
+    const char *sep)
+{
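+    /*
+     * single forward pass over the name table: emit the name of each
+     * entry whose bits are all present in ord, clearing them as they
+     * are matched, joined by sep
+     */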
+    size_t count = 0;
+    int ret = 0;
+    for (;;) {
+        while (p->str != NULL) {
+            if (p->ord && (p->ord & ord) == p->ord) {
+                ord = ord & ~p->ord;
+                break;
+            }
+            p++;
+        }
+        if (p->str == NULL) break;
+        if (buf == NULL || count <= len) {
+            ret = snprintf(buf ? buf + count : NULL,
+                buf ? len - count : 0,
+                "%s%s", count == 0 ? "" : sep, p->str);
+            if (ret > 0) count += ret;
+        }
+    }
+    return count;
+}
+
+size_t x86_mode_name(char *buf, size_t len, uint mode, const char *sep)
+{
+    return x86_name_map(x86_mode_names, buf, len, mode, sep);
+}
+
+size_t x86_map_name(char *buf, size_t len, uint mode, const char *sep)
+{
+    return x86_name_map(x86_map_names, buf, len, mode, sep);
+}
+
+size_t x86_ord_name(char *buf, size_t len, uint ord, const char *sep)
+{
+    return x86_name_map(x86_ord_names, buf, len, ord, sep);
+}
+
+size_t x86_opr_name(char *buf, size_t len, uint opr)
+{
+    return x86_name_map(x86_opr_names, buf, len, opr, "");
+}
+
+size_t x86_enc_name(char *buf, size_t len, uint enc)
+{
+    return x86_name_map(x86_enc_names, buf, len, enc, "");
+}
+
+const char *x86_reg_name(uint reg)
+{
+    return (reg < 512) ? x86_reg_names[reg] : "invalid";
+}
+
+size_t x86_ord_mnem(char *buf, size_t len, const ushort *ord)
+{
+    const char codes[8] = " -irmvo ";
+    size_t count = 0;
+    for (size_t i = 0; i < array_size(x86_ord_table[0].ord) && ord[i]; i++) {
+        uint type = x86_ord_type_val(ord[i]);
+        if (buf && count < len) {
+            buf[count++] = codes[type];
+        }
+    }
+    if (buf) buf[count] = '\0';
+    return count;
+}
+
+const char *x86_table_type_name(uint type)
+{
+    switch (type) {
+    case x86_table_none: return "none";
+    case x86_table_lex: return "lex";
+    case x86_table_vex: return "vex";
+    case x86_table_evex: return "evex";
+    default: return "";
+    }
+}
+
+const char *x86_table_map_name(uint map)
+{
+    switch (map) {
+    case x86_map_none: return "";
+    case x86_map_0f: return "0f";
+    case x86_map_0f38: return "0f38";
+    case x86_map_0f3a: return "0f3a";
+    case x86_map_map4: return "map4";
+    case x86_map_map5: return "map5";
+    case x86_map_map6: return "map6";
+    default: return "";
+    }
+}
+
+const char *x86_table_prefix_name(uint prefix)
+{
+    switch (prefix) {
+    case x86_pfx_66: return "66";
+    case x86_pfx_f3: return "f3";
+    case x86_pfx_f2: return "f2";
+    case x86_pfx_9b: return "9b";
+    case x86_pfx_66 | x86_pfx_rexw: return "66+w";
+    case x86_pfx_f3 | x86_pfx_rexw: return "f3+w";
+    case x86_pfx_f2 | x86_pfx_rexw: return "f2+w";
+    case x86_pfx_9b | x86_pfx_rexw: return "9b+w";
+    default: return "";
+    }
+}
+
+/*
+ *  metadata filters
+ */
+
+int x86_enc_filter_rex(x86_rex prefix, uint enc)
+{
+    uint lex =  (enc & x86_enc_t_mask) == x86_enc_t_lex;
+    uint ew0 =  (enc & x86_enc_w_mask) == x86_enc_w_w0;
+    uint ew1 =  (enc & x86_enc_w_mask) == x86_enc_w_w1;
+    uint ewn =  (enc & x86_enc_w_mask) == x86_enc_w_wn;
+    uint ewb =  (enc & x86_enc_w_mask) == x86_enc_w_wb;
+    uint eww =  (enc & x86_enc_w_mask) == x86_enc_w_ww;
+    uint ewx =  (enc & x86_enc_w_mask) == x86_enc_w_wx;
+    uint ewig = (enc & x86_enc_w_mask) == x86_enc_w_wig;
+    uint norexb = (enc & x86_enc_r_norexb) != 0;
+
+    uint w = (prefix.data[0] >> 3) & 1;
+
+    if (!lex) return -1;
+    if (norexb) return -1;
+
+    switch (w) {
+    case x86_vex_w0:
+        if (!(ew0 || ewig || ewn || ewb || eww || ewx)) return -1;
+        break;
+    case x86_vex_w1:
+        if (!(ew1 || ewig || ewn || ewb || eww || ewx)) return -1;
+        break;
+    }
+
+    return 0;
+}
+
+int x86_enc_filter_rex2(x86_rex2 prefix, uint enc)
+{
+    uint lex =  (enc & x86_enc_t_mask) == x86_enc_t_lex;
+    uint ew0 =  (enc & x86_enc_w_mask) == x86_enc_w_w0;
+    uint ew1 =  (enc & x86_enc_w_mask) == x86_enc_w_w1;
+    uint ewig = (enc & x86_enc_w_mask) == x86_enc_w_wig;
+    uint em =   (enc & x86_enc_m_mask) >> x86_enc_m_shift;
+
+    uint m = (prefix.data[0] >> 7) & 1;
+    uint w = (prefix.data[0] >> 3) & 1;
+
+    if (!lex || m != em) return -1;
+
+    switch (w) {
+    case x86_vex_w0: if (!(ew0 || ewig)) return -1; break;
+    case x86_vex_w1: if (!(ew1 || ewig)) return -1; break;
+    default: return -1;
+    }
+
+    return 0;
+}
+
+int x86_enc_filter_vex2(x86_vex2 prefix, uint enc)
+{
+    uint vex =  (enc & x86_enc_t_mask) == x86_enc_t_vex;
+    uint ew0 =  (enc & x86_enc_w_mask) == x86_enc_w_w0;
+    uint ewig = (enc & x86_enc_w_mask) == x86_enc_w_wig;
+    uint lz =   (enc & x86_enc_l_mask) == x86_enc_l_lz;
+    uint l0 =   (enc & x86_enc_l_mask) == x86_enc_l_l0;
+    uint l1 =   (enc & x86_enc_l_mask) == x86_enc_l_l1;
+    uint l128 = (enc & x86_enc_l_mask) == x86_enc_l_128;
+    uint l256 = (enc & x86_enc_l_mask) == x86_enc_l_256;
+    uint lig =  (enc & x86_enc_l_mask) == x86_enc_l_lig;
+    uint np =   (enc & x86_enc_p_mask) == x86_enc_p_none;
+    uint p66 =  (enc & x86_enc_p_mask) == x86_enc_p_66;
+    uint pf2 =  (enc & x86_enc_p_mask) == x86_enc_p_f2;
+    uint pf3 =  (enc & x86_enc_p_mask) == x86_enc_p_f3;
+    uint em =   (enc & x86_enc_m_mask) >> x86_enc_m_shift;
+
+    uint p = (prefix.data[0] >> 0) & 3;
+    uint l = (prefix.data[0] >> 2) & 1;
+
+    if (!vex || !(ew0 || ewig) || x86_map_0f != em) return -1;
+
+    switch (l) {
+    case x86_vex_l0: if (!(lig || lz || l0 || l128)) return -1; break;
+    case x86_vex_l1: if (!(lig || l1 || l256)) return -1; break;
+    default: return -1;
+    }
+
+    switch (p) {
+    case x86_pfx_none: if (!np) return -1; break;
+    case x86_pfx_66: if (!p66) return -1; break;
+    case x86_pfx_f2: if (!pf2) return -1; break;
+    case x86_pfx_f3: if (!pf3) return -1; break;
+    default: return -1;
+    }
+
+    return 0;
+}
+
+int x86_enc_filter_vex3(x86_vex3 prefix, uint enc)
+{
+    uint vex =  (enc & x86_enc_t_mask) == x86_enc_t_vex;
+    uint ew0 =  (enc & x86_enc_w_mask) == x86_enc_w_w0;
+    uint ew1 =  (enc & x86_enc_w_mask) == x86_enc_w_w1;
+    uint ewig = (enc & x86_enc_w_mask) == x86_enc_w_wig;
+    uint lz =   (enc & x86_enc_l_mask) == x86_enc_l_lz;
+    uint l0 =   (enc & x86_enc_l_mask) == x86_enc_l_l0;
+    uint l1 =   (enc & x86_enc_l_mask) == x86_enc_l_l1;
+    uint l128 = (enc & x86_enc_l_mask) == x86_enc_l_128;
+    uint l256 = (enc & x86_enc_l_mask) == x86_enc_l_256;
+    uint lig =  (enc & x86_enc_l_mask) == x86_enc_l_lig;
+    uint np =   (enc & x86_enc_p_mask) == x86_enc_p_none;
+    uint p66 =  (enc & x86_enc_p_mask) == x86_enc_p_66;
+    uint pf2 =  (enc & x86_enc_p_mask) == x86_enc_p_f2;
+    uint pf3 =  (enc & x86_enc_p_mask) == x86_enc_p_f3;
+    uint em =   (enc & x86_enc_m_mask) >> x86_enc_m_shift;
+
+    uint m = (prefix.data[0] >> 0) & 31;
+    uint w = (prefix.data[1] >> 7) & 1;
+    uint p = (prefix.data[1] >> 0) & 3;
+    uint l = (prefix.data[1] >> 2) & 1;
+
+    if (!vex || m != em) return -1;
+
+    switch (w) {
+    case x86_vex_w0: if (!(ew0 || ewig)) return -1; break;
+    case x86_vex_w1: if (!(ew1 || ewig)) return -1; break;
+    default: return -1;
+    }
+
+    switch (l) {
+    case x86_vex_l0: if (!(lig || lz || l0 || l128)) return -1; break;
+    case x86_vex_l1: if (!(lig || l1 || l256)) return -1; break;
+    default: return -1;
+    }
+
+    switch (p) {
+    case x86_pfx_none: if (!np) return -1; break;
+    case x86_pfx_66: if (!p66) return -1; break;
+    case x86_pfx_f2: if (!pf2) return -1; break;
+    case x86_pfx_f3: if (!pf3) return -1; break;
+    default: return -1;
+    }
+
+    return 0;
+}
+
+int x86_enc_filter_evex(x86_evex prefix, uint enc)
+{
+    uint evex = (enc & x86_enc_t_mask) == x86_enc_t_evex;
+    uint ew0 =  (enc & x86_enc_w_mask) == x86_enc_w_w0;
+    uint ew1 =  (enc & x86_enc_w_mask) == x86_enc_w_w1;
+    uint ewig = (enc & x86_enc_w_mask) == x86_enc_w_wig;
+    uint l128 = (enc & x86_enc_l_mask) == x86_enc_l_128;
+    uint l256 = (enc & x86_enc_l_mask) == x86_enc_l_256;
+    uint l512 = (enc & x86_enc_l_mask) == x86_enc_l_512;
+    uint lig =  (enc & x86_enc_l_mask) == x86_enc_l_lig;
+    uint np =   (enc & x86_enc_p_mask) == x86_enc_p_none;
+    uint p66 =  (enc & x86_enc_p_mask) == x86_enc_p_66;
+    uint pf2 =  (enc & x86_enc_p_mask) == x86_enc_p_f2;
+    uint pf3 =  (enc & x86_enc_p_mask) == x86_enc_p_f3;
+    uint em =   (enc & x86_enc_m_mask) >> x86_enc_m_shift;
+
+    uint m = (prefix.data[0] >> 0) & 7;
+    uint w = (prefix.data[1] >> 7) & 1;
+    uint p = (prefix.data[1] >> 0) & 3;
+    uint l = (prefix.data[2] >> 5) & 3;
+
+    if (!evex || m != em) return -1;
+
+    switch (w) {
+    case x86_vex_w0: if (!(ew0 || ewig)) return -1; break;
+    case x86_vex_w1: if (!(ew1 || ewig)) return -1; break;
+    default: return -1;
+    }
+
+    switch (l) {
+    case x86_vex_l0: if (!(lig || l128)) return -1; break;
+    case x86_vex_l1: if (!(lig || l256)) return -1; break;
+    case x86_vex_l2: if (!(lig || l512)) return -1; break;
+    default: return -1;
+    }
+
+    switch (p) {
+    case x86_pfx_none: if (!np) return -1; break;
+    case x86_pfx_66: if (!p66) return -1; break;
+    case x86_pfx_f2: if (!pf2) return -1; break;
+    case x86_pfx_f3: if (!pf3) return -1; break;
+    default: return -1;
+    }
+
+    return 0;
+}
+
+/*
+ *  table sorting
+ */
+
+static int x86_opc_data_compare_opcode(const void *p1, const void *p2)
+{
+    const x86_opc_data *op1 = x86_opc_table + *(size_t *)p1;
+    const x86_opc_data *op2 = x86_opc_table + *(size_t *)p2;
+
+    /* split into prefix and suffix */
+    uint mask = x86_enc_t_mask | x86_enc_p_mask | x86_enc_m_mask;
+    uint op1pre = op1->enc & mask;
+    uint op2pre = op2->enc & mask;
+    uint op1suf = op1->enc & ~mask;
+    uint op2suf = op2->enc & ~mask;
+
+    if (op1pre < op2pre) return -1;
+    if (op1pre > op2pre) return 1;
+    if (op1->opc[0] < op2->opc[0]) return -1;
+    if (op1->opc[0] > op2->opc[0]) return 1;
+    if (op1->opc[1] < op2->opc[1]) return -1;
+    if (op1->opc[1] > op2->opc[1]) return 1;
+    if (op1suf < op2suf) return -1;
+    if (op1suf > op2suf) return 1;
+    return 0;
+}
+
+static int x86_opc_data_compare_alpha(const void *p1, const void *p2)
+{
+    const x86_opc_data *op1 = x86_opc_table + *(size_t *)p1;
+    const x86_opc_data *op2 = x86_opc_table + *(size_t *)p2;
+    int alpha = strcmp(x86_op_names[op1->op], x86_op_names[op2->op]);
+    if (alpha == 0) return x86_opc_data_compare_opcode(p1, p2);
+    else return alpha;
+}
+
+static x86_table_idx x86_opc_table_index(size_t n)
+{
+    x86_table_idx tab = { n, malloc(sizeof(size_t) * n) };
+    for (size_t i = 0; i < tab.count; i++) tab.idx[i] = i;
+    return tab;
+}
+
+x86_table_idx x86_opc_table_identity(void)
+{
+    return x86_opc_table_index(x86_opc_table_size);
+}
+
+x86_table_idx x86_opc_table_sorted(x86_table_idx tab, uint sort)
+{
+    switch (sort) {
+    case x86_sort_none:
+        break;
+    case x86_sort_numeric:
+        qsort(tab.idx, tab.count, sizeof(size_t), x86_opc_data_compare_opcode);
+        break;
+    case x86_sort_alpha:
+        qsort(tab.idx, tab.count, sizeof(size_t), x86_opc_data_compare_alpha);
+        break;
+    }
+    return tab;
+}
+
+x86_table_idx x86_opc_table_filter(x86_table_idx tab, uint modes)
+{
+    size_t count = 0;
+    for (size_t i = 0; i < tab.count; i++) {
+        const x86_opc_data *d = x86_opc_table + tab.idx[i];
+        if (d->mode & modes) count++;
+    }
+    x86_table_idx newtab = { count, malloc(sizeof(size_t) * count) };
+    count = 0;
+    for (size_t i = 0; i < tab.count; i++) {
+        const x86_opc_data *d = x86_opc_table + tab.idx[i];
+        if (d->mode & modes) newtab.idx[count++] = tab.idx[i];
+    }
+    free(tab.idx);
+    return newtab;
+}
+
+static int x86_opc_data_compare_masked(const void *p1, const void *p2)
+{
+    x86_opc_data *om1 = (x86_opc_data *)p1;
+    x86_opc_data *om2 = (x86_opc_data *)p2;
+    uint enc1 = om1->enc, enc2 = om2->enc;
+    uint pre1 = enc1 & (x86_enc_t_mask | x86_enc_m_mask | x86_enc_prexw_mask);
+    uint pre2 = enc2 & (x86_enc_t_mask | x86_enc_m_mask | x86_enc_prexw_mask);
+    if (pre1 < pre2) return -1;
+    if (pre1 > pre2) return 1;
+    ushort mask_s = om1->opm_s & om2->opm_s;
+    if ((om1->opc_s & mask_s) < (om2->opc_s & mask_s)) return -1;
+    if ((om1->opc_s & mask_s) > (om2->opc_s & mask_s)) return 1;
+    /* the suffix only holds format info so is not needed for matching */
+    return 0;
+}
+
+static int x86_opc_data_compare_build(const void *p1, const void *p2)
+{
+    x86_opc_data *om1 = (x86_opc_data *)p1;
+    x86_opc_data *om2 = (x86_opc_data *)p2;
+    uint enc1 = om1->enc, enc2 = om2->enc;
+    uint pre1 = enc1 & (x86_enc_t_mask | x86_enc_m_mask | x86_enc_prexw_mask);
+    uint pre2 = enc2 & (x86_enc_t_mask | x86_enc_m_mask | x86_enc_prexw_mask);
+    if (pre1 < pre2) return -1;
+    if (pre1 > pre2) return 1;
+    for (size_t i = 0; i < 2; i++) {
+        if (om1->opc[i] < om2->opc[i]) return -1;
+        if (om1->opc[i] > om2->opc[i]) return 1;
+        /* most specific mask first for fixed modrm */
+        if (om1->opm[i] < om2->opm[i]) return 1;
+        if (om1->opm[i] > om2->opm[i]) return -1;
+    }
+    uint suf1 = enc1 & ~(x86_enc_t_mask | x86_enc_m_mask | x86_enc_prexw_mask);
+    uint suf2 = enc2 & ~(x86_enc_t_mask | x86_enc_m_mask | x86_enc_prexw_mask);
+    if (suf1 < suf2) return -1;
+    if (suf1 > suf2) return 1;
+    return 0;
+}
+
+static x86_opc_prefix x86_table_make_prefix(const x86_opc_data *d,
+    const x86_opr_data *o, const x86_ord_data *p)
+{
+    x86_opc_prefix tp;
+    memset(&tp, 0, sizeof(tp));
+
+    /* extract prefix and synthesize width prefixes */
+    switch (x86_enc_type(d->enc)) {
+    case x86_enc_t_lex:
+    case x86_enc_t_vex:
+    case x86_enc_t_evex:
+        switch (d->enc & x86_enc_w_mask) {
+        case x86_enc_w_wig:
+        case x86_enc_w_wn:
+        case x86_enc_w_wb:
+        case x86_enc_w_w0: break;
+        case x86_enc_w_w1: tp.pfx = x86_enc_p_rexw; break;
+        case x86_enc_w_wx: tp.pfx_w = x86_enc_p_rexw; /* fallthrough */
+        case x86_enc_w_ww: tp.pfx_o = x86_enc_p_66; break;
+        }
+        break;
+    }
+
+    /* find register or memory operand mapping to modrm.rm field
+     * so that we can add mod=0b11 or mod!=0b11 to modrm mask */
+    tp.modfun = x86_enc_func(d->enc) == x86_enc_f_modrm_n;
+    for (size_t i = 0; i < array_size(o->opr) && o->opr[i]; i++) {
+        uint isreg = x86_opr_type_val(o->opr[i]) >= x86_opr_reg;
+        uint ismem = x86_opr_has_mem(o->opr[i]);
+        uint ismrm = x86_ord_type_val(p->ord[i]) == x86_ord_mrm;
+        if (ismrm) {
+            if (isreg && !ismem) {
+                tp.modreg = 1; /* mod == 0b11 */
+                break;
+            } else if (!isreg && ismem) {
+                tp.modmem = 1; /* mod != 0b11 */
+                break;
+            }
+        }
+    }
+
+    /* explicit second opcode byte has mod == 0b11 */
+    if (d->opm[1] == 0xff && (d->opc[1] & 0xc0) == 0xc0 &&
+        !tp.modreg && !tp.modmem)
+    {
+        tp.modreg = 1;
+    }
+
+    return tp;
+}
+
+static void x86_build_prefix_clashes(x86_acc_idx *idx, x86_table_idx tab,
+    ullong *modfun, ullong *modmod)
+{
+    /*
+     * record modrm.reg /n or modrm.mod (reg or mem) usage
+     * so that opcodes with clashes can expand mod entries
+     */
+    for (size_t i = 0; i < tab.count; i++) {
+        const x86_opc_data *d = x86_opc_table + tab.idx[i];
+        const x86_opr_data *o = x86_opr_table + d->opr;
+        const x86_ord_data *p = x86_ord_table + d->ord;
+        x86_opc_prefix tp = x86_table_make_prefix(d, o, p);
+        uint type = x86_enc_type(d->enc) >> x86_enc_t_shift;
+        uint prefix = x86_enc_prefix(d->enc) >> x86_enc_p_shift;
+        uint map = x86_enc_map(d->enc) >> x86_enc_m_shift;
+        size_t tpm = x86_acc_page(type, prefix, map);
+        size_t x = (tpm << 8) | d->opc[0];
+        if (tp.modfun) {
+            x86_bitmap_set(modfun, x, 1);
+        }
+        if (tp.modreg || tp.modmem) {
+            x86_bitmap_set(modmod, x, 1);
+        }
+    }
+}
+
+static size_t x86_add_opc_data(x86_opc_data *op_map, size_t idx,
+    x86_opc_data rec, uint modreg, uint modmem, uint modcla)
+{
+    /*
+     * add entries to the opcode map, expanding mod entries for
+     * modreg or modmem constraints or for clashes with modrm.reg /n
+     */
+    if (op_map) {
+        if (modreg) {
+            /* add one entry with mod == 0b11 - ModRM.rm is register */
+            rec.opm[1] |= 0xc0;
+            rec.opc[1] |= 0xc0;
+            op_map[idx] = rec;
+        } else if (modmem) {
+            /* add three entries with mod != 0b11 - ModRM.rm is memory */
+            rec.opm[1] |= 0xc0;
+            rec.opc[1] = (rec.opc[1] & 0x3f) | 0x80;
+            op_map[idx] = rec;
+            rec.opc[1] = (rec.opc[1] & 0x3f) | 0x40;
+            op_map[idx + 1] = rec;
+            rec.opc[1] = (rec.opc[1] & 0x3f);
+            op_map[idx + 2] = rec;
+        } else if (modcla) {
+            /* add four entries mod (0b00..0b11) due to function clash */
+            rec.opm[1] |= 0xc0;
+            rec.opc[1] = (rec.opc[1] & 0x3f) | 0xc0;
+            op_map[idx] = rec;
+            rec.opc[1] = (rec.opc[1] & 0x3f) | 0x80;
+            op_map[idx + 1] = rec;
+            rec.opc[1] = (rec.opc[1] & 0x3f) | 0x40;
+            op_map[idx + 2] = rec;
+            rec.opc[1] = (rec.opc[1] & 0x3f);
+            op_map[idx + 3] = rec;
+        } else {
+            /* add entry unmodified */
+            op_map[idx] = rec;
+        }
+    }
+    return modreg ? 1 : modmem ? 3 : modcla ? 4 : 1;
+}
+
+static void x86_build_prefix_table(x86_acc_idx *idx,
+    x86_table_idx tab, x86_opc_data *op_map, size_t *count,
+    ullong *modfun, ullong *modmod)
+{
+    /*
+     * build the opcode map with synthesized prefixes and modrm expansion
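+     *
+     * map slot 0 stays unused (n starts at 1) so that the accel table
+     * can treat idx == 0 as "no entry"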
+     */
+    size_t n = 1;
+    for (size_t i = 0; i < tab.count; i++) {
+        const x86_opc_data *d = x86_opc_table + tab.idx[i];
+        const x86_opr_data *o = x86_opr_table + d->opr;
+        const x86_ord_data *p = x86_ord_table + d->ord;
+
+        uint type = x86_enc_type(d->enc) >> x86_enc_t_shift;
+        uint prefix = x86_enc_prefix(d->enc) >> x86_enc_p_shift;
+        uint map = x86_enc_map(d->enc) >> x86_enc_m_shift;
+        size_t tpm = x86_acc_page(type, prefix, map);
+        size_t x = (tpm << 8) | d->opc[0];
+        uint modcla = x86_bitmap_get(modfun, x) && x86_bitmap_get(modmod, x);
+        x86_opc_prefix tp = x86_table_make_prefix(d, o, p);
+
+        x86_opc_data rec = *d;
+        rec.enc |= tp.pfx;
+        n += x86_add_opc_data(op_map, n, rec,
+            tp.modreg, tp.modmem, modcla);
+        if (tp.pfx_w) {
+            rec = *d;
+            rec.enc |= tp.pfx | tp.pfx_w;
+            n += x86_add_opc_data(op_map, n, rec,
+                tp.modreg, tp.modmem, modcla);
+        }
+        if (tp.pfx_o) {
+            rec = *d;
+            rec.enc |= tp.pfx | tp.pfx_o;
+            n += x86_add_opc_data(op_map, n, rec,
+                tp.modreg, tp.modmem, modcla);
+        }
+    }
+
+    if (count) *count = n;
+}
+
+static size_t x86_build_accel_offsets(x86_acc_idx *idx)
+{
+    /*
+     * allocate offsets for type prefix map combinations
+     *
+     * offset zero means the slice is not allocated but page zero is
+     * preallocated as a special case for type:LEX, prefix:0, map:0
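+     *
+     * each page holds 256 x86_acc_entry slots, one per first opcode
+     * byte, hence the num_pages << 8 size returned below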
+     */
+    size_t num_pages = 1;
+    for (size_t i = 1; i < idx->map_count; i++) {
+        const x86_opc_data *m = idx->map + i;
+        uint type = x86_enc_type(m->enc) >> x86_enc_t_shift;
+        uint prefix = x86_enc_prefix(m->enc) >> x86_enc_p_shift;
+        uint map = x86_enc_map(m->enc) >> x86_enc_m_shift;
+        size_t acc_page = x86_acc_page(type, prefix, map);
+        if (acc_page > 0 && idx->page_offsets[acc_page] == 0) {
+            size_t page = num_pages++;
+            idx->page_offsets[acc_page] = page;
+        }
+    }
+    return num_pages << 8;
+}
+
+static void x86_build_accel_table(x86_acc_idx *idx, x86_acc_entry *acc)
+{
+    /*
+     * add entries to the acceleration table. the acceleration
+     * table contains ranges for all entries of a given opcode.
+     *
+     * (type, prefix, map, opcode) -> (index, count)
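+     *
+     * x86_table_lookup uses these ranges to narrow the masked binary
+     * search in x86_table_lookup_slow to nent records starting at idx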
+     */
+    for (size_t i = 1; i < idx->map_count; i++) {
+        const x86_opc_data *m = idx->map + i;
+        uint type = x86_enc_type(m->enc) >> x86_enc_t_shift;
+        uint prefix = x86_enc_prefix(m->enc) >> x86_enc_p_shift;
+        uint map = x86_enc_map(m->enc) >> x86_enc_m_shift;
+        size_t acc_page = x86_acc_page(type, prefix, map);
+        size_t offset = x86_acc_offset(idx, acc_page);
+        uint opc = m->opc[0], opc_i = opc, opm = m->opm[0];
+        while ((opc_i & opm) == opc) {
+            if (acc[offset + opc_i].idx == 0) {
+                acc[offset + opc_i].idx = i;
+            }
+            acc[offset + opc_i].nent++;
+            opc_i++;
+        }
+    }
+}
+
+static x86_acc_idx *x86_table_build(uint modes)
+{
+    x86_acc_idx *idx = calloc(1, sizeof(x86_acc_idx));
+    x86_table_idx tab = x86_opc_table_sorted(x86_opc_table_filter(
+        x86_opc_table_identity(), modes), x86_sort_numeric);
+    ullong *modfun = (ullong *)calloc(2048, sizeof(ullong));
+    ullong *modmod = (ullong *)calloc(2048, sizeof(ullong));
+    x86_build_prefix_clashes(idx, tab, modfun, modmod);
+    x86_build_prefix_table(idx, tab, NULL, &idx->map_count, modfun, modmod);
+    idx->map = calloc(idx->map_count, sizeof(x86_opc_data));
+    x86_build_prefix_table(idx, tab, idx->map, NULL, modfun, modmod);
+    qsort(idx->map, idx->map_count, sizeof(x86_opc_data),
+        x86_opc_data_compare_build);
+    idx->page_offsets = calloc(512, sizeof(uchar));
+    idx->acc_count = x86_build_accel_offsets(idx);
+    idx->acc = calloc(sizeof(x86_acc_entry), idx->acc_count);
+    x86_build_accel_table(idx, idx->acc);
+    free(tab.idx);
+    free(modfun);
+    free(modmod);
+    return idx;
+}
+
+static x86_opc_data *x86_table_lookup_slow(x86_acc_idx *idx,
+    const x86_opc_data *m)
+{
+    size_t begin = 0, end = idx->map_count;
+    while (end != 0) {
+        size_t half = (end >> 1), probe = begin + half;
+        if (x86_opc_data_compare_masked(m, idx->map + probe) > 0) {
+            begin = probe + 1;
+            end -= half + 1;
+        } else {
+            end = half;
+        }
+    }
+    return idx->map + begin;
+}
+
+x86_opc_data *x86_table_lookup(x86_acc_idx *idx, const x86_opc_data *m)
+{
+    uint type = x86_enc_type(m->enc) >> x86_enc_t_shift;
+    uint prefix = x86_enc_prefix(m->enc) >> x86_enc_p_shift;
+    uint map = x86_enc_map(m->enc) >> x86_enc_m_shift;
+    size_t acc_page = x86_acc_page(type, prefix, map);
+    size_t offset = x86_acc_offset(idx, acc_page) + m->opc[0];
+    x86_acc_entry *ent = x86_acc_lookup(idx, offset);
+    x86_acc_idx new_idx = { ent->nent, idx->map + ent->idx };
+    return x86_table_lookup_slow(&new_idx, m);
+}
+
+/*
+ * table printing utilities
+ */
+
+static x86_table_col x86_new_column(int width, char *data)
+{
+    x86_table_col col = { width, strdup(data) };
+    return col;
+}
+
+static void x86_print_row(size_t count, x86_table_col *cols)
+{
+    printf("|");
+    for (size_t i = 0; i < count; i++) {
+        printf(" %-*s |", cols[i].width, cols[i].data);
+        free(cols[i].data);
+    }
+    printf("\n");
+}
+
+static size_t x86_format_enc(char *buf, size_t buflen, const x86_opc_data *d)
+{
+    size_t len = 0;
+
+    uint s = x86_enc_suffix(d->enc);
+    uint i = x86_enc_imm(d->enc);
+    uint j = x86_enc_imm2(d->enc);
+    uint enc = x86_enc_leading(d->enc);
+
+    len += x86_enc_name(buf + len, buflen - len, enc);
+
+    switch (x86_enc_opcode(enc)) {
+    case x86_enc_o_opcode_r:
+        len += snprintf(buf + len, buflen - len, " %02hhx+r", d->opc[0]);
+        break;
+    default:
+        len += snprintf(buf + len, buflen - len, " %02hhx", d->opc[0]);
+        break;
+    }
+
+    switch (x86_enc_func(enc)) {
+    case x86_enc_f_modrm_r:
+        len += snprintf(buf + len, buflen - len, " /r");
+        break;
+    case x86_enc_f_modrm_n:
+        len += snprintf(buf + len, buflen - len, " /%d", (d->opc[1] >> 3) & 7);
+        break;
+    case x86_enc_f_opcode_r:
+        len += snprintf(buf + len, buflen - len, " %02hhx+r", d->opc[1]);
+        break;
+    case x86_enc_f_opcode:
+        len += snprintf(buf + len, buflen - len, " %02hhx", d->opc[1]);
+        break;
+    }
+
+    if (i) {
+        len += x86_enc_name(buf + len, buflen - len, i);
+    }
+    if (j) {
+        len += x86_enc_name(buf + len, buflen - len, j);
+    }
+    if (s) {
+        len += x86_enc_name(buf + len, buflen - len, s);
+    }
+
+    return len;
+}
+
+void x86_print_op(const x86_opc_data *d, uint compact, uint opcode)
+{
+    char buf[256];
+    x86_table_col cols[6];
+    size_t count = 0, buflen = sizeof(buf), len;
+
+    const x86_opr_data *o = x86_opr_table + d->opr;
+    const x86_ord_data *p = x86_ord_table + d->ord;
+
+    buf[(len = 0)] = '\0';
+    if (compact) {
+        len += snprintf(buf + len, buflen - len, "%s", x86_op_names[d->op]);
+        cols[count++] = x86_new_column(18, buf);
+    } else {
+        len += snprintf(buf + len, buflen - len, "%s ", x86_op_names[d->op]);
+        for (size_t i = 0; i < array_size(o->opr) && o->opr[i]; i++) {
+            if (i != 0) len += snprintf(buf + len, buflen - len, ",");
+            len += x86_opr_name(buf + len, buflen - len, o->opr[i]);
+        }
+        cols[count++] = x86_new_column(52, buf);
+    }
+
+    if (opcode) {
+        buf[(len = 0)] = '\0';
+        len += snprintf(buf + len, buflen - len, "%02hhx %02hhx",
+            d->opc[0], d->opc[1]);
+        cols[count++] = x86_new_column(5, buf);
+        buf[(len = 0)] = '\0';
+        len += snprintf(buf + len, buflen - len, "%02hhx %02hhx",
+            d->opm[0], d->opm[1]);
+        cols[count++] = x86_new_column(5, buf);
+    }
+
+    if (compact) {
+        buf[(len = 0)] = '\0';
+        len += x86_ord_mnem(buf + len, buflen - len, p->ord);
+        cols[count++] = x86_new_column(4, buf);
+    }
+
+    buf[(len = 0)] = '\0';
+    len += x86_format_enc(buf, buflen - len, d);
+    cols[count++] = x86_new_column(31, buf);
+
+    if (!compact) {
+        buf[(len = 0)] = '\0';
+        for (size_t i = 0; i < array_size(p->ord) && p->ord[i]; i++) {
+            if (i != 0) len += snprintf(buf + len, buflen - len, ",");
+            len += x86_ord_name(buf + len, buflen - len, p->ord[i], "/");
+        }
+        cols[count++] = x86_new_column(23, buf);
+    }
+
+    buf[(len = 0)] = '\0';
+    len += x86_mode_name(buf + len, buflen - len, d->mode, "/");
+    cols[count++] = x86_new_column(8, buf);
+
+    x86_print_row(count, cols);
+}
+
+/*
+ * encoding / decoding
+ */
+
+int x86_codec_write(x86_ctx *ctx, x86_buffer *buf, x86_codec c, size_t *len)
+{
+    size_t nbytes = 0;
+
+    /* segment prefix */
+    switch (c.seg) {
+    case x86_seg_es: nbytes += x86_out8(buf, x86_pb_es); break;
+    case x86_seg_cs: nbytes += x86_out8(buf, x86_pb_cs); break;
+    case x86_seg_ss: nbytes += x86_out8(buf, x86_pb_ss); break;
+    case x86_seg_ds: nbytes += x86_out8(buf, x86_pb_ds); break;
+    case x86_seg_fs: nbytes += x86_out8(buf, x86_pb_fs); break;
+    case x86_seg_gs: nbytes += x86_out8(buf, x86_pb_gs); break;
+    }
+
+    /* other prefixes */
+    if (x86_codec_has_osize(&c)) {
+        nbytes += x86_out8(buf, x86_pb_osize);
+    }
+    if (x86_codec_has_asize(&c)) {
+        nbytes += x86_out8(buf, x86_pb_asize);
+    }
+    if (x86_codec_has_wait(&c)) {
+        nbytes += x86_out8(buf, x86_pb_wait);
+    }
+    if (x86_codec_has_lock(&c)) {
+        nbytes += x86_out8(buf, x86_pb_lock);
+    }
+    if (x86_codec_has_rep(&c)) {
+        nbytes += x86_out8(buf, x86_pb_rep);
+    }
+    if (x86_codec_has_repne(&c)) {
+        nbytes += x86_out8(buf, x86_pb_repne);
+    }
+
+    /* extended prefixes */
+    switch (x86_codec_field_ce(&c) >> x86_ce_shift) {
+        case x86_ce_rex >> x86_ce_shift:
+            nbytes += x86_out8(buf, c.rex.data[0]);
+            break;
+        case x86_ce_rex2 >> x86_ce_shift:
+            nbytes += x86_out8(buf, x86_pb_rex2);
+            nbytes += x86_out8(buf, c.rex2.data[0]);
+            break;
+        case x86_ce_vex2 >> x86_ce_shift:
+            nbytes += x86_out8(buf, x86_pb_vex2);
+            nbytes += x86_out8(buf, c.vex2.data[0]);
+            break;
+        case x86_ce_vex3 >> x86_ce_shift:
+            nbytes += x86_out8(buf, x86_pb_vex3);
+            nbytes += x86_out8(buf, c.vex3.data[0]);
+            nbytes += x86_out8(buf, c.vex3.data[1]);
+            break;
+        case x86_ce_evex >> x86_ce_shift:
+            nbytes += x86_out8(buf, x86_pb_evex);
+            nbytes += x86_out8(buf, c.evex.data[0]);
+            nbytes += x86_out8(buf, c.evex.data[1]);
+            nbytes += x86_out8(buf, c.evex.data[2]);
+            break;
+    }
+
+    /* map */
+    switch (x86_codec_field_cm(&c) >> x86_cm_shift) {
+    case x86_cm_none >> x86_cm_shift:
+        break;
+    case x86_cm_0f   >> x86_cm_shift:
+        nbytes += x86_out8(buf, 0x0f);
+        break;
+    case x86_cm_0f38 >> x86_cm_shift:
+        nbytes += x86_out16(buf, 0x380f);
+        break;
+    case x86_cm_0f3a >> x86_cm_shift:
+        nbytes += x86_out16(buf, 0x3a0f);
+        break;
+    }
+
+    /* opcode */
+    for (size_t i = 0; i < c.opclen; i++) {
+        nbytes += x86_out8(buf, c.opc[i]);
+    }
+
+    /* ModRM and SIB */
+    int b;
+    if (x86_codec_has_modrm(&c)) {
+        nbytes += x86_out8(buf, c.modrm.data[0]);
+
+        uchar rm = x86_modrm_rm(c.modrm.data[0]);
+        uchar mod = x86_modrm_mod(c.modrm.data[0]);
+
+        switch (mod) {
+        case x86_mod_disp0:
+        case x86_mod_disp8:
+        case x86_mod_dispw:
+            /* there is no SIB in real mode */
+            if (!x86_codec_is16(&c) && rm == x86_rm_sp_sib) {
+                nbytes += x86_out8(buf, c.sib.data[0]);
+            }
+            break;
+        case x86_mod_reg: break;
+        }
+        switch (mod) {
+        case x86_mod_disp0:
+            /* special case for rm/b == bp */
+            b = x86_sib_b(c.sib.data[0]);
+            if (rm == x86_rm_bp_disp0 ||
+                (rm == x86_rm_sp_sib && b == x86_rm_bp_disp0))
+            {
+                if (x86_codec_is16(&c)) {
+                    nbytes += x86_out16(buf, (u16)c.disp32);
+                } else {
+                    /* this is RIP-relative in amd64 mode */
+                    nbytes += x86_out32(buf, (u32)c.disp32);
+                }
+            }
+            break;
+        case x86_mod_disp8:
+            nbytes += x86_out8(buf, (u8)c.disp32);
+            break;
+        case x86_mod_dispw:
+            if (x86_codec_is16(&c)) {
+                nbytes += x86_out16(buf, (u16)c.disp32); break;
+            } else {
+                nbytes += x86_out32(buf, (u32)c.disp32); break;
+            }
+        case x86_mod_reg: break;
+        }
+    }
+
+    /* immediate */
+    switch (x86_codec_field_ci(&c) >> x86_ci_shift) {
+    case x86_ci_iw >> x86_ci_shift:
+        if (x86_codec_is16(&c) ^ x86_codec_has_osize(&c)) {
+            nbytes += x86_out16(buf, (u16)c.imm32);
+        } else {
+            nbytes += x86_out32(buf, (u32)c.imm32);
+        }
+        break;
+    case x86_ci_iwd >> x86_ci_shift:
+        if (x86_codec_is16(&c)) {
+            nbytes += x86_out16(buf, (u16)c.imm32);
+        } else {
+            nbytes += x86_out32(buf, (u32)c.imm32);
+        }
+        break;
+    case x86_ci_ib >> x86_ci_shift:
+        nbytes += x86_out8(buf, (u8)c.imm32);
+        break;
+    case x86_ci_i16 >> x86_ci_shift:
+        nbytes += x86_out16(buf, (u16)c.imm32);
+        break;
+    case x86_ci_i32 >> x86_ci_shift:
+        nbytes += x86_out32(buf, (u32)c.imm32);
+        break;
+    case x86_ci_i64 >> x86_ci_shift:
+        nbytes += x86_out64(buf, (u64)c.imm64);
+        break;
+    }
+
+    /* additional immediate used by CALLF/JMPF/ENTER */
+    switch (x86_codec_field_cj(&c) >> x86_cj_shift) {
+    case x86_cj_ib >> x86_cj_shift:
+        nbytes += x86_out8(buf, (u8)c.imm2);
+        break;
+    case x86_cj_i16 >> x86_cj_shift:
+        nbytes += x86_out16(buf, (u16)c.imm2);
+        break;
+    }
+
+    *len = nbytes;
+    return 0;
+}
+
+static int x86_filter_op(x86_codec *c, x86_opc_data *d, uint w)
+{
+    if (x86_codec_is16(c) && !x86_mode_has16(d->mode)) return -1;
+    if (x86_codec_is32(c) && !x86_mode_has32(d->mode)) return -1;
+    if (x86_codec_is64(c) && !x86_mode_has64(d->mode)) return -1;
+
+    switch (x86_codec_field_ce(c) >> x86_ce_shift) {
+    case x86_ce_rex >> x86_ce_shift:
+        if (x86_enc_filter_rex(c->rex, d->enc) < 0) return -1;
+        break;
+    case x86_ce_rex2 >> x86_ce_shift:
+        if (x86_enc_filter_rex2(c->rex2, d->enc) < 0) return -1;
+        break;
+    case x86_ce_vex2 >> x86_ce_shift:
+        if (x86_enc_filter_vex2(c->vex2, d->enc) < 0) return -1;
+        break;
+    case x86_ce_vex3 >> x86_ce_shift:
+        if (x86_enc_filter_vex3(c->vex3, d->enc) < 0) return -1;
+        break;
+    case x86_ce_evex >> x86_ce_shift:
+        if (x86_enc_filter_evex(c->evex, d->enc) < 0) return -1;
+        break;
+    }
+
+    if (x86_enc_has_a16(d->enc)) {
+        if (!x86_codec_is16(c) &&
+            !(x86_codec_is32(c) && x86_codec_has_asize(c))) return -1;
+    }
+    if (x86_enc_has_a32(d->enc)) {
+        if (!x86_codec_is32(c) &&
+            !(x86_codec_is64(c) && x86_codec_has_asize(c))) return -1;
+    }
+    if (x86_enc_has_a64(d->enc)) {
+        if (!x86_codec_is64(c) || x86_codec_has_asize(c)) return -1;
+    }
+
+    if (x86_enc_has_o16(d->enc)) {
+        switch (x86_enc_width(d->enc)) {
+        case x86_enc_w_ww:
+        case x86_enc_w_wx:
+            if (!(x86_codec_is16(c) ^ x86_codec_has_osize(c)) || w) return -1;
+            break;
+        }
+    }
+    if (x86_enc_has_o32(d->enc)) {
+        switch (x86_enc_width(d->enc)) {
+        case x86_enc_w_ww:
+            /* .ww means no 32-bit operands in 64-bit mode */
+            if ((x86_codec_is16(c) ^ x86_codec_has_osize(c)) ||
+                 x86_codec_is64(c)) return -1;
+            break;
+        case x86_enc_w_wx:
+            if ((x86_codec_is16(c) ^ x86_codec_has_osize(c)) || w) return -1;
+            break;
+        }
+    }
+    if (x86_enc_has_o64(d->enc)) {
+        switch (x86_enc_width(d->enc)) {
+        case x86_enc_w_ww:
+            /* .ww means W=1 is ignored in 64-bit mode */
+            if (!x86_codec_is64(c)) return -1;
+            break;
+        case x86_enc_w_wx:
+            if (!x86_codec_is64(c) || !w) return -1;
+            break;
+        }
+    }
+
+    return 0;
+}
+
+static size_t x86_parse_encoding(x86_buffer *buf, x86_codec *c,
+    x86_opc_data *d)
+{
+    size_t nbytes = 0;
+
+    /* parse SIB and displacement */
+    int b;
+    if (x86_codec_has_modrm(c)) {
+        uchar rm = x86_modrm_rm(c->modrm.data[0]);
+        uchar mod = x86_modrm_mod(c->modrm.data[0]);
+        switch (mod) {
+        case x86_mod_disp0:
+        case x86_mod_disp8:
+        case x86_mod_dispw:
+            /* there is no SIB byte with 16-bit addressing */
+            if (!x86_codec_is16(c) && rm == x86_rm_sp_sib) {
+                c->sib.data[0] = (u8)x86_in8(buf); nbytes += 1;
+            }
+            break;
+        case x86_mod_reg:
+            break;
+        }
+        switch (mod) {
+        case x86_mod_disp0:
+            /* special case for rm/b == bp */
+            b = x86_sib_b(c->sib.data[0]);
+            if (rm == x86_rm_bp_disp0 ||
+                (rm == x86_rm_sp_sib && b == x86_rm_bp_disp0))
+            {
+                if (x86_codec_is16(c)) {
+                    c->disp32 = (i16)x86_in16(buf); nbytes += 2;
+                } else {
+                    /* this is RIP-relative in amd64 mode */
+                    c->disp32 = (i32)x86_in32(buf); nbytes += 4;
+                }
+            }
+            break;
+        case x86_mod_disp8:
+            c->disp32 = (i8)x86_in8(buf); nbytes += 1;
+            break;
+        case x86_mod_dispw:
+            if (x86_codec_is16(c)) {
+                c->disp32 = (i16)x86_in16(buf); nbytes += 2;
+            } else {
+                c->disp32 = (i32)x86_in32(buf); nbytes += 4;
+            }
+            break;
+        case x86_mod_reg:
+            break;
+        }
+    }
+
+    /* parse immediate */
+    switch (x86_enc_imm(d->enc) >> x86_enc_i_shift) {
+    case x86_enc_i_ib >> x86_enc_i_shift:
+        c->imm32 = (i8)x86_in8(buf); nbytes += 1;
+        c->flags |= x86_ci_ib;
+        break;
+    case x86_enc_i_iw >> x86_enc_i_shift:
+        if (x86_codec_is16(c) ^ x86_codec_has_osize(c)) {
+            c->imm32 = (i16)x86_in16(buf); nbytes += 2;
+        } else {
+            c->imm32 = (i32)x86_in32(buf); nbytes += 4;
+        }
+        c->flags |= x86_ci_iw;
+        break;
+    case x86_enc_i_iwd >> x86_enc_i_shift:
+        if (x86_codec_is16(c)) {
+            c->imm32 = (i16)x86_in16(buf); nbytes += 2;
+        } else {
+            c->imm32 = (i32)x86_in32(buf); nbytes += 4;
+        }
+        c->flags |= x86_ci_iwd;
+        break;
+    case x86_enc_i_i16 >> x86_enc_i_shift:
+        c->imm32 = (i16)x86_in16(buf);  nbytes += 2;
+        c->flags |= x86_ci_i16;
+        break;
+    case x86_enc_i_i32 >> x86_enc_i_shift:
+        c->imm32 = (i32)x86_in32(buf);  nbytes += 4;
+        c->flags |= x86_ci_i32;
+        break;
+    case x86_enc_i_i64 >> x86_enc_i_shift:
+        c->imm64 = (i64)x86_in64(buf);  nbytes += 8;
+        c->flags |= x86_ci_i64;
+        break;
+    }
+
+    /* additional immediate used by CALLF/JMPF/ENTER */
+    switch (x86_enc_imm2(d->enc) >> x86_enc_j_shift) {
+    case x86_enc_j_ib >> x86_enc_j_shift:
+        c->imm2 = (i8)x86_in8(buf); nbytes += 1;
+        c->flags |= x86_cj_ib;
+        break;
+    case x86_enc_j_i16 >> x86_enc_j_shift:
+        c->imm2 = (i16)x86_in16(buf); nbytes += 2;
+        c->flags |= x86_cj_i16;
+        break;
+    }
+
+    return nbytes;
+}
+
+static x86_operands x86_codec_operands(x86_ctx *ctx, x86_codec *c)
+{
+    x86_operands q;
+    memset(&q, 0, sizeof(q));
+
+    const x86_opc_data *d = ctx->idx->map + c->rec;
+
+    q.osz = x86_codec_has_osize(c);
+
+    if (x86_codec_has_modrm(c)) {
+        uchar rm = x86_modrm_rm(c->modrm.data[0]);
+        uchar reg = x86_modrm_reg(c->modrm.data[0]);
+        uchar mod = x86_modrm_mod(c->modrm.data[0]);
+
+        /*
+         * q.rm contains unextended value from ModRM.rm
+         * and is used to indicate SIB/disp encoding.
+         *
+         * if SIB present, copy SIB.b into q.b
+         * if SIB not present, copy ModRM.rm into q.b
+         *
+         * q.b contains extended ModRM.rm or SIB.b
+         */
+
+        q.mod = mod;
+        q.rm = rm;
+        q.r = reg;
+
+        switch (mod) {
+        case x86_mod_disp0:
+        case x86_mod_disp8:
+        case x86_mod_dispw:
+            if (!x86_codec_is16(c) && rm == x86_rm_sp_sib) {
+                q.b = x86_sib_b(c->sib.data[0]);
+                q.x = x86_sib_x(c->sib.data[0]);
+                q.s = x86_sib_s(c->sib.data[0]);
+            } else {
+                q.b = q.rm;
+            }
+            break;
+        case x86_mod_reg:
+            q.b = q.rm;
+            break;
+        }
+    } else if (d->enc & x86_enc_o_opcode_r) {
+        q.b = c->opc[0] & 7;
+    } else if (d->enc & x86_enc_f_opcode_r) {
+        q.b = c->opc[1] & 7;
+    }
+
+    switch (x86_codec_field_ce(c) >> x86_ce_shift) {
+    case x86_ce_rex >> x86_ce_shift:
+        q.b |=  (c->rex.data[0] &    1) << 3; /* [0] -> b[3]*/
+        q.x |=  (c->rex.data[0] &    2) << 2; /* [1] -> x[3]*/
+        q.r |=  (c->rex.data[0] &    4) << 1; /* [2] -> r[3]*/
+        q.w  =  (c->rex.data[0] &    8) >> 3;
+        break;
+    case x86_ce_rex2 >> x86_ce_shift:
+        q.b |=  (c->rex2.data[0] &   1) << 3; /* [0] -> b[3]*/
+        q.x |=  (c->rex2.data[0] &   2) << 2; /* [1] -> x[3]*/
+        q.r |=  (c->rex2.data[0] &   4) << 1; /* [2] -> r[3]*/
+        q.w  =  (c->rex2.data[0] &   8) >> 3;
+        q.b |=  (c->rex2.data[0] &  16) >> 0; /* [4] -> b[4]*/
+        q.x |=  (c->rex2.data[0] &  32) >> 1; /* [5] -> x[4]*/
+        q.r |=  (c->rex2.data[0] &  64) >> 2; /* [6] -> r[4]*/
+        break;
+    case x86_ce_vex2 >> x86_ce_shift:
+        q.r |= (~c->vex2.data[0] & 128) >> 4; /* [7] -> r[3] */
+        q.l  =  (c->vex2.data[0] >>  2) & 1;
+        q.v  = (~c->vex2.data[0] >>  3) & 15;
+        q.osz = (c->vex2.data[0] & 3) == x86_pfx_66;
+        break;
+    case x86_ce_vex3 >> x86_ce_shift:
+        q.b |= (~c->vex3.data[0] &  32) >> 2; /* [5] -> b[3]*/
+        q.x |= (~c->vex3.data[0] &  64) >> 3; /* [6] -> x[3]*/
+        q.r |= (~c->vex3.data[0] & 128) >> 4; /* [7] -> r[3]*/
+        q.l  =  (c->vex3.data[1] >>  2) & 1;
+        q.v  = (~c->vex3.data[1] >>  3) & 15;
+        q.w  =  (c->vex3.data[1] >>  7) & 1;
+        q.osz = (c->vex3.data[1] & 3) == x86_pfx_66;
+        break;
+    case x86_ce_evex >> x86_ce_shift:
+        q.b |= (~c->evex.data[0] &  32) >> 2; /* [5] -> b[3]*/
+        q.x |= (~c->evex.data[0] &  64) >> 3; /* [6] -> x[3]*/
+        q.r |= (~c->evex.data[0] & 128) >> 4; /* [7] -> r[3]*/
+        q.b |=  (c->evex.data[0] &   8) << 1; /* [3] -> b[4]*/
+        q.x |= (~c->evex.data[1] &   4) << 2; /* [2] -> x[4]*/
+        q.r |= (~c->evex.data[0] &  16) >> 0; /* [4] -> r[4]*/
+        q.v  = (~c->evex.data[1] >>  3) & 15;
+        q.v |= (~c->evex.data[2] &   8) << 1; /* [3] -> v[4]*/
+        q.k  =  (c->evex.data[2] >>  0) & 7;
+        q.l  =  (c->evex.data[2] >>  5) & 3;
+        q.brd = (c->evex.data[2] >>  4) & 1;
+        q.osz = (c->evex.data[1] & 3) == x86_pfx_66;
+        break;
+    }
+
+    return q;
+}
+
+static inline x86_arg x86_codec_meta(uint enc, uint opr, uint ord,
+    x86_operands q)
+{
+    x86_arg a = { enc, opr, ord, q };
+    return a;
+}
+
+static uint x86_codec_addr_size(x86_codec *c)
+{
+    /* todo - handle address size prefix */
+    if (x86_codec_is32(c)) return x86_opr_size_32;
+    if (x86_codec_is64(c)) return x86_opr_size_64;
+    return x86_opr_size_16;
+}
+
+static const char *x86_ptr_size_str(uint sz)
+{
+    switch (sz) {
+    case x86_opr_size_8: return "byte ptr ";
+    case x86_opr_size_16: return "word ptr ";
+    case x86_opr_size_32: return "dword ptr ";
+    case x86_opr_size_64: return "qword ptr ";
+    case x86_opr_size_80: return "tbyte ptr ";
+    case x86_opr_size_128: return "xmmword ptr ";
+    case x86_opr_size_256: return "ymmword ptr ";
+    case x86_opr_size_512: return "zmmword ptr ";
+    default: return "";
+    }
+}
+
+static uint x86_opr_reg_size(x86_codec *c, x86_arg a)
+{
+    uint oprty = x86_opr_type_val(a.opr);
+    uint oprsz = x86_opr_size_val(a.opr);
+    uint oprmem = x86_opr_mem_val(a.opr);
+
+    /* 'rw' or 'mw' deduce size from mode, operand size prefix and REX.W */
+    if ((oprty == x86_opr_reg && oprsz == x86_opr_size_w) ||
+        (oprmem == x86_opr_mw) ||
+        (a.opr == x86_opr_moffs || a.opr == x86_opr_reg_psi ||
+         a.opr == x86_opr_reg_pdi))
+    {
+        switch (x86_enc_width(a.enc)) {
+        case x86_enc_w_wb: return x86_opr_size_8;
+        case x86_enc_w_ww:
+            if (x86_codec_is16(c))
+                return a.q.osz ? x86_opr_size_32 : x86_opr_size_16;
+            if (x86_codec_is32(c))
+                return a.q.osz ? x86_opr_size_16 : x86_opr_size_32;
+            if (x86_codec_is64(c))
+                return a.q.osz ? x86_opr_size_16 : x86_opr_size_64;
+            break;
+        case x86_enc_w_wx:
+            if (x86_codec_is16(c))
+                return a.q.osz ? x86_opr_size_32 : x86_opr_size_16;
+            if (x86_codec_is32(c))
+                return a.q.osz ? x86_opr_size_16 : x86_opr_size_32;
+            if (x86_codec_is64(c))
+                return a.q.osz ? x86_opr_size_16 :
+                        a.q.w ? x86_opr_size_64 : x86_opr_size_32;
+            break;
+        case x86_enc_w_w0: return x86_opr_size_32;
+        case x86_enc_w_w1: return x86_opr_size_64;
+        default: break;
+        }
+    }
+
+    /* operand contains the register size */
+    if (oprsz != 0 && oprsz != x86_opr_size_w && oprsz != x86_opr_size_a) {
+        return oprsz;
+    }
+
+    return 0;
+}
+
+static uint x86_opr_ptr_size(x86_codec *c, x86_arg a)
+{
+    uint memsz = x86_opr_mem_size(a.opr);
+    if (memsz == x86_opr_size_w) {
+        memsz = x86_opr_reg_size(c, a);
+    }
+    return memsz;
+}
+
+static uint x86_sized_gpr(x86_codec *c, uint reg, uint opr)
+{
+    switch (x86_opr_size_val(opr)) {
+    case x86_opr_size_8:
+        /* legacy encoding selects ah/ch/dh/bh instead of spl/bpl/sil/dil */
+        if ((x86_codec_field_ce(c)) == x86_ce_none &&
+           ((reg & 31) >= 4 && (reg & 31) < 8)) return x86_reg_bl | (reg & 31);
+        return x86_reg_b | (reg & 31);
+    case x86_opr_size_16: return x86_reg_w | (reg & 31);
+    case x86_opr_size_32: return x86_reg_d | (reg & 31);
+    case x86_opr_size_64: return x86_reg_q | (reg & 31);
+    default: return reg;
+    }
+}
+
+static uint x86_sized_vec(uint reg, uint opr)
+{
+    switch (x86_opr_size_val(opr)) {
+    case x86_opr_size_64:  return x86_reg_mmx | (reg & 7);
+    case x86_opr_size_128: return x86_reg_xmm | (reg & 31);
+    case x86_opr_size_256: return x86_reg_ymm | (reg & 31);
+    case x86_opr_size_512: return x86_reg_zmm | (reg & 31);
+    default: return reg;
+    }
+}
+
+static uint x86_regsz_bytes(uint regsz)
+{
+    switch (regsz) {
+    case x86_opr_size_8: return 1;
+    case x86_opr_size_16: return 2;
+    case x86_opr_size_32: return 4;
+    case x86_opr_size_64: return 8;
+    case x86_opr_size_128: return 16;
+    case x86_opr_size_256: return 32;
+    case x86_opr_size_512: return 64;
+    default: break;
+    }
+    return 1;
+}
+
+x86_opr_formats x86_opr_formats_intel_hex =
+{
+    .ptr_rip            = "%s[rip]",
+    .ptr_rip_disp       = "%s[rip %s 0x%x]",
+    .ptr_reg            = "%s[%s]",
+    .ptr_reg_disp       = "%s[%s %s 0x%x]",
+    .ptr_reg_sreg       = "%s[%s + %d*%s]",
+    .ptr_reg_sreg_disp  = "%s[%s + %d*%s %s 0x%x]",
+    .ptr_reg_reg        = "%s[%s + %s]",
+    .ptr_reg_reg_disp   = "%s[%s + %s %s 0x%x]",
+    .ptr_sreg           = "%s[%d*%s]",
+    .ptr_disp           = "%s[%s0x%x]",
+    .ptr_imm64          = "%s[%s0x%llx]",
+    .ptr_imm32          = "%s[%s0x%x]",
+    .imm64              = "%s0x%llx",
+    .imm32              = "%s0x%x",
+    .reg                = "%s",
+};
+
+x86_opr_formats x86_opr_formats_intel_dec =
+{
+    .ptr_rip            = "%s[rip]",
+    .ptr_rip_disp       = "%s[rip %s %u]",
+    .ptr_reg            = "%s[%s]",
+    .ptr_reg_disp       = "%s[%s %s %u]",
+    .ptr_reg_sreg       = "%s[%s + %d*%s]",
+    .ptr_reg_sreg_disp  = "%s[%s + %d*%s %s %u]",
+    .ptr_reg_reg        = "%s[%s + %s]",
+    .ptr_reg_reg_disp   = "%s[%s + %s %s %u]",
+    .ptr_sreg           = "%s[%d*%s]",
+    .ptr_disp           = "%s[%s%u]",
+    .ptr_imm64          = "%s[%s%llu]",
+    .ptr_imm32          = "%s[%s%u]",
+    .imm64              = "%s%llu",
+    .imm32              = "%s%u",
+    .reg                = "%s",
+};
+
+static size_t x86_opr_intel_reg_str_internal(char *buf, size_t buflen,
+    x86_codec *c, x86_arg a, uint reg)
+{
+    size_t len = 0;
+
+    switch (x86_opr_type_val(a.opr)) {
+    case x86_opr_reg: len = snprintf(buf, buflen, "%s",
+        x86_reg_name(x86_sized_gpr(c, reg,
+        x86_opr_reg_size(c, a)))); break;
+    case x86_opr_vec: len = snprintf(buf, buflen, "%s",
+        x86_reg_name(x86_sized_vec(reg, a.opr))); break;
+    case x86_opr_k: len = snprintf(buf, buflen, "%s",
+        x86_reg_name(x86_reg_kmask | (reg & 7))); break;
+    case x86_opr_mmx: len = snprintf(buf, buflen, "%s",
+        x86_reg_name(x86_reg_mmx | (reg & 7))); break;
+    case x86_opr_st: len = snprintf(buf, buflen, "%s",
+        x86_reg_name(x86_reg_fpu | (reg & 7))); break;
+    case x86_opr_bnd: len = snprintf(buf, buflen, "%s",
+        x86_reg_name(x86_reg_bnd | (reg & 7))); break;
+    case x86_opr_seg: len = snprintf(buf, buflen, "%s",
+        x86_reg_name(x86_reg_sreg | (reg & 7)));  break;
+    case x86_opr_creg: len = snprintf(buf, buflen, "%s",
+        x86_reg_name(x86_reg_creg | (reg & 15))); break;
+    case x86_opr_dreg: len = snprintf(buf, buflen, "%s",
+        x86_reg_name(x86_reg_dreg | (reg & 15))); break;
+    default: len = snprintf(buf, buflen, "%s", "unknown"); break;
+    }
+
+    if ((a.q.k & 7) > 0 && (a.opr & x86_opr_flag_k) != 0) {
+        len += snprintf(buf + len, buflen - len, " {%s}",
+                        x86_reg_name(x86_reg_kmask | (a.q.k & 7)));
+    }
+
+    return len;
+}
+
+static uint x86_opr_bcst_size(uint opr)
+{
+    switch (x86_opr_bcst_val(opr)) {
+    case x86_opr_m16bcst: return x86_opr_size_16;
+    case x86_opr_m32bcst: return x86_opr_size_32;
+    case x86_opr_m64bcst: return x86_opr_size_64;
+    }
+    return 0;
+}
+
+static size_t x86_opr_intel_mrm_str_internal(char *buf, size_t buflen,
+    x86_codec *c, x86_arg a, x86_opr_formats *fmt)
+{
+    uint regsz = x86_opr_reg_size(c, a);
+    uint ptrsz = x86_opr_ptr_size(c, a);
+    uint addrsz = x86_codec_addr_size(c);
+    uint bcstsz = x86_opr_bcst_size(a.opr);
+    uint oprmem = x86_opr_mem_val(a.opr);
+    uint vmsz = x86_opr_ew_size(a.opr);
+    size_t len = 0;
+
+    int is_disp = c->disp32 != 0;
+    int is_scale = a.q.s != 0;
+    int is_sib = a.q.rm == x86_sp;
+    int is_vsib = oprmem == x86_opr_vm32 || oprmem == x86_opr_vm64;
+    int is_reg = a.q.mod == x86_mod_reg;
+    int is_disp0 = a.q.mod == x86_mod_disp0;
+    int is_disp8 = a.q.mod == x86_mod_disp8;
+    int is_base_bp = (a.q.b & 7) == x86_bp;
+    int is_base_sp = (a.q.b & 7) == x86_sp;
+    int is_index_sp = a.q.x == x86_sp;
+    int is_64bit = x86_codec_is64(c);
+    int is_evex = x86_codec_field_ce(c) == x86_ce_evex;
+
+    int is_ptr_rip = is_base_bp && is_disp0 && !is_sib && is_64bit;
+    int is_ptr_rip_disp = is_ptr_rip && is_disp;
+    int is_ptr_disp = is_base_bp && is_disp0 && (!is_sib ||
+                      (is_sib && !is_vsib && is_index_sp && !is_scale));
+    int is_ptr_sreg = is_sib && is_base_bp && is_disp0;
+    int is_ptr_reg_sreg = is_sib && is_scale;
+    int is_ptr_reg_sreg_disp = is_ptr_reg_sreg && is_disp;
+    int is_ptr_reg_reg = is_sib && !(is_base_sp && is_index_sp && !is_vsib);
+    int is_ptr_reg_reg_disp = is_ptr_reg_reg && is_disp;
+
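+    /* decoded scale factor and absolute displacement for the format strings */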
+    int s = (1 << a.q.s);
+    uint d = c->disp32 < 0 ? -c->disp32 : c->disp32;
+
+    const char *so = c->disp32 < 0 ? "-" : "+";
+    const char *sn = c->disp32 < 0 ? "-" : "";
+    const char *p = is_vsib ? x86_ptr_size_str(vmsz) : x86_ptr_size_str(ptrsz);
+    const char *b = x86_reg_name(x86_sized_gpr(c, a.q.b, addrsz));
+    const char *x = is_vsib ? x86_reg_name(x86_sized_vec(a.q.x, regsz)) :
+        is_index_sp ? "riz" : x86_reg_name(x86_sized_gpr(c, a.q.x, addrsz));
+
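+    /* EVEX disp8 is compressed: scale it by the element width or operand size */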
+    if (is_disp8 && is_evex) {
+        d *= x86_opr_et_val(a.opr) ?
+            x86_opr_ew_bytes(a.opr) : x86_regsz_bytes(ptrsz);
+    }
+
+    if (is_reg) {
+        len = x86_opr_intel_reg_str_internal(buf, buflen, c, a, a.q.b);
+    } else if (is_ptr_rip_disp) {
+        len = snprintf(buf, buflen, fmt->ptr_rip_disp, p, so, d);
+    } else if (is_ptr_rip) {
+        len = snprintf(buf, buflen, fmt->ptr_rip, p);
+    } else if (is_ptr_disp) {
+        len = snprintf(buf, buflen, fmt->ptr_disp, p, sn, d);
+    } else if (is_ptr_sreg) {
+        len = snprintf(buf, buflen, fmt->ptr_sreg, p, s, x);
+    } else if (is_ptr_reg_sreg_disp) {
+        len = snprintf(buf, buflen, fmt->ptr_reg_sreg_disp, p, b, s, x, so, d);
+    } else if (is_ptr_reg_sreg) {
+        len = snprintf(buf, buflen, fmt->ptr_reg_sreg, p, b, s, x);
+    } else if (is_ptr_reg_reg_disp) {
+        len = snprintf(buf, buflen, fmt->ptr_reg_reg_disp, p, b, x, so, d);
+    } else if (is_ptr_reg_reg) {
+        len = snprintf(buf, buflen, fmt->ptr_reg_reg, p, b, x);
+    } else if (is_disp) {
+        len = snprintf(buf, buflen, fmt->ptr_reg_disp, p, b, so, d);
+    } else {
+        len = snprintf(buf, buflen, fmt->ptr_reg, p, b);
+    }
+
+    if (!is_reg && (a.q.k & 7) > 0 && (a.opr & x86_opr_flag_k) != 0) {
+        const char *k = x86_reg_name(x86_reg_kmask | (a.q.k & 7));
+        len += snprintf(buf + len, buflen - len, " {%s}", k);
+    }
+
+    if (bcstsz && a.q.brd) {
+        int bcstsc = x86_regsz_bytes(ptrsz) / x86_regsz_bytes(bcstsz);
+        len += snprintf(buf + len, buflen - len, "{1to%u}", bcstsc);
+    }
+
+    return len;
+}
+
+static size_t x86_opr_intel_mrm_dec_str(char *buf, size_t buflen,
+    x86_codec *c, x86_arg a)
+{
+    return x86_opr_intel_mrm_str_internal(buf, buflen, c, a,
+        &x86_opr_formats_intel_dec);
+}
+
+static size_t x86_opr_intel_mrm_hex_str(char *buf, size_t buflen,
+    x86_codec *c, x86_arg a)
+{
+    return x86_opr_intel_mrm_str_internal(buf, buflen, c, a,
+        &x86_opr_formats_intel_hex);
+}
+
+static size_t x86_opr_intel_reg_str(char *buf, size_t buflen,
+    x86_codec *c, x86_arg a)
+{
+    return x86_opr_intel_reg_str_internal(buf, buflen, c, a, a.q.r);
+}
+
+static size_t x86_opr_intel_vec_str(char *buf, size_t buflen,
+    x86_codec *c, x86_arg a)
+{
+    return x86_opr_intel_reg_str_internal(buf, buflen, c, a, a.q.v);
+}
+
+static size_t x86_opr_intel_opb_str(char *buf, size_t buflen,
+    x86_codec *c, x86_arg a)
+{
+    return x86_opr_intel_reg_str_internal(buf, buflen, c, a, a.q.b);
+}
+
+static size_t x86_opr_intel_is4_str(char *buf, size_t buflen,
+    x86_codec *c, x86_arg a)
+{
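+    /* the is4 register index is encoded in bits [7:4] of the immediate */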
+    uint reg = (c->imm32 >> 4) & 15;
+    return x86_opr_intel_reg_str_internal(buf, buflen, c, a, reg);
+}
+
+static size_t x86_opr_intel_imm_str_internal(char *buf, size_t buflen,
+    x86_codec *c, x86_arg a, x86_opr_formats *fmt)
+{
+    if (a.opr == x86_opr_moffs) {
+        uint regsz = x86_opr_reg_size(c, a);
+        if ((x86_codec_field_ci(c)) == x86_ci_i64) {
+            llong imm = c->imm64;
+            return snprintf(buf, buflen, fmt->ptr_imm64,
+                x86_ptr_size_str(regsz),
+                imm < 0 ? "-" : "", imm < 0 ? -imm : imm);
+        } else {
+            int imm = c->imm32;
+            return snprintf(buf, buflen, fmt->ptr_imm32,
+                x86_ptr_size_str(regsz),
+                imm < 0 ? "-" : "", imm < 0 ? -imm : imm);
+        }
+    } else {
+        if ((x86_codec_field_ci(c)) == x86_ci_i64) {
+            llong imm = c->imm64;
+            return snprintf(buf, buflen, fmt->imm64,
+                imm < 0 ? "-" : "", imm < 0 ? -imm : imm);
+        } else {
+            int imm = c->imm32;
+            return snprintf(buf, buflen, fmt->imm32,
+                imm < 0 ? "-" : "", imm < 0 ? -imm : imm);
+        }
+    }
+}
+
+static size_t x86_opr_intel_imm_hex_str(char *buf, size_t buflen,
+    x86_codec *c, x86_arg a)
+{
+    return x86_opr_intel_imm_str_internal(buf, buflen, c, a,
+        &x86_opr_formats_intel_hex);
+}
+
+static size_t x86_opr_intel_imm_dec_str(char *buf, size_t buflen,
+    x86_codec *c, x86_arg a)
+{
+    return x86_opr_intel_imm_str_internal(buf, buflen, c, a,
+        &x86_opr_formats_intel_dec);
+}
+
+static size_t x86_opr_intel_ime_hex_str(char *buf, size_t buflen,
+    x86_codec *c, x86_arg a)
+{
+    int imm = c->imm2;
+    return snprintf(buf, buflen, "%s0x%x",
+        imm < 0 ? "-" : "", imm < 0 ? -imm : imm);
+}
+
+static size_t x86_opr_intel_ime_dec_str(char *buf, size_t buflen,
+    x86_codec *c, x86_arg a)
+{
+    int imm = c->imm2;
+    return snprintf(buf, buflen, "%s%u",
+        imm < 0 ? "-" : "", imm < 0 ? -imm : imm);
+}
+
+static size_t x86_opr_intel_rel_hex_str(char *buf, size_t buflen,
+    x86_codec *c, x86_arg a, size_t pc_offset, x86_fmt_symbol sym_cb)
+{
+    size_t len = x86_opr_intel_imm_str_internal(buf, buflen, c, a,
+        &x86_opr_formats_intel_hex);
+    if (sym_cb) {
+        len += sym_cb(buf + len, buflen - len, c, pc_offset);
+    }
+    return len;
+}
+
+static size_t x86_opr_intel_rel_dec_str(char *buf, size_t buflen,
+    x86_codec *c, x86_arg a, size_t pc_offset, x86_fmt_symbol sym_cb)
+{
+    size_t len = x86_opr_intel_imm_str_internal(buf, buflen, c, a,
+        &x86_opr_formats_intel_dec);
+    if (sym_cb) {
+        len += sym_cb(buf + len, buflen - len, c, pc_offset);
+    }
+    return len;
+}
+
+static int x86_opr_intel_const_reg(x86_codec *c,
+    x86_arg a)
+{
+    uint regsz = x86_opr_reg_size(c, a);
+    uint addrsz = x86_codec_addr_size(c);
+
+    switch (a.opr) {
+    case x86_opr_reg_al: return x86_al;
+    case x86_opr_reg_cl: return x86_cl;
+    case x86_opr_reg_ax: return x86_ax;
+    case x86_opr_reg_cx: return x86_cx;
+    case x86_opr_reg_dx: return x86_dx;
+    case x86_opr_reg_bx: return x86_bx;
+    case x86_opr_reg_eax: return x86_eax;
+    case x86_opr_reg_ecx: return x86_ecx;
+    case x86_opr_reg_edx: return x86_edx;
+    case x86_opr_reg_ebx: return x86_ebx;
+    case x86_opr_reg_rax: return x86_rax;
+    case x86_opr_reg_rcx: return x86_rcx;
+    case x86_opr_reg_rdx: return x86_rdx;
+    case x86_opr_reg_rbx: return x86_rbx;
+    case x86_opr_reg_aw: return x86_sized_gpr(c, x86_al, regsz);
+    case x86_opr_reg_cw: return x86_sized_gpr(c, x86_cl, regsz);
+    case x86_opr_reg_dw: return x86_sized_gpr(c, x86_dl, regsz);
+    case x86_opr_reg_bw: return x86_sized_gpr(c, x86_bl, regsz);
+    case x86_opr_reg_pa: return x86_sized_gpr(c, x86_al, addrsz);
+    case x86_opr_reg_pc: return x86_sized_gpr(c, x86_cl, addrsz);
+    case x86_opr_reg_pd: return x86_sized_gpr(c, x86_dl, addrsz);
+    case x86_opr_reg_pb: return x86_sized_gpr(c, x86_bl, addrsz);
+    default: break;
+    }
+    return -1;
+}
+
+static size_t x86_opr_intel_const_str(char *buf, size_t buflen, x86_codec *c,
+    x86_arg a)
+{
+    uint regsz = x86_opr_reg_size(c, a);
+    uint addrsz = x86_codec_addr_size(c);
+    int regname = x86_opr_intel_const_reg(c, a);
+
+    if (regname >= 0) {
+        return snprintf(buf, buflen, "%s", x86_reg_name(regname));
+    }
+
+    switch (a.opr) {
+    case x86_opr_1:
+        return snprintf(buf, buflen, "1");
+    case x86_opr_reg_xmm0:
+        return snprintf(buf, buflen, "%s", "xmm0");
+    case x86_opr_reg_xmm0_7:
+        return snprintf(buf, buflen, "%s", "xmm0_7");
+    case x86_opr_seg_fs:
+        return snprintf(buf, buflen, "fs");
+    case x86_opr_seg_gs:
+        return snprintf(buf, buflen, "gs");
+    case x86_opr_reg_st0:
+        return snprintf(buf, buflen, "st");
+    case x86_opr_reg_psi:
+        return snprintf(buf, buflen, "%s[%s]",
+            x86_ptr_size_str(regsz),
+            x86_reg_name(x86_sized_gpr(c, x86_sil, addrsz)));
+    case x86_opr_reg_pdi:
+        return snprintf(buf, buflen, "%s[%s]",
+            x86_ptr_size_str(regsz),
+            x86_reg_name(x86_sized_gpr(c, x86_dil, addrsz)));
+    default: return snprintf(buf, buflen, "%s", "unknown");
+    }
+}
+
+x86_opr_formatter x86_format_intel_hex =
+{
+    .fmt_const = &x86_opr_intel_const_str,
+    .fmt_imm = &x86_opr_intel_imm_hex_str,
+    .fmt_reg = &x86_opr_intel_reg_str,
+    .fmt_mrm = &x86_opr_intel_mrm_hex_str,
+    .fmt_vec = &x86_opr_intel_vec_str,
+    .fmt_opb = &x86_opr_intel_opb_str,
+    .fmt_is4 = &x86_opr_intel_is4_str,
+    .fmt_ime = &x86_opr_intel_ime_hex_str,
+    .fmt_rel = &x86_opr_intel_rel_hex_str
+};
+
+x86_opr_formatter x86_format_intel_dec =
+{
+    .fmt_const = &x86_opr_intel_const_str,
+    .fmt_imm = &x86_opr_intel_imm_dec_str,
+    .fmt_reg = &x86_opr_intel_reg_str,
+    .fmt_mrm = &x86_opr_intel_mrm_dec_str,
+    .fmt_vec = &x86_opr_intel_vec_str,
+    .fmt_opb = &x86_opr_intel_opb_str,
+    .fmt_is4 = &x86_opr_intel_is4_str,
+    .fmt_ime = &x86_opr_intel_ime_dec_str,
+    .fmt_rel = &x86_opr_intel_rel_dec_str
+};
+
+static size_t x86_format_operand(char *buf, size_t buflen, x86_codec *c,
+    x86_arg a, size_t pc_offset, x86_fmt_symbol sym_cb, x86_opr_formatter *fmt)
+{
+    switch (x86_ord_type_val(a.ord)) {
+    case x86_ord_const:
+        return fmt->fmt_const(buf, buflen, c, a);
+    case x86_ord_reg:
+        return fmt->fmt_reg(buf, buflen, c, a);
+    case x86_ord_mrm:
+        return fmt->fmt_mrm(buf, buflen, c, a);
+    case x86_ord_vec:
+        return fmt->fmt_vec(buf, buflen, c, a);
+    case x86_ord_opr:
+        return fmt->fmt_opb(buf, buflen, c, a);
+    case x86_ord_imm:
+        if (a.opr == x86_opr_rel8 || a.opr == x86_opr_relw) {
+            return fmt->fmt_rel(buf, buflen, c, a, pc_offset, sym_cb);
+        } else if ((a.ord & ~x86_ord_flag_mask) == x86_ord_is4) {
+            return fmt->fmt_is4(buf, buflen, c, a);
+        } else if ((a.ord & ~x86_ord_flag_mask) == x86_ord_ime) {
+            return fmt->fmt_ime(buf, buflen, c, a);
+        } else {
+            return fmt->fmt_imm(buf, buflen, c, a);
+        }
+    default: return 0;
+    }
+}
+
+static size_t x86_format_op_internal(char *buf, size_t buflen, x86_ctx *ctx,
+    x86_codec *c, size_t pc_offset, x86_fmt_symbol sym_cb)
+{
+    const x86_opc_data *d = ctx->idx->map + c->rec;
+    const x86_opr_data *o = x86_opr_table + d->opr;
+    const x86_ord_data *s = x86_ord_table + d->ord;
+
+    x86_operands q = x86_codec_operands(ctx, c);
+
+    size_t len = 0;
+    uint prefix = d->enc & x86_enc_p_mask;
+
+    if (x86_codec_has_lock(c)) {
+        len += snprintf(buf + len, buflen - len, "lock ");
+    }
+    if (x86_codec_has_rep(c) && prefix != x86_enc_p_f3) {
+        len += snprintf(buf + len, buflen - len, "rep ");
+    }
+    if (x86_codec_has_repne(c) && prefix != x86_enc_p_f2) {
+        len += snprintf(buf + len, buflen - len, "repne ");
+    }
+    if (x86_codec_has_wait(c) && prefix != x86_enc_p_9b) {
+        len += snprintf(buf + len, buflen - len, "wait ");
+    }
+
+    len += snprintf(buf + len, buflen - len, "%s", x86_op_names[d->op]);
+
+    for (size_t i = 0; i < array_size(o->opr) && o->opr[i]; i++) {
+        x86_arg a = x86_codec_meta(d->enc, o->opr[i], s->ord[i], q);
+        len += snprintf(buf + len, buflen - len, i == 0 ? "\t" : ", ");
+        len += x86_format_operand(buf + len, buflen - len, c, a,
+            pc_offset, sym_cb, &x86_format_intel_dec);
+    }
+
+    return len;
+}
+
+size_t x86_format_op(char *buf, size_t buflen, x86_ctx *ctx, x86_codec *c)
+{
+    return x86_format_op_internal(buf, buflen, ctx, c, 0, NULL);
+}
+
+size_t x86_format_op_symbol(char *buf, size_t buflen, x86_ctx *ctx,
+    x86_codec *c, size_t pc_offset, x86_fmt_symbol sym_cb)
+{
+    /* note: caller needs to add instruction length to pc_offset */
+    return x86_format_op_internal(buf, buflen, ctx, c, pc_offset, sym_cb);
+}
+
+size_t x86_format_hex(char *buf, size_t buflen, uchar *data, size_t datalen)
+{
+    size_t len = 0;
+    for (size_t i = 0; i < datalen && i < 11; i++) {
+        len += snprintf(buf + len, buflen - len, i == 0 ? "\t" : " ");
+        len += snprintf(buf + len, buflen - len, "%02hhx", data[i]);
+    }
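+    /* pad with tabs so the following operand text is roughly column-aligned */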
+    size_t tabs = datalen < 10 ? (40 - datalen * 3) / 8 : 1;
+    for (size_t i = 0; i < tabs ; i++) {
+        len += snprintf(buf + len, buflen - len, "\t");
+    }
+    return len;
+}
+
+enum {
+    x86_enc_tpm_mask  = x86_enc_t_mask | x86_enc_prexw_mask | x86_enc_m_mask
+};
+
+static x86_opc_data *x86_table_match(x86_ctx *ctx, x86_codec *c,
+    x86_opc_data k, int w)
+{
+    x86_opc_data *r = NULL;
+    /* key is type+prefix+map with substituted rexw=w flag */
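+    /* note: -w is all ones when w == 1, selecting the rexw bit in the key */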
+    k.enc = ((k.enc & ~x86_enc_p_rexw) |
+             (-w    &  x86_enc_p_rexw)) & x86_enc_tpm_mask;
+    x86_debugf("table_lookup { type:%x prefix:%x map:%x "
+        "opc:[%02hhx %02hhx] opm:[%02hhx %02hhx] }",
+        (k.enc & x86_enc_t_mask) >> x86_enc_t_shift,
+        (k.enc & x86_enc_p_mask) >> x86_enc_p_shift,
+        (k.enc & x86_enc_m_mask) >> x86_enc_m_shift,
+        k.opc[0], k.opc[1], k.opm[0], k.opm[1]);
+    r = x86_table_lookup(ctx->idx, &k);
+    while (r < ctx->idx->map + ctx->idx->map_count) {
+        /* substitute suffix of record for precise match */
+        k.enc = ((k.enc & x86_enc_tpm_mask) |
+                  (r->enc & ~x86_enc_tpm_mask));
+        size_t oprec = (r - ctx->idx->map);
+        x86_debugf("checking opdata %zu", oprec);
+        if (debug) x86_print_op(r, 1, 1);
+        if (x86_opc_data_compare_masked(&k, r) != 0) {
+            x86_debugf("** no matches");
+            r = NULL;
+            break;
+        }
+        if (x86_filter_op(c, r, w) == 0) break;
+        r++;
+    }
+    return r;
+}
+
+int x86_codec_read(x86_ctx *ctx, x86_buffer *buf, x86_codec *c, size_t *len)
+{
+    uint state = x86_state_top;
+    size_t nbytes = 0, limit = buf->end - buf->start;
+    uint t = 0, m = 0, w = 0, p = 0, l = 0, mode = ctx->mode;
+    x86_opc_data k = { 0 }, *r = NULL;
+    uchar b = 0, lastp = 0;
+
+    memset(c, 0, sizeof(x86_codec));
+    switch (mode) {
+    case x86_modes_32: c->flags |= x86_cf_ia32; break;
+    case x86_modes_64: c->flags |= x86_cf_amd64; break;
+    }
+
+    while (state != x86_state_done) {
+        nbytes += x86_buffer_read(buf, &b, 1);
+        switch (state) {
+        case x86_state_top:
+            switch (b) {
+            case 0x40: case 0x41: case 0x42: case 0x43:
+            case 0x44: case 0x45: case 0x46: case 0x47:
+            case 0x48: case 0x49: case 0x4a: case 0x4b:
+            case 0x4c: case 0x4d: case 0x4e: case 0x4f:
+                c->rex.data[0] = b;
+                c->flags |= x86_ce_rex;
+                w = (c->rex.data[0] >> 3) & 1;
+                t = x86_table_lex;
+                state = x86_state_rex_opcode;
+                break;
+            case x86_pb_26:
+            case x86_pb_2e:
+            case x86_pb_36:
+            case x86_pb_3e:
+            case x86_pb_64:
+            case x86_pb_65:
+                state = x86_state_segment;
+                goto segment_reparse;
+            case x86_pb_66:
+            case x86_pb_67:
+            case x86_pb_9b:
+            case x86_pb_f0:
+            case x86_pb_f2:
+            case x86_pb_f3:
+                state = x86_state_legacy;
+                goto legacy_reparse;
+            case x86_pb_62:
+                nbytes += x86_buffer_read(buf, c->evex.data, 3);
+                c->flags |= x86_ce_evex;
+                m = (c->evex.data[0] >> 0) & 7;
+                w = (c->evex.data[1] >> 7) & 1;
+                p = (c->evex.data[1] >> 0) & 3;
+                l = (c->evex.data[2] >> 5) & 3;
+                t = x86_table_evex;
+                state = x86_state_vex_opcode;
+                break;
+            case x86_pb_c4:
+                nbytes += x86_buffer_read(buf, c->vex3.data, 2);
+                c->flags |= x86_ce_vex3;
+                m = (c->vex3.data[0] >> 0) & 31;
+                w = (c->vex3.data[1] >> 7) & 1;
+                p = (c->vex3.data[1] >> 0) & 3;
+                l = (c->vex3.data[1] >> 2) & 1;
+                t = x86_table_vex;
+                state = x86_state_vex_opcode;
+                break;
+            case x86_pb_c5:
+                nbytes += x86_buffer_read(buf, c->vex2.data, 1);
+                c->flags |= x86_ce_vex2;
+                m = x86_map_0f;
+                p = (c->vex2.data[0] >> 0) & 3;
+                l = (c->vex2.data[0] >> 2) & 1;
+                t = x86_table_vex;
+                state = x86_state_vex_opcode;
+                break;
+            case x86_pb_d5:
+                nbytes += x86_buffer_read(buf, c->rex2.data, 1);
+                c->flags |= x86_ce_rex2;
+                m = (c->rex2.data[0] >> 7) & 1;
+                w = (c->rex2.data[0] >> 3) & 1;
+                t = x86_table_lex;
+                state = x86_state_lex_opcode;
+                break;
+            case 0x0f:
+                t = x86_table_lex;
+                state = x86_state_map_0f;
+                break;
+            default:
+                m = x86_map_none;
+                t = x86_table_lex;
+                state = x86_state_lex_opcode;
+                goto lex_reparse;
+            }
+            break;
+        case x86_state_segment: segment_reparse:
+            switch (b) {
+            case 0x40: case 0x41: case 0x42: case 0x43:
+            case 0x44: case 0x45: case 0x46: case 0x47:
+            case 0x48: case 0x49: case 0x4a: case 0x4b:
+            case 0x4c: case 0x4d: case 0x4e: case 0x4f:
+                c->rex.data[0] = b;
+                c->flags |= x86_ce_rex;
+                w = (c->rex.data[0] >> 3) & 1;
+                t = x86_table_lex;
+                state = x86_state_rex_opcode;
+                break;
+            case x86_pb_26:
+                c->seg = x86_seg_es; state = x86_state_legacy;
+                break;
+            case x86_pb_2e:
+                c->seg = x86_seg_cs; state = x86_state_legacy;
+                break;
+            case x86_pb_36:
+                c->seg = x86_seg_ss; state = x86_state_legacy;
+                break;
+            case x86_pb_3e:
+                c->seg = x86_seg_ds; state = x86_state_legacy;
+                break;
+            case x86_pb_64:
+                c->seg = x86_seg_fs; state = x86_state_legacy;
+                break;
+            case x86_pb_65:
+                c->seg = x86_seg_gs; state = x86_state_legacy;
+                break;
+            case x86_pb_66:
+            case x86_pb_67:
+            case x86_pb_9b:
+            case x86_pb_f0:
+            case x86_pb_f2:
+            case x86_pb_f3:
+                state = x86_state_legacy;
+                goto legacy_reparse;
+            case x86_pb_62:
+            case x86_pb_c4:
+            case x86_pb_c5:
+            case x86_pb_d5:
+                goto err;
+            case 0x0f:
+                t = x86_table_lex;
+                state = x86_state_map_0f;
+                break;
+            default:
+                m = x86_map_none;
+                t = x86_table_lex;
+                state = x86_state_lex_opcode;
+                goto lex_reparse;
+            }
+            break;
+        case x86_state_legacy: legacy_reparse:
+            switch (b) {
+            case 0x40: case 0x41: case 0x42: case 0x43:
+            case 0x44: case 0x45: case 0x46: case 0x47:
+            case 0x48: case 0x49: case 0x4a: case 0x4b:
+            case 0x4c: case 0x4d: case 0x4e: case 0x4f:
+                c->rex.data[0] = b;
+                c->flags |= x86_ce_rex;
+                w = (c->rex.data[0] >> 3) & 1;
+                t = x86_table_lex;
+                state = x86_state_rex_opcode;
+                break;
+            case x86_pb_26:
+            case x86_pb_2e:
+            case x86_pb_36:
+            case x86_pb_3e:
+            case x86_pb_64:
+            case x86_pb_65:
+            case x86_pb_62:
+            case x86_pb_c4:
+            case x86_pb_c5:
+            case x86_pb_d5:
+                goto err;
+            case x86_pb_66:
+                lastp = b;
+                c->flags |= x86_cp_osize;
+                break;
+            case x86_pb_67:
+                lastp = b;
+                c->flags |= x86_cp_asize;
+                break;
+            case x86_pb_9b:
+                lastp = b;
+                c->flags |= x86_cp_wait;
+                break;
+            case x86_pb_f0:
+                lastp = b;
+                c->flags |= x86_cp_lock;
+                break;
+            case x86_pb_f2:
+                lastp = b;
+                c->flags |= x86_cp_repne;
+                break;
+            case x86_pb_f3:
+                lastp = b;
+                c->flags |= x86_cp_rep;
+                break;
+            case 0x0f:
+                t = x86_table_lex;
+                state = x86_state_map_0f;
+                break;
+            default:
+                m = x86_map_none;
+                t = x86_table_lex;
+                state = x86_state_lex_opcode;
+                goto lex_reparse;
+            }
+            break;
+        case x86_state_rex_opcode:
+            switch (b) {
+            case 0x0f:
+                state = x86_state_map_0f;
+                break;
+            default:
+                state = x86_state_lex_opcode;
+                goto lex_reparse;
+            }
+            break;
+        case x86_state_map_0f:
+            switch (b) {
+            case 0x38:
+                c->flags |= x86_cm_0f38;
+                m = x86_map_0f38;
+                state = x86_state_lex_opcode;
+                break;
+            case 0x3a:
+                c->flags |= x86_cm_0f3a;
+                m = x86_map_0f3a;
+                state = x86_state_lex_opcode;
+                break;
+            default:
+                c->flags |= x86_cm_0f;
+                m = x86_map_0f;
+                state = x86_state_lex_opcode;
+                goto lex_reparse;
+            }
+            break;
+        case x86_state_lex_opcode: lex_reparse:
+            k.enc |= ((t << x86_enc_t_shift) & x86_enc_t_mask)
+                  |  ((m << x86_enc_m_shift) & x86_enc_m_mask);
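+            /* the most recent legacy prefix selects the mandatory prefix */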
+            switch (lastp) {
+            case 0x66: k.enc |= x86_enc_p_66; break;
+            case 0x9b: k.enc |= x86_enc_p_9b; break;
+            case 0xf2: k.enc |= x86_enc_p_f2; break;
+            case 0xf3: k.enc |= x86_enc_p_f3; break;
+            }
+            state = x86_state_done;
+            break;
+        case x86_state_vex_opcode:
+            k.enc |= ((t << x86_enc_t_shift) & x86_enc_t_mask)
+                  |  ((m << x86_enc_m_shift) & x86_enc_m_mask);
+            switch (p) {
+            case x86_pfx_66: k.enc |= x86_enc_p_66; break;
+            case x86_pfx_f2: k.enc |= x86_enc_p_f2; break;
+            case x86_pfx_f3: k.enc |= x86_enc_p_f3; break;
+            }
+            state = x86_state_done;
+            (void)l; /* l can be added to the index key */
+            break;
+        default:
+            abort();
+        }
+    }
+
+    /* populate opcode for table lookup */
+    k.mode = mode;
+    c->opc[0] = k.opc[0] = b;
+    nbytes += x86_buffer_read(buf, &b, 1);
+    c->opc[1] = k.opc[1] = b;
+    k.opm[0] = k.opm[1] = 0xff;
+
+    /* if REX.W=1 first attempt to lookup W=1 record */
+    if (w) {
+        r = x86_table_match(ctx, c, k, 1);
+    }
+
+    /* if REX.W=0 or search failed lookup W=0/WIG record */
+    if (!w || (w && !r)) {
+        r = x86_table_match(ctx, c, k, 0);
+    }
+
+    /* now attempt lookup without using the prefix */
+    if (!r) {
+        k.enc &= ~x86_enc_p_mask;
+
+        /* if REX.W=1 first attempt to lookup W=1 record */
+        if (w) {
+            r = x86_table_match(ctx, c, k, 1);
+        }
+
+        /* if REX.W=0 or search failed lookup W=0/WIG record */
+        if (!w || (w && !r)) {
+            r = x86_table_match(ctx, c, k, 0);
+        }
+    }
+
+    /* parse encoding */
+    if (r) {
+
+        /* set opcode length and modrm flags */
+        switch (x86_enc_func(r->enc)) {
+        case x86_enc_f_modrm_r:
+        case x86_enc_f_modrm_n:
+            /* second byte is modrm */
+            c->flags |= x86_cf_modrm;
+            c->opclen = 1;
+            break;
+        case x86_enc_f_opcode:
+        case x86_enc_f_opcode_r:
+            /* two byte opcode */
+            c->opclen = 2;
+            break;
+        default:
+            /* no second opcode byte */
+            nbytes -= x86_buffer_unread(buf, 1);
+            c->opclen = 1;
+            break;
+        }
+
+        /* parse SIB, disp, imm from format */
+        nbytes += x86_parse_encoding(buf, c, r);
+        if (nbytes <= limit) {
+            c->rec = (r - ctx->idx->map);
+            *len = nbytes;
+            return 0;
+        }
+    }
+
+err:
+    nbytes -= x86_buffer_unread(buf, nbytes);
+    *len = nbytes;
+    return -1;
+}
+
+x86_ctx *x86_ctx_create(uint mode)
+{
+    x86_ctx *ctx = malloc(sizeof(x86_ctx));
+    ctx->mode = mode;
+    ctx->idx = x86_table_build(mode);
+    return ctx;
+}
+
+void x86_ctx_destroy(x86_ctx *ctx)
+{
+    free(ctx->idx->page_offsets);
+    free(ctx->idx->map);
+    free(ctx->idx->acc);
+    free(ctx);
+}
diff --git a/disas/x86-disas.c b/disas/x86-disas.c
new file mode 100644
index 000000000000..3ae139a23f1e
--- /dev/null
+++ b/disas/x86-disas.c
@@ -0,0 +1,94 @@
+/*
+ * Copyright (c) 2024-2025 Michael Clark
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "qemu/osdep.h"
+#include "disas/dis-asm.h"
+#include "x86.h"
+
+static size_t format_hex(char *buf, size_t buflen, uchar *data, size_t nbytes)
+{
+    size_t len = 0;
+    size_t indent = 1;
+
+    const size_t hexcols = 10;
+
+    for (size_t i = 0; i < nbytes; i++) {
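+        /* "+ (i == 0)" skips the leading space before the first byte */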
+        len += snprintf(buf + len, buflen - len, " %02x" + (i == 0), data[i]);
+    }
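+    /* unsigned subtraction: false when nbytes == 0 or nbytes > hexcols */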
+    if (hexcols - nbytes < hexcols) {
+        indent = (hexcols - nbytes) * 3 + 8 - (hexcols * 3) % 8;
+    }
+    for (size_t i = 0; i < indent && len < (buflen - 1); i++) {
+        buf[len++] = ' ';
+    }
+    buf[len] = '\0';
+
+    return len;
+}
+
+static size_t format_symbol(char *buf, size_t buflen, x86_codec *c,
+    size_t pc_offset)
+{
+    ullong addr = pc_offset + c->imm32;
+    return snprintf(buf, buflen, " # 0x%llx", addr);
+}
+
+int print_insn_x86(bfd_vma memaddr, struct disassemble_info *info)
+{
+    x86_buffer buf;
+    x86_codec codec;
+    x86_ctx *ctx;
+    bfd_byte *packet;
+    size_t nfetch, ndecode, len;
+    char str[128];
+    int ret;
+
+    static const size_t max_fetch_len = 16;
+
+    /* read instruction */
+    nfetch = info->buffer_vma + info->buffer_length - memaddr;
+    if (nfetch > max_fetch_len) {
+        nfetch = max_fetch_len;
+    }
+    packet = alloca(nfetch);
+    ret = (*info->read_memory_func)(memaddr, packet, nfetch, info);
+    if (ret != 0) {
+        (*info->memory_error_func)(ret, memaddr, info);
+        return ret;
+    }
+
+    /* decode instruction */
+    ctx = (x86_ctx *)info->private_data;
+    x86_buffer_init_ex(&buf, packet, 0, nfetch);
+    ret = x86_codec_read(ctx, &buf, &codec, &ndecode);
+    if (ret != 0) {
+        return -1;
+    }
+
+    /* format instruction */
+    len = format_hex(str, sizeof(str), packet, ndecode);
+    x86_format_op_symbol(str + len, sizeof(str) - len, ctx, &codec,
+        memaddr + ndecode, format_symbol);
+    (*info->fprintf_func)(info->stream, "%s", str);
+
+    return ndecode;
+}
diff --git a/disas/x86.h b/disas/x86.h
new file mode 100644
index 000000000000..3e745c2438fa
--- /dev/null
+++ b/disas/x86.h
@@ -0,0 +1,1969 @@
+/*
+ * Copyright (c) 2024-2025 Michael Clark
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#pragma once
+
+#include <stddef.h>
+#include <string.h>
+
+typedef unsigned char uchar;
+typedef unsigned short ushort;
+typedef unsigned int uint;
+typedef unsigned long ulong;
+typedef unsigned long long ullong;
+typedef long long llong;
+
+typedef signed char i8;
+typedef short i16;
+typedef int i32;
+typedef llong i64;
+
+typedef uchar u8;
+typedef ushort u16;
+typedef uint u32;
+typedef ullong u64;
+
+static inline uint clz_u32(uint val) {
+    return val == 0 ? 32 : __builtin_clz(val);
+}
+static inline uint clz_u64(ullong val) {
+    return val == 0 ? 64 : __builtin_clzll(val);
+}
+static inline uint ctz_u32(uint val) {
+    return val == 0 ? 32 : __builtin_ctz(val);
+}
+static inline uint ctz_u64(ullong val) {
+    return val == 0 ? 64 : __builtin_ctzll(val);
+}
+static inline uint popcnt_u32(uint val) {
+    return __builtin_popcount(val);
+}
+static inline uint popcnt_u64(ullong val) {
+    return __builtin_popcountll(val);
+}
+static inline uint ispow2_u32(uint v) {
+    return v && !(v & (v - 1));
+}
+static inline uint ispow2_u64(ullong v) {
+    return v && !(v & (v - 1));
+}
+
+#define clz_ulong clz_u64
+#define ctz_ulong ctz_u64
+#define popcnt_ulong popcnt_u64
+#define ispow2_ulong ispow2_u64
+
+/* C11 generics for clz, ctz, popcnt, ispow2 */
+#define clz(X) _Generic((X), \
+uint : clz_u32, int : clz_u32, \
+ulong : clz_ulong, long : clz_ulong, \
+ullong : clz_u64, llong : clz_u64)(X)
+#define ctz(X) _Generic((X), \
+uint : ctz_u32, int : ctz_u32, \
+ulong : ctz_ulong, long : ctz_ulong, \
+ullong : ctz_u64, llong : ctz_u64)(X)
+#define popcnt(X) _Generic((X), \
+uint : popcnt_u32, int : popcnt_u32, \
+ulong : popcnt_ulong, long : popcnt_ulong, \
+ullong : popcnt_u64, llong : popcnt_u64)(X)
+#define ispow2(X) _Generic((X), \
+uint : ispow2_u32, int : ispow2_u32, \
+ulong : ispow2_ulong, long : ispow2_ulong, \
+ullong : ispow2_u64, llong : ispow2_u64)(X)
+
+static inline uint rupgtpow2_u32(uint x) {
+    return 1ull << (32 - clz(x - 1));
+}
+static inline uint rupgepow2_u32(uint x) {
+    return 1ull << (32 - clz(x));
+}
+static inline uint rdnlepow2_u32(uint x) {
+    return 1ull << (31 - clz(x - 1));
+}
+static inline uint rdnltpow2_u32(uint x) {
+    return 1ull << (31 - clz(x));
+}
+static inline ullong rupgtpow2_u64(ullong x) {
+    return 1ull << (64 - clz(x - 1));
+}
+static inline ullong rupgepow2_u64(ullong x) {
+    return 1ull << (64 - clz(x));
+}
+static inline ullong rdnlepow2_u64(ullong x) {
+    return 1ull << (63 - clz(x - 1));
+}
+static inline ullong rdnltpow2_u64(ullong x) {
+    return 1ull << (63 - clz(x));
+}
+
+#define rupgtpow2_ulong rupgtpow2_u64
+#define rupgepow2_ulong rupgepow2_u64
+#define rdnlepow2_ulong rdnlepow2_u64
+#define rdnltpow2_ulong rdnltpow2_u64
+
+/* C11 generics for roundpow2 */
+#define rupgtpow2(X) _Generic((X), \
+uint : rupgtpow2_u32, int : rupgtpow2_u32, \
+ulong : rupgtpow2_ulong, long : rupgtpow2_ulong, \
+ullong : rupgtpow2_u64, llong : rupgtpow2_u64)(X)
+#define rupgepow2(X) _Generic((X), \
+uint : rupgepow2_u32, int : rupgepow2_u32, \
+ulong : rupgepow2_ulong, long : rupgepow2_ulong, \
+ullong : rupgepow2_u64, llong : rupgepow2_u64)(X)
+#define rdnlepow2(X) _Generic((X), \
+uint : rdnlepow2_u32, int : rdnlepow2_u32, \
+ulong : rdnlepow2_ulong, long : rdnlepow2_ulong, \
+ullong : rdnlepow2_u64, llong : rdnlepow2_u64)(X)
+#define rdnltpow2(X) _Generic((X), \
+uint : rdnltpow2_u32, int : rdnltpow2_u32, \
+ulong : rdnltpow2_ulong, long : rdnltpow2_ulong, \
+ullong : rdnltpow2_u64, llong : rdnltpow2_u64)(X)
+
+/* endian helpers */
+static inline u16 be16(u16 v)
+{
+    union { u8 a[2]; u16 b; } u = {
+        .a = { (u8)(v >> 8), (u8)(v) }
+    };
+    return u.b;
+}
+
+static inline u16 le16(u16 v)
+{
+    union { u8 a[2]; u16 b; } u = {
+        .a = { (u8)(v), (u8)(v >> 8) }
+    };
+    return u.b;
+}
+
+static inline u32 be32(u32 v)
+{
+    union { u8 a[4]; u32 b; } u = {
+        .a = { (u8)(v >> 24), (u8)(v >> 16), (u8)(v >> 8), (u8)(v) }
+    };
+    return u.b;
+}
+
+static inline u32 le32(u32 v)
+{
+    union { u8 a[4]; u32 b; } u = {
+        .a = { (u8)(v), (u8)(v >> 8), (u8)(v >> 16), (u8)(v >> 24) }
+    };
+    return u.b;
+}
+
+static inline u64 be64(u64 v)
+{
+    union { u8 a[8]; u64 b; } u = {
+        .a = { (u8)(v >> 56), (u8)(v >> 48), (u8)(v >> 40), (u8)(v >> 32),
+               (u8)(v >> 24), (u8)(v >> 16), (u8)(v >> 8), (u8)(v) }
+    };
+    return u.b;
+}
+
+static inline u64 le64(u64 v)
+{
+    union { u8 a[8]; u64 b; } u = {
+        .a = { (u8)(v), (u8)(v >> 8), (u8)(v >> 16), (u8)(v >> 24),
+               (u8)(v >> 32), (u8)(v >> 40), (u8)(v >> 48), (u8)(v >> 56) }
+    };
+    return u.b;
+}
+
+/* x86 */
+
+typedef struct x86_rex x86_rex;
+typedef struct x86_rex2 x86_rex2;
+typedef struct x86_vex2 x86_vex2;
+typedef struct x86_vex3 x86_vex3;
+typedef struct x86_evex x86_evex;
+typedef struct x86_modrm x86_modrm;
+typedef struct x86_sib x86_sib;
+
+typedef struct x86_mem x86_mem;
+typedef struct x86_codec x86_codec;
+typedef struct x86_modeb x86_modeb;
+typedef struct x86_opc_data x86_opc_data;
+typedef struct x86_opr_data x86_opr_data;
+typedef struct x86_ord_data x86_ord_data;
+typedef struct x86_table_idx x86_table_idx;
+typedef struct x86_acc_idx x86_acc_idx;
+typedef struct x86_acc_entry x86_acc_entry;
+typedef struct x86_buffer x86_buffer;
+typedef struct x86_ctx x86_ctx;
+
+/*
+ * simple debug macros
+ */
+
+#define VA_ARGS(...) , ##__VA_ARGS__
+#define x86_debug(fmt, ...) do { if (debug) \
+    printf(fmt "\n" VA_ARGS(__VA_ARGS__)); } while (0)
+#define x86_debugf(fmt, ...) do { if (debug) \
+    printf("%s: " fmt "\n", __func__ VA_ARGS(__VA_ARGS__)); } while (0)
+
+/*
+ * register groups
+ */
+
+enum
+{
+    x86_reg_w     = (0 << 5),
+    x86_reg_b     = (1 << 5),
+    x86_reg_d     = (2 << 5),
+    x86_reg_q     = (3 << 5),
+
+    x86_reg_mmx   = (4 << 5),
+    x86_reg_xmm   = (5 << 5),
+    x86_reg_ymm   = (6 << 5),
+    x86_reg_zmm   = (7 << 5),
+
+    x86_reg_kmask = (8 << 5),
+    x86_reg_bl    = (9 << 5),
+    x86_reg_fpu   = (10 << 5),
+    x86_reg_bnd   = (11 << 5),
+
+    x86_reg_dreg  = (12 << 5),
+    x86_reg_creg  = (13 << 5),
+    x86_reg_sreg  = (14 << 5),
+    x86_reg_sys   = (15 << 5),
+};
+
+/*
+ * prefix byte
+ */
+
+enum
+{
+    x86_pb_26    = 0x26,
+    x86_pb_2e    = 0x2e,
+    x86_pb_36    = 0x36,
+    x86_pb_3e    = 0x3e,
+    x86_pb_41    = 0x41,
+    x86_pb_42    = 0x42,
+    x86_pb_44    = 0x44,
+    x86_pb_48    = 0x48,
+    x86_pb_62    = 0x62,
+    x86_pb_64    = 0x64,
+    x86_pb_65    = 0x65,
+    x86_pb_66    = 0x66,
+    x86_pb_67    = 0x67,
+    x86_pb_9b    = 0x9b,
+    x86_pb_c4    = 0xc4,
+    x86_pb_c5    = 0xc5,
+    x86_pb_d5    = 0xd5,
+    x86_pb_f0    = 0xf0,
+    x86_pb_f2    = 0xf2,
+    x86_pb_f3    = 0xf3,
+
+    x86_pb_es    = x86_pb_26,
+    x86_pb_cs    = x86_pb_2e, /* branch not taken with Jcc */
+    x86_pb_ss    = x86_pb_36,
+    x86_pb_ds    = x86_pb_3e, /* branch taken with Jcc (CET notrack) */
+    x86_pb_rex_b = x86_pb_41,
+    x86_pb_rex_x = x86_pb_42,
+    x86_pb_rex_r = x86_pb_44,
+    x86_pb_rex_w = x86_pb_48,
+    x86_pb_evex  = x86_pb_62,
+    x86_pb_fs    = x86_pb_64,
+    x86_pb_gs    = x86_pb_65,
+    x86_pb_osize = x86_pb_66,
+    x86_pb_asize = x86_pb_67,
+    x86_pb_wait  = x86_pb_9b, /* x87 */
+    x86_pb_vex3  = x86_pb_c4,
+    x86_pb_vex2  = x86_pb_c5,
+    x86_pb_rex2  = x86_pb_d5,
+    x86_pb_lock  = x86_pb_f0,
+    x86_pb_repne = x86_pb_f2, /* REPNE/REPNZ and XACQUIRE */
+    x86_pb_rep   = x86_pb_f3, /* REP/REPE/REPZ and XRELEASE */
+};
+
+/*
+ * mod values
+ */
+
+enum
+{
+    x86_mod_disp0,
+    x86_mod_disp8,
+    x86_mod_dispw,
+    x86_mod_reg,
+};
+
+/*
+ * SIB scale
+ */
+
+enum
+{
+    x86_scale_1,
+    x86_scale_2,
+    x86_scale_4,
+    x86_scale_8,
+};
+
+/*
+ * segment
+ */
+
+enum
+{
+    x86_seg_none,
+    x86_seg_es,
+    x86_seg_cs,
+    x86_seg_ss,
+    x86_seg_ds,
+    x86_seg_fs,
+    x86_seg_gs
+};
+
+/*
+ * REX flags
+ */
+
+enum
+{
+    x86_rex_b = 0x01,
+    x86_rex_x = 0x02,
+    x86_rex_r = 0x04,
+    x86_rex_w = 0x08,
+};
+
+/*
+ * VEX prefix
+ */
+
+enum
+{
+    x86_pfx_none,
+    x86_pfx_66,
+    x86_pfx_f3,
+    x86_pfx_f2,
+    x86_pfx_9b,
+    x86_pfx_rexw = 8
+};
+
+/*
+ * VEX map
+ */
+
+enum
+{
+    x86_map_none,
+    x86_map_0f,
+    x86_map_0f38,
+    x86_map_0f3a,
+    x86_map_map4,
+    x86_map_map5,
+    x86_map_map6,
+};
+
+/*
+ * VEX length
+ */
+
+enum
+{
+    x86_vex_l0 = 0,
+    x86_vex_l1 = 1,
+    x86_vex_l2 = 2,
+    x86_vex_l3 = 3,
+    x86_vex_lz = x86_vex_l0,
+    x86_vex_lig = x86_vex_l3,
+    x86_vex_l128 = x86_vex_l0,
+    x86_vex_l256 = x86_vex_l1,
+    x86_vex_l512 = x86_vex_l2,
+};
+
+/*
+ * VEX width
+ */
+
+enum
+{
+    x86_vex_w0,
+    x86_vex_w1,
+};
+
+/*
+ * test conditions
+ */
+
+enum
+{
+    /* non-signed */
+    x86_never  = (0 | 0 | 0 | 0),
+    x86_always = (0 | 0 | 0 | 1),
+    x86_eq     = (8 | 0 | 0 | 0),
+    x86_ne     = (8 | 0 | 0 | 1),
+    /* signed */
+    x86_lt     = (0 | 0 | 2 | 0),
+    x86_ge     = (0 | 0 | 2 | 1),
+    x86_le     = (8 | 0 | 2 | 0),
+    x86_gt     = (8 | 0 | 2 | 1),
+    /* unsigned */
+    x86_ltu    = (0 | 4 | 0 | 0),
+    x86_geu    = (0 | 4 | 0 | 1),
+    x86_leu    = (8 | 4 | 0 | 0),
+    x86_gtu    = (8 | 4 | 0 | 1),
+};
+
+/*
+ * compare packed operators
+ *
+ * oq = ordered quiet, os = ordered signalling
+ * uq = unordered quiet, us = unordered signalling
+ */
+
+enum
+{
+    x86_eq_oq    = 0x00,
+    x86_lt_os    = 0x01,
+    x86_le_os    = 0x02,
+    x86_unord_q  = 0x03,
+    x86_neq_uq   = 0x04,
+    x86_nlt_us   = 0x05,
+    x86_nle_us   = 0x06,
+    x86_ord_q    = 0x07,
+    x86_eq_uq    = 0x08,
+    x86_nge_us   = 0x09,
+    x86_ngt_us   = 0x0a,
+    x86_false_oq = 0x0b,
+    x86_neq_oq   = 0x0c,
+    x86_ge_os    = 0x0d,
+    x86_gt_os    = 0x0e,
+    x86_true_uq  = 0x0f,
+    x86_eq_os    = 0x10,
+    x86_lt_oq    = 0x11,
+    x86_le_oq    = 0x12,
+    x86_unord_s  = 0x13,
+    x86_neq_us   = 0x14,
+    x86_nlt_uq   = 0x15,
+    x86_nle_uq   = 0x16,
+    x86_ord_s    = 0x17,
+    x86_eq_us    = 0x18,
+    x86_nge_uq   = 0x19,
+    x86_ngt_uq   = 0x1a,
+    x86_false_os = 0x1b,
+    x86_neq_os   = 0x1c,
+    x86_ge_oq    = 0x1d,
+    x86_gt_oq    = 0x1e,
+    x86_true_us  = 0x1f,
+};
+
+/*
+ * prefix encoding structures
+ *
+ * note: bitfields used for documentation purposes only
+ */
+
+/*
+ * ModRM struct
+ */
+
+struct x86_modrm
+{
+    union {
+        uchar data[1];
+        struct {
+            /* [0:2] */ uchar rm:3;
+            /* [3:5] */ uchar reg:3;
+            /* [6:7] */ uchar mod:2;
+        };
+    };
+};
+
+/*
+ * ModRM values
+ */
+
+enum
+{
+    x86_rm_sp_sib       = 4,
+    x86_rm_bp_disp0     = 5,
+
+    x86_modrm_rm_mask   = 7,
+    x86_modrm_reg_shift = 3,
+    x86_modrm_reg_mask  = 7,
+    x86_modrm_mod_shift = 6,
+    x86_modrm_mod_mask  = 3,
+};
+
+/*
+ * ModRM accessors
+ */
+
+static inline uint x86_modrm_rm(uchar modrm) {
+    return modrm & x86_modrm_rm_mask;
+}
+static inline uint x86_modrm_reg(uchar modrm) {
+    return (modrm >> x86_modrm_reg_shift) & x86_modrm_reg_mask;
+}
+static inline uint x86_modrm_mod(uchar modrm) {
+    return (modrm >> x86_modrm_mod_shift) & x86_modrm_mod_mask;
+}
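+
+/*
+ * for example, modrm byte 0x44 splits as mod=1, reg=0, rm=4:
+ *
+ *     x86_modrm_mod(0x44) == 1   // disp8 follows the addressing bytes
+ *     x86_modrm_reg(0x44) == 0
+ *     x86_modrm_rm(0x44)  == 4   // rm=4 with mod!=3 means a SIB byte follows
+ */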
+
+/*
+ * SIB struct
+ */
+
+struct x86_sib
+{
+    union {
+        uchar data[1];
+        struct {
+            /* [0:2] */ uchar b : 3; /* base[0:2] */
+            /* [3:5] */ uchar x : 3; /* index[0:2] */
+            /* [6:7] */ uchar s : 2; /* scale[0:1] */
+        };
+    };
+};
+
+/*
+ * SIB values
+ */
+
+enum {
+    x86_sib_b_mask = 7,
+    x86_sib_x_shift = 3,
+    x86_sib_x_mask = 7,
+    x86_sib_s_shift = 6,
+    x86_sib_s_mask = 3
+};
+
+/*
+ * SIB accessors
+ */
+
+static inline uint x86_sib_b(uchar sib) {
+    return sib & x86_sib_b_mask;
+}
+static inline uint x86_sib_x(uchar sib) {
+    return (sib >> x86_sib_x_shift) & x86_sib_x_mask;
+}
+static inline uint x86_sib_s(uchar sib) {
+    return (sib >> x86_sib_s_shift) & x86_sib_s_mask;
+}
+
+/*
+ * REX struct
+ */
+
+struct x86_rex
+{
+    union {
+        uchar data[1];
+        struct {
+            /* [0] */ uchar b3 : 1; /* base[3] */
+            /* [1] */ uchar x3 : 1; /* index[3] */
+            /* [2] */ uchar r3 : 1; /* reg[3] */
+            /* [3] */ uchar w  : 1; /* width */
+        };
+    };
+};
+
+
+/*
+ * REX2 struct
+ */
+
+struct x86_rex2
+{
+    union {
+        uchar data[1];
+        struct {
+            /* [0] */ uchar b3 : 1; /* base[3] */
+            /* [1] */ uchar x3 : 1; /* index[3] */
+            /* [2] */ uchar r3 : 1; /* reg[3] */
+            /* [3] */ uchar w  : 1; /* width */
+            /* [4] */ uchar b4 : 1; /* base[4] */
+            /* [5] */ uchar x4 : 1; /* index[4] */
+            /* [6] */ uchar r4 : 1; /* reg[4] */
+            /* [7] */ uchar m0 : 1; /* map prefix 0x0f */
+        };
+    };
+};
+
+
+/*
+ * VEX2 struct
+ */
+
+struct x86_vex2
+{
+    union {
+        uchar data[1];
+        struct {
+            /* [0:1] */ uchar p   : 2; /* see x86_pfx */
+            /*   [2] */ uchar l   : 1; /* len 0=128 1=256 */
+            /* [3:6] */ uchar vn  : 4; /* ~vec[0:3] */
+            /*   [7] */ uchar r3n : 1; /* ~reg[3] */
+        };
+    };
+};
+
+/*
+ * VEX3 struct
+ */
+
+struct x86_vex3
+{
+    union {
+        uchar data[2];
+        struct {
+            /* [0:4] */ uchar m   : 5; /* see x86_map */
+            /*   [5] */ uchar b3n : 1; /* ~base[3] */
+            /*   [6] */ uchar x3n : 1; /* ~index[3] */
+            /*   [7] */ uchar r3n : 1; /* ~reg[3] */
+
+            /* [0:1] */ uchar p   : 2; /* see x86_pfx */
+            /*   [2] */ uchar l   : 1; /* len 0=128 1=256 */
+            /* [3:6] */ uchar vn  : 4; /* ~vec[0:3] */
+            /*   [7] */ uchar w   : 1; /* width */
+        };
+    };
+};
+
+/*
+ * EVEX struct
+ */
+
+struct x86_evex
+{
+    union {
+        uchar data[3];
+        struct {
+            /* [0:2] */ uchar m   : 3; /* see x86_map */
+            /*   [3] */ uchar b4  : 1; /* base[4] */
+            /*   [4] */ uchar r4n : 1; /* ~reg[4] */
+            /*   [5] */ uchar b3n : 1; /* ~base[3] */
+            /*   [6] */ uchar x3n : 1; /* ~index[3] */
+            /*   [7] */ uchar r3n : 1; /* ~reg[3] */
+
+            /* [0:1] */ uchar p   : 2; /* see x86_pfx */
+            /*   [2] */ uchar x4n : 1; /* ~index[4] */
+            /* [3:6] */ uchar vn  : 4; /* ~vec[0:3] */
+            /*   [7] */ uchar w   : 1; /* width */
+
+            /* [0:2] */ uchar a   : 3; /* kmask[0:2] or map4.NF[2] no-flags */
+            /*   [3] */ uchar v4n : 1; /* ~vec[4] */
+            /*   [4] */ uchar br  : 1; /* broadcast or map4.ND[4] new-dest */
+            /* [5:6] */ uchar l   : 2; /* len 00=128 01=256, 10=512 */
+            /*   [7] */ uchar z   : 1; /* merge/zero */
+        };
+    };
+};
+
+/*
+ * opcode encoding
+ */
+
+enum
+{
+    x86_enc_w_shift          = 0,
+    x86_enc_m_shift          = x86_enc_w_shift + 3,
+    x86_enc_p_shift          = x86_enc_m_shift + 3,
+    x86_enc_l_shift          = x86_enc_p_shift + 4,
+    x86_enc_t_shift          = x86_enc_l_shift + 3,
+    x86_enc_o_shift          = x86_enc_t_shift + 2,
+    x86_enc_f_shift          = x86_enc_o_shift + 1,
+    x86_enc_i_shift          = x86_enc_f_shift + 3,
+    x86_enc_j_shift          = x86_enc_i_shift + 3,
+    x86_enc_r_shift          = x86_enc_j_shift + 2,
+    x86_enc_s_shift          = x86_enc_r_shift + 3,
+
+    x86_enc_w_w0             = (1 << x86_enc_w_shift),
+    x86_enc_w_w1             = (2 << x86_enc_w_shift),
+    x86_enc_w_wb             = (3 << x86_enc_w_shift),
+    x86_enc_w_wn             = (4 << x86_enc_w_shift),
+    x86_enc_w_wx             = (5 << x86_enc_w_shift),
+    x86_enc_w_ww             = (6 << x86_enc_w_shift),
+    x86_enc_w_wig            = (7 << x86_enc_w_shift),
+    x86_enc_w_mask           = (7 << x86_enc_w_shift),
+
+    x86_enc_m_none           = (0 << x86_enc_m_shift),
+    x86_enc_m_0f             = (1 << x86_enc_m_shift),
+    x86_enc_m_0f38           = (2 << x86_enc_m_shift),
+    x86_enc_m_0f3a           = (3 << x86_enc_m_shift),
+    x86_enc_m_map4           = (4 << x86_enc_m_shift),
+    x86_enc_m_map5           = (5 << x86_enc_m_shift),
+    x86_enc_m_map6           = (6 << x86_enc_m_shift),
+    x86_enc_m_mask           = (7 << x86_enc_m_shift),
+
+    x86_enc_p_none           = (0 << x86_enc_p_shift),
+    x86_enc_p_66             = (1 << x86_enc_p_shift),
+    x86_enc_p_f3             = (2 << x86_enc_p_shift),
+    x86_enc_p_f2             = (3 << x86_enc_p_shift),
+    x86_enc_p_9b             = (4 << x86_enc_p_shift),
+    x86_enc_p_rexw           = (8 << x86_enc_p_shift),
+    x86_enc_p_mask           = (7 << x86_enc_p_shift),
+    x86_enc_prexw_mask       = (15 << x86_enc_p_shift),
+
+    x86_enc_l_lz             = (1 << x86_enc_l_shift),
+    x86_enc_l_l0             = (2 << x86_enc_l_shift),
+    x86_enc_l_l1             = (3 << x86_enc_l_shift),
+    x86_enc_l_128            = (4 << x86_enc_l_shift),
+    x86_enc_l_256            = (5 << x86_enc_l_shift),
+    x86_enc_l_512            = (6 << x86_enc_l_shift),
+    x86_enc_l_lig            = (7 << x86_enc_l_shift),
+    x86_enc_l_mask           = (7 << x86_enc_l_shift),
+
+    x86_enc_t_none           = (0 << x86_enc_t_shift),
+    x86_enc_t_lex            = (1 << x86_enc_t_shift),
+    x86_enc_t_vex            = (2 << x86_enc_t_shift),
+    x86_enc_t_evex           = (3 << x86_enc_t_shift),
+    x86_enc_t_mask           = (3 << x86_enc_t_shift),
+
+    x86_enc_o_opcode_r       = (1 << x86_enc_o_shift), /* XX+r */
+    x86_enc_o_mask           = (1 << x86_enc_o_shift),
+
+    x86_enc_f_modrm_r        = (1 << x86_enc_f_shift), /* /r */
+    x86_enc_f_modrm_n        = (2 << x86_enc_f_shift), /* /N */
+    x86_enc_f_opcode         = (3 << x86_enc_f_shift), /* XX */
+    x86_enc_f_opcode_r       = (4 << x86_enc_f_shift), /* XX+r */
+    x86_enc_f_mask           = (7 << x86_enc_f_shift),
+
+    x86_enc_i_ib             = (1 << x86_enc_i_shift),
+    x86_enc_i_iw             = (2 << x86_enc_i_shift),
+    x86_enc_i_iwd            = (3 << x86_enc_i_shift),
+    x86_enc_i_i16            = (4 << x86_enc_i_shift),
+    x86_enc_i_i32            = (5 << x86_enc_i_shift),
+    x86_enc_i_i64            = (6 << x86_enc_i_shift),
+    x86_enc_i_mask           = (7 << x86_enc_i_shift),
+
+    x86_enc_j_ib             = (1 << x86_enc_j_shift),
+    x86_enc_j_i16            = (2 << x86_enc_j_shift),
+    x86_enc_j_mask           = (3 << x86_enc_j_shift),
+
+    x86_enc_r_rep            = (1 << x86_enc_r_shift),
+    x86_enc_r_lock           = (2 << x86_enc_r_shift),
+    x86_enc_r_norexb         = (4 << x86_enc_r_shift),
+    x86_enc_r_mask           = (7 << x86_enc_r_shift),
+
+    x86_enc_s_o16            = (1 << x86_enc_s_shift),
+    x86_enc_s_o32            = (2 << x86_enc_s_shift),
+    x86_enc_s_o64            = (3 << x86_enc_s_shift),
+    x86_enc_s_a16            = (4 << x86_enc_s_shift),
+    x86_enc_s_a32            = (5 << x86_enc_s_shift),
+    x86_enc_s_a64            = (6 << x86_enc_s_shift),
+    x86_enc_s_mask           = (7 << x86_enc_s_shift),
+
+    x86_enc_immediate_mask   = x86_enc_i_mask | x86_enc_j_mask,
+    x86_enc_suffix_mask      = x86_enc_r_mask | x86_enc_s_mask,
+    x86_enc_param_mask       = x86_enc_immediate_mask | x86_enc_suffix_mask
+};
+
+/*
+ * opcode encoding accessors
+ */
+
+static inline uint x86_enc_width(uint enc) {
+    return enc & x86_enc_w_mask;
+}
+static inline uint x86_enc_prefix(uint enc) {
+    return enc & x86_enc_prexw_mask;
+}
+static inline uint x86_enc_length(uint enc) {
+    return enc & x86_enc_l_mask;
+}
+static inline uint x86_enc_opcode(uint enc) {
+    return enc & x86_enc_o_mask;
+}
+static inline uint x86_enc_func(uint enc) {
+    return enc & x86_enc_f_mask;
+}
+static inline uint x86_enc_map(uint enc) {
+    return enc & x86_enc_m_mask;
+}
+static inline uint x86_enc_imm(uint enc) {
+    return enc & x86_enc_i_mask;
+}
+static inline uint x86_enc_imm2(uint enc) {
+    return enc & x86_enc_j_mask;
+}
+static inline uint x86_enc_type(uint enc) {
+    return enc & x86_enc_t_mask;
+}
+static inline uint x86_enc_suffix(uint enc) {
+    return enc & x86_enc_suffix_mask;
+}
+static inline uint x86_enc_leading(uint enc) {
+    return enc & ~x86_enc_param_mask;
+}
+static inline uint x86_enc_has_rep(uint enc) {
+    return enc & x86_enc_r_rep;
+}
+static inline uint x86_enc_has_lock(uint enc) {
+    return enc & x86_enc_r_lock;
+}
+static inline uint x86_enc_has_norexb(uint enc) {
+    return enc & x86_enc_r_norexb;
+}
+static inline uint x86_enc_has_o16(uint enc) {
+    return (enc & x86_enc_s_mask) == x86_enc_s_o16;
+}
+static inline uint x86_enc_has_o32(uint enc) {
+    return (enc & x86_enc_s_mask) == x86_enc_s_o32;
+}
+static inline uint x86_enc_has_o64(uint enc) {
+    return (enc & x86_enc_s_mask) == x86_enc_s_o64;
+}
+static inline uint x86_enc_has_a16(uint enc) {
+    return (enc & x86_enc_s_mask) == x86_enc_s_a16;
+}
+static inline uint x86_enc_has_a32(uint enc) {
+    return (enc & x86_enc_s_mask) == x86_enc_s_a32;
+}
+static inline uint x86_enc_has_a64(uint enc) {
+    return (enc & x86_enc_s_mask) == x86_enc_s_a64;
+}
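+
+/*
+ * an encoding word is the bitwise or of one value per field and the
+ * accessors mask a single field back out. for instance, a hypothetical
+ * VEX.128.66.0F38.W0 /r encoding could be written and queried as:
+ *
+ *     uint enc = x86_enc_t_vex | x86_enc_l_128 | x86_enc_p_66 |
+ *                x86_enc_m_0f38 | x86_enc_w_w0 | x86_enc_f_modrm_r;
+ *
+ *     x86_enc_map(enc) == x86_enc_m_0f38
+ *     x86_enc_length(enc) == x86_enc_l_128
+ */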
+
+/*
+ * operand encoding
+ */
+
+enum
+{
+    x86_opr_none,
+
+    /* operand field shifts */
+    x86_opr_ot               = 8,
+    x86_opr_os               = x86_opr_ot + 4,
+    x86_opr_ms               = x86_opr_os + 4,
+    x86_opr_av               = x86_opr_ms + 4,
+    x86_opr_et               = x86_opr_av + 4,
+    x86_opr_ew               = x86_opr_et + 2,
+    x86_opr_ec               = x86_opr_ew + 3,
+
+    x86_opr_ot_mask          = (15 << x86_opr_ot), /* operand type */
+    x86_opr_os_mask          = (15 << x86_opr_os), /* operand size */
+    x86_opr_ms_mask          = (15 << x86_opr_ms), /* memory size */
+    x86_opr_av_mask          = (15 << x86_opr_av), /* auxiliary value */
+    x86_opr_et_mask          = (3 << x86_opr_et), /* element type */
+    x86_opr_ew_mask          = (7 << x86_opr_ew), /* element width */
+    x86_opr_ec_mask          = (7 << x86_opr_ec), /* element count */
+
+    /* operand flags */
+    x86_opr_mem              = (1 << 0),
+    x86_opr_ind              = (1 << 1),
+    x86_opr_bcst             = (1 << 2),
+    x86_opr_flag_er          = (1 << 3),
+    x86_opr_flag_k           = (1 << 4),
+    x86_opr_flag_sae         = (1 << 5),
+    x86_opr_flag_z           = (1 << 6),
+    x86_opr_flag_rs          = (1 << 7),
+
+    /* operand type */
+    x86_opr_const            = (1 << x86_opr_ot), /* constant */
+    x86_opr_imm              = (2 << x86_opr_ot), /* immediate */
+    x86_opr_reg              = (3 << x86_opr_ot), /* register */
+    x86_opr_vec              = (4 << x86_opr_ot), /* vector */
+    x86_opr_mmx              = (5 << x86_opr_ot), /* mmx */
+    x86_opr_st               = (6 << x86_opr_ot), /* x87 */
+    x86_opr_k                = (7 << x86_opr_ot), /* mask */
+    x86_opr_seg              = (8 << x86_opr_ot), /* segment */
+    x86_opr_creg             = (9 << x86_opr_ot), /* control */
+    x86_opr_dreg             = (10 << x86_opr_ot), /* debug */
+    x86_opr_bnd              = (11 << x86_opr_ot), /* bound */
+    x86_opr_type_mask        = x86_opr_ot_mask,
+
+    /* operand sizes */
+    x86_opr_size_8           = (1 << x86_opr_os),
+    x86_opr_size_16          = (2 << x86_opr_os),
+    x86_opr_size_32          = (3 << x86_opr_os),
+    x86_opr_size_64          = (4 << x86_opr_os),
+    x86_opr_size_128         = (5 << x86_opr_os),
+    x86_opr_size_256         = (6 << x86_opr_os),
+    x86_opr_size_512         = (7 << x86_opr_os),
+    x86_opr_size_1024        = (8 << x86_opr_os),
+    x86_opr_size_80          = (9 << x86_opr_os),
+    x86_opr_size_wd          = (13 << x86_opr_os),
+    x86_opr_size_w           = (14 << x86_opr_os),
+    x86_opr_size_a           = (15 << x86_opr_os),
+    x86_opr_size_mask        = x86_opr_os_mask,
+
+    /* sized register */
+    x86_opr_r8               = x86_opr_reg | x86_opr_size_8,
+    x86_opr_r16              = x86_opr_reg | x86_opr_size_16,
+    x86_opr_r32              = x86_opr_reg | x86_opr_size_32,
+    x86_opr_r64              = x86_opr_reg | x86_opr_size_64,
+    x86_opr_rw               = x86_opr_reg | x86_opr_size_w,
+    x86_opr_ra               = x86_opr_reg | x86_opr_size_a,
+    x86_opr_mm               = x86_opr_vec | x86_opr_size_64,
+    x86_opr_xmm              = x86_opr_vec | x86_opr_size_128,
+    x86_opr_ymm              = x86_opr_vec | x86_opr_size_256,
+    x86_opr_zmm              = x86_opr_vec | x86_opr_size_512,
+
+    /* sized memory */
+    x86_opr_m8               = x86_opr_mem | (1 << x86_opr_ms), /* byte */
+    x86_opr_m16              = x86_opr_mem | (2 << x86_opr_ms), /* word */
+    x86_opr_m32              = x86_opr_mem | (3 << x86_opr_ms), /* dword */
+    x86_opr_m64              = x86_opr_mem | (4 << x86_opr_ms), /* qword */
+    x86_opr_m128             = x86_opr_mem | (5 << x86_opr_ms), /* oword/xmmword */
+    x86_opr_m256             = x86_opr_mem | (6 << x86_opr_ms), /* ymmword */
+    x86_opr_m512             = x86_opr_mem | (7 << x86_opr_ms), /* zmmword */
+    x86_opr_m1024            = x86_opr_mem | (8 << x86_opr_ms),
+    x86_opr_m80              = x86_opr_mem | (9 << x86_opr_ms), /* tword/tbyte */
+    x86_opr_m384             = x86_opr_mem | (10 << x86_opr_ms),
+    x86_opr_mib              = x86_opr_mem | (11 << x86_opr_ms), /* bound */
+    x86_opr_vm32             = x86_opr_mem | (12 << x86_opr_ms), /* vecmem32 */
+    x86_opr_vm64             = x86_opr_mem | (13 << x86_opr_ms), /* vecmem64 */
+    x86_opr_mw               = x86_opr_mem | (14 << x86_opr_ms), /* 16/32/64 */
+    x86_opr_mp               = x86_opr_mem | (15 << x86_opr_ms), /* pointer */
+    x86_opr_mem_mask         = x86_opr_mem | (15 << x86_opr_ms),
+
+    /* unsized register / sized memory */
+    x86_opr_r_m8             = x86_opr_reg | x86_opr_m8,
+    x86_opr_r_m16            = x86_opr_reg | x86_opr_m16,
+    x86_opr_r_m32            = x86_opr_reg | x86_opr_m32,
+    x86_opr_r_m64            = x86_opr_reg | x86_opr_m64,
+
+    /* sized register / sized memory */
+    x86_opr_r8_m8            = x86_opr_r8  | x86_opr_m8,
+    x86_opr_r16_m8           = x86_opr_r16 | x86_opr_m8,
+    x86_opr_r16_m16          = x86_opr_r16 | x86_opr_m16,
+    x86_opr_r32_m8           = x86_opr_r32 | x86_opr_m8,
+    x86_opr_r32_m16          = x86_opr_r32 | x86_opr_m16,
+    x86_opr_r32_m32          = x86_opr_r32 | x86_opr_m32,
+    x86_opr_r64_m8           = x86_opr_r64 | x86_opr_m8,
+    x86_opr_r64_m16          = x86_opr_r64 | x86_opr_m16,
+    x86_opr_r64_m32          = x86_opr_r64 | x86_opr_m32,
+    x86_opr_r64_m64          = x86_opr_r64 | x86_opr_m64,
+    x86_opr_rw_mw            = x86_opr_rw  | x86_opr_mw,
+
+    /* sized vector / sized memory */
+    x86_opr_mm_m32           = x86_opr_mm  | x86_opr_m32,
+    x86_opr_mm_m64           = x86_opr_mm  | x86_opr_m64,
+    x86_opr_xmm_m8           = x86_opr_xmm | x86_opr_m8,
+    x86_opr_xmm_m16          = x86_opr_xmm | x86_opr_m16,
+    x86_opr_xmm_m32          = x86_opr_xmm | x86_opr_m32,
+    x86_opr_xmm_m64          = x86_opr_xmm | x86_opr_m64,
+    x86_opr_xmm_m128         = x86_opr_xmm | x86_opr_m128,
+    x86_opr_ymm_m256         = x86_opr_ymm | x86_opr_m256,
+    x86_opr_zmm_m512         = x86_opr_zmm | x86_opr_m512,
+
+    /* sized vector memory */
+    x86_opr_vm32x            = x86_opr_xmm | x86_opr_vm32,
+    x86_opr_vm32y            = x86_opr_ymm | x86_opr_vm32,
+    x86_opr_vm32z            = x86_opr_zmm | x86_opr_vm32,
+    x86_opr_vm64x            = x86_opr_xmm | x86_opr_vm64,
+    x86_opr_vm64y            = x86_opr_ymm | x86_opr_vm64,
+    x86_opr_vm64z            = x86_opr_zmm | x86_opr_vm64,
+
+    /* mask / sized memory */
+    x86_opr_k_m8             = x86_opr_k   | x86_opr_m8,
+    x86_opr_k_m16            = x86_opr_k   | x86_opr_m16,
+    x86_opr_k_m32            = x86_opr_k   | x86_opr_m32,
+    x86_opr_k_m64            = x86_opr_k   | x86_opr_m64,
+
+    /* bound / memory */
+    x86_opr_bnd_mem          = x86_opr_bnd | x86_opr_mem,
+
+    /* memory operand alias */
+    x86_opr_m16int           = x86_opr_m16,
+    x86_opr_m32fp            = x86_opr_m32,
+    x86_opr_m32int           = x86_opr_m32,
+    x86_opr_m64fp            = x86_opr_m64,
+    x86_opr_m64int           = x86_opr_m64,
+    x86_opr_m80bcd           = x86_opr_m80,
+    x86_opr_m80dec           = x86_opr_m80,
+    x86_opr_m80fp            = x86_opr_m80,
+
+    /* sized immediate */
+    x86_opr_ib               = x86_opr_imm | x86_opr_size_8,
+    x86_opr_i16              = x86_opr_imm | x86_opr_size_16,
+    x86_opr_i32              = x86_opr_imm | x86_opr_size_32,
+    x86_opr_i64              = x86_opr_imm | x86_opr_size_64,
+    x86_opr_iwd              = x86_opr_imm | x86_opr_size_wd,
+    x86_opr_iw               = x86_opr_imm | x86_opr_size_w,
+
+    /* memory offset */
+    x86_opr_moffs            = x86_opr_imm | x86_opr_mem | x86_opr_size_w,
+
+    /* constant */
+    x86_opr_1                = x86_opr_const | (1 << x86_opr_av),
+
+    /* registers sequential */
+    x86_opr_flag_rs2         = x86_opr_flag_rs | (1 << x86_opr_av),
+    x86_opr_flag_rs4         = x86_opr_flag_rs | (2 << x86_opr_av),
+
+    /* sized broadcast */
+    x86_opr_m16bcst          = x86_opr_bcst | (1 << x86_opr_av),
+    x86_opr_m32bcst          = x86_opr_bcst | (2 << x86_opr_av),
+    x86_opr_m64bcst          = x86_opr_bcst | (3 << x86_opr_av),
+    x86_opr_bcst_mask        = x86_opr_bcst | x86_opr_av_mask,
+
+    /* sized vector / sized memory / broadcast */
+    x86_opr_xmm_m32_m16bcst  = x86_opr_xmm_m32  | x86_opr_m16bcst,
+    x86_opr_xmm_m64_m16bcst  = x86_opr_xmm_m64  | x86_opr_m16bcst,
+    x86_opr_xmm_m64_m32bcst  = x86_opr_xmm_m64  | x86_opr_m32bcst,
+    x86_opr_xmm_m128_m16bcst = x86_opr_xmm_m128 | x86_opr_m16bcst,
+    x86_opr_xmm_m128_m32bcst = x86_opr_xmm_m128 | x86_opr_m32bcst,
+    x86_opr_xmm_m128_m64bcst = x86_opr_xmm_m128 | x86_opr_m64bcst,
+    x86_opr_ymm_m256_m16bcst = x86_opr_ymm_m256 | x86_opr_m16bcst,
+    x86_opr_ymm_m256_m32bcst = x86_opr_ymm_m256 | x86_opr_m32bcst,
+    x86_opr_ymm_m256_m64bcst = x86_opr_ymm_m256 | x86_opr_m64bcst,
+    x86_opr_zmm_m512_m16bcst = x86_opr_zmm_m512 | x86_opr_m16bcst,
+    x86_opr_zmm_m512_m32bcst = x86_opr_zmm_m512 | x86_opr_m32bcst,
+    x86_opr_zmm_m512_m64bcst = x86_opr_zmm_m512 | x86_opr_m64bcst,
+
+    /* relative displacement */
+    x86_opr_rel8             = x86_opr_imm | x86_opr_ind | x86_opr_size_8,
+    x86_opr_relw             = x86_opr_imm | x86_opr_ind | x86_opr_size_w,
+
+    /* enter / far displacement */
+    x86_opr_far16_16         = x86_opr_imm | x86_opr_ind | (1 << x86_opr_av),
+    x86_opr_far16_32         = x86_opr_imm | x86_opr_ind | (2 << x86_opr_av),
+
+    /* far memory indirect */
+    x86_opr_memfar16_16      = x86_opr_mem | x86_opr_ind | (3 << x86_opr_av),
+    x86_opr_memfar16_32      = x86_opr_mem | x86_opr_ind | (4 << x86_opr_av),
+    x86_opr_memfar16_64      = x86_opr_mem | x86_opr_ind | (5 << x86_opr_av),
+
+    /* implicit register */
+    x86_opr_reg_a            = x86_opr_reg | x86_opr_ind | (1 << x86_opr_av),
+    x86_opr_reg_c            = x86_opr_reg | x86_opr_ind | (2 << x86_opr_av),
+    x86_opr_reg_d            = x86_opr_reg | x86_opr_ind | (3 << x86_opr_av),
+    x86_opr_reg_b            = x86_opr_reg | x86_opr_ind | (4 << x86_opr_av),
+    x86_opr_reg_si           = x86_opr_reg | x86_opr_ind | (5 << x86_opr_av),
+    x86_opr_reg_di           = x86_opr_reg | x86_opr_ind | (6 << x86_opr_av),
+    x86_opr_reg_ah           = x86_opr_reg | x86_opr_ind | (7 << x86_opr_av),
+    x86_opr_reg_v0           = x86_opr_vec | x86_opr_ind | (1 << x86_opr_av),
+    x86_opr_reg_st0          = x86_opr_st  | x86_opr_ind | (1 << x86_opr_av),
+    x86_opr_seg_es           = x86_opr_seg | x86_opr_ind | (1 << x86_opr_av),
+    x86_opr_seg_cs           = x86_opr_seg | x86_opr_ind | (2 << x86_opr_av),
+    x86_opr_seg_ss           = x86_opr_seg | x86_opr_ind | (3 << x86_opr_av),
+    x86_opr_seg_ds           = x86_opr_seg | x86_opr_ind | (4 << x86_opr_av),
+    x86_opr_seg_fs           = x86_opr_seg | x86_opr_ind | (5 << x86_opr_av),
+    x86_opr_seg_gs           = x86_opr_seg | x86_opr_ind | (6 << x86_opr_av),
+
+    /* sized implicit register */
+    x86_opr_reg_al           = x86_opr_reg_a | x86_opr_size_8,
+    x86_opr_reg_cl           = x86_opr_reg_c | x86_opr_size_8,
+    x86_opr_reg_ax           = x86_opr_reg_a | x86_opr_size_16,
+    x86_opr_reg_cx           = x86_opr_reg_c | x86_opr_size_16,
+    x86_opr_reg_dx           = x86_opr_reg_d | x86_opr_size_16,
+    x86_opr_reg_bx           = x86_opr_reg_b | x86_opr_size_16,
+    x86_opr_reg_eax          = x86_opr_reg_a | x86_opr_size_32,
+    x86_opr_reg_ecx          = x86_opr_reg_c | x86_opr_size_32,
+    x86_opr_reg_edx          = x86_opr_reg_d | x86_opr_size_32,
+    x86_opr_reg_ebx          = x86_opr_reg_b | x86_opr_size_32,
+    x86_opr_reg_rax          = x86_opr_reg_a | x86_opr_size_64,
+    x86_opr_reg_rcx          = x86_opr_reg_c | x86_opr_size_64,
+    x86_opr_reg_rdx          = x86_opr_reg_d | x86_opr_size_64,
+    x86_opr_reg_rbx          = x86_opr_reg_b | x86_opr_size_64,
+    x86_opr_reg_aw           = x86_opr_reg_a | x86_opr_size_w,
+    x86_opr_reg_cw           = x86_opr_reg_c | x86_opr_size_w,
+    x86_opr_reg_dw           = x86_opr_reg_d | x86_opr_size_w,
+    x86_opr_reg_bw           = x86_opr_reg_b | x86_opr_size_w,
+    x86_opr_reg_pa           = x86_opr_reg_a | x86_opr_size_a,
+    x86_opr_reg_pc           = x86_opr_reg_c | x86_opr_size_a,
+    x86_opr_reg_pd           = x86_opr_reg_d | x86_opr_size_a,
+    x86_opr_reg_pb           = x86_opr_reg_b | x86_opr_size_a,
+    x86_opr_reg_psi          = x86_opr_reg_si | x86_opr_size_a,
+    x86_opr_reg_pdi          = x86_opr_reg_di | x86_opr_size_a,
+    x86_opr_reg_xmm0         = x86_opr_reg_v0 | x86_opr_size_128,
+    x86_opr_reg_xmm0_7       = x86_opr_reg_v0 | x86_opr_size_1024,
+
+    /* element type */
+    x86_opr_et_none          = (0 << x86_opr_et),
+    x86_opr_et_i             = (1 << x86_opr_et),
+    x86_opr_et_f             = (2 << x86_opr_et),
+    x86_opr_et_bf            = (3 << x86_opr_et),
+
+    /* element width */
+    x86_opr_ew_8             = (1 << x86_opr_ew),
+    x86_opr_ew_16            = (2 << x86_opr_ew),
+    x86_opr_ew_32            = (3 << x86_opr_ew),
+    x86_opr_ew_64            = (4 << x86_opr_ew),
+    x86_opr_ew_128           = (5 << x86_opr_ew),
+    x86_opr_ew_256           = (6 << x86_opr_ew),
+    x86_opr_ew_512           = (7 << x86_opr_ew),
+
+    /* element count */
+    x86_opr_ec_x1            = (1 << x86_opr_ec),
+    x86_opr_ec_x2            = (2 << x86_opr_ec),
+    x86_opr_ec_x4            = (3 << x86_opr_ec),
+    x86_opr_ec_x8            = (4 << x86_opr_ec),
+    x86_opr_ec_x16           = (5 << x86_opr_ec),
+    x86_opr_ec_x32           = (6 << x86_opr_ec),
+    x86_opr_ec_x64           = (7 << x86_opr_ec),
+
+    /* tuple type */
+    x86_opr_i8x1             = x86_opr_et_i | x86_opr_ew_8 | x86_opr_ec_x1,
+    x86_opr_i8x2             = x86_opr_et_i | x86_opr_ew_8 | x86_opr_ec_x2,
+    x86_opr_i8x4             = x86_opr_et_i | x86_opr_ew_8 | x86_opr_ec_x4,
+    x86_opr_i8x8             = x86_opr_et_i | x86_opr_ew_8 | x86_opr_ec_x8,
+    x86_opr_i8x16            = x86_opr_et_i | x86_opr_ew_8 | x86_opr_ec_x16,
+    x86_opr_i8x32            = x86_opr_et_i | x86_opr_ew_8 | x86_opr_ec_x32,
+    x86_opr_i8x64            = x86_opr_et_i | x86_opr_ew_8 | x86_opr_ec_x64,
+    x86_opr_i16x1            = x86_opr_et_i | x86_opr_ew_16 | x86_opr_ec_x1,
+    x86_opr_i16x2            = x86_opr_et_i | x86_opr_ew_16 | x86_opr_ec_x2,
+    x86_opr_i16x4            = x86_opr_et_i | x86_opr_ew_16 | x86_opr_ec_x4,
+    x86_opr_i16x8            = x86_opr_et_i | x86_opr_ew_16 | x86_opr_ec_x8,
+    x86_opr_i16x16           = x86_opr_et_i | x86_opr_ew_16 | x86_opr_ec_x16,
+    x86_opr_i16x32           = x86_opr_et_i | x86_opr_ew_16 | x86_opr_ec_x32,
+    x86_opr_i32x1            = x86_opr_et_i | x86_opr_ew_32 | x86_opr_ec_x1,
+    x86_opr_i32x2            = x86_opr_et_i | x86_opr_ew_32 | x86_opr_ec_x2,
+    x86_opr_i32x4            = x86_opr_et_i | x86_opr_ew_32 | x86_opr_ec_x4,
+    x86_opr_i32x8            = x86_opr_et_i | x86_opr_ew_32 | x86_opr_ec_x8,
+    x86_opr_i32x16           = x86_opr_et_i | x86_opr_ew_32 | x86_opr_ec_x16,
+    x86_opr_i64x1            = x86_opr_et_i | x86_opr_ew_64 | x86_opr_ec_x1,
+    x86_opr_i64x2            = x86_opr_et_i | x86_opr_ew_64 | x86_opr_ec_x2,
+    x86_opr_i64x4            = x86_opr_et_i | x86_opr_ew_64 | x86_opr_ec_x4,
+    x86_opr_i64x8            = x86_opr_et_i | x86_opr_ew_64 | x86_opr_ec_x8,
+    x86_opr_i128x1           = x86_opr_et_i | x86_opr_ew_128 | x86_opr_ec_x1,
+    x86_opr_i128x2           = x86_opr_et_i | x86_opr_ew_128 | x86_opr_ec_x2,
+    x86_opr_i128x4           = x86_opr_et_i | x86_opr_ew_128 | x86_opr_ec_x4,
+    x86_opr_i256x1           = x86_opr_et_i | x86_opr_ew_256 | x86_opr_ec_x1,
+    x86_opr_i256x2           = x86_opr_et_i | x86_opr_ew_256 | x86_opr_ec_x2,
+    x86_opr_i512x1           = x86_opr_et_i | x86_opr_ew_512 | x86_opr_ec_x1,
+    x86_opr_f8x1             = x86_opr_et_f | x86_opr_ew_8 | x86_opr_ec_x1,
+    x86_opr_f8x2             = x86_opr_et_f | x86_opr_ew_8 | x86_opr_ec_x2,
+    x86_opr_f8x4             = x86_opr_et_f | x86_opr_ew_8 | x86_opr_ec_x4,
+    x86_opr_f8x8             = x86_opr_et_f | x86_opr_ew_8 | x86_opr_ec_x8,
+    x86_opr_f8x16            = x86_opr_et_f | x86_opr_ew_8 | x86_opr_ec_x16,
+    x86_opr_f8x32            = x86_opr_et_f | x86_opr_ew_8 | x86_opr_ec_x32,
+    x86_opr_f8x64            = x86_opr_et_f | x86_opr_ew_8 | x86_opr_ec_x64,
+    x86_opr_f16x1            = x86_opr_et_f | x86_opr_ew_16 | x86_opr_ec_x1,
+    x86_opr_f16x2            = x86_opr_et_f | x86_opr_ew_16 | x86_opr_ec_x2,
+    x86_opr_f16x4            = x86_opr_et_f | x86_opr_ew_16 | x86_opr_ec_x4,
+    x86_opr_f16x8            = x86_opr_et_f | x86_opr_ew_16 | x86_opr_ec_x8,
+    x86_opr_f16x16           = x86_opr_et_f | x86_opr_ew_16 | x86_opr_ec_x16,
+    x86_opr_f16x32           = x86_opr_et_f | x86_opr_ew_16 | x86_opr_ec_x32,
+    x86_opr_f32x1            = x86_opr_et_f | x86_opr_ew_32 | x86_opr_ec_x1,
+    x86_opr_f32x2            = x86_opr_et_f | x86_opr_ew_32 | x86_opr_ec_x2,
+    x86_opr_f32x4            = x86_opr_et_f | x86_opr_ew_32 | x86_opr_ec_x4,
+    x86_opr_f32x8            = x86_opr_et_f | x86_opr_ew_32 | x86_opr_ec_x8,
+    x86_opr_f32x16           = x86_opr_et_f | x86_opr_ew_32 | x86_opr_ec_x16,
+    x86_opr_f64x1            = x86_opr_et_f | x86_opr_ew_64 | x86_opr_ec_x1,
+    x86_opr_f64x2            = x86_opr_et_f | x86_opr_ew_64 | x86_opr_ec_x2,
+    x86_opr_f64x4            = x86_opr_et_f | x86_opr_ew_64 | x86_opr_ec_x4,
+    x86_opr_f64x8            = x86_opr_et_f | x86_opr_ew_64 | x86_opr_ec_x8,
+};
+
+/*
+ * operand encoding accessors
+ */
+
+static inline uint x86_opr_has_mem(uint opr) {
+    return (opr & x86_opr_mem) != 0;
+}
+static inline uint x86_opr_type_val(uint opr) {
+    return opr & x86_opr_type_mask;
+}
+static inline uint x86_opr_size_val(uint opr) {
+    return opr & x86_opr_size_mask;
+}
+static inline uint x86_opr_mem_val(uint opr) {
+    return opr & x86_opr_mem_mask;
+}
+static inline uint x86_opr_bcst_val(uint opr) {
+    return opr & x86_opr_bcst_mask;
+}
+static inline uint x86_opr_et_val(uint opr) {
+    return opr & x86_opr_et_mask;
+}
+static inline uint x86_opr_ec_val(uint opr) {
+    return opr & x86_opr_ec_mask;
+}
+static inline uint x86_opr_ew_val(uint opr) {
+    return opr & x86_opr_ew_mask;
+}
+
+static inline uint x86_opr_mem_size(uint opr)
+{
+    switch (x86_opr_mem_val(opr)) {
+    case x86_opr_m8: return x86_opr_size_8;
+    case x86_opr_m16: return x86_opr_size_16;
+    case x86_opr_m32: return x86_opr_size_32;
+    case x86_opr_m64: return x86_opr_size_64;
+    case x86_opr_m80: return x86_opr_size_80;
+    case x86_opr_m128: return x86_opr_size_128;
+    case x86_opr_m256: return x86_opr_size_256;
+    case x86_opr_m512: return x86_opr_size_512;
+    case x86_opr_mw:
+    default: return x86_opr_size_w;
+    }
+}
+
+static inline uint x86_opr_ew_bytes(uint opr)
+{
+    switch (x86_opr_ew_val(opr)) {
+    case x86_opr_ew_8: return 1;
+    case x86_opr_ew_16: return 2;
+    case x86_opr_ew_32: return 4;
+    case x86_opr_ew_64: return 8;
+    case x86_opr_ew_128: return 16;
+    case x86_opr_ew_256: return 32;
+    case x86_opr_ew_512: return 64;
+    }
+    return 0;
+}
+
+static inline uint x86_opr_ew_size(uint opr)
+{
+    switch (x86_opr_ew_val(opr)) {
+    case x86_opr_ew_8: return x86_opr_size_8;
+    case x86_opr_ew_16: return x86_opr_size_16;
+    case x86_opr_ew_32: return x86_opr_size_32;
+    case x86_opr_ew_64: return x86_opr_size_64;
+    case x86_opr_ew_128: return x86_opr_size_128;
+    case x86_opr_ew_256: return x86_opr_size_256;
+    case x86_opr_ew_512: return x86_opr_size_512;
+    }
+    return 0;
+}
+
+static inline uint x86_opr_ec_mult(uint opr)
+{
+    switch (x86_opr_ec_val(opr)) {
+    case x86_opr_ec_x1: return 1;
+    case x86_opr_ec_x2: return 2;
+    case x86_opr_ec_x4: return 4;
+    case x86_opr_ec_x8: return 8;
+    case x86_opr_ec_x16: return 16;
+    case x86_opr_ec_x32: return 32;
+    case x86_opr_ec_x64: return 64;
+    }
+    return 0;
+}
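+
+/*
+ * the element accessors compose, e.g. x86_opr_f32x4 (four packed floats)
+ * yields a 4 byte element and a count multiplier of 4:
+ *
+ *     x86_opr_ew_bytes(x86_opr_f32x4) == 4
+ *     x86_opr_ec_mult(x86_opr_f32x4)  == 4   // 16 bytes total, one xmm
+ */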
+
+/*
+ * order encoding
+ */
+
+enum
+{
+    x86_ord_none,
+
+    x86_ord_s1               = 0,
+    x86_ord_s2               = x86_ord_s1 + 3,
+    x86_ord_s3               = x86_ord_s2 + 3,
+
+    x86_ord_const            = (1 << x86_ord_s1), /* - */
+    x86_ord_imm              = (2 << x86_ord_s1), /* I */
+    x86_ord_reg              = (3 << x86_ord_s1), /* R */
+    x86_ord_mrm              = (4 << x86_ord_s1), /* M */
+    x86_ord_vec              = (5 << x86_ord_s1), /* V */
+    x86_ord_opr              = (6 << x86_ord_s1), /* O */
+    x86_ord_type_mask        = (7 << x86_ord_s1),
+
+    x86_ord_r                = (1 << x86_ord_s2),
+    x86_ord_w                = (2 << x86_ord_s2),
+    x86_ord_rw               = (3 << x86_ord_s2),
+    x86_ord_i                = (4 << x86_ord_s2),
+    x86_ord_ri               = (5 << x86_ord_s2),
+    x86_ord_wi               = (6 << x86_ord_s2),
+    x86_ord_rwi              = (7 << x86_ord_s2),
+    x86_ord_flag_mask        = (7 << x86_ord_s2),
+
+    x86_ord_one              = x86_ord_const | (1 << x86_ord_s3),
+    x86_ord_rax              = x86_ord_const | (2 << x86_ord_s3),
+    x86_ord_rcx              = x86_ord_const | (3 << x86_ord_s3),
+    x86_ord_rdx              = x86_ord_const | (4 << x86_ord_s3),
+    x86_ord_rbx              = x86_ord_const | (5 << x86_ord_s3),
+    x86_ord_rsp              = x86_ord_const | (6 << x86_ord_s3),
+    x86_ord_rbp              = x86_ord_const | (7 << x86_ord_s3),
+    x86_ord_rsi              = x86_ord_const | (8 << x86_ord_s3),
+    x86_ord_rdi              = x86_ord_const | (9 << x86_ord_s3),
+    x86_ord_st0              = x86_ord_const | (10 << x86_ord_s3),
+    x86_ord_stx              = x86_ord_const | (11 << x86_ord_s3),
+    x86_ord_seg              = x86_ord_const | (12 << x86_ord_s3),
+    x86_ord_xmm0             = x86_ord_const | (13 << x86_ord_s3),
+    x86_ord_xmm0_7           = x86_ord_const | (14 << x86_ord_s3),
+    x86_ord_mxcsr            = x86_ord_const | (15 << x86_ord_s3),
+    x86_ord_rflags           = x86_ord_const | (16 << x86_ord_s3),
+
+    x86_ord_sib              = x86_ord_mrm | (1 << x86_ord_s3),
+
+    x86_ord_is4              = x86_ord_imm | (1 << x86_ord_s3),
+    x86_ord_ime              = x86_ord_imm | (2 << x86_ord_s3),
+};
+
+/*
+ * order encoding accessors
+ */
+
+static inline uint x86_ord_type_val(uint ord) {
+    return ord & x86_ord_type_mask;
+}
+
+/*
+ * codec flags
+ */
+
+enum
+{
+    x86_ce_shift   = 0,
+    x86_cm_shift   = 3,
+    x86_ci_shift   = 5,
+    x86_cj_shift   = 8,
+
+    /* [0:2] encoding */
+    x86_ce_none    = (0 << x86_ce_shift),
+    x86_ce_rex     = (1 << x86_ce_shift),
+    x86_ce_rex2    = (2 << x86_ce_shift),
+    x86_ce_vex2    = (3 << x86_ce_shift),
+    x86_ce_vex3    = (4 << x86_ce_shift),
+    x86_ce_evex    = (5 << x86_ce_shift),
+    x86_ce_mask    = (7 << x86_ce_shift),
+
+    /* [3:4] map */
+    x86_cm_none    = (0 << x86_cm_shift),
+    x86_cm_0f      = (1 << x86_cm_shift),
+    x86_cm_0f38    = (2 << x86_cm_shift),
+    x86_cm_0f3a    = (3 << x86_cm_shift),
+    x86_cm_mask    = (3 << x86_cm_shift),
+
+    /* [5:7] imm */
+    x86_ci_none    = (0 << x86_ci_shift),
+    x86_ci_ib      = (1 << x86_ci_shift),
+    x86_ci_iw      = (2 << x86_ci_shift),
+    x86_ci_iwd     = (3 << x86_ci_shift),
+    x86_ci_i16     = (4 << x86_ci_shift),
+    x86_ci_i32     = (5 << x86_ci_shift),
+    x86_ci_i64     = (6 << x86_ci_shift),
+    x86_ci_mask    = (7 << x86_ci_shift),
+
+    /* [8:9] imm2 */
+    x86_cj_ib      = (1 << x86_cj_shift),
+    x86_cj_i16     = (2 << x86_cj_shift),
+    x86_cj_mask    = (3 << x86_cj_shift),
+
+    /* [10:15] prefixes */
+    x86_cp_osize   = (1 << 10), /* 0x66 */
+    x86_cp_asize   = (1 << 11), /* 0x67 */
+    x86_cp_wait    = (1 << 12), /* 0x9B */
+    x86_cp_lock    = (1 << 13), /* 0xF0 */
+    x86_cp_repne   = (1 << 14), /* 0xF2 */
+    x86_cp_rep     = (1 << 15), /* 0xF3 */
+
+    /* [16:18] flags */
+    x86_cf_modrm   = (1 << 16),
+    x86_cf_ia32    = (1 << 17),
+    x86_cf_amd64   = (1 << 18),
+};
+
+/*
+ * codec struct
+ */
+
+struct x86_codec
+{
+    union {
+        x86_rex rex;
+        x86_rex2 rex2;
+        x86_vex2 vex2;
+        x86_vex3 vex3;
+        x86_evex evex;
+    };
+
+    union {
+        uchar opc[2];
+        struct {
+            uchar opc0;
+            x86_modrm modrm;
+        };
+    };
+
+    uchar opclen : 2;
+    uchar seg : 3;
+    uchar spare;
+    x86_sib sib;
+
+    uint flags;
+    ushort rec;
+    short imm2;
+
+    union {
+        struct {
+            int disp32;
+            int imm32;
+        };
+        struct {
+            ullong imm64;
+        };
+    };
+};
+
+/*
+ * codec accessors
+ */
+
+static inline int x86_codec_field_ce(x86_codec *c) {
+    return c->flags & x86_ce_mask;
+}
+static inline int x86_codec_field_cm(x86_codec *c) {
+    return c->flags & x86_cm_mask;
+}
+static inline int x86_codec_field_ci(x86_codec *c) {
+    return c->flags & x86_ci_mask;
+}
+static inline int x86_codec_field_cj(x86_codec *c) {
+    return c->flags & x86_cj_mask;
+}
+static inline int x86_codec_has_wait(x86_codec *c) {
+    return (c->flags & x86_cp_wait) != 0;
+}
+static inline int x86_codec_has_lock(x86_codec *c) {
+    return (c->flags & x86_cp_lock) != 0;
+}
+static inline int x86_codec_has_rep(x86_codec *c) {
+    return (c->flags & x86_cp_rep) != 0;
+}
+static inline int x86_codec_has_repne(x86_codec *c) {
+    return (c->flags & x86_cp_repne) != 0;
+}
+static inline int x86_codec_has_osize(x86_codec *c) {
+    return (c->flags & x86_cp_osize) != 0;
+}
+static inline int x86_codec_has_asize(x86_codec *c) {
+    return (c->flags & x86_cp_asize) != 0;
+}
+static inline int x86_codec_has_modrm(x86_codec *c) {
+    return (c->flags & x86_cf_modrm) != 0;
+}
+static inline int x86_codec_is16(x86_codec *c) {
+    return (c->flags & (x86_cf_ia32 | x86_cf_amd64)) == 0;
+}
+static inline int x86_codec_is32(x86_codec *c) {
+    return (c->flags & x86_cf_ia32) != 0;
+}
+static inline int x86_codec_is64(x86_codec *c) {
+    return (c->flags & x86_cf_amd64) != 0;
+}
+
+/*
+ * modes
+ */
+
+enum
+{
+    x86_modes_16 = (1 << 0),
+    x86_modes_32 = (1 << 1),
+    x86_modes_64 = (1 << 2),
+};
+
+/*
+ * modes accessors
+ */
+
+static inline int x86_mode_has16(uint mode) {
+    return (mode & x86_modes_16) != 0;
+}
+static inline int x86_mode_has32(uint mode) {
+    return (mode & x86_modes_32) != 0;
+}
+static inline int x86_mode_has64(uint mode) {
+    return (mode & x86_modes_64) != 0;
+}
+
+/*
+ * memory operand
+ */
+
+struct x86_mem
+{
+    union {
+        uint code;
+        struct {
+            uint base : 9;
+            uint index : 9;
+            uint scale : 2;
+        };
+    };
+    int disp;
+};
+
+/*
+ * opcode metadata
+ *
+ * type, prefix, map, opcode, mask, plus operand and order records.
+ *
+ * opcode - opcode number from opcode enum for name lookup
+ * mode   - operating mode (16 | 32 | 64)
+ * opr    - operand list (r8/m8, rw/mw, xmm/m128, etc)
+ * ord    - operand order (register, immediate, regmem, etc)
+ * enc    - operand encoding (type, width, prefix, map, immediate, etc)
+ * opc    - opcode, ModRM function byte or second opcode byte.
+ * opm    - opcode mask (f8 for XX+r), ModRM function or second byte mask.
+ *
+ * prefix and map are provisioned as 6 bits each to align the bitfield.
+ * there are 3 types, 10 prefixes (5 * 2), and 7 maps (up to map6).
+ */
+
+struct x86_opc_data
+{
+    ushort op;
+    ushort mode;
+    ushort opr;
+    ushort ord;
+    uint enc;
+    union {
+        uchar opc[2];
+        ushort opc_s;
+    };
+    union {
+        uchar opm[2];
+        ushort opm_s;
+    };
+};
+
+struct x86_opr_data
+{
+    uint opr[4];
+};
+
+struct x86_ord_data
+{
+    ushort ord[4];
+};
+
+/*
+ * invert condition
+ */
+
+static inline uint x86_invert_cond(uint c) {
+    return c ^ 1;
+}
+
+/*
+ * swap condition operands
+ */
+
+static inline uint x86_swap_cond(uint c) {
+    return c & 6 ? c ^ 9 : c;
+}
+
+/*
+ * ModRM encoder
+ */
+
+static inline x86_modrm x86_enc_modrm(uint mod, uint reg, uint rm)
+{
+    x86_modrm modrm = {
+        .data = {
+            /* [0:2] */ (rm & 7u) |
+            /* [3:5] */ ((reg & 7u) << 3) |
+            /* [6:7] */ ((mod & 3u) << 6)
+        }
+    };
+    return modrm;
+}
+
+/*
+ * SIB encoder
+ */
+
+static inline x86_sib x86_enc_sib(uint s, uint x, uint b)
+{
+    x86_sib sib = {
+        .data = {
+            /* [0:2] */ (b & 7u) |
+            /* [3:5] */ ((x & 7u) << 3) |
+            /* [6:7] */ (((uint)s & 3u) << 6)
+        }
+    };
+    return sib;
+}
+
+/*
+ * REX encoder
+ */
+
+static inline x86_rex x86_enc_rex(uint w, uint r, uint x, uint b)
+{
+    x86_rex rex = {
+        .data = {
+            /*   [0] */ ((b & 8u) >> 3) |
+            /*   [1] */ ((x & 8u) >> 2) |
+            /*   [2] */ ((r & 8u) >> 1) |
+            /*   [3] */ ((w & 1u) << 3) |
+            /* [4:7] */ 0x40
+        }
+    };
+    return rex;
+}
+
+/*
+ * REX2 encoder
+ */
+
+static inline x86_rex2 x86_enc_rex2(uint m, uint w, uint r, uint x, uint b)
+{
+    x86_rex2 rex2 = {
+        .data = {
+            /*   [0] */ ((b &  8u) >> 3) |
+            /*   [1] */ ((x &  8u) >> 2) |
+            /*   [2] */ ((r &  8u) >> 1) |
+            /*   [3] */ ((w &  1u) << 3) |
+            /*   [4] */ ((b & 16u) << 0) |
+            /*   [5] */ ((x & 16u) << 1) |
+            /*   [6] */ ((r & 16u) << 2) |
+            /*   [7] */ ((m &  1u) << 7)
+        }
+    };
+    return rex2;
+}
+
+/*
+ * VEX2 encoder
+ */
+
+static inline x86_vex2 x86_enc_vex2(uint p, uint l, uint r, uint v)
+{
+    x86_vex2 vex2 = {
+        .data = {
+            /* [0:1] */ ((uint)p & 3u) |
+            /*   [2] */ ((l & 1u) << 2) |
+            /* [3:6] */ ((~v & 15u) << 3) |
+            /*   [7] */ ((~r & 8u) << 4)
+        }
+    };
+    return vex2;
+}
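+
+/*
+ * for example, the byte following the 0xc5 prefix for a VEX.128.66.0F
+ * encoding with reg=xmm1 and vvvv=xmm2 comes out as:
+ *
+ *     x86_enc_vex2(x86_pfx_66, x86_vex_l128, 1, 2).data[0] == 0xe9
+ */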
+
+/*
+ * VEX3 encoder
+ */
+
+static inline x86_vex3 x86_enc_vex3(uint m, uint p,
+    uint l, uint w, uint r, uint x, uint b, uint v)
+{
+    x86_vex3 vex3 = {
+        .data = {
+            /* [0:4] */ ((m &  31u) >> 0) |
+            /*   [5] */ ((~b &  8u) << 2) |
+            /*   [6] */ ((~x &  8u) << 3) |
+            /*   [7] */ ((~r &  8u) << 4),
+            /* [0:1] */ ((p &   3u) >> 0) |
+            /*   [2] */ ((l  &  1u) << 2) |
+            /* [3:6] */ ((~v & 15u) << 3) |
+            /*   [7] */ ((w  &  1u) << 7)
+        }
+    };
+    return vex3;
+}
+
+/*
+ * EVEX encoder
+ */
+
+static inline x86_evex x86_enc_evex(uint m, uint p,
+    uint l, uint w, uint r, uint x, uint b, uint v,
+    uint k, uint brd, uint z)
+{
+    x86_evex evex = {
+        .data = {
+            /* [0:2] */ ((m &   7u) >> 0) |
+            /*   [3] */ ((b &  16u) >> 1) |
+            /*   [4] */ ((~r & 16u) >> 0) |
+            /*   [5] */ ((~b &  8u) << 2) |
+            /*   [6] */ ((~x &  8u) << 3) |
+            /*   [7] */ ((~r &  8u) << 4),
+            /* [0:1] */ ((p &   3u) >> 0) |
+            /*   [2] */ ((~x & 16u) >> 2) |
+            /* [3:6] */ ((~v & 15u) << 3) |
+            /*   [7] */ ((w  &  1u) << 7),
+            /* [0:2] */ ((k &   7u) >> 0) |
+            /*   [3] */ ((~v & 16u) >> 1) |
+            /*   [4] */ ((brd & 1u) << 4) |
+            /* [5:6] */ ((l &   3u) << 5) |
+            /*   [7] */ ((z &   1u) << 7)
+        }
+    };
+    return evex;
+}
+
+/*
+ * table sort types
+ */
+
+enum
+{
+    x86_sort_none,
+    x86_sort_numeric,
+    x86_sort_alpha
+};
+
+/*
+ * table encoding prefix types
+ */
+
+enum
+{
+    x86_table_none,
+    x86_table_lex,
+    x86_table_vex,
+    x86_table_evex
+};
+
+/*
+ * table sort indices
+ */
+
+struct x86_table_idx
+{
+    size_t count;
+    size_t *idx;
+};
+
+/*
+ * opcode index decode tables
+ */
+
+struct x86_acc_idx
+{
+    size_t map_count;
+    x86_opc_data *map;
+    size_t acc_count;
+    x86_acc_entry *acc;
+    uchar *page_offsets;
+};
+
+/*
+ * opcode index acceleration entry
+ */
+
+struct x86_acc_entry
+{
+    uint idx : 24;
+    uint nent : 8;
+};
+
+/*
+ * opcode acceleration functions
+ */
+
+static inline uint x86_acc_page(uint type, uint prefix, uint map)
+{
+    return (type & 3) | ((prefix & 15) << 2) | ((map & 7) << 6);
+}
+
+static inline size_t x86_acc_offset(x86_acc_idx *idx, size_t acc_page)
+{
+    return (size_t)idx->page_offsets[acc_page] << 8;
+}
+
+static inline x86_acc_entry *x86_acc_lookup(x86_acc_idx *idx, size_t offset)
+{
+    return idx->acc + offset;
+}
+
+/*
+ * bitmap utility functions
+ */
+
+static inline size_t x86_bitmap_idx(size_t i) { return i >> 6; }
+static inline size_t x86_bitmap_shift(size_t i) { return (i & 63); }
+
+static inline int x86_bitmap_get(ullong *bitmap, size_t i)
+{
+    return (int)((bitmap[x86_bitmap_idx(i)] >> x86_bitmap_shift(i)) & 1);
+}
+
+static inline void x86_bitmap_set(ullong *bitmap, size_t i, int value)
+{
+    bitmap[x86_bitmap_idx(i)] |= ((ullong)value << x86_bitmap_shift(i));
+}
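+
+/*
+ * note that x86_bitmap_set ors the value in, so a zero value leaves the
+ * bit unchanged. typical usage:
+ *
+ *     ullong seen[4] = { 0 };   // 256 bits
+ *     x86_bitmap_set(seen, 130, 1);
+ *     x86_bitmap_get(seen, 130) == 1
+ */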
+
+/*
+ * context for encoder, decoder, formatter and parser
+ */
+
+struct x86_ctx
+{
+    uint mode;
+    x86_acc_idx *idx;
+};
+
+/*
+ * buffer
+ *
+ * simplified buffer with read (start) and write (end) cursors.
+ * capacity is managed by the caller; the buffer does no limit checking.
+ */
+
+struct x86_buffer
+{
+    uchar *data;
+    size_t start;
+    size_t end;
+};
+
+/*
+ * buffer functions
+ */
+
+static inline void x86_buffer_init(x86_buffer *b, uchar *data)
+{
+    b->start = 0;
+    b->end = 0;
+    b->data = data;
+}
+
+static inline void x86_buffer_init_ex(x86_buffer *b, uchar *data,
+    size_t start, size_t end)
+{
+    b->start = start;
+    b->end = end;
+    b->data = data;
+}
+
+static inline size_t x86_buffer_read(x86_buffer *b, uchar *buf, size_t len)
+{
+    memcpy(buf, b->data + b->start, len);
+    b->start += len;
+    return len;
+}
+
+static inline size_t x86_buffer_unread(x86_buffer *b, size_t len)
+{
+    b->start -= len;
+    return len;
+}
+
+static inline size_t x86_buffer_write(x86_buffer *b, uchar *buf, size_t len)
+{
+    memcpy(b->data + b->end, buf, len);
+    b->end += len;
+    return len;
+}
+
+static inline size_t x86_buffer_unwrite(x86_buffer *b, size_t len)
+{
+    b->end -= len;
+    return len;
+}
+
+static inline size_t x86_out8(x86_buffer *buf, u8 v)
+{
+    return x86_buffer_write(buf, (void *)&v, sizeof(u8));
+}
+
+static inline size_t x86_out16(x86_buffer *buf, u16 v)
+{
+    u16 t = le16(v);
+    return x86_buffer_write(buf, (void *)&t, sizeof(u16));
+}
+
+static inline size_t x86_out32(x86_buffer *buf, u32 v)
+{
+    u32 t = le32(v);
+    return x86_buffer_write(buf, (void *)&t, sizeof(u32));
+}
+
+static inline size_t x86_out64(x86_buffer *buf, u64 v)
+{
+    u64 t = le64(v);
+    return x86_buffer_write(buf, (void *)&t, sizeof(u64));
+}
+
+static inline size_t x86_unput(x86_buffer *buf, size_t n)
+{
+    return x86_buffer_unwrite(buf, n);
+}
+
+static inline u8 x86_in8(x86_buffer *buf)
+{
+    u8 t = 0;
+    x86_buffer_read(buf, (void *)&t, sizeof(u8));
+    return t;
+}
+
+static inline u16 x86_in16(x86_buffer *buf)
+{
+    u16 t = 0;
+    x86_buffer_read(buf, (void *)&t, sizeof(u16));
+    return le16(t);
+}
+
+static inline u32 x86_in32(x86_buffer *buf)
+{
+    u32 t = 0;
+    x86_buffer_read(buf, (void *)&t, sizeof(u32));
+    return le32(t);
+}
+
+static inline u64 x86_in64(x86_buffer *buf)
+{
+    u64 t = 0;
+    x86_buffer_read(buf, (void *)&t, sizeof(u64));
+    return le64(t);
+}
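+
+/*
+ * a minimal emit sketch; capacity checks are the caller's responsibility:
+ *
+ *     uchar mem[16];
+ *     x86_buffer buf;
+ *     x86_buffer_init(&buf, mem);
+ *     x86_out8(&buf, 0xb8);   // mov eax, imm32
+ *     x86_out32(&buf, 0x12345678);
+ *     // buf.end == 5, mem = { 0xb8, 0x78, 0x56, 0x34, 0x12, ... }
+ */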
+
+/*
+ * metadata tables
+ */
+
+extern const char *x86_reg_names[];
+extern const size_t x86_op_names_size;
+extern const char *x86_op_names[];
+extern const size_t x86_opc_table_size;
+extern const x86_opc_data x86_opc_table[];
+extern const size_t x86_opr_table_size;
+extern const x86_opr_data x86_opr_table[];
+extern const size_t x86_ord_table_size;
+extern const x86_ord_data x86_ord_table[];
+
+/*
+ * encoder, decoder, table lookup, disassembly
+ */
+
+void x86_set_debug(uint d);
+size_t x86_mode_name(char *buf, size_t len, uint mode, const char *sep);
+size_t x86_map_name(char *buf, size_t len, uint mode, const char *sep);
+size_t x86_ord_name(char *buf, size_t len, uint ord, const char *sep);
+size_t x86_ord_mnem(char *buf, size_t len, const ushort *ord);
+size_t x86_opr_name(char *buf, size_t len, uint opr);
+size_t x86_enc_name(char *buf, size_t len, uint enc);
+const char *x86_reg_name(uint reg);
+const char *x86_table_type_name(uint type);
+const char *x86_table_map_name(uint map);
+const char *x86_table_prefix_name(uint prefix);
+int x86_enc_filter_rex(x86_rex prefix, uint enc);
+int x86_enc_filter_rex2(x86_rex2 prefix, uint enc);
+int x86_enc_filter_vex2(x86_vex2 prefix, uint enc);
+int x86_enc_filter_vex3(x86_vex3 prefix, uint enc);
+int x86_enc_filter_evex(x86_evex prefix, uint enc);
+x86_table_idx x86_opc_table_identity(void);
+x86_table_idx x86_opc_table_sorted(x86_table_idx tab, uint sort);
+x86_table_idx x86_opc_table_filter(x86_table_idx tab, uint modes);
+x86_opc_data *x86_table_lookup(x86_acc_idx *idx, const x86_opc_data *m);
+void x86_print_op(const x86_opc_data *d, uint compact, uint opcode);
+size_t x86_format_op(char *buf, size_t len, x86_ctx *ctx, x86_codec *c);
+typedef size_t (*x86_fmt_symbol)(char *buf, size_t buflen, x86_codec *c,
+    size_t pc_offset);
+size_t x86_format_op_symbol(char *buf, size_t buflen, x86_ctx *ctx,
+    x86_codec *c, size_t pc_offset, x86_fmt_symbol sym_cb);
+size_t x86_format_hex(char *buf, size_t len, uchar *data, size_t datalen);
+x86_ctx *x86_ctx_create(uint mode);
+void x86_ctx_destroy(x86_ctx *ctx);
+int x86_codec_write(x86_ctx *ctx, x86_buffer *buf, x86_codec c, size_t *len);
+int x86_codec_read(x86_ctx *ctx, x86_buffer *buf, x86_codec *c, size_t *len);
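+
+/*
+ * a rough decode-and-format loop over a byte range code/code_len,
+ * assuming x86_codec_read returns zero on success and stores the
+ * instruction length in *len:
+ *
+ *     x86_ctx *ctx = x86_ctx_create(x86_modes_64);
+ *     x86_buffer buf;
+ *     x86_buffer_init_ex(&buf, code, 0, code_len);
+ *     while (buf.start < buf.end) {
+ *         x86_codec c = { 0 };
+ *         size_t len = 0;
+ *         char str[128];
+ *         if (x86_codec_read(ctx, &buf, &c, &len) != 0) {
+ *             break;
+ *         }
+ *         x86_format_op(str, sizeof(str), ctx, &c);
+ *     }
+ *     x86_ctx_destroy(ctx);
+ */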
+
+/*
+ * registers and opcodes
+ */
+
+#include "disas/x86-enums.inc"
diff --git a/include/disas/dis-asm.h b/include/disas/dis-asm.h
index 3b50ecfb5409..fd8aa326948f 100644
--- a/include/disas/dis-asm.h
+++ b/include/disas/dis-asm.h
@@ -446,6 +446,7 @@ int print_insn_v850             (bfd_vma, disassemble_info*);
 int print_insn_tic30            (bfd_vma, disassemble_info*);
 int print_insn_microblaze       (bfd_vma, disassemble_info*);
 int print_insn_ia64             (bfd_vma, disassemble_info*);
+int print_insn_x86              (bfd_vma, disassemble_info*);
 int print_insn_xtensa           (bfd_vma, disassemble_info*);
 int print_insn_riscv32          (bfd_vma, disassemble_info*);
 int print_insn_riscv64          (bfd_vma, disassemble_info*);
diff --git a/target/i386/cpu.c b/target/i386/cpu.c
index 1b9c11022c48..9b23e2193630 100644
--- a/target/i386/cpu.c
+++ b/target/i386/cpu.c
@@ -44,6 +44,7 @@
 #endif
 
 #include "disas/capstone.h"
+#include "disas/x86.h"
 #include "cpu-internal.h"
 
 static void x86_cpu_realizefn(DeviceState *dev, Error **errp);
@@ -8353,6 +8354,12 @@ static void x86_disas_set_info(CPUState *cs, disassemble_info *info)
                       : CS_MODE_16);
     info->cap_insn_unit = 1;
     info->cap_insn_split = 8;
+
+    info->print_insn = print_insn_x86;
+    info->private_data = x86_ctx_create(
+        env->hflags & HF_CS64_MASK ? x86_modes_64
+        : env->hflags & HF_CS32_MASK ? x86_modes_32
+        : x86_modes_16);
 }
 
 void x86_update_hflags(CPUX86State *env)
-- 
2.43.0



