[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[PATCH 12/37] target/i386: add scalar 0F 38 and 0F 3A instruction to new
From: |
Paolo Bonzini |
Subject: |
[PATCH 12/37] target/i386: add scalar 0F 38 and 0F 3A instruction to new decoder |
Date: |
Mon, 12 Sep 2022 01:03:52 +0200 |
Because these are the only VEX instructions that QEMU supports, the
new decoder is entered on the first byte of a valid VEX prefix, and VEX
decoding only needs to be done in decode-new.c.inc.
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
target/i386/tcg/decode-new.c.inc | 59 +++++++
target/i386/tcg/emit.c.inc | 261 +++++++++++++++++++++++++++++++
target/i386/tcg/translate.c | 49 +-----
3 files changed, 323 insertions(+), 46 deletions(-)
diff --git a/target/i386/tcg/decode-new.c.inc b/target/i386/tcg/decode-new.c.inc
index 7b4fd9fb54..b31daecb90 100644
--- a/target/i386/tcg/decode-new.c.inc
+++ b/target/i386/tcg/decode-new.c.inc
@@ -133,11 +133,69 @@ static uint8_t get_modrm(DisasContext *s, CPUX86State
*env)
return s->modrm;
}
+static void decode_group17(DisasContext *s, CPUX86State *env, X86OpEntry
*entry, uint8_t *b)
+{
+ static const X86GenFunc group17_gen[8] = {
+ NULL, gen_BLSR, gen_BLSMSK, gen_BLSI,
+ };
+ int op = (get_modrm(s, env) >> 3) & 7;
+ entry->gen = group17_gen[op];
+}
+
static const X86OpEntry opcodes_0F38_00toEF[240] = {
};
/* five rows for no prefix, 66, F3, F2, 66+F2 */
static X86OpEntry opcodes_0F38_F0toFF[16][5] = {
+ [0] = {
+ X86_OP_ENTRY3(MOVBE, G,y, M,y, None,None, cpuid(MOVBE)),
+ X86_OP_ENTRY3(MOVBE, G,w, M,w, None,None, cpuid(MOVBE)),
+ {},
+ X86_OP_ENTRY2(CRC32, G,d, E,b, cpuid(SSE42)),
+ X86_OP_ENTRY2(CRC32, G,d, E,b, cpuid(SSE42)),
+ },
+ [1] = {
+ X86_OP_ENTRY3(MOVBE, M,y, G,y, None,None, cpuid(MOVBE)),
+ X86_OP_ENTRY3(MOVBE, M,w, G,w, None,None, cpuid(MOVBE)),
+ {},
+ X86_OP_ENTRY2(CRC32, G,d, E,y, cpuid(SSE42)),
+ X86_OP_ENTRY2(CRC32, G,d, E,w, cpuid(SSE42)),
+ },
+ [2] = {
+ X86_OP_ENTRY3(ANDN, G,y, B,y, E,y, vex13 cpuid(BMI1)),
+ {},
+ {},
+ {},
+ {},
+ },
+ [3] = {
+ X86_OP_GROUP3(group17, B,y, E,y, None,None, vex13 cpuid(BMI1)),
+ {},
+ {},
+ {},
+ {},
+ },
+ [5] = {
+ X86_OP_ENTRY3(BZHI, G,y, E,y, B,y, vex13 cpuid(BMI1)),
+ {},
+ X86_OP_ENTRY3(PEXT, G,y, B,y, E,y, vex13 cpuid(BMI2)),
+ X86_OP_ENTRY3(PDEP, G,y, B,y, E,y, vex13 cpuid(BMI2)),
+ {},
+ },
+ [6] = {
+ {},
+ X86_OP_ENTRY2(ADCX, G,y, E,y, cpuid(ADX)),
+ X86_OP_ENTRY2(ADOX, G,y, E,y, cpuid(ADX)),
+ X86_OP_ENTRY3(MULX, /* B,y, */ G,y, E,y, 2,y, vex13 cpuid(BMI2)),
+ {},
+ },
+ [7] = {
+ X86_OP_ENTRY3(BEXTR, G,y, E,y, B,y, vex13 cpuid(BMI1)),
+ X86_OP_ENTRY3(SHLX, G,y, E,y, B,y, vex13 cpuid(BMI1)),
+ X86_OP_ENTRY3(SARX, G,y, E,y, B,y, vex13 cpuid(BMI1)),
+ X86_OP_ENTRY3(SHRX, G,y, E,y, B,y, vex13 cpuid(BMI1)),
+ {},
+ },
};
static void decode_0F38(DisasContext *s, CPUX86State *env, X86OpEntry *entry,
uint8_t *b)
@@ -159,6 +217,7 @@ static void decode_0F38(DisasContext *s, CPUX86State *env,
X86OpEntry *entry, ui
}
static const X86OpEntry opcodes_0F3A[256] = {
+ [0xF0] = X86_OP_ENTRY3(RORX, G,y, E,y, I,b, vex13 cpuid(BMI2) p_f2),
};
static void decode_0F3A(DisasContext *s, CPUX86State *env, X86OpEntry *entry,
uint8_t *b)
diff --git a/target/i386/tcg/emit.c.inc b/target/i386/tcg/emit.c.inc
index ce0205e05a..36b963a0d3 100644
--- a/target/i386/tcg/emit.c.inc
+++ b/target/i386/tcg/emit.c.inc
@@ -211,3 +211,264 @@ static void gen_writeback(DisasContext *s, X86DecodedOp
*op)
abort();
}
}
+
+static void gen_ADCOX(DisasContext *s, CPUX86State *env, MemOp ot, int cc_op)
+{
+ TCGv carry_in = NULL;
+ TCGv carry_out = (cc_op == CC_OP_ADCX ? cpu_cc_dst : cpu_cc_src2);
+ TCGv zero;
+
+ if (cc_op == s->cc_op || s->cc_op == CC_OP_ADCOX) {
+ /* Re-use the carry-out from a previous round. */
+ carry_in = carry_out;
+ cc_op = s->cc_op;
+ } else if (s->cc_op == CC_OP_ADCX || s->cc_op == CC_OP_ADOX) {
+ /* Merge with the carry-out from the opposite instruction. */
+ cc_op = CC_OP_ADCOX;
+ }
+
+ /* If we don't have a carry-in, get it out of EFLAGS. */
+ if (!carry_in) {
+ if (s->cc_op != CC_OP_ADCX && s->cc_op != CC_OP_ADOX) {
+ gen_compute_eflags(s);
+ }
+ carry_in = s->tmp0;
+ tcg_gen_extract_tl(carry_in, cpu_cc_src,
+ ctz32(cc_op == CC_OP_ADCX ? CC_C : CC_O), 1);
+ }
+
+ switch (ot) {
+#ifdef TARGET_X86_64
+ case MO_32:
+ /* If TL is 64-bit just do everything in 64-bit arithmetic. */
+ tcg_gen_add_i64(s->T0, s->T0, s->T1);
+ tcg_gen_add_i64(s->T0, s->T0, carry_in);
+ tcg_gen_shri_i64(carry_out, s->T0, 32);
+ break;
+#endif
+ default:
+ zero = tcg_const_tl(0);
+ tcg_gen_add2_tl(s->T0, carry_out, s->T0, zero, carry_in, zero);
+ tcg_gen_add2_tl(s->T0, carry_out, s->T0, carry_out, s->T1, zero);
+ tcg_temp_free(zero);
+ break;
+ }
+ set_cc_op(s, cc_op);
+}
+
+static void gen_ADCX(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+{
+ gen_ADCOX(s, env, decode->op[0].ot, CC_OP_ADCX);
+}
+
+static void gen_ADOX(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+{
+ gen_ADCOX(s, env, decode->op[0].ot, CC_OP_ADOX);
+}
+
+static void gen_ANDN(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+{
+ MemOp ot = decode->op[0].ot;
+
+ tcg_gen_andc_tl(s->T0, s->T1, s->T0);
+ gen_op_update1_cc(s);
+ set_cc_op(s, CC_OP_LOGICB + ot);
+}
+
+static void gen_BEXTR(DisasContext *s, CPUX86State *env, X86DecodedInsn
*decode)
+{
+ MemOp ot = decode->op[0].ot;
+ TCGv bound, zero;
+
+ /*
+ * Extract START, and shift the operand.
+ * Shifts larger than operand size get zeros.
+ */
+ tcg_gen_ext8u_tl(s->A0, s->T1);
+ tcg_gen_shr_tl(s->T0, s->T0, s->A0);
+
+ bound = tcg_const_tl(ot == MO_64 ? 63 : 31);
+ zero = tcg_const_tl(0);
+ tcg_gen_movcond_tl(TCG_COND_LEU, s->T0, s->A0, bound, s->T0, zero);
+ tcg_temp_free(zero);
+
+ /* Extract the LEN into a mask. Lengths larger than
+ * operand size get all ones.
+ */
+ tcg_gen_extract_tl(s->A0, s->T1, 8, 8);
+ tcg_gen_movcond_tl(TCG_COND_LEU, s->A0, s->A0, bound, s->A0, bound);
+ tcg_temp_free(bound);
+
+ tcg_gen_movi_tl(s->T1, 1);
+ tcg_gen_shl_tl(s->T1, s->T1, s->A0);
+ tcg_gen_subi_tl(s->T1, s->T1, 1);
+ tcg_gen_and_tl(s->T0, s->T0, s->T1);
+
+ gen_op_update1_cc(s);
+ set_cc_op(s, CC_OP_LOGICB + ot);
+}
+
+static void gen_BLSI(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+{
+ MemOp ot = decode->op[0].ot;
+
+ tcg_gen_neg_tl(s->T1, s->T0);
+ tcg_gen_and_tl(s->T0, s->T0, s->T1);
+ tcg_gen_mov_tl(cpu_cc_dst, s->T0);
+ set_cc_op(s, CC_OP_BMILGB + ot);
+}
+
+static void gen_BLSMSK(DisasContext *s, CPUX86State *env, X86DecodedInsn
*decode)
+{
+ MemOp ot = decode->op[0].ot;
+
+ tcg_gen_subi_tl(s->T1, s->T0, 1);
+ tcg_gen_xor_tl(s->T0, s->T0, s->T1);
+ tcg_gen_mov_tl(cpu_cc_dst, s->T0);
+ set_cc_op(s, CC_OP_BMILGB + ot);
+}
+
+static void gen_BLSR(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+{
+ MemOp ot = decode->op[0].ot;
+
+ tcg_gen_subi_tl(s->T1, s->T0, 1);
+ tcg_gen_and_tl(s->T0, s->T0, s->T1);
+ tcg_gen_mov_tl(cpu_cc_dst, s->T0);
+ set_cc_op(s, CC_OP_BMILGB + ot);
+}
+
+static void gen_BZHI(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+{
+ MemOp ot = decode->op[0].ot;
+ TCGv bound;
+
+ tcg_gen_ext8u_tl(s->T1, cpu_regs[s->vex_v]);
+ bound = tcg_const_tl(ot == MO_64 ? 63 : 31);
+
+ /*
+ * Note that since we're using BMILG (in order to get O
+ * cleared) we need to store the inverse into C.
+ */
+ tcg_gen_setcond_tl(TCG_COND_LT, cpu_cc_src, s->T1, bound);
+ tcg_gen_movcond_tl(TCG_COND_GT, s->T1, s->T1, bound, bound, s->T1);
+ tcg_temp_free(bound);
+
+ tcg_gen_movi_tl(s->A0, -1);
+ tcg_gen_shl_tl(s->A0, s->A0, s->T1);
+ tcg_gen_andc_tl(s->T0, s->T0, s->A0);
+
+ gen_op_update1_cc(s);
+ set_cc_op(s, CC_OP_BMILGB + ot);
+}
+
+static void gen_CRC32(DisasContext *s, CPUX86State *env, X86DecodedInsn
*decode)
+{
+ MemOp ot = decode->op[2].ot;
+
+ tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
+ gen_helper_crc32(s->T0, s->tmp2_i32, s->T1, tcg_const_i32(8 << ot));
+}
+
+static void gen_MOVBE(DisasContext *s, CPUX86State *env, X86DecodedInsn
*decode)
+{
+ MemOp ot = decode->op[0].ot;
+
+ /* M operand type does not load/store */
+ if (decode->e.op0 == X86_TYPE_M) {
+ tcg_gen_qemu_st_tl(s->T0, s->A0, s->mem_index, ot | MO_BE);
+ } else {
+ tcg_gen_qemu_ld_tl(s->T0, s->A0, s->mem_index, ot | MO_BE);
+ }
+}
+
+static void gen_MULX(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+{
+ MemOp ot = decode->op[0].ot;
+
+ /* low part of result in VEX.vvvv, high in MODRM */
+ switch (ot) {
+ default:
+ tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
+ tcg_gen_trunc_tl_i32(s->tmp3_i32, s->T1);
+ tcg_gen_mulu2_i32(s->tmp2_i32, s->tmp3_i32,
+ s->tmp2_i32, s->tmp3_i32);
+ tcg_gen_extu_i32_tl(cpu_regs[s->vex_v], s->tmp2_i32);
+ tcg_gen_extu_i32_tl(s->T0, s->tmp3_i32);
+ break;
+#ifdef TARGET_X86_64
+ case MO_64:
+ tcg_gen_mulu2_i64(cpu_regs[s->vex_v], s->T0, s->T0, s->T1);
+ break;
+#endif
+ }
+
+}
+
+static void gen_PDEP(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+{
+ MemOp ot = decode->op[1].ot;
+ if (ot < MO_64) {
+ tcg_gen_ext32u_tl(s->T0, s->T0);
+ }
+ gen_helper_pdep(s->T0, s->T0, s->T1);
+}
+
+static void gen_PEXT(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+{
+ MemOp ot = decode->op[1].ot;
+ if (ot < MO_64) {
+ tcg_gen_ext32u_tl(s->T0, s->T0);
+ }
+ gen_helper_pext(s->T0, s->T0, s->T1);
+}
+
+static void gen_RORX(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+{
+ MemOp ot = decode->op[0].ot;
+ int b = decode->immediate;
+
+ if (ot == MO_64) {
+ tcg_gen_rotri_tl(s->T0, s->T0, b & 63);
+ } else {
+ tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0);
+ tcg_gen_rotri_i32(s->tmp2_i32, s->tmp2_i32, b & 31);
+ tcg_gen_extu_i32_tl(s->T0, s->tmp2_i32);
+ }
+}
+
+static void gen_SARX(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+{
+ MemOp ot = decode->op[0].ot;
+ int mask;
+
+ mask = ot == MO_64 ? 63 : 31;
+ tcg_gen_andi_tl(s->T1, s->T1, mask);
+ if (ot != MO_64) {
+ tcg_gen_ext32s_tl(s->T0, s->T0);
+ }
+ tcg_gen_sar_tl(s->T0, s->T0, s->T1);
+}
+
+static void gen_SHLX(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+{
+ MemOp ot = decode->op[0].ot;
+ int mask;
+
+ mask = ot == MO_64 ? 63 : 31;
+ tcg_gen_andi_tl(s->T1, s->T1, mask);
+ tcg_gen_shl_tl(s->T0, s->T0, s->T1);
+}
+
+static void gen_SHRX(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode)
+{
+ MemOp ot = decode->op[0].ot;
+ int mask;
+
+ mask = ot == MO_64 ? 63 : 31;
+ tcg_gen_andi_tl(s->T1, s->T1, mask);
+ if (ot != MO_64) {
+ tcg_gen_ext32u_tl(s->T0, s->T0);
+ }
+ tcg_gen_shr_tl(s->T0, s->T0, s->T1);
+}
diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c
index 4ecf75ede3..7eed575f2e 100644
--- a/target/i386/tcg/translate.c
+++ b/target/i386/tcg/translate.c
@@ -4892,59 +4892,16 @@ static target_ulong disas_insn(DisasContext *s,
CPUState *cpu)
#endif
case 0xc5: /* 2-byte VEX */
case 0xc4: /* 3-byte VEX */
- use_new = false;
- /* VEX prefixes cannot be used except in 32-bit mode.
- Otherwise the instruction is LES or LDS. */
if (CODE32(s) && !VM86(s)) {
- static const int pp_prefix[4] = {
- 0, PREFIX_DATA, PREFIX_REPZ, PREFIX_REPNZ
- };
- int vex3, vex2 = x86_ldub_code(env, s);
+ int vex2 = x86_ldub_code(env, s);
+ s->pc--; /* rewind the advance_pc() x86_ldub_code() did */
if (!CODE64(s) && (vex2 & 0xc0) != 0xc0) {
/* 4.1.4.6: In 32-bit mode, bits [7:6] must be 11b,
otherwise the instruction is LES or LDS. */
- s->pc--; /* rewind the advance_pc() x86_ldub_code() did */
break;
}
-
- /* 4.1.1-4.1.3: No preceding lock, 66, f2, f3, or rex prefixes. */
- if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ
- | PREFIX_LOCK | PREFIX_DATA | PREFIX_REX)) {
- goto illegal_op;
- }
-#ifdef TARGET_X86_64
- s->rex_r = (~vex2 >> 4) & 8;
-#endif
- if (b == 0xc5) {
- /* 2-byte VEX prefix: RVVVVlpp, implied 0f leading opcode byte
*/
- vex3 = vex2;
- b = x86_ldub_code(env, s) | 0x100;
- } else {
- /* 3-byte VEX prefix: RXBmmmmm wVVVVlpp */
- vex3 = x86_ldub_code(env, s);
-#ifdef TARGET_X86_64
- s->rex_x = (~vex2 >> 3) & 8;
- s->rex_b = (~vex2 >> 2) & 8;
- s->rex_w = (vex3 >> 7) & 1;
-#endif
- switch (vex2 & 0x1f) {
- case 0x01: /* Implied 0f leading opcode bytes. */
- b = x86_ldub_code(env, s) | 0x100;
- break;
- case 0x02: /* Implied 0f 38 leading opcode bytes. */
- b = 0x138;
- break;
- case 0x03: /* Implied 0f 3a leading opcode bytes. */
- b = 0x13a;
- break;
- default: /* Reserved for future use. */
- goto unknown_op;
- }
- }
- s->vex_v = (~vex3 >> 3) & 0xf;
- s->vex_l = (vex3 >> 2) & 1;
- prefixes |= pp_prefix[vex3 & 3] | PREFIX_VEX;
+ return disas_insn_new(s, cpu, b);
}
break;
}
--
2.37.2
- Re: [PATCH 07/37] target/i386: add CPUID[EAX=7, ECX=0].ECX to DisasContext, (continued)
- [PATCH 08/37] target/i386: add CPUID feature checks to new decoder, Paolo Bonzini, 2022/09/11
- [PATCH 04/37] target/i386: introduce insn_get_addr, Paolo Bonzini, 2022/09/11
- [PATCH 10/37] target/i386: validate VEX prefixes via the instructions' exception classes, Paolo Bonzini, 2022/09/11
- [PATCH 09/37] target/i386: add AVX_EN hflag, Paolo Bonzini, 2022/09/11
- [PATCH 12/37] target/i386: add scalar 0F 38 and 0F 3A instruction to new decoder,
Paolo Bonzini <=
- [PATCH 13/37] target/i386: remove scalar VEX instructions from old decoder, Paolo Bonzini, 2022/09/11
- [PATCH 11/37] target/i386: validate SSE prefixes directly in the decoding table, Paolo Bonzini, 2022/09/11
- [PATCH 14/37] target/i386: Prepare ops_sse_header.h for 256 bit AVX, Paolo Bonzini, 2022/09/11
- [PATCH 19/37] target/i386: Introduce 256-bit vector helpers, Paolo Bonzini, 2022/09/11
- [PATCH 16/37] target/i386: support operand merging in binary scalar helpers, Paolo Bonzini, 2022/09/11