[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[PULL 17/38] target/i386: improve code generation for BT
From: Paolo Bonzini
Subject: [PULL 17/38] target/i386: improve code generation for BT
Date: Fri, 10 Jan 2025 19:45:58 +0100
Because BT does not write back to the source operand, gen_BT is free to
modify the loaded source value (s->T0) in place, so as to ensure that one of
the operands of TSTNE is a constant (after either gen_BT or the optimizer's
constant propagation). This produces better and more optimizable TCG ops.
For example, the sequence
movl $0x60013f, %ebx
btl %ecx, %ebx
becomes just
and_i32 tmp1,ecx,$0x1f dead: 1 2 pref=0xffff
shr_i32 tmp0,$0x60013f,tmp1 dead: 1 2 pref=0xffff
and_i32 tmp16,tmp0,$0x1 dead: 1 pref=0xbf80
On s390x, the generated code for the example above can use four instructions
to isolate bit 0 of 0x60013f >> (ecx & 31):
nilf %r12, 0x1f
lgfi %r11, 0x60013f
srlk %r12, %r11, 0(%r12)
nilf %r12, 1
Previously, it used five instructions to build 1 << (ecx & 31) and compute
TSTEQ, and also needed two more to construct the result of setcond:
nilf %r12, 0x1f
lghi %r11, 1
sllk %r12, %r11, 0(%r12)
lgfi %r9, 0x60013f
nrk %r0, %r12, %r9
lghi %r12, 0
locghilh %r12, 1
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
target/i386/tcg/emit.c.inc | 36 ++++++++++++++++++++++++++++--------
1 file changed, 28 insertions(+), 8 deletions(-)
diff --git a/target/i386/tcg/emit.c.inc b/target/i386/tcg/emit.c.inc
index 785ff63f2ac..5c115429350 100644
--- a/target/i386/tcg/emit.c.inc
+++ b/target/i386/tcg/emit.c.inc
@@ -1443,8 +1443,9 @@ static TCGv gen_bt_mask(DisasContext *s, X86DecodedInsn
*decode)
return mask;
}
-/* Expects truncated bit index in s->T1, 1 << s->T1 in MASK. */
-static void gen_bt_flags(DisasContext *s, X86DecodedInsn *decode, TCGv src,
TCGv mask)
+/* Expects truncated bit index in COUNT, 1 << COUNT in MASK. */
+static void gen_bt_flags(DisasContext *s, X86DecodedInsn *decode, TCGv src,
+ TCGv count, TCGv mask)
{
TCGv cf;
@@ -1467,15 +1468,34 @@ static void gen_bt_flags(DisasContext *s,
X86DecodedInsn *decode, TCGv src, TCGv
decode->cc_src = tcg_temp_new();
decode->cc_dst = cpu_cc_dst;
decode->cc_op = CC_OP_SARB + cc_op_size(s->cc_op);
- tcg_gen_shr_tl(decode->cc_src, src, s->T1);
+ tcg_gen_shr_tl(decode->cc_src, src, count);
}
}
static void gen_BT(DisasContext *s, X86DecodedInsn *decode)
{
- TCGv mask = gen_bt_mask(s, decode);
+ TCGv count = s->T1;
+ TCGv mask;
- gen_bt_flags(s, decode, s->T0, mask);
+ /*
+ * Try to ensure that the rhs of the TSTNE condition is a constant (and a
+ * power of two), as that is more readily available on most TCG backends.
+ *
+ * For immediate bit number gen_bt_mask()'s output is already a constant;
+ * for register bit number, shift the source right and check bit 0.
+ */
+ if (decode->e.op2 == X86_TYPE_I) {
+ mask = gen_bt_mask(s, decode);
+ } else {
+ MemOp ot = decode->op[1].ot;
+
+ tcg_gen_andi_tl(s->T1, s->T1, (8 << ot) - 1);
+ tcg_gen_shr_tl(s->T0, s->T0, s->T1);
+
+ count = tcg_constant_tl(0);
+ mask = tcg_constant_tl(1);
+ }
+ gen_bt_flags(s, decode, s->T0, count, mask);
}
static void gen_BTC(DisasContext *s, X86DecodedInsn *decode)
@@ -1491,7 +1511,7 @@ static void gen_BTC(DisasContext *s, X86DecodedInsn
*decode)
tcg_gen_xor_tl(s->T0, s->T0, mask);
}
- gen_bt_flags(s, decode, old, mask);
+ gen_bt_flags(s, decode, old, s->T1, mask);
}
static void gen_BTR(DisasContext *s, X86DecodedInsn *decode)
@@ -1509,7 +1529,7 @@ static void gen_BTR(DisasContext *s, X86DecodedInsn
*decode)
tcg_gen_andc_tl(s->T0, s->T0, mask);
}
- gen_bt_flags(s, decode, old, mask);
+ gen_bt_flags(s, decode, old, s->T1, mask);
}
static void gen_BTS(DisasContext *s, X86DecodedInsn *decode)
@@ -1525,7 +1545,7 @@ static void gen_BTS(DisasContext *s, X86DecodedInsn
*decode)
tcg_gen_or_tl(s->T0, s->T0, mask);
}
- gen_bt_flags(s, decode, old, mask);
+ gen_bt_flags(s, decode, old, s->T1, mask);
}
static void gen_BZHI(DisasContext *s, X86DecodedInsn *decode)
--
2.47.1
- [PULL 08/38] rust: qom: move device_id to PL011 class side, (continued)
- [PULL 08/38] rust: qom: move device_id to PL011 class side, Paolo Bonzini, 2025/01/10
- [PULL 14/38] rust: hide warnings for subprojects, Paolo Bonzini, 2025/01/10
- [PULL 02/38] rust: add --check-cfg test to rustc arguments, Paolo Bonzini, 2025/01/10
- [PULL 04/38] rust: add a utility module for compile-time type checks, Paolo Bonzini, 2025/01/10
- [PULL 16/38] make-release: only leave tarball of wrap-file subprojects, Paolo Bonzini, 2025/01/10
- [PULL 15/38] qom: remove unused field, Paolo Bonzini, 2025/01/10
- [PULL 03/38] rust: qom: add ParentField, Paolo Bonzini, 2025/01/10
- [PULL 10/38] rust: qom: make INSTANCE_POST_INIT take a shared reference, Paolo Bonzini, 2025/01/10
- [PULL 17/38] target/i386: improve code generation for BT,
Paolo Bonzini <=
- [PULL 06/38] rust: macros: check that the first field of a #[derive(Object)] struct is a ParentField, Paolo Bonzini, 2025/01/10
- [PULL 05/38] rust: macros: check that #[derive(Object)] requires #[repr(C)], Paolo Bonzini, 2025/01/10
- [PULL 11/38] rust: qemu-api-macros: extend error reporting facility to parse errors, Paolo Bonzini, 2025/01/10
- [PULL 13/38] rust: qdev: expose inherited methods to subclasses of SysBusDevice, Paolo Bonzini, 2025/01/10
- [PULL 21/38] target/i386/kvm: Remove local MSR_KVM_WALL_CLOCK and MSR_KVM_SYSTEM_TIME definitions, Paolo Bonzini, 2025/01/10
- [PULL 24/38] target/i386/confidential-guest: Fix comment of x86_confidential_guest_kvm_type(), Paolo Bonzini, 2025/01/10
- [PULL 33/38] i386/topology: Introduce helpers for various topology info of different level, Paolo Bonzini, 2025/01/10
- [PULL 30/38] i386/cpu: Drop the variable smp_cores and smp_threads in x86_cpu_pre_plug(), Paolo Bonzini, 2025/01/10
- [PULL 34/38] i386/cpu: Track a X86CPUTopoInfo directly in CPUX86State, Paolo Bonzini, 2025/01/10
- [PULL 28/38] target/i386/kvm: Replace ARRAY_SIZE(msr_handlers) with KVM_MSR_FILTER_MAX_RANGES, Paolo Bonzini, 2025/01/10