[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[PULL 13/48] target/i386: pull computation of string update value out of loop
From: Paolo Bonzini
Subject: [PULL 13/48] target/i386: pull computation of string update value out of loop
Date: Fri, 24 Jan 2025 10:44:07 +0100
Computing the string update value is a common operation that is executed
many times in rep movs or rep stos loops. Pulling it out of the loop can
improve performance by several percentage points.
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Link: https://lore.kernel.org/r/20241215090613.89588-13-pbonzini@redhat.com
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
target/i386/tcg/translate.c | 54 ++++++++++++++++++-------------------
1 file changed, 26 insertions(+), 28 deletions(-)
diff --git a/target/i386/tcg/translate.c b/target/i386/tcg/translate.c
index 991baf5d829..9f4d3ebbd95 100644
--- a/target/i386/tcg/translate.c
+++ b/target/i386/tcg/translate.c
@@ -832,16 +832,13 @@ static bool gen_check_io(DisasContext *s, MemOp ot,
TCGv_i32 port,
#endif
}
-static void gen_movs(DisasContext *s, MemOp ot)
+static void gen_movs(DisasContext *s, MemOp ot, TCGv dshift)
{
- TCGv dshift;
-
gen_string_movl_A0_ESI(s);
gen_op_ld_v(s, ot, s->T0, s->A0);
gen_string_movl_A0_EDI(s);
gen_op_st_v(s, ot, s->T0, s->A0);
- dshift = gen_compute_Dshift(s, ot);
gen_op_add_reg(s, s->aflag, R_ESI, dshift);
gen_op_add_reg(s, s->aflag, R_EDI, dshift);
}
@@ -1246,22 +1243,22 @@ static inline void gen_jcc(DisasContext *s, int b,
TCGLabel *l1)
}
}
-static void gen_stos(DisasContext *s, MemOp ot)
+static void gen_stos(DisasContext *s, MemOp ot, TCGv dshift)
{
gen_string_movl_A0_EDI(s);
gen_op_st_v(s, ot, s->T0, s->A0);
- gen_op_add_reg(s, s->aflag, R_EDI, gen_compute_Dshift(s, ot));
+ gen_op_add_reg(s, s->aflag, R_EDI, dshift);
}
-static void gen_lods(DisasContext *s, MemOp ot)
+static void gen_lods(DisasContext *s, MemOp ot, TCGv dshift)
{
gen_string_movl_A0_ESI(s);
gen_op_ld_v(s, ot, s->T0, s->A0);
gen_op_mov_reg_v(s, ot, R_EAX, s->T0);
- gen_op_add_reg(s, s->aflag, R_ESI, gen_compute_Dshift(s, ot));
+ gen_op_add_reg(s, s->aflag, R_ESI, dshift);
}
-static void gen_scas(DisasContext *s, MemOp ot)
+static void gen_scas(DisasContext *s, MemOp ot, TCGv dshift)
{
gen_string_movl_A0_EDI(s);
gen_op_ld_v(s, ot, s->T1, s->A0);
@@ -1270,13 +1267,11 @@ static void gen_scas(DisasContext *s, MemOp ot)
tcg_gen_sub_tl(cpu_cc_dst, s->T0, s->T1);
set_cc_op(s, CC_OP_SUBB + ot);
- gen_op_add_reg(s, s->aflag, R_EDI, gen_compute_Dshift(s, ot));
+ gen_op_add_reg(s, s->aflag, R_EDI, dshift);
}
-static void gen_cmps(DisasContext *s, MemOp ot)
+static void gen_cmps(DisasContext *s, MemOp ot, TCGv dshift)
{
- TCGv dshift;
-
gen_string_movl_A0_EDI(s);
gen_op_ld_v(s, ot, s->T1, s->A0);
gen_string_movl_A0_ESI(s);
@@ -1286,7 +1281,6 @@ static void gen_cmps(DisasContext *s, MemOp ot)
tcg_gen_sub_tl(cpu_cc_dst, s->T0, s->T1);
set_cc_op(s, CC_OP_SUBB + ot);
- dshift = gen_compute_Dshift(s, ot);
gen_op_add_reg(s, s->aflag, R_ESI, dshift);
gen_op_add_reg(s, s->aflag, R_EDI, dshift);
}
@@ -1305,7 +1299,7 @@ static void gen_bpt_io(DisasContext *s, TCGv_i32 t_port,
int ot)
}
}
-static void gen_ins(DisasContext *s, MemOp ot)
+static void gen_ins(DisasContext *s, MemOp ot, TCGv dshift)
{
gen_string_movl_A0_EDI(s);
/* Note: we must do this dummy write first to be restartable in
@@ -1316,11 +1310,11 @@ static void gen_ins(DisasContext *s, MemOp ot)
tcg_gen_andi_i32(s->tmp2_i32, s->tmp2_i32, 0xffff);
gen_helper_in_func(ot, s->T0, s->tmp2_i32);
gen_op_st_v(s, ot, s->T0, s->A0);
- gen_op_add_reg(s, s->aflag, R_EDI, gen_compute_Dshift(s, ot));
+ gen_op_add_reg(s, s->aflag, R_EDI, dshift);
gen_bpt_io(s, s->tmp2_i32, ot);
}
-static void gen_outs(DisasContext *s, MemOp ot)
+static void gen_outs(DisasContext *s, MemOp ot, TCGv dshift)
{
gen_string_movl_A0_ESI(s);
gen_op_ld_v(s, ot, s->T0, s->A0);
@@ -1329,14 +1323,14 @@ static void gen_outs(DisasContext *s, MemOp ot)
tcg_gen_andi_i32(s->tmp2_i32, s->tmp2_i32, 0xffff);
tcg_gen_trunc_tl_i32(s->tmp3_i32, s->T0);
gen_helper_out_func(ot, s->tmp2_i32, s->tmp3_i32);
- gen_op_add_reg(s, s->aflag, R_ESI, gen_compute_Dshift(s, ot));
+ gen_op_add_reg(s, s->aflag, R_ESI, dshift);
gen_bpt_io(s, s->tmp2_i32, ot);
}
#define REP_MAX 65535
-static void do_gen_rep(DisasContext *s, MemOp ot,
- void (*fn)(DisasContext *s, MemOp ot),
+static void do_gen_rep(DisasContext *s, MemOp ot, TCGv dshift,
+ void (*fn)(DisasContext *s, MemOp ot, TCGv dshift),
bool is_repz_nz)
{
TCGLabel *last = gen_new_label();
@@ -1401,7 +1395,7 @@ static void do_gen_rep(DisasContext *s, MemOp ot,
}
gen_set_label(loop);
- fn(s, ot);
+ fn(s, ot, dshift);
tcg_gen_mov_tl(cpu_regs[R_ECX], cx_next);
gen_update_cc_op(s);
@@ -1438,7 +1432,7 @@ static void do_gen_rep(DisasContext *s, MemOp ot,
*/
gen_set_label(last);
set_cc_op(s, CC_OP_DYNAMIC);
- fn(s, ot);
+ fn(s, ot, dshift);
tcg_gen_mov_tl(cpu_regs[R_ECX], cx_next);
gen_update_cc_op(s);
}
@@ -1453,23 +1447,27 @@ static void do_gen_rep(DisasContext *s, MemOp ot,
}
static void gen_repz(DisasContext *s, MemOp ot,
- void (*fn)(DisasContext *s, MemOp ot))
+ void (*fn)(DisasContext *s, MemOp ot, TCGv dshift))
{
+ TCGv dshift = gen_compute_Dshift(s, ot);
+
if (s->prefix & (PREFIX_REPZ | PREFIX_REPNZ)) {
- do_gen_rep(s, ot, fn, false);
+ do_gen_rep(s, ot, dshift, fn, false);
} else {
- fn(s, ot);
+ fn(s, ot, dshift);
}
}
static void gen_repz_nz(DisasContext *s, MemOp ot,
- void (*fn)(DisasContext *s, MemOp ot))
+ void (*fn)(DisasContext *s, MemOp ot, TCGv dshift))
{
+ TCGv dshift = gen_compute_Dshift(s, ot);
+
if (s->prefix & (PREFIX_REPZ | PREFIX_REPNZ)) {
- do_gen_rep(s, ot, fn, true);
+ do_gen_rep(s, ot, dshift, fn, true);
} else {
- fn(s, ot);
+ fn(s, ot, dshift);
}
}
--
2.48.1
- [PULL 04/48] target/i386: unify REP and REPZ/REPNZ generation, (continued)
- [PULL 04/48] target/i386: unify REP and REPZ/REPNZ generation, Paolo Bonzini, 2025/01/24
- [PULL 06/48] target/i386: reorganize ops emitted by do_gen_rep, drop repz_opt, Paolo Bonzini, 2025/01/24
- [PULL 07/48] target/i386: tcg: move gen_set/reset_* earlier in the file, Paolo Bonzini, 2025/01/24
- [PULL 08/48] target/i386: fix RF handling for string instructions, Paolo Bonzini, 2025/01/24
- [PULL 09/48] target/i386: make cc_op handling more explicit for repeated string instructions., Paolo Bonzini, 2025/01/24
- [PULL 10/48] target/i386: do not use gen_op_jz_ecx for repeated string operations, Paolo Bonzini, 2025/01/24
- [PULL 14/48] target/i386: extract common bits of gen_repz/gen_repz_nz, Paolo Bonzini, 2025/01/24
- [PULL 11/48] target/i386: optimize CX handling in repeated string operations, Paolo Bonzini, 2025/01/24
- [PULL 12/48] target/i386: execute multiple REP/REPZ iterations without leaving TB, Paolo Bonzini, 2025/01/24
- [PULL 16/48] target/i386: Introduce SierraForest-v2 model, Paolo Bonzini, 2025/01/24
- [PULL 13/48] target/i386: pull computation of string update value out of loop, Paolo Bonzini <=
- [PULL 17/48] target/i386: Export BHI_NO bit to guests, Paolo Bonzini, 2025/01/24
- [PULL 15/48] target/i386: avoid using s->tmp0 for add to implicit registers, Paolo Bonzini, 2025/01/24
- [PULL 22/48] rust/pl011: Avoid bindings::*, Paolo Bonzini, 2025/01/24
- [PULL 23/48] memattrs: Convert unspecified member to bool, Paolo Bonzini, 2025/01/24
- [PULL 26/48] rust: vmstate: implement VMState for non-leaf types, Paolo Bonzini, 2025/01/24
- [PULL 18/48] target/i386: Add new CPU model ClearwaterForest, Paolo Bonzini, 2025/01/24
- [PULL 20/48] stub: Fix build failure with --enable-user --disable-system --enable-tools, Paolo Bonzini, 2025/01/24
- [PULL 35/48] rust: prefer NonNull::new to assertions, Paolo Bonzini, 2025/01/24
- [PULL 25/48] rust: vmstate: add new type safe implementation, Paolo Bonzini, 2025/01/24
- [PULL 29/48] rust: vmstate: implement VMState for scalar types, Paolo Bonzini, 2025/01/24