[PATCH v2 1/6] tcg/arm: Hoist common argument loads in tcg_out_op()
From: Philippe Mathieu-Daudé
Subject: [PATCH v2 1/6] tcg/arm: Hoist common argument loads in tcg_out_op()
Date: Wed, 13 Jan 2021 18:24:54 +0100
Signed-off-by: Philippe Mathieu-Daudé <f4bug@amsat.org>
---
tcg/arm/tcg-target.c.inc | 192 +++++++++++++++++++--------------------
1 file changed, 92 insertions(+), 100 deletions(-)
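Note (below the --- line, not part of the commit message): for readers new
to the pattern, here is the transformation in miniature. This is an
illustrative sketch only, not QEMU code; the emit3() helper, the opcode
numbers and main() are invented for the example.

  #include <stdio.h>

  typedef long TCGArg;

  /* Stand-in for the real tcg_out_*() emitters: print one 3-operand insn. */
  static void emit3(const char *op, TCGArg d, TCGArg n, TCGArg m)
  {
      printf("%s r%ld, r%ld, r%ld\n", op, d, n, m);
  }

  /* Before: every case indexes args[] by hand. */
  static void out_op_before(int opc, const TCGArg *args)
  {
      switch (opc) {
      case 0:
          emit3("add", args[0], args[1], args[2]);
          break;
      case 1:
          emit3("sub", args[0], args[1], args[2]);
          break;
      }
  }

  /* After: load the common arguments once; every case body shrinks. */
  static void out_op_after(int opc, const TCGArg *args)
  {
      TCGArg a0 = args[0], a1 = args[1], a2 = args[2];

      switch (opc) {
      case 0:
          emit3("add", a0, a1, a2);
          break;
      case 1:
          emit3("sub", a0, a1, a2);
          break;
      }
  }

  int main(void)
  {
      const TCGArg args[3] = { 0, 1, 2 };

      out_op_before(0, args);    /* add r0, r1, r2 */
      out_op_after(1, args);     /* sub r0, r1, r2 */
      return 0;
  }

The diff below applies the same idea to the real tcg_out_op(): the new
prototype documents that callers always pass arrays of TCG_MAX_OP_ARGS
elements, so loading args[0..3] up front is safe even for opcodes with
fewer operands, and cases that redirect a0 to a temporary (add2/sub2)
still move the result back to args[0] at the end.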
diff --git a/tcg/arm/tcg-target.c.inc b/tcg/arm/tcg-target.c.inc
index 0fd11264544..59bd196994f 100644
--- a/tcg/arm/tcg-target.c.inc
+++ b/tcg/arm/tcg-target.c.inc
@@ -1747,15 +1747,23 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is64)
static void tcg_out_epilogue(TCGContext *s);
-static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
- const TCGArg *args, const int *const_args)
+static void tcg_out_op(TCGContext *s, TCGOpcode opc,
+ const TCGArg args[TCG_MAX_OP_ARGS],
+ const int const_args[TCG_MAX_OP_ARGS])
{
TCGArg a0, a1, a2, a3, a4, a5;
- int c;
+ int c, c2;
+
+ /* Hoist the loads of the most common arguments. */
+ a0 = args[0];
+ a1 = args[1];
+ a2 = args[2];
+ a3 = args[3];
+ c2 = const_args[2];
switch (opc) {
case INDEX_op_exit_tb:
- tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R0, args[0]);
+ tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R0, a0);
tcg_out_epilogue(s);
break;
case INDEX_op_goto_tb:
@@ -1765,7 +1773,7 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
TCGReg base = TCG_REG_PC;
tcg_debug_assert(s->tb_jmp_insn_offset == 0);
- ptr = (intptr_t)tcg_splitwx_to_rx(s->tb_jmp_target_addr + args[0]);
+ ptr = (intptr_t)tcg_splitwx_to_rx(s->tb_jmp_target_addr + a0);
dif = tcg_pcrel_diff(s, (void *)ptr) - 8;
dil = sextract32(dif, 0, 12);
if (dif != dil) {
@@ -1778,74 +1786,68 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
tcg_out_movi32(s, COND_AL, base, ptr - dil);
}
tcg_out_ld32_12(s, COND_AL, TCG_REG_PC, base, dil);
- set_jmp_reset_offset(s, args[0]);
+ set_jmp_reset_offset(s, a0);
}
break;
case INDEX_op_goto_ptr:
- tcg_out_bx(s, COND_AL, args[0]);
+ tcg_out_bx(s, COND_AL, a0);
break;
case INDEX_op_br:
- tcg_out_goto_label(s, COND_AL, arg_label(args[0]));
+ tcg_out_goto_label(s, COND_AL, arg_label(a0));
break;
case INDEX_op_ld8u_i32:
- tcg_out_ld8u(s, COND_AL, args[0], args[1], args[2]);
+ tcg_out_ld8u(s, COND_AL, a0, a1, a2);
break;
case INDEX_op_ld8s_i32:
- tcg_out_ld8s(s, COND_AL, args[0], args[1], args[2]);
+ tcg_out_ld8s(s, COND_AL, a0, a1, a2);
break;
case INDEX_op_ld16u_i32:
- tcg_out_ld16u(s, COND_AL, args[0], args[1], args[2]);
+ tcg_out_ld16u(s, COND_AL, a0, a1, a2);
break;
case INDEX_op_ld16s_i32:
- tcg_out_ld16s(s, COND_AL, args[0], args[1], args[2]);
+ tcg_out_ld16s(s, COND_AL, a0, a1, a2);
break;
case INDEX_op_ld_i32:
- tcg_out_ld32u(s, COND_AL, args[0], args[1], args[2]);
+ tcg_out_ld32u(s, COND_AL, a0, a1, a2);
break;
case INDEX_op_st8_i32:
- tcg_out_st8(s, COND_AL, args[0], args[1], args[2]);
+ tcg_out_st8(s, COND_AL, a0, a1, a2);
break;
case INDEX_op_st16_i32:
- tcg_out_st16(s, COND_AL, args[0], args[1], args[2]);
+ tcg_out_st16(s, COND_AL, a0, a1, a2);
break;
case INDEX_op_st_i32:
- tcg_out_st32(s, COND_AL, args[0], args[1], args[2]);
+ tcg_out_st32(s, COND_AL, a0, a1, a2);
break;
case INDEX_op_movcond_i32:
/* Constraints mean that v2 is always in the same register as dest,
* so we only need to do "if condition passed, move v1 to dest".
*/
- tcg_out_dat_rIN(s, COND_AL, ARITH_CMP, ARITH_CMN, 0,
- args[1], args[2], const_args[2]);
+ tcg_out_dat_rIN(s, COND_AL, ARITH_CMP, ARITH_CMN, 0, a1, a2, c2);
tcg_out_dat_rIK(s, tcg_cond_to_arm_cond[args[5]], ARITH_MOV,
- ARITH_MVN, args[0], 0, args[3], const_args[3]);
+ ARITH_MVN, a0, 0, a3, const_args[3]);
break;
case INDEX_op_add_i32:
- tcg_out_dat_rIN(s, COND_AL, ARITH_ADD, ARITH_SUB,
- args[0], args[1], args[2], const_args[2]);
+ tcg_out_dat_rIN(s, COND_AL, ARITH_ADD, ARITH_SUB, a0, a1, a2, c2);
break;
case INDEX_op_sub_i32:
if (const_args[1]) {
- if (const_args[2]) {
- tcg_out_movi32(s, COND_AL, args[0], args[1] - args[2]);
+ if (c2) {
+ tcg_out_movi32(s, COND_AL, a0, a1 - a2);
} else {
- tcg_out_dat_rI(s, COND_AL, ARITH_RSB,
- args[0], args[2], args[1], 1);
+ tcg_out_dat_rI(s, COND_AL, ARITH_RSB, a0, a2, a1, 1);
}
} else {
- tcg_out_dat_rIN(s, COND_AL, ARITH_SUB, ARITH_ADD,
- args[0], args[1], args[2], const_args[2]);
+ tcg_out_dat_rIN(s, COND_AL, ARITH_SUB, ARITH_ADD, a0, a1, a2, c2);
}
break;
case INDEX_op_and_i32:
- tcg_out_dat_rIK(s, COND_AL, ARITH_AND, ARITH_BIC,
- args[0], args[1], args[2], const_args[2]);
+ tcg_out_dat_rIK(s, COND_AL, ARITH_AND, ARITH_BIC, a0, a1, a2, c2);
break;
case INDEX_op_andc_i32:
- tcg_out_dat_rIK(s, COND_AL, ARITH_BIC, ARITH_AND,
- args[0], args[1], args[2], const_args[2]);
+ tcg_out_dat_rIK(s, COND_AL, ARITH_BIC, ARITH_AND, a0, a1, a2, c2);
break;
case INDEX_op_or_i32:
c = ARITH_ORR;
@@ -1854,11 +1856,10 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
c = ARITH_EOR;
/* Fall through. */
gen_arith:
- tcg_out_dat_rI(s, COND_AL, c, args[0], args[1], args[2], const_args[2]);
+ tcg_out_dat_rI(s, COND_AL, c, a0, a1, a2, c2);
break;
case INDEX_op_add2_i32:
- a0 = args[0], a1 = args[1], a2 = args[2];
- a3 = args[3], a4 = args[4], a5 = args[5];
+ a4 = args[4], a5 = args[5];
if (a0 == a3 || (a0 == a5 && !const_args[5])) {
a0 = TCG_REG_TMP;
}
@@ -1866,15 +1867,14 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
a0, a2, a4, const_args[4]);
tcg_out_dat_rIK(s, COND_AL, ARITH_ADC, ARITH_SBC,
a1, a3, a5, const_args[5]);
tcg_out_mov_reg(s, COND_AL, args[0], a0);
break;
case INDEX_op_sub2_i32:
- a0 = args[0], a1 = args[1], a2 = args[2];
- a3 = args[3], a4 = args[4], a5 = args[5];
+ a4 = args[4], a5 = args[5];
if ((a0 == a3 && !const_args[3]) || (a0 == a5 && !const_args[5])) {
a0 = TCG_REG_TMP;
}
- if (const_args[2]) {
+ if (c2) {
if (const_args[4]) {
tcg_out_movi32(s, COND_AL, a0, a4);
a4 = a0;
@@ -1884,7 +1884,7 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
tcg_out_dat_rIN(s, COND_AL, ARITH_SUB | TO_CPSR,
ARITH_ADD | TO_CPSR, a0, a2, a4, const_args[4]);
}
if (const_args[3]) {
if (const_args[5]) {
tcg_out_movi32(s, COND_AL, a1, a5);
a5 = a1;
@@ -1894,69 +1894,64 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
tcg_out_dat_rIK(s, COND_AL, ARITH_SBC, ARITH_ADC,
a1, a3, a5, const_args[5]);
}
tcg_out_mov_reg(s, COND_AL, args[0], a0);
break;
case INDEX_op_neg_i32:
- tcg_out_dat_imm(s, COND_AL, ARITH_RSB, args[0], args[1], 0);
+ tcg_out_dat_imm(s, COND_AL, ARITH_RSB, a0, a1, 0);
break;
case INDEX_op_not_i32:
- tcg_out_dat_reg(s, COND_AL,
- ARITH_MVN, args[0], 0, args[1], SHIFT_IMM_LSL(0));
+ tcg_out_dat_reg(s, COND_AL, ARITH_MVN, a0, 0, a1, SHIFT_IMM_LSL(0));
break;
case INDEX_op_mul_i32:
- tcg_out_mul32(s, COND_AL, args[0], args[1], args[2]);
+ tcg_out_mul32(s, COND_AL, a0, a1, a2);
break;
case INDEX_op_mulu2_i32:
- tcg_out_umull32(s, COND_AL, args[0], args[1], args[2], args[3]);
+ tcg_out_umull32(s, COND_AL, a0, a1, a2, a3);
break;
case INDEX_op_muls2_i32:
- tcg_out_smull32(s, COND_AL, args[0], args[1], args[2], args[3]);
+ tcg_out_smull32(s, COND_AL, a0, a1, a2, a3);
break;
- /* XXX: Perhaps args[2] & 0x1f is wrong */
+ /* XXX: Perhaps a2 & 0x1f is wrong */
case INDEX_op_shl_i32:
- c = const_args[2] ?
- SHIFT_IMM_LSL(args[2] & 0x1f) : SHIFT_REG_LSL(args[2]);
+ c = c2 ? SHIFT_IMM_LSL(a2 & 0x1f) : SHIFT_REG_LSL(a2);
goto gen_shift32;
case INDEX_op_shr_i32:
- c = const_args[2] ? (args[2] & 0x1f) ? SHIFT_IMM_LSR(args[2] & 0x1f) :
- SHIFT_IMM_LSL(0) : SHIFT_REG_LSR(args[2]);
+ c = c2 ? (a2 & 0x1f) ? SHIFT_IMM_LSR(a2 & 0x1f) :
+ SHIFT_IMM_LSL(0) : SHIFT_REG_LSR(a2);
goto gen_shift32;
case INDEX_op_sar_i32:
- c = const_args[2] ? (args[2] & 0x1f) ? SHIFT_IMM_ASR(args[2] & 0x1f) :
- SHIFT_IMM_LSL(0) : SHIFT_REG_ASR(args[2]);
+ c = c2 ? (a2 & 0x1f) ? SHIFT_IMM_ASR(a2 & 0x1f) :
+ SHIFT_IMM_LSL(0) : SHIFT_REG_ASR(a2);
goto gen_shift32;
case INDEX_op_rotr_i32:
- c = const_args[2] ? (args[2] & 0x1f) ? SHIFT_IMM_ROR(args[2] & 0x1f) :
- SHIFT_IMM_LSL(0) : SHIFT_REG_ROR(args[2]);
+ c = c2 ? (a2 & 0x1f) ? SHIFT_IMM_ROR(a2 & 0x1f) :
+ SHIFT_IMM_LSL(0) : SHIFT_REG_ROR(a2);
/* Fall through. */
gen_shift32:
- tcg_out_dat_reg(s, COND_AL, ARITH_MOV, args[0], 0, args[1], c);
+ tcg_out_dat_reg(s, COND_AL, ARITH_MOV, a0, 0, a1, c);
break;
case INDEX_op_rotl_i32:
- if (const_args[2]) {
- tcg_out_dat_reg(s, COND_AL, ARITH_MOV, args[0], 0, args[1],
- ((0x20 - args[2]) & 0x1f) ?
- SHIFT_IMM_ROR((0x20 - args[2]) & 0x1f) :
+ if (c2) {
+ tcg_out_dat_reg(s, COND_AL, ARITH_MOV, a0, 0, a1,
+ ((0x20 - a2) & 0x1f) ?
+ SHIFT_IMM_ROR((0x20 - a2) & 0x1f) :
SHIFT_IMM_LSL(0));
} else {
- tcg_out_dat_imm(s, COND_AL, ARITH_RSB, TCG_REG_TMP, args[2], 0x20);
- tcg_out_dat_reg(s, COND_AL, ARITH_MOV, args[0], 0, args[1],
+ tcg_out_dat_imm(s, COND_AL, ARITH_RSB, TCG_REG_TMP, a2, 0x20);
+ tcg_out_dat_reg(s, COND_AL, ARITH_MOV, a0, 0, a1,
SHIFT_REG_ROR(TCG_REG_TMP));
}
break;
case INDEX_op_ctz_i32:
- tcg_out_dat_reg(s, COND_AL, INSN_RBIT, TCG_REG_TMP, 0, args[1], 0);
+ tcg_out_dat_reg(s, COND_AL, INSN_RBIT, TCG_REG_TMP, 0, a1, 0);
a1 = TCG_REG_TMP;
goto do_clz;
case INDEX_op_clz_i32:
- a1 = args[1];
do_clz:
- a0 = args[0];
- a2 = args[2];
- c = const_args[2];
+ c = c2;
if (c && a2 == 32) {
tcg_out_dat_reg(s, COND_AL, INSN_CLZ, a0, 0, a1, 0);
break;
@@ -1970,17 +1965,15 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
case INDEX_op_brcond_i32:
tcg_out_dat_rIN(s, COND_AL, ARITH_CMP, ARITH_CMN, 0,
- args[0], args[1], const_args[1]);
- tcg_out_goto_label(s, tcg_cond_to_arm_cond[args[2]],
- arg_label(args[3]));
+ a0, a1, const_args[1]);
+ tcg_out_goto_label(s, tcg_cond_to_arm_cond[a2], arg_label(a3));
break;
case INDEX_op_setcond_i32:
- tcg_out_dat_rIN(s, COND_AL, ARITH_CMP, ARITH_CMN, 0,
- args[1], args[2], const_args[2]);
- tcg_out_dat_imm(s, tcg_cond_to_arm_cond[args[3]],
- ARITH_MOV, args[0], 0, 1);
- tcg_out_dat_imm(s, tcg_cond_to_arm_cond[tcg_invert_cond(args[3])],
- ARITH_MOV, args[0], 0, 0);
+ tcg_out_dat_rIN(s, COND_AL, ARITH_CMP, ARITH_CMN, 0, a1, a2, c2);
+ tcg_out_dat_imm(s, tcg_cond_to_arm_cond[a3],
+ ARITH_MOV, a0, 0, 1);
+ tcg_out_dat_imm(s, tcg_cond_to_arm_cond[tcg_invert_cond(a3)],
+ ARITH_MOV, a0, 0, 0);
break;
case INDEX_op_brcond2_i32:
@@ -1989,9 +1982,9 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
break;
case INDEX_op_setcond2_i32:
c = tcg_out_cmp2(s, args + 1, const_args + 1);
- tcg_out_dat_imm(s, tcg_cond_to_arm_cond[c], ARITH_MOV, args[0], 0, 1);
+ tcg_out_dat_imm(s, tcg_cond_to_arm_cond[c], ARITH_MOV, a0, 0, 1);
tcg_out_dat_imm(s, tcg_cond_to_arm_cond[tcg_invert_cond(c)],
- ARITH_MOV, args[0], 0, 0);
+ ARITH_MOV, a0, 0, 0);
break;
case INDEX_op_qemu_ld_i32:
@@ -2008,63 +2001,62 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
break;
case INDEX_op_bswap16_i32:
- tcg_out_bswap16(s, COND_AL, args[0], args[1]);
+ tcg_out_bswap16(s, COND_AL, a0, a1);
break;
case INDEX_op_bswap32_i32:
- tcg_out_bswap32(s, COND_AL, args[0], args[1]);
+ tcg_out_bswap32(s, COND_AL, a0, a1);
break;
case INDEX_op_ext8s_i32:
- tcg_out_ext8s(s, COND_AL, args[0], args[1]);
+ tcg_out_ext8s(s, COND_AL, a0, a1);
break;
case INDEX_op_ext16s_i32:
- tcg_out_ext16s(s, COND_AL, args[0], args[1]);
+ tcg_out_ext16s(s, COND_AL, a0, a1);
break;
case INDEX_op_ext16u_i32:
- tcg_out_ext16u(s, COND_AL, args[0], args[1]);
+ tcg_out_ext16u(s, COND_AL, a0, a1);
break;
case INDEX_op_deposit_i32:
- tcg_out_deposit(s, COND_AL, args[0], args[2],
- args[3], args[4], const_args[2]);
+ tcg_out_deposit(s, COND_AL, a0, a2, a3, args[4], c2);
break;
case INDEX_op_extract_i32:
- tcg_out_extract(s, COND_AL, args[0], args[1], args[2], args[3]);
+ tcg_out_extract(s, COND_AL, a0, a1, a2, a3);
break;
case INDEX_op_sextract_i32:
- tcg_out_sextract(s, COND_AL, args[0], args[1], args[2], args[3]);
+ tcg_out_sextract(s, COND_AL, a0, a1, a2, a3);
break;
case INDEX_op_extract2_i32:
/* ??? These optimization vs zero should be generic. */
/* ??? But we can't substitute 2 for 1 in the opcode stream yet. */
if (const_args[1]) {
- if (const_args[2]) {
- tcg_out_movi(s, TCG_TYPE_REG, args[0], 0);
+ if (c2) {
+ tcg_out_movi(s, TCG_TYPE_REG, a0, 0);
} else {
- tcg_out_dat_reg(s, COND_AL, ARITH_MOV, args[0], 0,
- args[2], SHIFT_IMM_LSL(32 - args[3]));
+ tcg_out_dat_reg(s, COND_AL, ARITH_MOV, a0, 0,
+ a2, SHIFT_IMM_LSL(32 - a3));
}
- } else if (const_args[2]) {
- tcg_out_dat_reg(s, COND_AL, ARITH_MOV, args[0], 0,
- args[1], SHIFT_IMM_LSR(args[3]));
+ } else if (c2) {
+ tcg_out_dat_reg(s, COND_AL, ARITH_MOV, a0, 0,
+ a1, SHIFT_IMM_LSR(a3));
} else {
/* We can do extract2 in 2 insns, vs the 3 required otherwise. */
tcg_out_dat_reg(s, COND_AL, ARITH_MOV, TCG_REG_TMP, 0,
- args[2], SHIFT_IMM_LSL(32 - args[3]));
- tcg_out_dat_reg(s, COND_AL, ARITH_ORR, args[0], TCG_REG_TMP,
- args[1], SHIFT_IMM_LSR(args[3]));
+ a2, SHIFT_IMM_LSL(32 - a3));
+ tcg_out_dat_reg(s, COND_AL, ARITH_ORR, a0, TCG_REG_TMP,
+ a1, SHIFT_IMM_LSR(a3));
}
break;
case INDEX_op_div_i32:
- tcg_out_sdiv(s, COND_AL, args[0], args[1], args[2]);
+ tcg_out_sdiv(s, COND_AL, a0, a1, a2);
break;
case INDEX_op_divu_i32:
- tcg_out_udiv(s, COND_AL, args[0], args[1], args[2]);
+ tcg_out_udiv(s, COND_AL, a0, a1, a2);
break;
case INDEX_op_mb:
- tcg_out_mb(s, args[0]);
+ tcg_out_mb(s, a0);
break;
case INDEX_op_mov_i32: /* Always emitted via tcg_out_mov. */
--
2.26.2