[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[Qemu-devel] [PATCH 15/17] ppc: store CR registers in 32 1-bit registers
From: |
Paolo Bonzini |
Subject: |
[Qemu-devel] [PATCH 15/17] ppc: store CR registers in 32 1-bit registers |
Date: |
Thu, 28 Aug 2014 19:15:11 +0200 |
This makes comparisons much smaller and faster. The speedup is
approximately 10% on user-mode emulation on x86 host, 3-4% on PPC.
Note that CRF_* constants are flipped to match PowerPC's big
bit-endianness. Previously, the CR register was effectively stored
in mixed endianness, so now there is less indirection going on.
Signed-off-by: Paolo Bonzini <address@hidden>
---
linux-user/main.c | 4 +-
target-ppc/cpu.h | 33 ++++--
target-ppc/fpu_helper.c | 39 ++----
target-ppc/helper.h | 6 -
target-ppc/int_helper.c | 2 +-
target-ppc/machine.c | 9 ++
target-ppc/translate.c | 307 +++++++++++++++++++++++++-----------------------
7 files changed, 204 insertions(+), 196 deletions(-)
diff --git a/linux-user/main.c b/linux-user/main.c
index 152c031..b403f24 100644
--- a/linux-user/main.c
+++ b/linux-user/main.c
@@ -1929,7 +1929,7 @@ void cpu_loop(CPUPPCState *env)
* PPC ABI uses overflow flag in cr0 to signal an error
* in syscalls.
*/
- env->crf[0] &= ~0x1;
+ env->cr[CRF_SO] = 0;
ret = do_syscall(env, env->gpr[0], env->gpr[3], env->gpr[4],
env->gpr[5], env->gpr[6], env->gpr[7],
env->gpr[8], 0, 0);
@@ -1939,7 +1939,7 @@ void cpu_loop(CPUPPCState *env)
break;
}
if (ret > (target_ulong)(-515)) {
- env->crf[0] |= 0x1;
+ env->cr[CRF_SO] = 1;
ret = -ret;
}
env->gpr[3] = ret;
diff --git a/target-ppc/cpu.h b/target-ppc/cpu.h
index 05c29b2..67510e8 100644
--- a/target-ppc/cpu.h
+++ b/target-ppc/cpu.h
@@ -939,7 +939,7 @@ struct CPUPPCState {
/* CTR */
target_ulong ctr;
/* condition register */
- uint32_t crf[8];
+ uint32_t cr[32];
#if defined(TARGET_PPC64)
/* CFAR */
target_ulong cfar;
@@ -1058,6 +1058,9 @@ struct CPUPPCState {
uint64_t dtl_addr, dtl_size;
#endif /* TARGET_PPC64 */
+ /* condition register, for migration compatibility */
+ uint32_t crf[8];
+
int error_code;
uint32_t pending_interrupts;
#if !defined(CONFIG_USER_ONLY)
@@ -1200,12 +1203,20 @@ void store_fpscr(CPUPPCState *env, uint64_t arg,
uint32_t mask);
static inline uint32_t ppc_get_crf(const CPUPPCState *env, int i)
{
- return env->crf[i];
+ uint32_t r;
+ r = env->cr[i * 4];
+ r = (r << 1) | (env->cr[i * 4 + 1]);
+ r = (r << 1) | (env->cr[i * 4 + 2]);
+ r = (r << 1) | (env->cr[i * 4 + 3]);
+ return r;
}
static inline void ppc_set_crf(CPUPPCState *env, int i, uint32_t val)
{
- env->crf[i] = val;
+ env->cr[i * 4 + 0] = (val & 0x08) != 0;
+ env->cr[i * 4 + 1] = (val & 0x04) != 0;
+ env->cr[i * 4 + 2] = (val & 0x02) != 0;
+ env->cr[i * 4 + 3] = (val & 0x01) != 0;
}
static inline uint64_t ppc_dump_gpr(CPUPPCState *env, int gprn)
@@ -1256,14 +1267,14 @@ static inline int cpu_mmu_index (CPUPPCState *env)
/*****************************************************************************/
/* CRF definitions */
-#define CRF_LT 3
-#define CRF_GT 2
-#define CRF_EQ 1
-#define CRF_SO 0
-#define CRF_CH (1 << CRF_LT)
-#define CRF_CL (1 << CRF_GT)
-#define CRF_CH_OR_CL (1 << CRF_EQ)
-#define CRF_CH_AND_CL (1 << CRF_SO)
+#define CRF_LT 0
+#define CRF_GT 1
+#define CRF_EQ 2
+#define CRF_SO 3
+#define CRF_CH CRF_LT
+#define CRF_CL CRF_GT
+#define CRF_CH_OR_CL CRF_EQ
+#define CRF_CH_AND_CL CRF_SO
/* XER definitions */
#define XER_SO 31
diff --git a/target-ppc/fpu_helper.c b/target-ppc/fpu_helper.c
index 1ccbcf3..9574ebe 100644
--- a/target-ppc/fpu_helper.c
+++ b/target-ppc/fpu_helper.c
@@ -1098,8 +1098,8 @@ void helper_fcmpu(CPUPPCState *env, uint64_t arg1,
uint64_t arg2,
}
env->fpscr &= ~(0x0F << FPSCR_FPRF);
- env->fpscr |= (0x01 << FPSCR_FPRF) << ret;
- ppc_set_crf(env, crfD, 1 << ret);
+ env->fpscr |= (0x08 << FPSCR_FPRF) >> ret;
+ ppc_set_crf(env, crfD, 0x08 >> ret);
if (unlikely(ret == CRF_SO
&& (float64_is_signaling_nan(farg1.d) ||
@@ -1130,8 +1130,8 @@ void helper_fcmpo(CPUPPCState *env, uint64_t arg1,
uint64_t arg2,
}
env->fpscr &= ~(0x0F << FPSCR_FPRF);
- env->fpscr |= (0x01 << FPSCR_FPRF) << ret;
- ppc_set_crf(env, crfD, 1 << ret);
+ env->fpscr |= (0x08 << FPSCR_FPRF) >> ret;
+ ppc_set_crf(env, crfD, 0x08 >> ret);
if (unlikely(ret == CRF_SO)) {
if (float64_is_signaling_nan(farg1.d) ||
@@ -1403,7 +1403,7 @@ static inline uint32_t efscmplt(CPUPPCState *env,
uint32_t op1, uint32_t op2)
u1.l = op1;
u2.l = op2;
- return float32_lt(u1.f, u2.f, &env->vec_status) ? 4 : 0;
+ return float32_lt(u1.f, u2.f, &env->vec_status);
}
static inline uint32_t efscmpgt(CPUPPCState *env, uint32_t op1, uint32_t op2)
@@ -1412,7 +1412,7 @@ static inline uint32_t efscmpgt(CPUPPCState *env,
uint32_t op1, uint32_t op2)
u1.l = op1;
u2.l = op2;
- return float32_le(u1.f, u2.f, &env->vec_status) ? 0 : 4;
+ return !float32_le(u1.f, u2.f, &env->vec_status);
}
static inline uint32_t efscmpeq(CPUPPCState *env, uint32_t op1, uint32_t op2)
@@ -1421,7 +1421,7 @@ static inline uint32_t efscmpeq(CPUPPCState *env,
uint32_t op1, uint32_t op2)
u1.l = op1;
u2.l = op2;
- return float32_eq(u1.f, u2.f, &env->vec_status) ? 4 : 0;
+ return float32_eq(u1.f, u2.f, &env->vec_status);
}
static inline uint32_t efststlt(CPUPPCState *env, uint32_t op1, uint32_t op2)
@@ -1465,25 +1465,6 @@ static inline uint32_t evcmp_merge(int t0, int t1)
return (t0 << 3) | (t1 << 2) | ((t0 | t1) << 1) | (t0 & t1);
}
-#define HELPER_VECTOR_SPE_CMP(name) \
- uint32_t helper_ev##name(CPUPPCState *env, uint64_t op1, uint64_t op2) \
- { \
- return evcmp_merge(e##name(env, op1 >> 32, op2 >> 32), \
- e##name(env, op1, op2)); \
- }
-/* evfststlt */
-HELPER_VECTOR_SPE_CMP(fststlt);
-/* evfststgt */
-HELPER_VECTOR_SPE_CMP(fststgt);
-/* evfststeq */
-HELPER_VECTOR_SPE_CMP(fststeq);
-/* evfscmplt */
-HELPER_VECTOR_SPE_CMP(fscmplt);
-/* evfscmpgt */
-HELPER_VECTOR_SPE_CMP(fscmpgt);
-/* evfscmpeq */
-HELPER_VECTOR_SPE_CMP(fscmpeq);
-
/* Double-precision floating-point conversion */
uint64_t helper_efdcfsi(CPUPPCState *env, uint32_t val)
{
@@ -1725,7 +1706,7 @@ uint32_t helper_efdtstlt(CPUPPCState *env, uint64_t op1,
uint64_t op2)
u1.ll = op1;
u2.ll = op2;
- return float64_lt(u1.d, u2.d, &env->vec_status) ? 4 : 0;
+ return float64_lt(u1.d, u2.d, &env->vec_status);
}
uint32_t helper_efdtstgt(CPUPPCState *env, uint64_t op1, uint64_t op2)
@@ -1734,7 +1715,7 @@ uint32_t helper_efdtstgt(CPUPPCState *env, uint64_t op1,
uint64_t op2)
u1.ll = op1;
u2.ll = op2;
- return float64_le(u1.d, u2.d, &env->vec_status) ? 0 : 4;
+ return !float64_le(u1.d, u2.d, &env->vec_status);
}
uint32_t helper_efdtsteq(CPUPPCState *env, uint64_t op1, uint64_t op2)
@@ -1743,7 +1724,7 @@ uint32_t helper_efdtsteq(CPUPPCState *env, uint64_t op1,
uint64_t op2)
u1.ll = op1;
u2.ll = op2;
- return float64_eq_quiet(u1.d, u2.d, &env->vec_status) ? 4 : 0;
+ return float64_eq_quiet(u1.d, u2.d, &env->vec_status);
}
uint32_t helper_efdcmplt(CPUPPCState *env, uint64_t op1, uint64_t op2)
diff --git a/target-ppc/helper.h b/target-ppc/helper.h
index 5342f13..8d6a92b 100644
--- a/target-ppc/helper.h
+++ b/target-ppc/helper.h
@@ -493,12 +493,6 @@ DEF_HELPER_3(efststeq, i32, env, i32, i32)
DEF_HELPER_3(efscmplt, i32, env, i32, i32)
DEF_HELPER_3(efscmpgt, i32, env, i32, i32)
DEF_HELPER_3(efscmpeq, i32, env, i32, i32)
-DEF_HELPER_3(evfststlt, i32, env, i64, i64)
-DEF_HELPER_3(evfststgt, i32, env, i64, i64)
-DEF_HELPER_3(evfststeq, i32, env, i64, i64)
-DEF_HELPER_3(evfscmplt, i32, env, i64, i64)
-DEF_HELPER_3(evfscmpgt, i32, env, i64, i64)
-DEF_HELPER_3(evfscmpeq, i32, env, i64, i64)
DEF_HELPER_2(efdcfsi, i64, env, i32)
DEF_HELPER_2(efdcfsid, i64, env, i64)
DEF_HELPER_2(efdcfui, i64, env, i32)
diff --git a/target-ppc/int_helper.c b/target-ppc/int_helper.c
index 2287064..d3ace6a 100644
--- a/target-ppc/int_helper.c
+++ b/target-ppc/int_helper.c
@@ -2602,7 +2602,7 @@ target_ulong helper_dlmzb(CPUPPCState *env, target_ulong
high,
done:
env->xer = (env->xer & ~0x7F) | i;
if (update_Rc) {
- env->crf[0] |= xer_so;
+ env->cr[CRF_SO] = xer_so;
}
return i;
}
diff --git a/target-ppc/machine.c b/target-ppc/machine.c
index c801b82..9fa309a 100644
--- a/target-ppc/machine.c
+++ b/target-ppc/machine.c
@@ -132,6 +132,10 @@ static void cpu_pre_save(void *opaque)
CPUPPCState *env = &cpu->env;
int i;
+ for (i = 0; i < 8; i++) {
+ env->crf[i] = ppc_get_crf(env, i);
+ }
+
env->spr[SPR_LR] = env->lr;
env->spr[SPR_CTR] = env->ctr;
env->spr[SPR_XER] = env->xer;
@@ -165,6 +169,11 @@ static int cpu_post_load(void *opaque, int version_id)
* software has to take care of running QEMU in a compatible mode.
*/
env->spr[SPR_PVR] = env->spr_cb[SPR_PVR].default_value;
+
+ for (i = 0; i < 8; i++) {
+ ppc_set_crf(env, i, env->crf[i]);
+ }
+
env->lr = env->spr[SPR_LR];
env->ctr = env->spr[SPR_CTR];
env->xer = env->spr[SPR_XER];
diff --git a/target-ppc/translate.c b/target-ppc/translate.c
index 1ed6a8f..dd19b39 100644
--- a/target-ppc/translate.c
+++ b/target-ppc/translate.c
@@ -53,13 +53,13 @@ static char cpu_reg_names[10*3 + 22*4 /* GPR */
+ 10*4 + 22*5 /* FPR */
+ 2*(10*6 + 22*7) /* AVRh, AVRl */
+ 10*5 + 22*6 /* VSR */
- + 8*5 /* CRF */];
+ + 32*8 /* CR */];
static TCGv cpu_gpr[32];
static TCGv cpu_gprh[32];
static TCGv_i64 cpu_fpr[32];
static TCGv_i64 cpu_avrh[32], cpu_avrl[32];
static TCGv_i64 cpu_vsr[32];
-static TCGv_i32 cpu_crf[8];
+static TCGv_i32 cpu_cr[32];
static TCGv cpu_nip;
static TCGv cpu_msr;
static TCGv cpu_ctr;
@@ -89,12 +89,13 @@ void ppc_translate_init(void)
p = cpu_reg_names;
cpu_reg_names_size = sizeof(cpu_reg_names);
- for (i = 0; i < 8; i++) {
- snprintf(p, cpu_reg_names_size, "crf%d", i);
- cpu_crf[i] = tcg_global_mem_new_i32(TCG_AREG0,
- offsetof(CPUPPCState, crf[i]), p);
- p += 5;
- cpu_reg_names_size -= 5;
+ for (i = 0; i < 32; i++) {
+ static const char names[] = "lt\0gt\0eq\0so";
+ snprintf(p, cpu_reg_names_size, "cr%d[%s]", i >> 2, names + (i & 3) *
3);
+ cpu_cr[i] = tcg_global_mem_new_i32(TCG_AREG0,
+ offsetof(CPUPPCState, cr[i]), p);
+ p += 8;
+ cpu_reg_names_size -= 8;
}
for (i = 0; i < 32; i++) {
@@ -251,17 +252,34 @@ static inline void gen_reset_fpstatus(void)
static inline void gen_op_mfcr(TCGv dest, int first_cr, int shift)
{
- tcg_gen_shli_i32(dest, cpu_crf[first_cr >> 2], shift);
+ TCGv_i32 t0 = tcg_temp_new_i32();
+
+ /* Pack the four 1-bit registers of the field, LT as the most
+ * significant bit, into dest starting at bit position shift. */
+ tcg_gen_shli_i32(dest, cpu_cr[first_cr + 3], shift);
+ tcg_gen_shli_i32(t0, cpu_cr[first_cr + 2], shift + 1);
+ tcg_gen_or_i32(dest, dest, t0);
+ tcg_gen_shli_i32(t0, cpu_cr[first_cr + 1], shift + 2);
+ tcg_gen_or_i32(dest, dest, t0);
+ tcg_gen_shli_i32(t0, cpu_cr[first_cr], shift + 3);
+ tcg_gen_or_i32(dest, dest, t0);
+ tcg_temp_free_i32(t0);
}
static inline void gen_op_mtcr(int first_cr, TCGv src, int shift)
{
if (shift) {
- tcg_gen_shri_i32(cpu_crf[first_cr >> 2], src, shift);
- tcg_gen_andi_i32(cpu_crf[first_cr >> 2], cpu_crf[first_cr >> 2], 0x0F);
+ tcg_gen_shri_i32(cpu_cr[first_cr + 3], src, shift);
+ tcg_gen_andi_i32(cpu_cr[first_cr + 3], cpu_cr[first_cr + 3], 1);
} else {
- tcg_gen_andi_i32(cpu_crf[first_cr >> 2], src, 0x0F);
+ tcg_gen_andi_i32(cpu_cr[first_cr + 3], src, 1);
}
+ tcg_gen_shri_i32(cpu_cr[first_cr + 2], src, shift + 1);
+ tcg_gen_andi_i32(cpu_cr[first_cr + 2], cpu_cr[first_cr + 2], 1);
+ tcg_gen_shri_i32(cpu_cr[first_cr + 1], src, shift + 2);
+ tcg_gen_andi_i32(cpu_cr[first_cr + 1], cpu_cr[first_cr + 1], 1);
+ tcg_gen_shri_i32(cpu_cr[first_cr], src, shift + 3);
+ tcg_gen_andi_i32(cpu_cr[first_cr], cpu_cr[first_cr], 1);
}
static inline void gen_compute_fprf(TCGv_i64 arg, int set_fprf, int set_rc)
@@ -675,27 +689,19 @@ static bool is_user_mode(DisasContext *ctx)
static inline void gen_op_cmp(TCGv arg0, TCGv arg1, int s, int crf)
{
TCGv t0 = tcg_temp_new();
- TCGv_i32 t1 = tcg_temp_new_i32();
- tcg_gen_trunc_tl_i32(cpu_crf[crf], cpu_so);
+ tcg_gen_trunc_tl_i32(cpu_cr[crf * 4 + CRF_SO], cpu_so);
tcg_gen_setcond_tl((s ? TCG_COND_LT: TCG_COND_LTU), t0, arg0, arg1);
- tcg_gen_trunc_tl_i32(t1, t0);
- tcg_gen_shli_i32(t1, t1, CRF_LT);
- tcg_gen_or_i32(cpu_crf[crf], cpu_crf[crf], t1);
+ tcg_gen_trunc_tl_i32(cpu_cr[crf * 4 + CRF_LT], t0);
tcg_gen_setcond_tl((s ? TCG_COND_GT: TCG_COND_GTU), t0, arg0, arg1);
- tcg_gen_trunc_tl_i32(t1, t0);
- tcg_gen_shli_i32(t1, t1, CRF_GT);
- tcg_gen_or_i32(cpu_crf[crf], cpu_crf[crf], t1);
+ tcg_gen_trunc_tl_i32(cpu_cr[crf * 4 + CRF_GT], t0);
tcg_gen_setcond_tl(TCG_COND_EQ, t0, arg0, arg1);
- tcg_gen_trunc_tl_i32(t1, t0);
- tcg_gen_shli_i32(t1, t1, CRF_EQ);
- tcg_gen_or_i32(cpu_crf[crf], cpu_crf[crf], t1);
+ tcg_gen_trunc_tl_i32(cpu_cr[crf * 4 + CRF_EQ], t0);
tcg_temp_free(t0);
- tcg_temp_free_i32(t1);
}
static inline void gen_op_cmpi(TCGv arg0, target_ulong arg1, int s, int crf)
@@ -707,17 +713,25 @@ static inline void gen_op_cmpi(TCGv arg0, target_ulong arg1, int s, int crf)
static inline void gen_op_cmp32(TCGv arg0, TCGv arg1, int s, int crf)
{
- TCGv t0, t1;
- t0 = tcg_temp_new();
- t1 = tcg_temp_new();
- if (s) {
- tcg_gen_ext32s_tl(t0, arg0);
- tcg_gen_ext32s_tl(t1, arg1);
- } else {
- tcg_gen_ext32u_tl(t0, arg0);
- tcg_gen_ext32u_tl(t1, arg1);
- }
- gen_op_cmp(t0, t1, s, crf);
- tcg_temp_free(t1);
- tcg_temp_free(t0);
+ TCGv_i32 t0, t1;
+
+ t0 = tcg_temp_new_i32();
+ t1 = tcg_temp_new_i32();
+ tcg_gen_trunc_tl_i32(t0, arg0);
+ tcg_gen_trunc_tl_i32(t1, arg1);
+
+ /* SO is copied from cpu_so, exactly as gen_op_cmp does. */
+ tcg_gen_trunc_tl_i32(cpu_cr[crf * 4 + CRF_SO], cpu_so);
+
+ tcg_gen_setcond_i32((s ? TCG_COND_LT: TCG_COND_LTU),
+ cpu_cr[crf * 4 + CRF_LT], t0, t1);
+
+ tcg_gen_setcond_i32((s ? TCG_COND_GT: TCG_COND_GTU),
+ cpu_cr[crf * 4 + CRF_GT], t0, t1);
+
+ tcg_gen_setcond_i32(TCG_COND_EQ,
+ cpu_cr[crf * 4 + CRF_EQ], t0, t1);
+
+ tcg_temp_free_i32(t1);
+ tcg_temp_free_i32(t0);
}
@@ -790,15 +801,10 @@ static void gen_cmpli(DisasContext *ctx)
static void gen_isel(DisasContext *ctx)
{
uint32_t bi = rC(ctx->opcode);
- uint32_t mask;
- TCGv_i32 t0;
TCGv t1, true_op, zero;
- mask = 0x08 >> (bi & 0x03);
- t0 = tcg_temp_new_i32();
- tcg_gen_andi_i32(t0, cpu_crf[bi >> 2], mask);
t1 = tcg_temp_new();
- tcg_gen_extu_i32_tl(t1, t0);
+ tcg_gen_extu_i32_tl(t1, cpu_cr[bi]);
zero = tcg_const_tl(0);
if (rA(ctx->opcode) == 0)
true_op = zero;
@@ -2288,21 +2294,29 @@ GEN_FLOAT_B(rim, 0x08, 0x0F, 1, PPC_FLOAT_EXT);
static void gen_ftdiv(DisasContext *ctx)
{
+ TCGv_i32 crf;
if (unlikely(!ctx->fpu_enabled)) {
gen_exception(ctx, POWERPC_EXCP_FPU);
return;
}
- gen_helper_ftdiv(cpu_crf[crfD(ctx->opcode)], cpu_fpr[rA(ctx->opcode)],
+ crf = tcg_temp_new_i32();
+ gen_helper_ftdiv(crf, cpu_fpr[rA(ctx->opcode)],
cpu_fpr[rB(ctx->opcode)]);
+ gen_op_mtcr(crfD(ctx->opcode) << 2, crf, 0);
+ tcg_temp_free_i32(crf);
}
static void gen_ftsqrt(DisasContext *ctx)
{
+ TCGv_i32 crf;
if (unlikely(!ctx->fpu_enabled)) {
gen_exception(ctx, POWERPC_EXCP_FPU);
return;
}
- gen_helper_ftsqrt(cpu_crf[crfD(ctx->opcode)], cpu_fpr[rB(ctx->opcode)]);
+ crf = tcg_temp_new_i32();
+ gen_helper_ftsqrt(crf, cpu_fpr[rB(ctx->opcode)]);
+ gen_op_mtcr(crfD(ctx->opcode) << 2, crf, 0);
+ tcg_temp_free_i32(crf);
}
@@ -3300,10 +3314,13 @@ static void gen_conditional_store(DisasContext *ctx,
TCGv EA,
{
int l1;
- tcg_gen_trunc_tl_i32(cpu_crf[0], cpu_so);
+ tcg_gen_trunc_tl_i32(cpu_cr[CRF_SO], cpu_so);
+ tcg_gen_movi_i32(cpu_cr[CRF_LT], 0);
+ tcg_gen_movi_i32(cpu_cr[CRF_EQ], 0);
+ tcg_gen_movi_i32(cpu_cr[CRF_GT], 0);
l1 = gen_new_label();
tcg_gen_brcond_tl(TCG_COND_NE, EA, cpu_reserve, l1);
- tcg_gen_ori_i32(cpu_crf[0], cpu_crf[0], 1 << CRF_EQ);
+ tcg_gen_movi_i32(cpu_cr[CRF_EQ], 1);
#if defined(TARGET_PPC64)
if (size == 8) {
gen_qemu_st64(ctx, cpu_gpr[reg], EA);
@@ -3870,17 +3887,11 @@ static inline void gen_bcond(DisasContext *ctx, int
type)
if ((bo & 0x10) == 0) {
/* Test CR */
uint32_t bi = BI(ctx->opcode);
- uint32_t mask = 0x08 >> (bi & 0x03);
- TCGv_i32 temp = tcg_temp_new_i32();
-
if (bo & 0x8) {
- tcg_gen_andi_i32(temp, cpu_crf[bi >> 2], mask);
- tcg_gen_brcondi_i32(TCG_COND_EQ, temp, 0, l1);
+ tcg_gen_brcondi_i32(TCG_COND_EQ, cpu_cr[bi], 0, l1);
} else {
- tcg_gen_andi_i32(temp, cpu_crf[bi >> 2], mask);
- tcg_gen_brcondi_i32(TCG_COND_NE, temp, 0, l1);
+ tcg_gen_brcondi_i32(TCG_COND_NE, cpu_cr[bi], 0, l1);
}
- tcg_temp_free_i32(temp);
}
gen_update_cfar(ctx, ctx->nip);
if (type == BCOND_IM) {
@@ -3929,35 +3940,14 @@ static void gen_bctar(DisasContext *ctx)
}
/*** Condition register logical ***/
-#define GEN_CRLOGIC(name, tcg_op, opc) \
-static void glue(gen_, name)(DisasContext *ctx) \
-{ \
- uint8_t bitmask; \
- int sh; \
- TCGv_i32 t0, t1; \
- sh = (crbD(ctx->opcode) & 0x03) - (crbA(ctx->opcode) & 0x03); \
- t0 = tcg_temp_new_i32(); \
- if (sh > 0) \
- tcg_gen_shri_i32(t0, cpu_crf[crbA(ctx->opcode) >> 2], sh); \
- else if (sh < 0) \
- tcg_gen_shli_i32(t0, cpu_crf[crbA(ctx->opcode) >> 2], -sh); \
- else \
- tcg_gen_mov_i32(t0, cpu_crf[crbA(ctx->opcode) >> 2]); \
- t1 = tcg_temp_new_i32(); \
- sh = (crbD(ctx->opcode) & 0x03) - (crbB(ctx->opcode) & 0x03); \
- if (sh > 0) \
- tcg_gen_shri_i32(t1, cpu_crf[crbB(ctx->opcode) >> 2], sh); \
- else if (sh < 0) \
- tcg_gen_shli_i32(t1, cpu_crf[crbB(ctx->opcode) >> 2], -sh); \
- else \
- tcg_gen_mov_i32(t1, cpu_crf[crbB(ctx->opcode) >> 2]); \
- tcg_op(t0, t0, t1); \
- bitmask = 0x08 >> (crbD(ctx->opcode) & 0x03); \
- tcg_gen_andi_i32(t0, t0, bitmask); \
- tcg_gen_andi_i32(t1, cpu_crf[crbD(ctx->opcode) >> 2], ~bitmask); \
- tcg_gen_or_i32(cpu_crf[crbD(ctx->opcode) >> 2], t0, t1); \
- tcg_temp_free_i32(t0); \
- tcg_temp_free_i32(t1); \
+#define GEN_CRLOGIC(name, tcg_op, opc) \
+static void glue(gen_, name)(DisasContext *ctx) \
+{ \
+ tcg_op(cpu_cr[crbD(ctx->opcode)], cpu_cr[crbA(ctx->opcode)], \
+ cpu_cr[crbB(ctx->opcode)]); \
+ /* tcg_op may invert (e.g. nand/nor); keep only bit 0 in the result. */ \
+ tcg_gen_andi_i32(cpu_cr[crbD(ctx->opcode)], \
+ cpu_cr[crbD(ctx->opcode)], 1); \
}
/* crand */
@@ -3980,7 +3967,11 @@ GEN_CRLOGIC(crxor, tcg_gen_xor_i32, 0x06);
/* mcrf */
static void gen_mcrf(DisasContext *ctx)
{
- tcg_gen_mov_i32(cpu_crf[crfD(ctx->opcode)], cpu_crf[crfS(ctx->opcode)]);
+ int i;
+ for (i = 0; i < 4; i++) {
+ tcg_gen_mov_i32(cpu_cr[crfD(ctx->opcode) * 4 + i],
+ cpu_cr[crfS(ctx->opcode) * 4 + i]);
+ }
}
/*** System linkage ***/
@@ -4133,20 +4124,12 @@ static void gen_write_xer(TCGv src)
/* mcrxr */
static void gen_mcrxr(DisasContext *ctx)
{
- TCGv_i32 t0 = tcg_temp_new_i32();
- TCGv_i32 t1 = tcg_temp_new_i32();
- TCGv_i32 dst = cpu_crf[crfD(ctx->opcode)];
-
- tcg_gen_trunc_tl_i32(t0, cpu_so);
- tcg_gen_trunc_tl_i32(t1, cpu_ov);
- tcg_gen_trunc_tl_i32(dst, cpu_ca);
- tcg_gen_shli_i32(t0, t0, 3);
- tcg_gen_shli_i32(t1, t1, 2);
- tcg_gen_shli_i32(dst, dst, 1);
- tcg_gen_or_i32(dst, dst, t0);
- tcg_gen_or_i32(dst, dst, t1);
- tcg_temp_free_i32(t0);
- tcg_temp_free_i32(t1);
+ int crf = crfD(ctx->opcode);
+
+ tcg_gen_trunc_tl_i32(cpu_cr[crf * 4 + CRF_LT], cpu_so);
+ tcg_gen_trunc_tl_i32(cpu_cr[crf * 4 + CRF_GT], cpu_ov);
+ tcg_gen_trunc_tl_i32(cpu_cr[crf * 4 + CRF_EQ], cpu_ca);
+ tcg_gen_movi_i32(cpu_cr[crf * 4 + CRF_SO], 0);
tcg_gen_movi_tl(cpu_so, 0);
tcg_gen_movi_tl(cpu_ov, 0);
@@ -6320,11 +6303,14 @@ static void gen_tlbsx_40x(DisasContext *ctx)
gen_helper_4xx_tlbsx(cpu_gpr[rD(ctx->opcode)], cpu_env, t0);
tcg_temp_free(t0);
if (Rc(ctx->opcode)) {
- int l1 = gen_new_label();
- tcg_gen_trunc_tl_i32(cpu_crf[0], cpu_so);
- tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_gpr[rD(ctx->opcode)], -1, l1);
- tcg_gen_ori_i32(cpu_crf[0], cpu_crf[0], 0x02);
- gen_set_label(l1);
+ t0 = tcg_temp_new();
+ tcg_gen_trunc_tl_i32(cpu_cr[CRF_SO], cpu_so);
+ tcg_gen_movi_i32(cpu_cr[CRF_LT], 0);
+ tcg_gen_movi_i32(cpu_cr[CRF_GT], 0);
+ /* As before, EQ is set only when the helper did not return -1. */
+ tcg_gen_setcondi_tl(TCG_COND_NE, t0, cpu_gpr[rD(ctx->opcode)], -1);
+ tcg_gen_trunc_tl_i32(cpu_cr[CRF_EQ], t0);
+ tcg_temp_free(t0);
}
#endif
}
@@ -6401,11 +6386,14 @@ static void gen_tlbsx_440(DisasContext *ctx)
gen_helper_440_tlbsx(cpu_gpr[rD(ctx->opcode)], cpu_env, t0);
tcg_temp_free(t0);
if (Rc(ctx->opcode)) {
- int l1 = gen_new_label();
- tcg_gen_trunc_tl_i32(cpu_crf[0], cpu_so);
- tcg_gen_brcondi_tl(TCG_COND_EQ, cpu_gpr[rD(ctx->opcode)], -1, l1);
- tcg_gen_ori_i32(cpu_crf[0], cpu_crf[0], 0x02);
- gen_set_label(l1);
+ t0 = tcg_temp_new();
+ tcg_gen_trunc_tl_i32(cpu_cr[CRF_SO], cpu_so);
+ tcg_gen_movi_i32(cpu_cr[CRF_LT], 0);
+ tcg_gen_movi_i32(cpu_cr[CRF_GT], 0);
+ /* As before, EQ is set only when the helper did not return -1. */
+ tcg_gen_setcondi_tl(TCG_COND_NE, t0, cpu_gpr[rD(ctx->opcode)], -1);
+ tcg_gen_trunc_tl_i32(cpu_cr[CRF_EQ], t0);
+ tcg_temp_free(t0);
}
#endif
}
@@ -7371,7 +7358,7 @@ GEN_VXFORM(vpmsumd, 4, 19)
static void gen_##op(DisasContext *ctx) \
{ \
TCGv_ptr ra, rb, rd; \
- TCGv_i32 ps; \
+ TCGv_i32 ps, crf; \
\
if (unlikely(!ctx->altivec_enabled)) { \
gen_exception(ctx, POWERPC_EXCP_VPU); \
@@ -7383,13 +7370,17 @@ static void gen_##op(DisasContext *ctx) \
rd = gen_avr_ptr(rD(ctx->opcode)); \
\
ps = tcg_const_i32((ctx->opcode & 0x200) != 0); \
+ crf = tcg_temp_new_i32(); \
\
- gen_helper_##op(cpu_crf[6], rd, ra, rb, ps); \
+ gen_helper_##op(crf, rd, ra, rb, ps); \
+ /* Unpack the helper's 4-bit result into the four CR6 bits. */ \
+ gen_op_mtcr(6 << 2, crf, 0); \
\
tcg_temp_free_ptr(ra); \
tcg_temp_free_ptr(rb); \
tcg_temp_free_ptr(rd); \
tcg_temp_free_i32(ps); \
+ tcg_temp_free_i32(crf); \
}
GEN_BCD(bcdadd)
@@ -8217,6 +8207,7 @@ static void gen_##name(DisasContext *ctx) \
static void gen_##name(DisasContext *ctx) \
{ \
TCGv_ptr ra, rb; \
+ TCGv_i32 tmp; \
if (unlikely(!ctx->fpu_enabled)) { \
gen_exception(ctx, POWERPC_EXCP_FPU); \
return; \
@@ -8224,8 +8215,10 @@ static void gen_##name(DisasContext *ctx) \
gen_update_nip(ctx, ctx->nip - 4); \
ra = gen_fprp_ptr(rA(ctx->opcode)); \
rb = gen_fprp_ptr(rB(ctx->opcode)); \
- gen_helper_##name(cpu_crf[crfD(ctx->opcode)], \
- cpu_env, ra, rb); \
+ tmp = tcg_temp_new_i32(); \
+ gen_helper_##name(tmp, cpu_env, ra, rb); \
+ gen_op_mtcr(crfD(ctx->opcode) << 2, tmp, 0); \
+ tcg_temp_free_i32(tmp); \
tcg_temp_free_ptr(ra); \
tcg_temp_free_ptr(rb); \
}
@@ -8234,7 +8227,7 @@ static void gen_##name(DisasContext *ctx) \
static void gen_##name(DisasContext *ctx) \
{ \
TCGv_ptr ra; \
- TCGv_i32 dcm; \
+ TCGv_i32 dcm, tmp; \
if (unlikely(!ctx->fpu_enabled)) { \
gen_exception(ctx, POWERPC_EXCP_FPU); \
return; \
@@ -8242,8 +8235,10 @@ static void gen_##name(DisasContext *ctx) \
gen_update_nip(ctx, ctx->nip - 4); \
ra = gen_fprp_ptr(rA(ctx->opcode)); \
dcm = tcg_const_i32(DCM(ctx->opcode)); \
- gen_helper_##name(cpu_crf[crfD(ctx->opcode)], \
- cpu_env, ra, dcm); \
+ tmp = tcg_temp_new_i32(); \
+ gen_helper_##name(tmp, cpu_env, ra, dcm); \
+ gen_op_mtcr(crfD(ctx->opcode) << 2, tmp, 0); \
+ tcg_temp_free_i32(tmp); \
tcg_temp_free_ptr(ra); \
tcg_temp_free_i32(dcm); \
}
@@ -8668,37 +8663,35 @@ GEN_SPEOP_ARITH_IMM2(evsubifw, tcg_gen_subi_i32);
#define GEN_SPEOP_COMP(name, tcg_cond) \
static inline void gen_##name(DisasContext *ctx) \
{ \
+ TCGv tmp; \
+ \
if (unlikely(!ctx->spe_enabled)) { \
gen_exception(ctx, POWERPC_EXCP_SPEU); \
return; \
} \
- int l1 = gen_new_label(); \
- int l2 = gen_new_label(); \
- int l3 = gen_new_label(); \
- int l4 = gen_new_label(); \
+ /* Allocated after the early return so the temp cannot leak. */ \
+ tmp = tcg_temp_new(); \
\
tcg_gen_ext32s_tl(cpu_gpr[rA(ctx->opcode)], cpu_gpr[rA(ctx->opcode)]); \
tcg_gen_ext32s_tl(cpu_gpr[rB(ctx->opcode)], cpu_gpr[rB(ctx->opcode)]); \
tcg_gen_ext32s_tl(cpu_gprh[rA(ctx->opcode)], cpu_gprh[rA(ctx->opcode)]); \
tcg_gen_ext32s_tl(cpu_gprh[rB(ctx->opcode)], cpu_gprh[rB(ctx->opcode)]); \
\
- tcg_gen_brcond_tl(tcg_cond, cpu_gpr[rA(ctx->opcode)], \
- cpu_gpr[rB(ctx->opcode)], l1); \
- tcg_gen_movi_i32(cpu_crf[crfD(ctx->opcode)], 0); \
- tcg_gen_br(l2); \
- gen_set_label(l1); \
- tcg_gen_movi_i32(cpu_crf[crfD(ctx->opcode)], \
- CRF_CL | CRF_CH_OR_CL | CRF_CH_AND_CL); \
- gen_set_label(l2); \
- tcg_gen_brcond_tl(tcg_cond, cpu_gprh[rA(ctx->opcode)], \
- cpu_gprh[rB(ctx->opcode)], l3); \
- tcg_gen_andi_i32(cpu_crf[crfD(ctx->opcode)], cpu_crf[crfD(ctx->opcode)], \
- ~(CRF_CH | CRF_CH_AND_CL)); \
- tcg_gen_br(l4); \
- gen_set_label(l3); \
- tcg_gen_ori_i32(cpu_crf[crfD(ctx->opcode)], cpu_crf[crfD(ctx->opcode)], \
- CRF_CH | CRF_CH_OR_CL); \
- gen_set_label(l4); \
+ tcg_gen_setcond_tl(tcg_cond, tmp, \
+ cpu_gpr[rA(ctx->opcode)], \
+ cpu_gpr[rB(ctx->opcode)]); \
+ tcg_gen_trunc_tl_i32(cpu_cr[crfD(ctx->opcode) * 4 + CRF_CL], tmp); \
+ tcg_gen_setcond_tl(tcg_cond, tmp, \
+ cpu_gprh[rA(ctx->opcode)], \
+ cpu_gprh[rB(ctx->opcode)]); \
+ tcg_gen_trunc_tl_i32(cpu_cr[crfD(ctx->opcode) * 4 + CRF_CH], tmp); \
+ tcg_gen_or_i32(cpu_cr[crfD(ctx->opcode) * 4 + CRF_CH_OR_CL], \
+ cpu_cr[crfD(ctx->opcode) * 4 + CRF_CH], \
+ cpu_cr[crfD(ctx->opcode) * 4 + CRF_CL]); \
+ tcg_gen_and_i32(cpu_cr[crfD(ctx->opcode) * 4 + CRF_CH_AND_CL], \
+ cpu_cr[crfD(ctx->opcode) * 4 + CRF_CH], \
+ cpu_cr[crfD(ctx->opcode) * 4 + CRF_CL]); \
+ tcg_temp_free(tmp); \
}
GEN_SPEOP_COMP(evcmpgtu, TCG_COND_GTU);
GEN_SPEOP_COMP(evcmpgts, TCG_COND_GT);
@@ -8769,22 +8759,20 @@ static inline void gen_evsel(DisasContext *ctx)
int l2 = gen_new_label();
int l3 = gen_new_label();
int l4 = gen_new_label();
- TCGv_i32 t0 = tcg_temp_local_new_i32();
- tcg_gen_andi_i32(t0, cpu_crf[ctx->opcode & 0x07], 1 << 3);
- tcg_gen_brcondi_i32(TCG_COND_EQ, t0, 0, l1);
+
+ tcg_gen_brcondi_i32(TCG_COND_EQ, cpu_cr[(ctx->opcode & 0x07) * 4], 0, l1);
tcg_gen_mov_tl(cpu_gprh[rD(ctx->opcode)], cpu_gprh[rA(ctx->opcode)]);
tcg_gen_br(l2);
gen_set_label(l1);
tcg_gen_mov_tl(cpu_gprh[rD(ctx->opcode)], cpu_gprh[rB(ctx->opcode)]);
gen_set_label(l2);
- tcg_gen_andi_i32(t0, cpu_crf[ctx->opcode & 0x07], 1 << 2);
- tcg_gen_brcondi_i32(TCG_COND_EQ, t0, 0, l3);
+
+ tcg_gen_brcondi_i32(TCG_COND_EQ, cpu_cr[(ctx->opcode & 0x07) * 4 + 1], 0,
l3);
tcg_gen_mov_tl(cpu_gpr[rD(ctx->opcode)], cpu_gpr[rA(ctx->opcode)]);
tcg_gen_br(l4);
gen_set_label(l3);
tcg_gen_mov_tl(cpu_gpr[rD(ctx->opcode)], cpu_gpr[rB(ctx->opcode)]);
gen_set_label(l4);
- tcg_temp_free_i32(t0);
}
static void gen_evsel0(DisasContext *ctx)
@@ -9366,9 +9354,13 @@ static inline void gen_##name(DisasContext *ctx) \
t0 = tcg_temp_new_i32(); \
t1 = tcg_temp_new_i32(); \
\
+ /* The helper's 0/1 result replaces the old 0x4 value: the GT bit. */ \
+ tcg_gen_movi_i32(cpu_cr[crfD(ctx->opcode) * 4 + CRF_LT], 0); \
+ tcg_gen_movi_i32(cpu_cr[crfD(ctx->opcode) * 4 + CRF_EQ], 0); \
+ tcg_gen_movi_i32(cpu_cr[crfD(ctx->opcode) * 4 + CRF_SO], 0); \
tcg_gen_trunc_tl_i32(t0, cpu_gpr[rA(ctx->opcode)]); \
tcg_gen_trunc_tl_i32(t1, cpu_gpr[rB(ctx->opcode)]); \
- gen_helper_##name(cpu_crf[crfD(ctx->opcode)], cpu_env, t0, t1); \
+ gen_helper_##name(cpu_cr[crfD(ctx->opcode) * 4 + CRF_GT], cpu_env, t0, t1); \
\
tcg_temp_free_i32(t0); \
tcg_temp_free_i32(t1); \
@@ -9385,10 +9376,33 @@ static inline void gen_##name(DisasContext *ctx) \
t1 = tcg_temp_new_i64(); \
gen_load_gpr64(t0, rA(ctx->opcode)); \
gen_load_gpr64(t1, rB(ctx->opcode)); \
- gen_helper_##name(cpu_crf[crfD(ctx->opcode)], cpu_env, t0, t1); \
+ /* The helper's 0/1 result replaces the old 0x4 value: the GT bit. */ \
+ tcg_gen_movi_i32(cpu_cr[crfD(ctx->opcode) * 4 + CRF_LT], 0); \
+ tcg_gen_movi_i32(cpu_cr[crfD(ctx->opcode) * 4 + CRF_EQ], 0); \
+ tcg_gen_movi_i32(cpu_cr[crfD(ctx->opcode) * 4 + CRF_SO], 0); \
+ gen_helper_##name(cpu_cr[crfD(ctx->opcode) * 4 + CRF_GT], cpu_env, \
+ t0, t1); \
tcg_temp_free_i64(t0); \
tcg_temp_free_i64(t1); \
}
+#define GEN_SPEFPUOP_COMP_V64(name, helper) \
+static inline void gen_##name(DisasContext *ctx) \
+{ \
+ if (unlikely(!ctx->spe_enabled)) { \
+ gen_exception(ctx, POWERPC_EXCP_SPEU); \
+ return; \
+ } \
+ gen_helper_##helper(cpu_cr[crfD(ctx->opcode) * 4 + CRF_CL], cpu_env, \
+ cpu_gpr[rA(ctx->opcode)], cpu_gpr[rB(ctx->opcode)]); \
+ gen_helper_##helper(cpu_cr[crfD(ctx->opcode) * 4 + CRF_CH], cpu_env, \
+ cpu_gprh[rA(ctx->opcode)], cpu_gprh[rB(ctx->opcode)]);\
+ tcg_gen_or_i32(cpu_cr[crfD(ctx->opcode) * 4 + CRF_CH_OR_CL], \
+ cpu_cr[crfD(ctx->opcode) * 4 + CRF_CH], \
+ cpu_cr[crfD(ctx->opcode) * 4 + CRF_CL]); \
+ tcg_gen_and_i32(cpu_cr[crfD(ctx->opcode) * 4 + CRF_CH_AND_CL], \
+ cpu_cr[crfD(ctx->opcode) * 4 + CRF_CH], \
+ cpu_cr[crfD(ctx->opcode) * 4 + CRF_CL]); \
+}
/* Single precision floating-point vectors operations */
/* Arithmetic */
@@ -9443,12 +9456,12 @@ GEN_SPEFPUOP_CONV_64_64(evfsctuiz);
GEN_SPEFPUOP_CONV_64_64(evfsctsiz);
/* Comparison */
-GEN_SPEFPUOP_COMP_64(evfscmpgt);
-GEN_SPEFPUOP_COMP_64(evfscmplt);
-GEN_SPEFPUOP_COMP_64(evfscmpeq);
-GEN_SPEFPUOP_COMP_64(evfststgt);
-GEN_SPEFPUOP_COMP_64(evfststlt);
-GEN_SPEFPUOP_COMP_64(evfststeq);
+GEN_SPEFPUOP_COMP_V64(evfscmpgt, efscmpgt);
+GEN_SPEFPUOP_COMP_V64(evfscmplt, efscmplt);
+GEN_SPEFPUOP_COMP_V64(evfscmpeq, efscmpeq);
+GEN_SPEFPUOP_COMP_V64(evfststgt, efststgt);
+GEN_SPEFPUOP_COMP_V64(evfststlt, efststlt);
+GEN_SPEFPUOP_COMP_V64(evfststeq, efststeq);
/* Opcodes definitions */
GEN_SPE(evfsadd, evfssub, 0x00, 0x0A, 0x00000000, 0x00000000,
PPC_SPE_SINGLE); //
--
1.8.3.1
- [Qemu-devel] [PATCH 03/17] ppc: fix monitor access to CR, (continued)
- [Qemu-devel] [PATCH 03/17] ppc: fix monitor access to CR, Paolo Bonzini, 2014/08/28
- [Qemu-devel] [PATCH 05/17] ppc: use CRF_* in fpu_helper.c, Paolo Bonzini, 2014/08/28
- [Qemu-devel] [PATCH 06/17] ppc: use CRF_* in int_helper.c, Paolo Bonzini, 2014/08/28
- [Qemu-devel] [PATCH 07/17] ppc: fix result of DLMZB when no zero bytes are found, Paolo Bonzini, 2014/08/28
- [Qemu-devel] [PATCH 08/17] ppc: introduce helpers for mfocrf/mtocrf, Paolo Bonzini, 2014/08/28
- [Qemu-devel] [PATCH 09/17] ppc: reorganize gen_compute_fprf, Paolo Bonzini, 2014/08/28
- [Qemu-devel] [PATCH 10/17] ppc: introduce gen_op_mfcr/gen_op_mtcr, Paolo Bonzini, 2014/08/28
- [Qemu-devel] [PATCH 11/17] ppc: rename gen_set_cr6_from_fpscr, Paolo Bonzini, 2014/08/28
- [Qemu-devel] [PATCH 13/17] ppc: compute mask from BI using right shift, Paolo Bonzini, 2014/08/28
- [Qemu-devel] [PATCH 14/17] ppc: introduce ppc_get_crf and ppc_set_crf, Paolo Bonzini, 2014/08/28
- [Qemu-devel] [PATCH 15/17] ppc: store CR registers in 32 1-bit registers,
Paolo Bonzini <=
- [Qemu-devel] [PATCH 16/17] ppc: inline ppc_get_crf/ppc_set_crf when clearer, Paolo Bonzini, 2014/08/28
- [Qemu-devel] [PATCH 17/17] ppc: dump all 32 CR bits, Paolo Bonzini, 2014/08/28
- [Qemu-devel] [PATCH 12/17] ppc: use movcond for isel, Paolo Bonzini, 2014/08/28
- Re: [Qemu-devel] [RFT/RFH PATCH 00/16] PPC speedup patches for TCG, Tom Musta, 2014/08/28