[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[Qemu-devel] [PATCH for-next 15/15] tcg-ppc64: Implement CONFIG_QEMU_LDST_OPTIMIZATION
From: Richard Henderson
Subject: [Qemu-devel] [PATCH for-next 15/15] tcg-ppc64: Implement CONFIG_QEMU_LDST_OPTIMIZATION
Date: Mon, 5 Aug 2013 08:28:50 -1000
Signed-off-by: Richard Henderson <address@hidden>
---
configure | 2 +-
include/exec/exec-all.h | 4 +-
tcg/ppc64/tcg-target.c | 219 +++++++++++++++++++++++++++++++-----------------
3 files changed, 146 insertions(+), 79 deletions(-)
diff --git a/configure b/configure
index 18fa608..5b9a66c 100755
--- a/configure
+++ b/configure
@@ -3650,7 +3650,7 @@ echo "libs_softmmu=$libs_softmmu" >> $config_host_mak
echo "ARCH=$ARCH" >> $config_host_mak
case "$cpu" in
- arm|i386|x86_64|ppc|aarch64)
+ aarch64 | arm | i386 | x86_64 | ppc*)
# The TCG interpreter currently does not support ld/st optimization.
if test "$tcg_interpreter" = "no" ; then
echo "CONFIG_QEMU_LDST_OPTIMIZATION=y" >> $config_host_mak
diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h
index 26c3553..91b189b 100644
--- a/include/exec/exec-all.h
+++ b/include/exec/exec-all.h
@@ -326,11 +326,11 @@ extern uintptr_t tci_tb_ptr;
(5) post-process (e.g. stack adjust)
(6) jump to corresponding code of the next of fast path
*/
-# if defined(__i386__) || defined(__x86_64__)
+# if defined(__i386__) || defined(__x86_64__) || defined(_ARCH_PPC64)
# define GETRA() ((uintptr_t)__builtin_return_address(0))
/* The return address argument for ldst is passed directly. */
# define GETPC_LDST() (abort(), 0)
-# elif defined (_ARCH_PPC) && !defined (_ARCH_PPC64)
+# elif defined(_ARCH_PPC)
# define GETRA() ((uintptr_t)__builtin_return_address(0))
# define GETPC_LDST() ((uintptr_t) ((*(int32_t *)(GETRA() - 4)) - 1))
# elif defined(__arm__)
diff --git a/tcg/ppc64/tcg-target.c b/tcg/ppc64/tcg-target.c
index 4b23597..7ecc032 100644
--- a/tcg/ppc64/tcg-target.c
+++ b/tcg/ppc64/tcg-target.c
@@ -830,26 +830,50 @@ static void tcg_out_call(TCGContext *s, tcg_target_long arg,
#endif
}
+static const PowerOpcode qemu_ldx_opc[8] = { /* NOTE(review): index is presumably the qemu_ld opc -- confirm */
+#ifdef TARGET_WORDS_BIGENDIAN
+ LBZX, LHZX, LWZX, LDX, /* entries [0..3] */
+ 0, LHAX, LWAX, LDX /* entries [4..7]; presumably the sign-extending forms; 0 = no opcode listed */
+#else
+ LBZX, LHBRX, LWBRX, LDBRX, /* entries [0..3]; byte-reversed forms above one byte */
+ 0, 0, 0, LDBRX, /* entries [4..7]; NOTE(review): 0 entries presumably need a separate extend -- confirm */
+#endif
+};
+
+static const PowerOpcode qemu_stx_opc[4] = { /* NOTE(review): index is presumably the qemu_st opc (0..3) -- confirm */
+#ifdef TARGET_WORDS_BIGENDIAN
+ STBX, STHX, STWX, STDX /* entries [0..3] */
+#else
+ STBX, STHBRX, STWBRX, STDBRX, /* entries [0..3]; byte-reversed forms above one byte */
+#endif
+};
+
+static const PowerOpcode qemu_exts_opc[4] = { /* indexed by s_bits (opc & 3) in the load slow path */
+ EXTSB, EXTSH, EXTSW, 0 /* NOTE(review): entry 3 is 0, presumably never used for a full-width load -- confirm */
+};
+
#if defined (CONFIG_SOFTMMU)
#include "exec/softmmu_defs.h"
/* helper signature: helper_ld_mmu(CPUState *env, target_ulong addr,
- int mmu_idx) */
+ * int mmu_idx, uintptr_t ra)
+ */
static const void * const qemu_ld_helpers[4] = {
- helper_ldb_mmu,
- helper_ldw_mmu,
- helper_ldl_mmu,
- helper_ldq_mmu,
+ helper_ret_ldb_mmu, /* [0]; the load slow path indexes this table by s_bits (opc & 3) */
+ helper_ret_ldw_mmu, /* [1] */
+ helper_ret_ldl_mmu, /* [2] */
+ helper_ret_ldq_mmu, /* [3] */
};
/* helper signature: helper_st_mmu(CPUState *env, target_ulong addr,
- uintxx_t val, int mmu_idx) */
+ * uintxx_t val, int mmu_idx, uintptr_t ra)
+ */
static const void * const qemu_st_helpers[4] = {
- helper_stb_mmu,
- helper_stw_mmu,
- helper_stl_mmu,
- helper_stq_mmu,
+ helper_ret_stb_mmu, /* [0]; the store slow path indexes this table by opc */
+ helper_ret_stw_mmu, /* [1] */
+ helper_ret_stl_mmu, /* [2] */
+ helper_ret_stq_mmu, /* [3] */
};
/* Perform the TLB load and compare. Places the result of the comparison
@@ -911,29 +935,108 @@ static TCGReg tcg_out_tlb_read(TCGContext *s, int s_bits, TCGReg addr_reg,
return addr_reg;
}
-#endif
-static const PowerOpcode qemu_ldx_opc[8] = {
-#ifdef TARGET_WORDS_BIGENDIAN
- LBZX, LHZX, LWZX, LDX,
- 0, LHAX, LWAX, LDX
-#else
- LBZX, LHBRX, LWBRX, LDBRX,
- 0, 0, 0, LDBRX,
-#endif
-};
+/* Append one slow-path record (a load if IS_LD, otherwise a store) to
+ s->qemu_ldst_labels so that the out of line helper call can be generated
+ later. Calls tcg_abort() once TCG_MAX_QEMU_LDST records are in use. */
+static void add_qemu_ldst_label(TCGContext *s, bool is_ld, int opc,
+ int data_reg, int addr_reg, int mem_index,
+ uint8_t *raddr, uint8_t *label_ptr)
+{
+ int idx;
+ TCGLabelQemuLdst *label;
-static const PowerOpcode qemu_stx_opc[4] = {
-#ifdef TARGET_WORDS_BIGENDIAN
- STBX, STHX, STWX, STDX
-#else
- STBX, STHBRX, STWBRX, STDBRX,
-#endif
-};
+ if (s->nb_qemu_ldst_labels >= TCG_MAX_QEMU_LDST) { /* record limit reached */
+ tcg_abort();
+ }
-static const PowerOpcode qemu_exts_opc[4] = {
- EXTSB, EXTSH, EXTSW, 0
-};
+ idx = s->nb_qemu_ldst_labels++; /* take the next slot and bump the count */
+ label = (TCGLabelQemuLdst *)&s->qemu_ldst_labels[idx];
+ label->is_ld = is_ld; /* selects the load or store slow-path emitter */
+ label->opc = opc; /* slow paths read opc & 3 as s_bits, opc & 4 as "sign-extend" */
+ label->datalo_reg = data_reg;
+ label->addrlo_reg = addr_reg;
+ label->mem_index = mem_index;
+ label->raddr = raddr; /* address the slow path branches back to */
+ label->label_ptr[0] = label_ptr; /* branch insn the slow path relocates to itself */
+}
+
+/* Helper "ra" value for RADDR (RADDR - 1); see GETPC in include/exec/exec-all.h. */
+static inline uintptr_t do_getpc(uint8_t *raddr)
+{
+ return (uintptr_t)raddr - 1;
+}
+
+static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
+{ /* Emit the out of line helper call for the guest load recorded in LB. */
+ int opc = lb->opc;
+ int s_bits = opc & 3; /* selects the helper and the sign-extend opcode */
+ PowerOpcode insn;
+
+ reloc_pc14(lb->label_ptr[0], (uintptr_t)s->code_ptr); /* retarget the recorded branch to here */
+
+ tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_R3, TCG_AREG0); /* R3 = TCG_AREG0 (env in the helper signature) */
+
+ /* If the address needed to be zero-extended, we'll have already
+ placed it in R4. The only remaining case is 64-bit guest. */
+ if (lb->addrlo_reg != TCG_REG_R4) {
+ tcg_out_mov(s, TCG_TYPE_I64, TCG_REG_R4, lb->addrlo_reg);
+ }
+
+ tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_R5, lb->mem_index); /* R5 = mem_index */
+ tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R6, do_getpc(lb->raddr)); /* R6 = ra */
+
+ tcg_out_call(s, (tcg_target_long)qemu_ld_helpers[s_bits], 1, LK);
+
+ if (opc & 4) { /* signed load: extend the helper result in R3 into the data register */
+ insn = qemu_exts_opc[s_bits];
+ tcg_out32(s, insn | RA(lb->datalo_reg) | RS(TCG_REG_R3));
+ } else { /* otherwise copy R3 into the data register as is */
+ tcg_out_mov(s, TCG_TYPE_I64, lb->datalo_reg, TCG_REG_R3);
+ }
+
+ tcg_out_b(s, 0, (uintptr_t)lb->raddr); /* branch back to the recorded return address */
+}
+
+static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
+{ /* Emit the out of line helper call for the guest store recorded in LB. */
+ int opc = lb->opc; /* indexes qemu_st_helpers and sets the data width below */
+
+ reloc_pc14(lb->label_ptr[0], (uintptr_t)s->code_ptr); /* retarget the recorded branch to here */
+
+ tcg_out_mov(s, TCG_TYPE_I64, TCG_REG_R3, TCG_AREG0); /* R3 = TCG_AREG0 (env in the helper signature) */
+
+ /* If the address needed to be zero-extended, we'll have already
+ placed it in R4. The only remaining case is 64-bit guest. */
+ if (lb->addrlo_reg != TCG_REG_R4) {
+ tcg_out_mov(s, TCG_TYPE_I64, TCG_REG_R4, lb->addrlo_reg);
+ }
+
+ tcg_out_rld(s, RLDICL, TCG_REG_R5, lb->datalo_reg, /* R5 = store data */
+ 0, 64 - (1 << (3 + opc))); /* NOTE(review): presumably keeps only the low 8 << opc bits -- confirm */
+ tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_R6, lb->mem_index); /* R6 = mem_index */
+ tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R7, do_getpc(lb->raddr)); /* R7 = ra */
+
+ tcg_out_call(s, (tcg_target_long)qemu_st_helpers[opc], 1, LK);
+
+ tcg_out_b(s, 0, (uintptr_t)lb->raddr); /* branch back to the recorded return address */
+}
+
+void tcg_out_tb_finalize(TCGContext *s)
+{ /* Emit the slow path for every recorded ld/st label, in recording order. */
+ int i, n = s->nb_qemu_ldst_labels; /* number of records added by add_qemu_ldst_label */
+
+ /* qemu_ld/st slow paths */
+ for (i = 0; i < n; i++) {
+ TCGLabelQemuLdst *label = &s->qemu_ldst_labels[i];
+ if (label->is_ld) { /* record describes a load */
+ tcg_out_qemu_ld_slow_path(s, label);
+ } else { /* record describes a store */
+ tcg_out_qemu_st_slow_path(s, label);
+ }
+ }
+}
+#endif /* SOFTMMU */
static void tcg_out_qemu_ld (TCGContext *s, const TCGArg *args, int opc)
{
@@ -941,9 +1044,8 @@ static void tcg_out_qemu_ld (TCGContext *s, const TCGArg *args, int opc)
PowerOpcode insn;
int s_bits;
#ifdef CONFIG_SOFTMMU
- TCGReg ir;
int mem_index;
- void *label1_ptr, *label2_ptr;
+ void *label_ptr;
#endif
data_reg = *args++;
@@ -955,29 +1057,8 @@ static void tcg_out_qemu_ld (TCGContext *s, const TCGArg *args, int opc)
r0 = tcg_out_tlb_read(s, s_bits, addr_reg, mem_index, true);
- label1_ptr = s->code_ptr;
- tcg_out32(s, BC | BI (7, CR_EQ) | BO_COND_TRUE);
-
- /* slow path */
- ir = TCG_REG_R3;
- tcg_out_mov(s, TCG_TYPE_I64, ir++, TCG_AREG0);
- tcg_out_mov(s, TCG_TYPE_I64, ir++, addr_reg);
- tcg_out_movi(s, TCG_TYPE_I64, ir++, mem_index);
-
- tcg_out_call(s, (tcg_target_long)qemu_ld_helpers[s_bits], 1, LK);
-
- if (opc & 4) {
- insn = qemu_exts_opc[s_bits];
- tcg_out32(s, insn | RA(data_reg) | RS(3));
- } else if (data_reg != 3) {
- tcg_out_mov(s, TCG_TYPE_I64, data_reg, 3);
- }
-
- label2_ptr = s->code_ptr;
- tcg_out32(s, B);
-
- /* label1: fast path */
- reloc_pc14(label1_ptr, (tcg_target_long)s->code_ptr);
+ label_ptr = s->code_ptr;
+ tcg_out32(s, BC | BI (7, CR_EQ) | BO_COND_FALSE);
rbase = TCG_REG_R3;
r1 = TCG_REG_R0;
@@ -1007,7 +1088,8 @@ static void tcg_out_qemu_ld (TCGContext *s, const TCGArg *args, int opc)
}
#ifdef CONFIG_SOFTMMU
- reloc_pc24(label2_ptr, (tcg_target_long)s->code_ptr);
+ add_qemu_ldst_label(s, true, opc, data_reg, r0, mem_index,
+ s->code_ptr, label_ptr);
#endif
}
@@ -1016,9 +1098,8 @@ static void tcg_out_qemu_st (TCGContext *s, const TCGArg *args, int opc)
TCGReg addr_reg, r0, r1, rbase, data_reg;
PowerOpcode insn;
#ifdef CONFIG_SOFTMMU
- TCGReg ir;
int mem_index;
- void *label1_ptr, *label2_ptr;
+ void *label_ptr;
#endif
data_reg = *args++;
@@ -1029,23 +1110,8 @@ static void tcg_out_qemu_st (TCGContext *s, const TCGArg *args, int opc)
r0 = tcg_out_tlb_read(s, opc, addr_reg, mem_index, false);
- label1_ptr = s->code_ptr;
- tcg_out32(s, BC | BI (7, CR_EQ) | BO_COND_TRUE);
-
- /* slow path */
- ir = TCG_REG_R3;
- tcg_out_mov(s, TCG_TYPE_I64, ir++, TCG_AREG0);
- tcg_out_mov(s, TCG_TYPE_I64, ir++, addr_reg);
- tcg_out_rld(s, RLDICL, ir++, data_reg, 0, 64 - (1 << (3 + opc)));
- tcg_out_movi(s, TCG_TYPE_I64, ir++, mem_index);
-
- tcg_out_call(s, (tcg_target_long)qemu_st_helpers[opc], 1, LK);
-
- label2_ptr = s->code_ptr;
- tcg_out32(s, B);
-
- /* label1: fast path */
- reloc_pc14(label1_ptr, (tcg_target_long) s->code_ptr);
+ label_ptr = s->code_ptr;
+ tcg_out32(s, BC | BI (7, CR_EQ) | BO_COND_FALSE);
rbase = TCG_REG_R3;
r1 = TCG_REG_R2;
@@ -1070,7 +1136,8 @@ static void tcg_out_qemu_st (TCGContext *s, const TCGArg *args, int opc)
}
#ifdef CONFIG_SOFTMMU
- reloc_pc24 (label2_ptr, (tcg_target_long) s->code_ptr);
+ add_qemu_ldst_label(s, false, opc, data_reg, r0, mem_index,
+ s->code_ptr, label_ptr);
#endif
}
--
1.8.3.1
- [Qemu-devel] [PATCH for-next 04/15] tcg-ppc64: Don't load the static chain from TCG, (continued)
- [Qemu-devel] [PATCH for-next 04/15] tcg-ppc64: Don't load the static chain from TCG, Richard Henderson, 2013/08/05
- [Qemu-devel] [PATCH for-next 05/15] tcg-ppc64: Look through the function descriptor when profitable, Richard Henderson, 2013/08/05
- [Qemu-devel] [PATCH for-next 06/15] tcg-ppc64: Move AREG0 to r31, Richard Henderson, 2013/08/05
- [Qemu-devel] [PATCH for-next 07/15] tcg-ppc64: Tidy register allocation order, Richard Henderson, 2013/08/05
- [Qemu-devel] [PATCH for-next 08/15] tcg-ppc64: Create PowerOpcode, Richard Henderson, 2013/08/05
- [Qemu-devel] [PATCH for-next 09/15] tcg-ppc64: Handle long offsets better, Richard Henderson, 2013/08/05
- [Qemu-devel] [PATCH for-next 10/15] tcg-ppc64: Use indirect jump threading, Richard Henderson, 2013/08/05
- [Qemu-devel] [PATCH for-next 11/15] tcg-ppc64: Setup TCG_REG_TB, Richard Henderson, 2013/08/05
- [Qemu-devel] [PATCH for-next 12/15] tcg-ppc64: Use TCG_REG_TB in tcg_out_movi and tcg_out_mem_long, Richard Henderson, 2013/08/05
- [Qemu-devel] [PATCH for-next 14/15] tcg-ppc64: Streamline tcg_out_tlb_read, Richard Henderson, 2013/08/05
- [Qemu-devel] [PATCH for-next 15/15] tcg-ppc64: Implement CONFIG_QEMU_LDST_OPTIMIZATION,
Richard Henderson <=
- [Qemu-devel] [PATCH for-next 13/15] tcg-ppc64: Tidy tcg_target_qemu_prologue, Richard Henderson, 2013/08/05
- Re: [Qemu-devel] [PATCH for-next 00/15] Collection of improvements for tcg/ppc64, Richard Henderson, 2013/08/17