[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[PATCH v4 13/54] tcg/arm: Introduce prepare_host_addr
From: Richard Henderson
Subject: [PATCH v4 13/54] tcg/arm: Introduce prepare_host_addr
Date: Wed, 3 May 2023 07:56:48 +0100
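Merge tcg_out_tlb_read, add_qemu_ldst_label, tcg_out_test_alignment,
and some code that lived in both tcg_out_qemu_ld and tcg_out_qemu_st
into one function, prepare_host_addr, that fills in a HostAddress
structure and returns a TCGLabelQemuLdst structure (or NULL when no
slow path is required).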
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
tcg/arm/tcg-target.c.inc | 351 ++++++++++++++++++---------------------
1 file changed, 159 insertions(+), 192 deletions(-)
diff --git a/tcg/arm/tcg-target.c.inc b/tcg/arm/tcg-target.c.inc
index b6b4ffc546..c744512778 100644
--- a/tcg/arm/tcg-target.c.inc
+++ b/tcg/arm/tcg-target.c.inc
@@ -1434,125 +1434,6 @@ static TCGReg tcg_out_arg_reg64(TCGContext *s, TCGReg argreg,
}
}
-#define TLB_SHIFT (CPU_TLB_ENTRY_BITS + CPU_TLB_BITS)
-
-/* We expect to use an 9-bit sign-magnitude negative offset from ENV. */
-QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) > 0);
-QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) < -256);
-
-/* These offsets are built into the LDRD below. */
-QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, mask) != 0);
-QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, table) != 4);
-
-/* Load and compare a TLB entry, leaving the flags set. Returns the register
- containing the addend of the tlb entry. Clobbers R0, R1, R2, TMP. */
-
-static TCGReg tcg_out_tlb_read(TCGContext *s, TCGReg addrlo, TCGReg addrhi,
- MemOp opc, int mem_index, bool is_load)
-{
- int cmp_off = (is_load ? offsetof(CPUTLBEntry, addr_read)
- : offsetof(CPUTLBEntry, addr_write));
- int fast_off = TLB_MASK_TABLE_OFS(mem_index);
- unsigned s_mask = (1 << (opc & MO_SIZE)) - 1;
- unsigned a_mask = (1 << get_alignment_bits(opc)) - 1;
- TCGReg t_addr;
-
- /* Load env_tlb(env)->f[mmu_idx].{mask,table} into {r0,r1}. */
- tcg_out_ldrd_8(s, COND_AL, TCG_REG_R0, TCG_AREG0, fast_off);
-
- /* Extract the tlb index from the address into R0. */
- tcg_out_dat_reg(s, COND_AL, ARITH_AND, TCG_REG_R0, TCG_REG_R0, addrlo,
- SHIFT_IMM_LSR(TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS));
-
- /*
- * Add the tlb_table pointer, creating the CPUTLBEntry address in R1.
- * Load the tlb comparator into R2/R3 and the fast path addend into R1.
- */
- if (cmp_off == 0) {
- if (TARGET_LONG_BITS == 64) {
- tcg_out_ldrd_rwb(s, COND_AL, TCG_REG_R2, TCG_REG_R1, TCG_REG_R0);
- } else {
- tcg_out_ld32_rwb(s, COND_AL, TCG_REG_R2, TCG_REG_R1, TCG_REG_R0);
- }
- } else {
- tcg_out_dat_reg(s, COND_AL, ARITH_ADD,
- TCG_REG_R1, TCG_REG_R1, TCG_REG_R0, 0);
- if (TARGET_LONG_BITS == 64) {
- tcg_out_ldrd_8(s, COND_AL, TCG_REG_R2, TCG_REG_R1, cmp_off);
- } else {
- tcg_out_ld32_12(s, COND_AL, TCG_REG_R2, TCG_REG_R1, cmp_off);
- }
- }
-
- /* Load the tlb addend. */
- tcg_out_ld32_12(s, COND_AL, TCG_REG_R1, TCG_REG_R1,
- offsetof(CPUTLBEntry, addend));
-
- /*
- * Check alignment, check comparators.
- * Do this in 2-4 insns. Use MOVW for v7, if possible,
- * to reduce the number of sequential conditional instructions.
- * Almost all guests have at least 4k pages, which means that we need
- * to clear at least 9 bits even for an 8-byte memory, which means it
- * isn't worth checking for an immediate operand for BIC.
- *
- * For unaligned accesses, test the page of the last unit of alignment.
- * This leaves the least significant alignment bits unchanged, and of
- * course must be zero.
- */
- t_addr = addrlo;
- if (a_mask < s_mask) {
- t_addr = TCG_REG_R0;
- tcg_out_dat_imm(s, COND_AL, ARITH_ADD, t_addr,
- addrlo, s_mask - a_mask);
- }
- if (use_armv7_instructions && TARGET_PAGE_BITS <= 16) {
- tcg_out_movi32(s, COND_AL, TCG_REG_TMP, ~(TARGET_PAGE_MASK | a_mask));
- tcg_out_dat_reg(s, COND_AL, ARITH_BIC, TCG_REG_TMP,
- t_addr, TCG_REG_TMP, 0);
- tcg_out_dat_reg(s, COND_AL, ARITH_CMP, 0, TCG_REG_R2, TCG_REG_TMP, 0);
- } else {
- if (a_mask) {
- tcg_debug_assert(a_mask <= 0xff);
- tcg_out_dat_imm(s, COND_AL, ARITH_TST, 0, addrlo, a_mask);
- }
- tcg_out_dat_reg(s, COND_AL, ARITH_MOV, TCG_REG_TMP, 0, t_addr,
- SHIFT_IMM_LSR(TARGET_PAGE_BITS));
- tcg_out_dat_reg(s, (a_mask ? COND_EQ : COND_AL), ARITH_CMP,
- 0, TCG_REG_R2, TCG_REG_TMP,
- SHIFT_IMM_LSL(TARGET_PAGE_BITS));
- }
-
- if (TARGET_LONG_BITS == 64) {
- tcg_out_dat_reg(s, COND_EQ, ARITH_CMP, 0, TCG_REG_R3, addrhi, 0);
- }
-
- return TCG_REG_R1;
-}
-
-/* Record the context of a call to the out of line helper code for the slow
- path for a load or store, so that we can later generate the correct
- helper code. */
-static void add_qemu_ldst_label(TCGContext *s, bool is_ld,
- MemOpIdx oi, TCGType type,
- TCGReg datalo, TCGReg datahi,
- TCGReg addrlo, TCGReg addrhi,
- tcg_insn_unit *raddr,
- tcg_insn_unit *label_ptr)
-{
- TCGLabelQemuLdst *label = new_ldst_label(s);
-
- label->is_ld = is_ld;
- label->oi = oi;
- label->type = type;
- label->datalo_reg = datalo;
- label->datahi_reg = datahi;
- label->addrlo_reg = addrlo;
- label->addrhi_reg = addrhi;
- label->raddr = tcg_splitwx_to_rx(raddr);
- label->label_ptr[0] = label_ptr;
-}
-
static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
{
TCGReg argreg;
@@ -1636,29 +1517,6 @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
return true;
}
#else
-
-static void tcg_out_test_alignment(TCGContext *s, bool is_ld, TCGReg addrlo,
- TCGReg addrhi, unsigned a_bits)
-{
- unsigned a_mask = (1 << a_bits) - 1;
- TCGLabelQemuLdst *label = new_ldst_label(s);
-
- label->is_ld = is_ld;
- label->addrlo_reg = addrlo;
- label->addrhi_reg = addrhi;
-
- /* We are expecting a_bits to max out at 7, and can easily support 8. */
- tcg_debug_assert(a_mask <= 0xff);
- /* tst addr, #mask */
- tcg_out_dat_imm(s, COND_AL, ARITH_TST, 0, addrlo, a_mask);
-
- /* blne slow_path */
- label->label_ptr[0] = s->code_ptr;
- tcg_out_bl_imm(s, COND_NE, 0);
-
- label->raddr = tcg_splitwx_to_rx(s->code_ptr);
-}
-
static bool tcg_out_fail_alignment(TCGContext *s, TCGLabelQemuLdst *l)
{
if (!reloc_pc24(l->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
@@ -1703,6 +1561,134 @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
}
#endif /* SOFTMMU */
+static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
+ TCGReg addrlo, TCGReg addrhi,
+ MemOpIdx oi, bool is_ld)
+{
+ TCGLabelQemuLdst *ldst = NULL;
+ MemOp opc = get_memop(oi);
+ MemOp a_bits = get_alignment_bits(opc);
+ unsigned a_mask = (1 << a_bits) - 1;
+
+#ifdef CONFIG_SOFTMMU
+ int mem_index = get_mmuidx(oi);
+ int cmp_off = is_ld ? offsetof(CPUTLBEntry, addr_read)
+ : offsetof(CPUTLBEntry, addr_write);
+ int fast_off = TLB_MASK_TABLE_OFS(mem_index);
+ unsigned s_mask = (1 << (opc & MO_SIZE)) - 1;
+ TCGReg t_addr;
+
+ ldst = new_ldst_label(s);
+ ldst->is_ld = is_ld;
+ ldst->oi = oi;
+ ldst->addrlo_reg = addrlo;
+ ldst->addrhi_reg = addrhi;
+
+ /* Load env_tlb(env)->f[mmu_idx].{mask,table} into {r0,r1}. */
+ QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) > 0);
+ QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) < -256);
+ QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, mask) != 0);
+ QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, table) != 4);
+ tcg_out_ldrd_8(s, COND_AL, TCG_REG_R0, TCG_AREG0, fast_off);
+
+ /* Extract the tlb index from the address into R0. */
+ tcg_out_dat_reg(s, COND_AL, ARITH_AND, TCG_REG_R0, TCG_REG_R0, addrlo,
+ SHIFT_IMM_LSR(TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS));
+
+ /*
+ * Add the tlb_table pointer, creating the CPUTLBEntry address in R1.
+ * Load the tlb comparator into R2/R3 and the fast path addend into R1.
+ */
+ if (cmp_off == 0) {
+ if (TARGET_LONG_BITS == 64) {
+ tcg_out_ldrd_rwb(s, COND_AL, TCG_REG_R2, TCG_REG_R1, TCG_REG_R0);
+ } else {
+ tcg_out_ld32_rwb(s, COND_AL, TCG_REG_R2, TCG_REG_R1, TCG_REG_R0);
+ }
+ } else {
+ tcg_out_dat_reg(s, COND_AL, ARITH_ADD,
+ TCG_REG_R1, TCG_REG_R1, TCG_REG_R0, 0);
+ if (TARGET_LONG_BITS == 64) {
+ tcg_out_ldrd_8(s, COND_AL, TCG_REG_R2, TCG_REG_R1, cmp_off);
+ } else {
+ tcg_out_ld32_12(s, COND_AL, TCG_REG_R2, TCG_REG_R1, cmp_off);
+ }
+ }
+
+ /* Load the tlb addend. */
+ tcg_out_ld32_12(s, COND_AL, TCG_REG_R1, TCG_REG_R1,
+ offsetof(CPUTLBEntry, addend));
+
+ /*
+ * Check alignment, check comparators.
+ * Do this in 2-4 insns. Use MOVW for v7, if possible,
+ * to reduce the number of sequential conditional instructions.
+ * Almost all guests have at least 4k pages, which means that we need
+ * to clear at least 9 bits even for an 8-byte memory, which means it
+ * isn't worth checking for an immediate operand for BIC.
+ *
+ * For unaligned accesses, test the page of the last unit of alignment.
+ * This leaves the least significant alignment bits unchanged, and of
+ * course must be zero.
+ */
+ t_addr = addrlo;
+ if (a_mask < s_mask) {
+ t_addr = TCG_REG_R0;
+ tcg_out_dat_imm(s, COND_AL, ARITH_ADD, t_addr,
+ addrlo, s_mask - a_mask);
+ }
+ if (use_armv7_instructions && TARGET_PAGE_BITS <= 16) {
+ tcg_out_movi32(s, COND_AL, TCG_REG_TMP, ~(TARGET_PAGE_MASK | a_mask));
+ tcg_out_dat_reg(s, COND_AL, ARITH_BIC, TCG_REG_TMP,
+ t_addr, TCG_REG_TMP, 0);
+ tcg_out_dat_reg(s, COND_AL, ARITH_CMP, 0, TCG_REG_R2, TCG_REG_TMP, 0);
+ } else {
+ if (a_mask) {
+ tcg_debug_assert(a_mask <= 0xff);
+ tcg_out_dat_imm(s, COND_AL, ARITH_TST, 0, addrlo, a_mask);
+ }
+ tcg_out_dat_reg(s, COND_AL, ARITH_MOV, TCG_REG_TMP, 0, t_addr,
+ SHIFT_IMM_LSR(TARGET_PAGE_BITS));
+ tcg_out_dat_reg(s, (a_mask ? COND_EQ : COND_AL), ARITH_CMP,
+ 0, TCG_REG_R2, TCG_REG_TMP,
+ SHIFT_IMM_LSL(TARGET_PAGE_BITS));
+ }
+
+ if (TARGET_LONG_BITS == 64) {
+ tcg_out_dat_reg(s, COND_EQ, ARITH_CMP, 0, TCG_REG_R3, addrhi, 0);
+ }
+
+ *h = (HostAddress){
+ .cond = COND_AL,
+ .base = addrlo,
+ .index = TCG_REG_R1,
+ .index_scratch = true,
+ };
+#else
+ if (a_mask) {
+ ldst = new_ldst_label(s);
+ ldst->is_ld = is_ld;
+ ldst->oi = oi;
+ ldst->addrlo_reg = addrlo;
+ ldst->addrhi_reg = addrhi;
+
+ /* We are expecting a_bits to max out at 7 */
+ tcg_debug_assert(a_mask <= 0xff);
+ /* tst addr, #mask */
+ tcg_out_dat_imm(s, COND_AL, ARITH_TST, 0, addrlo, a_mask);
+ }
+
+ *h = (HostAddress){
+ .cond = COND_AL,
+ .base = addrlo,
+ .index = guest_base ? TCG_REG_GUEST_BASE : -1,
+ .index_scratch = false,
+ };
+#endif
+
+ return ldst;
+}
+
static void tcg_out_qemu_ld_direct(TCGContext *s, MemOp opc, TCGReg datalo,
TCGReg datahi, HostAddress h)
{
@@ -1799,37 +1785,28 @@ static void tcg_out_qemu_ld(TCGContext *s, TCGReg datalo, TCGReg datahi,
MemOpIdx oi, TCGType data_type)
{
MemOp opc = get_memop(oi);
+ TCGLabelQemuLdst *ldst;
HostAddress h;
-#ifdef CONFIG_SOFTMMU
- h.cond = COND_AL;
- h.base = addrlo;
- h.index_scratch = true;
- h.index = tcg_out_tlb_read(s, addrlo, addrhi, opc, get_mmuidx(oi), 1);
+ ldst = prepare_host_addr(s, &h, addrlo, addrhi, oi, true);
+ if (ldst) {
+ ldst->type = data_type;
+ ldst->datalo_reg = datalo;
+ ldst->datahi_reg = datahi;
- /*
- * This a conditional BL only to load a pointer within this opcode into
- * LR for the slow path. We will not be using the value for a tail call.
- */
- tcg_insn_unit *label_ptr = s->code_ptr;
- tcg_out_bl_imm(s, COND_NE, 0);
+ /*
+ * This a conditional BL only to load a pointer within this
+ * opcode into LR for the slow path. We will not be using
+ * the value for a tail call.
+ */
+ ldst->label_ptr[0] = s->code_ptr;
+ tcg_out_bl_imm(s, COND_NE, 0);
- tcg_out_qemu_ld_direct(s, opc, datalo, datahi, h);
-
- add_qemu_ldst_label(s, true, oi, data_type, datalo, datahi,
- addrlo, addrhi, s->code_ptr, label_ptr);
-#else
- unsigned a_bits = get_alignment_bits(opc);
- if (a_bits) {
- tcg_out_test_alignment(s, true, addrlo, addrhi, a_bits);
+ tcg_out_qemu_ld_direct(s, opc, datalo, datahi, h);
+ ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
+ } else {
+ tcg_out_qemu_ld_direct(s, opc, datalo, datahi, h);
}
-
- h.cond = COND_AL;
- h.base = addrlo;
- h.index = guest_base ? TCG_REG_GUEST_BASE : -1;
- h.index_scratch = false;
- tcg_out_qemu_ld_direct(s, opc, datalo, datahi, h);
-#endif
}
static void tcg_out_qemu_st_direct(TCGContext *s, MemOp opc, TCGReg datalo,
@@ -1891,35 +1868,25 @@ static void tcg_out_qemu_st(TCGContext *s, TCGReg datalo, TCGReg datahi,
MemOpIdx oi, TCGType data_type)
{
MemOp opc = get_memop(oi);
+ TCGLabelQemuLdst *ldst;
HostAddress h;
-#ifdef CONFIG_SOFTMMU
- h.cond = COND_EQ;
- h.base = addrlo;
- h.index_scratch = true;
- h.index = tcg_out_tlb_read(s, addrlo, addrhi, opc, get_mmuidx(oi), 0);
- tcg_out_qemu_st_direct(s, opc, datalo, datahi, h);
+ ldst = prepare_host_addr(s, &h, addrlo, addrhi, oi, false);
+ if (ldst) {
+ ldst->type = data_type;
+ ldst->datalo_reg = datalo;
+ ldst->datahi_reg = datahi;
- /* The conditional call must come last, as we're going to return here. */
- tcg_insn_unit *label_ptr = s->code_ptr;
- tcg_out_bl_imm(s, COND_NE, 0);
-
- add_qemu_ldst_label(s, false, oi, data_type, datalo, datahi,
- addrlo, addrhi, s->code_ptr, label_ptr);
-#else
- unsigned a_bits = get_alignment_bits(opc);
-
- h.cond = COND_AL;
- if (a_bits) {
- tcg_out_test_alignment(s, false, addrlo, addrhi, a_bits);
h.cond = COND_EQ;
- }
+ tcg_out_qemu_st_direct(s, opc, datalo, datahi, h);
- h.base = addrlo;
- h.index = guest_base ? TCG_REG_GUEST_BASE : -1;
- h.index_scratch = false;
- tcg_out_qemu_st_direct(s, opc, datalo, datahi, h);
-#endif
+ /* The conditional call is last, as we're going to return here. */
+ ldst->label_ptr[0] = s->code_ptr;
+ tcg_out_bl_imm(s, COND_NE, 0);
+ ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
+ } else {
+ tcg_out_qemu_st_direct(s, opc, datalo, datahi, h);
+ }
}
static void tcg_out_epilogue(TCGContext *s);
--
2.34.1
- [PATCH v4 05/54] tcg/i386: Introduce tcg_out_testi, (continued)
- [PATCH v4 05/54] tcg/i386: Introduce tcg_out_testi, Richard Henderson, 2023/05/03
- [PATCH v4 04/54] tcg/i386: Drop r0+r1 local variables from tcg_out_tlb_load, Richard Henderson, 2023/05/03
- [PATCH v4 03/54] tcg/i386: Introduce HostAddress, Richard Henderson, 2023/05/03
- [PATCH v4 06/54] tcg/i386: Introduce prepare_host_addr, Richard Henderson, 2023/05/03
- [PATCH v4 07/54] tcg/i386: Use indexed addressing for softmmu fast path, Richard Henderson, 2023/05/03
- [PATCH v4 08/54] tcg/aarch64: Rationalize args to tcg_out_qemu_{ld, st}, Richard Henderson, 2023/05/03
- [PATCH v4 09/54] tcg/aarch64: Introduce HostAddress, Richard Henderson, 2023/05/03
- [PATCH v4 11/54] tcg/arm: Rationalize args to tcg_out_qemu_{ld,st}, Richard Henderson, 2023/05/03
- [PATCH v4 10/54] tcg/aarch64: Introduce prepare_host_addr, Richard Henderson, 2023/05/03
- [PATCH v4 12/54] tcg/arm: Introduce HostAddress, Richard Henderson, 2023/05/03
- [PATCH v4 13/54] tcg/arm: Introduce prepare_host_addr, Richard Henderson <=
- [PATCH v4 14/54] tcg/loongarch64: Rationalize args to tcg_out_qemu_{ld, st}, Richard Henderson, 2023/05/03
- [PATCH v4 16/54] tcg/loongarch64: Introduce prepare_host_addr, Richard Henderson, 2023/05/03
- [PATCH v4 15/54] tcg/loongarch64: Introduce HostAddress, Richard Henderson, 2023/05/03
- [PATCH v4 17/54] tcg/mips: Rationalize args to tcg_out_qemu_{ld,st}, Richard Henderson, 2023/05/03
- [PATCH v4 18/54] tcg/mips: Introduce prepare_host_addr, Richard Henderson, 2023/05/03
- [PATCH v4 21/54] tcg/ppc: Introduce prepare_host_addr, Richard Henderson, 2023/05/03
- [PATCH v4 19/54] tcg/ppc: Rationalize args to tcg_out_qemu_{ld,st}, Richard Henderson, 2023/05/03
- [PATCH v4 20/54] tcg/ppc: Introduce HostAddress, Richard Henderson, 2023/05/03
- [PATCH v4 22/54] tcg/riscv: Require TCG_TARGET_REG_BITS == 64, Richard Henderson, 2023/05/03
- [PATCH v4 23/54] tcg/riscv: Rationalize args to tcg_out_qemu_{ld,st}, Richard Henderson, 2023/05/03