[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[PATCH v4 06/54] tcg/i386: Introduce prepare_host_addr
From: Richard Henderson
Subject: [PATCH v4 06/54] tcg/i386: Introduce prepare_host_addr
Date: Wed, 3 May 2023 07:56:41 +0100
Merge tcg_out_tlb_load, add_qemu_ldst_label,
tcg_out_test_alignment, and some code that lived in both
tcg_out_qemu_ld and tcg_out_qemu_st into one function
that returns HostAddress and TCGLabelQemuLdst structures.
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
tcg/i386/tcg-target.c.inc | 344 ++++++++++++++++----------------------
1 file changed, 143 insertions(+), 201 deletions(-)
diff --git a/tcg/i386/tcg-target.c.inc b/tcg/i386/tcg-target.c.inc
index aae698121a..237b154194 100644
--- a/tcg/i386/tcg-target.c.inc
+++ b/tcg/i386/tcg-target.c.inc
@@ -1802,135 +1802,6 @@ static void * const qemu_st_helpers[(MO_SIZE | MO_BSWAP) + 1] = {
[MO_BEUQ] = helper_be_stq_mmu,
};
-/* Perform the TLB load and compare.
-
- Inputs:
- ADDRLO and ADDRHI contain the low and high part of the address.
-
- MEM_INDEX and S_BITS are the memory context and log2 size of the load.
-
- WHICH is the offset into the CPUTLBEntry structure of the slot to read.
- This should be offsetof addr_read or addr_write.
-
- Outputs:
- LABEL_PTRS is filled with 1 (32-bit addresses) or 2 (64-bit addresses)
- positions of the displacements of forward jumps to the TLB miss case.
-
- Second argument register is loaded with the low part of the address.
- In the TLB hit case, it has been adjusted as indicated by the TLB
- and so is a host address. In the TLB miss case, it continues to
- hold a guest address.
-
- First argument register is clobbered. */
-
-static inline void tcg_out_tlb_load(TCGContext *s, TCGReg addrlo, TCGReg addrhi,
- int mem_index, MemOp opc,
- tcg_insn_unit **label_ptr, int which)
-{
- TCGType ttype = TCG_TYPE_I32;
- TCGType tlbtype = TCG_TYPE_I32;
- int trexw = 0, hrexw = 0, tlbrexw = 0;
- unsigned a_bits = get_alignment_bits(opc);
- unsigned s_bits = opc & MO_SIZE;
- unsigned a_mask = (1 << a_bits) - 1;
- unsigned s_mask = (1 << s_bits) - 1;
- target_ulong tlb_mask;
-
- if (TCG_TARGET_REG_BITS == 64) {
- if (TARGET_LONG_BITS == 64) {
- ttype = TCG_TYPE_I64;
- trexw = P_REXW;
- }
- if (TCG_TYPE_PTR == TCG_TYPE_I64) {
- hrexw = P_REXW;
- if (TARGET_PAGE_BITS + CPU_TLB_DYN_MAX_BITS > 32) {
- tlbtype = TCG_TYPE_I64;
- tlbrexw = P_REXW;
- }
- }
- }
-
- tcg_out_mov(s, tlbtype, TCG_REG_L0, addrlo);
- tcg_out_shifti(s, SHIFT_SHR + tlbrexw, TCG_REG_L0,
- TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
-
- tcg_out_modrm_offset(s, OPC_AND_GvEv + trexw, TCG_REG_L0, TCG_AREG0,
- TLB_MASK_TABLE_OFS(mem_index) +
- offsetof(CPUTLBDescFast, mask));
-
- tcg_out_modrm_offset(s, OPC_ADD_GvEv + hrexw, TCG_REG_L0, TCG_AREG0,
- TLB_MASK_TABLE_OFS(mem_index) +
- offsetof(CPUTLBDescFast, table));
-
- /* If the required alignment is at least as large as the access, simply
- copy the address and mask. For lesser alignments, check that we don't
- cross pages for the complete access. */
- if (a_bits >= s_bits) {
- tcg_out_mov(s, ttype, TCG_REG_L1, addrlo);
- } else {
- tcg_out_modrm_offset(s, OPC_LEA + trexw, TCG_REG_L1,
- addrlo, s_mask - a_mask);
- }
- tlb_mask = (target_ulong)TARGET_PAGE_MASK | a_mask;
- tgen_arithi(s, ARITH_AND + trexw, TCG_REG_L1, tlb_mask, 0);
-
- /* cmp 0(TCG_REG_L0), TCG_REG_L1 */
- tcg_out_modrm_offset(s, OPC_CMP_GvEv + trexw,
- TCG_REG_L1, TCG_REG_L0, which);
-
- /* Prepare for both the fast path add of the tlb addend, and the slow
- path function argument setup. */
- tcg_out_mov(s, ttype, TCG_REG_L1, addrlo);
-
- /* jne slow_path */
- tcg_out_opc(s, OPC_JCC_long + JCC_JNE, 0, 0, 0);
- label_ptr[0] = s->code_ptr;
- s->code_ptr += 4;
-
- if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
- /* cmp 4(TCG_REG_L0), addrhi */
- tcg_out_modrm_offset(s, OPC_CMP_GvEv, addrhi, TCG_REG_L0, which + 4);
-
- /* jne slow_path */
- tcg_out_opc(s, OPC_JCC_long + JCC_JNE, 0, 0, 0);
- label_ptr[1] = s->code_ptr;
- s->code_ptr += 4;
- }
-
- /* TLB Hit. */
-
- /* add addend(TCG_REG_L0), TCG_REG_L1 */
- tcg_out_modrm_offset(s, OPC_ADD_GvEv + hrexw, TCG_REG_L1, TCG_REG_L0,
- offsetof(CPUTLBEntry, addend));
-}
-
-/*
- * Record the context of a call to the out of line helper code for the slow path
- * for a load or store, so that we can later generate the correct helper code
- */
-static void add_qemu_ldst_label(TCGContext *s, bool is_ld,
- TCGType type, MemOpIdx oi,
- TCGReg datalo, TCGReg datahi,
- TCGReg addrlo, TCGReg addrhi,
- tcg_insn_unit *raddr,
- tcg_insn_unit **label_ptr)
-{
- TCGLabelQemuLdst *label = new_ldst_label(s);
-
- label->is_ld = is_ld;
- label->oi = oi;
- label->type = type;
- label->datalo_reg = datalo;
- label->datahi_reg = datahi;
- label->addrlo_reg = addrlo;
- label->addrhi_reg = addrhi;
- label->raddr = tcg_splitwx_to_rx(raddr);
- label->label_ptr[0] = label_ptr[0];
- if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
- label->label_ptr[1] = label_ptr[1];
- }
-}
-
/*
* Generate code for the slow path for a load at the end of block
*/
@@ -2061,27 +1932,6 @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
return true;
}
#else
-
-static void tcg_out_test_alignment(TCGContext *s, bool is_ld, TCGReg addrlo,
- TCGReg addrhi, unsigned a_bits)
-{
- unsigned a_mask = (1 << a_bits) - 1;
- TCGLabelQemuLdst *label;
-
- tcg_out_testi(s, addrlo, a_mask);
- /* jne slow_path */
- tcg_out_opc(s, OPC_JCC_long + JCC_JNE, 0, 0, 0);
-
- label = new_ldst_label(s);
- label->is_ld = is_ld;
- label->addrlo_reg = addrlo;
- label->addrhi_reg = addrhi;
- label->raddr = tcg_splitwx_to_rx(s->code_ptr + 4);
- label->label_ptr[0] = s->code_ptr;
-
- s->code_ptr += 4;
-}
-
static bool tcg_out_fail_alignment(TCGContext *s, TCGLabelQemuLdst *l)
{
/* resolve label address */
@@ -2159,6 +2009,133 @@ static inline int setup_guest_base_seg(void)
#endif /* setup_guest_base_seg */
#endif /* SOFTMMU */
+/*
+ * For softmmu, perform the TLB load and compare.
+ * For useronly, perform any required alignment tests.
+ * In both cases, return a TCGLabelQemuLdst structure if the slow path
+ * is required and fill in @h with the host address for the fast path.
+ */
+static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
+ TCGReg addrlo, TCGReg addrhi,
+ MemOpIdx oi, bool is_ld)
+{
+ TCGLabelQemuLdst *ldst = NULL;
+ MemOp opc = get_memop(oi);
+ unsigned a_bits = get_alignment_bits(opc);
+ unsigned a_mask = (1 << a_bits) - 1;
+
+#ifdef CONFIG_SOFTMMU
+ int cmp_ofs = is_ld ? offsetof(CPUTLBEntry, addr_read)
+ : offsetof(CPUTLBEntry, addr_write);
+ TCGType ttype = TCG_TYPE_I32;
+ TCGType tlbtype = TCG_TYPE_I32;
+ int trexw = 0, hrexw = 0, tlbrexw = 0;
+ unsigned mem_index = get_mmuidx(oi);
+ unsigned s_bits = opc & MO_SIZE;
+ unsigned s_mask = (1 << s_bits) - 1;
+ target_ulong tlb_mask;
+
+ ldst = new_ldst_label(s);
+ ldst->is_ld = is_ld;
+ ldst->oi = oi;
+ ldst->addrlo_reg = addrlo;
+ ldst->addrhi_reg = addrhi;
+
+ if (TCG_TARGET_REG_BITS == 64) {
+ if (TARGET_LONG_BITS == 64) {
+ ttype = TCG_TYPE_I64;
+ trexw = P_REXW;
+ }
+ if (TCG_TYPE_PTR == TCG_TYPE_I64) {
+ hrexw = P_REXW;
+ if (TARGET_PAGE_BITS + CPU_TLB_DYN_MAX_BITS > 32) {
+ tlbtype = TCG_TYPE_I64;
+ tlbrexw = P_REXW;
+ }
+ }
+ }
+
+ tcg_out_mov(s, tlbtype, TCG_REG_L0, addrlo);
+ tcg_out_shifti(s, SHIFT_SHR + tlbrexw, TCG_REG_L0,
+ TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
+
+ tcg_out_modrm_offset(s, OPC_AND_GvEv + trexw, TCG_REG_L0, TCG_AREG0,
+ TLB_MASK_TABLE_OFS(mem_index) +
+ offsetof(CPUTLBDescFast, mask));
+
+ tcg_out_modrm_offset(s, OPC_ADD_GvEv + hrexw, TCG_REG_L0, TCG_AREG0,
+ TLB_MASK_TABLE_OFS(mem_index) +
+ offsetof(CPUTLBDescFast, table));
+
+ /* If the required alignment is at least as large as the access, simply
+ copy the address and mask. For lesser alignments, check that we don't
+ cross pages for the complete access. */
+ if (a_bits >= s_bits) {
+ tcg_out_mov(s, ttype, TCG_REG_L1, addrlo);
+ } else {
+ tcg_out_modrm_offset(s, OPC_LEA + trexw, TCG_REG_L1,
+ addrlo, s_mask - a_mask);
+ }
+ tlb_mask = (target_ulong)TARGET_PAGE_MASK | a_mask;
+ tgen_arithi(s, ARITH_AND + trexw, TCG_REG_L1, tlb_mask, 0);
+
+ /* cmp 0(TCG_REG_L0), TCG_REG_L1 */
+ tcg_out_modrm_offset(s, OPC_CMP_GvEv + trexw,
+ TCG_REG_L1, TCG_REG_L0, cmp_ofs);
+
+ /*
+ * Prepare for both the fast path add of the tlb addend, and the slow
+ * path function argument setup.
+ */
+ *h = (HostAddress) {
+ .base = TCG_REG_L1,
+ .index = -1
+ };
+ tcg_out_mov(s, ttype, h->base, addrlo);
+
+ /* jne slow_path */
+ tcg_out_opc(s, OPC_JCC_long + JCC_JNE, 0, 0, 0);
+ ldst->label_ptr[0] = s->code_ptr;
+ s->code_ptr += 4;
+
+ if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
+ /* cmp 4(TCG_REG_L0), addrhi */
+ tcg_out_modrm_offset(s, OPC_CMP_GvEv, addrhi, TCG_REG_L0, cmp_ofs + 4);
+
+ /* jne slow_path */
+ tcg_out_opc(s, OPC_JCC_long + JCC_JNE, 0, 0, 0);
+ ldst->label_ptr[1] = s->code_ptr;
+ s->code_ptr += 4;
+ }
+
+ /* TLB Hit. */
+
+ /* add addend(TCG_REG_L0), TCG_REG_L1 */
+ tcg_out_modrm_offset(s, OPC_ADD_GvEv + hrexw, h->base, TCG_REG_L0,
+ offsetof(CPUTLBEntry, addend));
+#else
+ if (a_bits) {
+ ldst = new_ldst_label(s);
+
+ ldst->is_ld = is_ld;
+ ldst->oi = oi;
+ ldst->addrlo_reg = addrlo;
+ ldst->addrhi_reg = addrhi;
+
+ tcg_out_testi(s, addrlo, a_mask);
+ /* jne slow_path */
+ tcg_out_opc(s, OPC_JCC_long + JCC_JNE, 0, 0, 0);
+ ldst->label_ptr[0] = s->code_ptr;
+ s->code_ptr += 4;
+ }
+
+ *h = x86_guest_base;
+ h->base = addrlo;
+#endif
+
+ return ldst;
+}
+
static void tcg_out_qemu_ld_direct(TCGContext *s, TCGReg datalo, TCGReg datahi,
HostAddress h, TCGType type, MemOp memop)
{
@@ -2258,35 +2235,18 @@ static void tcg_out_qemu_ld(TCGContext *s, TCGReg datalo, TCGReg datahi,
TCGReg addrlo, TCGReg addrhi,
MemOpIdx oi, TCGType data_type)
{
- MemOp opc = get_memop(oi);
+ TCGLabelQemuLdst *ldst;
HostAddress h;
-#if defined(CONFIG_SOFTMMU)
- tcg_insn_unit *label_ptr[2];
+ ldst = prepare_host_addr(s, &h, addrlo, addrhi, oi, true);
+ tcg_out_qemu_ld_direct(s, datalo, datahi, h, data_type, get_memop(oi));
- tcg_out_tlb_load(s, addrlo, addrhi, get_mmuidx(oi), opc,
- label_ptr, offsetof(CPUTLBEntry, addr_read));
-
- /* TLB Hit. */
- h.base = TCG_REG_L1;
- h.index = -1;
- h.ofs = 0;
- h.seg = 0;
- tcg_out_qemu_ld_direct(s, datalo, datahi, h, data_type, opc);
-
- /* Record the current context of a load into ldst label */
- add_qemu_ldst_label(s, true, data_type, oi, datalo, datahi,
- addrlo, addrhi, s->code_ptr, label_ptr);
-#else
- unsigned a_bits = get_alignment_bits(opc);
- if (a_bits) {
- tcg_out_test_alignment(s, true, addrlo, addrhi, a_bits);
+ if (ldst) {
+ ldst->type = data_type;
+ ldst->datalo_reg = datalo;
+ ldst->datahi_reg = datahi;
+ ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
}
-
- h = x86_guest_base;
- h.base = addrlo;
- tcg_out_qemu_ld_direct(s, datalo, datahi, h, data_type, opc);
-#endif
}
static void tcg_out_qemu_st_direct(TCGContext *s, TCGReg datalo, TCGReg datahi,
@@ -2345,36 +2305,18 @@ static void tcg_out_qemu_st(TCGContext *s, TCGReg datalo, TCGReg datahi,
TCGReg addrlo, TCGReg addrhi,
MemOpIdx oi, TCGType data_type)
{
- MemOp opc = get_memop(oi);
+ TCGLabelQemuLdst *ldst;
HostAddress h;
-#if defined(CONFIG_SOFTMMU)
- tcg_insn_unit *label_ptr[2];
+ ldst = prepare_host_addr(s, &h, addrlo, addrhi, oi, false);
+ tcg_out_qemu_st_direct(s, datalo, datahi, h, get_memop(oi));
- tcg_out_tlb_load(s, addrlo, addrhi, get_mmuidx(oi), opc,
- label_ptr, offsetof(CPUTLBEntry, addr_write));
-
- /* TLB Hit. */
- h.base = TCG_REG_L1;
- h.index = -1;
- h.ofs = 0;
- h.seg = 0;
- tcg_out_qemu_st_direct(s, datalo, datahi, h, opc);
-
- /* Record the current context of a store into ldst label */
- add_qemu_ldst_label(s, false, data_type, oi, datalo, datahi,
- addrlo, addrhi, s->code_ptr, label_ptr);
-#else
- unsigned a_bits = get_alignment_bits(opc);
- if (a_bits) {
- tcg_out_test_alignment(s, false, addrlo, addrhi, a_bits);
+ if (ldst) {
+ ldst->type = data_type;
+ ldst->datalo_reg = datalo;
+ ldst->datahi_reg = datahi;
+ ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
}
-
- h = x86_guest_base;
- h.base = addrlo;
-
- tcg_out_qemu_st_direct(s, datalo, datahi, h, opc);
-#endif
}
static void tcg_out_exit_tb(TCGContext *s, uintptr_t a0)
--
2.34.1
- [PATCH v4 00/54] tcg: Simplify calls to load/store helpers, Richard Henderson, 2023/05/03
- [PATCH v4 01/54] tcg/i386: Rationalize args to tcg_out_qemu_{ld,st}, Richard Henderson, 2023/05/03
- [PATCH v4 02/54] tcg/i386: Generalize multi-part load overlap test, Richard Henderson, 2023/05/03
- [PATCH v4 05/54] tcg/i386: Introduce tcg_out_testi, Richard Henderson, 2023/05/03
- [PATCH v4 04/54] tcg/i386: Drop r0+r1 local variables from tcg_out_tlb_load, Richard Henderson, 2023/05/03
- [PATCH v4 03/54] tcg/i386: Introduce HostAddress, Richard Henderson, 2023/05/03
- [PATCH v4 06/54] tcg/i386: Introduce prepare_host_addr, Richard Henderson <=
- [PATCH v4 07/54] tcg/i386: Use indexed addressing for softmmu fast path, Richard Henderson, 2023/05/03
- [PATCH v4 08/54] tcg/aarch64: Rationalize args to tcg_out_qemu_{ld, st}, Richard Henderson, 2023/05/03
- [PATCH v4 09/54] tcg/aarch64: Introduce HostAddress, Richard Henderson, 2023/05/03
- [PATCH v4 11/54] tcg/arm: Rationalize args to tcg_out_qemu_{ld,st}, Richard Henderson, 2023/05/03
- [PATCH v4 10/54] tcg/aarch64: Introduce prepare_host_addr, Richard Henderson, 2023/05/03
- [PATCH v4 12/54] tcg/arm: Introduce HostAddress, Richard Henderson, 2023/05/03
- [PATCH v4 13/54] tcg/arm: Introduce prepare_host_addr, Richard Henderson, 2023/05/03
- [PATCH v4 14/54] tcg/loongarch64: Rationalize args to tcg_out_qemu_{ld, st}, Richard Henderson, 2023/05/03
- [PATCH v4 16/54] tcg/loongarch64: Introduce prepare_host_addr, Richard Henderson, 2023/05/03
- [PATCH v4 15/54] tcg/loongarch64: Introduce HostAddress, Richard Henderson, 2023/05/03