[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[Qemu-devel] [PATCH v2 5/8] tcg-arm: Move load of tlb addend into tcg_out_tlb_read
From: |
Richard Henderson |
Subject: |
[Qemu-devel] [PATCH v2 5/8] tcg-arm: Move load of tlb addend into tcg_out_tlb_read |
Date: |
Fri, 30 Aug 2013 10:47:14 -0700 |
This allows us to make more intelligent decisions about the relative
offsets of the tlb comparator and the addend, avoiding any need for
writeback addressing.
Signed-off-by: Richard Henderson <address@hidden>
---
tcg/arm/tcg-target.c | 60 ++++++++++++++++++++--------------------------------
1 file changed, 23 insertions(+), 37 deletions(-)
diff --git a/tcg/arm/tcg-target.c b/tcg/arm/tcg-target.c
index f9311ac..c834232 100644
--- a/tcg/arm/tcg-target.c
+++ b/tcg/arm/tcg-target.c
@@ -1171,42 +1171,39 @@ QEMU_BUILD_BUG_ON(CPU_TLB_BITS > 8);
QEMU_BUILD_BUG_ON(offsetof(CPUArchState, tlb_table[NB_MMU_MODES - 1][1])
> 0xffff);
-/* Load and compare a TLB entry, leaving the flags set. Leaves R2 pointing
- to the tlb entry. Clobbers R1 and TMP. */
+/* Load and compare a TLB entry, leaving the flags set. Leaves R1 containing
+ the addend of the tlb entry. Clobbers R0, R2, TMP. */
static void tcg_out_tlb_read(TCGContext *s, TCGReg addrlo, TCGReg addrhi,
- int s_bits, int tlb_offset)
+ int s_bits, int mem_index, bool is_load)
{
TCGReg base = TCG_AREG0;
+ int cmp_off =
+ (is_load
+ ? offsetof(CPUArchState, tlb_table[mem_index][0].addr_read)
+ : offsetof(CPUArchState, tlb_table[mem_index][0].addr_write));
+ int add_off = offsetof(CPUArchState, tlb_table[mem_index][0].addend);
/* Should generate something like the following:
- * pre-v7:
* shr tmp, addr_reg, #TARGET_PAGE_BITS (1)
- * add r2, env, #off & 0xff00
+ * add r2, env, #high
* and r0, tmp, #(CPU_TLB_SIZE - 1) (2)
* add r2, r2, r0, lsl #CPU_TLB_ENTRY_BITS (3)
- * ldr r0, [r2, #off & 0xff]! (4)
+ * ldr r0, [r2, #cmp] (4)
* tst addr_reg, #s_mask
* cmpeq r0, tmp, lsl #TARGET_PAGE_BITS (5)
- *
- * v7 (not implemented yet):
- * ubfx r2, addr_reg, #TARGET_PAGE_BITS, #CPU_TLB_BITS (1)
- * movw tmp, #~TARGET_PAGE_MASK & ~s_mask
- * movw r0, #off
- * add r2, env, r2, lsl #CPU_TLB_ENTRY_BITS (2)
- * bic tmp, addr_reg, tmp
- * ldr r0, [r2, r0]! (3)
- * cmp r0, tmp (4)
+ * ldr r1, [r2, #add]
*/
tcg_out_dat_reg(s, COND_AL, ARITH_MOV, TCG_REG_TMP,
0, addrlo, SHIFT_IMM_LSR(TARGET_PAGE_BITS));
/* We checked that the offset is contained within 16 bits above. */
- if (tlb_offset > 0xff) {
+ if (add_off > 0xfff || (use_armv6_instructions && cmp_off > 0xff)) {
tcg_out_dat_imm(s, COND_AL, ARITH_ADD, TCG_REG_R2, base,
- (24 << 7) | (tlb_offset >> 8));
- tlb_offset &= 0xff;
+ (24 << 7) | (cmp_off >> 8));
base = TCG_REG_R2;
+ add_off -= cmp_off & 0xff00;
+ cmp_off &= 0xff;
}
tcg_out_dat_imm(s, COND_AL, ARITH_AND,
@@ -1218,14 +1215,11 @@ static void tcg_out_tlb_read(TCGContext *s, TCGReg addrlo, TCGReg addrhi,
but due to how the pointer needs setting up, ldm isn't useful.
Base arm5 doesn't have ldrd, but armv5te does. */
if (use_armv6_instructions && TARGET_LONG_BITS == 64) {
- tcg_out_memop_8(s, COND_AL, INSN_LDRD_IMM, TCG_REG_R0,
- TCG_REG_R2, tlb_offset, 1, 1);
+ tcg_out_ldrd_8(s, COND_AL, TCG_REG_R0, TCG_REG_R2, cmp_off);
} else {
- tcg_out_memop_12(s, COND_AL, INSN_LDR_IMM, TCG_REG_R0,
- TCG_REG_R2, tlb_offset, 1, 1);
+ tcg_out_ld32_12(s, COND_AL, TCG_REG_R0, TCG_REG_R2, cmp_off);
if (TARGET_LONG_BITS == 64) {
- tcg_out_memop_12(s, COND_AL, INSN_LDR_IMM, TCG_REG_R1,
- TCG_REG_R2, 4, 1, 0);
+ tcg_out_ld32_12(s, COND_AL, TCG_REG_R1, TCG_REG_R2, cmp_off + 4);
}
}
@@ -1242,6 +1236,9 @@ static void tcg_out_tlb_read(TCGContext *s, TCGReg addrlo, TCGReg addrhi,
tcg_out_dat_reg(s, COND_EQ, ARITH_CMP, 0,
TCG_REG_R1, addrhi, SHIFT_IMM_LSL(0));
}
+
+ /* Load the tlb addend. */
+ tcg_out_ld32_12(s, COND_AL, TCG_REG_R1, TCG_REG_R2, add_off);
}
/* Record the context of a call to the out of line helper code for the slow
@@ -1385,18 +1382,13 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int opc)
mem_index = *args;
s_bits = opc & 3;
- tcg_out_tlb_read(s, addr_reg, addr_reg2, s_bits,
- offsetof(CPUArchState, tlb_table[mem_index][0].addr_read));
+ tcg_out_tlb_read(s, addr_reg, addr_reg2, s_bits, mem_index, 1);
/* This a conditional BL only to load a pointer within this opcode into LR
for the slow path. We will not be using the value for a tail call. */
label_ptr = s->code_ptr;
tcg_out_bl_noaddr(s, COND_NE);
- tcg_out_ld32_12(s, COND_AL, TCG_REG_R1, TCG_REG_R2,
- offsetof(CPUTLBEntry, addend)
- - offsetof(CPUTLBEntry, addr_read));
-
switch (opc) {
case 0:
tcg_out_ld8_r(s, COND_AL, data_reg, addr_reg, TCG_REG_R1);
@@ -1532,13 +1524,7 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc)
mem_index = *args;
s_bits = opc & 3;
- tcg_out_tlb_read(s, addr_reg, addr_reg2, s_bits,
- offsetof(CPUArchState,
- tlb_table[mem_index][0].addr_write));
-
- tcg_out_ld32_12(s, COND_AL, TCG_REG_R1, TCG_REG_R2,
- offsetof(CPUTLBEntry, addend)
- - offsetof(CPUTLBEntry, addr_write));
+ tcg_out_tlb_read(s, addr_reg, addr_reg2, s_bits, mem_index, 0);
switch (opc) {
case 0:
--
1.8.1.4
- [Qemu-devel] [PATCH v2 0/8] tcg-arm ldst improvements, Richard Henderson, 2013/08/30
- [Qemu-devel] [PATCH v2 1/8] tcg-arm: Use ldrd/strd for appropriate qemu_ld/st64, Richard Henderson, 2013/08/30
- [Qemu-devel] [PATCH v2 3/8] tcg-arm: Use strd for tcg_out_arg_reg64, Richard Henderson, 2013/08/30
- [Qemu-devel] [PATCH v2 4/8] tcg-arm: Use QEMU_BUILD_BUG_ON to verify constraints on tlb, Richard Henderson, 2013/08/30
- [Qemu-devel] [PATCH v2 2/8] tcg-arm: Rearrange slow-path qemu_ld/st, Richard Henderson, 2013/08/30
- [Qemu-devel] [PATCH v2 5/8] tcg-arm: Move load of tlb addend into tcg_out_tlb_read,
Richard Henderson <=
- [Qemu-devel] [PATCH v2 6/8] tcg-arm: Return register containing tlb addend, Richard Henderson, 2013/08/30
- [Qemu-devel] [PATCH v2 7/8] tcg-arm: Remove restriction on qemu_ld output register, Richard Henderson, 2013/08/30
- [Qemu-devel] [PATCH v2 8/8] tcg-arm: Move the tlb addend load earlier, Richard Henderson, 2013/08/30