[Top][All Lists]
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: [Qemu-stable] [Qemu-devel] [PATCH v2 1/3] tcg/arm: fix TLB access in
From: |
Laurent Desnogues |
Subject: |
Re: [Qemu-stable] [Qemu-devel] [PATCH v2 1/3] tcg/arm: fix TLB access in qemu-ld/st ops |
Date: |
Wed, 31 Oct 2012 14:54:38 +0100 |
On Tue, Oct 30, 2012 at 1:18 AM, <address@hidden> wrote:
> From: Aurelien Jarno <address@hidden>
>
> The TCG arm backend considers likely that the offset to the TLB
> entries does not exceed 12 bits for mem_index = 0. In practice this is
> not true for at least the MIPS target.
>
> The current patch fixes that by loading the bits 23-12 with a separate
> instruction, and using loads with address writeback, independently of
> the value of mem_idx. In total this allow a 24-bit offset, which is a
> lot more than needed.
>
> Cc: Andrzej Zaborowski <address@hidden>
> Cc: Peter Maydell <address@hidden>
> Cc: address@hidden
> Signed-off-by: Aurelien Jarno <address@hidden>
> ---
> tcg/arm/tcg-target.c | 77
> +++++++++++++++++++++++++++-----------------------
> 1 file changed, 41 insertions(+), 36 deletions(-)
>
> diff --git a/tcg/arm/tcg-target.c b/tcg/arm/tcg-target.c
> index e790bf0..03b1576 100644
> --- a/tcg/arm/tcg-target.c
> +++ b/tcg/arm/tcg-target.c
> @@ -639,6 +639,22 @@ static inline void tcg_out_ld32_12(TCGContext *s, int
> cond,
> (rn << 16) | (rd << 12) | ((-im) & 0xfff));
> }
>
> +/* Offset pre-increment with base writeback. */
> +static inline void tcg_out_ld32_12wb(TCGContext *s, int cond,
> + int rd, int rn, tcg_target_long im)
> +{
> + /* ldr with writeback and both register equals is UNPREDICTABLE */
> + assert(rd != rn);
> +
> + if (im >= 0) {
> + tcg_out32(s, (cond << 28) | 0x05b00000 |
> + (rn << 16) | (rd << 12) | (im & 0xfff));
> + } else {
> + tcg_out32(s, (cond << 28) | 0x05300000 |
> + (rn << 16) | (rd << 12) | ((-im) & 0xfff));
> + }
> +}
> +
> static inline void tcg_out_st32_12(TCGContext *s, int cond,
> int rd, int rn, tcg_target_long im)
> {
> @@ -1071,7 +1087,7 @@ static inline void tcg_out_qemu_ld(TCGContext *s, const
> TCGArg *args, int opc)
> {
> int addr_reg, data_reg, data_reg2, bswap;
> #ifdef CONFIG_SOFTMMU
> - int mem_index, s_bits;
> + int mem_index, s_bits, tlb_offset;
> TCGReg argreg;
> # if TARGET_LONG_BITS == 64
> int addr_reg2;
> @@ -1111,19 +1127,15 @@ static inline void tcg_out_qemu_ld(TCGContext *s,
> const TCGArg *args, int opc)
> TCG_REG_R0, TCG_REG_R8, CPU_TLB_SIZE - 1);
> tcg_out_dat_reg(s, COND_AL, ARITH_ADD, TCG_REG_R0, TCG_AREG0,
> TCG_REG_R0, SHIFT_IMM_LSL(CPU_TLB_ENTRY_BITS));
> - /* In the
> - * ldr r1 [r0, #(offsetof(CPUArchState,
> tlb_table[mem_index][0].addr_read))]
> - * below, the offset is likely to exceed 12 bits if mem_index != 0 and
> - * not exceed otherwise, so use an
> - * add r0, r0, #(mem_index * sizeof *CPUArchState.tlb_table)
> - * before.
> - */
> - if (mem_index)
> + /* We assume that the offset is contained within 24 bits. */
> + tlb_offset = offsetof(CPUArchState, tlb_table[mem_index][0].addr_read);
> + assert(tlb_offset & ~0xffffff == 0);
> + if (tlb_offset > 0xfff) {
> tcg_out_dat_imm(s, COND_AL, ARITH_ADD, TCG_REG_R0, TCG_REG_R0,
> - (mem_index << (TLB_SHIFT & 1)) |
> - ((16 - (TLB_SHIFT >> 1)) << 8));
> - tcg_out_ld32_12(s, COND_AL, TCG_REG_R1, TCG_REG_R0,
> - offsetof(CPUArchState, tlb_table[0][0].addr_read));
> + 0xa00 | (tlb_offset >> 12));
Isn't it 20 bits rather than 24 bits since the immediate is 8-bit right-rotated
by 20?
Laurent
> + tlb_offset &= 0xfff;
> + }
> + tcg_out_ld32_12wb(s, COND_AL, TCG_REG_R1, TCG_REG_R0, tlb_offset);
> tcg_out_dat_reg(s, COND_AL, ARITH_CMP, 0, TCG_REG_R1,
> TCG_REG_R8, SHIFT_IMM_LSL(TARGET_PAGE_BITS));
> /* Check alignment. */
> @@ -1131,15 +1143,14 @@ static inline void tcg_out_qemu_ld(TCGContext *s,
> const TCGArg *args, int opc)
> tcg_out_dat_imm(s, COND_EQ, ARITH_TST,
> 0, addr_reg, (1 << s_bits) - 1);
> # if TARGET_LONG_BITS == 64
> - /* XXX: possibly we could use a block data load or writeback in
> - * the first access. */
> - tcg_out_ld32_12(s, COND_EQ, TCG_REG_R1, TCG_REG_R0,
> - offsetof(CPUArchState, tlb_table[0][0].addr_read) + 4);
> + /* XXX: possibly we could use a block data load in the first access. */
> + tcg_out_ld32_12(s, COND_EQ, TCG_REG_R1, TCG_REG_R0, 4);
> tcg_out_dat_reg(s, COND_EQ, ARITH_CMP, 0,
> TCG_REG_R1, addr_reg2, SHIFT_IMM_LSL(0));
> # endif
> tcg_out_ld32_12(s, COND_EQ, TCG_REG_R1, TCG_REG_R0,
> - offsetof(CPUArchState, tlb_table[0][0].addend));
> + offsetof(CPUTLBEntry, addend)
> + - offsetof(CPUTLBEntry, addr_read));
>
> switch (opc) {
> case 0:
> @@ -1288,7 +1299,7 @@ static inline void tcg_out_qemu_st(TCGContext *s, const
> TCGArg *args, int opc)
> {
> int addr_reg, data_reg, data_reg2, bswap;
> #ifdef CONFIG_SOFTMMU
> - int mem_index, s_bits;
> + int mem_index, s_bits, tlb_offset;
> TCGReg argreg;
> # if TARGET_LONG_BITS == 64
> int addr_reg2;
> @@ -1325,19 +1336,14 @@ static inline void tcg_out_qemu_st(TCGContext *s,
> const TCGArg *args, int opc)
> TCG_REG_R0, TCG_REG_R8, CPU_TLB_SIZE - 1);
> tcg_out_dat_reg(s, COND_AL, ARITH_ADD, TCG_REG_R0,
> TCG_AREG0, TCG_REG_R0,
> SHIFT_IMM_LSL(CPU_TLB_ENTRY_BITS));
> - /* In the
> - * ldr r1 [r0, #(offsetof(CPUArchState,
> tlb_table[mem_index][0].addr_write))]
> - * below, the offset is likely to exceed 12 bits if mem_index != 0 and
> - * not exceed otherwise, so use an
> - * add r0, r0, #(mem_index * sizeof *CPUArchState.tlb_table)
> - * before.
> - */
> - if (mem_index)
> + /* We assume that the offset is contained within 24 bits. */
> + tlb_offset = offsetof(CPUArchState, tlb_table[mem_index][0].addr_write);
> + if (tlb_offset > 0xfff) {
> tcg_out_dat_imm(s, COND_AL, ARITH_ADD, TCG_REG_R0, TCG_REG_R0,
> - (mem_index << (TLB_SHIFT & 1)) |
> - ((16 - (TLB_SHIFT >> 1)) << 8));
> - tcg_out_ld32_12(s, COND_AL, TCG_REG_R1, TCG_REG_R0,
> - offsetof(CPUArchState, tlb_table[0][0].addr_write));
> + 0xa00 | (tlb_offset >> 12));
> + tlb_offset &= 0xfff;
> + }
> + tcg_out_ld32_12wb(s, COND_AL, TCG_REG_R1, TCG_REG_R0, tlb_offset);
> tcg_out_dat_reg(s, COND_AL, ARITH_CMP, 0, TCG_REG_R1,
> TCG_REG_R8, SHIFT_IMM_LSL(TARGET_PAGE_BITS));
> /* Check alignment. */
> @@ -1345,15 +1351,14 @@ static inline void tcg_out_qemu_st(TCGContext *s,
> const TCGArg *args, int opc)
> tcg_out_dat_imm(s, COND_EQ, ARITH_TST,
> 0, addr_reg, (1 << s_bits) - 1);
> # if TARGET_LONG_BITS == 64
> - /* XXX: possibly we could use a block data load or writeback in
> - * the first access. */
> - tcg_out_ld32_12(s, COND_EQ, TCG_REG_R1, TCG_REG_R0,
> - offsetof(CPUArchState, tlb_table[0][0].addr_write) + 4);
> + /* XXX: possibly we could use a block data load in the first access. */
> + tcg_out_ld32_12(s, COND_EQ, TCG_REG_R1, TCG_REG_R0, 4);
> tcg_out_dat_reg(s, COND_EQ, ARITH_CMP, 0,
> TCG_REG_R1, addr_reg2, SHIFT_IMM_LSL(0));
> # endif
> tcg_out_ld32_12(s, COND_EQ, TCG_REG_R1, TCG_REG_R0,
> - offsetof(CPUArchState, tlb_table[0][0].addend));
> + offsetof(CPUTLBEntry, addend)
> + - offsetof(CPUTLBEntry, addr_write));
>
> switch (opc) {
> case 0:
> --
> 1.7.10.4
>
>
- Re: [Qemu-stable] [Qemu-devel] [PATCH v2 1/3] tcg/arm: fix TLB access in qemu-ld/st ops,
Laurent Desnogues <=