[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: [Qemu-devel] [PATCH 7/7] tcg-i386: Perform tail call to qemu_ret_ld*_mmu
From: Paolo Bonzini
Subject: Re: [Qemu-devel] [PATCH 7/7] tcg-i386: Perform tail call to qemu_ret_ld*_mmu
Date: Thu, 29 Aug 2013 17:31:14 +0200
User-agent: Mozilla/5.0 (X11; Linux x86_64; rv:17.0) Gecko/20130805 Thunderbird/17.0.8
Il 27/08/2013 23:46, Richard Henderson ha scritto:
> This does require the fast path always load to the function return
> value register, but apparently the loaded value usually needs to be
> spilled back to its memory slot anyway so the change in register
> does not really change much.
Even for something like
mov (%rdi), %rax
add (%r8), %rax
? Memory operands should avoid the need to spill anything.
Is this change really an advantage considering the additional icache
footprint of the new helpers?
Paolo
> Signed-off-by: Richard Henderson <address@hidden>
> ---
> tcg/i386/tcg-target.c | 107
> ++++++++++++++++++--------------------------------
> 1 file changed, 39 insertions(+), 68 deletions(-)
>
> diff --git a/tcg/i386/tcg-target.c b/tcg/i386/tcg-target.c
> index 5aee0fa..b1d05b8 100644
> --- a/tcg/i386/tcg-target.c
> +++ b/tcg/i386/tcg-target.c
> @@ -1025,11 +1025,20 @@ static void tcg_out_jmp(TCGContext *s,
> tcg_target_long dest)
> /* helper signature: helper_ret_ld_mmu(CPUState *env, target_ulong addr,
> * int mmu_idx, uintptr_t ra)
> */
> -static const void * const qemu_ld_helpers[4] = {
> +static const void * const qemu_ld_helpers[8] = {
> helper_ret_ldub_mmu,
> helper_ret_lduw_mmu,
> helper_ret_ldul_mmu,
> helper_ret_ldq_mmu,
> +
> + helper_ret_ldsb_mmu,
> + helper_ret_ldsw_mmu,
> +#if TCG_TARGET_REG_BITS == 64
> + helper_ret_ldsl_mmu,
> +#else
> + helper_ret_ldul_mmu,
> +#endif
> + helper_ret_ldq_mmu,
> };
>
> /* helper signature: helper_ret_st_mmu(CPUState *env, target_ulong addr,
> @@ -1473,9 +1482,8 @@ static void add_qemu_ldst_label(TCGContext *s,
> static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
> {
> int opc = l->opc;
> - int s_bits = opc & 3;
> - TCGReg data_reg;
> uint8_t **label_ptr = &l->label_ptr[0];
> + TCGReg retaddr;
>
> /* resolve label address */
> *(uint32_t *)label_ptr[0] = (uint32_t)(s->code_ptr - label_ptr[0] - 4);
> @@ -1500,58 +1508,21 @@ static void tcg_out_qemu_ld_slow_path(TCGContext *s,
> TCGLabelQemuLdst *l)
> tcg_out_sti(s, TCG_TYPE_I32, TCG_REG_ESP, ofs, l->mem_index);
> ofs += 4;
>
> - tcg_out_sti(s, TCG_TYPE_I32, TCG_REG_ESP, ofs, (uintptr_t)l->raddr);
> + retaddr = TCG_REG_EAX;
> + tcg_out_movi(s, TCG_TYPE_I32, retaddr, (uintptr_t)l->raddr);
> + tcg_out_st(s, TCG_TYPE_I32, retaddr, TCG_REG_ESP, ofs);
> } else {
> tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0],
> TCG_AREG0);
> /* The second argument is already loaded with addrlo. */
> tcg_out_movi(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[2],
> l->mem_index);
> - tcg_out_movi(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[3],
> - (uintptr_t)l->raddr);
> - }
> -
> - tcg_out_calli(s, (tcg_target_long)qemu_ld_helpers[s_bits]);
> -
> - data_reg = l->datalo_reg;
> - switch(opc) {
> - case 0 | 4:
> - tcg_out_ext8s(s, data_reg, TCG_REG_EAX, P_REXW);
> - break;
> - case 1 | 4:
> - tcg_out_ext16s(s, data_reg, TCG_REG_EAX, P_REXW);
> - break;
> - case 0:
> - tcg_out_ext8u(s, data_reg, TCG_REG_EAX);
> - break;
> - case 1:
> - tcg_out_ext16u(s, data_reg, TCG_REG_EAX);
> - break;
> - case 2:
> - tcg_out_mov(s, TCG_TYPE_I32, data_reg, TCG_REG_EAX);
> - break;
> -#if TCG_TARGET_REG_BITS == 64
> - case 2 | 4:
> - tcg_out_ext32s(s, data_reg, TCG_REG_EAX);
> - break;
> -#endif
> - case 3:
> - if (TCG_TARGET_REG_BITS == 64) {
> - tcg_out_mov(s, TCG_TYPE_I64, data_reg, TCG_REG_RAX);
> - } else if (data_reg == TCG_REG_EDX) {
> - /* xchg %edx, %eax */
> - tcg_out_opc(s, OPC_XCHG_ax_r32 + TCG_REG_EDX, 0, 0, 0);
> - tcg_out_mov(s, TCG_TYPE_I32, l->datahi_reg, TCG_REG_EAX);
> - } else {
> - tcg_out_mov(s, TCG_TYPE_I32, data_reg, TCG_REG_EAX);
> - tcg_out_mov(s, TCG_TYPE_I32, l->datahi_reg, TCG_REG_EDX);
> - }
> - break;
> - default:
> - tcg_abort();
> + retaddr = tcg_target_call_iarg_regs[3];
> + tcg_out_movi(s, TCG_TYPE_PTR, retaddr, (uintptr_t)l->raddr);
> }
>
> - /* Jump to the code corresponding to next IR of qemu_st */
> - tcg_out_jmp(s, (tcg_target_long)l->raddr);
> + /* "Tail call" to the helper, with the return address back inline. */
> + tcg_out_push(s, retaddr);
> + tcg_out_jmp(s, (tcg_target_long)qemu_ld_helpers[opc]);
> }
>
> /*
> @@ -2125,38 +2096,38 @@ static const TCGTargetOpDef x86_op_defs[] = {
> #endif
>
> #if TCG_TARGET_REG_BITS == 64
> - { INDEX_op_qemu_ld8u, { "r", "L" } },
> - { INDEX_op_qemu_ld8s, { "r", "L" } },
> - { INDEX_op_qemu_ld16u, { "r", "L" } },
> - { INDEX_op_qemu_ld16s, { "r", "L" } },
> - { INDEX_op_qemu_ld32, { "r", "L" } },
> - { INDEX_op_qemu_ld32u, { "r", "L" } },
> - { INDEX_op_qemu_ld32s, { "r", "L" } },
> - { INDEX_op_qemu_ld64, { "r", "L" } },
> + { INDEX_op_qemu_ld8u, { "a", "L" } },
> + { INDEX_op_qemu_ld8s, { "a", "L" } },
> + { INDEX_op_qemu_ld16u, { "a", "L" } },
> + { INDEX_op_qemu_ld16s, { "a", "L" } },
> + { INDEX_op_qemu_ld32, { "a", "L" } },
> + { INDEX_op_qemu_ld32u, { "a", "L" } },
> + { INDEX_op_qemu_ld32s, { "a", "L" } },
> + { INDEX_op_qemu_ld64, { "a", "L" } },
>
> { INDEX_op_qemu_st8, { "L", "L" } },
> { INDEX_op_qemu_st16, { "L", "L" } },
> { INDEX_op_qemu_st32, { "L", "L" } },
> { INDEX_op_qemu_st64, { "L", "L" } },
> #elif TARGET_LONG_BITS <= TCG_TARGET_REG_BITS
> - { INDEX_op_qemu_ld8u, { "r", "L" } },
> - { INDEX_op_qemu_ld8s, { "r", "L" } },
> - { INDEX_op_qemu_ld16u, { "r", "L" } },
> - { INDEX_op_qemu_ld16s, { "r", "L" } },
> - { INDEX_op_qemu_ld32, { "r", "L" } },
> - { INDEX_op_qemu_ld64, { "r", "r", "L" } },
> + { INDEX_op_qemu_ld8u, { "a", "L" } },
> + { INDEX_op_qemu_ld8s, { "a", "L" } },
> + { INDEX_op_qemu_ld16u, { "a", "L" } },
> + { INDEX_op_qemu_ld16s, { "a", "L" } },
> + { INDEX_op_qemu_ld32, { "a", "L" } },
> + { INDEX_op_qemu_ld64, { "a", "d", "L" } },
>
> { INDEX_op_qemu_st8, { "cb", "L" } },
> { INDEX_op_qemu_st16, { "L", "L" } },
> { INDEX_op_qemu_st32, { "L", "L" } },
> { INDEX_op_qemu_st64, { "L", "L", "L" } },
> #else
> - { INDEX_op_qemu_ld8u, { "r", "L", "L" } },
> - { INDEX_op_qemu_ld8s, { "r", "L", "L" } },
> - { INDEX_op_qemu_ld16u, { "r", "L", "L" } },
> - { INDEX_op_qemu_ld16s, { "r", "L", "L" } },
> - { INDEX_op_qemu_ld32, { "r", "L", "L" } },
> - { INDEX_op_qemu_ld64, { "r", "r", "L", "L" } },
> + { INDEX_op_qemu_ld8u, { "a", "L", "L" } },
> + { INDEX_op_qemu_ld8s, { "a", "L", "L" } },
> + { INDEX_op_qemu_ld16u, { "a", "L", "L" } },
> + { INDEX_op_qemu_ld16s, { "a", "L", "L" } },
> + { INDEX_op_qemu_ld32, { "a", "L", "L" } },
> + { INDEX_op_qemu_ld64, { "a", "d", "L", "L" } },
>
> { INDEX_op_qemu_st8, { "cb", "L", "L" } },
> { INDEX_op_qemu_st16, { "L", "L", "L" } },
>
- [Qemu-devel] [PATCH 0/7] Further tcg ldst improvements, Richard Henderson, 2013/08/27
- [Qemu-devel] [PATCH 1/7] exec: Reorganize the GETRA/GETPC macros, Richard Henderson, 2013/08/27
- [Qemu-devel] [PATCH 4/7] target: Include softmmu_exec.h where forgotten, Richard Henderson, 2013/08/27
- [Qemu-devel] [PATCH 3/7] exec: Rename USUFFIX to LSUFFIX, Richard Henderson, 2013/08/27
- [Qemu-devel] [PATCH 5/7] exec: Split softmmu_defs.h, Richard Henderson, 2013/08/27
- [Qemu-devel] [PATCH 2/7] tcg-i386: Don't perform GETPC adjustment in TCG code, Richard Henderson, 2013/08/27
- [Qemu-devel] [PATCH 7/7] tcg-i386: Perform tail call to qemu_ret_ld*_mmu, Richard Henderson, 2013/08/27
- Re: [Qemu-devel] [PATCH 7/7] tcg-i386: Perform tail call to qemu_ret_ld*_mmu, Aurelien Jarno, 2013/08/29
- [Qemu-devel] [PATCH 6/7] tcg: Introduce zero and sign-extended versions of load helpers, Richard Henderson, 2013/08/27