qemu-s390x
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [PATCH v5 10/30] tcg: Add routines for calling slow-path helpers


From: Alex Bennée
Subject: Re: [PATCH v5 10/30] tcg: Add routines for calling slow-path helpers
Date: Wed, 10 May 2023 13:27:28 +0100
User-agent: mu4e 1.11.4; emacs 29.0.90

Richard Henderson <richard.henderson@linaro.org> writes:

> Add tcg_out_ld_helper_args, tcg_out_ld_helper_ret,
> and tcg_out_st_helper_args.  These and their subroutines
> use the existing knowledge of the host function call abi
> to load the function call arguments and return results.
>
> These will be used to simplify the backends in turn.
>
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> ---
>  tcg/tcg.c | 456 +++++++++++++++++++++++++++++++++++++++++++++++++++++-
>  1 file changed, 453 insertions(+), 3 deletions(-)
>
> diff --git a/tcg/tcg.c b/tcg/tcg.c
> index 057423c121..748be8426a 100644
> --- a/tcg/tcg.c
> +++ b/tcg/tcg.c
> @@ -181,6 +181,22 @@ static bool tcg_target_const_match(int64_t val, TCGType 
> type, int ct);
>  static int tcg_out_ldst_finalize(TCGContext *s);
>  #endif
>  
> +typedef struct TCGLdstHelperParam {
> +    TCGReg (*ra_gen)(TCGContext *s, const TCGLabelQemuLdst *l, int arg_reg);
> +    unsigned ntmp;
> +    int tmp[3];
> +} TCGLdstHelperParam;
> +
> +static void tcg_out_ld_helper_args(TCGContext *s, const TCGLabelQemuLdst *l,
> +                                   const TCGLdstHelperParam *p)
> +    __attribute__((unused));
> +static void tcg_out_ld_helper_ret(TCGContext *s, const TCGLabelQemuLdst *l,
> +                                  bool load_sign, const TCGLdstHelperParam 
> *p)
> +    __attribute__((unused));
> +static void tcg_out_st_helper_args(TCGContext *s, const TCGLabelQemuLdst *l,
> +                                   const TCGLdstHelperParam *p)
> +    __attribute__((unused));
> +
>  TCGContext tcg_init_ctx;
>  __thread TCGContext *tcg_ctx;
>  
> @@ -459,9 +475,8 @@ static void tcg_out_movext1(TCGContext *s, const 
> TCGMovExtend *i)
>   * between the sources and destinations.
>   */
>  
> -static void __attribute__((unused))
> -tcg_out_movext2(TCGContext *s, const TCGMovExtend *i1,
> -                const TCGMovExtend *i2, int scratch)
> +static void tcg_out_movext2(TCGContext *s, const TCGMovExtend *i1,
> +                            const TCGMovExtend *i2, int scratch)
>  {
>      TCGReg src1 = i1->src;
>      TCGReg src2 = i2->src;
> @@ -715,6 +730,50 @@ static TCGHelperInfo all_helpers[] = {
>  };
>  static GHashTable *helper_table;
>  
> +#if TCG_TARGET_REG_BITS == 32
> +# define dh_typecode_ttl  dh_typecode_i32
> +#else
> +# define dh_typecode_ttl  dh_typecode_i64
> +#endif
> +

Might be worth a small comment to mention this is doing manually what
the HELPER wrappers do although we don't use them for reasons.

> +static TCGHelperInfo info_helper_ld32_mmu = {
> +    .flags = TCG_CALL_NO_WG,
> +    .typemask = dh_typemask(ttl, 0)  /* return tcg_target_ulong */
> +              | dh_typemask(env, 1)
> +              | dh_typemask(tl, 2)   /* target_ulong addr */
> +              | dh_typemask(i32, 3)  /* unsigned oi */
> +              | dh_typemask(ptr, 4)  /* uintptr_t ra */
> +};
> +
> +static TCGHelperInfo info_helper_ld64_mmu = {
> +    .flags = TCG_CALL_NO_WG,
> +    .typemask = dh_typemask(i64, 0)  /* return uint64_t */
> +              | dh_typemask(env, 1)
> +              | dh_typemask(tl, 2)   /* target_ulong addr */
> +              | dh_typemask(i32, 3)  /* unsigned oi */
> +              | dh_typemask(ptr, 4)  /* uintptr_t ra */
> +};
> +
> +static TCGHelperInfo info_helper_st32_mmu = {
> +    .flags = TCG_CALL_NO_WG,
> +    .typemask = dh_typemask(void, 0)
> +              | dh_typemask(env, 1)
> +              | dh_typemask(tl, 2)   /* target_ulong addr */
> +              | dh_typemask(i32, 3)  /* uint32_t data */
> +              | dh_typemask(i32, 4)  /* unsigned oi */
> +              | dh_typemask(ptr, 5)  /* uintptr_t ra */
> +};
> +
> +static TCGHelperInfo info_helper_st64_mmu = {
> +    .flags = TCG_CALL_NO_WG,
> +    .typemask = dh_typemask(void, 0)
> +              | dh_typemask(env, 1)
> +              | dh_typemask(tl, 2)   /* target_ulong addr */
> +              | dh_typemask(i64, 3)  /* uint64_t data */
> +              | dh_typemask(i32, 4)  /* unsigned oi */
> +              | dh_typemask(ptr, 5)  /* uintptr_t ra */
> +};
> +
>  #ifdef CONFIG_TCG_INTERPRETER
>  static ffi_type *typecode_to_ffi(int argmask)
>  {
> @@ -1126,6 +1185,11 @@ static void tcg_context_init(unsigned max_cpus)
>                              (gpointer)&all_helpers[i]);
>      }
>  
> +    init_call_layout(&info_helper_ld32_mmu);
> +    init_call_layout(&info_helper_ld64_mmu);
> +    init_call_layout(&info_helper_st32_mmu);
> +    init_call_layout(&info_helper_st64_mmu);
> +
>  #ifdef CONFIG_TCG_INTERPRETER
>      init_ffi_layouts();
>  #endif
> @@ -5011,6 +5075,392 @@ static void tcg_reg_alloc_call(TCGContext *s, TCGOp 
> *op)
>      }
>  }
>  
> +/*
> + * Similarly for qemu_ld/st slow path helpers.
> + * We must re-implement tcg_gen_callN and tcg_reg_alloc_call simultaneously,
> + * using only the provided backend tcg_out_* functions.
> + */
> +
> +static int tcg_out_helper_stk_ofs(TCGType type, unsigned slot)
> +{
> +    int ofs = arg_slot_stk_ofs(slot);
> +
> +    /*
> +     * Each stack slot is TCG_TARGET_LONG_BITS.  If the host does not
> +     * require extension to uint64_t, adjust the address for uint32_t.
> +     */
> +    if (HOST_BIG_ENDIAN &&
> +        TCG_TARGET_REG_BITS == 64 &&
> +        type == TCG_TYPE_I32) {
> +        ofs += 4;
> +    }
> +    return ofs;
> +}
> +
> +static void tcg_out_helper_load_regs(TCGContext *s,
> +                                     unsigned nmov, TCGMovExtend *mov,
> +                                     unsigned ntmp, const int *tmp)
> +{
> +    switch (nmov) {
> +    default:
> +        /* The backend must have provided enough temps for the worst case. */
> +        tcg_debug_assert(ntmp + 1 >= nmov);
> +
> +        for (unsigned i = nmov - 1; i >= 2; --i) {
> +            TCGReg dst = mov[i].dst;
> +
> +            for (unsigned j = 0; j < i; ++j) {
> +                if (dst == mov[j].src) {
> +                    /*
> +                     * Conflict.
> +                     * Copy the source to a temporary, recurse for the
> +                     * remaining moves, perform the extension from our
> +                     * scratch on the way out.
> +                     */
> +                    TCGReg scratch = tmp[--ntmp];
> +                    tcg_out_mov(s, mov[i].src_type, scratch, mov[i].src);
> +                    mov[i].src = scratch;
> +
> +                    tcg_out_helper_load_regs(s, i, mov, ntmp, tmp);
> +                    tcg_out_movext1(s, &mov[i]);
> +                    return;
> +                }
> +            }
> +
> +            /* No conflicts: perform this move and continue. */
> +            tcg_out_movext1(s, &mov[i]);
> +        }
> +        /* fall through for the final two moves */
> +
> +    case 2:
> +        tcg_out_movext2(s, mov, mov + 1, ntmp ? tmp[0] : -1);
> +        return;
> +    case 1:
> +        tcg_out_movext1(s, mov);
> +        return;
> +    case 0:
> +        g_assert_not_reached();
> +    }
> +}
> +
> +static void tcg_out_helper_load_slots(TCGContext *s,
> +                                      unsigned nmov, TCGMovExtend *mov,
> +                                      const TCGLdstHelperParam *parm)

I think a documentation patch before this for TCGMoveExtend describing
the reg/slot duality for dst might be worthwhile.

> +{
> +    unsigned i;
> +
> +    /*
> +     * Start from the end, storing to the stack first.
> +     * This frees those registers, so we need not consider overlap.
> +     */
> +    for (i = nmov; i-- > 0; ) {
> +        unsigned slot = mov[i].dst;
> +
> +        if (arg_slot_reg_p(slot)) {
> +            goto found_reg;
> +        }
> +
> +        TCGReg src = mov[i].src;
> +        TCGType dst_type = mov[i].dst_type;
> +        MemOp dst_mo = dst_type == TCG_TYPE_I32 ? MO_32 : MO_64;
> +
> +        /* The argument is going onto the stack; extend into scratch. */
> +        if ((mov[i].src_ext & MO_SIZE) != dst_mo) {
> +            tcg_debug_assert(parm->ntmp != 0);
> +            mov[i].dst = src = parm->tmp[0];
> +            tcg_out_movext1(s, &mov[i]);
> +        }
> +
> +        tcg_out_st(s, dst_type, src, TCG_REG_CALL_STACK,
> +                   tcg_out_helper_stk_ofs(dst_type, slot));
> +    }
> +    return;
> +
> + found_reg:
> +    /*
> +     * The remaining arguments are in registers.
> +     * Convert slot numbers to argument registers.
> +     */
> +    nmov = i + 1;
> +    for (i = 0; i < nmov; ++i) {
> +        mov[i].dst = tcg_target_call_iarg_regs[mov[i].dst];
> +    }
> +    tcg_out_helper_load_regs(s, nmov, mov, parm->ntmp, parm->tmp);
> +}
> +
> +static void tcg_out_helper_load_imm(TCGContext *s, unsigned slot,
> +                                    TCGType type, tcg_target_long imm,
> +                                    const TCGLdstHelperParam *parm)
> +{
> +    if (arg_slot_reg_p(slot)) {
> +        tcg_out_movi(s, type, tcg_target_call_iarg_regs[slot], imm);
> +    } else {
> +        int ofs = tcg_out_helper_stk_ofs(type, slot);
> +        if (!tcg_out_sti(s, type, imm, TCG_REG_CALL_STACK, ofs)) {
> +            tcg_debug_assert(parm->ntmp != 0);
> +            tcg_out_movi(s, type, parm->tmp[0], imm);
> +            tcg_out_st(s, type, parm->tmp[0], TCG_REG_CALL_STACK, ofs);
> +        }
> +    }
> +}
> +
> +static void tcg_out_helper_load_common_args(TCGContext *s,
> +                                            const TCGLabelQemuLdst *ldst,
> +                                            const TCGLdstHelperParam *parm,
> +                                            const TCGHelperInfo *info,
> +                                            unsigned next_arg)
> +{
> +    TCGMovExtend ptr_mov = {
> +        .dst_type = TCG_TYPE_PTR,
> +        .src_type = TCG_TYPE_PTR,
> +        .src_ext = sizeof(void *) == 4 ? MO_32 : MO_64
> +    };
> +    const TCGCallArgumentLoc *loc = &info->in[0];
> +    TCGType type;
> +    unsigned slot;
> +    tcg_target_ulong imm;
> +
> +    /*
> +     * Handle env, which is always first.
> +     */
> +    ptr_mov.dst = loc->arg_slot;
> +    ptr_mov.src = TCG_AREG0;
> +    tcg_out_helper_load_slots(s, 1, &ptr_mov, parm);
> +
> +    /*
> +     * Handle oi.
> +     */
> +    imm = ldst->oi;
> +    loc = &info->in[next_arg];
> +    type = TCG_TYPE_I32;
> +    switch (loc->kind) {
> +    case TCG_CALL_ARG_NORMAL:
> +        break;
> +    case TCG_CALL_ARG_EXTEND_U:
> +    case TCG_CALL_ARG_EXTEND_S:
> +        /* No extension required for MemOpIdx. */
> +        tcg_debug_assert(imm <= INT32_MAX);
> +        type = TCG_TYPE_REG;
> +        break;
> +    default:
> +        g_assert_not_reached();
> +    }
> +    tcg_out_helper_load_imm(s, loc->arg_slot, type, imm, parm);
> +    next_arg++;
> +
> +    /*
> +     * Handle ra.
> +     */
> +    loc = &info->in[next_arg];
> +    slot = loc->arg_slot;
> +    if (parm->ra_gen) {
> +        int arg_reg = -1;
> +        TCGReg ra_reg;
> +
> +        if (arg_slot_reg_p(slot)) {
> +            arg_reg = tcg_target_call_iarg_regs[slot];
> +        }
> +        ra_reg = parm->ra_gen(s, ldst, arg_reg);
> +
> +        ptr_mov.dst = slot;
> +        ptr_mov.src = ra_reg;
> +        tcg_out_helper_load_slots(s, 1, &ptr_mov, parm);
> +    } else {
> +        imm = (uintptr_t)ldst->raddr;
> +        tcg_out_helper_load_imm(s, slot, TCG_TYPE_PTR, imm, parm);
> +    }
> +}
> +
> +static unsigned tcg_out_helper_add_mov(TCGMovExtend *mov,
> +                                       const TCGCallArgumentLoc *loc,
> +                                       TCGType dst_type, TCGType src_type,
> +                                       TCGReg lo, TCGReg hi)
> +{
> +    if (dst_type <= TCG_TYPE_REG) {
> +        MemOp src_ext;
> +
> +        switch (loc->kind) {
> +        case TCG_CALL_ARG_NORMAL:
> +            src_ext = src_type == TCG_TYPE_I32 ? MO_32 : MO_64;
> +            break;
> +        case TCG_CALL_ARG_EXTEND_U:
> +            dst_type = TCG_TYPE_REG;
> +            src_ext = MO_UL;
> +            break;
> +        case TCG_CALL_ARG_EXTEND_S:
> +            dst_type = TCG_TYPE_REG;
> +            src_ext = MO_SL;
> +            break;
> +        default:
> +            g_assert_not_reached();
> +        }
> +
> +        mov[0].dst = loc->arg_slot;
> +        mov[0].dst_type = dst_type;
> +        mov[0].src = lo;
> +        mov[0].src_type = src_type;
> +        mov[0].src_ext = src_ext;
> +        return 1;
> +    }
> +
> +    assert(TCG_TARGET_REG_BITS == 32);
> +
> +    mov[0].dst = loc[HOST_BIG_ENDIAN].arg_slot;
> +    mov[0].src = lo;
> +    mov[0].dst_type = TCG_TYPE_I32;
> +    mov[0].src_type = TCG_TYPE_I32;
> +    mov[0].src_ext = MO_32;
> +
> +    mov[1].dst = loc[!HOST_BIG_ENDIAN].arg_slot;
> +    mov[1].src = hi;
> +    mov[1].dst_type = TCG_TYPE_I32;
> +    mov[1].src_type = TCG_TYPE_I32;
> +    mov[1].src_ext = MO_32;
> +
> +    return 2;
> +}
> +
> +static void tcg_out_ld_helper_args(TCGContext *s, const TCGLabelQemuLdst 
> *ldst,
> +                                   const TCGLdstHelperParam *parm)
> +{
> +    const TCGHelperInfo *info;
> +    const TCGCallArgumentLoc *loc;
> +    TCGMovExtend mov[2];
> +    unsigned next_arg, nmov;
> +    MemOp mop = get_memop(ldst->oi);
> +
> +    switch (mop & MO_SIZE) {
> +    case MO_8:
> +    case MO_16:
> +    case MO_32:
> +        info = &info_helper_ld32_mmu;
> +        break;
> +    case MO_64:
> +        info = &info_helper_ld64_mmu;
> +        break;
> +    default:
> +        g_assert_not_reached();
> +    }
> +
> +    /* Defer env argument. */
> +    next_arg = 1;

This could be assigned on creation but maybe even just open code:

 loc = &info->in[1]
 ..
 tcg_out_helper_load_common_args(s, ldst, parm, info, 1 + nmov);

although I concede that matching up with the store helper makes sense. 
 
> +
> +    loc = &info->in[next_arg];
> +    nmov = tcg_out_helper_add_mov(mov, loc, TCG_TYPE_TL, TCG_TYPE_TL,
> +                                  ldst->addrlo_reg, ldst->addrhi_reg);
> +    next_arg += nmov;
> +
> +    tcg_out_helper_load_slots(s, nmov, mov, parm);
> +
> +    /* No special attention for 32 and 64-bit return values. */
> +    tcg_debug_assert(info->out_kind == TCG_CALL_RET_NORMAL);
> +
> +    tcg_out_helper_load_common_args(s, ldst, parm, info, next_arg);
> +}
> +
> +static void tcg_out_ld_helper_ret(TCGContext *s, const TCGLabelQemuLdst 
> *ldst,
> +                                  bool load_sign,
> +                                  const TCGLdstHelperParam *parm)
> +{
> +    TCGMovExtend mov[2];
> +
> +    if (ldst->type <= TCG_TYPE_REG) {
> +        MemOp mop = get_memop(ldst->oi);
> +
> +        mov[0].dst = ldst->datalo_reg;
> +        mov[0].src = tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, 0);
> +        mov[0].dst_type = ldst->type;
> +        mov[0].src_type = TCG_TYPE_REG;
> +
> +        /*
> +         * If load_sign, then we allowed the helper to perform the
> +         * appropriate sign extension to tcg_target_ulong, and all
> +         * we need now is a plain move.
> +         *
> +         * If they do not, then we expect the relevant extension
> +         * instruction to be no more expensive than a move, and
> +         * we thus save the icache etc by only using one of two
> +         * helper functions.
> +         */
> +        if (load_sign || !(mop & MO_SIGN)) {
> +            if (TCG_TARGET_REG_BITS == 32 || ldst->type == TCG_TYPE_I32) {
> +                mov[0].src_ext = MO_32;
> +            } else {
> +                mov[0].src_ext = MO_64;
> +            }
> +        } else {
> +            mov[0].src_ext = mop & MO_SSIZE;
> +        }
> +        tcg_out_movext1(s, mov);
> +    } else {
> +        assert(TCG_TARGET_REG_BITS == 32);
> +
> +        mov[0].dst = ldst->datalo_reg;
> +        mov[0].src =
> +            tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, HOST_BIG_ENDIAN);
> +        mov[0].dst_type = TCG_TYPE_I32;
> +        mov[0].src_type = TCG_TYPE_I32;
> +        mov[0].src_ext = MO_32;
> +
> +        mov[1].dst = ldst->datahi_reg;
> +        mov[1].src =
> +            tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, !HOST_BIG_ENDIAN);
> +        mov[1].dst_type = TCG_TYPE_REG;
> +        mov[1].src_type = TCG_TYPE_REG;
> +        mov[1].src_ext = MO_32;
> +
> +        tcg_out_movext2(s, mov, mov + 1, parm->ntmp ? parm->tmp[0] : -1);
> +    }
> +}
> +
> +static void tcg_out_st_helper_args(TCGContext *s, const TCGLabelQemuLdst 
> *ldst,
> +                                   const TCGLdstHelperParam *parm)
> +{
> +    const TCGHelperInfo *info;
> +    const TCGCallArgumentLoc *loc;
> +    TCGMovExtend mov[4];
> +    TCGType data_type;
> +    unsigned next_arg, nmov, n;
> +    MemOp mop = get_memop(ldst->oi);
> +
> +    switch (mop & MO_SIZE) {
> +    case MO_8:
> +    case MO_16:
> +    case MO_32:
> +        info = &info_helper_st32_mmu;
> +        data_type = TCG_TYPE_I32;
> +        break;
> +    case MO_64:
> +        info = &info_helper_st64_mmu;
> +        data_type = TCG_TYPE_I64;
> +        break;
> +    default:
> +        g_assert_not_reached();
> +    }
> +
> +    /* Defer env argument. */
> +    next_arg = 1;
> +    nmov = 0;
> +
> +    /* Handle addr argument. */
> +    loc = &info->in[next_arg];
> +    n = tcg_out_helper_add_mov(mov, loc, TCG_TYPE_TL, TCG_TYPE_TL,
> +                               ldst->addrlo_reg, ldst->addrhi_reg);
> +    next_arg += n;
> +    nmov += n;
> +
> +    /* Handle data argument. */
> +    loc = &info->in[next_arg];
> +    n = tcg_out_helper_add_mov(mov + nmov, loc, data_type, ldst->type,
> +                               ldst->datalo_reg, ldst->datahi_reg);
> +    next_arg += n;
> +    nmov += n;
> +    tcg_debug_assert(nmov <= ARRAY_SIZE(mov));
> +
> +    tcg_out_helper_load_slots(s, nmov, mov, parm);
> +    tcg_out_helper_load_common_args(s, ldst, parm, info, next_arg);
> +}
> +
>  #ifdef CONFIG_PROFILER
>  
>  /* avoid copy/paste errors */

Otherwise:

Reviewed-by: Alex Bennée <alex.bennee@linaro.org>

-- 
Alex Bennée
Virtualisation Tech Lead @ Linaro



reply via email to

[Prev in Thread] Current Thread [Next in Thread]