Re: [PATCH v5 10/30] tcg: Add routines for calling slow-path helpers
From: Alex Bennée
Subject: Re: [PATCH v5 10/30] tcg: Add routines for calling slow-path helpers
Date: Wed, 10 May 2023 13:27:28 +0100
User-agent: mu4e 1.11.4; emacs 29.0.90
Richard Henderson <richard.henderson@linaro.org> writes:
> Add tcg_out_ld_helper_args, tcg_out_ld_helper_ret,
> and tcg_out_st_helper_args. These and their subroutines
> use the existing knowledge of the host function call abi
> to load the function call arguments and return results.
>
> These will be used to simplify the backends in turn.
>
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> ---
> tcg/tcg.c | 456 +++++++++++++++++++++++++++++++++++++++++++++++++++++-
> 1 file changed, 453 insertions(+), 3 deletions(-)
>
> diff --git a/tcg/tcg.c b/tcg/tcg.c
> index 057423c121..748be8426a 100644
> --- a/tcg/tcg.c
> +++ b/tcg/tcg.c
> @@ -181,6 +181,22 @@ static bool tcg_target_const_match(int64_t val, TCGType type, int ct);
> static int tcg_out_ldst_finalize(TCGContext *s);
> #endif
>
> +typedef struct TCGLdstHelperParam {
> + TCGReg (*ra_gen)(TCGContext *s, const TCGLabelQemuLdst *l, int arg_reg);
> + unsigned ntmp;
> + int tmp[3];
> +} TCGLdstHelperParam;
> +
> +static void tcg_out_ld_helper_args(TCGContext *s, const TCGLabelQemuLdst *l,
> + const TCGLdstHelperParam *p)
> + __attribute__((unused));
> +static void tcg_out_ld_helper_ret(TCGContext *s, const TCGLabelQemuLdst *l,
> + bool load_sign, const TCGLdstHelperParam *p)
> + __attribute__((unused));
> +static void tcg_out_st_helper_args(TCGContext *s, const TCGLabelQemuLdst *l,
> + const TCGLdstHelperParam *p)
> + __attribute__((unused));
> +
> TCGContext tcg_init_ctx;
> __thread TCGContext *tcg_ctx;
>
> @@ -459,9 +475,8 @@ static void tcg_out_movext1(TCGContext *s, const TCGMovExtend *i)
> * between the sources and destinations.
> */
>
> -static void __attribute__((unused))
> -tcg_out_movext2(TCGContext *s, const TCGMovExtend *i1,
> - const TCGMovExtend *i2, int scratch)
> +static void tcg_out_movext2(TCGContext *s, const TCGMovExtend *i1,
> + const TCGMovExtend *i2, int scratch)
> {
> TCGReg src1 = i1->src;
> TCGReg src2 = i2->src;
> @@ -715,6 +730,50 @@ static TCGHelperInfo all_helpers[] = {
> };
> static GHashTable *helper_table;
>
> +#if TCG_TARGET_REG_BITS == 32
> +# define dh_typecode_ttl dh_typecode_i32
> +#else
> +# define dh_typecode_ttl dh_typecode_i64
> +#endif
> +
Might be worth a small comment mentioning that this is doing by hand what
the HELPER wrappers would otherwise do, and why we don't use them here.
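Something along these lines perhaps (just a sketch, and I'm guessing at the
reason the wrappers aren't used, so adjust the wording as appropriate):

    /*
     * The qemu_ld/st slow-path helpers are not declared through the
     * usual DEF_HELPER_FLAGS_* machinery, so build the equivalent
     * TCGHelperInfo (flags and typemask) by hand here.
     */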
> +static TCGHelperInfo info_helper_ld32_mmu = {
> + .flags = TCG_CALL_NO_WG,
> + .typemask = dh_typemask(ttl, 0) /* return tcg_target_ulong */
> + | dh_typemask(env, 1)
> + | dh_typemask(tl, 2) /* target_ulong addr */
> + | dh_typemask(i32, 3) /* unsigned oi */
> + | dh_typemask(ptr, 4) /* uintptr_t ra */
> +};
> +
> +static TCGHelperInfo info_helper_ld64_mmu = {
> + .flags = TCG_CALL_NO_WG,
> + .typemask = dh_typemask(i64, 0) /* return uint64_t */
> + | dh_typemask(env, 1)
> + | dh_typemask(tl, 2) /* target_ulong addr */
> + | dh_typemask(i32, 3) /* unsigned oi */
> + | dh_typemask(ptr, 4) /* uintptr_t ra */
> +};
> +
> +static TCGHelperInfo info_helper_st32_mmu = {
> + .flags = TCG_CALL_NO_WG,
> + .typemask = dh_typemask(void, 0)
> + | dh_typemask(env, 1)
> + | dh_typemask(tl, 2) /* target_ulong addr */
> + | dh_typemask(i32, 3) /* uint32_t data */
> + | dh_typemask(i32, 4) /* unsigned oi */
> + | dh_typemask(ptr, 5) /* uintptr_t ra */
> +};
> +
> +static TCGHelperInfo info_helper_st64_mmu = {
> + .flags = TCG_CALL_NO_WG,
> + .typemask = dh_typemask(void, 0)
> + | dh_typemask(env, 1)
> + | dh_typemask(tl, 2) /* target_ulong addr */
> + | dh_typemask(i64, 3) /* uint64_t data */
> + | dh_typemask(i32, 4) /* unsigned oi */
> + | dh_typemask(ptr, 5) /* uintptr_t ra */
> +};
> +
> #ifdef CONFIG_TCG_INTERPRETER
> static ffi_type *typecode_to_ffi(int argmask)
> {
> @@ -1126,6 +1185,11 @@ static void tcg_context_init(unsigned max_cpus)
> (gpointer)&all_helpers[i]);
> }
>
> + init_call_layout(&info_helper_ld32_mmu);
> + init_call_layout(&info_helper_ld64_mmu);
> + init_call_layout(&info_helper_st32_mmu);
> + init_call_layout(&info_helper_st64_mmu);
> +
> #ifdef CONFIG_TCG_INTERPRETER
> init_ffi_layouts();
> #endif
> @@ -5011,6 +5075,392 @@ static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op)
> }
> }
>
> +/*
> + * Similarly for qemu_ld/st slow path helpers.
> + * We must re-implement tcg_gen_callN and tcg_reg_alloc_call simultaneously,
> + * using only the provided backend tcg_out_* functions.
> + */
> +
> +static int tcg_out_helper_stk_ofs(TCGType type, unsigned slot)
> +{
> + int ofs = arg_slot_stk_ofs(slot);
> +
> + /*
> + * Each stack slot is TCG_TARGET_LONG_BITS. If the host does not
> + * require extension to uint64_t, adjust the address for uint32_t.
> + */
> + if (HOST_BIG_ENDIAN &&
> + TCG_TARGET_REG_BITS == 64 &&
> + type == TCG_TYPE_I32) {
> + ofs += 4;
> + }
> + return ofs;
> +}
> +
> +static void tcg_out_helper_load_regs(TCGContext *s,
> + unsigned nmov, TCGMovExtend *mov,
> + unsigned ntmp, const int *tmp)
> +{
> + switch (nmov) {
> + default:
> + /* The backend must have provided enough temps for the worst case. */
> + tcg_debug_assert(ntmp + 1 >= nmov);
> +
> + for (unsigned i = nmov - 1; i >= 2; --i) {
> + TCGReg dst = mov[i].dst;
> +
> + for (unsigned j = 0; j < i; ++j) {
> + if (dst == mov[j].src) {
> + /*
> + * Conflict.
> + * Copy the source to a temporary, recurse for the
> + * remaining moves, perform the extension from our
> + * scratch on the way out.
> + */
> + TCGReg scratch = tmp[--ntmp];
> + tcg_out_mov(s, mov[i].src_type, scratch, mov[i].src);
> + mov[i].src = scratch;
> +
> + tcg_out_helper_load_regs(s, i, mov, ntmp, tmp);
> + tcg_out_movext1(s, &mov[i]);
> + return;
> + }
> + }
> +
> + /* No conflicts: perform this move and continue. */
> + tcg_out_movext1(s, &mov[i]);
> + }
> + /* fall through for the final two moves */
> +
> + case 2:
> + tcg_out_movext2(s, mov, mov + 1, ntmp ? tmp[0] : -1);
> + return;
> + case 1:
> + tcg_out_movext1(s, mov);
> + return;
> + case 0:
> + g_assert_not_reached();
> + }
> +}
> +
> +static void tcg_out_helper_load_slots(TCGContext *s,
> + unsigned nmov, TCGMovExtend *mov,
> + const TCGLdstHelperParam *parm)
I think a documentation patch before this one, describing TCGMovExtend's
reg/slot duality for dst, would be worthwhile.
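Something like this maybe (a rough sketch of the wording, based on my reading
of the code below, so please double-check it):

    /*
     * TCGMovExtend: a register move with optional extension.
     *
     * Note that @dst is overloaded in the ld/st helper-argument code:
     * tcg_out_helper_load_slots() initially fills it with a call
     * argument slot number and only converts it to a host register
     * (via tcg_target_call_iarg_regs[]) for the arguments that end up
     * in registers; everywhere else @dst is a plain host register.
     */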
> +{
> + unsigned i;
> +
> + /*
> + * Start from the end, storing to the stack first.
> + * This frees those registers, so we need not consider overlap.
> + */
> + for (i = nmov; i-- > 0; ) {
> + unsigned slot = mov[i].dst;
> +
> + if (arg_slot_reg_p(slot)) {
> + goto found_reg;
> + }
> +
> + TCGReg src = mov[i].src;
> + TCGType dst_type = mov[i].dst_type;
> + MemOp dst_mo = dst_type == TCG_TYPE_I32 ? MO_32 : MO_64;
> +
> + /* The argument is going onto the stack; extend into scratch. */
> + if ((mov[i].src_ext & MO_SIZE) != dst_mo) {
> + tcg_debug_assert(parm->ntmp != 0);
> + mov[i].dst = src = parm->tmp[0];
> + tcg_out_movext1(s, &mov[i]);
> + }
> +
> + tcg_out_st(s, dst_type, src, TCG_REG_CALL_STACK,
> + tcg_out_helper_stk_ofs(dst_type, slot));
> + }
> + return;
> +
> + found_reg:
> + /*
> + * The remaining arguments are in registers.
> + * Convert slot numbers to argument registers.
> + */
> + nmov = i + 1;
> + for (i = 0; i < nmov; ++i) {
> + mov[i].dst = tcg_target_call_iarg_regs[mov[i].dst];
> + }
> + tcg_out_helper_load_regs(s, nmov, mov, parm->ntmp, parm->tmp);
> +}
> +
> +static void tcg_out_helper_load_imm(TCGContext *s, unsigned slot,
> + TCGType type, tcg_target_long imm,
> + const TCGLdstHelperParam *parm)
> +{
> + if (arg_slot_reg_p(slot)) {
> + tcg_out_movi(s, type, tcg_target_call_iarg_regs[slot], imm);
> + } else {
> + int ofs = tcg_out_helper_stk_ofs(type, slot);
> + if (!tcg_out_sti(s, type, imm, TCG_REG_CALL_STACK, ofs)) {
> + tcg_debug_assert(parm->ntmp != 0);
> + tcg_out_movi(s, type, parm->tmp[0], imm);
> + tcg_out_st(s, type, parm->tmp[0], TCG_REG_CALL_STACK, ofs);
> + }
> + }
> +}
> +
> +static void tcg_out_helper_load_common_args(TCGContext *s,
> + const TCGLabelQemuLdst *ldst,
> + const TCGLdstHelperParam *parm,
> + const TCGHelperInfo *info,
> + unsigned next_arg)
> +{
> + TCGMovExtend ptr_mov = {
> + .dst_type = TCG_TYPE_PTR,
> + .src_type = TCG_TYPE_PTR,
> + .src_ext = sizeof(void *) == 4 ? MO_32 : MO_64
> + };
> + const TCGCallArgumentLoc *loc = &info->in[0];
> + TCGType type;
> + unsigned slot;
> + tcg_target_ulong imm;
> +
> + /*
> + * Handle env, which is always first.
> + */
> + ptr_mov.dst = loc->arg_slot;
> + ptr_mov.src = TCG_AREG0;
> + tcg_out_helper_load_slots(s, 1, &ptr_mov, parm);
> +
> + /*
> + * Handle oi.
> + */
> + imm = ldst->oi;
> + loc = &info->in[next_arg];
> + type = TCG_TYPE_I32;
> + switch (loc->kind) {
> + case TCG_CALL_ARG_NORMAL:
> + break;
> + case TCG_CALL_ARG_EXTEND_U:
> + case TCG_CALL_ARG_EXTEND_S:
> + /* No extension required for MemOpIdx. */
> + tcg_debug_assert(imm <= INT32_MAX);
> + type = TCG_TYPE_REG;
> + break;
> + default:
> + g_assert_not_reached();
> + }
> + tcg_out_helper_load_imm(s, loc->arg_slot, type, imm, parm);
> + next_arg++;
> +
> + /*
> + * Handle ra.
> + */
> + loc = &info->in[next_arg];
> + slot = loc->arg_slot;
> + if (parm->ra_gen) {
> + int arg_reg = -1;
> + TCGReg ra_reg;
> +
> + if (arg_slot_reg_p(slot)) {
> + arg_reg = tcg_target_call_iarg_regs[slot];
> + }
> + ra_reg = parm->ra_gen(s, ldst, arg_reg);
> +
> + ptr_mov.dst = slot;
> + ptr_mov.src = ra_reg;
> + tcg_out_helper_load_slots(s, 1, &ptr_mov, parm);
> + } else {
> + imm = (uintptr_t)ldst->raddr;
> + tcg_out_helper_load_imm(s, slot, TCG_TYPE_PTR, imm, parm);
> + }
> +}
> +
> +static unsigned tcg_out_helper_add_mov(TCGMovExtend *mov,
> + const TCGCallArgumentLoc *loc,
> + TCGType dst_type, TCGType src_type,
> + TCGReg lo, TCGReg hi)
> +{
> + if (dst_type <= TCG_TYPE_REG) {
> + MemOp src_ext;
> +
> + switch (loc->kind) {
> + case TCG_CALL_ARG_NORMAL:
> + src_ext = src_type == TCG_TYPE_I32 ? MO_32 : MO_64;
> + break;
> + case TCG_CALL_ARG_EXTEND_U:
> + dst_type = TCG_TYPE_REG;
> + src_ext = MO_UL;
> + break;
> + case TCG_CALL_ARG_EXTEND_S:
> + dst_type = TCG_TYPE_REG;
> + src_ext = MO_SL;
> + break;
> + default:
> + g_assert_not_reached();
> + }
> +
> + mov[0].dst = loc->arg_slot;
> + mov[0].dst_type = dst_type;
> + mov[0].src = lo;
> + mov[0].src_type = src_type;
> + mov[0].src_ext = src_ext;
> + return 1;
> + }
> +
> + assert(TCG_TARGET_REG_BITS == 32);
> +
> + mov[0].dst = loc[HOST_BIG_ENDIAN].arg_slot;
> + mov[0].src = lo;
> + mov[0].dst_type = TCG_TYPE_I32;
> + mov[0].src_type = TCG_TYPE_I32;
> + mov[0].src_ext = MO_32;
> +
> + mov[1].dst = loc[!HOST_BIG_ENDIAN].arg_slot;
> + mov[1].src = hi;
> + mov[1].dst_type = TCG_TYPE_I32;
> + mov[1].src_type = TCG_TYPE_I32;
> + mov[1].src_ext = MO_32;
> +
> + return 2;
> +}
> +
> +static void tcg_out_ld_helper_args(TCGContext *s, const TCGLabelQemuLdst *ldst,
> + const TCGLdstHelperParam *parm)
> +{
> + const TCGHelperInfo *info;
> + const TCGCallArgumentLoc *loc;
> + TCGMovExtend mov[2];
> + unsigned next_arg, nmov;
> + MemOp mop = get_memop(ldst->oi);
> +
> + switch (mop & MO_SIZE) {
> + case MO_8:
> + case MO_16:
> + case MO_32:
> + info = &info_helper_ld32_mmu;
> + break;
> + case MO_64:
> + info = &info_helper_ld64_mmu;
> + break;
> + default:
> + g_assert_not_reached();
> + }
> +
> + /* Defer env argument. */
> + next_arg = 1;
This could be assigned at creation, but maybe even just open code it:
   loc = &info->in[1];
   ..
   tcg_out_helper_load_common_args(s, ldst, parm, info, 1 + nmov);
although I concede that matching up with the store helper makes sense
(a fuller sketch of the open-coded version follows below).
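Concretely, lifting the intervening lines straight from the patch (untested,
just to illustrate the shape):

    loc = &info->in[1];
    nmov = tcg_out_helper_add_mov(mov, loc, TCG_TYPE_TL, TCG_TYPE_TL,
                                  ldst->addrlo_reg, ldst->addrhi_reg);

    tcg_out_helper_load_slots(s, nmov, mov, parm);

    /* No special attention for 32 and 64-bit return values. */
    tcg_debug_assert(info->out_kind == TCG_CALL_RET_NORMAL);

    tcg_out_helper_load_common_args(s, ldst, parm, info, 1 + nmov);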
> +
> + loc = &info->in[next_arg];
> + nmov = tcg_out_helper_add_mov(mov, loc, TCG_TYPE_TL, TCG_TYPE_TL,
> + ldst->addrlo_reg, ldst->addrhi_reg);
> + next_arg += nmov;
> +
> + tcg_out_helper_load_slots(s, nmov, mov, parm);
> +
> + /* No special attention for 32 and 64-bit return values. */
> + tcg_debug_assert(info->out_kind == TCG_CALL_RET_NORMAL);
> +
> + tcg_out_helper_load_common_args(s, ldst, parm, info, next_arg);
> +}
> +
> +static void tcg_out_ld_helper_ret(TCGContext *s, const TCGLabelQemuLdst *ldst,
> + bool load_sign,
> + const TCGLdstHelperParam *parm)
> +{
> + TCGMovExtend mov[2];
> +
> + if (ldst->type <= TCG_TYPE_REG) {
> + MemOp mop = get_memop(ldst->oi);
> +
> + mov[0].dst = ldst->datalo_reg;
> + mov[0].src = tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, 0);
> + mov[0].dst_type = ldst->type;
> + mov[0].src_type = TCG_TYPE_REG;
> +
> + /*
> + * If load_sign, then we allowed the helper to perform the
> + * appropriate sign extension to tcg_target_ulong, and all
> + * we need now is a plain move.
> + *
> + * If they do not, then we expect the relevant extension
> + * instruction to be no more expensive than a move, and
> + * we thus save the icache etc by only using one of two
> + * helper functions.
> + */
> + if (load_sign || !(mop & MO_SIGN)) {
> + if (TCG_TARGET_REG_BITS == 32 || ldst->type == TCG_TYPE_I32) {
> + mov[0].src_ext = MO_32;
> + } else {
> + mov[0].src_ext = MO_64;
> + }
> + } else {
> + mov[0].src_ext = mop & MO_SSIZE;
> + }
> + tcg_out_movext1(s, mov);
> + } else {
> + assert(TCG_TARGET_REG_BITS == 32);
> +
> + mov[0].dst = ldst->datalo_reg;
> + mov[0].src =
> + tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, HOST_BIG_ENDIAN);
> + mov[0].dst_type = TCG_TYPE_I32;
> + mov[0].src_type = TCG_TYPE_I32;
> + mov[0].src_ext = MO_32;
> +
> + mov[1].dst = ldst->datahi_reg;
> + mov[1].src =
> + tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, !HOST_BIG_ENDIAN);
> + mov[1].dst_type = TCG_TYPE_REG;
> + mov[1].src_type = TCG_TYPE_REG;
> + mov[1].src_ext = MO_32;
> +
> + tcg_out_movext2(s, mov, mov + 1, parm->ntmp ? parm->tmp[0] : -1);
> + }
> +}
> +
> +static void tcg_out_st_helper_args(TCGContext *s, const TCGLabelQemuLdst *ldst,
> + const TCGLdstHelperParam *parm)
> +{
> + const TCGHelperInfo *info;
> + const TCGCallArgumentLoc *loc;
> + TCGMovExtend mov[4];
> + TCGType data_type;
> + unsigned next_arg, nmov, n;
> + MemOp mop = get_memop(ldst->oi);
> +
> + switch (mop & MO_SIZE) {
> + case MO_8:
> + case MO_16:
> + case MO_32:
> + info = &info_helper_st32_mmu;
> + data_type = TCG_TYPE_I32;
> + break;
> + case MO_64:
> + info = &info_helper_st64_mmu;
> + data_type = TCG_TYPE_I64;
> + break;
> + default:
> + g_assert_not_reached();
> + }
> +
> + /* Defer env argument. */
> + next_arg = 1;
> + nmov = 0;
> +
> + /* Handle addr argument. */
> + loc = &info->in[next_arg];
> + n = tcg_out_helper_add_mov(mov, loc, TCG_TYPE_TL, TCG_TYPE_TL,
> + ldst->addrlo_reg, ldst->addrhi_reg);
> + next_arg += n;
> + nmov += n;
> +
> + /* Handle data argument. */
> + loc = &info->in[next_arg];
> + n = tcg_out_helper_add_mov(mov + nmov, loc, data_type, ldst->type,
> + ldst->datalo_reg, ldst->datahi_reg);
> + next_arg += n;
> + nmov += n;
> + tcg_debug_assert(nmov <= ARRAY_SIZE(mov));
> +
> + tcg_out_helper_load_slots(s, nmov, mov, parm);
> + tcg_out_helper_load_common_args(s, ldst, parm, info, next_arg);
> +}
> +
> #ifdef CONFIG_PROFILER
>
> /* avoid copy/paste errors */
Otherwise:
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
--
Alex Bennée
Virtualisation Tech Lead @ Linaro