[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: [Qemu-devel] [PATCH v2 11/20] target-mips: add MSA I5 format instruc
From: |
James Hogan |
Subject: |
Re: [Qemu-devel] [PATCH v2 11/20] target-mips: add MSA I5 format instruction |
Date: |
Wed, 29 Oct 2014 23:23:43 +0000 |
User-agent: |
Mutt/1.5.22 (2013-10-16) |
Hi Yongbok,
On Wed, Oct 29, 2014 at 01:41:59AM +0000, Yongbok Kim wrote:
> +DEF_HELPER_5(msa_addvi_df, void, env, i32, i32, i32, s64)
> +DEF_HELPER_5(msa_ceqi_df, void, env, i32, i32, i32, s64)
> +DEF_HELPER_5(msa_clei_s_df, void, env, i32, i32, i32, s64)
> +DEF_HELPER_5(msa_clei_u_df, void, env, i32, i32, i32, s64)
> +DEF_HELPER_5(msa_clti_s_df, void, env, i32, i32, i32, s64)
> +DEF_HELPER_5(msa_clti_u_df, void, env, i32, i32, i32, s64)
> +DEF_HELPER_4(msa_ldi_df, void, env, i32, i32, i32)
> +DEF_HELPER_5(msa_maxi_s_df, void, env, i32, i32, i32, s64)
> +DEF_HELPER_5(msa_maxi_u_df, void, env, i32, i32, i32, s64)
> +DEF_HELPER_5(msa_mini_s_df, void, env, i32, i32, i32, s64)
> +DEF_HELPER_5(msa_mini_u_df, void, env, i32, i32, i32, s64)
> +DEF_HELPER_5(msa_subvi_df, void, env, i32, i32, i32, s64)
s64 seems like overkill for a 5bit field.
> diff --git a/target-mips/msa_helper.c b/target-mips/msa_helper.c
> index 46ffaa5..ffdde07 100644
> --- a/target-mips/msa_helper.c
> +++ b/target-mips/msa_helper.c
> @@ -114,3 +114,145 @@ void helper_msa_shf_df(CPUMIPSState *env, uint32_t df,
> uint32_t wd,
> msa_move_v(pwd, pwx);
> }
>
> +static inline int64_t msa_addv_df(uint32_t df, int64_t arg1, int64_t arg2)
> +{
> + return arg1 + arg2;
> +}
> +
> +static inline int64_t msa_subv_df(uint32_t df, int64_t arg1, int64_t arg2)
> +{
> + return arg1 - arg2;
> +}
> +
> +static inline int64_t msa_ceq_df(uint32_t df, int64_t arg1, int64_t arg2)
> +{
> + return arg1 == arg2 ? -1 : 0;
> +}
> +
> +static inline int64_t msa_cle_s_df(uint32_t df, int64_t arg1, int64_t arg2)
> +{
> + return arg1 <= arg2 ? -1 : 0;
> +}
> +
> +static inline int64_t msa_cle_u_df(uint32_t df, int64_t arg1, int64_t arg2)
> +{
> + uint64_t u_arg1 = UNSIGNED(arg1, df);
> + uint64_t u_arg2 = UNSIGNED(arg2, df);
> + return u_arg1 <= u_arg2 ? -1 : 0;
> +}
> +
> +static inline int64_t msa_clt_s_df(uint32_t df, int64_t arg1, int64_t arg2)
> +{
> + return arg1 < arg2 ? -1 : 0;
> +}
> +
> +static inline int64_t msa_clt_u_df(uint32_t df, int64_t arg1, int64_t arg2)
> +{
> + uint64_t u_arg1 = UNSIGNED(arg1, df);
> + uint64_t u_arg2 = UNSIGNED(arg2, df);
> + return u_arg1 < u_arg2 ? -1 : 0;
> +}
> +
> +static inline int64_t msa_max_s_df(uint32_t df, int64_t arg1, int64_t arg2)
> +{
> + return arg1 > arg2 ? arg1 : arg2;
> +}
> +
> +static inline int64_t msa_max_u_df(uint32_t df, int64_t arg1, int64_t arg2)
> +{
> + uint64_t u_arg1 = UNSIGNED(arg1, df);
> + uint64_t u_arg2 = UNSIGNED(arg2, df);
> + return u_arg1 > u_arg2 ? arg1 : arg2;
> +}
> +
> +static inline int64_t msa_min_s_df(uint32_t df, int64_t arg1, int64_t arg2)
> +{
> + return arg1 < arg2 ? arg1 : arg2;
> +}
> +
> +static inline int64_t msa_min_u_df(uint32_t df, int64_t arg1, int64_t arg2)
> +{
> + uint64_t u_arg1 = UNSIGNED(arg1, df);
> + uint64_t u_arg2 = UNSIGNED(arg2, df);
> + return u_arg1 < u_arg2 ? arg1 : arg2;
> +}
> +
> +#define MSA_BINOP_IMM_DF(helper, func) \
> +void helper_msa_ ## helper ## _df(CPUMIPSState *env, uint32_t df, \
> + uint32_t wd, uint32_t ws, int64_t u5) \
> +{ \
> + wr_t *pwd = &(env->active_fpu.fpr[wd].wr); \
> + wr_t *pws = &(env->active_fpu.fpr[ws].wr); \
> + uint32_t i; \
> + \
> + switch (df) { \
> + case DF_BYTE: \
> + for (i = 0; i < DF_ELEMENTS(DF_BYTE); i++) { \
> + pwd->b[i] = msa_ ## func ## _df(df, pws->b[i], u5); \
> + } \
> + break; \
> + case DF_HALF: \
> + for (i = 0; i < DF_ELEMENTS(DF_HALF); i++) { \
> + pwd->h[i] = msa_ ## func ## _df(df, pws->h[i], u5); \
> + } \
> + break; \
> + case DF_WORD: \
> + for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) { \
> + pwd->w[i] = msa_ ## func ## _df(df, pws->w[i], u5); \
> + } \
> + break; \
> + case DF_DOUBLE: \
> + for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) { \
> + pwd->d[i] = msa_ ## func ## _df(df, pws->d[i], u5); \
> + } \
Have you checked whether the compiler is actually able to effectively
optimise this lot?
If not, I bet it'd have a much better chance if the inline functions
above were defined as macros, e.g.:
#define msa_min_s_df(TYPE_S, TYPE_U, ARG1, ARG2) \
((TYPE_S)(ARG1) < (TYPE_S)(ARG2) ? (ARG1) : (ARG2))
#define msa_min_u_df(TYPE_S, TYPE_U, ARG1, ARG2) \
((TYPE_U)(ARG1) < (TYPE_U)(ARG2) ? (ARG1) : (ARG2))
and called like this:
pwd->b[i] = msa_##func##_df(int8_t, uint8_t, pws->b[i], u5);
Having said that, I don't think it should block this patchset, but
certainly something it'd be nice to improve.
> +void helper_msa_ldi_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
> + uint32_t s10)
> +{
> + wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
> + int64_t s64 = ((int64_t)s10 << 54) >> 54;
Extending to 32-bits (int32_t) would work just as well.
> + uint32_t i;
> +
> + switch (df) {
> + case DF_BYTE:
> + for (i = 0; i < DF_ELEMENTS(DF_BYTE); i++) {
> + pwd->b[i] = (int8_t)s10;
> + }
> + break;
> + case DF_HALF:
> + for (i = 0; i < DF_ELEMENTS(DF_HALF); i++) {
> + pwd->h[i] = (int16_t)s64;
> + }
> + break;
> + case DF_WORD:
> + for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
> + pwd->w[i] = (int32_t)s64;
> + }
> + break;
> + case DF_DOUBLE:
> + for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
> + pwd->d[i] = s64;
> + }
> + break;
> + default:
> + assert(0);
> + }
> +}
> diff --git a/target-mips/translate.c b/target-mips/translate.c
> index b2934d7..62dd0b9 100644
> --- a/target-mips/translate.c
> +++ b/target-mips/translate.c
> @@ -17395,6 +17395,81 @@ static void gen_msa_i8(CPUMIPSState *env,
> DisasContext *ctx)
> tcg_temp_free_i32(ti8);
> }
>
> +static void gen_msa_i5(CPUMIPSState *env, DisasContext *ctx)
> +{
> +#define MASK_MSA_I5(op) (MASK_MSA_MINOR(op) | (op & (0x7 << 23)))
> + uint32_t opcode = ctx->opcode;
> +
> + uint8_t df = (ctx->opcode >> 21) & 0x3;
> + int64_t s5 = (ctx->opcode >> 16) & 0x1f;
> + s5 = (s5 << 59) >> 59; /* sign extend s5 to 64 bits*/
> + uint8_t u5 = (ctx->opcode >> 16) & 0x1f;
> + uint8_t ws = (ctx->opcode >> 11) & 0x1f;
> + uint8_t wd = (ctx->opcode >> 6) & 0x1f;
> +
> + TCGv_i32 tdf = tcg_const_i32(df);
> + TCGv_i32 twd = tcg_const_i32(wd);
> + TCGv_i32 tws = tcg_const_i32(ws);
> + TCGv_i64 tu5 = tcg_const_i64(u5);
> + TCGv_i64 ts5 = tcg_const_i64(s5);
these (and the in64_t s5 above) don't really need to be 64bit since the
field is only 5 bits wide.
> +
> + switch (MASK_MSA_I5(opcode)) {
> + case OPC_ADDVI_df:
> + gen_helper_msa_addvi_df(cpu_env, tdf, twd, tws, tu5);
Since you know df at translation time, this could avoid the whole switch
in each helper every time one is executed by having a helper for each
df.
Again, for another patchset perhaps.
> + break;
> + case OPC_LDI_df:
> + {
> + int64_t s10 = (ctx->opcode >> 11) & 0x3ff;
> + s10 = (s10 << 54) >> 54; /* sign extend s10 to 64 bits*/
why use int64_t when you then put it in a const_i32?
> +
> + TCGv_i32 ts10 = tcg_const_i32(s10);
> + gen_helper_msa_ldi_df(cpu_env, tdf, twd, ts10);
> + tcg_temp_free_i32(ts10);
> + }
Cheers
James
- [Qemu-devel] [PATCH v2 07/20] target-mips: add msa_reset(), global msa register, (continued)
- [Qemu-devel] [PATCH v2 07/20] target-mips: add msa_reset(), global msa register, Yongbok Kim, 2014/10/28
- [Qemu-devel] [PATCH v2 08/20] target-mips: add msa_helper.c, Yongbok Kim, 2014/10/28
- [Qemu-devel] [PATCH v2 09/20] target-mips: add MSA branch instructions, Yongbok Kim, 2014/10/28
- [Qemu-devel] [PATCH v2 10/20] target-mips: add MSA I8 format instructions, Yongbok Kim, 2014/10/28
- [Qemu-devel] [PATCH v2 11/20] target-mips: add MSA I5 format instruction, Yongbok Kim, 2014/10/28
- [Qemu-devel] [PATCH v2 12/20] target-mips: add MSA BIT format instructions, Yongbok Kim, 2014/10/28
- [Qemu-devel] [PATCH v2 14/20] target-mips: add MSA ELM format instructions, Yongbok Kim, 2014/10/28
- [Qemu-devel] [PATCH v2 13/20] target-mips: add MSA 3R format instructions, Yongbok Kim, 2014/10/28
- [Qemu-devel] [PATCH v2 16/20] target-mips: add MSA VEC/2R format instructions, Yongbok Kim, 2014/10/28
- [Qemu-devel] [PATCH v2 17/20] target-mips: add MSA 2RF format instructions, Yongbok Kim, 2014/10/28
- [Qemu-devel] [PATCH v2 18/20] target-mips: add MSA MI10 format instructions, Yongbok Kim, 2014/10/28
- [Qemu-devel] [PATCH v2 15/20] target-mips: add MSA 3RF format instructions, Yongbok Kim, 2014/10/28
- [Qemu-devel] [PATCH v2 20/20] target-mips: add MSA support to mips32r5-generic, Yongbok Kim, 2014/10/28