qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [PATCH v3 03/11] Hexagon (target/hexagon) Add overrides for S2_asr_r


From: Richard Henderson
Subject: Re: [PATCH v3 03/11] Hexagon (target/hexagon) Add overrides for S2_asr_r_r_sat/S2_asl_r_r_sat
Date: Sat, 5 Nov 2022 10:59:47 +1100
User-agent: Mozilla/5.0 (X11; Linux x86_64; rv:102.0) Gecko/20100101 Thunderbird/102.2.2

On 11/5/22 06:26, Taylor Simpson wrote:
+static void gen_set_usr_field(int field, TCGv val)
+{
+    tcg_gen_deposit_tl(hex_new_value[HEX_REG_USR], hex_new_value[HEX_REG_USR],
+                       val,
+                       reg_field_info[field].offset,
+                       reg_field_info[field].width);
+}

If you plan to use this for something else, fine, but since you're never clearing USR_OVF, it would be better to use tcg_gen_ori_tl.


+static void gen_sat_i64(TCGv_i64 dst, TCGv_i64 src, uint32_t bits)
+{
+    TCGLabel *label = gen_new_label();
+
+    tcg_gen_sextract_i64(dst, src, 0, bits);
+    tcg_gen_brcond_i64(TCG_COND_EQ, dst, src, label);
+    {
+        TCGv_i64 min = tcg_constant_i64(-(1LL << (bits - 1)));
+        TCGv_i64 max = tcg_constant_i64((1LL << (bits - 1)) - 1);
+        tcg_gen_movcond_i64(TCG_COND_LT, dst, src, tcg_constant_i64(0),
+                            min, max);
+        gen_set_usr_fieldi(USR_OVF, 1);
+    }
+    gen_set_label(label);
+}

This could be done branchless:

   tcg_gen_smax_i64(dst, src, min);
   tcg_gen_smin_i64(dst, dst, max);

   tcg_gen_setcond_i64(TCG_COND_NE, o_64, dst, src);
   tcg_gen_trunc_i64_tl(o_tl, o_64);
   tcg_gen_shli_tl(o_tl, reg_field_info[USE_OVF].offset);
   tcg_gen_or_tl(hex_new_value[HEX_REG_USR], hex_new_value[HEX_REG_USR], o_tl);


+static void gen_satval(TCGv_i64 dest, TCGv_i64 source, uint32_t bits)
+{
+    TCGv_i64 min = tcg_constant_i64(-(1LL << (bits - 1)));
+    TCGv_i64 max = tcg_constant_i64((1LL << (bits - 1)) - 1);
+
+    gen_set_usr_fieldi(USR_OVF, 1);
+    tcg_gen_movcond_i64(TCG_COND_LT, dest, source, tcg_constant_i64(0),
+                        min, max);
+}

Likewise.

+/* Shift left with saturation */
+static void gen_shl_sat(TCGv RdV, TCGv RsV, TCGv shift_amt)
+{
+    /*
+     * int64_t A = (fCAST4_8s(RsV) << shift_amt;
+     * if (((int32_t)((fSAT(A)) ^ ((int32_t)(RsV)))) < 0) {
+     *     RdV = fSATVALN(32, ((int32_t)(RsV)))
+     * } else if (((RsV) > 0) && ((A) == 0)) {
+     *     RdV = fSATVALN(32, (RsV));
+     * } else {
+     *     RdV = fSAT(A);
+     * }
+     */

A little bit tricky to follow, but is this overflow condition to be the same as

    x_32 = rsv << shift_amt;
    y_32 = x_32 >> shift_amt;
    overflow = x_32 != y_32;

i.e. overflow if we've lost bits that are not copies of the sign bit (and thus restored by the arithmetic right shift)?

+    tcg_gen_ext_i32_i64(RsV_i64, RsV);
+    tcg_gen_ext_i32_i64(shift_amt_i64, shift_amt);
+    tcg_gen_shl_i64(A, RsV_i64, shift_amt_i64);
+
+    /* Check for saturation */
+    gen_sat_i64(A_sat_i64, A, 32);

Setting USR_OVF here...

+    tcg_gen_extrl_i64_i32(A_sat, A_sat_i64);
+    tcg_gen_xor_tl(tmp, A_sat, RsV);
+    tcg_gen_brcondi_tl(TCG_COND_GE, tmp, 0, label1);
+    gen_satval(RdV_i64, RsV_i64, 32);

... and also here.  Is this also computing the same saturation value that you 
did above?


+    tcg_gen_extrl_i64_i32(RdV, RdV_i64);
+    tcg_gen_br(done);
+
+    gen_set_label(label1);
+    tcg_gen_brcondi_tl(TCG_COND_LE, RsV, 0, label2);
+    tcg_gen_brcondi_i64(TCG_COND_NE, A, 0, label2);
+    gen_satval(RdV_i64, RsV_i64, 32);

and again?

+    tcg_gen_extrl_i64_i32(RdV, RdV_i64);
+    tcg_gen_br(done);
+
+    gen_set_label(label2);
+    tcg_gen_mov_tl(RdV, A_sat);
+
+    gen_set_label(done);
+
+    tcg_temp_free_i64(RsV_i64);
+    tcg_temp_free_i64(shift_amt_i64);
+    tcg_temp_free_i64(A);
+    tcg_temp_free_i64(A_sat_i64);
+    tcg_temp_free(A_sat);
+    tcg_temp_free_i64(RdV_i64);
+    tcg_temp_free(tmp);
+}
+
+static void gen_sar(TCGv RdV, TCGv RsV, TCGv shift_amt)
+{
+    /*
+     * if (shift_amt < 32) {
+     *     RdV = sar(RsV, shift_amt);
+     * } else {
+     *     if (RsV > 0) {
+     *         RdV = 0;
+     *     } else {
+     *         RdV = ~0;
+     *     }
+     * }
+     */
+    TCGLabel *shift_ge_32 = gen_new_label();
+    TCGLabel *done = gen_new_label();
+
+    tcg_gen_brcondi_tl(TCG_COND_GE, shift_amt, 32, shift_ge_32);
+    tcg_gen_sar_tl(RdV, RsV, shift_amt);
+    tcg_gen_br(done);
+
+    gen_set_label(shift_ge_32);
+    tcg_gen_movcond_tl(TCG_COND_LT, RdV, RsV, tcg_constant_tl(0),
+                       tcg_constant_tl(~0), tcg_constant_tl(0));

This is better done as

    shift = min(shift, 31);
    rdv = rsv >> shift;

without branches.


r~



reply via email to

[Prev in Thread] Current Thread [Next in Thread]