[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[PATCH v6 27/82] target/arm: Implement SVE2 SQSHRUN, SQRSHRUN
From: |
Richard Henderson |
Subject: |
[PATCH v6 27/82] target/arm: Implement SVE2 SQSHRUN, SQRSHRUN |
Date: |
Fri, 30 Apr 2021 13:25:15 -0700 |
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
target/arm/helper-sve.h | 16 +++++++
target/arm/sve.decode | 4 ++
target/arm/sve_helper.c | 35 ++++++++++++++
target/arm/translate-sve.c | 98 ++++++++++++++++++++++++++++++++++++++
4 files changed, 153 insertions(+)
diff --git a/target/arm/helper-sve.h b/target/arm/helper-sve.h
index 2b2ebea631..2e80d9d27b 100644
--- a/target/arm/helper-sve.h
+++ b/target/arm/helper-sve.h
@@ -2460,6 +2460,22 @@ DEF_HELPER_FLAGS_3(sve2_rshrnt_h, TCG_CALL_NO_RWG, void,
ptr, ptr, i32)
DEF_HELPER_FLAGS_3(sve2_rshrnt_s, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
DEF_HELPER_FLAGS_3(sve2_rshrnt_d, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(sve2_sqshrunb_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(sve2_sqshrunb_s, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(sve2_sqshrunb_d, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_3(sve2_sqshrunt_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(sve2_sqshrunt_s, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(sve2_sqshrunt_d, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_3(sve2_sqrshrunb_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(sve2_sqrshrunb_s, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(sve2_sqrshrunb_d, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_3(sve2_sqrshrunt_h, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(sve2_sqrshrunt_s, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+DEF_HELPER_FLAGS_3(sve2_sqrshrunt_d, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
+
DEF_HELPER_FLAGS_6(sve2_faddp_zpzz_h, TCG_CALL_NO_RWG,
void, ptr, ptr, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_6(sve2_faddp_zpzz_s, TCG_CALL_NO_RWG,
diff --git a/target/arm/sve.decode b/target/arm/sve.decode
index 169486ecb2..18faa900ca 100644
--- a/target/arm/sve.decode
+++ b/target/arm/sve.decode
@@ -1288,6 +1288,10 @@ SQXTUNT 01000101 .. 1 ..... 010 101 ..... .....
@rd_rn_tszimm_shl
## SVE2 bitwise shift right narrow
# Bit 23 == 0 is handled by esz > 0 in the translator.
+SQSHRUNB 01000101 .. 1 ..... 00 0000 ..... ..... @rd_rn_tszimm_shr
+SQSHRUNT 01000101 .. 1 ..... 00 0001 ..... ..... @rd_rn_tszimm_shr
+SQRSHRUNB 01000101 .. 1 ..... 00 0010 ..... ..... @rd_rn_tszimm_shr
+SQRSHRUNT 01000101 .. 1 ..... 00 0011 ..... ..... @rd_rn_tszimm_shr
SHRNB 01000101 .. 1 ..... 00 0100 ..... ..... @rd_rn_tszimm_shr
SHRNT 01000101 .. 1 ..... 00 0101 ..... ..... @rd_rn_tszimm_shr
RSHRNB 01000101 .. 1 ..... 00 0110 ..... ..... @rd_rn_tszimm_shr
diff --git a/target/arm/sve_helper.c b/target/arm/sve_helper.c
index 3f864da3ab..d6b6293ab0 100644
--- a/target/arm/sve_helper.c
+++ b/target/arm/sve_helper.c
@@ -1874,6 +1874,16 @@ static inline uint64_t do_urshr(uint64_t x, unsigned sh)
}
}
+static inline int64_t do_srshr(int64_t x, unsigned sh)
+{
+ if (likely(sh < 64)) {
+ return (x >> sh) + ((x >> (sh - 1)) & 1);
+ } else {
+ /* Rounding the sign bit always produces 0. */
+ return 0;
+ }
+}
+
DO_ZPZI(sve_asr_zpzi_b, int8_t, H1, DO_SHR)
DO_ZPZI(sve_asr_zpzi_h, int16_t, H1_2, DO_SHR)
DO_ZPZI(sve_asr_zpzi_s, int32_t, H1_4, DO_SHR)
@@ -1936,6 +1946,31 @@ DO_SHRNT(sve2_rshrnt_h, uint16_t, uint8_t, H1_2, H1,
do_urshr)
DO_SHRNT(sve2_rshrnt_s, uint32_t, uint16_t, H1_4, H1_2, do_urshr)
DO_SHRNT(sve2_rshrnt_d, uint64_t, uint32_t, , H1_4, do_urshr)
+#define DO_SQSHRUN_H(x, sh) do_sat_bhs((int64_t)(x) >> sh, 0, UINT8_MAX)
+#define DO_SQSHRUN_S(x, sh) do_sat_bhs((int64_t)(x) >> sh, 0, UINT16_MAX)
+#define DO_SQSHRUN_D(x, sh) \
+ do_sat_bhs((int64_t)(x) >> (sh < 64 ? sh : 63), 0, UINT32_MAX)
+
+DO_SHRNB(sve2_sqshrunb_h, int16_t, uint8_t, DO_SQSHRUN_H)
+DO_SHRNB(sve2_sqshrunb_s, int32_t, uint16_t, DO_SQSHRUN_S)
+DO_SHRNB(sve2_sqshrunb_d, int64_t, uint32_t, DO_SQSHRUN_D)
+
+DO_SHRNT(sve2_sqshrunt_h, int16_t, uint8_t, H1_2, H1, DO_SQSHRUN_H)
+DO_SHRNT(sve2_sqshrunt_s, int32_t, uint16_t, H1_4, H1_2, DO_SQSHRUN_S)
+DO_SHRNT(sve2_sqshrunt_d, int64_t, uint32_t, , H1_4, DO_SQSHRUN_D)
+
+#define DO_SQRSHRUN_H(x, sh) do_sat_bhs(do_srshr(x, sh), 0, UINT8_MAX)
+#define DO_SQRSHRUN_S(x, sh) do_sat_bhs(do_srshr(x, sh), 0, UINT16_MAX)
+#define DO_SQRSHRUN_D(x, sh) do_sat_bhs(do_srshr(x, sh), 0, UINT32_MAX)
+
+DO_SHRNB(sve2_sqrshrunb_h, int16_t, uint8_t, DO_SQRSHRUN_H)
+DO_SHRNB(sve2_sqrshrunb_s, int32_t, uint16_t, DO_SQRSHRUN_S)
+DO_SHRNB(sve2_sqrshrunb_d, int64_t, uint32_t, DO_SQRSHRUN_D)
+
+DO_SHRNT(sve2_sqrshrunt_h, int16_t, uint8_t, H1_2, H1, DO_SQRSHRUN_H)
+DO_SHRNT(sve2_sqrshrunt_s, int32_t, uint16_t, H1_4, H1_2, DO_SQRSHRUN_S)
+DO_SHRNT(sve2_sqrshrunt_d, int64_t, uint32_t, , H1_4, DO_SQRSHRUN_D)
+
#undef DO_SHRNB
#undef DO_SHRNT
diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c
index c1a081acaa..5ff6b8ffb6 100644
--- a/target/arm/translate-sve.c
+++ b/target/arm/translate-sve.c
@@ -6858,6 +6858,104 @@ static bool trans_RSHRNT(DisasContext *s, arg_rri_esz
*a)
return do_sve2_shr_narrow(s, a, ops);
}
+static void gen_sqshrunb_vec(unsigned vece, TCGv_vec d,
+ TCGv_vec n, int64_t shr)
+{
+ TCGv_vec t = tcg_temp_new_vec_matching(d);
+ int halfbits = 4 << vece;
+
+ tcg_gen_sari_vec(vece, n, n, shr);
+ tcg_gen_dupi_vec(vece, t, 0);
+ tcg_gen_smax_vec(vece, n, n, t);
+ tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
+ tcg_gen_umin_vec(vece, d, n, t);
+ tcg_temp_free_vec(t);
+}
+
+static bool trans_SQSHRUNB(DisasContext *s, arg_rri_esz *a)
+{
+ static const TCGOpcode vec_list[] = {
+ INDEX_op_sari_vec, INDEX_op_smax_vec, INDEX_op_umin_vec, 0
+ };
+ static const GVecGen2i ops[3] = {
+ { .fniv = gen_sqshrunb_vec,
+ .opt_opc = vec_list,
+ .fno = gen_helper_sve2_sqshrunb_h,
+ .vece = MO_16 },
+ { .fniv = gen_sqshrunb_vec,
+ .opt_opc = vec_list,
+ .fno = gen_helper_sve2_sqshrunb_s,
+ .vece = MO_32 },
+ { .fniv = gen_sqshrunb_vec,
+ .opt_opc = vec_list,
+ .fno = gen_helper_sve2_sqshrunb_d,
+ .vece = MO_64 },
+ };
+ return do_sve2_shr_narrow(s, a, ops);
+}
+
+static void gen_sqshrunt_vec(unsigned vece, TCGv_vec d,
+ TCGv_vec n, int64_t shr)
+{
+ TCGv_vec t = tcg_temp_new_vec_matching(d);
+ int halfbits = 4 << vece;
+
+ tcg_gen_sari_vec(vece, n, n, shr);
+ tcg_gen_dupi_vec(vece, t, 0);
+ tcg_gen_smax_vec(vece, n, n, t);
+ tcg_gen_dupi_vec(vece, t, MAKE_64BIT_MASK(0, halfbits));
+ tcg_gen_umin_vec(vece, n, n, t);
+ tcg_gen_shli_vec(vece, n, n, halfbits);
+ tcg_gen_bitsel_vec(vece, d, t, d, n);
+ tcg_temp_free_vec(t);
+}
+
+static bool trans_SQSHRUNT(DisasContext *s, arg_rri_esz *a)
+{
+ static const TCGOpcode vec_list[] = {
+ INDEX_op_shli_vec, INDEX_op_sari_vec,
+ INDEX_op_smax_vec, INDEX_op_umin_vec, 0
+ };
+ static const GVecGen2i ops[3] = {
+ { .fniv = gen_sqshrunt_vec,
+ .opt_opc = vec_list,
+ .load_dest = true,
+ .fno = gen_helper_sve2_sqshrunt_h,
+ .vece = MO_16 },
+ { .fniv = gen_sqshrunt_vec,
+ .opt_opc = vec_list,
+ .load_dest = true,
+ .fno = gen_helper_sve2_sqshrunt_s,
+ .vece = MO_32 },
+ { .fniv = gen_sqshrunt_vec,
+ .opt_opc = vec_list,
+ .load_dest = true,
+ .fno = gen_helper_sve2_sqshrunt_d,
+ .vece = MO_64 },
+ };
+ return do_sve2_shr_narrow(s, a, ops);
+}
+
+static bool trans_SQRSHRUNB(DisasContext *s, arg_rri_esz *a)
+{
+ static const GVecGen2i ops[3] = {
+ { .fno = gen_helper_sve2_sqrshrunb_h },
+ { .fno = gen_helper_sve2_sqrshrunb_s },
+ { .fno = gen_helper_sve2_sqrshrunb_d },
+ };
+ return do_sve2_shr_narrow(s, a, ops);
+}
+
+static bool trans_SQRSHRUNT(DisasContext *s, arg_rri_esz *a)
+{
+ static const GVecGen2i ops[3] = {
+ { .fno = gen_helper_sve2_sqrshrunt_h },
+ { .fno = gen_helper_sve2_sqrshrunt_s },
+ { .fno = gen_helper_sve2_sqrshrunt_d },
+ };
+ return do_sve2_shr_narrow(s, a, ops);
+}
+
static bool do_sve2_zpzz_fp(DisasContext *s, arg_rprr_esz *a,
gen_helper_gvec_4_ptr *fn)
{
--
2.25.1
- [PATCH v6 12/82] target/arm: Implement SVE2 integer add/subtract wide, (continued)
- [PATCH v6 12/82] target/arm: Implement SVE2 integer add/subtract wide, Richard Henderson, 2021/04/30
- [PATCH v6 14/82] target/arm: Implement PMULLB and PMULLT, Richard Henderson, 2021/04/30
- [PATCH v6 15/82] target/arm: Implement SVE2 bitwise shift left long, Richard Henderson, 2021/04/30
- [PATCH v6 16/82] target/arm: Implement SVE2 bitwise exclusive-or interleaved, Richard Henderson, 2021/04/30
- [PATCH v6 17/82] target/arm: Implement SVE2 bitwise permute, Richard Henderson, 2021/04/30
- [PATCH v6 20/82] target/arm: Implement SVE2 integer add/subtract long with carry, Richard Henderson, 2021/04/30
- [PATCH v6 24/82] target/arm: Implement SVE2 saturating extract narrow, Richard Henderson, 2021/04/30
- [PATCH v6 19/82] target/arm: Implement SVE2 integer absolute difference and accumulate long, Richard Henderson, 2021/04/30
- [PATCH v6 18/82] target/arm: Implement SVE2 complex integer add, Richard Henderson, 2021/04/30
- [PATCH v6 25/82] target/arm: Implement SVE2 floating-point pairwise, Richard Henderson, 2021/04/30
- [PATCH v6 27/82] target/arm: Implement SVE2 SQSHRUN, SQRSHRUN,
Richard Henderson <=
- [PATCH v6 21/82] target/arm: Implement SVE2 bitwise shift right and accumulate, Richard Henderson, 2021/04/30
- [PATCH v6 26/82] target/arm: Implement SVE2 SHRN, RSHRN, Richard Henderson, 2021/04/30
- [PATCH v6 22/82] target/arm: Implement SVE2 bitwise shift and insert, Richard Henderson, 2021/04/30
- [PATCH v6 23/82] target/arm: Implement SVE2 integer absolute difference and accumulate, Richard Henderson, 2021/04/30
- [PATCH v6 30/82] target/arm: Implement SVE2 WHILEGT, WHILEGE, WHILEHI, WHILEHS, Richard Henderson, 2021/04/30
- [PATCH v6 29/82] target/arm: Implement SVE2 SQSHRN, SQRSHRN, Richard Henderson, 2021/04/30
- [PATCH v6 28/82] target/arm: Implement SVE2 UQSHRN, UQRSHRN, Richard Henderson, 2021/04/30
- [PATCH v6 31/82] target/arm: Implement SVE2 WHILERW, WHILEWR, Richard Henderson, 2021/04/30
- [PATCH v6 32/82] target/arm: Implement SVE2 bitwise ternary operations, Richard Henderson, 2021/04/30
- [PATCH v6 34/82] target/arm: Implement SVE2 saturating multiply-add long, Richard Henderson, 2021/04/30