[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[PATCH 09/31] target/arm: Implement SVE2 integer pairwise arithmetic
From: |
Richard Henderson |
Subject: |
[PATCH 09/31] target/arm: Implement SVE2 integer pairwise arithmetic |
Date: |
Thu, 26 Mar 2020 16:08:16 -0700 |
Signed-off-by: Richard Henderson <address@hidden>
---
target/arm/helper-sve.h | 45 +++++++++++++++++++++++++
target/arm/sve.decode | 8 +++++
target/arm/sve_helper.c | 67 ++++++++++++++++++++++++++++++++++++++
target/arm/translate-sve.c | 6 ++++
4 files changed, 126 insertions(+)
diff --git a/target/arm/helper-sve.h b/target/arm/helper-sve.h
index 149fff1fae..028c3b85a8 100644
--- a/target/arm/helper-sve.h
+++ b/target/arm/helper-sve.h
@@ -326,6 +326,51 @@ DEF_HELPER_FLAGS_5(sve_sel_zpzz_s, TCG_CALL_NO_RWG,
DEF_HELPER_FLAGS_5(sve_sel_zpzz_d, TCG_CALL_NO_RWG,
void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve2_addp_zpzz_b, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve2_addp_zpzz_h, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve2_addp_zpzz_s, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve2_addp_zpzz_d, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_5(sve2_smaxp_zpzz_b, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve2_smaxp_zpzz_h, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve2_smaxp_zpzz_s, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve2_smaxp_zpzz_d, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_5(sve2_umaxp_zpzz_b, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve2_umaxp_zpzz_h, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve2_umaxp_zpzz_s, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve2_umaxp_zpzz_d, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_5(sve2_sminp_zpzz_b, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve2_sminp_zpzz_h, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve2_sminp_zpzz_s, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve2_sminp_zpzz_d, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+
+DEF_HELPER_FLAGS_5(sve2_uminp_zpzz_b, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve2_uminp_zpzz_h, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve2_uminp_zpzz_s, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+DEF_HELPER_FLAGS_5(sve2_uminp_zpzz_d, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, i32)
+
DEF_HELPER_FLAGS_5(sve_asr_zpzw_b, TCG_CALL_NO_RWG,
void, ptr, ptr, ptr, ptr, i32)
DEF_HELPER_FLAGS_5(sve_asr_zpzw_h, TCG_CALL_NO_RWG,
diff --git a/target/arm/sve.decode b/target/arm/sve.decode
index 54076bb607..86a6bf7088 100644
--- a/target/arm/sve.decode
+++ b/target/arm/sve.decode
@@ -1142,3 +1142,11 @@ SRHADD 01000100 .. 010 100 100 ... ..... .....
@rdn_pg_rm
URHADD 01000100 .. 010 101 100 ... ..... ..... @rdn_pg_rm
SHSUB 01000100 .. 010 110 100 ... ..... ..... @rdm_pg_rn # SHSUBR
UHSUB 01000100 .. 010 111 100 ... ..... ..... @rdm_pg_rn # UHSUBR
+
+### SVE2 integer pairwise arithmetic
+
+ADDP 01000100 .. 010 001 101 ... ..... ..... @rdn_pg_rm
+SMAXP 01000100 .. 010 100 101 ... ..... ..... @rdn_pg_rm
+UMAXP 01000100 .. 010 101 101 ... ..... ..... @rdn_pg_rm
+SMINP 01000100 .. 010 110 101 ... ..... ..... @rdn_pg_rm
+UMINP 01000100 .. 010 111 101 ... ..... ..... @rdn_pg_rm
diff --git a/target/arm/sve_helper.c b/target/arm/sve_helper.c
index 5d75aed7b7..d7c181ddb8 100644
--- a/target/arm/sve_helper.c
+++ b/target/arm/sve_helper.c
@@ -681,6 +681,73 @@ DO_ZPZZ_D(sve2_uhsub_zpzz_d, uint64_t, DO_HSUB_D)
#undef DO_ZPZZ
#undef DO_ZPZZ_D
+/*
+ * Three operand expander, operating on element pairs.
+ * If the slot I is even, the elements from from VN {I, I+1}.
+ * If the slot I is odd, the elements from from VM {I-1, I}.
+ */
+#define DO_ZPZZ_PAIR(NAME, TYPE, H, OP) \
+void HELPER(NAME)(void *vd, void *vn, void *vm, void *vg, uint32_t desc) \
+{ \
+ intptr_t i, opr_sz = simd_oprsz(desc); \
+ for (i = 0; i < opr_sz; ) { \
+ uint16_t pg = *(uint16_t *)(vg + H1_2(i >> 3)); \
+ do { \
+ if (pg & 1) { \
+ void *p = (i & 1 ? vm : vn); \
+ TYPE nn = *(TYPE *)(p + H(i & ~1)); \
+ TYPE mm = *(TYPE *)(p + H(i | 1)); \
+ *(TYPE *)(vd + H(i)) = OP(nn, mm); \
+ } \
+ i += sizeof(TYPE), pg >>= sizeof(TYPE); \
+ } while (i & 15); \
+ } \
+}
+
+/* Similarly, specialized for 64-bit operands. */
+#define DO_ZPZZ_PAIR_D(NAME, TYPE, OP) \
+void HELPER(NAME)(void *vd, void *vn, void *vm, void *vg, uint32_t desc) \
+{ \
+ intptr_t i, opr_sz = simd_oprsz(desc) / 8; \
+ TYPE *d = vd, *n = vn, *m = vm; \
+ uint8_t *pg = vg; \
+ for (i = 0; i < opr_sz; i += 1) { \
+ if (pg[H1(i)] & 1) { \
+ TYPE *p = (i & 1 ? m : n) + (i & ~1); \
+ TYPE nn = p[0], mm = p[1]; \
+ d[i] = OP(nn, mm); \
+ } \
+ } \
+}
+
+DO_ZPZZ_PAIR(sve2_addp_zpzz_b, uint8_t, H1_2, DO_ADD)
+DO_ZPZZ_PAIR(sve2_addp_zpzz_h, uint16_t, H1_2, DO_ADD)
+DO_ZPZZ_PAIR(sve2_addp_zpzz_s, uint32_t, H1_4, DO_ADD)
+DO_ZPZZ_PAIR_D(sve2_addp_zpzz_d, uint64_t, DO_ADD)
+
+DO_ZPZZ_PAIR(sve2_umaxp_zpzz_b, uint8_t, H1_2, DO_MAX)
+DO_ZPZZ_PAIR(sve2_umaxp_zpzz_h, uint16_t, H1_2, DO_MAX)
+DO_ZPZZ_PAIR(sve2_umaxp_zpzz_s, uint32_t, H1_4, DO_MAX)
+DO_ZPZZ_PAIR_D(sve2_umaxp_zpzz_d, uint64_t, DO_MAX)
+
+DO_ZPZZ_PAIR(sve2_uminp_zpzz_b, uint8_t, H1_2, DO_MIN)
+DO_ZPZZ_PAIR(sve2_uminp_zpzz_h, uint16_t, H1_2, DO_MIN)
+DO_ZPZZ_PAIR(sve2_uminp_zpzz_s, uint32_t, H1_4, DO_MIN)
+DO_ZPZZ_PAIR_D(sve2_uminp_zpzz_d, uint64_t, DO_MIN)
+
+DO_ZPZZ_PAIR(sve2_smaxp_zpzz_b, int8_t, H1_2, DO_MAX)
+DO_ZPZZ_PAIR(sve2_smaxp_zpzz_h, int16_t, H1_2, DO_MAX)
+DO_ZPZZ_PAIR(sve2_smaxp_zpzz_s, int32_t, H1_4, DO_MAX)
+DO_ZPZZ_PAIR_D(sve2_smaxp_zpzz_d, int64_t, DO_MAX)
+
+DO_ZPZZ_PAIR(sve2_sminp_zpzz_b, int8_t, H1_2, DO_MIN)
+DO_ZPZZ_PAIR(sve2_sminp_zpzz_h, int16_t, H1_2, DO_MIN)
+DO_ZPZZ_PAIR(sve2_sminp_zpzz_s, int32_t, H1_4, DO_MIN)
+DO_ZPZZ_PAIR_D(sve2_sminp_zpzz_d, int64_t, DO_MIN)
+
+#undef DO_ZPZZ_PAIR
+#undef DO_ZPZZ_PAIR_D
+
/* Three-operand expander, controlled by a predicate, in which the
* third operand is "wide". That is, for D = N op M, the same 64-bit
* value of M is used with all of the narrower values of N.
diff --git a/target/arm/translate-sve.c b/target/arm/translate-sve.c
index 7d619d7ad4..5f137c0e92 100644
--- a/target/arm/translate-sve.c
+++ b/target/arm/translate-sve.c
@@ -5992,3 +5992,9 @@ DO_SVE2_ZPZZ(SHSUB, shsub)
DO_SVE2_ZPZZ(UHADD, uhadd)
DO_SVE2_ZPZZ(URHADD, urhadd)
DO_SVE2_ZPZZ(UHSUB, uhsub)
+
+DO_SVE2_ZPZZ(ADDP, addp)
+DO_SVE2_ZPZZ(SMAXP, smaxp)
+DO_SVE2_ZPZZ(UMAXP, umaxp)
+DO_SVE2_ZPZZ(SMINP, sminp)
+DO_SVE2_ZPZZ(UMINP, uminp)
--
2.20.1
- [PATCH for-5.1 00/31] target/arm: SVE2, part 1, Richard Henderson, 2020/03/26
- [PATCH 01/31] target/arm: Add ID_AA64ZFR0 fields and isar_feature_aa64_sve2, Richard Henderson, 2020/03/26
- [PATCH 02/31] target/arm: Implement SVE2 Integer Multiply - Unpredicated, Richard Henderson, 2020/03/26
- [PATCH 03/31] target/arm: Implement SVE2 integer pairwise add and accumulate long, Richard Henderson, 2020/03/26
- [PATCH 04/31] target/arm: Remove fp_status from helper_{recpe, rsqrte}_u32, Richard Henderson, 2020/03/26
- [PATCH 05/31] target/arm: Implement SVE2 integer unary operations (predicated), Richard Henderson, 2020/03/26
- [PATCH 07/31] target/arm: Implement SVE2 saturating/rounding bitwise shift left (predicated), Richard Henderson, 2020/03/26
- [PATCH 08/31] target/arm: Implement SVE2 integer halving add/subtract (predicated), Richard Henderson, 2020/03/26
- [PATCH 06/31] target/arm: Split out saturating/rounding shifts from neon, Richard Henderson, 2020/03/26
- [PATCH 09/31] target/arm: Implement SVE2 integer pairwise arithmetic,
Richard Henderson <=
- [PATCH 10/31] target/arm: Implement SVE2 saturating add/subtract (predicated), Richard Henderson, 2020/03/26
- [PATCH 12/31] target/arm: Implement SVE2 integer add/subtract interleaved long, Richard Henderson, 2020/03/26
- [PATCH 11/31] target/arm: Implement SVE2 integer add/subtract long, Richard Henderson, 2020/03/26
- [PATCH 13/31] target/arm: Implement SVE2 integer add/subtract wide, Richard Henderson, 2020/03/26
- [PATCH 14/31] target/arm: Implement SVE2 integer multiply long, Richard Henderson, 2020/03/26
- [PATCH 15/31] target/arm: Implement PMULLB and PMULLT, Richard Henderson, 2020/03/26
- [PATCH 16/31] target/arm: Tidy SVE tszimm shift formats, Richard Henderson, 2020/03/26
- [PATCH 17/31] target/arm: Implement SVE2 bitwise shift left long, Richard Henderson, 2020/03/26
- [PATCH 18/31] target/arm: Implement SVE2 bitwise exclusive-or interleaved, Richard Henderson, 2020/03/26
- [PATCH 19/31] target/arm: Implement SVE2 bitwise permute, Richard Henderson, 2020/03/26