[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[PATCH 65/76] target/arm: Handle FPCR.AH in negation step in SVE FMLS (vector)
From: |
Peter Maydell |
Subject: |
[PATCH 65/76] target/arm: Handle FPCR.AH in negation step in SVE FMLS (vector) |
Date: |
Fri, 24 Jan 2025 16:28:25 +0000 |
Handle the FPCR.AH "don't negate the sign of a NaN" semantics for the
SVE FMLS (vector) insns, by providing new helpers for the AH=1 case
which end up passing fpcr_ah = true to the do_fmla_zpzzz_* functions
that do the work.
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
---
target/arm/tcg/helper-sve.h | 21 ++++++
target/arm/tcg/sve_helper.c | 114 +++++++++++++++++++++++++++------
target/arm/tcg/translate-sve.c | 18 ++++--
3 files changed, 126 insertions(+), 27 deletions(-)
diff --git a/target/arm/tcg/helper-sve.h b/target/arm/tcg/helper-sve.h
index a2e96a498dd..0b1b5887834 100644
--- a/target/arm/tcg/helper-sve.h
+++ b/target/arm/tcg/helper-sve.h
@@ -1475,6 +1475,27 @@ DEF_HELPER_FLAGS_7(sve_fnmls_zpzzz_s, TCG_CALL_NO_RWG,
DEF_HELPER_FLAGS_7(sve_fnmls_zpzzz_d, TCG_CALL_NO_RWG,
void, ptr, ptr, ptr, ptr, ptr, fpst, i32)
+DEF_HELPER_FLAGS_7(sve_ah_fmls_zpzzz_h, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, ptr, fpst, i32)
+DEF_HELPER_FLAGS_7(sve_ah_fmls_zpzzz_s, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, ptr, fpst, i32)
+DEF_HELPER_FLAGS_7(sve_ah_fmls_zpzzz_d, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, ptr, fpst, i32)
+
+DEF_HELPER_FLAGS_7(sve_ah_fnmla_zpzzz_h, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, ptr, fpst, i32)
+DEF_HELPER_FLAGS_7(sve_ah_fnmla_zpzzz_s, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, ptr, fpst, i32)
+DEF_HELPER_FLAGS_7(sve_ah_fnmla_zpzzz_d, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, ptr, fpst, i32)
+
+DEF_HELPER_FLAGS_7(sve_ah_fnmls_zpzzz_h, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, ptr, fpst, i32)
+DEF_HELPER_FLAGS_7(sve_ah_fnmls_zpzzz_s, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, ptr, fpst, i32)
+DEF_HELPER_FLAGS_7(sve_ah_fnmls_zpzzz_d, TCG_CALL_NO_RWG,
+ void, ptr, ptr, ptr, ptr, ptr, fpst, i32)
+
DEF_HELPER_FLAGS_7(sve_fcmla_zpzzz_h, TCG_CALL_NO_RWG,
void, ptr, ptr, ptr, ptr, ptr, fpst, i32)
DEF_HELPER_FLAGS_7(sve_fcmla_zpzzz_s, TCG_CALL_NO_RWG,
diff --git a/target/arm/tcg/sve_helper.c b/target/arm/tcg/sve_helper.c
index dc5a35b46ef..90bcf680fa4 100644
--- a/target/arm/tcg/sve_helper.c
+++ b/target/arm/tcg/sve_helper.c
@@ -4802,7 +4802,7 @@ DO_ZPZ_FP(flogb_d, float64, H1_8, do_float64_logb_as_int)
static void do_fmla_zpzzz_h(void *vd, void *vn, void *vm, void *va, void *vg,
float_status *status, uint32_t desc,
- uint16_t neg1, uint16_t neg3)
+ uint16_t neg1, uint16_t neg3, bool fpcr_ah)
{
intptr_t i = simd_oprsz(desc);
uint64_t *g = vg;
@@ -4814,9 +4814,15 @@ static void do_fmla_zpzzz_h(void *vd, void *vn, void *vm, void *va, void *vg,
if (likely((pg >> (i & 63)) & 1)) {
float16 e1, e2, e3, r;
- e1 = *(uint16_t *)(vn + H1_2(i)) ^ neg1;
+ e1 = *(uint16_t *)(vn + H1_2(i));
e2 = *(uint16_t *)(vm + H1_2(i));
- e3 = *(uint16_t *)(va + H1_2(i)) ^ neg3;
+ e3 = *(uint16_t *)(va + H1_2(i));
+ if (neg1 && !(fpcr_ah && float16_is_any_nan(e1))) {
+ e1 ^= neg1;
+ }
+ if (neg3 && !(fpcr_ah && float16_is_any_nan(e3))) {
+ e3 ^= neg3;
+ }
r = float16_muladd(e1, e2, e3, 0, status);
*(uint16_t *)(vd + H1_2(i)) = r;
}
@@ -4827,30 +4833,48 @@ static void do_fmla_zpzzz_h(void *vd, void *vn, void *vm, void *va, void *vg,
void HELPER(sve_fmla_zpzzz_h)(void *vd, void *vn, void *vm, void *va,
void *vg, float_status *status, uint32_t desc)
{
- do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0, 0);
+ do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0, 0, false);
}
void HELPER(sve_fmls_zpzzz_h)(void *vd, void *vn, void *vm, void *va,
void *vg, float_status *status, uint32_t desc)
{
- do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0x8000, 0);
+ do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0x8000, 0, false);
}
void HELPER(sve_fnmla_zpzzz_h)(void *vd, void *vn, void *vm, void *va,
void *vg, float_status *status, uint32_t desc)
{
- do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0x8000, 0x8000);
+ do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0x8000, 0x8000, false);
}
void HELPER(sve_fnmls_zpzzz_h)(void *vd, void *vn, void *vm, void *va,
void *vg, float_status *status, uint32_t desc)
{
- do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0, 0x8000);
+ do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0, 0x8000, false);
+}
+
+void HELPER(sve_ah_fmls_zpzzz_h)(void *vd, void *vn, void *vm, void *va,
+ void *vg, float_status *status, uint32_t desc)
+{
+ do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0x8000, 0, true);
+}
+
+void HELPER(sve_ah_fnmla_zpzzz_h)(void *vd, void *vn, void *vm, void *va,
+ void *vg, float_status *status, uint32_t desc)
+{
+ do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0x8000, 0x8000, true);
+}
+
+void HELPER(sve_ah_fnmls_zpzzz_h)(void *vd, void *vn, void *vm, void *va,
+ void *vg, float_status *status, uint32_t desc)
+{
+ do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0, 0x8000, true);
}
static void do_fmla_zpzzz_s(void *vd, void *vn, void *vm, void *va, void *vg,
float_status *status, uint32_t desc,
- uint32_t neg1, uint32_t neg3)
+ uint32_t neg1, uint32_t neg3, bool fpcr_ah)
{
intptr_t i = simd_oprsz(desc);
uint64_t *g = vg;
@@ -4862,9 +4886,15 @@ static void do_fmla_zpzzz_s(void *vd, void *vn, void *vm, void *va, void *vg,
if (likely((pg >> (i & 63)) & 1)) {
float32 e1, e2, e3, r;
- e1 = *(uint32_t *)(vn + H1_4(i)) ^ neg1;
+ e1 = *(uint32_t *)(vn + H1_4(i));
e2 = *(uint32_t *)(vm + H1_4(i));
- e3 = *(uint32_t *)(va + H1_4(i)) ^ neg3;
+ e3 = *(uint32_t *)(va + H1_4(i));
+ if (neg1 && !(fpcr_ah && float32_is_any_nan(e1))) {
+ e1 ^= neg1;
+ }
+ if (neg3 && !(fpcr_ah && float32_is_any_nan(e3))) {
+ e3 ^= neg3;
+ }
r = float32_muladd(e1, e2, e3, 0, status);
*(uint32_t *)(vd + H1_4(i)) = r;
}
@@ -4875,30 +4905,48 @@ static void do_fmla_zpzzz_s(void *vd, void *vn, void *vm, void *va, void *vg,
void HELPER(sve_fmla_zpzzz_s)(void *vd, void *vn, void *vm, void *va,
void *vg, float_status *status, uint32_t desc)
{
- do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0, 0);
+ do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0, 0, false);
}
void HELPER(sve_fmls_zpzzz_s)(void *vd, void *vn, void *vm, void *va,
void *vg, float_status *status, uint32_t desc)
{
- do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0x80000000, 0);
+ do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0x80000000, 0, false);
}
void HELPER(sve_fnmla_zpzzz_s)(void *vd, void *vn, void *vm, void *va,
void *vg, float_status *status, uint32_t desc)
{
- do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0x80000000, 0x80000000);
+ do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0x80000000, 0x80000000, false);
}
void HELPER(sve_fnmls_zpzzz_s)(void *vd, void *vn, void *vm, void *va,
void *vg, float_status *status, uint32_t desc)
{
- do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0, 0x80000000);
+ do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0, 0x80000000, false);
+}
+
+void HELPER(sve_ah_fmls_zpzzz_s)(void *vd, void *vn, void *vm, void *va,
+ void *vg, float_status *status, uint32_t desc)
+{
+ do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0x80000000, 0, true);
+}
+
+void HELPER(sve_ah_fnmla_zpzzz_s)(void *vd, void *vn, void *vm, void *va,
+ void *vg, float_status *status, uint32_t desc)
+{
+ do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0x80000000, 0x80000000, true);
+}
+
+void HELPER(sve_ah_fnmls_zpzzz_s)(void *vd, void *vn, void *vm, void *va,
+ void *vg, float_status *status, uint32_t desc)
+{
+ do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0, 0x80000000, true);
}
static void do_fmla_zpzzz_d(void *vd, void *vn, void *vm, void *va, void *vg,
float_status *status, uint32_t desc,
- uint64_t neg1, uint64_t neg3)
+ uint64_t neg1, uint64_t neg3, bool fpcr_ah)
{
intptr_t i = simd_oprsz(desc);
uint64_t *g = vg;
@@ -4910,9 +4958,15 @@ static void do_fmla_zpzzz_d(void *vd, void *vn, void *vm, void *va, void *vg,
if (likely((pg >> (i & 63)) & 1)) {
float64 e1, e2, e3, r;
- e1 = *(uint64_t *)(vn + i) ^ neg1;
+ e1 = *(uint64_t *)(vn + i);
e2 = *(uint64_t *)(vm + i);
- e3 = *(uint64_t *)(va + i) ^ neg3;
+ e3 = *(uint64_t *)(va + i);
+ if (neg1 && !(fpcr_ah && float64_is_any_nan(e1))) {
+ e1 ^= neg1;
+ }
+ if (neg3 && !(fpcr_ah && float64_is_any_nan(e3))) {
+ e3 ^= neg3;
+ }
r = float64_muladd(e1, e2, e3, 0, status);
*(uint64_t *)(vd + i) = r;
}
@@ -4923,25 +4977,43 @@ static void do_fmla_zpzzz_d(void *vd, void *vn, void *vm, void *va, void *vg,
void HELPER(sve_fmla_zpzzz_d)(void *vd, void *vn, void *vm, void *va,
void *vg, float_status *status, uint32_t desc)
{
- do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, 0, 0);
+ do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, 0, 0, false);
}
void HELPER(sve_fmls_zpzzz_d)(void *vd, void *vn, void *vm, void *va,
void *vg, float_status *status, uint32_t desc)
{
- do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, INT64_MIN, 0);
+ do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, INT64_MIN, 0, false);
}
void HELPER(sve_fnmla_zpzzz_d)(void *vd, void *vn, void *vm, void *va,
void *vg, float_status *status, uint32_t desc)
{
- do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, INT64_MIN, INT64_MIN);
+ do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, INT64_MIN, INT64_MIN, false);
}
void HELPER(sve_fnmls_zpzzz_d)(void *vd, void *vn, void *vm, void *va,
void *vg, float_status *status, uint32_t desc)
{
- do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, 0, INT64_MIN);
+ do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, 0, INT64_MIN, false);
+}
+
+void HELPER(sve_ah_fmls_zpzzz_d)(void *vd, void *vn, void *vm, void *va,
+ void *vg, float_status *status, uint32_t desc)
+{
+ do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, INT64_MIN, 0, true);
+}
+
+void HELPER(sve_ah_fnmla_zpzzz_d)(void *vd, void *vn, void *vm, void *va,
+ void *vg, float_status *status, uint32_t desc)
+{
+ do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, INT64_MIN, INT64_MIN, true);
+}
+
+void HELPER(sve_ah_fnmls_zpzzz_d)(void *vd, void *vn, void *vm, void *va,
+ void *vg, float_status *status, uint32_t desc)
+{
+ do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, 0, INT64_MIN, true);
}
/* Two operand floating-point comparison controlled by a predicate.
diff --git a/target/arm/tcg/translate-sve.c b/target/arm/tcg/translate-sve.c
index a7033fe93ab..663634e3a39 100644
--- a/target/arm/tcg/translate-sve.c
+++ b/target/arm/tcg/translate-sve.c
@@ -3924,19 +3924,25 @@ TRANS_FEAT(FCADD, aa64_sve, gen_gvec_fpst_zzzp, fcadd_fns[a->esz],
a->rd, a->rn, a->rm, a->pg, a->rot | (s->fpcr_ah << 1),
a->esz == MO_16 ? FPST_FPCR_F16_A64 : FPST_FPCR_A64)
-#define DO_FMLA(NAME, name) \
+#define DO_FMLA(NAME, name, ah_name) \
static gen_helper_gvec_5_ptr * const name##_fns[4] = { \
NULL, gen_helper_sve_##name##_h, \
gen_helper_sve_##name##_s, gen_helper_sve_##name##_d \
}; \
- TRANS_FEAT(NAME, aa64_sve, gen_gvec_fpst_zzzzp, name##_fns[a->esz], \
+ static gen_helper_gvec_5_ptr * const name##_ah_fns[4] = { \
+ NULL, gen_helper_sve_##ah_name##_h, \
+ gen_helper_sve_##ah_name##_s, gen_helper_sve_##ah_name##_d \
+ }; \
+ TRANS_FEAT(NAME, aa64_sve, gen_gvec_fpst_zzzzp, \
+ s->fpcr_ah ? name##_ah_fns[a->esz] : name##_fns[a->esz], \
a->rd, a->rn, a->rm, a->ra, a->pg, 0, \
a->esz == MO_16 ? FPST_FPCR_F16_A64 : FPST_FPCR_A64)
-DO_FMLA(FMLA_zpzzz, fmla_zpzzz)
-DO_FMLA(FMLS_zpzzz, fmls_zpzzz)
-DO_FMLA(FNMLA_zpzzz, fnmla_zpzzz)
-DO_FMLA(FNMLS_zpzzz, fnmls_zpzzz)
+/* We don't need an ah_fmla_zpzzz because fmla doesn't negate anything */
+DO_FMLA(FMLA_zpzzz, fmla_zpzzz, fmla_zpzzz)
+DO_FMLA(FMLS_zpzzz, fmls_zpzzz, ah_fmls_zpzzz)
+DO_FMLA(FNMLA_zpzzz, fnmla_zpzzz, ah_fnmla_zpzzz)
+DO_FMLA(FNMLS_zpzzz, fnmls_zpzzz, ah_fnmls_zpzzz)
#undef DO_FMLA
--
2.34.1
- [PATCH 57/76] target/arm: Handle FPCR.AH in SVE FABD, (continued)
- [PATCH 57/76] target/arm: Handle FPCR.AH in SVE FABD, Peter Maydell, 2025/01/24
- [PATCH 58/76] target/arm: Handle FPCR.AH in negation steps in FCADD, Peter Maydell, 2025/01/24
- [PATCH 60/76] target/arm: Handle FPCR.AH in FMLSL, Peter Maydell, 2025/01/24
- [PATCH 61/76] target/arm: Handle FPCR.AH in FRECPS and FRSQRTS scalar insns, Peter Maydell, 2025/01/24
- [PATCH 64/76] target/arm: Handle FPCR.AH in negation in FMLS (vector), Peter Maydell, 2025/01/24
- [PATCH 65/76] target/arm: Handle FPCR.AH in negation step in SVE FMLS (vector),
Peter Maydell <=
- [PATCH 63/76] target/arm: Handle FPCR.AH in negation step in FMLS (indexed), Peter Maydell, 2025/01/24
- [PATCH 74/76] target/i386: Use correct type for get_float_exception_flags() values, Peter Maydell, 2025/01/24
- [PATCH 76/76] tests/tcg/x86_64/fma: add test for exact-denormal output, Peter Maydell, 2025/01/24
- [PATCH 41/76] target/arm: Handle FPCR.NEP in do_cvtf_scalar(), Peter Maydell, 2025/01/24