[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[PATCH v2 22/34] target/arm: Use flags for AH negation in do_fmla_zpzzz_*
From: |
Richard Henderson |
Subject: |
[PATCH v2 22/34] target/arm: Use flags for AH negation in do_fmla_zpzzz_* |
Date: |
Tue, 28 Jan 2025 17:38:45 -0800 |
The float*_muladd functions have a flags argument that can
perform optional negation of various operands. We don't use
that for "normal" arm fmla, because the muladd flags are not
applied when an input is a NaN. But since FEAT_AFP does not
negate NaNs, this behaviour is exactly what we need.
Since we have separate helper entry points for the various
fmla, fmls, fnmla, fnmls instructions, it's easy to just
pass down the exact values required so that no conditional
branch is required within the inner loop.
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
---
target/arm/tcg/sve_helper.c | 93 +++++++++++++++++--------------------
1 file changed, 42 insertions(+), 51 deletions(-)
diff --git a/target/arm/tcg/sve_helper.c b/target/arm/tcg/sve_helper.c
index a1f7743221..a01613f079 100644
--- a/target/arm/tcg/sve_helper.c
+++ b/target/arm/tcg/sve_helper.c
@@ -4814,7 +4814,7 @@ DO_ZPZ_FP(flogb_d, float64, H1_8, do_float64_logb_as_int)
static void do_fmla_zpzzz_h(void *vd, void *vn, void *vm, void *va, void *vg,
float_status *status, uint32_t desc,
- uint16_t neg1, uint16_t neg3, bool fpcr_ah)
+ uint16_t neg1, uint16_t neg3, int flags)
{
intptr_t i = simd_oprsz(desc);
uint64_t *g = vg;
@@ -4826,16 +4826,10 @@ static void do_fmla_zpzzz_h(void *vd, void *vn, void
*vm, void *va, void *vg,
if (likely((pg >> (i & 63)) & 1)) {
float16 e1, e2, e3, r;
- e1 = *(uint16_t *)(vn + H1_2(i));
+ e1 = *(uint16_t *)(vn + H1_2(i)) ^ neg1;
e2 = *(uint16_t *)(vm + H1_2(i));
- e3 = *(uint16_t *)(va + H1_2(i));
- if (neg1 && !(fpcr_ah && float16_is_any_nan(e1))) {
- e1 ^= neg1;
- }
- if (neg3 && !(fpcr_ah && float16_is_any_nan(e3))) {
- e3 ^= neg3;
- }
- r = float16_muladd(e1, e2, e3, 0, status);
+ e3 = *(uint16_t *)(va + H1_2(i)) ^ neg3;
+ r = float16_muladd(e1, e2, e3, flags, status);
*(uint16_t *)(vd + H1_2(i)) = r;
}
} while (i & 63);
@@ -4845,48 +4839,51 @@ static void do_fmla_zpzzz_h(void *vd, void *vn, void
*vm, void *va, void *vg,
void HELPER(sve_fmla_zpzzz_h)(void *vd, void *vn, void *vm, void *va,
void *vg, float_status *status, uint32_t desc)
{
- do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0, 0, false);
+ do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0, 0, 0);
}
void HELPER(sve_fmls_zpzzz_h)(void *vd, void *vn, void *vm, void *va,
void *vg, float_status *status, uint32_t desc)
{
- do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0x8000, 0, false);
+ do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0x8000, 0, 0);
}
void HELPER(sve_fnmla_zpzzz_h)(void *vd, void *vn, void *vm, void *va,
void *vg, float_status *status, uint32_t desc)
{
- do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0x8000, 0x8000, false);
+ do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0x8000, 0x8000, 0);
}
void HELPER(sve_fnmls_zpzzz_h)(void *vd, void *vn, void *vm, void *va,
void *vg, float_status *status, uint32_t desc)
{
- do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0, 0x8000, false);
+ do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0, 0x8000, 0);
}
void HELPER(sve_ah_fmls_zpzzz_h)(void *vd, void *vn, void *vm, void *va,
void *vg, float_status *status, uint32_t desc)
{
- do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0x8000, 0, true);
+ do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0, 0,
+ float_muladd_negate_product);
}
void HELPER(sve_ah_fnmla_zpzzz_h)(void *vd, void *vn, void *vm, void *va,
void *vg, float_status *status, uint32_t desc)
{
- do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0x8000, 0x8000, true);
+ do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0, 0,
+ float_muladd_negate_product | float_muladd_negate_c);
}
void HELPER(sve_ah_fnmls_zpzzz_h)(void *vd, void *vn, void *vm, void *va,
void *vg, float_status *status, uint32_t desc)
{
- do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0, 0x8000, true);
+ do_fmla_zpzzz_h(vd, vn, vm, va, vg, status, desc, 0, 0,
+ float_muladd_negate_c);
}
static void do_fmla_zpzzz_s(void *vd, void *vn, void *vm, void *va, void *vg,
float_status *status, uint32_t desc,
- uint32_t neg1, uint32_t neg3, bool fpcr_ah)
+ uint32_t neg1, uint32_t neg3, int flags)
{
intptr_t i = simd_oprsz(desc);
uint64_t *g = vg;
@@ -4898,16 +4895,10 @@ static void do_fmla_zpzzz_s(void *vd, void *vn, void
*vm, void *va, void *vg,
if (likely((pg >> (i & 63)) & 1)) {
float32 e1, e2, e3, r;
- e1 = *(uint32_t *)(vn + H1_4(i));
+ e1 = *(uint32_t *)(vn + H1_4(i)) ^ neg1;
e2 = *(uint32_t *)(vm + H1_4(i));
- e3 = *(uint32_t *)(va + H1_4(i));
- if (neg1 && !(fpcr_ah && float32_is_any_nan(e1))) {
- e1 ^= neg1;
- }
- if (neg3 && !(fpcr_ah && float32_is_any_nan(e3))) {
- e3 ^= neg3;
- }
- r = float32_muladd(e1, e2, e3, 0, status);
+ e3 = *(uint32_t *)(va + H1_4(i)) ^ neg3;
+ r = float32_muladd(e1, e2, e3, flags, status);
*(uint32_t *)(vd + H1_4(i)) = r;
}
} while (i & 63);
@@ -4917,48 +4908,51 @@ static void do_fmla_zpzzz_s(void *vd, void *vn, void
*vm, void *va, void *vg,
void HELPER(sve_fmla_zpzzz_s)(void *vd, void *vn, void *vm, void *va,
void *vg, float_status *status, uint32_t desc)
{
- do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0, 0, false);
+ do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0, 0, 0);
}
void HELPER(sve_fmls_zpzzz_s)(void *vd, void *vn, void *vm, void *va,
void *vg, float_status *status, uint32_t desc)
{
- do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0x80000000, 0, false);
+ do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0x80000000, 0, 0);
}
void HELPER(sve_fnmla_zpzzz_s)(void *vd, void *vn, void *vm, void *va,
void *vg, float_status *status, uint32_t desc)
{
- do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0x80000000, 0x80000000,
false);
+ do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0x80000000, 0x80000000,
0);
}
void HELPER(sve_fnmls_zpzzz_s)(void *vd, void *vn, void *vm, void *va,
void *vg, float_status *status, uint32_t desc)
{
- do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0, 0x80000000, false);
+ do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0, 0x80000000, 0);
}
void HELPER(sve_ah_fmls_zpzzz_s)(void *vd, void *vn, void *vm, void *va,
void *vg, float_status *status, uint32_t desc)
{
- do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0x80000000, 0, true);
+ do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0, 0,
+ float_muladd_negate_product);
}
void HELPER(sve_ah_fnmla_zpzzz_s)(void *vd, void *vn, void *vm, void *va,
void *vg, float_status *status, uint32_t desc)
{
- do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0x80000000, 0x80000000,
true);
+ do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0, 0,
+ float_muladd_negate_product | float_muladd_negate_c);
}
void HELPER(sve_ah_fnmls_zpzzz_s)(void *vd, void *vn, void *vm, void *va,
void *vg, float_status *status, uint32_t desc)
{
- do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0, 0x80000000, true);
+ do_fmla_zpzzz_s(vd, vn, vm, va, vg, status, desc, 0, 0,
+ float_muladd_negate_c);
}
static void do_fmla_zpzzz_d(void *vd, void *vn, void *vm, void *va, void *vg,
float_status *status, uint32_t desc,
- uint64_t neg1, uint64_t neg3, bool fpcr_ah)
+ uint64_t neg1, uint64_t neg3, int flags)
{
intptr_t i = simd_oprsz(desc);
uint64_t *g = vg;
@@ -4970,16 +4964,10 @@ static void do_fmla_zpzzz_d(void *vd, void *vn, void
*vm, void *va, void *vg,
if (likely((pg >> (i & 63)) & 1)) {
float64 e1, e2, e3, r;
- e1 = *(uint64_t *)(vn + i);
+ e1 = *(uint64_t *)(vn + i) ^ neg1;
e2 = *(uint64_t *)(vm + i);
- e3 = *(uint64_t *)(va + i);
- if (neg1 && !(fpcr_ah && float64_is_any_nan(e1))) {
- e1 ^= neg1;
- }
- if (neg3 && !(fpcr_ah && float64_is_any_nan(e3))) {
- e3 ^= neg3;
- }
- r = float64_muladd(e1, e2, e3, 0, status);
+ e3 = *(uint64_t *)(va + i) ^ neg3;
+ r = float64_muladd(e1, e2, e3, flags, status);
*(uint64_t *)(vd + i) = r;
}
} while (i & 63);
@@ -4989,43 +4977,46 @@ static void do_fmla_zpzzz_d(void *vd, void *vn, void
*vm, void *va, void *vg,
void HELPER(sve_fmla_zpzzz_d)(void *vd, void *vn, void *vm, void *va,
void *vg, float_status *status, uint32_t desc)
{
- do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, 0, 0, false);
+ do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, 0, 0, 0);
}
void HELPER(sve_fmls_zpzzz_d)(void *vd, void *vn, void *vm, void *va,
void *vg, float_status *status, uint32_t desc)
{
- do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, INT64_MIN, 0, false);
+ do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, INT64_MIN, 0, 0);
}
void HELPER(sve_fnmla_zpzzz_d)(void *vd, void *vn, void *vm, void *va,
void *vg, float_status *status, uint32_t desc)
{
- do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, INT64_MIN, INT64_MIN,
false);
+ do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, INT64_MIN, INT64_MIN, 0);
}
void HELPER(sve_fnmls_zpzzz_d)(void *vd, void *vn, void *vm, void *va,
void *vg, float_status *status, uint32_t desc)
{
- do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, 0, INT64_MIN, false);
+ do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, 0, INT64_MIN, 0);
}
void HELPER(sve_ah_fmls_zpzzz_d)(void *vd, void *vn, void *vm, void *va,
void *vg, float_status *status, uint32_t desc)
{
- do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, INT64_MIN, 0, true);
+ do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, 0, 0,
+ float_muladd_negate_product);
}
void HELPER(sve_ah_fnmla_zpzzz_d)(void *vd, void *vn, void *vm, void *va,
void *vg, float_status *status, uint32_t desc)
{
- do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, INT64_MIN, INT64_MIN,
true);
+ do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, 0, 0,
+ float_muladd_negate_product | float_muladd_negate_c);
}
void HELPER(sve_ah_fnmls_zpzzz_d)(void *vd, void *vn, void *vm, void *va,
void *vg, float_status *status, uint32_t desc)
{
- do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, 0, INT64_MIN, true);
+ do_fmla_zpzzz_d(vd, vn, vm, va, vg, status, desc, 0, 0,
+ float_muladd_negate_c);
}
/* Two operand floating-point comparison controlled by a predicate.
--
2.43.0
- [PATCH v2 17/34] target/arm: Move float*_ah_chs to vec_internal.h, (continued)
- [PATCH v2 17/34] target/arm: Move float*_ah_chs to vec_internal.h, Richard Henderson, 2025/01/28
- [PATCH v2 19/34] target/arm: Use float*_maybe_ah_chs in sve_ftssel_*, Richard Henderson, 2025/01/28
- [PATCH v2 12/34] target/arm: Remove fp_status_f16_a32, Richard Henderson, 2025/01/28
- [PATCH v2 16/34] target/arm: Simplify DO_VFP_cmp in vfp_helper.c, Richard Henderson, 2025/01/28
- [PATCH v2 18/34] target/arm: Introduce float*_maybe_ah_chs, Richard Henderson, 2025/01/28
- [PATCH v2 20/34] target/arm: Use float*_maybe_ah_chs in sve_fcadd_*, Richard Henderson, 2025/01/28
- [PATCH v2 21/34] target/arm: Use float*_maybe_ah_chs in sve_fcadd_*, Richard Henderson, 2025/01/28
- [PATCH v2 23/34] target/arm: Use flags for AH negation in sve_ftmad_*, Richard Henderson, 2025/01/28
- [PATCH v2 22/34] target/arm: Use flags for AH negation in do_fmla_zpzzz_*,
Richard Henderson <=
- [PATCH v2 24/34] target/arm: Use flags for AH negation in float*_ah_mulsub_f, Richard Henderson, 2025/01/28
- [PATCH v2 26/34] target/arm: Handle FPCR.AH in gvec_fcmla[hs]_idx, Richard Henderson, 2025/01/28
- [PATCH v2 28/34] target/arm: Split gvec_fmla_idx_* for fmls and ah_fmls, Richard Henderson, 2025/01/28
- [PATCH v2 25/34] target/arm: Handle FPCR.AH in gvec_fcmla[hsd], Richard Henderson, 2025/01/28
- [PATCH v2 27/34] target/arm: Handle FPCR.AH in sve_fcmla_zpzzz_*, Richard Henderson, 2025/01/28
- [PATCH v2 29/34] Revert "target/arm: Handle FPCR.AH in FMLSL", Richard Henderson, 2025/01/28
- [PATCH v2 30/34] target/arm: Handle FPCR.AH in gvec_fmlal_a64, Richard Henderson, 2025/01/28
- [PATCH v2 32/34] target/arm: Handle FPCR.AH in sve2_fmlal_zzzw_s, Richard Henderson, 2025/01/28
- [PATCH v2 33/34] target/arm: Read fz16 from env->vfp.fpcr, Richard Henderson, 2025/01/28