[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[PATCH 59/76] target/arm: Handle FPCR.AH in negation steps in SVE FCADD
From: Peter Maydell
Subject: [PATCH 59/76] target/arm: Handle FPCR.AH in negation steps in SVE FCADD
Date: Fri, 24 Jan 2025 16:28:19 +0000
The negation steps in FCADD must honour FPCR.AH's "don't change the
sign of a NaN" semantics. Implement this in the same way we did for
the base ASIMD FCADD, by encoding FPCR.AH into the SIMD data field
passed to the helper and using that to decide whether to negate the
values.
Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
---
target/arm/tcg/sve_helper.c | 45 +++++++++++++++++++++++++++-------
target/arm/tcg/translate-sve.c | 2 +-
2 files changed, 37 insertions(+), 10 deletions(-)
diff --git a/target/arm/tcg/sve_helper.c b/target/arm/tcg/sve_helper.c
index 8527a7495a6..dc5a35b46ef 100644
--- a/target/arm/tcg/sve_helper.c
+++ b/target/arm/tcg/sve_helper.c
@@ -5131,7 +5131,9 @@ void HELPER(sve_fcadd_h)(void *vd, void *vn, void *vm,
void *vg,
{
intptr_t j, i = simd_oprsz(desc);
uint64_t *g = vg;
- float16 neg_imag = float16_set_sign(0, simd_data(desc));
+ bool rot = extract32(desc, SIMD_DATA_SHIFT, 1);
+ bool fpcr_ah = extract32(desc, SIMD_DATA_SHIFT + 1, 1);
+ float16 neg_imag = float16_set_sign(0, rot);
float16 neg_real = float16_chs(neg_imag);
do {
@@ -5144,9 +5146,16 @@ void HELPER(sve_fcadd_h)(void *vd, void *vn, void *vm,
void *vg,
i -= 2 * sizeof(float16);
e0 = *(float16 *)(vn + H1_2(i));
- e1 = *(float16 *)(vm + H1_2(j)) ^ neg_real;
+ e1 = *(float16 *)(vm + H1_2(j));
e2 = *(float16 *)(vn + H1_2(j));
- e3 = *(float16 *)(vm + H1_2(i)) ^ neg_imag;
+ e3 = *(float16 *)(vm + H1_2(i));
+
+ if (neg_real && !(fpcr_ah && float16_is_any_nan(e1))) {
+ e1 ^= neg_real;
+ }
+ if (neg_imag && !(fpcr_ah && float16_is_any_nan(e3))) {
+ e3 ^= neg_imag;
+ }
if (likely((pg >> (i & 63)) & 1)) {
*(float16 *)(vd + H1_2(i)) = float16_add(e0, e1, s);
@@ -5163,7 +5172,9 @@ void HELPER(sve_fcadd_s)(void *vd, void *vn, void *vm,
void *vg,
{
intptr_t j, i = simd_oprsz(desc);
uint64_t *g = vg;
- float32 neg_imag = float32_set_sign(0, simd_data(desc));
+ bool rot = extract32(desc, SIMD_DATA_SHIFT, 1);
+ bool fpcr_ah = extract32(desc, SIMD_DATA_SHIFT + 1, 1);
+ float32 neg_imag = float32_set_sign(0, rot);
float32 neg_real = float32_chs(neg_imag);
do {
@@ -5176,9 +5187,16 @@ void HELPER(sve_fcadd_s)(void *vd, void *vn, void *vm,
void *vg,
i -= 2 * sizeof(float32);
e0 = *(float32 *)(vn + H1_2(i));
- e1 = *(float32 *)(vm + H1_2(j)) ^ neg_real;
+ e1 = *(float32 *)(vm + H1_2(j));
e2 = *(float32 *)(vn + H1_2(j));
- e3 = *(float32 *)(vm + H1_2(i)) ^ neg_imag;
+ e3 = *(float32 *)(vm + H1_2(i));
+
+ if (neg_real && !(fpcr_ah && float32_is_any_nan(e1))) {
+ e1 ^= neg_real;
+ }
+ if (neg_imag && !(fpcr_ah && float32_is_any_nan(e3))) {
+ e3 ^= neg_imag;
+ }
if (likely((pg >> (i & 63)) & 1)) {
*(float32 *)(vd + H1_2(i)) = float32_add(e0, e1, s);
@@ -5195,7 +5213,9 @@ void HELPER(sve_fcadd_d)(void *vd, void *vn, void *vm,
void *vg,
{
intptr_t j, i = simd_oprsz(desc);
uint64_t *g = vg;
- float64 neg_imag = float64_set_sign(0, simd_data(desc));
+ bool rot = extract32(desc, SIMD_DATA_SHIFT, 1);
+ bool fpcr_ah = extract32(desc, SIMD_DATA_SHIFT + 1, 1);
+ float64 neg_imag = float64_set_sign(0, rot);
float64 neg_real = float64_chs(neg_imag);
do {
@@ -5208,9 +5228,16 @@ void HELPER(sve_fcadd_d)(void *vd, void *vn, void *vm,
void *vg,
i -= 2 * sizeof(float64);
e0 = *(float64 *)(vn + H1_2(i));
- e1 = *(float64 *)(vm + H1_2(j)) ^ neg_real;
+ e1 = *(float64 *)(vm + H1_2(j));
e2 = *(float64 *)(vn + H1_2(j));
- e3 = *(float64 *)(vm + H1_2(i)) ^ neg_imag;
+ e3 = *(float64 *)(vm + H1_2(i));
+
+ if (neg_real && !(fpcr_ah && float64_is_any_nan(e1))) {
+ e1 ^= neg_real;
+ }
+ if (neg_imag && !(fpcr_ah && float64_is_any_nan(e3))) {
+ e3 ^= neg_imag;
+ }
if (likely((pg >> (i & 63)) & 1)) {
*(float64 *)(vd + H1_2(i)) = float64_add(e0, e1, s);
diff --git a/target/arm/tcg/translate-sve.c b/target/arm/tcg/translate-sve.c
index 9200f7f8a49..0696192148c 100644
--- a/target/arm/tcg/translate-sve.c
+++ b/target/arm/tcg/translate-sve.c
@@ -3916,7 +3916,7 @@ static gen_helper_gvec_4_ptr * const fcadd_fns[] = {
gen_helper_sve_fcadd_s, gen_helper_sve_fcadd_d,
};
TRANS_FEAT(FCADD, aa64_sve, gen_gvec_fpst_zzzp, fcadd_fns[a->esz],
- a->rd, a->rn, a->rm, a->pg, a->rot,
+ a->rd, a->rn, a->rm, a->pg, a->rot | (s->fpcr_ah << 1),
a->esz == MO_16 ? FPST_FPCR_F16_A64 : FPST_FPCR_A64)
#define DO_FMLA(NAME, name) \
--
2.34.1
- [PATCH 44/76] target/arm: Handle FPCR.NEP for NEP for FMUL, FMULX scalar by element, (continued)
- [PATCH 44/76] target/arm: Handle FPCR.NEP for NEP for FMUL, FMULX scalar by element, Peter Maydell, 2025/01/24
- [PATCH 48/76] target/arm: Implement FPCR.AH semantics for FMINP and FMAXP, Peter Maydell, 2025/01/24
- [PATCH 53/76] target/arm: Implement FPCR.AH handling for scalar FABS and FABD, Peter Maydell, 2025/01/24
- [PATCH 51/76] target/arm: Implement FPCR.AH semantics for SVE FMIN/FMAX vector, Peter Maydell, 2025/01/24
- [PATCH 56/76] target/arm: Handle FPCR.AH in SVE FABS, Peter Maydell, 2025/01/24
- [PATCH 59/76] target/arm: Handle FPCR.AH in negation steps in SVE FCADD, Peter Maydell <=
- [PATCH 71/76] target/arm: Implement increased precision FRSQRTE, Peter Maydell, 2025/01/24
- [PATCH 72/76] target/arm: Enable FEAT_RPRES for -cpu max, Peter Maydell, 2025/01/24
- Re: [PATCH 00/76] target/arm: Implement FEAT_AFP and FEAT_RPRES, Peter Maydell, 2025/01/24
- Re: [PATCH 00/76] target/arm: Implement FEAT_AFP and FEAT_RPRES, Peter Maydell, 2025/01/28