[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: [PATCH v6 05/82] target/arm: Split out saturating/rounding shifts fr
From: |
Peter Maydell |
Subject: |
Re: [PATCH v6 05/82] target/arm: Split out saturating/rounding shifts from neon |
Date: |
Tue, 11 May 2021 09:36:39 +0100 |
On Fri, 30 Apr 2021 at 21:30, Richard Henderson
<richard.henderson@linaro.org> wrote:
>
> Split these operations out into a header that can be shared
> between neon and sve. The "sat" pointer acts both as a boolean
> for control of saturating behavior and controls the difference
> in behavior between neon and sve -- QC bit or no QC bit.
>
> Widen the shift operand in the new helpers, as the SVE2 insns treat
> the whole input element as significant. For the neon uses, truncate
> the shift to int8_t while passing the parameter.
>
> Implement right-shift rounding as
>
> tmp = src >> (shift - 1);
> dst = (tmp >> 1) + (tmp & 1);
>
> This is the same number of instructions as the current
>
> tmp = 1 << (shift - 1);
> dst = (src + tmp) >> shift;
>
> without any possibility of intermediate overflow.
>
> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
> ---
> v2: Widen the shift operand (laurent desnouges)
> ---
> target/arm/vec_internal.h | 138 +++++++++++
> target/arm/neon_helper.c | 507 +++++++-------------------------------
> 2 files changed, 221 insertions(+), 424 deletions(-)
>
> diff --git a/target/arm/vec_internal.h b/target/arm/vec_internal.h
> index e3eb3e7a6b..0102547a10 100644
> --- a/target/arm/vec_internal.h
> +++ b/target/arm/vec_internal.h
> @@ -30,4 +30,142 @@ static inline void clear_tail(void *vd, uintptr_t opr_sz,
> uintptr_t max_sz)
> }
> }
>
> +static inline int32_t do_sqrshl_bhs(int32_t src, int32_t shift, int bits,
> + bool round, uint32_t *sat)
> +{
> + if (shift <= -bits) {
> + /* Rounding the sign bit always produces 0. */
> + if (round) {
> + return 0;
> + }
> + return src >> 31;
> + } else if (shift < 0) {
> + if (round) {
> + src >>= -shift - 1;
> + return (src >> 1) + (src & 1);
> + }
> + return src >> -shift;
> + } else if (shift < bits) {
> + int32_t val = src << shift;
> + if (bits == 32) {
> + if (!sat || val >> shift == src) {
> + return val;
> + }
> + } else {
> + int32_t extval = sextract32(val, 0, bits);
> + if (!sat || val == extval) {
> + return extval;
> + }
> + }
> + } else if (!sat || src == 0) {
> + return 0;
> + }
> +
> + *sat = 1;
> + return (1u << (bits - 1)) - (src >= 0);
> +}
> +
> +static inline uint32_t do_uqrshl_bhs(uint32_t src, int32_t shift, int bits,
> + bool round, uint32_t *sat)
> +{
> + if (shift <= -(bits + round)) {
> + return 0;
> + } else if (shift < 0) {
> + if (round) {
> + src >>= -shift - 1;
> + return (src >> 1) + (src & 1);
> + }
> + return src >> -shift;
> + } else if (shift < bits) {
> + uint32_t val = src << shift;
> + if (bits == 32) {
> + if (!sat || val >> shift == src) {
> + return val;
> + }
> + } else {
> + uint32_t extval = extract32(val, 0, bits);
> + if (!sat || val == extval) {
> + return extval;
> + }
> + }
> + } else if (!sat || src == 0) {
> + return 0;
> + }
> +
> + *sat = 1;
> + return MAKE_64BIT_MASK(0, bits);
> +}
> +
> +static inline int32_t do_suqrshl_bhs(int32_t src, int32_t shift, int bits,
> + bool round, uint32_t *sat)
> +{
> + if (src < 0) {
> + *sat = 1;
Shouldn't this check whether sat is NULL ?
> + return 0;
> + }
> + return do_uqrshl_bhs(src, shift, bits, round, sat);
> +}
> +
> +static inline int64_t do_sqrshl_d(int64_t src, int64_t shift,
> + bool round, uint32_t *sat)
> +{
> + if (shift <= -64) {
> + /* Rounding the sign bit always produces 0. */
> + if (round) {
> + return 0;
> + }
> + return src >> 63;
> + } else if (shift < 0) {
> + if (round) {
> + src >>= -shift - 1;
> + return (src >> 1) + (src & 1);
> + }
> + return src >> -shift;
> + } else if (shift < 64) {
> + int64_t val = src << shift;
> + if (!sat || val >> shift == src) {
> + return val;
> + }
> + } else if (!sat || src == 0) {
> + return 0;
> + }
> +
> + *sat = 1;
> + return src < 0 ? INT64_MIN : INT64_MAX;
> +}
> +
> +static inline uint64_t do_uqrshl_d(uint64_t src, int64_t shift,
> + bool round, uint32_t *sat)
> +{
> + if (shift <= -(64 + round)) {
> + return 0;
> + } else if (shift < 0) {
> + if (round) {
> + src >>= -shift - 1;
> + return (src >> 1) + (src & 1);
> + }
> + return src >> -shift;
> + } else if (shift < 64) {
> + uint64_t val = src << shift;
> + if (!sat || val >> shift == src) {
> + return val;
> + }
> + } else if (!sat || src == 0) {
> + return 0;
> + }
> +
> + *sat = 1;
> + return UINT64_MAX;
> +}
> +
> +static inline int64_t do_suqrshl_d(int64_t src, int64_t shift,
> + bool round, uint32_t *sat)
> +{
> + if (src < 0) {
> + *sat = 1;
Missing NULL check again.
> + return 0;
> + }
> + return do_uqrshl_d(src, shift, round, sat);
> +}
Otherwise
Reviewed-by: Peter Maydell <peter.maydell@linaro.org>
thanks
-- PMM
[Prev in Thread] |
Current Thread |
[Next in Thread] |
- Re: [PATCH v6 05/82] target/arm: Split out saturating/rounding shifts from neon,
Peter Maydell <=