lightning
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [PATCH 1/2] mips: Correct and optimize jit_rshr() and jit_rshr_u()


From: Paul Cercueil
Subject: Re: [PATCH 1/2] mips: Correct and optimize jit_rshr() and jit_rshr_u()
Date: Tue, 03 Oct 2023 23:41:04 +0200

Le mardi 03 octobre 2023 à 23:36 +0200, Paul Cercueil a écrit :
> Rework the branch-less path to shrink the size of jit_rshr() and
> jit_rshr_u(), and use one register less, so that the whole code path
> that uses branches can be dropped.
> 
> The case where O4 == __WORDSIZE in jit_rshr() was also handled
> incorrectly, as it would zero the O1 register instead of
> sign-extending it.

I just noticed that I refer to jit_rshr / jit_rshr_u everywhere in this
commit message, just as I referred to jit_lshr / jit_lshr_u in the
previous, similar commit. I obviously meant jit_qrshr / jit_qrshr_u.

-Paul

> 
> Signed-off-by: Paul Cercueil <paul@crapouillou.net>
> ---
>  lib/jit_mips-cpu.c | 79 ++++++++++++++++------------------------------
>  1 file changed, 27 insertions(+), 52 deletions(-)
> 
> diff --git a/lib/jit_mips-cpu.c b/lib/jit_mips-cpu.c
> index e59743e..cfbcd7e 100644
> --- a/lib/jit_mips-cpu.c
> +++ b/lib/jit_mips-cpu.c
> @@ -2425,72 +2425,47 @@ static void
>  _xrshr(jit_state_t *_jit, jit_bool_t sign,
>         jit_int32_t r0, jit_int32_t r1, jit_int32_t r2, jit_int32_t
> r3)
>  {
> -    jit_bool_t         branch;
> -    jit_word_t         over, zero, done, done_over;
> -    jit_int32_t                t0, s0, t1, s1, t2, s2, t3, s3;
> +    jit_int32_t                t0, s0, t2, s2, t3, s3;
>      s0 = jit_get_reg(jit_class_gpr);
>      t0 = rn(s0);
>      if (r0 == r2 || r1 == r2) {
> -       s2 = jit_get_reg(jit_class_gpr);
> -       t2 = rn(s2);
> -       movr(t2, r2);
> +        s2 = jit_get_reg(jit_class_gpr);
> +        t2 = rn(s2);
> +        movr(t2, r2);
>      }
>      else
> -       t2 = r2;
> +        t2 = r2;
>      if (r0 == r3 || r1 == r3) {
> -       s3 = jit_get_reg(jit_class_gpr);
> -       t3 = rn(s3);
> -       movr(t3, r3);
> +        s3 = jit_get_reg(jit_class_gpr);
> +        t3 = rn(s3);
> +        movr(t3, r3);
>      }
>      else
> -       t3 = r3;
> -    if ((s1 =
> jit_get_reg(jit_class_gpr|jit_class_nospill|jit_class_chk))) {
> -       t1 = rn(s1);
> -       branch = 0;
> +        t3 = r3;
> +
> +    if (sign) {
> +        /* underflow? */
> +        eqi(t0, t3, __WORDSIZE);
> +        subr(t0, t3, t0);
> +        rshr(r0, t2, t0);
> +    } else {
> +        /* underflow? */
> +        nei(t0, t3, __WORDSIZE);
> +        rshr_u(r0, t2, t3);
> +        movzr(r0, t0, t0);
>      }
> -    else
> -       branch = 1;
> +
>      rsbi(t0, t3, __WORDSIZE);
> -    if (sign)
> -       rshr(r0, t2, t3);
> -    else
> -       rshr_u(r0, t2, t3);
>      lshr(r1, t2, t0);
> -    if (branch) {
> -       zero = beqi(_jit->pc.w, t3, 0);
> -       over = beqi(_jit->pc.w, t3, __WORDSIZE);
> -       done = jmpi(_jit->pc.w, 1);
> -       flush();
> -       patch_at(over, _jit->pc.w);
> -       /* underflow */
> -       if (sign)
> -           rshi(r0, t2, __WORDSIZE - 1);
> -       else
> -           movi(r0, 0);
> -       done_over = jmpi(_jit->pc.w, 1);
> -       /* zero */
> -       flush();
> -       patch_at(zero, _jit->pc.w);
> -       movi(r1, 0);
> -       flush();
> -       patch_at(done, _jit->pc.w);
> -       patch_at(done_over, _jit->pc.w);
> -       jit_unget_reg(s1);
> -    }
> -    else {
> -       /* zero? */
> -       movi(t0, 0);
> -       movzr(r1, t0, t3);
> -       /* underflow? */
> -       eqi(t1, t3, __WORDSIZE);
> -       movnr(r0, t0, t1);
> -       jit_unget_reg(s1);
> -    }
> +
> +    /* zero? */
> +    movzr(r1, t3, t3);
> +
>      jit_unget_reg(s0);
>      if (t2 != r2)
> -       jit_unget_reg(s2);
> +        jit_unget_reg(s2);
>      if (t3 != r3)
> -       jit_unget_reg(s3);
> +        jit_unget_reg(s3);
>  }
>  
>  static void


reply via email to

[Prev in Thread] Current Thread [Next in Thread]