lightning
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[PATCH 3/3] mips: Optimize _andi


From: Paul Cercueil
Subject: [PATCH 3/3] mips: Optimize _andi
Date: Fri, 7 May 2021 12:50:39 +0100

In the case where the immediate value is a high mask (bits 31-x set) or
low mask (bits x-0 set), it is faster to clear the unwanted bits by
shifting the bits left/right.

On MIPS32r2 and MIPS64r2, for low masks it is even possible to clear all
bits with one single EXT instruction.

Signed-off-by: Paul Cercueil <paul@crapouillou.net>
---
 lib/jit_mips-cpu.c | 19 ++++++++++++++++++-
 1 file changed, 18 insertions(+), 1 deletion(-)

diff --git a/lib/jit_mips-cpu.c b/lib/jit_mips-cpu.c
index 3523b50..b73f4b1 100644
--- a/lib/jit_mips-cpu.c
+++ b/lib/jit_mips-cpu.c
@@ -107,6 +107,10 @@ typedef union {
 #  endif
 #  define can_sign_extend_short_p(im)  ((im) >= -32678 && (im) <= 32767)
 #  define can_zero_extend_short_p(im)  ((im) >= 0 && (im) <= 65535)
+#  define is_low_mask(im)              (((im) & 1) ? (__builtin_popcountl((im) 
+ 1) == 1) : 0)
+#  define is_high_mask(im)             ((im) ? (__builtin_popcountl((im) + (1 
<< __builtin_ctzl(im))) == 0) : 0)
+#  define masked_bits_count(im)                __builtin_popcountl(im)
+#  define unmasked_bits_count(im)      (__WORDSIZE - masked_bits_count(im))
 #  if __WORDSIZE == 32
 #    define can_sign_extend_int_p(im)  1
 #    define can_zero_extend_int_p(im)  1
@@ -1163,7 +1167,20 @@ _andi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, 
jit_word_t i0)
     jit_int32_t                reg;
     if (can_zero_extend_short_p(i0))
        ANDI(r0, r1, i0);
-    else {
+    else if (is_low_mask(i0)) {
+#if defined(_MIPS_ARCH_MIPS32R2) || defined(_MIPS_ARCH_MIPS64R2)
+       if (masked_bits_count(i0) <= 32)
+           EXT(r0, r1, 0, masked_bits_count(i0));
+       else
+#endif
+       {
+               lshi(r0, r1, unmasked_bits_count(i0));
+               rshi_u(r0, r0, unmasked_bits_count(i0));
+       }
+    } else if (is_high_mask(i0)) {
+       rshi(r0, r1, unmasked_bits_count(i0));
+       lshi(r0, r0, unmasked_bits_count(i0));
+    } else {
        reg = jit_get_reg(jit_class_gpr);
        movi(rn(reg), i0);
        AND(r0, r1, rn(reg));
-- 
2.30.2




reply via email to

[Prev in Thread] Current Thread [Next in Thread]