coreutils
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[PATCH] build: for factor use C in more cases for arm64 and ppc64


From: Pádraig Brady
Subject: [PATCH] build: for factor use C in more cases for arm64 and ppc64
Date: Wed, 8 Mar 2017 18:26:17 -0800

* src/longlong.h: Sync from gmp repo incorporating:
Use asm-free umul_ppmm() on arm64 and ppc64.
---
 src/longlong.h | 73 +++++++++++++++++++++++++++++++++++++++++++++++-----------
 1 file changed, 60 insertions(+), 13 deletions(-)

diff --git a/src/longlong.h b/src/longlong.h
index 55b0c8d..bf27e69 100644
--- a/src/longlong.h
+++ b/src/longlong.h
@@ -426,9 +426,19 @@ long __MPN(count_leading_zeros) (UDItype);
 #if defined (__arm__) && (defined (__thumb2__) || !defined (__thumb__)) \
     && W_TYPE_SIZE == 32
 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
-  __asm__ ("adds\t%1, %4, %5\n\tadc\t%0, %2, %3"                       \
+  do {                                                                 \
+    if (__builtin_constant_p (bl) && -(USItype)(bl) < (USItype)(bl))   \
+      __asm__ ("subs\t%1, %4, %5\n\tadc\t%0, %2, %3"                   \
+          : "=r" (sh), "=&r" (sl)                                      \
+              : "r" (ah), "rI" (bh),                                   \
+                "%r" (al), "rI" (-(USItype)(bl)) __CLOBBER_CC);        \
+    else                                                               \
+      __asm__ ("adds\t%1, %4, %5\n\tadc\t%0, %2, %3"                   \
           : "=r" (sh), "=&r" (sl)                                      \
-          : "r" (ah), "rI" (bh), "%r" (al), "rI" (bl) __CLOBBER_CC)
+          : "r" (ah), "rI" (bh), "%r" (al), "rI" (bl) __CLOBBER_CC);   \
+  } while (0)
+/* FIXME: Extend the immediate range for the low word by using both
+   ADDS and SUBS, since they set carry in the same way.  */
 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
   do {                                                                 \
     if (__builtin_constant_p (al))                                     \
@@ -492,6 +502,7 @@ long __MPN(count_leading_zeros) (UDItype);
           : "r" ((USItype) (a)), "r" ((USItype) (b)) __CLOBBER_CC);    \
   } while (0)
 #define UMUL_TIME 20
+#ifndef LONGLONG_STANDALONE
 #define udiv_qrnnd(q, r, n1, n0, d) \
   do { UWtype __r;                                                     \
     (q) = __MPN(udiv_qrnnd) (&__r, (n1), (n0), (d));                   \
@@ -499,6 +510,7 @@ long __MPN(count_leading_zeros) (UDItype);
   } while (0)
 extern UWtype __MPN(udiv_qrnnd) (UWtype *, UWtype, UWtype, UWtype);
 #define UDIV_TIME 200
+#endif /* LONGLONG_STANDALONE */
 #else /* ARMv4 or newer */
 #define umul_ppmm(xh, xl, a, b) \
   __asm__ ("umull %0,%1,%2,%3" : "=&r" (xl), "=&r" (xh) : "r" (a), "r" (b))
@@ -525,21 +537,48 @@ extern UWtype __MPN(udiv_qrnnd) (UWtype *, UWtype, UWtype, UWtype);
 /* FIXME: Extend the immediate range for the low word by using both
    ADDS and SUBS, since they set carry in the same way.  */
 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
-  __asm__ ("adds\t%1, %x4, %5\n\tadc\t%0, %x2, %x3"                    \
-          : "=r" (sh), "=&r" (sl)                                      \
-          : "rZ" ((UDItype)(ah)), "rZ" ((UDItype)(bh)),                \
-            "%r" ((UDItype)(al)), "rI" ((UDItype)(bl)) __CLOBBER_CC)
+  do {                                                                 \
+    if (__builtin_constant_p (bl) && -(UDItype)(bl) < (UDItype)(bl))   \
+      __asm__ ("subs\t%1, %x4, %5\n\tadc\t%0, %x2, %x3"                \
+              : "=r" (sh), "=&r" (sl)                                  \
+              : "rZ" ((UDItype)(ah)), "rZ" ((UDItype)(bh)),            \
+                "%r" ((UDItype)(al)), "rI" (-(UDItype)(bl)) __CLOBBER_CC);\
+    else                                                               \
+      __asm__ ("adds\t%1, %x4, %5\n\tadc\t%0, %x2, %x3"                \
+              : "=r" (sh), "=&r" (sl)                                  \
+              : "rZ" ((UDItype)(ah)), "rZ" ((UDItype)(bh)),            \
+                "%r" ((UDItype)(al)), "rI" ((UDItype)(bl)) __CLOBBER_CC);\
+  } while (0)
 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
-  __asm__ ("subs\t%1, %x4, %5\n\tsbc\t%0, %x2, %x3"                    \
-          : "=r,r" (sh), "=&r,&r" (sl)                                 \
-          : "rZ,rZ" ((UDItype)(ah)), "rZ,rZ" ((UDItype)(bh)),          \
-            "r,Z"   ((UDItype)(al)), "rI,r"  ((UDItype)(bl)) __CLOBBER_CC)
+  do {                                                                 \
+    if (__builtin_constant_p (bl) && -(UDItype)(bl) < (UDItype)(bl))   \
+      __asm__ ("adds\t%1, %x4, %5\n\tsbc\t%0, %x2, %x3"                \
+              : "=r,r" (sh), "=&r,&r" (sl)                             \
+              : "rZ,rZ" ((UDItype)(ah)), "rZ,rZ" ((UDItype)(bh)),      \
+                "r,Z"   ((UDItype)(al)), "rI,r" (-(UDItype)(bl)) __CLOBBER_CC);\
+    else                                                               \
+      __asm__ ("subs\t%1, %x4, %5\n\tsbc\t%0, %x2, %x3"                \
+              : "=r,r" (sh), "=&r,&r" (sl)                             \
+              : "rZ,rZ" ((UDItype)(ah)), "rZ,rZ" ((UDItype)(bh)),      \
+                "r,Z"   ((UDItype)(al)), "rI,r"  ((UDItype)(bl)) __CLOBBER_CC);\
+  } while (0)
+#if __GMP_GNUC_PREREQ (4,9)
+#define umul_ppmm(w1, w0, u, v) \
+  do {                                                                 \
+    typedef unsigned int __ll_UTItype __attribute__((mode(TI)));       \
+    __ll_UTItype __ll = (__ll_UTItype)(u) * (v);                       \
+    w1 = __ll >> 64;                                                   \
+    w0 = __ll;                                                         \
+  } while (0)
+#endif
+#if !defined (umul_ppmm)
 #define umul_ppmm(ph, pl, m0, m1) \
   do {                                                                 \
     UDItype __m0 = (m0), __m1 = (m1);                                  \
     __asm__ ("umulh\t%0, %1, %2" : "=r" (ph) : "r" (__m0), "r" (__m1));        \
     (pl) = __m0 * __m1;                                                        \
   } while (0)
+#endif
 #define count_leading_zeros(count, x)  count_leading_zeros_gcc_clz(count, x)
 #define count_trailing_zeros(count, x)  count_trailing_zeros_gcc_ctz(count, x)
 #define COUNT_LEADING_ZEROS_0 64
@@ -1000,10 +1039,17 @@ extern UWtype __MPN(udiv_qrnnd) (UWtype *, UWtype, UWtype, UWtype);
           : "=r" (sh), "=&r" (sl)                                      \
           : "0" ((UDItype)(ah)), "rme" ((UDItype)(bh)),                \
             "1" ((UDItype)(al)), "rme" ((UDItype)(bl)))
+#if defined (HAVE_MULX)
 #define umul_ppmm(w1, w0, u, v) \
-  __asm__ ("mulq %3"                                                   \
+  __asm__ ("mulx       %3, %0, %1"                                     \
+          : "=r" (w0), "=r" (w1)                                       \
+          : "%d" ((UDItype)(u)), "rm" ((UDItype)(v)))
+#else
+#define umul_ppmm(w1, w0, u, v) \
+  __asm__ ("mulq       %3"                                             \
           : "=a" (w0), "=d" (w1)                                       \
           : "%0" ((UDItype)(u)), "rm" ((UDItype)(v)))
+#endif
 #define udiv_qrnnd(q, r, n1, n0, dx) /* d renamed to dx avoiding "=d" */\
   __asm__ ("divq %4"                /* stringification in K&R C */     \
           : "=a" (q), "=d" (r)                                         \
@@ -1468,7 +1514,7 @@ extern UWtype __MPN(udiv_qrnnd) (UWtype *, UWtype, UWtype, UWtype);
 #define count_leading_zeros(count, x) \
   __asm__ ("cntlzd %0,%1" : "=r" (count) : "r" (x))
 #define COUNT_LEADING_ZEROS_0 64
-#if 0 && __GMP_GNUC_PREREQ (4,4) /* Disable, this results in libcalls! */
+#if __GMP_GNUC_PREREQ (4,8)
 #define umul_ppmm(w1, w0, u, v) \
   do {                                                                 \
     typedef unsigned int __ll_UTItype __attribute__((mode(TI)));       \
@@ -2099,7 +2145,8 @@ extern __longlong_h_C UWtype mpn_udiv_qrnnd_r (UWtype, UWtype, UWtype, UWtype *)
 
 /* If the processor has no udiv_qrnnd but sdiv_qrnnd, go through
    __udiv_w_sdiv (defined in libgcc or elsewhere).  */
-#if !defined (udiv_qrnnd) && defined (sdiv_qrnnd)
+#if !defined (udiv_qrnnd) && defined (sdiv_qrnnd) \
+  && ! defined (LONGLONG_STANDALONE)
 #define udiv_qrnnd(q, r, nh, nl, d) \
   do {                                                                 \
     UWtype __r;                                                                \
-- 
2.9.3




reply via email to

[Prev in Thread] Current Thread [Next in Thread]