From: Pádraig Brady
Subject: [PATCH] build: fix potential factor build failure on arm and powerpc
Date: Fri, 29 Nov 2013 04:16:31 +0000

* src/longlong.h: Sync with the latest longlong.h from libgmp to:
- avoid arm asm when compiling for the thumb instruction [sub]set
- update the old powerpc assembly to be compatible with newer GCC
- add an arm64 optimized count_trailing_zeros() (see the note after
  the diffstat)
- add sparc64 optimized add_ssaaaa() and umul_ppmm()
---
 src/longlong.h |  253 ++++++++++++++++++++++++++++++--------------------------
 1 files changed, 137 insertions(+), 116 deletions(-)
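
Note (illustration only, not part of the patch): the arm64
count_trailing_zeros() added below is an rbit (bit reverse) followed
by clz, relying on the identity ctz(x) == clz(bitreverse(x)).  A
minimal portable C model of that identity, checked against GCC's
__builtin_ctzll; bitrev64() and clz64() are hypothetical helpers
written only for this sketch:

/* Model: count trailing zeros as leading zeros of the reversed word. */
#include <assert.h>
#include <stdint.h>

static uint64_t bitrev64 (uint64_t x)
{
  uint64_t r = 0;
  for (int i = 0; i < 64; i++)
    r = (r << 1) | ((x >> i) & 1);   /* mirror bit i into bit 63-i */
  return r;
}

static int clz64 (uint64_t x)        /* leading zeros; x must be nonzero */
{
  int n = 0;
  for (; !(x >> 63); x <<= 1)
    n++;
  return n;
}

int main (void)
{
  for (uint64_t x = 1; x < 100000; x++)
    assert (clz64 (bitrev64 (x)) == __builtin_ctzll (x));
  return 0;
}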

diff --git a/src/longlong.h b/src/longlong.h
index eba2417..069cef7 100644
--- a/src/longlong.h
+++ b/src/longlong.h
@@ -138,30 +138,30 @@ along with this file.  If not, see http://www.gnu.org/licenses/.  */
    or want.  */
 
 #ifdef _LONG_LONG_LIMB
-#define count_leading_zeros_gcc_clz(count,x)    \
-  do {                                          \
-    ASSERT ((x) != 0);                          \
-    (count) = __builtin_clzll (x);              \
+#define count_leading_zeros_gcc_clz(count,x)   \
+  do {                                         \
+    ASSERT ((x) != 0);                         \
+    (count) = __builtin_clzll (x);             \
   } while (0)
 #else
-#define count_leading_zeros_gcc_clz(count,x)    \
-  do {                                          \
-    ASSERT ((x) != 0);                          \
-    (count) = __builtin_clzl (x);               \
+#define count_leading_zeros_gcc_clz(count,x)   \
+  do {                                         \
+    ASSERT ((x) != 0);                         \
+    (count) = __builtin_clzl (x);              \
   } while (0)
 #endif
 
 #ifdef _LONG_LONG_LIMB
-#define count_trailing_zeros_gcc_ctz(count,x)   \
-  do {                                          \
-    ASSERT ((x) != 0);                          \
-    (count) = __builtin_ctzll (x);              \
+#define count_trailing_zeros_gcc_ctz(count,x)  \
+  do {                                         \
+    ASSERT ((x) != 0);                         \
+    (count) = __builtin_ctzll (x);             \
   } while (0)
 #else
-#define count_trailing_zeros_gcc_ctz(count,x)   \
-  do {                                          \
-    ASSERT ((x) != 0);                          \
-    (count) = __builtin_ctzl (x);               \
+#define count_trailing_zeros_gcc_ctz(count,x)  \
+  do {                                         \
+    ASSERT ((x) != 0);                         \
+    (count) = __builtin_ctzl (x);              \
   } while (0)
 #endif
 
@@ -222,27 +222,27 @@ along with this file.  If not, see http://www.gnu.org/licenses/.  */
   __asm__("cttz %1,%0" : "=r"(COUNT) : "r"(X))
 #endif /* clz/ctz using cix */
 
-#if ! defined (count_leading_zeros)                             \
+#if ! defined (count_leading_zeros)                            \
   && defined (__GNUC__) && ! defined (LONGLONG_STANDALONE)
 /* ALPHA_CMPBGE_0 gives "cmpbge $31,src,dst", ie. test src bytes == 0.
    "$31" is written explicitly in the asm, since an "r" constraint won't
    select reg 31.  There seems no need to worry about "r31" syntax for cray,
-   since gcc itself (pre-release 3.4) emits just $31 in various places.  */
-#define ALPHA_CMPBGE_0(dst, src)                                        \
+   since gcc itself (pre-release 3.4) emits just $31 in various places.  */
+#define ALPHA_CMPBGE_0(dst, src)                                       \
   do { asm ("cmpbge $31, %1, %0" : "=r" (dst) : "r" (src)); } while (0)
 /* Zero bytes are turned into bits with cmpbge, a __clz_tab lookup counts
    them, locating the highest non-zero byte.  A second __clz_tab lookup
    counts the leading zero bits in that byte, giving the result.  */
-#define count_leading_zeros(count, x)                                   \
-  do {                                                                  \
-    UWtype  __clz__b, __clz__c, __clz__x = (x);                         \
-    ALPHA_CMPBGE_0 (__clz__b,  __clz__x);           /* zero bytes */    \
-    __clz__b = __clz_tab [(__clz__b >> 1) ^ 0x7F];  /* 8 to 1 byte */   \
-    __clz__b = __clz__b * 8 - 7;                    /* 57 to 1 shift */ \
-    __clz__x >>= __clz__b;                                              \
-    __clz__c = __clz_tab [__clz__x];                /* 8 to 1 bit */    \
-    __clz__b = 65 - __clz__b;                                           \
-    (count) = __clz__b - __clz__c;                                      \
+#define count_leading_zeros(count, x)                                  \
+  do {                                                                 \
+    UWtype  __clz__b, __clz__c, __clz__x = (x);                        \
+    ALPHA_CMPBGE_0 (__clz__b,  __clz__x);          /* zero bytes */    \
+    __clz__b = __clz_tab [(__clz__b >> 1) ^ 0x7F];  /* 8 to 1 byte */  \
+    __clz__b = __clz__b * 8 - 7;                   /* 57 to 1 shift */ \
+    __clz__x >>= __clz__b;                                             \
+    __clz__c = __clz_tab [__clz__x];               /* 8 to 1 bit */    \
+    __clz__b = 65 - __clz__b;                                          \
+    (count) = __clz__b - __clz__c;                                     \
   } while (0)
 #define COUNT_LEADING_ZEROS_NEED_CLZ_TAB
 #endif /* clz using cmpbge */
@@ -298,14 +298,14 @@ long __MPN(count_leading_zeros) (UDItype);
    code using "al<bl" arithmetically comes out making an actual 0 or 1 in a
    register, which takes an extra cycle.  */
 #define sub_ddmmss(sh, sl, ah, al, bh, bl)      \
-  do {                                          \
-    UWtype __x;                                 \
-    __x = (al) - (bl);                          \
-    if ((al) < (bl))                            \
-      (sh) = (ah) - (bh) - 1;                   \
-    else                                        \
-      (sh) = (ah) - (bh);                       \
-    (sl) = __x;                                 \
+  do {                                         \
+    UWtype __x;                                        \
+    __x = (al) - (bl);                         \
+    if ((al) < (bl))                           \
+      (sh) = (ah) - (bh) - 1;                  \
+    else                                       \
+      (sh) = (ah) - (bh);                      \
+    (sl) = __x;                                        \
   } while (0)
 #if defined (__GNUC__) && ! defined (__INTEL_COMPILER)
 /* Do both product parts in assembly, since that gives better code with
@@ -423,7 +423,7 @@ long __MPN(count_leading_zeros) (UDItype);
             "rIJ" ((USItype) (bl)))
 #endif
 
-#if defined (__arm__) && W_TYPE_SIZE == 32
+#if defined (__arm__) && !defined (__thumb__) && W_TYPE_SIZE == 32
 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
   __asm__ ("adds\t%1, %4, %5\n\tadc\t%0, %2, %3"                       \
           : "=r" (sh), "=&r" (sl)                                      \
@@ -513,7 +513,7 @@ extern UWtype __MPN(udiv_qrnnd) (UWtype *, UWtype, UWtype, UWtype);
 #define UDIV_TIME 200
 #endif /* LONGLONG_STANDALONE */
 #endif
-/* This is a bizarre test, but GCC doesn't define useful common symbol. */
+/* This is a bizarre test, but GCC doesn't define any useful common symbol. */
 #if defined (__ARM_ARCH_5__)  || defined (__ARM_ARCH_5T__) || \
     defined (__ARM_ARCH_5E__) || defined (__ARM_ARCH_5TE__)|| \
     defined (__ARM_ARCH_6__)  || defined (__ARM_ARCH_6J__) || \
@@ -532,13 +532,13 @@ extern UWtype __MPN(udiv_qrnnd) (UWtype *, UWtype, UWtype, UWtype);
 /* FIXME: Extend the immediate range for the low word by using both
    ADDS and SUBS, since they set carry in the same way.  */
 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
-  __asm__ ("adds\t%1, %x4, %5\n\tadc\t%0, %x2, %x3"                     \
+  __asm__ ("adds\t%1, %x4, %5\n\tadc\t%0, %x2, %x3"                    \
           : "=r" (sh), "=&r" (sl)                                      \
-           : "rZ" (ah), "rZ" (bh), "%r" (al), "rI" (bl) __CLOBBER_CC)
+          : "rZ" (ah), "rZ" (bh), "%r" (al), "rI" (bl) __CLOBBER_CC)
 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
-  __asm__ ("subs\t%1, %x4, %5\n\tsbc\t%0, %x2, %x3"                     \
-           : "=r,r" (sh), "=&r,&r" (sl)                                 \
-           : "rZ,rZ" (ah), "rZ,rZ" (bh), "r,Z" (al), "rI,r" (bl) __CLOBBER_CC)
+  __asm__ ("subs\t%1, %x4, %5\n\tsbc\t%0, %x2, %x3"                    \
+          : "=r,r" (sh), "=&r,&r" (sl)                                 \
+          : "rZ,rZ" (ah), "rZ,rZ" (bh), "r,Z" (al), "rI,r" (bl) __CLOBBER_CC)
 #define umul_ppmm(ph, pl, m0, m1) \
   do {                                                                 \
     UDItype __m0 = (m0), __m1 = (m1);                                  \
@@ -547,6 +547,8 @@ extern UWtype __MPN(udiv_qrnnd) (UWtype *, UWtype, UWtype, UWtype);
   } while (0)
 #define count_leading_zeros(count, x) \
   __asm__ ("clz\t%0, %1" : "=r" (count) : "r" (x))
+#define count_trailing_zeros(count, x) \
+  __asm__ ("rbit\t%0, %1\n\tclz\t%0, %0" : "=r" (count) : "r" (x))
 #define COUNT_LEADING_ZEROS_0 64
 #endif /* __aarch64__ */
 
@@ -945,7 +947,7 @@ extern UWtype __MPN(udiv_qrnnd) (UWtype *, UWtype, UWtype, UWtype);
    being 1 code byte smaller.  "31-__cbtmp" is a workaround, probably at the
    cost of one extra instruction.  Do this for "i386" too, since that means
    generic x86.  */
-#if ! defined (count_leading_zeros) && __GNUC__ < 3                     \
+#if ! defined (count_leading_zeros) && __GNUC__ < 3                    \
   && (HAVE_HOST_CPU_i386                                               \
       || HAVE_HOST_CPU_i686                                            \
       || HAVE_HOST_CPU_pentiumpro                                      \
@@ -1026,7 +1028,7 @@ extern UWtype __MPN(udiv_qrnnd) (UWtype *, UWtype, UWtype, UWtype);
     ASSERT ((x) != 0);                                                 \
     __asm__ ("bsfq %1,%q0" : "=r" (count) : "rm" ((UDItype)(x)));      \
   } while (0)
-#endif /* x86_64 */
+#endif /* __amd64__ */
 
 #if defined (__i860__) && W_TYPE_SIZE == 32
 #define rshift_rhlc(r,h,l,c) \
@@ -1155,7 +1157,7 @@ extern UWtype __MPN(udiv_qrnnd) (UWtype *, UWtype, UWtype, UWtype);
      || defined (__mc68030__) || defined (mc68030) \
      || defined (__mc68040__) || defined (mc68040) \
      || defined (__mc68060__) || defined (mc68060) \
-     || defined (__NeXT__))                        \
+     || defined (__NeXT__))                       \
   && ! defined (__mcpu32__)
 #define count_leading_zeros(count, x) \
   __asm__ ("bfffo %1{%b2:%b2},%0"                                      \
@@ -1308,42 +1310,42 @@ extern UWtype __MPN(udiv_qrnnd) (UWtype *, UWtype, UWtype, UWtype);
    the system vendor compilers.  (Is that vendor compilers with inline asm,
    or what?)  */
 
-#if (HAVE_HOST_CPU_FAMILY_power || HAVE_HOST_CPU_FAMILY_powerpc)        \
+#if (HAVE_HOST_CPU_FAMILY_power || HAVE_HOST_CPU_FAMILY_powerpc)       \
   && W_TYPE_SIZE == 32
 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
   do {                                                                 \
    if (__builtin_constant_p (bh) && (bh) == 0)                        \
-      __asm__ ("{a%I4|add%I4c} %1,%3,%4\n\t{aze|addze} %0,%2"          \
-            : "=r" (sh), "=&r" (sl) : "r" (ah), "%r" (al), "rI" (bl));\
+      __asm__ ("add%I4c %1,%3,%4\n\taddze %0,%2"                       \
+            : "=r" (sh), "=&r" (sl) : "r" (ah), "%r" (al), "rI" (bl)); \
    else if (__builtin_constant_p (bh) && (bh) == ~(USItype) 0)        \
-      __asm__ ("{a%I4|add%I4c} %1,%3,%4\n\t{ame|addme} %0,%2"          \
-            : "=r" (sh), "=&r" (sl) : "r" (ah), "%r" (al), "rI" (bl));\
+      __asm__ ("add%I4c %1,%3,%4\n\taddme %0,%2"                       \
+            : "=r" (sh), "=&r" (sl) : "r" (ah), "%r" (al), "rI" (bl)); \
     else                                                               \
-      __asm__ ("{a%I5|add%I5c} %1,%4,%5\n\t{ae|adde} %0,%2,%3"         \
+      __asm__ ("add%I5c %1,%4,%5\n\tadde %0,%2,%3"                     \
             : "=r" (sh), "=&r" (sl)                                    \
             : "r" (ah), "r" (bh), "%r" (al), "rI" (bl));               \
   } while (0)
 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
   do {                                                                 \
    if (__builtin_constant_p (ah) && (ah) == 0)                        \
-      __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{sfze|subfze} %0,%2"      \
+      __asm__ ("subf%I3c %1,%4,%3\n\tsubfze %0,%2"                     \
               : "=r" (sh), "=&r" (sl) : "r" (bh), "rI" (al), "r" (bl));\
    else if (__builtin_constant_p (ah) && (ah) == ~(USItype) 0)        \
-      __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{sfme|subfme} %0,%2"      \
+      __asm__ ("subf%I3c %1,%4,%3\n\tsubfme %0,%2"                     \
               : "=r" (sh), "=&r" (sl) : "r" (bh), "rI" (al), "r" (bl));\
     else if (__builtin_constant_p (bh) && (bh) == 0)                   \
-      __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{ame|addme} %0,%2"                
\
+      __asm__ ("subf%I3c %1,%4,%3\n\taddme %0,%2"                      \
               : "=r" (sh), "=&r" (sl) : "r" (ah), "rI" (al), "r" (bl));\
    else if (__builtin_constant_p (bh) && (bh) == ~(USItype) 0)        \
-      __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{aze|addze} %0,%2"        \
+      __asm__ ("subf%I3c %1,%4,%3\n\taddze %0,%2"                      \
               : "=r" (sh), "=&r" (sl) : "r" (ah), "rI" (al), "r" (bl));\
     else                                                               \
-      __asm__ ("{sf%I4|subf%I4c} %1,%5,%4\n\t{sfe|subfe} %0,%3,%2"     \
+      __asm__ ("subf%I4c %1,%5,%4\n\tsubfe %0,%3,%2"                   \
               : "=r" (sh), "=&r" (sl)                                  \
               : "r" (ah), "r" (bh), "rI" (al), "r" (bl));              \
   } while (0)
 #define count_leading_zeros(count, x) \
-  __asm__ ("{cntlz|cntlzw} %0,%1" : "=r" (count) : "r" (x))
+  __asm__ ("cntlzw %0,%1" : "=r" (count) : "r" (x))
 #define COUNT_LEADING_ZEROS_0 32
 #if HAVE_HOST_CPU_FAMILY_powerpc
 #if __GMP_GNUC_PREREQ (4,4)
@@ -1391,55 +1393,55 @@ extern UWtype __MPN(udiv_qrnnd) (UWtype *, UWtype, UWtype, UWtype);
 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
   do {                                                                 \
    if (__builtin_constant_p (bh) && (bh) == 0)                        \
-      __asm__ ("{a%I4|add%I4c} %1,%3,%4\n\t{aze|addze} %0,%2"          \
-            : "=r" (sh), "=&r" (sl) : "r" (ah), "%r" (al), "rI" (bl));\
+      __asm__ ("add%I4c %1,%3,%4\n\taddze %0,%2"                       \
+            : "=r" (sh), "=&r" (sl) : "r" (ah), "%r" (al), "rI" (bl)); \
    else if (__builtin_constant_p (bh) && (bh) == ~(UDItype) 0)        \
-      __asm__ ("{a%I4|add%I4c} %1,%3,%4\n\t{ame|addme} %0,%2"          \
-            : "=r" (sh), "=&r" (sl) : "r" (ah), "%r" (al), "rI" (bl));\
+      __asm__ ("add%I4c %1,%3,%4\n\taddme %0,%2"                       \
+            : "=r" (sh), "=&r" (sl) : "r" (ah), "%r" (al), "rI" (bl)); \
     else                                                               \
-      __asm__ ("{a%I5|add%I5c} %1,%4,%5\n\t{ae|adde} %0,%2,%3"         \
+      __asm__ ("add%I5c %1,%4,%5\n\tadde %0,%2,%3"                     \
             : "=r" (sh), "=&r" (sl)                                    \
             : "r" (ah), "r" (bh), "%r" (al), "rI" (bl));               \
   } while (0)
 /* We use "*rI" for the constant operand here, since with just "I", gcc barfs.
    This might seem strange, but gcc folds away the dead code late.  */
 #define sub_ddmmss(sh, sl, ah, al, bh, bl) \
-  do {                                                                       \
-    if (__builtin_constant_p (bl) && bl > -0x8000 && bl <= 0x8000) {         \
-       if (__builtin_constant_p (ah) && (ah) == 0)                           \
-         __asm__ ("{ai|addic} %1,%3,%4\n\t{sfze|subfze} %0,%2"               \
+  do {                                                                 \
+    if (__builtin_constant_p (bl) && bl > -0x8000 && bl <= 0x8000) {   \
+       if (__builtin_constant_p (ah) && (ah) == 0)                     \
+         __asm__ ("addic %1,%3,%4\n\tsubfze %0,%2"                     \
                   : "=r" (sh), "=&r" (sl) : "r" (bh), "rI" (al), "*rI" (-bl)); 
\
-       else if (__builtin_constant_p (ah) && (ah) == ~(UDItype) 0)           \
-         __asm__ ("{ai|addic} %1,%3,%4\n\t{sfme|subfme} %0,%2"               \
+       else if (__builtin_constant_p (ah) && (ah) == ~(UDItype) 0)     \
+         __asm__ ("addic %1,%3,%4\n\tsubfme %0,%2"                     \
                   : "=r" (sh), "=&r" (sl) : "r" (bh), "rI" (al), "*rI" (-bl)); 
\
-       else if (__builtin_constant_p (bh) && (bh) == 0)                      \
-         __asm__ ("{ai|addic} %1,%3,%4\n\t{ame|addme} %0,%2"                 \
+       else if (__builtin_constant_p (bh) && (bh) == 0)                \
+         __asm__ ("addic %1,%3,%4\n\taddme %0,%2"                      \
                   : "=r" (sh), "=&r" (sl) : "r" (ah), "rI" (al), "*rI" (-bl)); 
\
-       else if (__builtin_constant_p (bh) && (bh) == ~(UDItype) 0)           \
-         __asm__ ("{ai|addic} %1,%3,%4\n\t{aze|addze} %0,%2"                 \
+       else if (__builtin_constant_p (bh) && (bh) == ~(UDItype) 0)     \
+         __asm__ ("addic %1,%3,%4\n\taddze %0,%2"                      \
                   : "=r" (sh), "=&r" (sl) : "r" (ah), "rI" (al), "*rI" (-bl)); 
\
-       else                                                                  \
-         __asm__ ("{ai|addic} %1,%4,%5\n\t{sfe|subfe} %0,%3,%2"              \
-                  : "=r" (sh), "=&r" (sl)                                    \
-                  : "r" (ah), "r" (bh), "rI" (al), "*rI" (-bl));             \
-      } else {                                                               \
-       if (__builtin_constant_p (ah) && (ah) == 0)                           \
-         __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{sfze|subfze} %0,%2"         \
-                  : "=r" (sh), "=&r" (sl) : "r" (bh), "rI" (al), "r" (bl));  \
-       else if (__builtin_constant_p (ah) && (ah) == ~(UDItype) 0)           \
-         __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{sfme|subfme} %0,%2"         \
-                  : "=r" (sh), "=&r" (sl) : "r" (bh), "rI" (al), "r" (bl));  \
-       else if (__builtin_constant_p (bh) && (bh) == 0)                      \
-         __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{ame|addme} %0,%2"           \
-                  : "=r" (sh), "=&r" (sl) : "r" (ah), "rI" (al), "r" (bl));  \
-       else if (__builtin_constant_p (bh) && (bh) == ~(UDItype) 0)           \
-         __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{aze|addze} %0,%2"           \
-                  : "=r" (sh), "=&r" (sl) : "r" (ah), "rI" (al), "r" (bl));  \
-       else                                                                  \
-         __asm__ ("{sf%I4|subf%I4c} %1,%5,%4\n\t{sfe|subfe} %0,%3,%2"        \
-                  : "=r" (sh), "=&r" (sl)                                    \
-                  : "r" (ah), "r" (bh), "rI" (al), "r" (bl));                \
-      }                                                                \
+       else                                                            \
+         __asm__ ("addic %1,%4,%5\n\tsubfe %0,%3,%2"                   \
+                  : "=r" (sh), "=&r" (sl)                              \
+                  : "r" (ah), "r" (bh), "rI" (al), "*rI" (-bl));       \
+    } else {                                                           \
+       if (__builtin_constant_p (ah) && (ah) == 0)                     \
+         __asm__ ("subf%I3c %1,%4,%3\n\tsubfze %0,%2"                  \
+                  : "=r" (sh), "=&r" (sl) : "r" (bh), "rI" (al), "r" (bl)); \
+       else if (__builtin_constant_p (ah) && (ah) == ~(UDItype) 0)     \
+         __asm__ ("subf%I3c %1,%4,%3\n\tsubfme %0,%2"                  \
+                  : "=r" (sh), "=&r" (sl) : "r" (bh), "rI" (al), "r" (bl)); \
+       else if (__builtin_constant_p (bh) && (bh) == 0)                \
+         __asm__ ("subf%I3c %1,%4,%3\n\taddme %0,%2"                   \
+                  : "=r" (sh), "=&r" (sl) : "r" (ah), "rI" (al), "r" (bl)); \
+       else if (__builtin_constant_p (bh) && (bh) == ~(UDItype) 0)     \
+         __asm__ ("subf%I3c %1,%4,%3\n\taddze %0,%2"                   \
+                  : "=r" (sh), "=&r" (sl) : "r" (ah), "rI" (al), "r" (bl)); \
+       else                                                            \
+         __asm__ ("subf%I4c %1,%5,%4\n\tsubfe %0,%3,%2"                \
+                  : "=r" (sh), "=&r" (sl)                              \
+                  : "r" (ah), "r" (bh), "rI" (al), "r" (bl));          \
+    }                                                                  \
   } while (0)
 #endif /* ! _LONG_LONG_LIMB */
 #define count_leading_zeros(count, x) \
@@ -1753,12 +1755,31 @@ extern UWtype __MPN(udiv_qrnnd) (UWtype *, UWtype, UWtype, UWtype);
       "        subccc  %r6,%7,%%g0\n"                                          
\
       "        subc    %r2,%3,%0"                                              
\
          : "=r" (sh), "=&r" (sl)                                       \
-         : "rJ" (ah), "rI" (bh), "rJ" (al), "rI" (bl),         \
+         : "rJ" (ah), "rI" (bh), "rJ" (al), "rI" (bl),                 \
            "rJ" ((al) >> 32), "rI" ((bl) >> 32)                        \
           __CLOBBER_CC)
+#if __VIS__ >= 0x300
+#undef add_ssaaaa
+#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
+  __asm__ (                                                            \
+       "addcc  %r4, %5, %1\n"                                          \
+      "        addxc   %r2, %r3, %0"                                   \
+         : "=r" (sh), "=&r" (sl)                                       \
+         : "rJ" (ah), "rJ" (bh), "%rJ" (al), "rI" (bl) __CLOBBER_CC)
+#define umul_ppmm(ph, pl, m0, m1) \
+  do {                                                                 \
+    UDItype __m0 = (m0), __m1 = (m1);                                  \
+    (pl) = __m0 * __m1;                                                \
+    __asm__ ("umulxhi\t%2, %1, %0"                                     \
+            : "=r" (ph)                                                \
+            : "%r" (__m0), "r" (__m1));                                \
+  } while (0)
+#define count_leading_zeros(count, x) \
+  __asm__ ("lzd\t%1,%0" : "=r" (count) : "r" (x))
+#endif
 #endif
 
-#if defined (__vax__) && W_TYPE_SIZE == 32
+#if (defined (__vax) || defined (__vax__)) && W_TYPE_SIZE == 32
 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
   __asm__ ("addl2 %5,%1\n\tadwc %3,%0"                                 \
           : "=g" (sh), "=&g" (sl)                                      \
@@ -1798,7 +1819,7 @@ extern UWtype __MPN(udiv_qrnnd) (UWtype *, UWtype, UWtype, UWtype);
             : "g" ((USItype) (x)));                                    \
   } while (0)
 #endif
-#endif /* __vax__ */
+#endif /* vax */
 
 #if defined (__z8000__) && W_TYPE_SIZE == 16
 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \
@@ -1859,11 +1880,11 @@ extern UWtype mpn_umul_ppmm (UWtype *, UWtype, UWtype);
 
 #if ! defined (umul_ppmm) && HAVE_NATIVE_mpn_umul_ppmm  \
   && ! defined (LONGLONG_STANDALONE)
-#define umul_ppmm(wh, wl, u, v)                                              \
-  do {                                                                       \
-    UWtype __umul_ppmm__p0;                                                  \
-    (wh) = mpn_umul_ppmm (&__umul_ppmm__p0, (UWtype) (u), (UWtype) (v));      \
-    (wl) = __umul_ppmm__p0;                                                  \
+#define umul_ppmm(wh, wl, u, v)                                        \
+  do {                                                                 \
+    UWtype __umul_ppmm__p0;                                            \
+    (wh) = mpn_umul_ppmm (&__umul_ppmm__p0, (UWtype) (u), (UWtype) (v));\
+    (wl) = __umul_ppmm__p0;                                            \
   } while (0)
 #endif
 
@@ -1872,11 +1893,11 @@ extern UWtype mpn_umul_ppmm_r (UWtype, UWtype, UWtype *);
 
 #if ! defined (umul_ppmm) && HAVE_NATIVE_mpn_umul_ppmm_r       \
   && ! defined (LONGLONG_STANDALONE)
-#define umul_ppmm(wh, wl, u, v)                                              \
-  do {                                                                       \
-    UWtype __umul_ppmm__p0;                                                  \
-    (wh) = mpn_umul_ppmm_r ((UWtype) (u), (UWtype) (v), &__umul_ppmm__p0);    \
-    (wl) = __umul_ppmm__p0;                                                  \
+#define umul_ppmm(wh, wl, u, v)                                        \
+  do {                                                                 \
+    UWtype __umul_p0;                                                  \
+    (wh) = mpn_umul_ppmm_r ((UWtype) (u), (UWtype) (v), &__umul_p0);   \
+    (wl) = __umul_p0;                                                  \
   } while (0)
 #endif
 
@@ -1887,10 +1908,10 @@ extern UWtype mpn_udiv_qrnnd (UWtype *, UWtype, UWtype, UWtype);
   && ! defined (LONGLONG_STANDALONE)
 #define udiv_qrnnd(q, r, n1, n0, d)                                    \
   do {                                                                 \
-    UWtype __udiv_qrnnd__r;                                            \
-    (q) = mpn_udiv_qrnnd (&__udiv_qrnnd__r,                            \
+    UWtype __udiv_qrnnd_r;                                             \
+    (q) = mpn_udiv_qrnnd (&__udiv_qrnnd_r,                             \
                          (UWtype) (n1), (UWtype) (n0), (UWtype) d);    \
-    (r) = __udiv_qrnnd__r;                                             \
+    (r) = __udiv_qrnnd_r;                                              \
   } while (0)
 #endif
 
@@ -1901,10 +1922,10 @@ extern UWtype mpn_udiv_qrnnd_r (UWtype, UWtype, UWtype, UWtype *);
   && ! defined (LONGLONG_STANDALONE)
 #define udiv_qrnnd(q, r, n1, n0, d)                                    \
   do {                                                                 \
-    UWtype __udiv_qrnnd__r;                                            \
+    UWtype __udiv_qrnnd_r;                                             \
     (q) = mpn_udiv_qrnnd_r ((UWtype) (n1), (UWtype) (n0), (UWtype) d,  \
-                           &__udiv_qrnnd__r);                          \
-    (r) = __udiv_qrnnd__r;                                             \
+                           &__udiv_qrnnd_r);                           \
+    (r) = __udiv_qrnnd_r;                                              \
   } while (0)
 #endif
 
@@ -1926,7 +1947,7 @@ extern UWtype mpn_udiv_qrnnd_r (UWtype, UWtype, UWtype, UWtype *);
   do {                                                                 \
    UWtype __x;                                                         \
     __x = (al) - (bl);                                                 \
-    (sh) = (ah) - (bh) - ((al) < (bl));                                 \
+    (sh) = (ah) - (bh) - ((al) < (bl));                                \
    (sl) = __x;                                                         \
   } while (0)
 #endif
-- 
1.7.7.6
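
For reference (illustration only, not from the patch): add_ssaaaa()
computes a two-limb sum with carry propagation and umul_ppmm() a
double-width product, which is the contract the per-CPU asm variants
above implement.  A minimal portable C model of those contracts,
assuming 64-bit limbs and the GCC/clang __uint128_t extension; the
*_model names are hypothetical, written only for this sketch:

#include <assert.h>
#include <stdint.h>

/* (sh,sl) = (ah,al) + (bh,bl): add low limbs, carry into the high sum. */
static void add_ssaaaa_model (uint64_t *sh, uint64_t *sl,
                              uint64_t ah, uint64_t al,
                              uint64_t bh, uint64_t bl)
{
  *sl = al + bl;
  *sh = ah + bh + (*sl < al);        /* carry out of the low limb */
}

/* (ph,pl) = m0 * m1: full 128-bit product split into two limbs. */
static void umul_ppmm_model (uint64_t *ph, uint64_t *pl,
                             uint64_t m0, uint64_t m1)
{
  __uint128_t p = (__uint128_t) m0 * m1;
  *ph = (uint64_t) (p >> 64);
  *pl = (uint64_t) p;
}

int main (void)
{
  uint64_t sh, sl, ph, pl;
  add_ssaaaa_model (&sh, &sl, 1, UINT64_MAX, 0, 1);   /* low limb carries */
  assert (sh == 2 && sl == 0);
  umul_ppmm_model (&ph, &pl, UINT64_MAX, UINT64_MAX);
  assert (ph == UINT64_MAX - 1 && pl == 1);
  return 0;
}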



