emacs-diffs
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

master 6523359dfe2: Make string-lessp vectorisation safer


From: Mattias Engdegård
Subject: master 6523359dfe2: Make string-lessp vectorisation safer
Date: Fri, 31 Mar 2023 15:44:40 -0400 (EDT)

branch: master
commit 6523359dfe212633f5bd274b29a1f5e613384c79
Author: Mattias Engdegård <mattiase@acm.org>
Commit: Mattias Engdegård <mattiase@acm.org>

    Make string-lessp vectorisation safer
    
    Use memcpy for loading unaligned words on platforms where this can be
    done efficiently.  This guards against problems arising from future
    compiler autovectorisation improvements that might cause instructions
    that require aligned addresses to be emitted, and should also work
    with an address sanitiser enabled.
    
    * src/fns.c (HAVE_FAST_UNALIGNED_ACCESS): Only define when optimising.
    (load_unaligned_size_t): New.
    (Fstring_lessp): Use load_unaligned_size_t.
    * src/lisp.h (UNALIGNED_LOAD_SIZE): Remove now unused macro.
---
 src/fns.c  | 43 +++++++++++++++++++++++++------------------
 src/lisp.h | 20 --------------------
 2 files changed, 25 insertions(+), 38 deletions(-)

diff --git a/src/fns.c b/src/fns.c
index 0af9b725c7a..94505eda444 100644
--- a/src/fns.c
+++ b/src/fns.c
@@ -439,17 +439,29 @@ If string STR1 is greater, the value is a positive number N;
 }
 
 /* Check whether the platform allows access to unaligned addresses for
-   size_t integers without trapping or undue penalty (a few cycles is OK).
+   size_t integers without trapping or undue penalty (a few cycles is OK),
+   and that a word-sized memcpy can be used to generate such an access.
 
    This whitelist is incomplete but since it is only used to improve
    performance, omitting cases is safe.  */
-#if defined __x86_64__|| defined __amd64__     \
-    || defined __i386__ || defined __i386      \
-    || defined __arm64__ || defined __aarch64__        \
-    || defined __powerpc__ || defined __powerpc        \
-    || defined __ppc__ || defined __ppc                \
-    || defined __s390__ || defined __s390x__
+#if (defined __x86_64__|| defined __amd64__            \
+     || defined __i386__ || defined __i386             \
+     || defined __arm64__ || defined __aarch64__       \
+     || defined __powerpc__ || defined __powerpc       \
+     || defined __ppc__ || defined __ppc               \
+     || defined __s390__ || defined __s390x__)         \
+  && defined __OPTIMIZE__
 #define HAVE_FAST_UNALIGNED_ACCESS 1
+
+/* Load a word from a possibly unaligned address.  */
+static inline size_t
+load_unaligned_size_t (const void *p)
+{
+  size_t x;
+  memcpy (&x, p, sizeof x);
+  return x;
+}
+
 #else
 #define HAVE_FAST_UNALIGNED_ACCESS 0
 #endif
@@ -497,17 +509,12 @@ Symbols are also allowed; their print names are used instead.  */)
       if (HAVE_FAST_UNALIGNED_ACCESS)
        {
          /* First compare entire machine words.  */
-         typedef size_t word_t;
-         int ws = sizeof (word_t);
-         const word_t *w1 = (const word_t *) SDATA (string1);
-         const word_t *w2 = (const word_t *) SDATA (string2);
-         while (b < nb - ws + 1)
-           {
-             if (UNALIGNED_LOAD_SIZE (w1, b / ws)
-                 != UNALIGNED_LOAD_SIZE (w2, b / ws))
-               break;
-             b += ws;
-           }
+         int ws = sizeof (size_t);
+         const char *w1 = SSDATA (string1);
+         const char *w2 = SSDATA (string2);
+         while (b < nb - ws + 1 &&    load_unaligned_size_t (w1 + b)
+                                   == load_unaligned_size_t (w2 + b))
+           b += ws;
        }
 
       /* Scan forward to the differing byte.  */
diff --git a/src/lisp.h b/src/lisp.h
index cacd318c26f..165fa47b0b3 100644
--- a/src/lisp.h
+++ b/src/lisp.h
@@ -5305,26 +5305,6 @@ __lsan_ignore_object (void const *p)
 }
 #endif
 
-/* If built with USE_SANITIZER_UNALIGNED_LOAD defined, use compiler
-   provided ASan functions to perform unaligned loads, allowing ASan
-   to catch bugs which it might otherwise miss.  */
-#if defined HAVE_SANITIZER_COMMON_INTERFACE_DEFS_H \
-  && defined ADDRESS_SANITIZER                     \
-  && defined USE_SANITIZER_UNALIGNED_LOAD
-# include <sanitizer/common_interface_defs.h>
-# if (SIZE_MAX == UINT64_MAX)
-#  define UNALIGNED_LOAD_SIZE(a, i) \
-   (size_t) __sanitizer_unaligned_load64 ((void *) ((a) + (i)))
-# elif (SIZE_MAX == UINT32_MAX)
-#  define UNALIGNED_LOAD_SIZE(a, i) \
-   (size_t) __sanitizer_unaligned_load32 ((void *) ((a) + (i)))
-# else
-#  define UNALIGNED_LOAD_SIZE(a, i) *((a) + (i))
-# endif
-#else
-# define UNALIGNED_LOAD_SIZE(a, i) *((a) + (i))
-#endif
-
 extern void xputenv (const char *);
 
 extern char *egetenv_internal (const char *, ptrdiff_t);



reply via email to

[Prev in Thread] Current Thread [Next in Thread]