coreutils
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[PATCH] copy,dd: simplify and optimize NUL bytes detection


From: Pádraig Brady
Subject: [PATCH] copy,dd: simplify and optimize NUL bytes detection
Date: Thu, 22 Oct 2015 15:00:53 +0100

* src/system.h (is_nul): Reimplement with a version
that doesn't require a sentinel after the buffer,
and which calls down to (the system-optimized) memcmp.
Performance analyzed at http://rusty.ozlabs.org/?p=560
* src/dd.c (alloc_obuf): Simplify the is_nul() call by
not needing to write the sentinel.
* src/copy.c (sparse_copy): Likewise.
(copy_reg): Simplify the buffer allocation by avoiding
consideration of the sentinel in the buffer size calculation.
---
 src/copy.c   | 22 ++++------------------
 src/dd.c     |  6 ------
 src/system.h | 35 ++++++++++++++++++-----------------
 3 files changed, 22 insertions(+), 41 deletions(-)

diff --git a/src/copy.c b/src/copy.c
index 5fe69ea..edf022e 100644
--- a/src/copy.c
+++ b/src/copy.c
@@ -245,17 +245,7 @@ sparse_copy (int src_fd, int dest_fd, char *buf, size_t buf_size,
           csize = MIN (csize, n_read);
 
           if (hole_size && csize)
-            {
-              /* Setup sentinel required by is_nul().  */
-              typedef uintptr_t word;
-              word isnul_tmp;
-              memcpy (&isnul_tmp, cbuf + csize, sizeof (word));
-              memset (cbuf + csize, 1, sizeof (word));
-
-              make_hole = is_nul (cbuf, csize);
-
-              memcpy (cbuf + csize, &isnul_tmp, sizeof (word));
-            }
+            make_hole = is_nul (cbuf, csize);
 
           bool transition = (make_hole != prev_hole) && psize;
           bool last_chunk = (n_read == csize && ! make_hole) || ! csize;
@@ -1201,11 +1191,8 @@ copy_reg (char const *src_name, char const *dst_name,
 
   if (data_copy_required)
     {
-      typedef uintptr_t word;
-
       /* Choose a suitable buffer size; it may be adjusted later.  */
-      size_t buf_alignment = lcm (getpagesize (), sizeof (word));
-      size_t buf_alignment_slop = sizeof (word) + buf_alignment - 1;
+      size_t buf_alignment = getpagesize ();
       size_t buf_size = io_blksize (sb);
       size_t hole_size = ST_BLKSIZE (sb);
 
@@ -1236,7 +1223,7 @@ copy_reg (char const *src_name, char const *dst_name,
         {
           /* Compute the least common multiple of the input and output
              buffer sizes, adjusting for outlandish values.  */
-          size_t blcm_max = MIN (SIZE_MAX, SSIZE_MAX) - buf_alignment_slop;
+          size_t blcm_max = MIN (SIZE_MAX, SSIZE_MAX) - buf_alignment;
           size_t blcm = buffer_lcm (io_blksize (src_open_sb), buf_size,
                                     blcm_max);
 
@@ -1254,8 +1241,7 @@ copy_reg (char const *src_name, char const *dst_name,
             buf_size = blcm;
         }
 
-      /* Make a buffer with space for a sentinel at the end.  */
-      buf_alloc = xmalloc (buf_size + buf_alignment_slop);
+      buf_alloc = xmalloc (buf_size + buf_alignment);
       buf = ptr_align (buf_alloc, buf_alignment);
 
       if (sparse_src)
diff --git a/src/dd.c b/src/dd.c
index e647294..054cf76 100644
--- a/src/dd.c
+++ b/src/dd.c
@@ -20,7 +20,6 @@
 
 #define SWAB_ALIGN_OFFSET 2
 
-#include <assert.h>
 #include <sys/types.h>
 #include <signal.h>
 #include <getopt.h>
@@ -728,11 +727,6 @@ alloc_obuf (void)
       alloc_ibuf ();
       obuf = ibuf;
     }
-
-  /* Write a sentinel to the slop after the buffer,
-   to allow efficient checking for NUL blocks.  */
-  assert (sizeof (uintptr_t) <= OUTPUT_BLOCK_SLOP);
-  memset (obuf + output_blocksize, 1, sizeof (uintptr_t));
 }
 
 static void
diff --git a/src/system.h b/src/system.h
index 8f6a2ea..de46e33 100644
--- a/src/system.h
+++ b/src/system.h
@@ -487,27 +487,28 @@ ptr_align (void const *ptr, size_t alignment)
 }
 
 /* Return whether the buffer consists entirely of NULs.
-   Note the word after the buffer must be non NUL. */
+   From CCAN by Rusty Russell <address@hidden>
+   released under CC0 (Public domain).  */
 
 static inline bool _GL_ATTRIBUTE_PURE
 is_nul (void const *buf, size_t bufsize)
 {
-  typedef uintptr_t word;
-  void const *vp;
-  char const *cbuf = buf;
-  word const *wp = buf;
-
-  /* Find first nonzero *word*, or the word with the sentinel.  */
-  while (*wp++ == 0)
-    continue;
-
-  /* Find the first nonzero *byte*, or the sentinel.  */
-  vp = wp - 1;
-  char const *cp = vp;
-  while (*cp++ == 0)
-    continue;
-
-  return cbuf + bufsize < cp;
+  const unsigned char *p = buf;
+  size_t len;
+
+  /* Check first 16 bytes manually.  */
+  for (len = 0; len < 16; len++)
+    {
+      if (! bufsize)
+        return true;
+      if (*p)
+        return false;
+      p++;
+      bufsize--;
+    }
+
+  /* The first 16 bytes are zero, so memcmp the buffer with itself at offset 16.  */
+  return memcmp (buf, p, bufsize) == 0;
 }
 
 /* If 10*Accum + Digit_val is larger than the maximum value for Type,
-- 
2.5.0




reply via email to

[Prev in Thread] Current Thread [Next in Thread]