bug-gnulib
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[PATCH] savedir: scale better when sorting by name


From: Paul Eggert
Subject: [PATCH] savedir: scale better when sorting by name
Date: Mon, 11 Dec 2023 18:29:51 -0800

* lib/savedir.c: Include attribute.h.
(direntry_t): The ‘name’ member is now idx_t, not char *,
so that it survives name_space relocation.
(direntry_cmp_name, direntry_cmp_inode, comparison_function):
Adjust to qsort_r API, and to direntry_t layout change.
(streamsavedir): Redo to avoid need for xstrdup on each directory
entry.  Instead, copy the string data into name_space; this
typically scales better the memory allocator is called O(log N)
rather than O(N) times.  Use qsort_r so that name_space can be
passed to the comparison functions.  Simplify calls to ‘free’ so
that lack of leakage is more obvious.
* modules/savedir (Depends-on): Add attribute, qsort_r.
---
 ChangeLog       | 14 +++++++++++
 lib/savedir.c   | 62 +++++++++++++++++++++++++------------------------
 modules/savedir |  2 ++
 3 files changed, 48 insertions(+), 30 deletions(-)

diff --git a/ChangeLog b/ChangeLog
index 0b8c3fc640..467a060b30 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,19 @@
 2023-12-11  Paul Eggert  <eggert@cs.ucla.edu>
 
+       savedir: scale better when sorting by name
+       * lib/savedir.c: Include attribute.h.
+       (direntry_t): The ‘name’ member is now idx_t, not char *,
+       so that it survives name_space relocation.
+       (direntry_cmp_name, direntry_cmp_inode, comparison_function):
+       Adjust to qsort_r API, and to direntry_t layout change.
+       (streamsavedir): Redo to avoid need for xstrdup on each directory
+       entry.  Instead, copy the string data into name_space; this
+       typically scales better the memory allocator is called O(log N)
+       rather than O(N) times.  Use qsort_r so that name_space can be
+       passed to the comparison functions.  Simplify calls to ‘free’ so
+       that lack of leakage is more obvious.
+       * modules/savedir (Depends-on): Add attribute, qsort_r.
+
        getopt: pacify gcc -Wanalyzer-null-dereference
        * lib/getopt.c (process_long_option): Simplify logic slightly.
        This pacifies gcc -flto -Wanalyzer-null-dereference when compiling
diff --git a/lib/savedir.c b/lib/savedir.c
index 3e8501f33d..c5e7e8b839 100644
--- a/lib/savedir.c
+++ b/lib/savedir.c
@@ -35,12 +35,16 @@
 #include <stdlib.h>
 #include <string.h>
 
+#include "attribute.h"
 #include "xalloc.h"
 
 typedef struct
 {
-  char *name;
+  /* Offset of file name in name_space.  */
+  idx_t name;
+
 #if D_INO_IN_DIRENT
+  /* File inode number.  */
   ino_t ino;
 #endif
 } direntry_t;
@@ -48,19 +52,20 @@ typedef struct
 /* Compare the names of two directory entries */
 
 static int
-direntry_cmp_name (void const *a, void const *b)
+direntry_cmp_name (void const *a, void const *b, void *arg)
 {
   direntry_t const *dea = a;
   direntry_t const *deb = b;
+  char const *name_space = arg;
 
-  return strcmp (dea->name, deb->name);
+  return strcmp (name_space + dea->name, name_space + deb->name);
 }
 
 #if D_INO_IN_DIRENT
 /* Compare the inode numbers of two directory entries */
 
 static int
-direntry_cmp_inode (void const *a, void const *b)
+direntry_cmp_inode (void const *a, void const *b, MAYBE_UNUSED void *arg)
 {
   direntry_t const *dea = a;
   direntry_t const *deb = b;
@@ -69,7 +74,7 @@ direntry_cmp_inode (void const *a, void const *b)
 }
 #endif
 
-typedef int (*comparison_function) (void const *, void const *);
+typedef int (*comparison_function) (void const *, void const *, void *);
 
 static comparison_function const comparison_function_table[] =
   {
@@ -117,54 +122,51 @@ streamsavedir (DIR *dirp, enum savedir_option option)
       if (entry[entry[0] != '.' ? 0 : entry[1] != '.' ? 1 : 2] != '\0')
         {
           idx_t entry_size = _D_EXACT_NAMLEN (dp) + 1;
+          if (allocated - used <= entry_size)
+            name_space = xpalloc (name_space, &allocated,
+                                  entry_size - (allocated - used),
+                                  IDX_MAX - 1, sizeof *name_space);
+          memcpy (name_space + used, entry, entry_size);
           if (cmp)
             {
               if (entries_allocated == entries_used)
                 entries = xpalloc (entries, &entries_allocated, 1, -1,
                                    sizeof *entries);
-              entries[entries_used].name = xstrdup (entry);
+              entries[entries_used].name = used;
 #if D_INO_IN_DIRENT
               entries[entries_used].ino = dp->d_ino;
 #endif
               entries_used++;
             }
-          else
-            {
-              if (allocated - used <= entry_size)
-                name_space = xpalloc (name_space, &allocated,
-                                      entry_size - (allocated - used),
-                                      IDX_MAX - 1, sizeof *name_space);
-              memcpy (name_space + used, entry, entry_size);
-            }
           used += entry_size;
         }
     }
 
   if (errno != 0)
     {
-      free (entries);
       free (name_space);
-      return NULL;
+      name_space = NULL;
     }
-
-  if (cmp)
+  else if (cmp)
     {
       if (entries_used)
-        qsort (entries, entries_used, sizeof *entries, cmp);
-      name_space = ximalloc (used + 1);
-      used = 0;
+        qsort_r (entries, entries_used, sizeof *entries, cmp, name_space);
+      char *sorted_name_space = ximalloc (used + 1);
+      char *p = sorted_name_space;
       for (idx_t i = 0; i < entries_used; i++)
-        {
-          char *dest = name_space + used;
-          used += stpcpy (dest, entries[i].name) - dest + 1;
-          free (entries[i].name);
-        }
-      free (entries);
+        p = stpcpy (p, name_space + entries[i].name) + 1;
+      *p = '\0';
+      free (name_space);
+      name_space = sorted_name_space;
+    }
+  else
+    {
+      if (used == allocated)
+        name_space = xirealloc (name_space, used + 1);
+      name_space[used] = '\0';
     }
-  else if (used == allocated)
-    name_space = xirealloc (name_space, used + 1);
 
-  name_space[used] = '\0';
+  free (entries);
   return name_space;
 }
 
diff --git a/modules/savedir b/modules/savedir
index e01aee877f..4f13186f36 100644
--- a/modules/savedir
+++ b/modules/savedir
@@ -7,11 +7,13 @@ lib/savedir.c
 m4/savedir.m4
 
 Depends-on:
+attribute
 closedir
 dirent-safer
 fdopendir
 free-posix
 opendir
+qsort_r
 readdir
 stpcpy
 xalloc
-- 
2.43.0




reply via email to

[Prev in Thread] Current Thread [Next in Thread]