[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[PATCH] savedir: scale better when sorting by name
From: |
Paul Eggert |
Subject: |
[PATCH] savedir: scale better when sorting by name |
Date: |
Mon, 11 Dec 2023 18:29:51 -0800 |
* lib/savedir.c: Include attribute.h.
(direntry_t): The ‘name’ member is now idx_t, not char *,
so that it survives name_space relocation.
(direntry_cmp_name, direntry_cmp_inode, comparison_function):
Adjust to qsort_r API, and to direntry_t layout change.
(streamsavedir): Redo to avoid need for xstrdup on each directory
entry. Instead, copy the string data into name_space; this
typically scales better the memory allocator is called O(log N)
rather than O(N) times. Use qsort_r so that name_space can be
passed to the comparison functions. Simplify calls to ‘free’ so
that lack of leakage is more obvious.
* modules/savedir (Depends-on): Add attribute, qsort_r.
---
ChangeLog | 14 +++++++++++
lib/savedir.c | 62 +++++++++++++++++++++++++------------------------
modules/savedir | 2 ++
3 files changed, 48 insertions(+), 30 deletions(-)
diff --git a/ChangeLog b/ChangeLog
index 0b8c3fc640..467a060b30 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,19 @@
2023-12-11 Paul Eggert <eggert@cs.ucla.edu>
+ savedir: scale better when sorting by name
+ * lib/savedir.c: Include attribute.h.
+ (direntry_t): The ‘name’ member is now idx_t, not char *,
+ so that it survives name_space relocation.
+ (direntry_cmp_name, direntry_cmp_inode, comparison_function):
+ Adjust to qsort_r API, and to direntry_t layout change.
+ (streamsavedir): Redo to avoid need for xstrdup on each directory
+ entry. Instead, copy the string data into name_space; this
+ typically scales better the memory allocator is called O(log N)
+ rather than O(N) times. Use qsort_r so that name_space can be
+ passed to the comparison functions. Simplify calls to ‘free’ so
+ that lack of leakage is more obvious.
+ * modules/savedir (Depends-on): Add attribute, qsort_r.
+
getopt: pacify gcc -Wanalyzer-null-dereference
* lib/getopt.c (process_long_option): Simplify logic slightly.
This pacifies gcc -flto -Wanalyzer-null-dereference when compiling
diff --git a/lib/savedir.c b/lib/savedir.c
index 3e8501f33d..c5e7e8b839 100644
--- a/lib/savedir.c
+++ b/lib/savedir.c
@@ -35,12 +35,16 @@
#include <stdlib.h>
#include <string.h>
+#include "attribute.h"
#include "xalloc.h"
typedef struct
{
- char *name;
+ /* Offset of file name in name_space. */
+ idx_t name;
+
#if D_INO_IN_DIRENT
+ /* File inode number. */
ino_t ino;
#endif
} direntry_t;
@@ -48,19 +52,20 @@ typedef struct
/* Compare the names of two directory entries */
static int
-direntry_cmp_name (void const *a, void const *b)
+direntry_cmp_name (void const *a, void const *b, void *arg)
{
direntry_t const *dea = a;
direntry_t const *deb = b;
+ char const *name_space = arg;
- return strcmp (dea->name, deb->name);
+ return strcmp (name_space + dea->name, name_space + deb->name);
}
#if D_INO_IN_DIRENT
/* Compare the inode numbers of two directory entries */
static int
-direntry_cmp_inode (void const *a, void const *b)
+direntry_cmp_inode (void const *a, void const *b, MAYBE_UNUSED void *arg)
{
direntry_t const *dea = a;
direntry_t const *deb = b;
@@ -69,7 +74,7 @@ direntry_cmp_inode (void const *a, void const *b)
}
#endif
-typedef int (*comparison_function) (void const *, void const *);
+typedef int (*comparison_function) (void const *, void const *, void *);
static comparison_function const comparison_function_table[] =
{
@@ -117,54 +122,51 @@ streamsavedir (DIR *dirp, enum savedir_option option)
if (entry[entry[0] != '.' ? 0 : entry[1] != '.' ? 1 : 2] != '\0')
{
idx_t entry_size = _D_EXACT_NAMLEN (dp) + 1;
+ if (allocated - used <= entry_size)
+ name_space = xpalloc (name_space, &allocated,
+ entry_size - (allocated - used),
+ IDX_MAX - 1, sizeof *name_space);
+ memcpy (name_space + used, entry, entry_size);
if (cmp)
{
if (entries_allocated == entries_used)
entries = xpalloc (entries, &entries_allocated, 1, -1,
sizeof *entries);
- entries[entries_used].name = xstrdup (entry);
+ entries[entries_used].name = used;
#if D_INO_IN_DIRENT
entries[entries_used].ino = dp->d_ino;
#endif
entries_used++;
}
- else
- {
- if (allocated - used <= entry_size)
- name_space = xpalloc (name_space, &allocated,
- entry_size - (allocated - used),
- IDX_MAX - 1, sizeof *name_space);
- memcpy (name_space + used, entry, entry_size);
- }
used += entry_size;
}
}
if (errno != 0)
{
- free (entries);
free (name_space);
- return NULL;
+ name_space = NULL;
}
-
- if (cmp)
+ else if (cmp)
{
if (entries_used)
- qsort (entries, entries_used, sizeof *entries, cmp);
- name_space = ximalloc (used + 1);
- used = 0;
+ qsort_r (entries, entries_used, sizeof *entries, cmp, name_space);
+ char *sorted_name_space = ximalloc (used + 1);
+ char *p = sorted_name_space;
for (idx_t i = 0; i < entries_used; i++)
- {
- char *dest = name_space + used;
- used += stpcpy (dest, entries[i].name) - dest + 1;
- free (entries[i].name);
- }
- free (entries);
+ p = stpcpy (p, name_space + entries[i].name) + 1;
+ *p = '\0';
+ free (name_space);
+ name_space = sorted_name_space;
+ }
+ else
+ {
+ if (used == allocated)
+ name_space = xirealloc (name_space, used + 1);
+ name_space[used] = '\0';
}
- else if (used == allocated)
- name_space = xirealloc (name_space, used + 1);
- name_space[used] = '\0';
+ free (entries);
return name_space;
}
diff --git a/modules/savedir b/modules/savedir
index e01aee877f..4f13186f36 100644
--- a/modules/savedir
+++ b/modules/savedir
@@ -7,11 +7,13 @@ lib/savedir.c
m4/savedir.m4
Depends-on:
+attribute
closedir
dirent-safer
fdopendir
free-posix
opendir
+qsort_r
readdir
stpcpy
xalloc
--
2.43.0
- [PATCH] savedir: scale better when sorting by name,
Paul Eggert <=