[Top][All Lists]
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
new module "mpsort" for faster sorting
From: |
Paul Eggert |
Subject: |
new module "mpsort" for faster sorting |
Date: |
Sun, 28 Jan 2007 23:28:49 -0800 |
User-agent: |
Gnus/5.1008 (Gnus v5.10.8) Emacs/21.4 (gnu/linux) |
For quite some time Djamel Belazzougui has been suggesting speed
improvements to glibc's qsort function. I'm not sure yet that this is
a good idea, but it does seem to me that there is a use for a function
that can sort a vector of pointers to data (as opposed to qsort, which
sorts an array of data). Such a function can lead to faster execution
in some important cases, as I have verified using GNU 'ls' as a guinea
pig. So I'm adding this module to gnulib, and plan to submit a patch
to coreutils 'ls' shortly.
2007-01-28 Paul Eggert <address@hidden>
* MODULES.html.sh: New module mpsort.
* lib/mpsort.c, lib/mpsort.h, m4/mpsort.m4, modules/mpsort: New files.
* lib/regex.h (_Restrict_): Renamed from __restrict, to avoid
a circularity problem with HP-UX ia64 reported by Bob Proulx in
<http://lists.gnu.org/archive/html/bug-gnulib/2007-01/msg00394.html>.
All uses changed.
(_Restrict_arr_): Renamed from __restrict_arr, for similar reasons.
All uses changed.
* lib/regcomp.c, lib/regexec.c: Change all uses from __restrict
to _Restrict_.
* lib/regexec.c (regexec): Declare pmatch with _Restrict_arr_, so that
the parameter matches the prototype.
Index: MODULES.html.sh
===================================================================
RCS file: /cvsroot/gnulib/gnulib/MODULES.html.sh,v
retrieving revision 1.177
diff -u -p -r1.177 MODULES.html.sh
--- MODULES.html.sh 27 Jan 2007 01:05:04 -0000 1.177
+++ MODULES.html.sh 29 Jan 2007 07:23:18 -0000
@@ -1540,6 +1540,16 @@ func_all_modules ()
func_module pagealign_alloc
func_end_table
+ element="Sorting functions <stdlib.h>"
+ element=`printf "%s" "$element" | sed -e "$sed_lt" -e "$sed_gt"`
+ func_section_wrap ansic_enh_stdlib_sorting
+ func_wrap H3
+ func_echo "$element"
+
+ func_begin_table
+ func_module mpsort
+ func_end_table
+
element="Date and time <time.h>"
element=`printf "%s" "$element" | sed -e "$sed_lt" -e "$sed_gt"`
func_section_wrap ansic_enh_time_datetime
Index: lib/mpsort.c
===================================================================
RCS file: lib/mpsort.c
diff -N lib/mpsort.c
--- /dev/null 1 Jan 1970 00:00:00 -0000
+++ lib/mpsort.c 29 Jan 2007 07:23:18 -0000
@@ -0,0 +1,157 @@
+/* Sort a vector of pointers to data.
+
+ Copyright (C) 2007 Free Software Foundation, Inc.
+
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2, or (at your option)
+ any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License along
+ with this program; if not, write to the Free Software Foundation,
+ Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
+
+/* Written by Paul Eggert. */
+
+#include <config.h>
+
+#include "mpsort.h"
+
+#include <string.h>
+
+/* The type of qsort-style comparison functions. */
+
+typedef int (*comparison_function) (void const *, void const *);
+
+static void mpsort_with_tmp (void const **restrict, size_t,
+ void const **restrict, comparison_function);
+
+/* Merge-sort the N pointers of BASE into TMP (room for N pointers),
+ comparing with CMP; BASE keeps its two sorted halves. N must be at least 2. */
+
+static void
+mpsort_into_tmp (void const **restrict base, size_t n,
+ void const **restrict tmp,
+ comparison_function cmp)
+{
+ size_t n1 = n / 2; /* size of the lower half, base[0 .. n1-1] */
+ size_t n2 = n - n1; /* size of the upper half, base[n1 .. n-1] */
+ size_t a = 0; /* next unmerged index in the lower half */
+ size_t alim = n1; /* one past the end of the run that A walks */
+ size_t b = n1; /* next unmerged index in the upper half */
+ size_t blim = n; /* one past the end of the upper half */
+ void const *ba; /* cached base[a] */
+ void const *bb; /* cached base[b] */
+
+ mpsort_with_tmp (base + n1, n2, tmp, cmp); /* sort the upper half in place */
+ mpsort_with_tmp (base, n1, tmp, cmp); /* sort the lower half in place */
+
+ ba = base[a];
+ bb = base[b];
+
+ for (;;) /* merge the two sorted halves into TMP */
+ if (cmp (ba, bb) <= 0) /* on a tie the lower-half pointer goes first */
+ {
+ *tmp++ = ba;
+ a++;
+ if (a == alim)
+ {
+ a = b; /* lower half used up: the leftover run is base[b .. blim-1] */
+ alim = blim;
+ break;
+ }
+ ba = base[a];
+ }
+ else
+ {
+ *tmp++ = bb;
+ b++;
+ if (b == blim) /* upper half used up: the leftover run is base[a .. alim-1] */
+ break;
+ bb = base[b];
+ }
+
+ memcpy (tmp, base + a, (alim - a) * sizeof *base); /* append the leftover run */
+}
+
+/* Sort the N pointers in the vector BASE in place, comparing them with
+ CMP. TMP is scratch space with room for N / 2 pointers. A vector of
+ fewer than 2 pointers is left untouched. */
+
+static void
+mpsort_with_tmp (void const **restrict base, size_t n,
+ void const **restrict tmp,
+ comparison_function cmp)
+{
+ if (n <= 2) /* base cases: 0, 1 or 2 pointers, no recursion needed */
+ {
+ if (n == 2)
+ {
+ void const *p0 = base[0];
+ void const *p1 = base[1];
+ if (! (cmp (p0, p1) <= 0)) /* swap only when the pair is out of order */
+ {
+ base[0] = p1;
+ base[1] = p0;
+ }
+ }
+ }
+ else
+ {
+ size_t n1 = n / 2; /* size of the lower half, base[0 .. n1-1] */
+ size_t n2 = n - n1; /* size of the upper half, base[n1 .. n-1] */
+ size_t i; /* next output slot in BASE */
+ size_t t = 0; /* next unmerged index in TMP */
+ size_t tlim = n1; /* TMP holds the n1 sorted lower-half pointers */
+ size_t b = n1; /* next unmerged index in the upper half */
+ size_t blim = n; /* one past the end of the upper half */
+ void const *bb; /* cached base[b] */
+ void const *tt; /* cached tmp[t] */
+
+ mpsort_with_tmp (base + n1, n2, tmp, cmp); /* sort the upper half in place */
+
+ if (n1 < 2) /* n >= 3 here, so n1 == 1: just copy the lone pointer */
+ tmp[0] = base[0];
+ else
+ mpsort_into_tmp (base, n1, tmp, cmp); /* sort the lower half into TMP */
+
+ tt = tmp[t];
+ bb = base[b];
+
+ for (i = 0; ; ) /* merge TMP and the upper half back into BASE */
+ if (cmp (tt, bb) <= 0) /* on a tie the lower-half pointer goes first */
+ {
+ base[i++] = tt;
+ t++;
+ if (t == tlim) /* TMP drained: base[b ..] is already in its final place */
+ break;
+ tt = tmp[t];
+ }
+ else
+ {
+ base[i++] = bb;
+ b++;
+ if (b == blim)
+ {
+ memcpy (base + i, tmp + t, (tlim - t) * sizeof *base); /* upper half drained: copy the rest of TMP */
+ break;
+ }
+ bb = base[b];
+ }
+ }
+}
+
+/* Sort a vector BASE containing N pointers, in place. BASE must
+ contain enough storage to hold N + N / 2 pointers; the trailing
+ N / 2 pointers are used for temporaries. Compare pointers with CMP. */
+
+void
+mpsort (void const **base, size_t n, comparison_function cmp)
+{
+ mpsort_with_tmp (base, n, base + n, cmp); /* scratch area starts right after the N pointers */
+}
Index: lib/mpsort.h
===================================================================
RCS file: lib/mpsort.h
diff -N lib/mpsort.h
--- /dev/null 1 Jan 1970 00:00:00 -0000
+++ lib/mpsort.h 29 Jan 2007 07:23:18 -0000
@@ -0,0 +1,2 @@
+#include <stddef.h>
+void mpsort (void const **, size_t, int (*) (void const *, void const *));
Index: m4/mpsort.m4
===================================================================
RCS file: m4/mpsort.m4
diff -N m4/mpsort.m4
--- /dev/null 1 Jan 1970 00:00:00 -0000
+++ m4/mpsort.m4 29 Jan 2007 07:23:18 -0000
@@ -0,0 +1,13 @@
+# Sort a vector of pointers to data.
+
+# Copyright (C) 2007 Free Software Foundation, Inc.
+
+# This file is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+AC_DEFUN([gl_MPSORT],
+[
+ AC_REQUIRE([AC_C_RESTRICT]) dnl mpsort.c declares its parameters with 'restrict'.
+ AC_LIBOBJ([mpsort]) dnl Add mpsort to the object files to be built (LIBOBJS).
+])
Index: modules/mpsort
===================================================================
RCS file: modules/mpsort
diff -N modules/mpsort
--- /dev/null 1 Jan 1970 00:00:00 -0000
+++ modules/mpsort 29 Jan 2007 07:23:18 -0000
@@ -0,0 +1,23 @@
+Description:
+Sort a vector of pointers to data.
+
+Files:
+lib/mpsort.h
+lib/mpsort.c
+m4/mpsort.m4
+
+Depends-on:
+
+configure.ac:
+gl_MPSORT
+
+Makefile.am:
+
+Include:
+"mpsort.h"
+
+License:
+GPL
+
+Maintainer:
+Paul Eggert
- new module "mpsort" for faster sorting,
Paul Eggert <=