bug-gnulib
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

oset: Add an 'iterator_atleast' operation


From: Bruno Haible
Subject: oset: Add an 'iterator_atleast' operation
Date: Sun, 02 Aug 2020 21:05:04 +0200
User-agent: KMail/5.1.3 (Linux/4.4.0-186-generic; KDE/5.18.0; x86_64; ; )

In an application that uses an ordered set (gl_oset), I need an iterator
that starts at the first element that has a value >= threshold. And it
should be fast in the case when only 1 or 2 or 3 elements from that iterator
are needed, but the entire set is large.
It can't be realized with the existing API that provides (separately)
  - a function to find the first element that has a value >= threshold,
  - a function that returns at iterator that starts at the beginning.
This patch implements it.


2020-08-02  Bruno Haible  <bruno@clisp.org>

        oset: Add an 'iterator_atleast' operation.
        * lib/gl_array_oset.c (gl_array_indexof_atleast): New function,
        extracted from gl_array_search_atleast.
        (gl_array_search_atleast): Use it.
        (gl_array_iterator_atleast): New function.
        (gl_array_oset_implementation): Use it.
        * lib/gl_anytree_oset.h (gl_tree_iterator_atleast): New function.
        * lib/gl_avltree_oset.c (gl_avltree_oset_implementation): Use it.
        * lib/gl_rbtree_oset.c (gl_rbtree_oset_implementation): Likewise.
        * lib/gl_oset.h (struct gl_oset_implementation): Add 'iterator_atleast'
        member.
        (gl_oset_iterator_atleast): New function.
        * lib/gl_oset.hh (gl_OSet): Add 'begin_atleast' member.
        (gl_OSet::iterator): Add another auxiliary constructor.
        * tests/test-array_oset.c (is_at_least, gl_sortedlist_indexof_atleast):
        New functions.
        (main): Test also gl_oset_iterator_atleast.
        * tests/test-avltree_oset.c (is_at_least): New function.
        (main): Test also gl_oset_iterator_atleast.
        * tests/test-rbtree_oset.c (is_at_least): New function.
        (main): Test also gl_oset_iterator_atleast.
        * tests/test-oset-c++.cc (is_at_most): New function.
        (main): Test also gl_OSet::begin_atleast.

diff --git a/lib/gl_anytree_oset.h b/lib/gl_anytree_oset.h
index 8a64229..10957c0 100644
--- a/lib/gl_anytree_oset.h
+++ b/lib/gl_anytree_oset.h
@@ -375,6 +375,52 @@ gl_tree_iterator (gl_oset_t set)
   return result;
 }
 
+static gl_oset_iterator_t
+gl_tree_iterator_atleast (gl_oset_t set,
+                          gl_setelement_threshold_fn threshold_fn,
+                          const void *threshold)
+{
+  gl_oset_iterator_t result;
+  gl_oset_node_t node;
+
+  result.vtable = set->base.vtable;
+  result.set = set;
+  /* End point is past the rightmost node.  */
+  result.q = NULL;
+#if defined GCC_LINT || defined lint
+  result.i = 0;
+  result.j = 0;
+  result.count = 0;
+#endif
+
+  for (node = set->root; node != NULL; )
+    {
+      if (! threshold_fn (node->value, threshold))
+        node = node->right;
+      else
+        {
+          /* We have an element >= THRESHOLD.  But we need the leftmost such
+             element.  */
+          gl_oset_node_t found = node;
+          node = node->left;
+          for (; node != NULL; )
+            {
+              if (! threshold_fn (node->value, threshold))
+                node = node->right;
+              else
+                {
+                  found = node;
+                  node = node->left;
+                }
+            }
+          result.p = found;
+          return result;
+        }
+    }
+  result.p = NULL;
+  return result;
+}
+
 static bool
 gl_tree_iterator_next (gl_oset_iterator_t *iterator, const void **eltp)
 {
diff --git a/lib/gl_array_oset.c b/lib/gl_array_oset.c
index 86fb747..f44d6b0 100644
--- a/lib/gl_array_oset.c
+++ b/lib/gl_array_oset.c
@@ -107,11 +107,15 @@ gl_array_search (gl_oset_t set, const void *elt)
   return gl_array_indexof (set, elt) != (size_t)(-1);
 }
 
-static bool
-gl_array_search_atleast (gl_oset_t set,
-                         gl_setelement_threshold_fn threshold_fn,
-                         const void *threshold,
-                         const void **eltp)
+/* Searches the least element in the ordered set that compares greater or equal
+   to the given THRESHOLD.  The representation of the THRESHOLD is defined
+   by the THRESHOLD_FN.
+   Returns the position at which it was found, or gl_list_size (SET) if not
+   found.  */
+static size_t
+gl_array_indexof_atleast (gl_oset_t set,
+                          gl_setelement_threshold_fn threshold_fn,
+                          const void *threshold)
 {
   size_t count = set->count;
 
@@ -148,13 +152,29 @@ gl_array_search_atleast (gl_oset_t set,
                   else
                     high = mid2;
                 }
-              *eltp = set->elements[low];
-              return true;
+              return low;
             }
         }
       while (low < high);
     }
-  return false;
+  return count;
+}
+
+static bool
+gl_array_search_atleast (gl_oset_t set,
+                         gl_setelement_threshold_fn threshold_fn,
+                         const void *threshold,
+                         const void **eltp)
+{
+  size_t index = gl_array_indexof_atleast (set, threshold_fn, threshold);
+
+  if (index == set->count)
+    return false;
+  else
+    {
+      *eltp = set->elements[index];
+      return true;
+    }
 }
 
 /* Ensure that set->allocated > set->count.
@@ -421,6 +441,27 @@ gl_array_iterator (gl_oset_t set)
   return result;
 }
 
+static gl_oset_iterator_t
+gl_array_iterator_atleast (gl_oset_t set,
+                           gl_setelement_threshold_fn threshold_fn,
+                           const void *threshold)
+{
+  size_t index = gl_array_indexof_atleast (set, threshold_fn, threshold);
+  gl_oset_iterator_t result;
+
+  result.vtable = set->base.vtable;
+  result.set = set;
+  result.count = set->count;
+  result.p = set->elements + index;
+  result.q = set->elements + set->count;
+#if defined GCC_LINT || defined lint
+  result.i = 0;
+  result.j = 0;
+#endif
+
+  return result;
+}
+
 static bool
 gl_array_iterator_next (gl_oset_iterator_t *iterator, const void **eltp)
 {
@@ -463,6 +504,7 @@ const struct gl_oset_implementation 
gl_array_oset_implementation =
     gl_array_update,
     gl_array_free,
     gl_array_iterator,
+    gl_array_iterator_atleast,
     gl_array_iterator_next,
     gl_array_iterator_free
   };
diff --git a/lib/gl_avltree_oset.c b/lib/gl_avltree_oset.c
index 34de5a2..261350d 100644
--- a/lib/gl_avltree_oset.c
+++ b/lib/gl_avltree_oset.c
@@ -67,6 +67,7 @@ const struct gl_oset_implementation 
gl_avltree_oset_implementation =
     gl_tree_update,
     gl_tree_oset_free,
     gl_tree_iterator,
+    gl_tree_iterator_atleast,
     gl_tree_iterator_next,
     gl_tree_iterator_free
   };
diff --git a/lib/gl_oset.h b/lib/gl_oset.h
index 2908297..2f3fa76 100644
--- a/lib/gl_oset.h
+++ b/lib/gl_oset.h
@@ -69,6 +69,7 @@ extern "C" {
    gl_oset_search            O(log n) O(log n)
    gl_oset_search_atleast    O(log n) O(log n)
    gl_oset_iterator            O(1)   O(log n)
+   gl_oset_iterator_atleast  O(log n) O(log n)
    gl_oset_iterator_next       O(1)   O(log n)
  */
 
@@ -186,6 +187,13 @@ typedef struct
    except for removing the last returned element.  */
 extern gl_oset_iterator_t gl_oset_iterator (gl_oset_t set);
 
+/* Creates an iterator traversing the tail of an ordered set, that comprises
+   the elements that compare greater or equal to the given THRESHOLD.  The
+   representation of the THRESHOLD is defined by the THRESHOLD_FN.  */
+extern gl_oset_iterator_t gl_oset_iterator_atleast (gl_oset_t set,
+                                                    gl_setelement_threshold_fn 
threshold_fn,
+                                                    const void *threshold);
+
 /* If there is a next element, stores the next element in *ELTP, advances the
    iterator and returns true.  Otherwise, returns false.  */
 extern bool gl_oset_iterator_next (gl_oset_iterator_t *iterator,
@@ -217,6 +225,9 @@ struct gl_oset_implementation
   void (*oset_free) (gl_oset_t set);
   /* gl_oset_iterator_t functions.  */
   gl_oset_iterator_t (*iterator) (gl_oset_t set);
+  gl_oset_iterator_t (*iterator_atleast) (gl_oset_t set,
+                                          gl_setelement_threshold_fn 
threshold_fn,
+                                          const void *threshold);
   bool (*iterator_next) (gl_oset_iterator_t *iterator, const void **eltp);
   void (*iterator_free) (gl_oset_iterator_t *iterator);
 };
@@ -295,6 +306,15 @@ gl_oset_iterator (gl_oset_t set)
   return ((const struct gl_oset_impl_base *) set)->vtable->iterator (set);
 }
 
+GL_OSET_INLINE gl_oset_iterator_t
+gl_oset_iterator_atleast (gl_oset_t set,
+                          gl_setelement_threshold_fn threshold_fn,
+                          const void *threshold)
+{
+  return ((const struct gl_oset_impl_base *) set)->vtable
+         ->iterator_atleast (set, threshold_fn, threshold);
+}
+
 GL_OSET_INLINE bool
 gl_oset_iterator_next (gl_oset_iterator_t *iterator, const void **eltp)
 {
diff --git a/lib/gl_oset.hh b/lib/gl_oset.hh
index 157b8b9..8a7edc9 100644
--- a/lib/gl_oset.hh
+++ b/lib/gl_oset.hh
@@ -156,12 +156,21 @@ public:
   #else
   private:
     friend iterator gl_OSet::begin ();
+    template <typename THT>
+    friend iterator gl_OSet::begin_atleast (bool (*) (ELTYPE *, THT *), THT *);
   #endif
 
     iterator (gl_oset_t ptr)
       : _state (gl_oset_iterator (ptr))
       {}
 
+    template <typename THT>
+    iterator (gl_oset_t ptr,
+              bool (*threshold_fn) (ELTYPE * /*elt*/, THT * /*threshold*/),
+              THT * threshold)
+      : _state (gl_oset_iterator_atleast (ptr, 
reinterpret_cast<gl_setelement_threshold_fn>(threshold_fn), threshold))
+      {}
+
   private:
 
     gl_oset_iterator_t _state;
@@ -172,6 +181,16 @@ public:
      except for removing the last returned element.  */
   iterator begin ()
     { return iterator (_ptr); }
+
+  /* Creates an iterator traversing the tail of an ordered set, that comprises
+     the elements that compare greater or equal to the given THRESHOLD.  The
+     representation of the THRESHOLD is defined by the THRESHOLD_FN.
+     The set's contents must not be modified while the iterator is in use,
+     except for removing the last returned element.  */
+  template <typename THT>
+  iterator begin_atleast (bool (*threshold_fn) (ELTYPE * /*elt*/, THT * 
/*threshold*/),
+                          THT * threshold)
+    { return iterator (_ptr, threshold_fn, threshold); }
 };
 
 #endif /* _GL_OSET_HH */
diff --git a/lib/gl_rbtree_oset.c b/lib/gl_rbtree_oset.c
index 064c0c4..bc55ace 100644
--- a/lib/gl_rbtree_oset.c
+++ b/lib/gl_rbtree_oset.c
@@ -67,6 +67,7 @@ const struct gl_oset_implementation 
gl_rbtree_oset_implementation =
     gl_tree_update,
     gl_tree_oset_free,
     gl_tree_iterator,
+    gl_tree_iterator_atleast,
     gl_tree_iterator_next,
     gl_tree_iterator_free
   };
diff --git a/tests/test-array_oset.c b/tests/test-array_oset.c
index 6a7fd36..64285f4 100644
--- a/tests/test-array_oset.c
+++ b/tests/test-array_oset.c
@@ -68,6 +68,27 @@ check_all (gl_oset_t set1, gl_list_t set2)
   check_equals (set1, set2);
 }
 
+static bool
+is_at_least (const void *elt, const void *threshold)
+{
+  return strcmp ((const char *) elt, (const char *) threshold) >= 0;
+}
+
+static size_t
+gl_sortedlist_indexof_atleast (gl_list_t set,
+                               gl_setelement_threshold_fn threshold_fn,
+                               const void *threshold)
+{
+  /* This implementation is slow, but easy to verify.  */
+  size_t count = gl_list_size (set);
+  size_t index;
+
+  for (index = 0; index < count; index++)
+    if (threshold_fn (gl_list_get_at (set, index), threshold))
+      return index;
+  return (size_t)(-1);
+}
+
 int
 main (int argc, char *argv[])
 {
@@ -105,7 +126,7 @@ main (int argc, char *argv[])
 
     for (repeat = 0; repeat < 100000; repeat++)
       {
-        unsigned int operation = RANDOM (3);
+        unsigned int operation = RANDOM (4);
         switch (operation)
           {
           case 0:
@@ -131,6 +152,32 @@ main (int argc, char *argv[])
                       == gl_sortedlist_remove (set2, 
(gl_listelement_compar_fn)strcmp, obj));
             }
             break;
+          case 3:
+            {
+              const char *obj = RANDOM_OBJECT ();
+              gl_oset_iterator_t iter = gl_oset_iterator_atleast (set1, 
is_at_least, obj);
+              size_t index = gl_sortedlist_indexof_atleast (set2, is_at_least, 
obj);
+              const void *elt;
+              /* Check the first two values that the iterator produces.
+                 Checking them all would make this part of the test dominate 
the
+                 run time of the test.  */
+              if (index == (size_t)(-1))
+                ASSERT (!gl_oset_iterator_next (&iter, &elt));
+              else
+                {
+                  ASSERT (gl_oset_iterator_next (&iter, &elt));
+                  ASSERT (elt == gl_list_get_at (set2, index));
+                  if (index + 1 == gl_list_size (set2))
+                    ASSERT (!gl_oset_iterator_next (&iter, &elt));
+                  else
+                    {
+                      ASSERT (gl_oset_iterator_next (&iter, &elt));
+                      ASSERT (elt == gl_list_get_at (set2, index + 1));
+                    }
+                }
+              gl_oset_iterator_free (&iter);
+            }
+            break;
           }
         check_all (set1, set2);
       }
diff --git a/tests/test-avltree_oset.c b/tests/test-avltree_oset.c
index 65e2d32..a65e6d7 100644
--- a/tests/test-avltree_oset.c
+++ b/tests/test-avltree_oset.c
@@ -68,6 +68,12 @@ check_all (gl_oset_t set1, gl_oset_t set2)
   check_equals (set1, set2);
 }
 
+static bool
+is_at_least (const void *elt, const void *threshold)
+{
+  return strcmp ((const char *) elt, (const char *) threshold) >= 0;
+}
+
 int
 main (int argc, char *argv[])
 {
@@ -102,7 +108,7 @@ main (int argc, char *argv[])
 
     for (repeat = 0; repeat < 100000; repeat++)
       {
-        unsigned int operation = RANDOM (3);
+        unsigned int operation = RANDOM (4);
         switch (operation)
           {
           case 0:
@@ -123,6 +129,32 @@ main (int argc, char *argv[])
               ASSERT (gl_oset_remove (set1, obj) == gl_oset_remove (set2, 
obj));
             }
             break;
+          case 3:
+            {
+              const char *obj = RANDOM_OBJECT ();
+              gl_oset_iterator_t iter1 = gl_oset_iterator_atleast (set1, 
is_at_least, obj);
+              gl_oset_iterator_t iter2 = gl_oset_iterator_atleast (set2, 
is_at_least, obj);
+              const void *elt1;
+              const void *elt2;
+              /* Check the first two values that the iterator produces.
+                 Checking them all would make this part of the test dominate 
the
+                 run time of the test.  */
+              bool havenext1 = gl_oset_iterator_next (&iter1, &elt1);
+              bool havenext2 = gl_oset_iterator_next (&iter2, &elt2);
+              ASSERT (havenext1 == havenext2);
+              if (havenext1)
+                {
+                  ASSERT (elt1 == elt2);
+                  havenext1 = gl_oset_iterator_next (&iter1, &elt1);
+                  havenext2 = gl_oset_iterator_next (&iter2, &elt2);
+                  ASSERT (havenext1 == havenext2);
+                  if (havenext1)
+                    ASSERT (elt1 == elt2);
+                }
+              gl_oset_iterator_free (&iter1);
+              gl_oset_iterator_free (&iter2);
+            }
+            break;
           }
         check_all (set1, set2);
       }
diff --git a/tests/test-oset-c++.cc b/tests/test-oset-c++.cc
index e7eb86e..c14841c 100644
--- a/tests/test-oset-c++.cc
+++ b/tests/test-oset-c++.cc
@@ -37,6 +37,12 @@ action (const char *str, int *data)
   const_cast<char *> (str) [0] += *data;
 }
 
+static bool
+is_at_most (const char *str, const char *threshold)
+{
+  return strcmp (str, threshold) <= 0;
+}
+
 int
 main (int argc, char *argv[])
 {
@@ -77,6 +83,16 @@ main (int argc, char *argv[])
     ASSERT (!iter2.next (elt));
   }
 
+  {
+    gl_OSet<const char *>::iterator iter3 = set1.begin_atleast (is_at_most, 
"R");
+    const char *elt;
+    ASSERT (iter3.next (elt));
+    ASSERT (strcmp (elt, "D") == 0);
+    ASSERT (iter3.next (elt));
+    ASSERT (strcmp (elt, "C") == 0);
+    ASSERT (!iter3.next (elt));
+  }
+
   set1.free ();
 
   return 0;
diff --git a/tests/test-rbtree_oset.c b/tests/test-rbtree_oset.c
index 29f844a..a75f578 100644
--- a/tests/test-rbtree_oset.c
+++ b/tests/test-rbtree_oset.c
@@ -68,6 +68,12 @@ check_all (gl_oset_t set1, gl_oset_t set2)
   check_equals (set1, set2);
 }
 
+static bool
+is_at_least (const void *elt, const void *threshold)
+{
+  return strcmp ((const char *) elt, (const char *) threshold) >= 0;
+}
+
 int
 main (int argc, char *argv[])
 {
@@ -102,7 +108,7 @@ main (int argc, char *argv[])
 
     for (repeat = 0; repeat < 100000; repeat++)
       {
-        unsigned int operation = RANDOM (3);
+        unsigned int operation = RANDOM (4);
         switch (operation)
           {
           case 0:
@@ -123,6 +129,32 @@ main (int argc, char *argv[])
               ASSERT (gl_oset_remove (set1, obj) == gl_oset_remove (set2, 
obj));
             }
             break;
+          case 3:
+            {
+              const char *obj = RANDOM_OBJECT ();
+              gl_oset_iterator_t iter1 = gl_oset_iterator_atleast (set1, 
is_at_least, obj);
+              gl_oset_iterator_t iter2 = gl_oset_iterator_atleast (set2, 
is_at_least, obj);
+              const void *elt1;
+              const void *elt2;
+              /* Check the first two values that the iterator produces.
+                 Checking them all would make this part of the test dominate 
the
+                 run time of the test.  */
+              bool havenext1 = gl_oset_iterator_next (&iter1, &elt1);
+              bool havenext2 = gl_oset_iterator_next (&iter2, &elt2);
+              ASSERT (havenext1 == havenext2);
+              if (havenext1)
+                {
+                  ASSERT (elt1 == elt2);
+                  havenext1 = gl_oset_iterator_next (&iter1, &elt1);
+                  havenext2 = gl_oset_iterator_next (&iter2, &elt2);
+                  ASSERT (havenext1 == havenext2);
+                  if (havenext1)
+                    ASSERT (elt1 == elt2);
+                }
+              gl_oset_iterator_free (&iter1);
+              gl_oset_iterator_free (&iter2);
+            }
+            break;
           }
         check_all (set1, set2);
       }




reply via email to

[Prev in Thread] Current Thread [Next in Thread]