bug-recutils
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[bug-recutils] [PATCH 12/13] src: implement a tri vial query planner.


From: Michał Masłowski
Subject: [bug-recutils] [PATCH 12/13] src: implement a tri vial query planner.
Date: Mon, 20 Aug 2012 18:21:33 +0200

Seems improbable to give correct results in all cases, should be
possible to fix without major rewrites (although would need them to
better optimize more queries).

Another use of this code is documenting some missing features of index
trees.
---
 ChangeLog     |  18 +++++
 src/rec-db.c  | 214 ++++++++++++++++++++++++++++++++++++++++++++++++++++------
 src/rec-sex.c | 117 ++++++++++++++++++++++++++++++++
 src/rec.h     |  25 ++++---
 4 files changed, 345 insertions(+), 29 deletions(-)

diff --git a/ChangeLog b/ChangeLog
index 7650fd2..77f1c66 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,23 @@
 2012-08-20  Michał Masłowski  <address@hidden>
 
+       src: implement a trivial query planner.
+       * src/rec-db.c (rec_db_add_rsets_from_file): Store the rset and
+       parser in index.
+       (rec_db_query): Always copy the descriptor, remove it later if not
+       requested.  Use rset_iter_t instead of mset iterators, search
+       indexes for one to use for the query.  Sort if needed after
+       filtering records.
+       (rec_db_index_query): New function.
+       * src/rec-sex.c (struct rec_sex_index_tree_iterator_s): New
+       structure.
+       (rec_sex_do_index_tree_query): New function.
+       (rec_sex_index_tree_iterator_next): Likewise.
+       (rec_sex_index_tree_iterator_dispose): Likewise.
+       * src/rec.h: Add a prototype for rec_sex_do_index_tree_query, move
+       rec_iter_t and rec_idx_tree_t declarations earlier.
+
+2012-08-20  Michał Masłowski  <address@hidden>
+
        src: support index trees that point to a leaf too left of the key
        searched for.
        * src/rec-idx-tree.c (rec_idx_tree_scan_common): Check for
diff --git a/src/rec-db.c b/src/rec-db.c
index 96e8ae9..03f81f1 100644
--- a/src/rec-db.c
+++ b/src/rec-db.c
@@ -82,6 +82,8 @@ static bool rec_db_set_act_delete (rec_rset_t rset, 
rec_record_t record, rec_fex
 static rec_rset_t rec_db_join (rec_db_t db, const char *type1, const char 
*field, const char *type2);
 static rec_record_t rec_db_merge_records (rec_record_t record1, rec_record_t 
record2, const char *prefix);
 
+static rec_iter_t rec_db_index_query (rec_rset_t rset, rec_idx_tree_t 
index_tree, size_t *index, rec_sex_t sex, rec_fex_t sort_by, bool 
*needs_sorting, bool *in_interval);
+
 /*
  * Public functions.
  */
@@ -472,6 +474,8 @@ rec_db_add_rsets_from_file (rec_db_t db,
               rec_idx_tree_destroy (tree);
               break;
             }
+
+          rec_idx_tree_set_rset (tree, parser, rset);
         }
     }
 
@@ -584,26 +588,24 @@ rec_db_query (rec_db_t     db,
         }
     }
 
-  /* If a descriptor is requested then get a copy of the descriptor of
-     the referred record set, which exists only if it is not the
-     default.  */
+  /* Get a copy of the descriptor of the referred record set, which
+     exists only if it is not the default.  This might be needed even
+     if the descriptor is not requested, since sorting depends on
+     field types specified in the descriptor.  */
 
-  if (flags & REC_F_DESCRIPTOR)
+  rec_record_t descriptor = rec_rset_descriptor (rset);
+  if (descriptor)
     {
-      rec_record_t descriptor = rec_rset_descriptor (rset);
-      if (descriptor)
+      descriptor = rec_record_dup (descriptor);
+      if (!descriptor)
         {
-          descriptor = rec_record_dup (descriptor);
-          if (!descriptor)
-            {
-              /* Out of memory.  */
-              free (res);
-              return NULL;
-            }
+          /* Out of memory.  */
+          free (res);
+          return NULL;
         }
-
-      rec_rset_set_descriptor (res, descriptor);
     }
+
+  rec_rset_set_descriptor (res, descriptor);
   
   /* Generate a list of random indexes here if requested.  The
      generated random indexes are added to the indexes list, which
@@ -650,6 +652,13 @@ rec_db_query (rec_db_t     db,
 
       rec_record_t record = NULL;
       size_t num_rec = -1;
+      bool needs_sorting = true;  /* Output rset needs sorting after
+                                     the records are processed.  */
+      bool in_interval = false;  /* Are the records already known to
+                                    be in the output interval? */
+      rec_idx_tree_t index_tree = NULL;
+      rec_iter_t record_iterator = NULL;
+      rec_fex_t order_by;
 
       if (group_by)
         {
@@ -666,14 +675,34 @@ rec_db_query (rec_db_t     db,
             }
         }
 
-      if (!rec_rset_sort (rset, sort_by))
+      /* Check if there is an index which could provide a superset of
+         records to output.  The currently supported tree indexes
+         cannot help with fast string queries.  */
+      if (fast_string == NULL)
         {
-          /* Out of memory.  */
-          return NULL;
+          rec_mset_iterator_t index_iter = rec_mset_iterator 
(rec_rset_index_mset (rset));
+          while (rec_mset_iterator_next (&index_iter, MSET_INDEX_TREE, (const 
void **) &index_tree, NULL))
+            {
+              if ((record_iterator =
+                   rec_db_index_query (rset, index_tree,
+                                       index, sex, sort_by, &needs_sorting,
+                                       &in_interval))
+                  != NULL)
+                {
+                  break;
+                }
+            }
+
+          rec_mset_iterator_free (&index_iter);
         }
 
-      rec_mset_iterator_t iter = rec_mset_iterator (rec_rset_mset (rset));
-      while (rec_mset_iterator_next (&iter, MSET_RECORD, (const void **) 
&record, NULL))
+      if (record_iterator == NULL)
+        {
+          /* No index helped, check the whole rset. */
+          record_iterator = rec_iter_for_mset (rec_rset_mset (rset));
+        }
+
+      while (rec_iter_next (record_iterator, &record))
         {
           bool selected = false;
           num_rec++;
@@ -749,7 +778,18 @@ rec_db_query (rec_db_t     db,
             }
         
         }
-      rec_mset_iterator_free (&iter);
+      rec_iter_destroy (record_iterator);
+
+      if (needs_sorting && !rec_rset_sort (res, sort_by))
+        {
+          /* Out of memory.  */
+          return NULL;
+        }
+    }
+
+  if (!(flags & REC_F_DESCRIPTOR))
+    {
+      rec_rset_set_descriptor (res, NULL);
     }
 
   return res;
@@ -2052,4 +2092,136 @@ rec_db_index_file_dispose_fn (const void *elt)
   rec_idx_file_destroy (index_file);
 }
 
+/* Return an iterator over an index tree that will provide a superset
+   of records output by the query with specified index, sex and
+   sort_by.  If the records iterated are sorted, *needs_sorting will
+   be set to 'false'.  If the records iterated are already in
+   intervals specified by the index parameter, *in_interval will be
+   set to 'true'.  NULL is returned if the index cannot be used for
+   the query.  */
+
+static rec_iter_t
+rec_db_index_query (rec_rset_t rset,
+                    rec_idx_tree_t index_tree,
+                    size_t *index,
+                    rec_sex_t sex,
+                    rec_fex_t sort_by,
+                    bool *needs_sorting,
+                    bool *in_interval)
+{
+  rec_fex_t sort_fex;
+  size_t n, n_fields;
+  rec_fex_elem_t fex_elem;
+  rec_type_t field_type;
+  rec_idx_tree_key_t key_type;
+  const char *field_name;
+
+  /* Set parameters if the index won't be used. */
+  *needs_sorting = true;
+  *in_interval = false;
+
+  /* There three ways of getting records that could be supported by
+     index trees: interval queries, sex queries with field
+     comparisons, reading a whole record set.  The first and third
+     case uses the index to provide records in a sorted order.  For
+     the second one it is assumed that only few records will be found,
+     so the index query is not chosen for sorting.  In future this
+     should be changed to choose the best of several useful
+     indexes.  */
+
+  if (index || !sex)
+    {
+      /* Check if this index gives records in the expected sorted
+         order.  Don't use it otherwise, since record indexes cannot
+         be checked without scanning the whole rset with correct
+         sorting.  */
+
+      if (sort_by)
+        {
+          sort_fex = sort_by;
+        }
+      else
+        {
+          sort_fex = rec_rset_order_by_fields (rset);
+        }
+
+      if (sort_fex == NULL)
+        {
+          /* No index represents an unsorted record set yet. */
+          return NULL;
+        }
+
+      n_fields = rec_idx_tree_get_num_fields (index_tree);
+
+      /* To use an index for sorting (or an indexed query), it must
+         have records in the same order, i.e. with the same sorting
+         fields and the same ordering of them.  */
+      if (rec_fex_size (sort_fex) != n_fields)
+        {
+          return NULL;
+        }
+
+      for (n = 0; n < n_fields; n++)
+        {
+          fex_elem = rec_fex_get (sort_fex, n);
+          field_name = rec_fex_elem_field_name (fex_elem);
+
+          if (strcmp (rec_idx_tree_get_field_name (index_tree, n),
+                      field_name))
+            {
+              return NULL;
+            }
+
+          /* Check what key type should be used for ordering this
+             field.  */
+          field_type = rec_rset_get_field_type (rset, field_name);
+          switch (rec_type_kind (field_type))
+            {
+            case REC_TYPE_INT:
+            case REC_TYPE_RANGE:
+              return NULL;
+            case REC_TYPE_REAL:
+              return NULL;
+            case REC_TYPE_BOOL:
+              return NULL;
+            case REC_TYPE_DATE:
+              return NULL;
+            default:
+              key_type = REC_IDX_KEY_STRING;
+            }
+
+          if (rec_idx_tree_get_field_type (index_tree, n)
+              != key_type)
+            {
+              /* Different comparison order. */
+              return NULL;
+            }
+        }
+
+      if ((index[2] == REC_Q_NOINDEX) && (index[3] == REC_Q_NOINDEX))
+        {
+          *in_interval = true;
+        }
+      else
+        {
+          /* Should iterate results of multiple scans instead.  */
+          *in_interval = false;
+        }
+
+      *needs_sorting = false;
+      if (*in_interval)
+        {
+          return rec_idx_tree_scan_interval (index_tree, index[0], index[1]);
+        }
+      else
+        {
+          return rec_idx_tree_scan_interval (index_tree, 0, SIZE_MAX);
+        }
+    }
+
+  /* The case left: we want records matched by a selection
+     expression.  */
+  return rec_sex_do_index_tree_query (sex, index_tree);
+}
+
 /* End of rec-db.c */
diff --git a/src/rec-sex.c b/src/rec-sex.c
index 52e3701..1f87203 100644
--- a/src/rec-sex.c
+++ b/src/rec-sex.c
@@ -59,6 +59,12 @@ struct rec_sex_val_s
   char *str_val;
 };
 
+struct rec_sex_index_tree_iterator_s
+{
+  rec_iter_t iterator;
+  const char *value;
+};
+
 /* Static functions declarations.  */
 static struct rec_sex_val_s rec_sex_eval_node (rec_sex_t sex,
                                                rec_record_t record,
@@ -67,6 +73,9 @@ static struct rec_sex_val_s rec_sex_eval_node (rec_sex_t sex,
 static bool rec_sex_op_real_p (struct rec_sex_val_s op1,
                                struct rec_sex_val_s op2);
 
+static bool rec_sex_index_tree_iterator_next (void *data, rec_record_t 
*record);
+static void rec_sex_index_tree_iterator_dispose (void *data);
+
 /*
  * Public functions.
  */
@@ -267,6 +276,95 @@ rec_sex_print_ast (rec_sex_t sex)
   rec_sex_parser_print_ast (sex->parser);
 }
 
+rec_iter_t
+rec_sex_do_index_tree_query (rec_sex_t sex, rec_idx_tree_t tree)
+{
+  rec_sex_ast_node_t node;
+  struct rec_sex_index_tree_iterator_s *iter_data;
+  rec_iter_t iter;
+  const char *value = NULL;
+  const char *field = NULL;
+
+  node = rec_sex_ast_top (sex->ast);
+
+  /* Only equality comparisons with constant strings are supported.
+     This results from inequal comparisons on strings being
+     unsupported and index trees currently having only string
+     keys.  */
+  switch (rec_sex_ast_node_type (node))
+    {
+    case REC_SEX_OP_EQL:
+      {
+        rec_sex_ast_node_t child1, child2;
+
+        child1 = rec_sex_ast_node_child (node, 0);
+        child2 = rec_sex_ast_node_child (node, 1);
+
+        if (rec_sex_ast_node_type (child1) == REC_SEX_STR)
+          {
+            value = rec_sex_ast_node_str (child1);
+            if (rec_sex_ast_node_type (child2) == REC_SEX_NAME)
+              {
+                field = rec_sex_ast_node_name (child2);
+              }
+          }
+        else if (rec_sex_ast_node_type (child2) == REC_SEX_STR)
+          {
+            value = rec_sex_ast_node_str (child2);
+            if (rec_sex_ast_node_type (child1) == REC_SEX_NAME)
+              {
+                field = rec_sex_ast_node_name (child1);
+              }
+          }
+
+        if (!value || !field)
+          {
+            return NULL;
+          }
+
+        break;
+      default:
+        /* Unsupported operation. */
+        return NULL;
+      }
+    }
+
+  /* Check if this index can be used for this search. */
+  if ((rec_idx_tree_get_num_fields (tree) != 1)
+      || (rec_idx_tree_get_field_type (tree, 0) != REC_IDX_KEY_STRING)
+      || (strcmp (rec_idx_tree_get_field_name (tree, 0), field) != 0))
+    {
+      return NULL;
+    }
+
+  /* Allocate a structure holding an index tree iterator and the value
+     to be iterated.  */
+  iter_data = malloc (sizeof (*iter_data));
+  if (!iter_data)
+    {
+      return NULL;
+    }
+
+  iter_data->value = value;
+  iter_data->iterator = rec_idx_tree_scan (tree,
+                                           (const void **) &(iter_data->value),
+                                           (const void **) 
&(iter_data->value));
+  if (iter_data->iterator == NULL)
+    {
+      free (iter_data);
+      return NULL;
+    }
+
+  iter = rec_iter_new (iter_data, rec_sex_index_tree_iterator_next,
+                       rec_sex_index_tree_iterator_dispose);
+  if (!iter)
+    {
+      rec_sex_index_tree_iterator_dispose (iter_data);
+    }
+
+  return iter;
+}
+
 /*
  * Private functions.
  */
@@ -1147,4 +1245,23 @@ rec_sex_op_real_p (struct rec_sex_val_s op1,
   return ret;
 }
  
+static bool
+rec_sex_index_tree_iterator_next (void *data, rec_record_t *record)
+{
+  struct rec_sex_index_tree_iterator_s *iter_data =
+    (struct rec_sex_index_tree_iterator_s *) data;
+
+  return rec_iter_next (iter_data->iterator, record);
+}
+
+static void
+rec_sex_index_tree_iterator_dispose (void *data)
+{
+  struct rec_sex_index_tree_iterator_s *iter_data =
+    (struct rec_sex_index_tree_iterator_s *) data;
+
+  rec_iter_destroy (iter_data->iterator);
+  free (iter_data);
+}
+
 /* End of rec-sex.c */
diff --git a/src/rec.h b/src/rec.h
index 223c09a..ca1b1da 100644
--- a/src/rec.h
+++ b/src/rec.h
@@ -1324,6 +1324,18 @@ typedef struct rec_sex_s *rec_sex_t;
 
 typedef struct rec_aggregate_reg_s *rec_aggregate_reg_t;
 
+/* Opaque data type representing a record iterator.  This is placed
+   here as a forward declaration.  See below in this file for the
+   description of iterator functions.  */
+
+typedef struct rec_iter_s *rec_iter_t;
+
+/* Opaque data type representing a tree index.  This is placed here as
+   a forward declaration.  See below in this file for the description
+   of index tree functions.  */
+
+typedef struct rec_idx_tree_s *rec_idx_tree_t;
+
 /************ Creating and destrying databases *********************/
 
 /* Create a new empty database and return it.  This function returns
@@ -2151,6 +2163,11 @@ char *rec_sex_eval_str (rec_sex_t sex, rec_record_t 
record);
 
 void rec_sex_print_ast (rec_sex_t sex);
 
+/* Return an iterator over records in the index tree that might be
+   matched by the sex.  Returns NULL on error, if the sex is too
+   complex or if the tree does not optimize it.  */
+
+rec_iter_t rec_sex_do_index_tree_query (rec_sex_t sex, rec_idx_tree_t tree);
 
 /*
  * ENCRYPTION
@@ -2417,10 +2434,6 @@ char *rec_idx_get_file_name (const char *recfile_name);
  * index search.
  */
 
-/* Opaque data type representing a record iterator.  */
-
-typedef struct rec_iter_s *rec_iter_t;
-
 /* Types of arguments for rec_iter_new. */
 
 typedef bool (*rec_iter_next_fn_t) (void *data, rec_record_t *record);
@@ -2459,10 +2472,6 @@ void rec_iter_destroy (rec_iter_t iterator);
  * mapping ordered tuples of fields to records.
  */
 
-/* Opaque data type representing a tree index. */
-
-typedef struct rec_idx_tree_s *rec_idx_tree_t;
-
 /* Known key types.  They affect index storage size and sorting.  */
 
 enum rec_idx_tree_key_e
-- 
1.7.11.4




reply via email to

[Prev in Thread] Current Thread [Next in Thread]