bug-recutils
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Adding variance take 2


From: Frank Pursel
Subject: Adding variance take 2
Date: Tue, 13 Oct 2020 11:43:36 -0700

The previous diff contained a small error that this one fixes.
Hopefully this will do.

Sincerely,
Frank
diff --git a/recutils-1.8/ChangeLog b/recutils-1.8/ChangeLog
index d234bae..f3b8414 100644
--- a/recutils-1.8/ChangeLog
+++ b/recutils-1.8/ChangeLog
@@ -1,3 +1,10 @@
+2020-10-13  Frank Pursel <purself@yahoo.com>
+
+       * src/rec-aggregate.c (rec_aggregate_std_var)
+       (rec_aggregate_std_var_record): New functions for the Var aggregate.
+       * torture/utils/recsel.sh: Add test cases for the Var aggregate.
+       * doc/recutils.texi: Document the Var aggregate.
+       
 2019-01-03  Jose E. Marchesi  <jose.marchesi@oracle.com>
 
        * configure.ac: Bump version to 1.8.
diff --git a/recutils-1.8/doc/recutils.texi b/recutils-1.8/doc/recutils.texi
index 38877a3..3762fb8 100644
--- a/recutils-1.8/doc/recutils.texi
+++ b/recutils-1.8/doc/recutils.texi
@@ -3527,6 +3527,8 @@ The supported aggregate functions are the following:
 Counts the number of occurrences of a field.
 @item Avg(FIELD)
 Calculates the average (mean) of the numerical values of a field.
+@item Var(FIELD)
+Calculates the population variance of the numerical values of a field.
 @item Sum(FIELD)
 Calculates the sum of the numerical values of a field.
 @item Min(FIELD)
diff --git a/recutils-1.8/src/rec-aggregate.c b/recutils-1.8/src/rec-aggregate.c
index e28f9d8..5194cc3 100644
--- a/recutils-1.8/src/rec-aggregate.c
+++ b/recutils-1.8/src/rec-aggregate.c
@@ -52,6 +52,12 @@ struct rec_aggregate_reg_s
   size_t num_functions;
 };
 
+struct rec_aggregate_reg_var_s /* Sample buffer used by the Var aggregate.  */
+{
+  int n; /* Number of entries of VALUES in use.  */
+  double values[10000]; /* Collected numeric values; at most 10000 fit.  */
+};
+  
 /* Static functions defined in this file.  */
 
 static char *rec_aggregate_std_count (rec_rset_t rset,
@@ -64,6 +70,12 @@ static char *rec_aggregate_std_avg (rec_rset_t rset,
 static double rec_aggregate_std_avg_record (rec_record_t record,
                                             const char *field_name);
 
+static char *rec_aggregate_std_var (rec_rset_t rset,
+                                    rec_record_t record,
+                                    const char *field_name);
+static struct rec_aggregate_reg_var_s
+rec_aggregate_std_var_record (rec_record_t record, const char *field_name);
+
 static char *rec_aggregate_std_sum (rec_rset_t rset,
                                     rec_record_t record,
                                     const char *field_name);
@@ -96,11 +108,12 @@ struct rec_aggregate_descriptor_s
   rec_aggregate_t func;
 };
 
-#define NUM_STD_AGGREGATES 5
+#define NUM_STD_AGGREGATES 6 /* Raised from 5 for the new "var" entry.  */
 
 static struct rec_aggregate_descriptor_s std_aggregates[] =
   {{"count", &rec_aggregate_std_count},
    {"avg",   &rec_aggregate_std_avg},
+   {"var",   &rec_aggregate_std_var}, /* Population variance.  */
    {"sum",   &rec_aggregate_std_sum},
    {"min",   &rec_aggregate_std_min},
    {"max",   &rec_aggregate_std_max}};
@@ -305,6 +318,150 @@ rec_aggregate_std_avg_record (rec_record_t record,
   return avg;
 }
 
+/* Return ACC plus every value stored in VALS, adding them in storage
+   order.  */
+static double
+rec_aggregate_var_add_values (const struct rec_aggregate_reg_var_s *vals,
+                              double acc)
+{
+  int i;
+
+  for (i = 0; i < vals->n; i++)
+    {
+      acc += vals->values[i];
+    }
+
+  return acc;
+}
+
+/* Return ACC plus the squared deviation from MEAN of every value
+   stored in VALS.  */
+static double
+rec_aggregate_var_add_sq_devs (const struct rec_aggregate_reg_var_s *vals,
+                               double mean,
+                               double acc)
+{
+  int i;
+
+  for (i = 0; i < vals->n; i++)
+    {
+      double dev = vals->values[i] - mean;
+
+      acc += dev * dev;
+    }
+
+  return acc;
+}
+
+/* Aggregate function Var: compute the population variance of the
+   numeric values of the fields named FIELD_NAME, taken from RECORD
+   when it is not NULL and otherwise from every record of RSET.  The
+   variance is 0 when fewer than two values are found.
+
+   Return the variance formatted with "%g" in a string allocated by
+   asprintf, or NULL if that allocation fails.  */
+static char *
+rec_aggregate_std_var (rec_rset_t rset,
+                       rec_record_t record,
+                       const char *field_name)
+{
+  char *result = NULL;
+  struct rec_aggregate_reg_var_s vals;
+  double sum = 0;
+  double sq_devs = 0;
+  double var = 0;
+  size_t count = 0;
+
+  if (record)
+    {
+      vals = rec_aggregate_std_var_record (record, field_name);
+      count = (size_t) vals.n;
+      if (count > 1)
+        {
+          sum = rec_aggregate_var_add_values (&vals, 0);
+          sq_devs = rec_aggregate_var_add_sq_devs (&vals, sum / count, 0);
+        }
+    }
+  else if (rset)
+    {
+      rec_record_t rec = NULL;
+      rec_mset_iterator_t iter = rec_mset_iterator (rec_rset_mset (rset));
+
+      /* First pass: count the values and add them up.  Records are
+         visited one at a time, so the number of records in RSET is
+         not limited by the capacity of a single sample buffer.  */
+      while (rec_mset_iterator_next (&iter, MSET_RECORD, (void *) &rec, NULL))
+        {
+          vals = rec_aggregate_std_var_record (rec, field_name);
+          sum = rec_aggregate_var_add_values (&vals, sum);
+          count += (size_t) vals.n;
+        }
+      rec_mset_iterator_free (&iter);
+
+      if (count > 1)
+        {
+          /* Second pass: add up the squared deviations from the mean.  */
+          double mean = sum / count;
+
+          iter = rec_mset_iterator (rec_rset_mset (rset));
+          while (rec_mset_iterator_next (&iter, MSET_RECORD,
+                                         (void *) &rec, NULL))
+            {
+              vals = rec_aggregate_std_var_record (rec, field_name);
+              sq_devs = rec_aggregate_var_add_sq_devs (&vals, mean, sq_devs);
+            }
+          rec_mset_iterator_free (&iter);
+        }
+    }
+
+  if (count > 1)
+    {
+      var = sq_devs / count;
+    }
+
+  /* Return the variance as a string.  asprintf may leave RESULT
+     undefined when it fails, so reset it to NULL to signal the
+     end-of-memory condition to the caller.  */
+  if (asprintf (&result, "%g", var) < 0)
+    {
+      result = NULL;
+    }
+
+  return result;
+}
+
+/* Collect the numeric values of the fields of RECORD named FIELD_NAME.
+   At most as many values as the sample buffer can hold are stored;
+   any further ones are ignored rather than written out of bounds.  */
+static struct rec_aggregate_reg_var_s
+rec_aggregate_std_var_record (rec_record_t record,
+                              const char *field_name)
+{
+  struct rec_aggregate_reg_var_s part_var;
+  const size_t capacity = sizeof part_var.values / sizeof part_var.values[0];
+  rec_field_t field;
+  rec_mset_iterator_t iter = rec_mset_iterator (rec_record_mset (record));
+
+  /* The counter indexes VALUES below, so it must start at zero.  */
+  part_var.n = 0;
+
+  while (rec_mset_iterator_next (&iter, MSET_FIELD, (void *) &field, NULL))
+    {
+      double field_value_double = 0;
+      const char *field_value = rec_field_value (field);
+
+      if ((size_t) part_var.n < capacity
+          && rec_field_name_equal_p (rec_field_name (field), field_name)
+          && rec_atod (field_value, &field_value_double))
+        {
+          part_var.values[part_var.n++] = field_value_double;
+        }
+    }
+  rec_mset_iterator_free (&iter);
+
+  return part_var;
+}
+
 #define REC_AGGREGATE_ACCUM_FUNC(NAME, OP, INIT_VAL)                    \
   static char *                                                         \
   rec_aggregate_std_##NAME (rec_rset_t rset,                            \
diff --git a/recutils-1.8/torture/utils/recsel.sh b/recutils-1.8/torture/utils/recsel.sh
index 2bedc18..b6f1b2b 100755
--- a/recutils-1.8/torture/utils/recsel.sh
+++ b/recutils-1.8/torture/utils/recsel.sh
@@ -1579,6 +1579,13 @@ test_tool recsel-aggregate-avg-overall ok \
 '39
 '
 
+test_tool recsel-aggregate-var-overall ok \
+         recsel \
+         '-P "Var(Cost)"' \
+         sales \
+'1133.6
+'
+
 test_tool recsel-aggregate-avg-grouped ok \
           recsel \
           '-p "Item,Avg(Cost)" -G Item' \
@@ -1596,6 +1603,23 @@ Item: D
 Avg_Cost: 100
 '
 
+test_tool recsel-aggregate-var-grouped ok \
+         recsel \
+         '-p "Item,Var(Cost)" -G Item' \
+         sales \
+'Item: A
+Var_Cost: 42.25
+
+Item: B
+Var_Cost: 0
+
+Item: C
+Var_Cost: 0
+
+Item: D
+Var_Cost: 0
+'
+
 test_tool recsel-aggregate-sum-overall ok \
           recsel \
           '-P "Sum(Cost)"' \

reply via email to

[Prev in Thread] Current Thread [Next in Thread]