gawk-diffs
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[gawk-diffs] [SCM] gawk branch, master, updated. 6cf1cd84870f44051434105


From: Arnold Robbins
Subject: [gawk-diffs] [SCM] gawk branch, master, updated. 6cf1cd84870f4405143410585cc4e3e7f719f8f5
Date: Fri, 22 Apr 2011 13:10:47 +0000

This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "gawk".

The branch, master has been updated
       via  6cf1cd84870f4405143410585cc4e3e7f719f8f5 (commit)
      from  26e0f72a6bb214f1f53326c7b2325715afe43fb6 (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.

- Log -----------------------------------------------------------------
http://git.sv.gnu.org/cgit/gawk.git/commit/?id=6cf1cd84870f4405143410585cc4e3e7f719f8f5

commit 6cf1cd84870f4405143410585cc4e3e7f719f8f5
Author: Arnold D. Robbins <address@hidden>
Date:   Fri Apr 22 16:09:37 2011 +0300

    User function sorting added, documented, tested.

diff --git a/ChangeLog b/ChangeLog
index 5130222..5fcbb96 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,9 @@
+Fri Apr 22 16:05:27 2011  John Haque      <address@hidden>
+
+       * array.c (sort_user_func): New routine to handle user-defined
+       quicksort comparison function.
+       (assoc_list): Adjust for user-defined comparison function.
+
 Fri Apr 22 09:18:16 2011  Arnold D. Robbins  <address@hidden>
 
        * array.c (awk_hash): Force results into 32 bits for consistency
diff --git a/array.c b/array.c
index 3d7ff2f..32b8506 100644
--- a/array.c
+++ b/array.c
@@ -64,7 +64,6 @@ static int sort_down_value_string(const void *, const void *);
 static int sort_up_value_number(const void *, const void *);
 static int sort_down_value_number(const void *, const void *);
 
-
 /* array_init --- possibly temporary function for experimentation purposes */
 
 void
@@ -1028,6 +1027,7 @@ dup_table(NODE *symbol, NODE *newsymb)
        return newsymb;
 }
 
+
 /* asort_actual --- do the actual work to sort the input array */
 
 static NODE *
@@ -1043,7 +1043,7 @@ asort_actual(int nargs, SORT_CTXT ctxt)
                sort_str = POP_STRING();
        else
                sort_str = Nnull_string;        /* "" => default sorting */
- 
+
        if (nargs >= 2) {  /* 2nd optional arg */
                dest = POP_PARAM();
                if (dest->type != Node_var_array) {
@@ -1098,7 +1098,7 @@ asort_actual(int nargs, SORT_CTXT ctxt)
                         */
 
                        *assoc_lookup(result, subs, FALSE) =
-                                               make_string(r->ahname_str, 
r->ahname_len);
+                                       make_string(r->ahname_str, 
r->ahname_len);
                } else {
                        NODE *val;
 
@@ -1270,6 +1270,7 @@ sort_up_index_number(const void *p1, const void *p2)
        return ret;
 }
 
+
 /* sort_down_index_number --- qsort comparison function; descending index 
numbers */
 
 static int
@@ -1295,17 +1296,12 @@ sort_up_value_string(const void *p1, const void *p2)
        n1 = t1->ahvalue;
        n2 = t2->ahvalue;
 
-       if (n1->type == Node_var_array && n2->type == Node_val)
-               return 1;       /* n1 is more */
-       if (n1->type == Node_val && n2->type == Node_var_array)
-               return -1;      /* n1 is less */
-
-       if (n1->type == Node_var_array && n2->type == Node_var_array) {
-               /* sub-array names contain respective indices, effectively 
resulting
-                * in an index-based (in parent array) ordering.
-                */
-               return strcmp(n1->vname, n2->vname);
+       if (n1->type == Node_var_array) {
+               /* return 0 if n2 is a sub-array too, else return 1 */
+               return (n2->type != Node_var_array);
        }
+       if (n2->type == Node_var_array)
+               return -1;              /* n1 (scalar) < n2 (sub-array) */
 
        /* n1 and n2 both have string values; See sort_force_value_string(). */
        return cmp_string(n1, n2);
@@ -1337,17 +1333,12 @@ sort_up_value_number(const void *p1, const void *p2)
        n1 = t1->ahvalue;
        n2 = t2->ahvalue;
 
-       if (n1->type == Node_var_array && n2->type == Node_val)
-               return 1;       /* n1 is more */
-       if (n1->type == Node_val && n2->type == Node_var_array)
-               return -1;      /* n1 is less */
-       if (n1->type == Node_var_array && n2->type == Node_var_array) {
-               /* sub-array names contain respective indices, effectively 
resulting
-                * in an index-based (in parent array) ordering.
-                */
-
-               return strcmp(n1->vname, n2->vname);
+       if (n1->type == Node_var_array) {
+               /* return 0 if n2 is a sub-array too, else return 1 */
+               return (n2->type != Node_var_array);
        }
+       if (n2->type == Node_var_array)
+               return -1;              /* n1 (scalar) < n2 (sub-array) */
 
        /* n1 and n2 both Node_val, and force_number'ed */
        if (n1->numbr < n2->numbr)
@@ -1366,7 +1357,6 @@ sort_up_value_number(const void *p1, const void *p2)
        return cmp_string(n1, n2);
 }
 
-
 /* sort_down_value_string --- descending value number */
 
 static int
@@ -1375,6 +1365,51 @@ sort_down_value_number(const void *p1, const void *p2)
        return -sort_up_value_number(p1, p2);
 }
 
+/* sort_user_func --- user defined qsort comparison function */
+
+static int
+sort_user_func(const void *p1, const void *p2)
+{
+       const NODE *t1, *t2;
+       NODE *idx1, *idx2, *val1, *val2, *r;
+       int ret;
+       INSTRUCTION *code;
+       extern int exiting;
+
+       t1 = *((const NODE *const *) p1);
+       t2 = *((const NODE *const *) p2);
+
+       idx1 = make_string(t1->ahname_str, t1->ahname_len);
+       idx2 = make_string(t2->ahname_str, t2->ahname_len);
+       val1 = t1->ahvalue;
+       val2 = t2->ahvalue;
+
+       code = TOP()->code_ptr; /* comparison function call instructions */
+
+       /* setup 4 arguments to comp_func() */
+       PUSH(idx1);
+       if (val1->type == Node_val)
+               UPREF(val1);
+       PUSH(val1);
+       PUSH(idx2);
+       if (val2->type == Node_val)
+               UPREF(val2);
+       PUSH(val2);
+
+       /* execute the comparison function */
+       (void) interpret(code);
+
+       if (exiting)    /* do not assume anything about the user-defined 
function! */
+               gawk_exit(exit_val);
+
+       /* return value of the comparison function */
+       r = POP_SCALAR();
+       ret = (int) force_number(r);
+       DEREF(r);
+
+       return ret;
+}
+
 
 /*
  * sort_selection --- parse user-specified sort specification;
@@ -1461,7 +1496,7 @@ sort_selection(NODE *sort_str, SORT_CTXT sort_ctxt)
                word_len = (size_t) (s - word);
 
                if (++num_words > 3)    /* too many words in phrase */
-                       goto un_recognized;
+                       return -1;
 
                bval = Unrecognized;
                for (i = 0; i < num_keys; i++) {
@@ -1482,7 +1517,7 @@ sort_selection(NODE *sort_str, SORT_CTXT sort_ctxt)
                        ||      ((allparts & bval) & GROUP_MASK
                                )                        /* invalid grouping of 
words e.g. "str num" */
                )
-                       goto un_recognized;
+                       return -1;
 
                allparts |= bval;
        }
@@ -1490,35 +1525,11 @@ sort_selection(NODE *sort_str, SORT_CTXT sort_ctxt)
        /* num_words <= 3 */
        return (allparts & INDEX_MASK);
 
-un_recognized:
-       switch (sort_ctxt) {
-       case ASORT:
-               fatal(_("asort: invalid sort specification `%s'"), 
sort_str->stptr);
-       case ASORTI:
-               fatal(_("asorti: invalid sort specification `%s'"), 
sort_str->stptr);
-
-       case SORTED_IN:
-               /* fall through */
-       default:
-               if (do_lint) {
-                       static NODE *warned_str = NULL;
-
-                       /* warning for each UNIQUE unrecognized sort_str 
specification */              
-                       if (warned_str == NULL || ! STREQ(warned_str->stptr, 
sort_str->stptr)) {
-                               lintwarn(_("PROCINFO[\"sorted_in\"]: invalid 
sort specification `%s'"),
-                                               sort_str->stptr);
-                               unref(warned_str);      /* unref(NULL) is OK */ 
-                               warned_str = dupnode(sort_str);
-                       }
-               }
-               break;
-       }
-       return (Unsorted & INDEX_MASK);
-
 #undef INDEX_MASK
 #undef GROUP_MASK
 }
 
+
 /* sort_force_index_number -- pre-process list items for sorting indices as 
numbers */
 
 static void
@@ -1580,7 +1591,6 @@ sort_force_value_string(NODE **list, size_t num_elems)
        }
 }
 
-
 /* assoc_list -- construct, and optionally sort, a list of array elements */  
 
 NODE **
@@ -1603,18 +1613,63 @@ assoc_list(NODE *array, NODE *sort_str, SORT_CTXT 
sort_ctxt)
                { sort_down_value_number,       sort_force_value_number },      
/* descending value number */
                { 0,    0 }                                             /* 
unsorted */
        };
-       int qi;
        NODE **list;
        NODE *r;
        size_t num_elems, i, j;
-       qsort_compfunc cmp_func;
-       qsort_prefunc pre_func;
-
+       qsort_compfunc cmp_func = 0;
+       qsort_prefunc pre_func = 0;
+       INSTRUCTION *code = NULL;
+       int qi;
+       extern int currule;
+       
        num_elems = array->table_size;
        assert(num_elems > 0);
 
        qi = sort_selection(sort_str, sort_ctxt);
 
+       if (qi >= 0) {
+               cmp_func = sort_funcs[qi].comp_func;
+               pre_func = sort_funcs[qi].pre_func;
+
+       } else {                /* unrecognized */
+               NODE *f;
+               char *sp;       
+
+               assert(sort_str != NULL);
+
+               (void) force_string(sort_str);
+               for (sp = sort_str->stptr; *sp != '\0'
+                               && ! isspace((unsigned char) *sp); sp++)
+                       ;
+
+               /* empty string or string with space(s) not valid as function 
name */
+               if (sp == sort_str->stptr || *sp != '\0')
+                       fatal(_("`%s' is invalid as a function name"), 
sort_str->stptr);
+
+               f = lookup(sort_str->stptr);
+               if (f == NULL || f->type != Node_func)
+                       fatal(_("sort comparison function `%s' is not 
defined"), sort_str->stptr);
+
+               cmp_func = sort_user_func;
+
+               /* make function call instructions */
+               code = bcalloc(Op_func_call, 2, 0);
+               code->func_body = f;
+               code->func_name = NULL;         /* not needed, func_body 
already assigned */
+               (code + 1)->expr_count = 4;     /* function takes 4 arguments */
+               code->nexti = bcalloc(Op_stop, 1, 0);   
+
+               /* make non-local jumps `next' and `nextfile' fatal in
+                * callback function by setting currule in interpret()
+                * to undefined (0). `exit' is handled in sort_user_func.
+                */
+
+               (code + 1)->inrule = currule;   /* save current rule */
+               currule = 0;
+
+               PUSH_CODE(code);
+       }
+
        /* allocate space for array; the extra space is used in for(i in a) 
opcode (eval.c) */
        emalloc(list, NODE **, (num_elems + 1) * sizeof(NODE *), "assoc_list");
 
@@ -1624,8 +1679,7 @@ assoc_list(NODE *array, NODE *sort_str, SORT_CTXT 
sort_ctxt)
                        list[j++] = ahash_dupnode(r);
        list[num_elems] = NULL;
 
-       cmp_func = sort_funcs[qi].comp_func;
-       if (! cmp_func) /* unsorted */  
+       if (! cmp_func) /* unsorted */
                return list;
 
        /* special pre-processing of list items */
@@ -1634,6 +1688,14 @@ assoc_list(NODE *array, NODE *sort_str, SORT_CTXT 
sort_ctxt)
                pre_func(list, num_elems);
 
        qsort(list, num_elems, sizeof(NODE *), cmp_func); /* shazzam! */
+
+       if (cmp_func == sort_user_func) {
+               code = POP_CODE();
+               currule = (code + 1)->inrule;   /* restore current rule */ 
+               bcfree(code->nexti);            /* Op_stop */
+               bcfree(code);                   /* Op_func_call */
+       }
+
        return list;
 }
 
diff --git a/awk.h b/awk.h
index 52ce5bf..7327a00 100644
--- a/awk.h
+++ b/awk.h
@@ -672,6 +672,9 @@ typedef struct exp_instruction {
 /* Op_func_call, Op_func */
 #define func_body       x.xn
 
+/* Op_func_call */
+#define inrule         d.dl
+
 /* Op_subscript */
 #define sub_count       d.dl
 
diff --git a/doc/gawk.info b/doc/gawk.info
index 06d2dbb..09828f4 100644
--- a/doc/gawk.info
+++ b/doc/gawk.info
@@ -10036,6 +10036,154 @@ value, regardless of what the subarray itself 
contains, and all
 subarrays are treated as being equal to each other.  Their order
 relative to each other is determined by their index strings.
 
+8.1.5.2 Controlling Array Scanning Order With a User-defined Function
+.....................................................................
+
+The value of `PROCINFO["sorted_in"]' can also be a function name that
+will let you traverse an array based on any custom criterion.  The
+array elements are ordered according to the return value of this
+function.  This comparison function should be defined with at least
+four arguments:
+
+     function comp_func(i1, v1, i2, v2)
+     {
+         COMPARE ELEMENTS 1 AND 2 IN SOME FASHION
+         RETURN < 0; 0; OR > 0
+     }
+
+   Here, I1 and I2 are the indices, and V1 and V2 are the corresponding
+values of the two elements being compared.  Either V1 or V2, or both,
+can be arrays if the array being traversed contains subarrays as
+values.  The three possible return values are interpreted this way:
+
+     * If the return value of `comp_func(i1, v1, i2, v2)' is less than
+     0, index I1 comes before index I2 during loop traversal.
+
+     * If `comp_func(i1, v1, i2, v2)' returns 0, I1 and I2 come
+     together but relative order with respect to each other is
+     undefined.
+
+     * If the return value of `comp_func(i1, v1, i2, v2)' is greater
+     than 0, I1 comes after I2.
+
+   The following comparison function can be used to scan an array in
+numerical order of the indices:
+
+     function cmp_num_idx(i1, v1, i2, v2)
+     {
+         # numerical index comparison, ascending order
+         return (i1 - i2)
+     }
+
+   This function will traverse an array based on an order by element
+values rather than by indices:
+
+     function cmp_str_val(i1, v1, i2, v2)
+     {
+         # string value comparison, ascending order
+       v1 = v1 ""
+       v2 = v2 ""
+       if (v1 < v2) return -1
+         return (v1 != v2)
+     }
+
+   A comparison function to make all numbers, and numeric strings
+without any leading or trailing spaces come out first during loop
+traversal:
+
+     function cmp_num_str_val(i1, v1, i2, v2,   n1, n2)
+     {
+         # numbers before string value comparison, ascending order
+         n1 = v1 + 0
+         n2 = v2 + 0
+         if (n1 == v1)
+             return (n2 == v2) ? (n1 - n2) : -1
+         else if (n2 == v2)
+             return 1
+         return (v1 < v2) ? -1 : (v1 != v2)
+     }
+
+   Consider sorting the entries of a GNU/Linux system password file
+according to login names.  The following program which sorts records by
+a specific field position can be used for this purpose:
+
+     # sort.awk --- simple program to sort by field position
+     # field position is specified by POS
+
+     function cmp_field(i1, v1, i2, v2)
+     {
+         # comparison by value, as string, and ascending order
+         return v1[POS] < v2[POS] ? -1 : (v1[POS] != v2[POS])
+     }
+
+     {
+         for (i = 1; i <= NF; i++)
+             a[NR][i] = $i
+     }
+
+     END {
+         PROCINFO["sorted_in"] = "cmp_field"
+         if (POS < 1 || POS > NF)
+             POS = 1
+         for (i in a) {
+             for (j = 1; j <= NF; j++)
+                 printf("%s%c", a[i][j], j < NF ? ":" : "")
+             print ""
+         }
+     }
+
+   The first field in each entry of the password file is the user's
+login name, and the fields are separated by colons.  Running the
+program produces the following output:
+
+     $ gawk -vPOS=1 -F: -f sort.awk /etc/passwd
+     -| adm:x:3:4:adm:/var/adm:/sbin/nologin
+     -| apache:x:48:48:Apache:/var/www:/sbin/nologin
+     -| avahi:x:70:70:Avahi daemon:/:/sbin/nologin
+     ...
+
+   The comparison function should normally return the same value when
+given a specific pair of array elements as its arguments.  If
+inconsistent results are returned then the order is undefined.  This
+behavior is sometimes exploited to introduce random order in otherwise
+seemingly ordered data:
+
+     function cmp_randomize(i1, v1, i2, v2)
+     {
+         # random order
+         return (2 - 4 * rand())
+     }
+
+   As mentioned above, the order of the indices is arbitrary if two
+elements compare equal.  This is usually not a problem, but letting the
+tied elements come out in arbitrary order can be an issue, especially
+when comparing item values.  The partial ordering of the equal elements
+may change during the next loop traversal, if other elements are added or
+removed from the array.  One way to resolve ties when comparing elements
+with otherwise equal values is to include the indices in the comparison
+rules.  Note that doing this may make the loop traversal less efficient,
+so consider it only if necessary.  The following comparison functions
+will force a deterministic order, and are based on the fact that the
+indices of two elements are never equal:
+
+     function cmp_numeric(i1, v1, i2, v2)
+     {
+         # numerical value (and index) comparison, descending order
+         return (v1 != v2) ? (v2 - v1) : (i2 - i1)
+     }
+
+     function cmp_string(i1, v1, i2, v2)
+     {
+         # string value (and index) comparison, descending order
+         v1 = v1 i1
+         v2 = v2 i2
+         return (v1 > v2) ? -1 : (v1 != v2)
+     }
+
+   A custom comparison function can often simplify ordered loop
+traversal, and the sky is really the limit when it comes to
+designing such a function.
+
    When string comparisons are made during a sort, either for element
 values where one or both aren't numbers or for element indices handled
 as strings, the value of `IGNORECASE' (*note Built-in Variables::)
@@ -10399,6 +10547,10 @@ the call to asort in the above example can be replaced 
with:
 
      asort(source, dest, "descending number")
 
+   The third argument to `asort()' can also be a user-defined function
+name which is used to order the array elements before constructing the
+result array.  *Note Scanning an Array::, for more information.
+
    Often, what's needed is to sort on the values of the _indices_
 instead of the values of the elements.  To do that, use the `asorti()'
 function.  The interface is identical to that of `asort()', except that
@@ -10813,7 +10965,10 @@ pound sign (`#'):
      value of HOW.  If the `source' array contains subarrays as values,
      they will come out last(first) in the `dest' array for
      `ascending'(`descending') order specification.  The value of
-     `IGNORECASE' affects the sorting.  *Note Scanning an Array::, for
+     `IGNORECASE' affects the sorting.  The third argument can also be
+     a user-defined function name in which case the value returned by
+     the function is used to order the array elements before
+     constructing the result array.  *Note Scanning an Array::, for
      more information.
 
      For example, if the contents of `a' are as follows:
@@ -24550,7 +24705,7 @@ Index
 * arrays, names of:                      Arrays.              (line  18)
 * arrays, scanning:                      Scanning an Array.   (line   6)
 * arrays, sorting:                       Array Sorting.       (line   6)
-* arrays, sorting, IGNORECASE variable and: Array Sorting.    (line  81)
+* arrays, sorting, IGNORECASE variable and: Array Sorting.    (line  85)
 * arrays, sparse:                        Array Intro.         (line  71)
 * arrays, subscripts:                    Numeric Array Subscripts.
                                                               (line   6)
@@ -24563,7 +24718,7 @@ Index
 * asort() function (gawk) <1>:           String Functions.    (line  29)
 * asort() function (gawk):               Array Sorting.       (line   6)
 * asort() function (gawk), arrays, sorting: Array Sorting.    (line   6)
-* asorti() function (gawk):              String Functions.    (line  74)
+* asorti() function (gawk):              String Functions.    (line  77)
 * assert() function (C library):         Assert Function.     (line   6)
 * assert() user-defined function:        Assert Function.     (line  28)
 * assertions:                            Assert Function.     (line   6)
@@ -24807,7 +24962,7 @@ Index
 * caret (^), in bracket expressions:     Bracket Expressions. (line  16)
 * case keyword:                          Switch Statement.    (line   6)
 * case sensitivity, array indices and:   Array Intro.         (line  92)
-* case sensitivity, converting case:     String Functions.    (line 519)
+* case sensitivity, converting case:     String Functions.    (line 522)
 * case sensitivity, example programs:    Library Functions.   (line  42)
 * case sensitivity, gawk:                Case-sensitivity.    (line  26)
 * case sensitivity, regexps and <1>:     User-modified.       (line  82)
@@ -24882,7 +25037,7 @@ Index
 * common extensions, fflush() function:  I/O Functions.       (line  25)
 * common extensions, func keyword:       Definition Syntax.   (line  83)
 * common extensions, length() applied to an array: String Functions.
-                                                              (line 193)
+                                                              (line 196)
 * common extensions, nextfile statement: Nextfile Statement.  (line   6)
 * common extensions, RS as a regexp:     Records.             (line 115)
 * common extensions, single character fields: Single Character Fields.
@@ -24924,7 +25079,7 @@ Index
 * constants, types of:                   Constants.           (line   6)
 * continue statement:                    Continue Statement.  (line   6)
 * control statements:                    Statements.          (line   6)
-* converting, case:                      String Functions.    (line 519)
+* converting, case:                      String Functions.    (line 522)
 * converting, dates to timestamps:       Time Functions.      (line  74)
 * converting, during subscripting:       Numeric Array Subscripts.
                                                               (line  31)
@@ -24982,7 +25137,7 @@ Index
                                                               (line  20)
 * dark corner, input files:              Records.             (line  98)
 * dark corner, invoking awk:             Command Line.        (line  16)
-* dark corner, length() function:        String Functions.    (line 179)
+* dark corner, length() function:        String Functions.    (line 182)
 * dark corner, multiline records:        Multiple Line.       (line  35)
 * dark corner, NF variable, decrementing: Changing Fields.    (line 107)
 * dark corner, OFMT variable:            OFMT.                (line  27)
@@ -24992,7 +25147,7 @@ Index
                                                               (line 148)
 * dark corner, regexp constants, as arguments to user-defined functions: Using 
Constant Regexps.
                                                               (line  43)
-* dark corner, split() function:         String Functions.    (line 358)
+* dark corner, split() function:         String Functions.    (line 361)
 * dark corner, strings, storing:         Records.             (line 191)
 * dark corner, value of ARGV[0]:         Auto-set.            (line  35)
 * data, fixed-width:                     Constant Size.       (line   9)
@@ -25123,7 +25278,7 @@ Index
 * deleting elements in arrays:           Delete.              (line   6)
 * deleting entire arrays:                Delete.              (line  39)
 * dgawk:                                 Debugger.            (line   6)
-* differences between gawk and awk:      String Functions.    (line 193)
+* differences between gawk and awk:      String Functions.    (line 196)
 * differences in awk and gawk, ARGC/ARGV variables: ARGC and ARGV.
                                                               (line  88)
 * differences in awk and gawk, ARGIND variable: Auto-set.     (line  40)
@@ -25163,7 +25318,7 @@ Index
                                                               (line  34)
 * differences in awk and gawk, LINT variable: User-modified.  (line  98)
 * differences in awk and gawk, match() function: String Functions.
-                                                              (line 256)
+                                                              (line 259)
 * differences in awk and gawk, next/nextfile statements: Nextfile Statement.
                                                               (line   6)
 * differences in awk and gawk, print/printf statements: Format Modifiers.
@@ -25179,11 +25334,11 @@ Index
 * differences in awk and gawk, single-character fields: Single Character 
Fields.
                                                               (line   6)
 * differences in awk and gawk, split() function: String Functions.
-                                                              (line 346)
+                                                              (line 349)
 * differences in awk and gawk, strings:  Scalar Constants.    (line  20)
 * differences in awk and gawk, strings, storing: Records.     (line 187)
 * differences in awk and gawk, strtonum() function (gawk): String Functions.
-                                                              (line 401)
+                                                              (line 404)
 * differences in awk and gawk, TEXTDOMAIN variable: User-modified.
                                                               (line 153)
 * differences in awk and gawk, trunc-mod operation: Arithmetic Ops.
@@ -25331,7 +25486,7 @@ Index
 * extensions, common, fflush() function: I/O Functions.       (line  25)
 * extensions, common, func keyword:      Definition Syntax.   (line  83)
 * extensions, common, length() applied to an array: String Functions.
-                                                              (line 193)
+                                                              (line 196)
 * extensions, common, nextfile statement: Nextfile Statement. (line   6)
 * extensions, common, RS as a regexp:    Records.             (line 115)
 * extensions, common, single character fields: Single Character Fields.
@@ -25593,7 +25748,7 @@ Index
 * gawk, functions, adding:               Dynamic Extensions.  (line  10)
 * gawk, hexadecimal numbers and:         Nondecimal-numbers.  (line  42)
 * gawk, IGNORECASE variable in <1>:      String Functions.    (line  29)
-* gawk, IGNORECASE variable in <2>:      Array Sorting.       (line  81)
+* gawk, IGNORECASE variable in <2>:      Array Sorting.       (line  85)
 * gawk, IGNORECASE variable in <3>:      Array Intro.         (line  92)
 * gawk, IGNORECASE variable in <4>:      User-modified.       (line  82)
 * gawk, IGNORECASE variable in:          Case-sensitivity.    (line  26)
@@ -25646,7 +25801,7 @@ Index
                                                               (line  63)
 * General Public License (GPL):          Glossary.            (line 306)
 * General Public License, See GPL:       Manual History.      (line  11)
-* gensub() function (gawk) <1>:          String Functions.    (line  83)
+* gensub() function (gawk) <1>:          String Functions.    (line  86)
 * gensub() function (gawk):              Using Constant Regexps.
                                                               (line  43)
 * gensub() function (gawk), escape processing: Gory Details.  (line   6)
@@ -25715,10 +25870,10 @@ Index
 * group database, reading:               Group Functions.     (line   6)
 * group file:                            Group Functions.     (line   6)
 * groups, information about:             Group Functions.     (line   6)
-* gsub() function <1>:                   String Functions.    (line 136)
+* gsub() function <1>:                   String Functions.    (line 139)
 * gsub() function:                       Using Constant Regexps.
                                                               (line  43)
-* gsub() function, arguments of:         String Functions.    (line 459)
+* gsub() function, arguments of:         String Functions.    (line 462)
 * gsub() function, escape processing:    Gory Details.        (line   6)
 * h debugger command (alias for help):   Miscellaneous Dgawk Commands.
                                                               (line  68)
@@ -25752,11 +25907,11 @@ Index
 * igawk.sh program:                      Igawk Program.       (line 124)
 * ignore debugger command:               Breakpoint Control.  (line  86)
 * IGNORECASE variable <1>:               String Functions.    (line  29)
-* IGNORECASE variable <2>:               Array Sorting.       (line  81)
+* IGNORECASE variable <2>:               Array Sorting.       (line  85)
 * IGNORECASE variable <3>:               Array Intro.         (line  92)
 * IGNORECASE variable <4>:               User-modified.       (line  82)
 * IGNORECASE variable:                   Case-sensitivity.    (line  26)
-* IGNORECASE variable, array sorting and: Array Sorting.      (line  81)
+* IGNORECASE variable, array sorting and: Array Sorting.      (line  85)
 * IGNORECASE variable, array subscripts and: Array Intro.     (line  92)
 * IGNORECASE variable, in example programs: Library Functions.
                                                               (line  42)
@@ -25773,7 +25928,7 @@ Index
 * in operator, arrays and:               Reference to Elements.
                                                               (line  37)
 * increment operators:                   Increment Ops.       (line   6)
-* index() function:                      String Functions.    (line 152)
+* index() function:                      String Functions.    (line 155)
 * indexing arrays:                       Array Intro.         (line  50)
 * indirect function calls:               Indirect Calls.      (line   6)
 * info debugger command:                 Dgawk Info.          (line  12)
@@ -25909,7 +26064,7 @@ Index
                                                               (line  11)
 * left shift, bitwise:                   Bitwise Functions.   (line  32)
 * leftmost longest match:                Multiple Line.       (line  26)
-* length() function:                     String Functions.    (line 163)
+* length() function:                     String Functions.    (line 166)
 * Lesser General Public License (LGPL):  Glossary.            (line 385)
 * LGPL (Lesser General Public License):  Glossary.            (line 385)
 * libmawk:                               Other Versions.      (line 104)
@@ -25993,9 +26148,9 @@ Index
                                                               (line   6)
 * marked strings, extracting:            String Extraction.   (line   6)
 * Marx, Groucho:                         Increment Ops.       (line  61)
-* match() function:                      String Functions.    (line 203)
+* match() function:                      String Functions.    (line 206)
 * match() function, RSTART/RLENGTH variables: String Functions.
-                                                              (line 220)
+                                                              (line 223)
 * matching, expressions, See comparison expressions: Typing and Comparison.
                                                               (line   9)
 * matching, leftmost longest:            Multiple Line.       (line  26)
@@ -26194,7 +26349,7 @@ Index
 * parentheses ():                        Regexp Operators.    (line  79)
 * parentheses (), pgawk program:         Profiling.           (line 141)
 * password file:                         Passwd Functions.    (line  16)
-* patsplit() function:                   String Functions.    (line 290)
+* patsplit() function:                   String Functions.    (line 293)
 * patterns:                              Patterns and Actions.
                                                               (line   6)
 * patterns, comparison expressions as:   Expression Patterns. (line  14)
@@ -26252,7 +26407,7 @@ Index
 * portability, gawk:                     New Ports.           (line   6)
 * portability, gettext library and:      Explaining gettext.  (line  10)
 * portability, internationalization and: I18N Portability.    (line   6)
-* portability, length() function:        String Functions.    (line 172)
+* portability, length() function:        String Functions.    (line 175)
 * portability, new awk vs. old awk:      Conversion.          (line  55)
 * portability, next statement in user-defined functions: Pass By 
Value/Reference.
                                                               (line  91)
@@ -26260,7 +26415,7 @@ Index
 * portability, operators:                Increment Ops.       (line  61)
 * portability, operators, not in POSIX awk: Precedence.       (line  98)
 * portability, POSIXLY_CORRECT environment variable: Options. (line 305)
-* portability, substr() function:        String Functions.    (line 509)
+* portability, substr() function:        String Functions.    (line 512)
 * portable object files <1>:             Translator i18n.     (line   6)
 * portable object files:                 Explaining gettext.  (line  36)
 * portable object files, converting to message object files: I18N Example.
@@ -26296,7 +26451,7 @@ Index
 * POSIX awk, field separators and:       Fields.              (line   6)
 * POSIX awk, FS variable and:            User-modified.       (line  66)
 * POSIX awk, function keyword in:        Definition Syntax.   (line  83)
-* POSIX awk, functions and, length():    String Functions.    (line 172)
+* POSIX awk, functions and, length():    String Functions.    (line 175)
 * POSIX awk, GNU long options and:       Options.             (line  15)
 * POSIX awk, interval expressions in:    Regexp Operators.    (line 135)
 * POSIX awk, next/nextfile statements and: Next Statement.    (line  45)
@@ -26434,7 +26589,7 @@ Index
 * recursive functions:                   Definition Syntax.   (line  73)
 * redirection of input:                  Getline/File.        (line   6)
 * redirection of output:                 Redirection.         (line   6)
-* reference counting, sorting arrays:    Array Sorting.       (line  75)
+* reference counting, sorting arrays:    Array Sorting.       (line  79)
 * regexp constants <1>:                  Comparison Operators.
                                                               (line 103)
 * regexp constants <2>:                  Regexp Constants.    (line   6)
@@ -26501,7 +26656,7 @@ Index
 * right shift, bitwise:                  Bitwise Functions.   (line  32)
 * Ritchie, Dennis:                       Basic Data Typing.   (line  74)
 * RLENGTH variable:                      Auto-set.            (line 205)
-* RLENGTH variable, match() function and: String Functions.   (line 220)
+* RLENGTH variable, match() function and: String Functions.   (line 223)
 * Robbins, Arnold <1>:                   Future Extensions.   (line   6)
 * Robbins, Arnold <2>:                   Bugs.                (line  32)
 * Robbins, Arnold <3>:                   Contributors.        (line 106)
@@ -26526,7 +26681,7 @@ Index
 * RS variable, multiline records and:    Multiple Line.       (line  17)
 * rshift() function (gawk):              Bitwise Functions.   (line  51)
 * RSTART variable:                       Auto-set.            (line 211)
-* RSTART variable, match() function and: String Functions.    (line 220)
+* RSTART variable, match() function and: String Functions.    (line 223)
 * RT variable <1>:                       Auto-set.            (line 218)
 * RT variable <2>:                       Getline/Variable/File.
                                                               (line  10)
@@ -26553,7 +26708,7 @@ Index
 * search paths, for source files <2>:    PC Using.            (line  11)
 * search paths, for source files <3>:    Igawk Program.       (line 364)
 * search paths, for source files:        AWKPATH Variable.    (line   6)
-* searching:                             String Functions.    (line 152)
+* searching:                             String Functions.    (line 155)
 * searching, files for regular expressions: Egrep Program.    (line   6)
 * searching, for words:                  Dupword Program.     (line   6)
 * sed utility <1>:                       Glossary.            (line  12)
@@ -26644,10 +26799,10 @@ Index
 * sparse arrays:                         Array Intro.         (line  71)
 * Spencer, Henry:                        Glossary.            (line  12)
 * split utility:                         Split Program.       (line   6)
-* split() function:                      String Functions.    (line 312)
+* split() function:                      String Functions.    (line 315)
 * split() function, array elements, deleting: Delete.         (line  57)
 * split.awk program:                     Split Program.       (line  30)
-* sprintf() function <1>:                String Functions.    (line 377)
+* sprintf() function <1>:                String Functions.    (line 380)
 * sprintf() function:                    OFMT.                (line  15)
 * sprintf() function, OFMT variable and: User-modified.       (line 124)
 * sprintf() function, print/printf statements and: Round Function.
@@ -26696,14 +26851,14 @@ Index
 * strings, null:                         Regexp Field Splitting.
                                                               (line  43)
 * strings, numeric:                      Variable Typing.     (line   6)
-* strings, splitting:                    String Functions.    (line 332)
-* strtonum() function (gawk):            String Functions.    (line 384)
+* strings, splitting:                    String Functions.    (line 335)
+* strtonum() function (gawk):            String Functions.    (line 387)
 * strtonum() function (gawk), --non-decimal-data option and: Nondecimal Data.
                                                               (line  36)
-* sub() function <1>:                    String Functions.    (line 405)
+* sub() function <1>:                    String Functions.    (line 408)
 * sub() function:                        Using Constant Regexps.
                                                               (line  43)
-* sub() function, arguments of:          String Functions.    (line 459)
+* sub() function, arguments of:          String Functions.    (line 462)
 * sub() function, escape processing:     Gory Details.        (line   6)
 * subscript separators:                  User-modified.       (line 147)
 * subscripts in arrays, multidimensional: Multi-dimensional.  (line  10)
@@ -26716,7 +26871,7 @@ Index
 * SUBSEP variable:                       User-modified.       (line 147)
 * SUBSEP variable, multidimensional arrays: Multi-dimensional.
                                                               (line  16)
-* substr() function:                     String Functions.    (line 478)
+* substr() function:                     String Functions.    (line 481)
 * Sumner, Andrew:                        Other Versions.      (line  55)
 * switch statement:                      Switch Statement.    (line   6)
 * syntactic ambiguity: /= operator vs. /=.../ regexp constant: Assignment Ops.
@@ -26767,8 +26922,8 @@ Index
 * timestamps, converting dates to:       Time Functions.      (line  74)
 * timestamps, formatted:                 Gettimeofday Function.
                                                               (line   6)
-* tolower() function:                    String Functions.    (line 520)
-* toupper() function:                    String Functions.    (line 526)
+* tolower() function:                    String Functions.    (line 523)
+* toupper() function:                    String Functions.    (line 529)
 * tr utility:                            Translate Program.   (line   6)
 * trace debugger command:                Miscellaneous Dgawk Commands.
                                                               (line 110)
@@ -26791,9 +26946,9 @@ Index
 * troubleshooting, gawk, fatal errors, function arguments: Calling Built-in.
                                                               (line  16)
 * troubleshooting, getline function:     File Checking.       (line  25)
-* troubleshooting, gsub()/sub() functions: String Functions.  (line 469)
-* troubleshooting, match() function:     String Functions.    (line 285)
-* troubleshooting, patsplit() function:  String Functions.    (line 308)
+* troubleshooting, gsub()/sub() functions: String Functions.  (line 472)
+* troubleshooting, match() function:     String Functions.    (line 288)
+* troubleshooting, patsplit() function:  String Functions.    (line 311)
 * troubleshooting, print statement, omitting commas: Print Examples.
                                                               (line  31)
 * troubleshooting, printing:             Redirection.         (line 118)
@@ -26802,7 +26957,7 @@ Index
 * troubleshooting, regexp constants vs. string constants: Computed Regexps.
                                                               (line  38)
 * troubleshooting, string concatenation: Concatenation.       (line  27)
-* troubleshooting, substr() function:    String Functions.    (line 496)
+* troubleshooting, substr() function:    String Functions.    (line 499)
 * troubleshooting, system() function:    I/O Functions.       (line  85)
 * troubleshooting, typographical errors, global variables: Options.
                                                               (line  94)
@@ -27164,215 +27319,215 @@ Node: Assigning Elements416450
 Node: Array Example416941
 Node: Scanning an Array418673
 Node: Controlling Scanning421049
-Node: Delete424695
-Ref: Delete-Footnote-1427130
-Node: Numeric Array Subscripts427187
-Node: Uninitialized Subscripts429370
-Node: Multi-dimensional430998
-Node: Multi-scanning434089
-Node: Array Sorting435673
-Ref: Array Sorting-Footnote-1439452
-Node: Arrays of Arrays439646
-Node: Functions444219
-Node: Built-in445041
-Node: Calling Built-in446119
-Node: Numeric Functions448107
-Ref: Numeric Functions-Footnote-1451872
-Ref: Numeric Functions-Footnote-2452229
-Ref: Numeric Functions-Footnote-3452277
-Node: String Functions452546
-Ref: String Functions-Footnote-1475818
-Ref: String Functions-Footnote-2475947
-Ref: String Functions-Footnote-3476195
-Node: Gory Details476282
-Ref: table-sub-escapes477961
-Ref: table-posix-sub479275
-Ref: table-gensub-escapes480188
-Node: I/O Functions481359
-Ref: I/O Functions-Footnote-1488014
-Node: Time Functions488161
-Ref: Time Functions-Footnote-1499053
-Ref: Time Functions-Footnote-2499121
-Ref: Time Functions-Footnote-3499279
-Ref: Time Functions-Footnote-4499390
-Ref: Time Functions-Footnote-5499502
-Ref: Time Functions-Footnote-6499729
-Node: Bitwise Functions499995
-Ref: table-bitwise-ops500553
-Ref: Bitwise Functions-Footnote-1504713
-Node: Type Functions504897
-Node: I18N Functions505367
-Node: User-defined506994
-Node: Definition Syntax507798
-Ref: Definition Syntax-Footnote-1512708
-Node: Function Example512777
-Node: Function Caveats515371
-Node: Calling A Function515792
-Node: Variable Scope516907
-Node: Pass By Value/Reference518882
-Node: Return Statement522322
-Node: Dynamic Typing525303
-Node: Indirect Calls526038
-Node: Internationalization535723
-Node: I18N and L10N537149
-Node: Explaining gettext537835
-Ref: Explaining gettext-Footnote-1542901
-Ref: Explaining gettext-Footnote-2543085
-Node: Programmer i18n543250
-Node: Translator i18n547450
-Node: String Extraction548243
-Ref: String Extraction-Footnote-1549204
-Node: Printf Ordering549290
-Ref: Printf Ordering-Footnote-1552074
-Node: I18N Portability552138
-Ref: I18N Portability-Footnote-1554587
-Node: I18N Example554650
-Ref: I18N Example-Footnote-1557285
-Node: Gawk I18N557357
-Node: Advanced Features557974
-Node: Nondecimal Data559293
-Node: Two-way I/O560874
-Ref: Two-way I/O-Footnote-1566308
-Node: TCP/IP Networking566378
-Node: Profiling569222
-Node: Library Functions576696
-Ref: Library Functions-Footnote-1579703
-Node: Library Names579874
-Ref: Library Names-Footnote-1583345
-Ref: Library Names-Footnote-2583565
-Node: General Functions583651
-Node: Strtonum Function584604
-Node: Assert Function587534
-Node: Round Function590860
-Node: Cliff Random Function592403
-Node: Ordinal Functions593419
-Ref: Ordinal Functions-Footnote-1596489
-Ref: Ordinal Functions-Footnote-2596741
-Node: Join Function596950
-Ref: Join Function-Footnote-1598721
-Node: Gettimeofday Function598921
-Node: Data File Management602636
-Node: Filetrans Function603268
-Node: Rewind Function607407
-Node: File Checking608794
-Node: Empty Files609888
-Node: Ignoring Assigns612118
-Node: Getopt Function613671
-Ref: Getopt Function-Footnote-1624975
-Node: Passwd Functions625178
-Ref: Passwd Functions-Footnote-1634153
-Node: Group Functions634241
-Node: Walking Arrays642325
-Node: Sample Programs643894
-Node: Running Examples644559
-Node: Clones645287
-Node: Cut Program646511
-Node: Egrep Program656356
-Ref: Egrep Program-Footnote-1664129
-Node: Id Program664239
-Node: Split Program667855
-Ref: Split Program-Footnote-1671374
-Node: Tee Program671502
-Node: Uniq Program674305
-Node: Wc Program681734
-Ref: Wc Program-Footnote-1686000
-Ref: Wc Program-Footnote-2686200
-Node: Miscellaneous Programs686292
-Node: Dupword Program687480
-Node: Alarm Program689511
-Node: Translate Program694260
-Ref: Translate Program-Footnote-1698647
-Ref: Translate Program-Footnote-2698875
-Node: Labels Program699009
-Ref: Labels Program-Footnote-1702380
-Node: Word Sorting702464
-Node: History Sorting706348
-Node: Extract Program708187
-Ref: Extract Program-Footnote-1715670
-Node: Simple Sed715798
-Node: Igawk Program718860
-Ref: Igawk Program-Footnote-1733893
-Ref: Igawk Program-Footnote-2734094
-Node: Anagram Program734232
-Node: Signature Program737300
-Node: Debugger738400
-Node: Debugging739311
-Node: Debugging Concepts739724
-Node: Debugging Terms741580
-Node: Awk Debugging744202
-Node: Sample dgawk session745094
-Node: dgawk invocation745586
-Node: Finding The Bug746768
-Node: List of Debugger Commands753254
-Node: Breakpoint Control754565
-Node: Dgawk Execution Control758201
-Node: Viewing And Changing Data761552
-Node: Dgawk Stack764889
-Node: Dgawk Info766349
-Node: Miscellaneous Dgawk Commands770297
-Node: Readline Support775725
-Node: Dgawk Limitations776563
-Node: Language History778752
-Node: V7/SVR3.1780190
-Node: SVR4782511
-Node: POSIX783953
-Node: BTL784961
-Node: POSIX/GNU785695
-Node: Common Extensions790796
-Node: Contributors791897
-Node: Installation796036
-Node: Gawk Distribution796930
-Node: Getting797414
-Node: Extracting798240
-Node: Distribution contents799932
-Node: Unix Installation805154
-Node: Quick Installation805771
-Node: Additional Configuration Options807733
-Node: Configuration Philosophy809210
-Node: Non-Unix Installation811552
-Node: PC Installation812010
-Node: PC Binary Installation813309
-Node: PC Compiling815157
-Node: PC Testing818101
-Node: PC Using819277
-Node: Cygwin823462
-Node: MSYS824462
-Node: VMS Installation824976
-Node: VMS Compilation825579
-Ref: VMS Compilation-Footnote-1826586
-Node: VMS Installation Details826644
-Node: VMS Running828279
-Node: VMS Old Gawk829886
-Node: Bugs830360
-Node: Other Versions834270
-Node: Notes839549
-Node: Compatibility Mode840241
-Node: Additions841024
-Node: Accessing The Source841836
-Node: Adding Code843261
-Node: New Ports849228
-Node: Dynamic Extensions853341
-Node: Internals854717
-Node: Plugin License863820
-Node: Sample Library864454
-Node: Internal File Description865140
-Node: Internal File Ops868855
-Ref: Internal File Ops-Footnote-1873636
-Node: Using Internal File Ops873776
-Node: Future Extensions876153
-Node: Basic Concepts878657
-Node: Basic High Level879414
-Ref: Basic High Level-Footnote-1883449
-Node: Basic Data Typing883634
-Node: Floating Point Issues888159
-Node: String Conversion Precision889242
-Ref: String Conversion Precision-Footnote-1890936
-Node: Unexpected Results891045
-Node: POSIX Floating Point Problems892871
-Ref: POSIX Floating Point Problems-Footnote-1896573
-Node: Glossary896611
-Node: Copying920754
-Node: GNU Free Documentation License958311
-Node: Index983448
+Node: Delete429855
+Ref: Delete-Footnote-1432290
+Node: Numeric Array Subscripts432347
+Node: Uninitialized Subscripts434530
+Node: Multi-dimensional436158
+Node: Multi-scanning439249
+Node: Array Sorting440833
+Ref: Array Sorting-Footnote-1444819
+Node: Arrays of Arrays445013
+Node: Functions449586
+Node: Built-in450408
+Node: Calling Built-in451486
+Node: Numeric Functions453474
+Ref: Numeric Functions-Footnote-1457239
+Ref: Numeric Functions-Footnote-2457596
+Ref: Numeric Functions-Footnote-3457644
+Node: String Functions457913
+Ref: String Functions-Footnote-1481384
+Ref: String Functions-Footnote-2481513
+Ref: String Functions-Footnote-3481761
+Node: Gory Details481848
+Ref: table-sub-escapes483527
+Ref: table-posix-sub484841
+Ref: table-gensub-escapes485754
+Node: I/O Functions486925
+Ref: I/O Functions-Footnote-1493580
+Node: Time Functions493727
+Ref: Time Functions-Footnote-1504619
+Ref: Time Functions-Footnote-2504687
+Ref: Time Functions-Footnote-3504845
+Ref: Time Functions-Footnote-4504956
+Ref: Time Functions-Footnote-5505068
+Ref: Time Functions-Footnote-6505295
+Node: Bitwise Functions505561
+Ref: table-bitwise-ops506119
+Ref: Bitwise Functions-Footnote-1510279
+Node: Type Functions510463
+Node: I18N Functions510933
+Node: User-defined512560
+Node: Definition Syntax513364
+Ref: Definition Syntax-Footnote-1518274
+Node: Function Example518343
+Node: Function Caveats520937
+Node: Calling A Function521358
+Node: Variable Scope522473
+Node: Pass By Value/Reference524448
+Node: Return Statement527888
+Node: Dynamic Typing530869
+Node: Indirect Calls531604
+Node: Internationalization541289
+Node: I18N and L10N542715
+Node: Explaining gettext543401
+Ref: Explaining gettext-Footnote-1548467
+Ref: Explaining gettext-Footnote-2548651
+Node: Programmer i18n548816
+Node: Translator i18n553016
+Node: String Extraction553809
+Ref: String Extraction-Footnote-1554770
+Node: Printf Ordering554856
+Ref: Printf Ordering-Footnote-1557640
+Node: I18N Portability557704
+Ref: I18N Portability-Footnote-1560153
+Node: I18N Example560216
+Ref: I18N Example-Footnote-1562851
+Node: Gawk I18N562923
+Node: Advanced Features563540
+Node: Nondecimal Data564859
+Node: Two-way I/O566440
+Ref: Two-way I/O-Footnote-1571874
+Node: TCP/IP Networking571944
+Node: Profiling574788
+Node: Library Functions582262
+Ref: Library Functions-Footnote-1585269
+Node: Library Names585440
+Ref: Library Names-Footnote-1588911
+Ref: Library Names-Footnote-2589131
+Node: General Functions589217
+Node: Strtonum Function590170
+Node: Assert Function593100
+Node: Round Function596426
+Node: Cliff Random Function597969
+Node: Ordinal Functions598985
+Ref: Ordinal Functions-Footnote-1602055
+Ref: Ordinal Functions-Footnote-2602307
+Node: Join Function602516
+Ref: Join Function-Footnote-1604287
+Node: Gettimeofday Function604487
+Node: Data File Management608202
+Node: Filetrans Function608834
+Node: Rewind Function612973
+Node: File Checking614360
+Node: Empty Files615454
+Node: Ignoring Assigns617684
+Node: Getopt Function619237
+Ref: Getopt Function-Footnote-1630541
+Node: Passwd Functions630744
+Ref: Passwd Functions-Footnote-1639719
+Node: Group Functions639807
+Node: Walking Arrays647891
+Node: Sample Programs649460
+Node: Running Examples650125
+Node: Clones650853
+Node: Cut Program652077
+Node: Egrep Program661922
+Ref: Egrep Program-Footnote-1669695
+Node: Id Program669805
+Node: Split Program673421
+Ref: Split Program-Footnote-1676940
+Node: Tee Program677068
+Node: Uniq Program679871
+Node: Wc Program687300
+Ref: Wc Program-Footnote-1691566
+Ref: Wc Program-Footnote-2691766
+Node: Miscellaneous Programs691858
+Node: Dupword Program693046
+Node: Alarm Program695077
+Node: Translate Program699826
+Ref: Translate Program-Footnote-1704213
+Ref: Translate Program-Footnote-2704441
+Node: Labels Program704575
+Ref: Labels Program-Footnote-1707946
+Node: Word Sorting708030
+Node: History Sorting711914
+Node: Extract Program713753
+Ref: Extract Program-Footnote-1721236
+Node: Simple Sed721364
+Node: Igawk Program724426
+Ref: Igawk Program-Footnote-1739459
+Ref: Igawk Program-Footnote-2739660
+Node: Anagram Program739798
+Node: Signature Program742866
+Node: Debugger743966
+Node: Debugging744877
+Node: Debugging Concepts745290
+Node: Debugging Terms747146
+Node: Awk Debugging749768
+Node: Sample dgawk session750660
+Node: dgawk invocation751152
+Node: Finding The Bug752334
+Node: List of Debugger Commands758820
+Node: Breakpoint Control760131
+Node: Dgawk Execution Control763767
+Node: Viewing And Changing Data767118
+Node: Dgawk Stack770455
+Node: Dgawk Info771915
+Node: Miscellaneous Dgawk Commands775863
+Node: Readline Support781291
+Node: Dgawk Limitations782129
+Node: Language History784318
+Node: V7/SVR3.1785756
+Node: SVR4788077
+Node: POSIX789519
+Node: BTL790527
+Node: POSIX/GNU791261
+Node: Common Extensions796362
+Node: Contributors797463
+Node: Installation801602
+Node: Gawk Distribution802496
+Node: Getting802980
+Node: Extracting803806
+Node: Distribution contents805498
+Node: Unix Installation810720
+Node: Quick Installation811337
+Node: Additional Configuration Options813299
+Node: Configuration Philosophy814776
+Node: Non-Unix Installation817118
+Node: PC Installation817576
+Node: PC Binary Installation818875
+Node: PC Compiling820723
+Node: PC Testing823667
+Node: PC Using824843
+Node: Cygwin829028
+Node: MSYS830028
+Node: VMS Installation830542
+Node: VMS Compilation831145
+Ref: VMS Compilation-Footnote-1832152
+Node: VMS Installation Details832210
+Node: VMS Running833845
+Node: VMS Old Gawk835452
+Node: Bugs835926
+Node: Other Versions839836
+Node: Notes845115
+Node: Compatibility Mode845807
+Node: Additions846590
+Node: Accessing The Source847402
+Node: Adding Code848827
+Node: New Ports854794
+Node: Dynamic Extensions858907
+Node: Internals860283
+Node: Plugin License869386
+Node: Sample Library870020
+Node: Internal File Description870706
+Node: Internal File Ops874421
+Ref: Internal File Ops-Footnote-1879202
+Node: Using Internal File Ops879342
+Node: Future Extensions881719
+Node: Basic Concepts884223
+Node: Basic High Level884980
+Ref: Basic High Level-Footnote-1889015
+Node: Basic Data Typing889200
+Node: Floating Point Issues893725
+Node: String Conversion Precision894808
+Ref: String Conversion Precision-Footnote-1896502
+Node: Unexpected Results896611
+Node: POSIX Floating Point Problems898437
+Ref: POSIX Floating Point Problems-Footnote-1902139
+Node: Glossary902177
+Node: Copying926320
+Node: GNU Free Documentation License963877
+Node: Index989014
 
 End Tag Table
diff --git a/doc/gawk.texi b/doc/gawk.texi
index def2a01..b4b014e 100644
--- a/doc/gawk.texi
+++ b/doc/gawk.texi
@@ -13531,6 +13531,178 @@ numeric value, regardless of what the subarray itself 
contains,
 and all subarrays are treated as being equal to each other.  Their 
 order relative to each other is determined by their index strings.
 
+@subsubsection Controlling Array Scanning Order With a User-defined Function
+
+The value of @code{PROCINFO["sorted_in"]} can also be a function name
+that will let you traverse an array based on any custom criterion.
+The array elements are ordered according to the return value of this
+function.  This comparison function should be defined with at least
+four arguments:
+
+@example
+function comp_func(i1, v1, i2, v2)
+@{
+    @var{compare elements 1 and 2 in some fashion}
+    @var{return < 0; 0; or > 0}
+@}
+@end example
+
+Here, @var{i1} and @var{i2} are the indices, and @var{v1} and @var{v2}
+are the corresponding values of the two elements being compared.
+Either @var{v1} or @var{v2}, or both, can be arrays if the array being
+traversed contains subarrays as values.  The three possible return values
+are interpreted this way:
+
+@quotation
+* If the return value of @code{comp_func(i1, v1, i2, v2)} is less than 0,
+index @var{i1} comes before index @var{i2} during loop traversal.
+
+* If @code{comp_func(i1, v1, i2, v2)} returns 0, @var{i1} and @var{i2}
+come together, but their relative order with respect to each other is undefined.
+
+* If the return value of @code{comp_func(i1, v1, i2, v2)} is greater than 0,
+@var{i1} comes after @var{i2}.
+@end quotation
+
+The following comparison function can be used to scan an array in
+numerical order of the indices:
+
+@example
+function cmp_num_idx(i1, v1, i2, v2)
+@{
+    # numerical index comparison, ascending order
+    return (i1 - i2)
+@}
+@end example
+
+This function will traverse an array based on an order by element values
+rather than by indices:
+
+@example
+function cmp_str_val(i1, v1, i2, v2)
+@{
+    # string value comparison, ascending order
+       v1 = v1 ""
+       v2 = v2 ""
+       if (v1 < v2) return -1
+    return (v1 != v2)
+@}
+@end example
+
+A comparison function to make all numbers, and numeric strings without
+any leading or trailing spaces, come out first during loop traversal:
+
+@example
+function cmp_num_str_val(i1, v1, i2, v2,   n1, n2)
+@{
+    # numbers before string value comparison, ascending order
+    n1 = v1 + 0
+    n2 = v2 + 0
+    if (n1 == v1)
+        return (n2 == v2) ? (n1 - n2) : -1
+    else if (n2 == v2)
+        return 1
+    return (v1 < v2) ? -1 : (v1 != v2)
+@}
+@end example
+
+Consider sorting the entries of a GNU/Linux system password file
+according to login names.  The following program, which sorts records
+by a specific field position, can be used for this purpose:
+
+@example
+# sort.awk --- simple program to sort by field position
+# field position is specified by POS
+
+function cmp_field(i1, v1, i2, v2)
+@{
+    # comparison by value, as string, and ascending order
+    return v1[POS] < v2[POS] ? -1 : (v1[POS] != v2[POS])
+@}
+
+@{
+    for (i = 1; i <= NF; i++)
+        a[NR][i] = $i
+@}
+
+END @{
+    PROCINFO["sorted_in"] = "cmp_field"
+    if (POS < 1 || POS > NF)
+        POS = 1
+    for (i in a) @{
+        for (j = 1; j <= NF; j++)
+            printf("%s%c", a[i][j], j < NF ? ":" : "")
+        print ""
+    @}
+@}
+@end example
+
+The first field in each entry of the password file is the user's login name,
+and the fields are separated by colons.  Running the program produces the
+following output:
+
+@example
+$ @kbd{gawk -vPOS=1 -F: -f sort.awk /etc/passwd}
+@print{} adm:x:3:4:adm:/var/adm:/sbin/nologin
+@print{} apache:x:48:48:Apache:/var/www:/sbin/nologin
+@print{} avahi:x:70:70:Avahi daemon:/:/sbin/nologin
+@dots{}
+@end example
+
+The comparison normally should always return the same value when given a
+specific pair of array elements as its arguments.  If inconsistent
+results are returned then the order is undefined.  This behavior is
+sometimes exploited to introduce random order in otherwise seemingly
+ordered data:
+
+@example
+function cmp_randomize(i1, v1, i2, v2)
+@{
+    # random order
+    return (2 - 4 * rand())
+@}
+@end example
+
+As mentioned above, the order of the indices is arbitrary if two
+elements compare equal.  This is usually not a problem, but letting
+the tied elements come out in arbitrary order can be an issue, especially
+when comparing item values.  The partial ordering of the equal elements
+may change during the next loop traversal, if other elements are added to
+or removed from the array.  One way to resolve ties when comparing elements
+with otherwise equal values is to include the indices in the comparison
+rules.  Note that doing this may make the loop traversal less efficient,
+so consider it only if necessary.  The following comparison functions
+will force a deterministic order, and are based on the fact that the
+indices of two elements are never equal:
+
+@example
+function cmp_numeric(i1, v1, i2, v2)
+@{
+    # numerical value (and index) comparison, descending order
+    return (v1 != v2) ? (v2 - v1) : (i2 - i1)
+@}
+
+function cmp_string(i1, v1, i2, v2)
+@{
+    # string value (and index) comparison, descending order
+    v1 = v1 i1
+    v2 = v2 i2
+    return (v1 > v2) ? -1 : (v1 != v2)
+@}
+@end example
+
+@ignore
+Avoid using the term stable when describing the unpredictable behavior
+if two items compare equal.  Usually, the goal of a "stable algorithm"
+is to maintain the original order of the items, which is a meaningless
+concept for a list constructed from a hash.
+@end ignore
+
+A custom comparison function can often simplify ordered loop
+traversal, and the sky is really the limit when it comes to
+designing such a function.
+
+
 When string comparisons are made during a sort, either for element
 values where one or both aren't numbers or for element indices
 handled as strings, the value of @code{IGNORECASE}
@@ -13992,6 +14164,12 @@ replaced with:
 asort(source, dest, "descending number")
 @end example
 
+The third argument to @code{asort()} can also be a user-defined
+function name which is used to order the array elements before
+constructing the result array.
+@xref{Scanning an Array}, for more information.
+ 
+
 Often, what's needed is to sort on the values of the @emph{indices}
 instead of the values of the elements.
 To do that, use the
@@ -14479,6 +14657,9 @@ An empty string "" is the same as the default 
@code{"ascending string"}
 for the value of @var{how}.  If the @samp{source} array contains subarrays as 
values,
 they will come out last(first) in the @samp{dest} array for 
@samp{ascending}(@samp{descending})
 order specification.  The value of @code{IGNORECASE} affects the sorting.
+The third argument can also be a user-defined function name in which case
+the value returned by the function is used to order the array elements
+before constructing the result array.
 @xref{Scanning an Array}, for more information.
 
 For example, if the contents of @code{a} are as follows:
diff --git a/eval.c b/eval.c
index 3328bfb..0923a8f 100644
--- a/eval.c
+++ b/eval.c
@@ -31,6 +31,7 @@ extern double modf(double x, double *yp);
 extern double fmod(double x, double y);
 NODE **fcall_list;
 long fcall_count;
+int currule = 0;
 IOBUF *curfile = NULL;         /* current data file */
 int exiting = FALSE;
 
@@ -1581,7 +1582,6 @@ r_interpret(INSTRUCTION *code)
        AWKNUM x, x1, x2;
        int di, pre = FALSE;
        Regexp *rp;
-       static int currule = 0;
 #if defined(GAWKDEBUG) || defined(ARRAYDEBUG)
        int last_was_stopme = FALSE;    /* builtin stopme() called ? */
 #endif
diff --git a/test/ChangeLog b/test/ChangeLog
index 3081a53..3754c55 100644
--- a/test/ChangeLog
+++ b/test/ChangeLog
@@ -1,3 +1,7 @@
+Fri Apr 22 16:07:01 2011  John Haque         <address@hidden>
+
+       * sortu.awk, sortu.ok: New files.
+
 Fri Apr 22 09:19:06 2011  Arnold D. Robbins  <address@hidden>
 
        * arraysort.ok: Updated.
diff --git a/test/Makefile.am b/test/Makefile.am
index 321cbde..8eeb80d 100644
--- a/test/Makefile.am
+++ b/test/Makefile.am
@@ -634,6 +634,8 @@ EXTRA_DIST = \
        sortfor.awk \
        sortfor.in \
        sortfor.ok \
+       sortu.awk \
+       sortu.ok \
        space.ok \
        splitarg4.awk \
        splitarg4.in \
@@ -781,7 +783,7 @@ GAWK_EXT_TESTS = \
        lintold manyfiles match1 match2 match3 mbstr1 nondec nondec2 patsplit \
        posix profile1 profile2 profile3 printfbad1 printfbad2 \
        procinfs rebuf regx8bit reint reint2 rsstart1 rsstart2 rsstart3 \
-       rstest6 shadow sortfor splitarg4 strftime strtonum switch2
+       rstest6 shadow sortfor sortu splitarg4 strftime strtonum switch2
 
 EXTRA_TESTS = regtest inftest
 
diff --git a/test/Makefile.in b/test/Makefile.in
index 8319041..85b7e80 100644
--- a/test/Makefile.in
+++ b/test/Makefile.in
@@ -819,6 +819,8 @@ EXTRA_DIST = \
        sortfor.awk \
        sortfor.in \
        sortfor.ok \
+       sortu.awk \
+       sortu.ok \
        space.ok \
        splitarg4.awk \
        splitarg4.in \
@@ -965,7 +967,7 @@ GAWK_EXT_TESTS = \
        lintold manyfiles match1 match2 match3 mbstr1 nondec nondec2 patsplit \
        posix profile1 profile2 profile3 printfbad1 printfbad2 \
        procinfs rebuf regx8bit reint reint2 rsstart1 rsstart2 rsstart3 \
-       rstest6 shadow sortfor splitarg4 strftime strtonum switch2
+       rstest6 shadow sortfor sortu splitarg4 strftime strtonum switch2
 
 EXTRA_TESTS = regtest inftest
 INET_TESTS = inetechu inetecht inetdayu inetdayt
@@ -2760,6 +2762,11 @@ sortfor:
        @AWKPATH=$(srcdir) $(AWK) -f $@.awk  < $(srcdir)/$@.in >_$@ 2>&1 || echo EXIT CODE: $$? >>_$@
        @-$(CMP) $(srcdir)/$@.ok _$@ && rm -f _$@
 
+sortu:
+       @echo sortu
+       @AWKPATH=$(srcdir) $(AWK) -f $@.awk  >_$@ 2>&1 || echo EXIT CODE: $$? >>_$@
+       @-$(CMP) $(srcdir)/$@.ok _$@ && rm -f _$@
+
 splitarg4:
        @echo splitarg4
        @AWKPATH=$(srcdir) $(AWK) -f $@.awk  < $(srcdir)/$@.in >_$@ 2>&1 || echo EXIT CODE: $$? >>_$@
diff --git a/test/Maketests b/test/Maketests
index 2f2ff93..772ee65 100644
--- a/test/Maketests
+++ b/test/Maketests
@@ -1055,6 +1055,11 @@ sortfor:
        @AWKPATH=$(srcdir) $(AWK) -f $@.awk  < $(srcdir)/$@.in >_$@ 2>&1 || echo EXIT CODE: $$? >>_$@
        @-$(CMP) $(srcdir)/$@.ok _$@ && rm -f _$@
 
+sortu:
+       @echo sortu
+       @AWKPATH=$(srcdir) $(AWK) -f $@.awk  >_$@ 2>&1 || echo EXIT CODE: $$? >>_$@
+       @-$(CMP) $(srcdir)/$@.ok _$@ && rm -f _$@
+
 splitarg4:
        @echo splitarg4
        @AWKPATH=$(srcdir) $(AWK) -f $@.awk  < $(srcdir)/$@.in >_$@ 2>&1 || echo EXIT CODE: $$? >>_$@
diff --git a/test/sortu.awk b/test/sortu.awk
new file mode 100644
index 0000000..b4d3013
--- /dev/null
+++ b/test/sortu.awk
@@ -0,0 +1,114 @@
+# numeric before string, ascending by index
+function comp_num_str(s1, v1, s2, v2,          n1, n2) {
+       n1 = s1 + 0
+       n2 = s2 + 0
+       if (n1 == s1)
+               return (n2 == s2) ? (n1 - n2) : -1
+       else if (n2 == s2)
+               return 1
+       return (s1 < s2) ? -1 : (s1 != s2)
+}
+
+# ascending index number
+function comp_idx_num(s1, v1, s2, v2)
+{
+       return (s1 - s2)
+}
+
+# ascending value number
+function comp_val_num(s1, v1, s2, v2)
+{
+       return (v1 - v2)
+}
+
+# ascending value string
+function comp_val_str(s1, v1, s2, v2)
+{
+       v1 = v1 ""
+       v2 = v2 ""
+       return (v1 < v2) ? -1 : (v1 != v2)
+}
+
+# deterministic, by value (and index), descending numeric
+function comp_val_idx(s1, v1, s2, v2)
+{
+       return (v1 != v2) ? (v2 - v1) : (s2 - s1)
+}
+
+BEGIN {
+       a[1] = 10; a[100] = 1; a[2] = 200
+       a["cat"] = "tac"; a["rat"] = "tar";a["bat"] = "tab"
+
+       print "--- number before string, ascending by index ---"
+       PROCINFO["sorted_in"] = "comp_num_str"
+       for (i in a)
+               printf("%-10s%-s\n", i, a[i])
+
+       delete a
+       a[11] = 10; a[100] = 5; a[2] = 200
+       a[4] = 1; a[20] = 10; a[14] = 10
+       print "--- deterministic, by value (index), descending numeric ---"
+       PROCINFO["sorted_in"] = "comp_val_idx"
+       for (i in a)
+               printf("%-10s%-s\n", i, a[i])
+
+       for (IGNORECASE=0; IGNORECASE <= 1; IGNORECASE++) {
+               makea(a)
+               SORT_STR =  "comp_val_num"
+               printf("--- asort(a, b, \"%s\"), IGNORECASE = %d---\n", SORT_STR, IGNORECASE)
+               asort2(a, "")
+
+               makea(a)
+               SORT_STR =  "comp_val_str"
+               printf("--- asort(a, b, \"%s\"), IGNORECASE = %d---\n", SORT_STR, IGNORECASE)
+               asort2(a, "")
+
+               makea(a)
+               SORT_STR = "comp_val_str"
+               printf("--- asort(a, a, \"%s\"), IGNORECASE = %d---\n", SORT_STR, IGNORECASE)
+               asort1(a, "")
+  }
+}
+
+function makea(aa)
+{
+       delete aa
+       aa[1] = "barz";
+       aa[2] = "blattt";
+       aa[3] = "Zebra";
+       aa[4] = 1234;
+       aa[5] = 234;
+}
+
+# source array != destination array 
+function asort2(c, s,  d, k, m) 
+{
+       if (SORT_STR < 0)
+               m = asort(c, d);
+       else
+               m = asort(c, d, SORT_STR);
+       for (k=1; k <= m; k++) {
+               if (isarray(d[k]))
+                       asort2(d[k], s"["k"]")
+               else
+                       printf("%-10s:%-10s%-10s\n", s"["k"]", c[k], d[k])
+       }
+}
+
+# source array == destination array
+function asort1(c, s,   k, m) 
+{
+       if (SORT_STR < 0)
+               m = asort(c)
+       else if (SORT_STR != "")
+               m = asort(c, c, SORT_STR)
+       else
+               m = asort(c, c);
+
+       for (k=1; k <= m; k++) {
+               if (isarray(c[k]))
+                       asort1(c[k], s"["k"]")
+               else
+                       printf("%-10s:%-10s\n", s"["k"]", c[k])
+       }
+}
diff --git a/test/sortu.ok b/test/sortu.ok
new file mode 100644
index 0000000..ba9ac99
--- /dev/null
+++ b/test/sortu.ok
@@ -0,0 +1,50 @@
+--- number before string, ascending by index ---
+1         10
+2         200
+100       1
+bat       tab
+cat       tac
+rat       tar
+--- deterministic, by value (index), descending numeric ---
+2         200
+20        10
+14        10
+11        10
+100       5
+4         1
+--- asort(a, b, "comp_val_num"), IGNORECASE = 0---
+[1]       :barz      barz      
+[2]       :blattt    blattt    
+[3]       :Zebra     Zebra     
+[4]       :1234      234       
+[5]       :234       1234      
+--- asort(a, b, "comp_val_str"), IGNORECASE = 0---
+[1]       :barz      1234      
+[2]       :blattt    234       
+[3]       :Zebra     Zebra     
+[4]       :1234      barz      
+[5]       :234       blattt    
+--- asort(a, a, "comp_val_str"), IGNORECASE = 0---
+[1]       :1234      
+[2]       :234       
+[3]       :Zebra     
+[4]       :barz      
+[5]       :blattt    
+--- asort(a, b, "comp_val_num"), IGNORECASE = 1---
+[1]       :barz      barz      
+[2]       :blattt    blattt    
+[3]       :Zebra     Zebra     
+[4]       :1234      234       
+[5]       :234       1234      
+--- asort(a, b, "comp_val_str"), IGNORECASE = 1---
+[1]       :barz      1234      
+[2]       :blattt    234       
+[3]       :Zebra     barz      
+[4]       :1234      blattt    
+[5]       :234       Zebra     
+--- asort(a, a, "comp_val_str"), IGNORECASE = 1---
+[1]       :1234      
+[2]       :234       
+[3]       :barz      
+[4]       :blattt    
+[5]       :Zebra     

-----------------------------------------------------------------------

Summary of changes:
 ChangeLog        |    6 +
 array.c          |  178 ++++++++++-----
 awk.h            |    3 +
 doc/gawk.info    |  663 +++++++++++++++++++++++++++++++++---------------------
 doc/gawk.texi    |  181 +++++++++++++++
 eval.c           |    2 +-
 test/ChangeLog   |    4 +
 test/Makefile.am |    4 +-
 test/Makefile.in |    9 +-
 test/Maketests   |    5 +
 test/sortu.awk   |  114 ++++++++++
 test/sortu.ok    |   50 ++++
 12 files changed, 904 insertions(+), 315 deletions(-)
 create mode 100644 test/sortu.awk
 create mode 100644 test/sortu.ok


hooks/post-receive
-- 
gawk



reply via email to

[Prev in Thread] Current Thread [Next in Thread]