bug-gnu-utils
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: ^ in FS


From: Aharon Robbins
Subject: Re: ^ in FS
Date: Tue, 03 Feb 2009 22:14:23 +0200

Greetings. I have looked into the issue of ^ in FS in more detail,
and also consulted with Brian Kernighan.

I have changed gawk-stable to work the way the Bell Labs awk does. I
think that it is conceptually more correct to match ^ only at the
beginning of the entire record.  The full patch is included below.
The first patch I sent you offline is incomplete.

This will show up in CVS on Savannah shortly.

Thanks,

Arnold
--------------------------------------------------------------------
Thu Jan 29 21:14:30 2009  Arnold D. Robbins  <address@hidden>

        * field.c (parse_field, re_parse_field, def_parse_field,
        posix_def_parse_field, null_parse_field, sc_parse_field,
        fw_parse_field): Add new last arg `in_middle'.  Ignored by all
        except re_parse_field.
        (re_parse_field): Enhance logic to only allow ^ in a regex to match
        if indeed at the beginning of a record.
        (get_field): Adjust call to parse_field.

Index: awk.h
===================================================================
RCS file: /d/mongo/cvsrep/gawk-stable/awk.h,v
retrieving revision 1.20
diff -u -r1.20 awk.h
--- awk.h       27 Jan 2009 20:01:13 -0000      1.20
+++ awk.h       27 Jan 2009 20:31:32 -0000
@@ -265,7 +265,7 @@
 #endif /* GNU_REGEX */
 /* regexp matching flags: */
 #define RE_NEED_START  1       /* need to know start/end of match */
-#define RE_NO_BOL      2       /* for RS, not allowed to match ^ in regexp */
+#define RE_NO_BOL      2       /* not allowed to match ^ in regexp */
 
 /* Stuff for losing systems. */
 #if !defined(HAVE_STRTOD)
Index: field.c
===================================================================
RCS file: /d/mongo/cvsrep/gawk-stable/field.c,v
retrieving revision 1.10
diff -u -r1.10 field.c
--- field.c     23 Apr 2008 19:35:57 -0000      1.10
+++ field.c     29 Jan 2009 19:23:18 -0000
@@ -39,20 +39,20 @@
 typedef void (* Setfunc) P((long, char *, long, NODE *));
 
 static long (*parse_field) P((long, char **, int, NODE *,
-                            Regexp *, Setfunc, NODE *));
+                            Regexp *, Setfunc, NODE *, int));
 static void rebuild_record P((void));
 static long re_parse_field P((long, char **, int, NODE *,
-                            Regexp *, Setfunc, NODE *));
+                            Regexp *, Setfunc, NODE *, int));
 static long def_parse_field P((long, char **, int, NODE *,
-                             Regexp *, Setfunc, NODE *));
+                             Regexp *, Setfunc, NODE *, int));
 static long posix_def_parse_field P((long, char **, int, NODE *,
-                             Regexp *, Setfunc, NODE *));
+                             Regexp *, Setfunc, NODE *, int));
 static long null_parse_field P((long, char **, int, NODE *,
-                            Regexp *, Setfunc, NODE *));
+                            Regexp *, Setfunc, NODE *, int));
 static long sc_parse_field P((long, char **, int, NODE *,
-                            Regexp *, Setfunc, NODE *));
+                            Regexp *, Setfunc, NODE *, int));
 static long fw_parse_field P((long, char **, int, NODE *,
-                            Regexp *, Setfunc, NODE *));
+                            Regexp *, Setfunc, NODE *, int));
 static void set_element P((long num, char * str, long len, NODE *arr));
 static void grow_fields_arr P((long num));
 static void set_field P((long num, char *str, long len, NODE *dummy));
@@ -364,12 +364,14 @@
        NODE *fs ATTRIBUTE_UNUSED,
        Regexp *rp,
        Setfunc set,    /* routine to set the value of the parsed field */
-       NODE *n)
+       NODE *n,
+       int in_middle)
 {
        register char *scan = *buf;
        register long nf = parse_high_water;
        register char *field;
        register char *end = scan + len;
+       int regex_flags = RE_NEED_START;
 #ifdef MBS_SUPPORT
        size_t mbclen = 0;
        mbstate_t mbs;
@@ -377,6 +379,9 @@
                memset(&mbs, 0, sizeof(mbstate_t));
 #endif
 
+       if (in_middle)
+               regex_flags |= RE_NO_BOL;
+
        if (up_to == UNLIMITED)
                nf = 0;
        if (len == 0)
@@ -387,8 +392,9 @@
                        scan++;
        field = scan;
        while (scan < end
-              && research(rp, scan, 0, (end - scan), RE_NEED_START) != -1
+              && research(rp, scan, 0, (end - scan), regex_flags) != -1
               && nf < up_to) {
+               regex_flags |= RE_NO_BOL;
                if (REEND(rp, scan) == RESTART(rp, scan)) {   /* null match */
 #ifdef MBS_SUPPORT
                        if (gawk_mb_cur_max > 1)        {
@@ -439,7 +445,8 @@
        NODE *fs,
        Regexp *rp ATTRIBUTE_UNUSED,
        Setfunc set,    /* routine to set the value of the parsed field */
-       NODE *n)
+       NODE *n,
+       int in_middle ATTRIBUTE_UNUSED)
 {
        register char *scan = *buf;
        register long nf = parse_high_water;
@@ -506,7 +513,8 @@
        NODE *fs,
        Regexp *rp ATTRIBUTE_UNUSED,
        Setfunc set,    /* routine to set the value of the parsed field */
-       NODE *n)
+       NODE *n,
+       int in_middle ATTRIBUTE_UNUSED)
 {
        register char *scan = *buf;
        register long nf = parse_high_water;
@@ -570,7 +578,8 @@
        NODE *fs ATTRIBUTE_UNUSED,
        Regexp *rp ATTRIBUTE_UNUSED,
        Setfunc set,    /* routine to set the value of the parsed field */
-       NODE *n)
+       NODE *n,
+       int in_middle ATTRIBUTE_UNUSED)
 {
        register char *scan = *buf;
        register long nf = parse_high_water;
@@ -618,7 +627,8 @@
        NODE *fs,
        Regexp *rp ATTRIBUTE_UNUSED,
        Setfunc set,    /* routine to set the value of the parsed field */
-       NODE *n)
+       NODE *n,
+       int in_middle ATTRIBUTE_UNUSED)
 {
        register char *scan = *buf;
        register char fschar;
@@ -695,7 +705,8 @@
        NODE *fs ATTRIBUTE_UNUSED,
        Regexp *rp ATTRIBUTE_UNUSED,
        Setfunc set,    /* routine to set the value of the parsed field */
-       NODE *n)
+       NODE *n,
+       int in_middle ATTRIBUTE_UNUSED)
 {
        register char *scan = *buf;
        register long nf = parse_high_water;
@@ -763,6 +774,7 @@
 NODE **
 get_field(register long requested, Func_ptr *assign)
 {
+       int in_middle = FALSE;
        /*
         * if requesting whole line but some other field has been altered,
         * then the whole line must be rebuilt
@@ -775,7 +787,7 @@
                                        fields_arr[0]->stlen -
                                        (parse_extent - fields_arr[0]->stptr),
                                        save_FS, FS_regexp, set_field,
-                                       (NODE *) NULL);
+                                       (NODE *) NULL, in_middle);
                                parse_high_water = NF;
                        }
                        rebuild_record();
@@ -800,9 +812,11 @@
                 */
                if (parse_high_water == 0)      /* starting at the beginning */
                        parse_extent = fields_arr[0]->stptr;
+               else
+                       in_middle = TRUE;
                parse_high_water = (*parse_field)(requested, &parse_extent,
                     fields_arr[0]->stlen - (parse_extent - fields_arr[0]->stptr),
-                    save_FS, FS_regexp, set_field, (NODE *) NULL);
+                    save_FS, FS_regexp, set_field, (NODE *) NULL, in_middle);
 
                /*
                 * if we reached the end of the record, set NF to the number of
@@ -851,7 +865,7 @@
        NODE *src, *arr, *sep, *fs, *src2, *fs2, *tmp;
        char *s;
        long (*parseit) P((long, char **, int, NODE *,
-                        Regexp *, Setfunc, NODE *));
+                        Regexp *, Setfunc, NODE *, int));
        Regexp *rp = NULL;
 
        src = force_string(tree_eval(tree->lnode));
@@ -924,7 +938,7 @@
 
        s = src2->stptr;
        tmp = tmp_number((AWKNUM) (*parseit)(UNLIMITED, &s, (int) src2->stlen,
-                                            fs2, rp, set_element, arr));
+                                            fs2, rp, set_element, arr, FALSE));
        unref(src2);
        unref(fs2);
        return tmp;




reply via email to

[Prev in Thread] Current Thread [Next in Thread]