gawk-diffs
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[SCM] gawk branch, gawk-5.1-stable, updated. gawk-4.1.0-4277-gbcc0594


From: Arnold Robbins
Subject: [SCM] gawk branch, gawk-5.1-stable, updated. gawk-4.1.0-4277-gbcc0594
Date: Fri, 13 Aug 2021 17:03:55 -0400 (EDT)

This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "gawk".

The branch, gawk-5.1-stable has been updated
       via  bcc0594e9b64c89b56e8ea6891c0a9f8b97c57d1 (commit)
      from  492c24d65f760edea1f9228260930728eb747cf7 (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.

- Log -----------------------------------------------------------------
http://git.sv.gnu.org/cgit/gawk.git/commit/?id=bcc0594e9b64c89b56e8ea6891c0a9f8b97c57d1

commit bcc0594e9b64c89b56e8ea6891c0a9f8b97c57d1
Author: Arnold D. Robbins <arnold@skeeve.com>
Date:   Fri Aug 13 17:03:19 2021 -0400

    Rationalize strong regex as param to sub/gsub. Add tests.

diff --git a/ChangeLog b/ChangeLog
index c598dac..4e82bff 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,10 @@
+2021-08-13         Arnold D. Robbins     <arnold@skeeve.com>
+
+       * builtin.c (do_sub): Rationalize handling of strongly typed
+       regex as argument to sub/gsub, as well as rationalize the return
+       value from gensub to always be string. Thanks to John Naman
+       <jnaman2@gmail.com> for the bug report.
+
 2021-08-05         Andrew J. Schorr      <aschorr@telemetry-investments.com>
 
        * mpfr.c (do_mpfr_func): New argument, warn_negative. If true,
diff --git a/builtin.c b/builtin.c
index 454034f..e1ba5eb 100644
--- a/builtin.c
+++ b/builtin.c
@@ -2934,8 +2934,6 @@ do_sub(int nargs, unsigned int flags)
                        RESTART(rp, target->stptr) > target->stlen)
                goto done;
 
-       target->flags |= STRING;
-
        text = target->stptr;
        textlen = target->stlen;
 
@@ -3183,6 +3181,10 @@ done:
                        DEREF(target);
                        assert(buf != NULL);
                        return make_str_node(buf, textlen, ALREADY_MALLOCED);
+               } else if ((target->flags & STRING) == 0) {
+                       /* return a copy of original string */
+                       DEREF(target);
+                       return make_str_node(target->stptr, target->stlen, 0);
                }
 
                /* return the original string */
@@ -3193,8 +3195,34 @@ done:
        if ((flags & LITERAL) != 0)
                DEREF(target);
        else if (matches > 0) {
-               unref(*lhs);
-               *lhs = make_str_node(buf, textlen, ALREADY_MALLOCED);
+               /*
+                * 8/2021: There's a bit of a song and dance here.  If someone does
+                *
+                *      x = @/abc/
+                *      sub(/b/, "x", x)
+                *
+                * What should the type of x be after the call? Does it get converted
+                * to string? Or does it remain a regexp?  We've decided to let it
+                * remain a regexp. In that case, we have to update the compiled
+                * regular expression that it holds.
+                */
+               bool is_regex = false;
+               NODE *target = *lhs;
+
+               if ((target->flags & REGEX) != 0) {
+                       is_regex = true;
+
+                       // free old regex registers
+                       refree(target->typed_re->re_reg[0]);
+                       if (target->typed_re->re_reg[1] != NULL)
+                               refree(target->typed_re->re_reg[1]);
+                       freenode(target->typed_re);
+               }
+               unref(*lhs);            // nuke original value
+               if (is_regex)
+                       *lhs = make_typed_regex(buf, textlen);
+               else
+                       *lhs = make_str_node(buf, textlen, ALREADY_MALLOCED);
        }
 
        return make_number((AWKNUM) matches);
diff --git a/pc/Makefile.tst b/pc/Makefile.tst
index 4b99204..b9572ba 100644
--- a/pc/Makefile.tst
+++ b/pc/Makefile.tst
@@ -216,7 +216,7 @@ GAWK_EXT_TESTS = \
        procinfs profile0 profile1 profile2 profile3 profile4 profile5 profile6 \
        profile7 profile8 profile9 profile10 profile11 profile12 profile13 \
         profile14 profile15 pty1 pty2 \
-       rebuf regnul1 regnul2 regx8bit reginttrad reint reint2 rsgetline rsglstdin \
+       rebuf regexsub regnul1 regnul2 regx8bit reginttrad reint reint2 rsgetline rsglstdin \
        rsstart1 rsstart2 rsstart3 rstest6 \
        sandbox1 shadow shadowbuiltin sortfor sortfor2 sortu \
        sourcesplit split_after_fpat \
@@ -3152,6 +3152,11 @@ profile15:
        @AWKPATH="$(srcdir)" $(AWK) -f $@.awk  --pretty-print=_$@ >_$@ 2>&1 || echo EXIT CODE: $$? >>_$@
        @-$(CMP) "$(srcdir)"/$@.ok _$@ && rm -f _$@
 
+regexsub:
+       @echo $@
+       @AWKPATH="$(srcdir)" $(AWK) -f $@.awk  >_$@ 2>&1 || echo EXIT CODE: $$? >>_$@
+       @-$(CMP) "$(srcdir)"/$@.ok _$@ && rm -f _$@
+
 regnul1:
        @echo $@
        @AWKPATH="$(srcdir)" $(AWK) -f $@.awk  >_$@ 2>&1 || echo EXIT CODE: $$? >>_$@
diff --git a/test/ChangeLog b/test/ChangeLog
index c92f018..c3dcd55 100644
--- a/test/ChangeLog
+++ b/test/ChangeLog
@@ -1,3 +1,8 @@
+2021-08-13         Arnold D. Robbins     <arnold@skeeve.com>
+
+       * Makefile.am (EXTRA_DIST): regexsub, new test.
+       * regexsub.awk, regexsub.ok: New files.
+
 2021-05-15  Eli Zaretskii  <eliz@gnu.org>
 
        * iolint.ok: Reorder results to follow the order of iolint.awk.
diff --git a/test/Makefile.am b/test/Makefile.am
index 3f9e930..7ee2381 100644
--- a/test/Makefile.am
+++ b/test/Makefile.am
@@ -1057,6 +1057,8 @@ EXTRA_DIST = \
        regexpbrack2.ok \
        regexprange.awk \
        regexprange.ok \
+       regexsub.awk \
+       regexsub.ok \
        reginttrad.awk \
        reginttrad.ok \
        regnul1.awk \
@@ -1456,7 +1458,7 @@ GAWK_EXT_TESTS = \
        procinfs profile0 profile1 profile2 profile3 profile4 profile5 profile6 \
        profile7 profile8 profile9 profile10 profile11 profile12 profile13 \
         profile14 profile15 pty1 pty2 \
-       rebuf regnul1 regnul2 regx8bit reginttrad reint reint2 rsgetline rsglstdin \
+       rebuf regexsub regnul1 regnul2 regx8bit reginttrad reint reint2 rsgetline rsglstdin \
        rsstart1 rsstart2 rsstart3 rstest6 \
        sandbox1 shadow shadowbuiltin sortfor sortfor2 sortu \
        sourcesplit split_after_fpat \
diff --git a/test/Makefile.in b/test/Makefile.in
index e73a950..79ca9a3 100644
--- a/test/Makefile.in
+++ b/test/Makefile.in
@@ -1320,6 +1320,8 @@ EXTRA_DIST = \
        regexpbrack2.ok \
        regexprange.awk \
        regexprange.ok \
+       regexsub.awk \
+       regexsub.ok \
        reginttrad.awk \
        reginttrad.ok \
        regnul1.awk \
@@ -1719,7 +1721,7 @@ GAWK_EXT_TESTS = \
        procinfs profile0 profile1 profile2 profile3 profile4 profile5 profile6 \
        profile7 profile8 profile9 profile10 profile11 profile12 profile13 \
         profile14 profile15 pty1 pty2 \
-       rebuf regnul1 regnul2 regx8bit reginttrad reint reint2 rsgetline rsglstdin \
+       rebuf regexsub regnul1 regnul2 regx8bit reginttrad reint reint2 rsgetline rsglstdin \
        rsstart1 rsstart2 rsstart3 rstest6 \
        sandbox1 shadow shadowbuiltin sortfor sortfor2 sortu \
        sourcesplit split_after_fpat \
@@ -4814,6 +4816,11 @@ profile15:
        @AWKPATH="$(srcdir)" $(AWK) -f $@.awk  --pretty-print=_$@ >_$@ 2>&1 || echo EXIT CODE: $$? >>_$@
        @-$(CMP) "$(srcdir)"/$@.ok _$@ && rm -f _$@
 
+regexsub:
+       @echo $@
+       @AWKPATH="$(srcdir)" $(AWK) -f $@.awk  >_$@ 2>&1 || echo EXIT CODE: $$? >>_$@
+       @-$(CMP) "$(srcdir)"/$@.ok _$@ && rm -f _$@
+
 regnul1:
        @echo $@
        @AWKPATH="$(srcdir)" $(AWK) -f $@.awk  >_$@ 2>&1 || echo EXIT CODE: $$? >>_$@
diff --git a/test/Maketests b/test/Maketests
index a36ac8c..12cc164 100644
--- a/test/Maketests
+++ b/test/Maketests
@@ -1880,6 +1880,11 @@ profile15:
        @AWKPATH="$(srcdir)" $(AWK) -f $@.awk  --pretty-print=_$@ >_$@ 2>&1 || echo EXIT CODE: $$? >>_$@
        @-$(CMP) "$(srcdir)"/$@.ok _$@ && rm -f _$@
 
+regexsub:
+       @echo $@
+       @AWKPATH="$(srcdir)" $(AWK) -f $@.awk  >_$@ 2>&1 || echo EXIT CODE: $$? >>_$@
+       @-$(CMP) "$(srcdir)"/$@.ok _$@ && rm -f _$@
+
 regnul1:
        @echo $@
        @AWKPATH="$(srcdir)" $(AWK) -f $@.awk  >_$@ 2>&1 || echo EXIT CODE: $$? >>_$@
diff --git a/test/regexsub.awk b/test/regexsub.awk
new file mode 100644
index 0000000..92dede7
--- /dev/null
+++ b/test/regexsub.awk
@@ -0,0 +1,48 @@
+BEGIN {
+       print "Initialize strong regex"
+       rgx2 = rgx1 = @/[abc]/
+       print "Test gsub on strong regex"
+       printf("rgx%d = '%s'\ttypeof(rgx%d) = '%s'\n", 1, rgx1, 1, typeof(rgx1))
+       printf("rgx%d = '%s'\ttypeof(rgx%d) = '%s'\n", 2, rgx2, 2, typeof(rgx2))
+       print "Test gsub() a strong regex"
+       gsub(/b/, "e", rgx2)
+       printf("rgx%d = '%s'\ttypeof(rgx%d) = '%s'\n", 1, rgx1, 1, typeof(rgx1))
+       printf("rgx%d = '%s'\ttypeof(rgx%d) = '%s'\n", 2, rgx2, 2, typeof(rgx2))
+
+       print "Test value not found in regex"
+       gsub(/x/, "y", rgx1)    # should not change
+       printf("rgx%d = '%s'\ttypeof(rgx%d) = '%s'\n", 1, rgx1, 1, typeof(rgx1))
+
+       print "Test gsub on numbers"
+       v2 = v1 = 12345
+       printf("v%d = '%s'\ttypeof(v%d) = '%s'\n", 1, v1, 1, typeof(v1))
+       printf("v%d = '%s'\ttypeof(v%d) = '%s'\n", 2, v2, 2, typeof(v2))
+       gsub(/3/, "x", v2)
+       printf("v%d = '%s'\ttypeof(v%d) = '%s'\n", 1, v1, 1, typeof(v1))
+       printf("v%d = '%s'\ttypeof(v%d) = '%s'\n", 2, v2, 2, typeof(v2))
+       print "Test value not found in number"
+       gsub(/9/, "x", v1)
+       printf("v%d = '%s'\ttypeof(v%d) = '%s'\n", 1, v1, 1, typeof(v1))
+
+       print "Test gensub on regex"
+       a = b = @/abc/
+       c = gensub(/b/, "x", "g", a)
+       printf("a = @/%s/\ttypeof(a) = '%s'\n", a, typeof(a))
+       printf("c = \"%s\"\ttypeof(c) = '%s'\n", c, typeof(c))
+       print "Test value not found in regex"
+       c = gensub(/q/, "x", "g", b)
+       printf("b = @/%s/\ttypeof(b) = '%s'\n", b, typeof(b))
+       printf("c = \"%s\"\ttypeof(c) = '%s'\n", c, typeof(c))
+
+       print "Test gensub on numbers"
+       a = b = 12345
+       c = gensub(/3/, "x", "g", a)
+       printf("a = \"%s\"\ttypeof(a) = '%s'\n", a, typeof(a))
+       printf("b = \"%s\"\ttypeof(b) = '%s'\n", b, typeof(b))
+       printf("c = \"%s\"\ttypeof(c) = '%s'\n", c, typeof(c))
+       print "Test value not found in number"
+       c = gensub(/9/, "x", "g", b)
+       printf("b = \"%s\"\ttypeof(b) = '%s'\n", b, typeof(b))
+       printf("c = \"%s\"\ttypeof(c) = '%s'\n", c, typeof(c))
+       print typeof(c), c
+}
diff --git a/test/regexsub.ok b/test/regexsub.ok
new file mode 100644
index 0000000..44511eb
--- /dev/null
+++ b/test/regexsub.ok
@@ -0,0 +1,30 @@
+Initialize strong regex
+Test gsub on strong regex
+rgx1 = '[abc]' typeof(rgx1) = 'regexp'
+rgx2 = '[abc]' typeof(rgx2) = 'regexp'
+Test gsub() a strong regex
+rgx1 = '[abc]' typeof(rgx1) = 'regexp'
+rgx2 = '[aec]' typeof(rgx2) = 'regexp'
+Test value not found in regex
+rgx1 = '[abc]' typeof(rgx1) = 'regexp'
+Test gsub on numbers
+v1 = '12345'   typeof(v1) = 'number'
+v2 = '12345'   typeof(v2) = 'number'
+v1 = '12345'   typeof(v1) = 'number'
+v2 = '12x45'   typeof(v2) = 'string'
+Test value not found in number
+v1 = '12345'   typeof(v1) = 'number'
+Test gensub on regex
+a = @/abc/     typeof(a) = 'regexp'
+c = "axc"      typeof(c) = 'string'
+Test value not found in regex
+b = @/abc/     typeof(b) = 'regexp'
+c = "abc"      typeof(c) = 'string'
+Test gensub on numbers
+a = "12345"    typeof(a) = 'number'
+b = "12345"    typeof(b) = 'number'
+c = "12x45"    typeof(c) = 'string'
+Test value not found in number
+b = "12345"    typeof(b) = 'number'
+c = "12345"    typeof(c) = 'string'
+string 12345

-----------------------------------------------------------------------

Summary of changes:
 ChangeLog         |  7 +++++++
 builtin.c         | 36 ++++++++++++++++++++++++++++++++----
 pc/Makefile.tst   |  7 ++++++-
 test/ChangeLog    |  5 +++++
 test/Makefile.am  |  4 +++-
 test/Makefile.in  |  9 ++++++++-
 test/Maketests    |  5 +++++
 test/regexsub.awk | 48 ++++++++++++++++++++++++++++++++++++++++++++++++
 test/regexsub.ok  | 30 ++++++++++++++++++++++++++++++
 9 files changed, 144 insertions(+), 7 deletions(-)
 create mode 100644 test/regexsub.awk
 create mode 100644 test/regexsub.ok


hooks/post-receive
-- 
gawk



reply via email to

[Prev in Thread] Current Thread [Next in Thread]