gawk-diffs
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[SCM] gawk branch, gawk-5.1-stable, updated. gawk-4.1.0-4405-g9a1e7448


From: Arnold Robbins
Subject: [SCM] gawk branch, gawk-5.1-stable, updated. gawk-4.1.0-4405-g9a1e7448
Date: Mon, 23 May 2022 16:39:16 -0400 (EDT)

This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "gawk".

The branch, gawk-5.1-stable has been updated
       via  9a1e7448b6ac3f7b468e72af9f013f61fcf63239 (commit)
      from  5456e9b6c741ffb0323e862cadb0977330df51b4 (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.

- Log -----------------------------------------------------------------
http://git.sv.gnu.org/cgit/gawk.git/commit/?id=9a1e7448b6ac3f7b468e72af9f013f61fcf63239

commit 9a1e7448b6ac3f7b468e72af9f013f61fcf63239
Author: Arnold D. Robbins <arnold@skeeve.com>
Date:   Mon May 23 16:39:00 2022 -0400

    Update support files.

diff --git a/support/ChangeLog b/support/ChangeLog
index 4948abe8..b422af16 100644
--- a/support/ChangeLog
+++ b/support/ChangeLog
@@ -1,3 +1,7 @@
+2022-05-23         Arnold D. Robbins     <arnold@skeeve.com>
+
+       * cdefs.h, dfa.c, dfa.h, libc-config.h: Sync with GNULIB.
+
 2022-04-27         Arnold D. Robbins     <arnold@skeeve.com>
 
        * regcomp.c (peek_token_bracket): Sync to version from GNULIB
diff --git a/support/cdefs.h b/support/cdefs.h
index cb251450..7b8ed5b3 100644
--- a/support/cdefs.h
+++ b/support/cdefs.h
@@ -164,13 +164,13 @@
    || (__builtin_constant_p (__l) && (__l) > 0))
 
 /* Length is known to be safe at compile time if the __L * __S <= __OBJSZ
-   condition can be folded to a constant and if it is true.  The -1 check is
-   redundant because since it implies that __glibc_safe_len_cond is true.  */
+   condition can be folded to a constant and if it is true, or unknown (-1) */
 #define __glibc_safe_or_unknown_len(__l, __s, __osz) \
-  (__glibc_unsigned_or_positive (__l)                                        \
-   && __builtin_constant_p (__glibc_safe_len_cond ((__SIZE_TYPE__) (__l),     \
-                                                  __s, __osz))               \
-   && __glibc_safe_len_cond ((__SIZE_TYPE__) (__l), __s, __osz))
+  ((__osz) == (__SIZE_TYPE__) -1                                             \
+   || (__glibc_unsigned_or_positive (__l)                                    \
+       && __builtin_constant_p (__glibc_safe_len_cond ((__SIZE_TYPE__) (__l), \
+                                                      (__s), (__osz)))       \
+       && __glibc_safe_len_cond ((__SIZE_TYPE__) (__l), (__s), (__osz))))
 
 /* Conversely, we know at compile time that the length is unsafe if the
    __L * __S <= __OBJSZ condition can be folded to a constant and if it is
diff --git a/support/dfa.c b/support/dfa.c
index 45adb80f..5d92b38b 100644
--- a/support/dfa.c
+++ b/support/dfa.c
@@ -44,13 +44,13 @@
 #define assume_nonnull(x) assume ((x) != NULL)
 
 static bool
-streq (char const *a, char const *b)
+str_eq (char const *a, char const *b)
 {
   return strcmp (a, b) == 0;
 }
 
 static bool
-isasciidigit (char c)
+c_isdigit (char c)
 {
   return '0' <= c && c <= '9';
 }
@@ -59,6 +59,7 @@ isasciidigit (char c)
 #define _(str) gettext (str)
 
 #include <wchar.h>
+#include <wctype.h>
 
 #include "xalloc.h"
 #include "localeinfo.h"
@@ -399,15 +400,12 @@ struct regex_syntax
 {
   /* Syntax bits controlling the behavior of the lexical analyzer.  */
   reg_syntax_t syntax_bits;
+  int dfaopts;
   bool syntax_bits_set;
 
   /* Flag for case-folding letters into sets.  */
   bool case_fold;
 
-  /* True if ^ and $ match only the start and end of data, and do not match
-     end-of-line within data.  */
-  bool anchor;
-
   /* End-of-line byte in data.  */
   unsigned char eolbyte;
 
@@ -836,7 +834,7 @@ unibyte_word_constituent (struct dfa const *dfa, unsigned char c)
 static int
 char_context (struct dfa const *dfa, unsigned char c)
 {
-  if (c == dfa->syntax.eolbyte && !dfa->syntax.anchor)
+  if (c == dfa->syntax.eolbyte && !(dfa->syntax.dfaopts & DFA_ANCHOR))
     return CTX_NEWLINE;
   if (unibyte_word_constituent (dfa, c))
     return CTX_LETTER;
@@ -930,7 +928,7 @@ static const struct dfa_ctype *_GL_ATTRIBUTE_PURE
 find_pred (const char *str)
 {
   for (int i = 0; prednames[i].name; i++)
-    if (streq (str, prednames[i].name))
+    if (str_eq (str, prednames[i].name))
       return &prednames[i];
   return NULL;
 }
@@ -1009,8 +1007,8 @@ parse_bracket_exp (struct dfa *dfa)
                    worry about that possibility.  */
                 {
                   char const *class
-                    = (dfa->syntax.case_fold && (streq (str, "upper")
-                                                 || streq (str, "lower"))
+                    = (dfa->syntax.case_fold && (str_eq (str, "upper")
+                                                 || str_eq (str, "lower"))
                        ? "alpha" : str);
                   const struct dfa_ctype *pred = find_pred (class);
                   if (!pred)
@@ -1090,7 +1088,7 @@ parse_bracket_exp (struct dfa *dfa)
               if (wc != wc2 || wc == WEOF)
                 {
                   if (dfa->localeinfo.simple
-                      || (isasciidigit (c) && isasciidigit (c2)))
+                      || (c_isdigit (c) & c_isdigit (c2)))
                     {
                       for (int ci = c; ci <= c2; ci++)
                         if (dfa->syntax.case_fold && isalpha (ci))
@@ -1140,7 +1138,9 @@ parse_bracket_exp (struct dfa *dfa)
   while ((wc = wc1, (c = c1) != ']'));
 
   if (colon_warning_state == 7)
-    dfawarn (_("character class syntax is [[:space:]], not [:space:]"));
+    ((dfa->syntax.dfaopts & DFA_CONFUSING_BRACKETS_ERROR
+      ? dfaerror : dfawarn)
+     (_("character class syntax is [[:space:]], not [:space:]")));
 
   if (! known_bracket_exp)
     return BACKREF;
@@ -1193,8 +1193,7 @@ lex (struct dfa *dfa)
      we set the backslash flag and go through the loop again.
      On the plus side, this avoids having a duplicate of the
      main switch inside the backslash case.  On the minus side,
-     it means that just about every case begins with
-     "if (backslash) ...".  */
+     it means that just about every case tests the backslash flag.  */
   for (int i = 0; i < 2; ++i)
     {
       if (! dfa->lex.left)
@@ -1249,52 +1248,67 @@ lex (struct dfa *dfa)
         case '7':
         case '8':
         case '9':
-          if (backslash && !(dfa->syntax.syntax_bits & RE_NO_BK_REFS))
-            {
-              dfa->lex.laststart = false;
-              return dfa->lex.lasttok = BACKREF;
-            }
-          goto normal_char;
+          if (!backslash)
+            goto normal_char;
+          if (dfa->syntax.syntax_bits & RE_NO_BK_REFS)
+            goto stray_backslash;
+
+          dfa->lex.laststart = false;
+          return dfa->lex.lasttok = BACKREF;
 
         case '`':
-          if (backslash && !(dfa->syntax.syntax_bits & RE_NO_GNU_OPS))
-            {
-              /* FIXME: should be beginning of string */
-              return dfa->lex.lasttok = BEGLINE;
-            }
-          goto normal_char;
+          if (!backslash)
+            goto normal_char;
+          if (dfa->syntax.syntax_bits & RE_NO_GNU_OPS)
+            goto stray_backslash;
+
+          /* FIXME: should be beginning of string */
+          return dfa->lex.lasttok = BEGLINE;
 
         case '\'':
-          if (backslash && !(dfa->syntax.syntax_bits & RE_NO_GNU_OPS))
-            {
-              /* FIXME: should be end of string */
-              return dfa->lex.lasttok = ENDLINE;
-            }
-          goto normal_char;
+          if (!backslash)
+            goto normal_char;
+          if (dfa->syntax.syntax_bits & RE_NO_GNU_OPS)
+            goto stray_backslash;
+
+          /* FIXME: should be end of string */
+          return dfa->lex.lasttok = ENDLINE;
 
         case '<':
-          if (backslash && !(dfa->syntax.syntax_bits & RE_NO_GNU_OPS))
-            return dfa->lex.lasttok = BEGWORD;
-          goto normal_char;
+          if (!backslash)
+            goto normal_char;
+          if (dfa->syntax.syntax_bits & RE_NO_GNU_OPS)
+            goto stray_backslash;
+
+          return dfa->lex.lasttok = BEGWORD;
 
         case '>':
-          if (backslash && !(dfa->syntax.syntax_bits & RE_NO_GNU_OPS))
-            return dfa->lex.lasttok = ENDWORD;
-          goto normal_char;
+          if (!backslash)
+            goto normal_char;
+          if (dfa->syntax.syntax_bits & RE_NO_GNU_OPS)
+            goto stray_backslash;
+
+          return dfa->lex.lasttok = ENDWORD;
 
         case 'b':
-          if (backslash && !(dfa->syntax.syntax_bits & RE_NO_GNU_OPS))
-            return dfa->lex.lasttok = LIMWORD;
-          goto normal_char;
+          if (!backslash)
+            goto normal_char;
+          if (dfa->syntax.syntax_bits & RE_NO_GNU_OPS)
+            goto stray_backslash;
+
+          return dfa->lex.lasttok = LIMWORD;
 
         case 'B':
-          if (backslash && !(dfa->syntax.syntax_bits & RE_NO_GNU_OPS))
-            return dfa->lex.lasttok = NOTLIMWORD;
-          goto normal_char;
+          if (!backslash)
+            goto normal_char;
+          if (dfa->syntax.syntax_bits & RE_NO_GNU_OPS)
+            goto stray_backslash;
+
+          return dfa->lex.lasttok = NOTLIMWORD;
 
         case '?':
           if (dfa->syntax.syntax_bits & RE_LIMITED_OPS)
-            goto normal_char;
+            goto default_case;
           if (backslash != ((dfa->syntax.syntax_bits & RE_BK_PLUS_QM) != 0))
             goto normal_char;
           if (!(dfa->syntax.syntax_bits & RE_CONTEXT_INDEP_OPS)
@@ -1312,7 +1326,7 @@ lex (struct dfa *dfa)
 
         case '+':
           if (dfa->syntax.syntax_bits & RE_LIMITED_OPS)
-            goto normal_char;
+            goto default_case;
           if (backslash != ((dfa->syntax.syntax_bits & RE_BK_PLUS_QM) != 0))
             goto normal_char;
           if (!(dfa->syntax.syntax_bits & RE_CONTEXT_INDEP_OPS)
@@ -1322,7 +1336,7 @@ lex (struct dfa *dfa)
 
         case '{':
           if (!(dfa->syntax.syntax_bits & RE_INTERVALS))
-            goto normal_char;
+            goto default_case;
           if (backslash != ((dfa->syntax.syntax_bits & RE_NO_BK_BRACES) == 0))
             goto normal_char;
           if (!(dfa->syntax.syntax_bits & RE_CONTEXT_INDEP_OPS)
@@ -1339,7 +1353,7 @@ lex (struct dfa *dfa)
             char const *p = dfa->lex.ptr;
             char const *lim = p + dfa->lex.left;
             dfa->lex.minrep = dfa->lex.maxrep = -1;
-            for (; p != lim && isasciidigit (*p); p++)
+            for (; p != lim && c_isdigit (*p); p++)
               dfa->lex.minrep = (dfa->lex.minrep < 0
                                  ? *p - '0'
                                  : MIN (RE_DUP_MAX + 1,
@@ -1352,7 +1366,7 @@ lex (struct dfa *dfa)
                   {
                     if (dfa->lex.minrep < 0)
                       dfa->lex.minrep = 0;
-                    while (++p != lim && isasciidigit (*p))
+                    while (++p != lim && c_isdigit (*p))
                       dfa->lex.maxrep
                         = (dfa->lex.maxrep < 0
                            ? *p - '0'
@@ -1380,15 +1394,16 @@ lex (struct dfa *dfa)
 
         case '|':
           if (dfa->syntax.syntax_bits & RE_LIMITED_OPS)
-            goto normal_char;
+            goto default_case;
           if (backslash != ((dfa->syntax.syntax_bits & RE_NO_BK_VBAR) == 0))
             goto normal_char;
           dfa->lex.laststart = true;
           return dfa->lex.lasttok = OR;
 
         case '\n':
-          if (dfa->syntax.syntax_bits & RE_LIMITED_OPS
-              || backslash || !(dfa->syntax.syntax_bits & RE_NEWLINE_ALT))
+          if (!(dfa->syntax.syntax_bits & RE_NEWLINE_ALT))
+            goto default_case;
+          if (backslash)
             goto normal_char;
           dfa->lex.laststart = true;
           return dfa->lex.lasttok = OR;
@@ -1434,8 +1449,11 @@ lex (struct dfa *dfa)
 
         case 's':
         case 'S':
-          if (!backslash || (dfa->syntax.syntax_bits & RE_NO_GNU_OPS))
+          if (!backslash)
             goto normal_char;
+          if (dfa->syntax.syntax_bits & RE_NO_GNU_OPS)
+            goto stray_backslash;
+
           if (!dfa->localeinfo.multibyte)
             {
               charclass ccl;
@@ -1467,8 +1485,10 @@ lex (struct dfa *dfa)
 
         case 'w':
         case 'W':
-          if (!backslash || (dfa->syntax.syntax_bits & RE_NO_GNU_OPS))
+          if (!backslash)
             goto normal_char;
+          if (dfa->syntax.syntax_bits & RE_NO_GNU_OPS)
+            goto stray_backslash;
 
           if (!dfa->localeinfo.multibyte)
             {
@@ -1506,6 +1526,26 @@ lex (struct dfa *dfa)
           return dfa->lex.lasttok = parse_bracket_exp (dfa);
 
         default:
+        default_case:
+          if (!backslash)
+            goto normal_char;
+        stray_backslash:
+          if (dfa->syntax.dfaopts & DFA_STRAY_BACKSLASH_WARN)
+            {
+              char const *msg;
+              char msgbuf[100];
+              if (!iswprint (dfa->lex.wctok))
+                msg = _("stray \\ before unprintable character");
+              else if (iswspace (dfa->lex.wctok))
+                msg = _("stray \\ before white space");
+              else
+                {
+                  int n = snprintf (msgbuf, sizeof msgbuf,
+                                    _("stray \\ before %lc"), dfa->lex.wctok);
+                  msg = 0 <= n && n < sizeof msgbuf ? msgbuf : _("stray \\");
+                }
+              dfawarn (msg);
+            }
         normal_char:
           dfa->lex.laststart = false;
           /* For multibyte character sets, folding is done in atom.  Always
@@ -1704,7 +1744,7 @@ add_utf8_anychar (struct dfa *dfa)
     /* G. ed (just a token).  */
 
     /* H. 80-9f: 2nd byte of a "GHC" sequence.  */
-    CHARCLASS_INIT (0, 0, 0, 0, 0xffff, 0, 0, 0),
+    CHARCLASS_INIT (0, 0, 0, 0, 0xffffffff, 0, 0, 0),
 
     /* I. f0 (just a token).  */
 
@@ -1717,7 +1757,7 @@ add_utf8_anychar (struct dfa *dfa)
     /* L. f4 (just a token).  */
 
     /* M. 80-8f: 2nd byte of a "LMCC" sequence.  */
-    CHARCLASS_INIT (0, 0, 0, 0, 0xff, 0, 0, 0),
+    CHARCLASS_INIT (0, 0, 0, 0, 0xffff, 0, 0, 0),
   };
 
   /* Define the character classes that are needed below.  */
@@ -4116,7 +4156,7 @@ dfamust (struct dfa const *d)
             idx_t j, ln, rn, n;
 
             /* Guaranteed to be.  Unlikely, but ...  */
-            if (streq (lmp->is, rmp->is))
+            if (str_eq (lmp->is, rmp->is))
               {
                 lmp->begline &= rmp->begline;
                 lmp->endline &= rmp->endline;
@@ -4163,7 +4203,7 @@ dfamust (struct dfa const *d)
           for (idx_t i = 0; mp->in[i] != NULL; i++)
             if (strlen (mp->in[i]) > strlen (result))
               result = mp->in[i];
-          if (streq (result, mp->is))
+          if (str_eq (result, mp->is))
             {
               if ((!need_begline || mp->begline) && (!need_endline
                                                      || mp->endline))
@@ -4327,9 +4367,9 @@ dfasyntax (struct dfa *dfa, struct localeinfo const *linfo,
   dfa->canychar = -1;
   dfa->syntax.syntax_bits_set = true;
   dfa->syntax.case_fold = (bits & RE_ICASE) != 0;
-  dfa->syntax.anchor = (dfaopts & DFA_ANCHOR) != 0;
   dfa->syntax.eolbyte = dfaopts & DFA_EOL_NUL ? '\0' : '\n';
   dfa->syntax.syntax_bits = bits;
+  dfa->syntax.dfaopts = dfaopts;
 
   for (int i = CHAR_MIN; i <= CHAR_MAX; ++i)
     {
diff --git a/support/dfa.h b/support/dfa.h
index e94e4354..8674929e 100644
--- a/support/dfa.h
+++ b/support/dfa.h
@@ -73,7 +73,14 @@ enum
     DFA_ANCHOR = 1 << 0,
 
     /* '\0' in data is end-of-line, instead of the traditional '\n'.  */
-    DFA_EOL_NUL = 1 << 1
+    DFA_EOL_NUL = 1 << 1,
+
+    /* Treat [:alpha:] etc. as an error at the top level, instead of
+       merely a warning.  */
+    DFA_CONFUSING_BRACKETS_ERROR = 1 << 2,
+
+    /* Warn about stray backslashes before ordinary characters.  */
+    DFA_STRAY_BACKSLASH_WARN = 1 << 3,
   };
 
 /* Initialize or reinitialize a DFA.  The arguments are:
diff --git a/support/libc-config.h b/support/libc-config.h
index 8fec4893..a56665b1 100644
--- a/support/libc-config.h
+++ b/support/libc-config.h
@@ -121,6 +121,7 @@
 # undef __attr_dealloc
 # undef __attr_dealloc_free
 # undef __attribute__
+# undef __attribute_alloc_align__
 # undef __attribute_alloc_size__
 # undef __attribute_artificial__
 # undef __attribute_const__
@@ -129,6 +130,7 @@
 # undef __attribute_format_arg__
 # undef __attribute_format_strfmon__
 # undef __attribute_malloc__
+# undef __attribute_maybe_unused__
 # undef __attribute_noinline__
 # undef __attribute_nonstring__
 # undef __attribute_pure__
@@ -142,16 +144,24 @@
 # undef __extern_always_inline
 # undef __extern_inline
 # undef __flexarr
+# undef __fortified_attr_access
 # undef __fortify_function
 # undef __glibc_c99_flexarr_available
+# undef __glibc_fortify
+# undef __glibc_fortify_n
 # undef __glibc_has_attribute
 # undef __glibc_has_builtin
 # undef __glibc_has_extension
+# undef __glibc_likely
 # undef __glibc_macro_warning
 # undef __glibc_macro_warning1
 # undef __glibc_objsize
 # undef __glibc_objsize0
+# undef __glibc_safe_len_cond
+# undef __glibc_safe_or_unknown_len
 # undef __glibc_unlikely
+# undef __glibc_unsafe_len
+# undef __glibc_unsigned_or_positive
 # undef __inline
 # undef __ptr_t
 # undef __restrict
@@ -159,6 +169,7 @@
 # undef __va_arg_pack
 # undef __va_arg_pack_len
 # undef __warnattr
+# undef __wur
 
 /* Include our copy of glibc <sys/cdefs.h>.  */
 # include <cdefs.h>

-----------------------------------------------------------------------

Summary of changes:
 support/ChangeLog     |   4 ++
 support/cdefs.h       |  12 ++--
 support/dfa.c         | 158 +++++++++++++++++++++++++++++++-------------------
 support/dfa.h         |   9 ++-
 support/libc-config.h |  11 ++++
 5 files changed, 128 insertions(+), 66 deletions(-)


hooks/post-receive
-- 
gawk



reply via email to

[Prev in Thread] Current Thread [Next in Thread]