[gawk-diffs] [SCM] gawk branch, gawk-4.1-stable, updated. gawk-4.1.0-730

gawk-diffs
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[gawk-diffs] [SCM] gawk branch, gawk-4.1-stable, updated. gawk-4.1.0-730

From:	Arnold Robbins
Subject:	[gawk-diffs] [SCM] gawk branch, gawk-4.1-stable, updated. gawk-4.1.0-730-ga03d8ab
Date:	Wed, 12 Aug 2015 18:29:39 +0000
This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "gawk".

The branch, gawk-4.1-stable has been updated
       via  a03d8ab031ca0a58915b45df000ec8ca64ef4ae7 (commit)
      from  4b00462246822209b642a4dd63491e59a4fab759 (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.

- Log -----------------------------------------------------------------
http://git.sv.gnu.org/cgit/gawk.git/commit/?id=a03d8ab031ca0a58915b45df000ec8ca64ef4ae7

commit a03d8ab031ca0a58915b45df000ec8ca64ef4ae7
Author: Arnold D. Robbins <address@hidden>
Date:   Wed Aug 12 21:29:18 2015 +0300

    Sync dfa with grep.

diff --git a/ChangeLog b/ChangeLog
index c8d655a..fa2dc86 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,7 @@
+2015-08-12         Arnold D. Robbins     <address@hidden>
+
+       * dfa.c: Sync with GNU grep. Yet again, again.
+
 2015-08-02         Arnold D. Robbins     <address@hidden>
 
        * dfa.c: Sync with GNU grep. Yet again.
diff --git a/dfa.c b/dfa.c
index c55a5c9..b0cec2a 100644
--- a/dfa.c
+++ b/dfa.c
@@ -336,18 +336,6 @@ struct mb_char_classes
   bool invert;
   wchar_t *chars;               /* Normal characters.  */
   size_t nchars;
-  wctype_t *ch_classes;         /* Character classes.  */
-  size_t nch_classes;
-  struct                       /* Range characters.  */
-  {
-    wchar_t beg;               /* Range start.  */
-    wchar_t end;               /* Range end.  */
-  } *ranges;
-  size_t nranges;
-  char **equivs;                /* Equivalence classes.  */
-  size_t nequivs;
-  char **coll_elems;
-  size_t ncoll_elems;           /* Collating elements.  */
 };
 
 /* A compiled regular expression.  */
@@ -1032,9 +1020,9 @@ parse_bracket_exp (void)
 
   /* Work area to build a mb_char_classes.  */
   struct mb_char_classes *work_mbc;
-  size_t chars_al, ranges_al, ch_classes_al, equivs_al, coll_elems_al;
+  size_t chars_al;
 
-  chars_al = ranges_al = ch_classes_al = equivs_al = coll_elems_al = 0;
+  chars_al = 0;
   if (dfa->multibyte)
     {
       dfa->mbcsets = maybe_realloc (dfa->mbcsets, dfa->nmbcsets,
@@ -1112,20 +1100,11 @@ parse_bracket_exp (void)
                     dfaerror (_("invalid character class"));
 
                   if (dfa->multibyte && !pred->single_byte_only)
-                    {
-                      /* Store the character class as wctype_t.  */
-                      wctype_t wt = (wctype_t) wctype (class);
-
-                      work_mbc->ch_classes
-                        = maybe_realloc (work_mbc->ch_classes,
-                                         work_mbc->nch_classes, &ch_classes_al,
-                                         sizeof *work_mbc->ch_classes);
-                      work_mbc->ch_classes[work_mbc->nch_classes++] = wt;
-                    }
-
-                  for (c2 = 0; c2 < NOTCHAR; ++c2)
-                    if (pred->func (c2))
-                      setbit (c2, ccl);
+                    known_bracket_exp = false;
+                  else
+                    for (c2 = 0; c2 < NOTCHAR; ++c2)
+                      if (pred->func (c2))
+                        setbit (c2, ccl);
                 }
               else
                 known_bracket_exp = false;
@@ -1161,65 +1140,49 @@ parse_bracket_exp (void)
               c2 = ']';
             }
 
-          if (c2 != ']')
+          if (c2 == ']')
+            {
+              /* In the case [x-], the - is an ordinary hyphen,
+                 which is left in c1, the lookahead character.  */
+              lexptr -= cur_mb_len;
+              lexleft += cur_mb_len;
+            }
+          else
             {
               if (c2 == '\\' && (syntax_bits & RE_BACKSLASH_ESCAPE_IN_LISTS))
                 FETCH_WC (c2, wc2, _("unbalanced ["));
 
-              if (dfa->multibyte)
-                {
-                  /* When case folding map a range, say [m-z] (or even [M-z])
-                     to the pair of ranges, [m-z] [M-Z].  Although this code
-                     is wrong in multiple ways, it's never used in practice.
-                     FIXME: Remove this (and related) unused code.  */
-                  if (wc != WEOF && wc2 != WEOF)
-                    {
-                      work_mbc->ranges
-                        = maybe_realloc (work_mbc->ranges,
-                                         work_mbc->nranges + 2,
-                                         &ranges_al, sizeof *work_mbc->ranges);
-                      work_mbc->ranges[work_mbc->nranges].beg
-                        = case_fold ? towlower (wc) : wc;
-                      work_mbc->ranges[work_mbc->nranges++].end
-                        = case_fold ? towlower (wc2) : wc2;
-
-                      if (case_fold && (iswalpha (wc) || iswalpha (wc2)))
-                        {
-                          work_mbc->ranges[work_mbc->nranges].beg
-                            = towupper (wc);
-                          work_mbc->ranges[work_mbc->nranges++].end
-                            = towupper (wc2);
-                        }
-                    }
-                }
-              else if (using_simple_locale ())
+              colon_warning_state |= 8;
+              FETCH_WC (c1, wc1, _("unbalanced ["));
+
+              /* Treat [x-y] as a range if x != y.  */
+              if (wc != wc2 || wc == WEOF)
                 {
-                  for (c1 = c; c1 <= c2; c1++)
-                    setbit (c1, ccl);
-                  if (case_fold)
+                  if (dfa->multibyte)
+                    known_bracket_exp = false;
+                  else if (using_simple_locale ())
                     {
-                      int uc = toupper (c);
-                      int uc2 = toupper (c2);
-                      for (c1 = 0; c1 < NOTCHAR; c1++)
+                      int ci;
+                      for (ci = c; ci <= c2; ci++)
+                        setbit (ci, ccl);
+                      if (case_fold)
                         {
-                          int uc1 = toupper (c1);
-                          if (uc <= uc1 && uc1 <= uc2)
-                            setbit (c1, ccl);
+                          int uc = toupper (c);
+                          int uc2 = toupper (c2);
+                          for (ci = 0; ci < NOTCHAR; ci++)
+                            {
+                              int uci = toupper (ci);
+                              if (uc <= uci && uci <= uc2)
+                                setbit (ci, ccl);
+                            }
                         }
                     }
-                }
-              else
-                known_bracket_exp = false;
+                  else
+                    known_bracket_exp = false;
 
-              colon_warning_state |= 8;
-              FETCH_WC (c1, wc1, _("unbalanced ["));
-              continue;
+                  continue;
+                }
             }
-
-          /* In the case [x-], the - is an ordinary hyphen,
-             which is left in c1, the lookahead character.  */
-          lexptr -= cur_mb_len;
-          lexleft += cur_mb_len;
         }
 
       colon_warning_state |= (c == ':') ? 2 : 4;
@@ -1680,46 +1643,27 @@ addtok (token t)
     {
       bool need_or = false;
       struct mb_char_classes *work_mbc = &dfa->mbcsets[dfa->nmbcsets - 1];
+      size_t i;
 
       /* Extract wide characters into alternations for better performance.
          This does not require UTF-8.  */
-      if (!work_mbc->invert)
+      for (i = 0; i < work_mbc->nchars; i++)
         {
-          size_t i;
-          for (i = 0; i < work_mbc->nchars; i++)
-            {
-              addtok_wc (work_mbc->chars[i]);
-              if (need_or)
-                addtok (OR);
-              need_or = true;
-            }
-          work_mbc->nchars = 0;
+          addtok_wc (work_mbc->chars[i]);
+          if (need_or)
+            addtok (OR);
+          need_or = true;
         }
+      work_mbc->nchars = 0;
 
-      /* If the MBCSET is non-inverted and doesn't include neither
-         character classes including multibyte characters, range
-         expressions, equivalence classes nor collating elements,
-         it can be replaced to a simple CSET. */
-      if (work_mbc->invert
-          || work_mbc->nch_classes != 0
-          || work_mbc->nranges != 0
-          || work_mbc->nequivs != 0 || work_mbc->ncoll_elems != 0)
+      /* Characters have been handled above, so it is possible
+         that the mbcset is empty now.  Do nothing in that case.  */
+      if (work_mbc->cset != -1)
         {
-          addtok_mb (MBCSET, ((dfa->nmbcsets - 1) << 2) + 3);
+          addtok (CSET + work_mbc->cset);
           if (need_or)
             addtok (OR);
         }
-      else
-        {
-          /* Characters have been handled above, so it is possible
-             that the mbcset is empty now.  Do nothing in that case.  */
-          if (work_mbc->cset != -1)
-            {
-              addtok (CSET + work_mbc->cset);
-              if (need_or)
-                addtok (OR);
-            }
-        }
     }
   else
     {
@@ -3105,97 +3049,6 @@ match_anychar (struct dfa *d, state_num s, position pos,
   return mbclen;
 }
 
-/* Match a bracket expression against the current context.
-   Return the length of the match, in bytes.
-   POS is the position of the bracket expression.  */
-static int
-match_mb_charset (struct dfa *d, state_num s, position pos,
-                  char const *p, wint_t wc, size_t match_len)
-{
-  size_t i;
-  bool match;              /* Matching succeeded.  */
-  int op_len;              /* Length of the operator.  */
-  char buffer[128];
-
-  /* Pointer to the structure to which we are currently referring.  */
-  struct mb_char_classes *work_mbc;
-
-  int context;
-
-  /* Check syntax bits.  */
-  if (wc == WEOF)
-    return 0;
-
-  context = wchar_context (wc);
-  if (!SUCCEEDS_IN_CONTEXT (pos.constraint, d->states[s].context, context))
-    return 0;
-
-  /* Assign the current referring operator to work_mbc.  */
-  work_mbc = &(d->mbcsets[(d->multibyte_prop[pos.index]) >> 2]);
-  match = !work_mbc->invert;
-
-  /* Match in range 0-255?  */
-  if (wc < NOTCHAR && work_mbc->cset != -1
-      && tstbit (to_uchar (wc), d->charclasses[work_mbc->cset]))
-    goto charset_matched;
-
-  /* match with a character class?  */
-  for (i = 0; i < work_mbc->nch_classes; i++)
-    {
-      if (iswctype ((wint_t) wc, work_mbc->ch_classes[i]))
-        goto charset_matched;
-    }
-
-  strncpy (buffer, p, match_len);
-  buffer[match_len] = '\0';
-
-  /* match with an equivalence class?  */
-  for (i = 0; i < work_mbc->nequivs; i++)
-    {
-      op_len = strlen (work_mbc->equivs[i]);
-      strncpy (buffer, p, op_len);
-      buffer[op_len] = '\0';
-      if (strcoll (work_mbc->equivs[i], buffer) == 0)
-        {
-          match_len = op_len;
-          goto charset_matched;
-        }
-    }
-
-  /* match with a collating element?  */
-  for (i = 0; i < work_mbc->ncoll_elems; i++)
-    {
-      op_len = strlen (work_mbc->coll_elems[i]);
-      strncpy (buffer, p, op_len);
-      buffer[op_len] = '\0';
-
-      if (strcoll (work_mbc->coll_elems[i], buffer) == 0)
-        {
-          match_len = op_len;
-          goto charset_matched;
-        }
-    }
-
-  /* match with a range?  */
-  for (i = 0; i < work_mbc->nranges; i++)
-    {
-      if (work_mbc->ranges[i].beg <= wc && wc <= work_mbc->ranges[i].end)
-        goto charset_matched;
-    }
-
-  /* match with a character?  */
-  for (i = 0; i < work_mbc->nchars; i++)
-    {
-      if (wc == work_mbc->chars[i])
-        goto charset_matched;
-    }
-
-  match = !match;
-
-charset_matched:
-  return match ? match_len : 0;
-}
-
 /* Check whether each of 'd->states[s].mbps.elem' can match.  Then return the
    array which corresponds to 'd->states[s].mbps.elem'; each element of the
    array contains the number of bytes with which the element can match.
@@ -3217,9 +3070,6 @@ check_matching_with_multibyte_ops (struct dfa *d, 
state_num s,
         case ANYCHAR:
           rarray[i] = match_anychar (d, s, pos, wc, mbclen);
           break;
-        case MBCSET:
-          rarray[i] = match_mb_charset (d, s, pos, p, wc, mbclen);
-          break;
         default:
           break;                /* cannot happen.  */
         }
@@ -3645,19 +3495,8 @@ free_mbdata (struct dfa *d)
 
   for (i = 0; i < d->nmbcsets; ++i)
     {
-      size_t j;
       struct mb_char_classes *p = &(d->mbcsets[i]);
       free (p->chars);
-      free (p->ch_classes);
-      free (p->ranges);
-
-      for (j = 0; j < p->nequivs; ++j)
-        free (p->equivs[j]);
-      free (p->equivs);
-
-      for (j = 0; j < p->ncoll_elems; ++j)
-        free (p->coll_elems[j]);
-      free (p->coll_elems);
     }
 
   free (d->mbcsets);

-----------------------------------------------------------------------

Summary of changes:
 ChangeLog |    4 +
 dfa.c     |  261 ++++++++++++-------------------------------------------------
 2 files changed, 54 insertions(+), 211 deletions(-)


hooks/post-receive
-- 
gawk
[Prev in Thread]
Current Thread
[Next in Thread]
[gawk-diffs] [SCM] gawk branch, gawk-4.1-stable, updated. gawk-4.1.0-730-ga03d8ab, Arnold Robbins <=
Prev by Date: [gawk-diffs] [SCM] gawk branch, feature/zOS-try2, created. gawk-4.1.0-730-gedc2856
Next by Date: [gawk-diffs] [SCM] gawk branch, feature/zOS-try2, updated. gawk-4.1.0-732-g09f77c4
Previous by thread: [gawk-diffs] [SCM] gawk branch, feature/zOS-try2, created. gawk-4.1.0-730-gedc2856
Next by thread: [gawk-diffs] [SCM] gawk branch, feature/zOS-try2, updated. gawk-4.1.0-732-g09f77c4
Index(es):
- Date
- Thread