[gawk-diffs] [SCM] gawk branch, gawk-4.1-stable, updated. gawk-4.1.0-880

gawk-diffs
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[gawk-diffs] [SCM] gawk branch, gawk-4.1-stable, updated. gawk-4.1.0-880

From:	Arnold Robbins
Subject:	[gawk-diffs] [SCM] gawk branch, gawk-4.1-stable, updated. gawk-4.1.0-880-g7816969
Date:	Mon, 02 May 2016 19:12:10 +0000
This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "gawk".

The branch, gawk-4.1-stable has been updated
       via  78169694440b5258ddb20c4d1fe0eb445a479e1f (commit)
      from  1cf371db5f30bc9f40a4840f1d1d276ee46619a0 (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.

- Log -----------------------------------------------------------------
http://git.sv.gnu.org/cgit/gawk.git/commit/?id=78169694440b5258ddb20c4d1fe0eb445a479e1f

commit 78169694440b5258ddb20c4d1fe0eb445a479e1f
Author: Arnold D. Robbins <address@hidden>
Date:   Mon May 2 22:05:25 2016 +0300

    Sync dfa with grep.

diff --git a/ChangeLog b/ChangeLog
index 75e92be..a963541 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,10 @@
 2016-05-02         Arnold D. Robbins     <address@hidden>
 
+       * dfa.h, dfa.c: Sync with grep.
+       * re.c (research): Adjust type of try_backref.
+
+2016-05-02         Arnold D. Robbins     <address@hidden>
+
        * awk.h (success_node): Declare.
        * array.c (success_node): Define.
        * cint_array.c, int_array.c, str_array.c: Use `& success_node'
diff --git a/dfa.c b/dfa.c
index fff4599..3156e5e 100644
--- a/dfa.c
+++ b/dfa.c
@@ -362,7 +362,8 @@ struct dfa
   mbstate_t mbs;               /* Multibyte conversion state.  */
 
   /* dfaexec implementation.  */
-  char *(*dfaexec) (struct dfa *, char const *, char *, int, size_t *, int *);
+  char *(*dfaexec) (struct dfa *, char const *, char *,
+                    bool, size_t *, bool *);
 
   /* The following are valid only if MB_CUR_MAX > 1.  */
 
@@ -675,7 +676,8 @@ charclass_index (charclass const s)
 }
 
 /* Syntax bits controlling the behavior of the lexical analyzer.  */
-static reg_syntax_t syntax_bits, syntax_bits_set;
+static reg_syntax_t syntax_bits;
+static bool syntax_bits_set;
 
 /* Flag for case-folding letters into sets.  */
 static bool case_fold;
@@ -686,6 +688,10 @@ static unsigned char eolbyte;
 /* Cache of char-context values.  */
 static int sbit[NOTCHAR];
 
+/* If never_trail[B], the byte B cannot be a non-initial byte in a
+   multibyte character.  */
+static bool never_trail[NOTCHAR];
+
 /* Set of characters considered letters.  */
 static charclass letters;
 
@@ -720,12 +726,12 @@ wchar_context (wint_t wc)
 
 /* Entry point to set syntax options.  */
 void
-dfasyntax (reg_syntax_t bits, int fold, unsigned char eol)
+dfasyntax (reg_syntax_t bits, bool fold, unsigned char eol)
 {
   int i;
-  syntax_bits_set = 1;
+  syntax_bits_set = true;
   syntax_bits = bits;
-  case_fold = fold != 0;
+  case_fold = fold;
   eolbyte = eol;
 
   for (i = CHAR_MIN; i <= CHAR_MAX; ++i)
@@ -747,6 +753,11 @@ dfasyntax (reg_syntax_t bits, int fold, unsigned char eol)
           setbit (uc, newline);
           break;
         }
+
+      /* POSIX requires that the five bytes in "\n\r./" (including the
+         terminating NUL) cannot occur inside a multibyte character.  */
+      never_trail[uc] = (using_utf8 () ? (uc & 0xc0) != 0x80
+                         : strchr ("\n\r./", uc) != NULL);
     }
 }
 
@@ -782,7 +793,7 @@ setbit_case_fold_c (int b, charclass c)
 
 /* UTF-8 encoding allows some optimizations that we can't otherwise
    assume in a multibyte encoding.  */
-int
+bool
 using_utf8 (void)
 {
   static int utf8 = -1;
@@ -887,7 +898,7 @@ static wint_t wctok;                /* Wide character representation of the current
         lexptr += nbytes;                      \
         lexleft -= nbytes;                     \
       }                                                \
-  } while (0)
+  } while (false)
 
 #ifndef MIN
 # define MIN(a,b) ((a) < (b) ? (a) : (b))
@@ -1240,7 +1251,7 @@ parse_bracket_exp (void)
       lexptr = lexptr_saved;                   \
       lexleft = lexleft_saved;                 \
     }                                          \
-  while (0)
+  while (false)
 
 static token
 lex (void)
@@ -1942,7 +1953,7 @@ regexp (void)
 /* Main entry point for the parser.  S is a string to be parsed, len is the
    length of the string, so s can include NUL characters.  D is a pointer to
    the struct dfa to parse into.  */
-void
+static void
 dfaparse (char const *s, size_t len, struct dfa *d)
 {
   dfa = d;
@@ -2321,8 +2332,8 @@ state_separate_contexts (position_set const *s)
    Sets are stored as arrays of the elements, obeying a stack-like allocation
    scheme; the number of elements in each set deeper in the stack can be
    used to determine the address of a particular set's array.  */
-void
-dfaanalyze (struct dfa *d, int searchflag)
+static void
+dfaanalyze (struct dfa *d, bool searchflag)
 {
   /* Array allocated to hold position sets.  */
   position *posalloc = xnmalloc (d->nleaves, 2 * sizeof *posalloc);
@@ -2358,7 +2369,7 @@ dfaanalyze (struct dfa *d, int searchflag)
   putc ('\n', stderr);
 #endif
 
-  d->searchflag = searchflag != 0;
+  d->searchflag = searchflag;
   alloc_position_set (&merged, d->nleaves);
   d->follows = xcalloc (d->tindex, sizeof *d->follows);
 
@@ -2567,7 +2578,7 @@ dfaanalyze (struct dfa *d, int searchflag)
    If after comparing with every group there are characters remaining in C,
    create a new group labeled with the characters of C and insert this
    position in that group.  */
-void
+static void
 dfastate (state_num s, struct dfa *d, state_num trans[])
 {
   leaf_set grps[NOTCHAR];       /* As many as will ever be needed.  */
@@ -3198,15 +3209,20 @@ transit_state (struct dfa *d, state_num s, unsigned char const **pp,
    that are not a single byte character nor the first byte of a multibyte
    character.
 
-   Given DFA state d, use mbs_to_wchar to advance MBP until it reaches or
-   exceeds P.  If WCP is non-NULL, set *WCP to the final wide character
-   processed, or if no wide character is processed, set it to WEOF.
+   Given DFA state d, use mbs_to_wchar to advance MBP until it reaches
+   or exceeds P, and return the advanced MBP.  If WCP is non-NULL and
+   the result is greater than P, set *WCP to the final wide character
+   processed, or to WEOF if no wide character is processed.  Otherwise,
+   if WCP is non-NULL, *WCP may or may not be updated.
+
    Both P and MBP must be no larger than END.  */
 static unsigned char const *
 skip_remains_mb (struct dfa *d, unsigned char const *p,
                  unsigned char const *mbp, char const *end, wint_t *wcp)
 {
   wint_t wc = WEOF;
+  if (never_trail[*p])
+    return p;
   while (mbp < p)
     mbp += mbs_to_wchar (&wc, (char const *) mbp,
                          end - (char const *) mbp, d);
@@ -3235,7 +3251,7 @@ skip_remains_mb (struct dfa *d, unsigned char const *p,
     - word-delimiter-in-MB-locale: \<, \>, \b
     */
 static inline char *
-dfaexec_main (struct dfa *d, char const *begin, char *end, int allow_nl,
+dfaexec_main (struct dfa *d, char const *begin, char *end, bool allow_nl,
              size_t *count, bool multibyte)
 {
   state_num s, s1;              /* Current state.  */
@@ -3351,7 +3367,7 @@ dfaexec_main (struct dfa *d, char const *begin, char *end, int allow_nl,
                                                                         \
               mbp = p;                                                  \
               trans = d->trans;                                         \
-  } while (0)
+  } while (false)
 
               State_transition();
             }
@@ -3427,14 +3443,14 @@ dfaexec_main (struct dfa *d, char const *begin, char *end, int allow_nl,
 
 static char *
 dfaexec_mb (struct dfa *d, char const *begin, char *end,
-            int allow_nl, size_t *count, int *backref)
+            bool allow_nl, size_t *count, bool *backref)
 {
   return dfaexec_main (d, begin, end, allow_nl, count, true);
 }
 
 static char *
 dfaexec_sb (struct dfa *d, char const *begin, char *end,
-            int allow_nl, size_t *count, int *backref)
+            bool allow_nl, size_t *count, bool *backref)
 {
   return dfaexec_main (d, begin, end, allow_nl, count, false);
 }
@@ -3443,9 +3459,9 @@ dfaexec_sb (struct dfa *d, char const *begin, char *end,
    any regexp that uses a construct not supported by this code.  */
 static char *
 dfaexec_noop (struct dfa *d, char const *begin, char *end,
-              int allow_nl, size_t *count, int *backref)
+              bool allow_nl, size_t *count, bool *backref)
 {
-  *backref = 1;
+  *backref = true;
   return (char *) begin;
 }
 
@@ -3454,7 +3470,7 @@ dfaexec_noop (struct dfa *d, char const *begin, char *end,
 
 char *
 dfaexec (struct dfa *d, char const *begin, char *end,
-         int allow_nl, size_t *count, int *backref)
+         bool allow_nl, size_t *count, bool *backref)
 {
   return d->dfaexec (d, begin, end, allow_nl, count, backref);
 }
@@ -3492,7 +3508,7 @@ free_mbdata (struct dfa *d)
 
 /* Initialize the components of a dfa that the other routines don't
    initialize for themselves.  */
-void
+static void
 dfainit (struct dfa *d)
 {
   memset (d, 0, sizeof *d);
@@ -3648,7 +3664,7 @@ dfassbuild (struct dfa *d)
 
 /* Parse and analyze a single string of the given length.  */
 void
-dfacomp (char const *s, size_t len, struct dfa *d, int searchflag)
+dfacomp (char const *s, size_t len, struct dfa *d, bool searchflag)
 {
   dfainit (d);
   dfaparse (s, len, d);
diff --git a/dfa.h b/dfa.h
index 18be7f5..242f4cf 100644
--- a/dfa.h
+++ b/dfa.h
@@ -57,12 +57,12 @@ extern void dfamustfree (struct dfamust *);
 /* dfasyntax() takes three arguments; the first sets the syntax bits described
    earlier in this file, the second sets the case-folding flag, and the
    third specifies the line terminator. */
-extern void dfasyntax (reg_syntax_t, int, unsigned char);
+extern void dfasyntax (reg_syntax_t, bool, unsigned char);
 
 /* Compile the given string of the given length into the given struct dfa.
    Final argument is a flag specifying whether to build a searching or an
    exact matcher. */
-extern void dfacomp (char const *, size_t, struct dfa *, int);
+extern void dfacomp (char const *, size_t, struct dfa *, bool);
 
 /* Search through a buffer looking for a match to the given struct dfa.
    Find the first occurrence of a string matching the regexp in the
@@ -71,13 +71,13 @@ extern void dfacomp (char const *, size_t, struct dfa *, int);
    points to the beginning of the buffer, and END points to the first byte
    after its end.  Note however that we store a sentinel byte (usually
    newline) in *END, so the actual buffer must be one byte longer.
-   When NEWLINE is nonzero, newlines may appear in the matching string.
+   When ALLOW_NL is true, newlines may appear in the matching string.
    If COUNT is non-NULL, increment *COUNT once for each newline processed.
    Finally, if BACKREF is non-NULL set *BACKREF to indicate whether we
-   encountered a back-reference (1) or not (0).  The caller may use this
-   to decide whether to fall back on a backtracking matcher. */
+   encountered a back-reference.  The caller can use this to decide
+   whether to fall back on a backtracking matcher.  */
 extern char *dfaexec (struct dfa *d, char const *begin, char *end,
-                      int newline, size_t *count, int *backref);
+                      bool allow_nl, size_t *count, bool *backref);
 
 /* Return a superset for D.  The superset matches everything that D
    matches, along with some other strings (though the latter should be
@@ -91,22 +91,6 @@ extern bool dfaisfast (struct dfa const *) _GL_ATTRIBUTE_PURE;
 /* Free the storage held by the components of a struct dfa. */
 extern void dfafree (struct dfa *);
 
-/* Entry points for people who know what they're doing. */
-
-/* Initialize the components of a struct dfa. */
-extern void dfainit (struct dfa *);
-
-/* Incrementally parse a string of given length into a struct dfa. */
-extern void dfaparse (char const *, size_t, struct dfa *);
-
-/* Analyze a parsed regexp; second argument tells whether to build a searching
-   or an exact matcher. */
-extern void dfaanalyze (struct dfa *, int);
-
-/* Compute, for each possible character, the transitions out of a given
-   state, storing them in an array of integers. */
-extern void dfastate (ptrdiff_t, struct dfa *, ptrdiff_t []);
-
 /* Error handling. */
 
 /* dfawarn() is called by the regexp routines whenever a regex is compiled
@@ -120,4 +104,4 @@ extern void dfawarn (const char *);
    The user must supply a dfaerror.  */
 extern _Noreturn void dfaerror (const char *);
 
-extern int using_utf8 (void);
+extern bool using_utf8 (void);
diff --git a/re.c b/re.c
index c0d2e90..7513076 100644
--- a/re.c
+++ b/re.c
@@ -254,7 +254,7 @@ research(Regexp *rp, char *str, int start,
         size_t len, int flags)
 {
        const char *ret = str;
-       int try_backref = false;
+       bool try_backref = false;
        int need_start;
        int no_bol;
        int res;

-----------------------------------------------------------------------

Summary of changes:
 ChangeLog |    5 +++++
 dfa.c     |   66 ++++++++++++++++++++++++++++++++++++++-----------------------
 dfa.h     |   30 +++++++---------------------
 re.c      |    2 +-
 4 files changed, 54 insertions(+), 49 deletions(-)


hooks/post-receive
-- 
gawk
[Prev in Thread]
Current Thread
[Next in Thread]
[gawk-diffs] [SCM] gawk branch, gawk-4.1-stable, updated. gawk-4.1.0-880-g7816969, Arnold Robbins <=
Prev by Date: [gawk-diffs] [SCM] gawk branch, porting, updated. gawk-4.1.0-1766-g4c4a94f
Next by Date: [gawk-diffs] [SCM] gawk branch, master, updated. gawk-4.1.0-1768-ge299ba4
Previous by thread: [gawk-diffs] [SCM] gawk branch, porting, updated. gawk-4.1.0-1766-g4c4a94f
Next by thread: [gawk-diffs] [SCM] gawk branch, master, updated. gawk-4.1.0-1768-ge299ba4
Index(es):
- Date
- Thread