gawk-diffs
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[gawk-diffs] [SCM] gawk branch, gawk-4.1-stable, updated. gawk-4.1.0-728-gc5137ae


From: Arnold Robbins
Subject: [gawk-diffs] [SCM] gawk branch, gawk-4.1-stable, updated. gawk-4.1.0-728-gc5137ae
Date: Sun, 02 Aug 2015 17:41:05 +0000

This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "gawk".

The branch, gawk-4.1-stable has been updated
       via  c5137ae530c49765049adb53777c79ebb7607ebe (commit)
       via  fa460b53f8173ebee36dbe672a9d95d3d69c9a9c (commit)
      from  aee0b2e5e1ac7c88de11f65c55a8b35a5b38f3ec (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.

- Log -----------------------------------------------------------------
http://git.sv.gnu.org/cgit/gawk.git/commit/?id=c5137ae530c49765049adb53777c79ebb7607ebe

commit c5137ae530c49765049adb53777c79ebb7607ebe
Author: Arnold D. Robbins <address@hidden>
Date:   Sun Aug 2 20:40:50 2015 +0300

    Bug fix in revoutput extension.

diff --git a/extension/ChangeLog b/extension/ChangeLog
index 993729a..3cd932b 100644
--- a/extension/ChangeLog
+++ b/extension/ChangeLog
@@ -1,3 +1,9 @@
+2015-08-02         Arnold D. Robbins     <address@hidden>
+
+       * revoutput.c (init_revoutput): Don't install REVOUT if it's
+       there already. Makes the extension usable with -v.
+       * revoutput.3am: Add a BUGS section.
+
 2015-06-17         Andrew J. Schorr     <address@hidden>
 
        * inplace.3am (BUGS): Document that ACLs are not preserved, and
diff --git a/extension/revoutput.3am b/extension/revoutput.3am
index 9c8f062..8620935 100644
--- a/extension/revoutput.3am
+++ b/extension/revoutput.3am
@@ -1,4 +1,4 @@
-.TH REVOUTPUT 3am "Jan 15 2013" "Free Software Foundation" "GNU Awk Extension Modules"
+.TH REVOUTPUT 3am "Aug 02 2015" "Free Software Foundation" "GNU Awk Extension Modules"
 .SH NAME
 revoutput \- Reverse output strings sample extension
 .SH SYNOPSIS
@@ -35,6 +35,8 @@ The output from this program is:
 dlrow ,olleh
 .fi
 .ft R
+.SH BUGS
+This extension does not affect the default standard output.
 .SH "SEE ALSO"
 .IR "GAWK: Effective AWK Programming" ,
 .IR filefuncs (3am),
diff --git a/extension/revoutput.c b/extension/revoutput.c
index ae4b444..6925716 100644
--- a/extension/revoutput.c
+++ b/extension/revoutput.c
@@ -7,7 +7,7 @@
  */
 
 /*
- * Copyright (C) 2012, 2013 the Free Software Foundation, Inc.
+ * Copyright (C) 2012, 2013, 2015 the Free Software Foundation, Inc.
  * 
  * This file is part of GAWK, the GNU implementation of the
  * AWK Programming Language.
@@ -47,7 +47,7 @@
 
 static const gawk_api_t *api;  /* for convenience macros to work */
 static awk_ext_id_t *ext_id;
-static const char *ext_version = "revoutput extension: version 1.0";
+static const char *ext_version = "revoutput extension: version 1.1";
 
 static awk_bool_t init_revoutput(void);
 static awk_bool_t (*init_func)(void) = init_revoutput;
@@ -120,11 +120,14 @@ init_revoutput()
 
        register_output_wrapper(& output_wrapper);
 
-       make_number(0.0, & value);      /* init to false */
-       if (! sym_update("REVOUT", & value)) {
-               warning(ext_id, _("revoutput: could not initialize REVOUT variable"));
+       if (! sym_lookup("REVOUT", AWK_SCALAR, & value)) {
+               /* only install it if not there, e.g. -v REVOUT=1 */
+               make_number(0.0, & value);      /* init to false */
+               if (! sym_update("REVOUT", & value)) {
+                       warning(ext_id, _("revoutput: could not initialize REVOUT variable"));
 
-               return awk_false;
+                       return awk_false;
+               }
        }
 
        return awk_true;

http://git.sv.gnu.org/cgit/gawk.git/commit/?id=fa460b53f8173ebee36dbe672a9d95d3d69c9a9c

commit fa460b53f8173ebee36dbe672a9d95d3d69c9a9c
Author: Arnold D. Robbins <address@hidden>
Date:   Sun Aug 2 20:29:46 2015 +0300

    Sync dfa.c with GNU grep.

diff --git a/ChangeLog b/ChangeLog
index bbca613..c8d655a 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,7 @@
+2015-08-02         Arnold D. Robbins     <address@hidden>
+
+       * dfa.c: Sync with GNU grep. Yet again.
+
 2015-07-21         Arnold D. Robbins     <address@hidden>
 
        * dfa.c: Sync with GNU grep.
diff --git a/dfa.c b/dfa.c
index 782ef5c..c55a5c9 100644
--- a/dfa.c
+++ b/dfa.c
@@ -317,8 +317,6 @@ typedef struct
   size_t hash;                  /* Hash of the positions of this state.  */
   position_set elems;           /* Positions this state could match.  */
   unsigned char context;        /* Context from previous state.  */
-  bool has_backref;            /* This state matches a \<digit>.  */
-  bool has_mbcset;             /* This state matches a MBCSET.  */
   unsigned short constraint;    /* Constraint for this state to accept.  */
   token first_end;              /* Token value of the first END in elems.  */
   position_set mbps;            /* Positions which can match multibyte
@@ -2207,8 +2205,6 @@ state_index (struct dfa *d, position_set const *s, int context)
   alloc_position_set (&d->states[i].elems, s->nelem);
   copy (s, &d->states[i].elems);
   d->states[i].context = context;
-  d->states[i].has_backref = false;
-  d->states[i].has_mbcset = false;
   d->states[i].constraint = 0;
   d->states[i].first_end = 0;
   d->states[i].mbps.nelem = 0;
@@ -2224,10 +2220,7 @@ state_index (struct dfa *d, position_set const *s, int context)
           d->states[i].first_end = d->tokens[s->elems[j].index];
       }
     else if (d->tokens[s->elems[j].index] == BACKREF)
-      {
-        d->states[i].constraint = NO_CONSTRAINT;
-        d->states[i].has_backref = true;
-      }
+      d->states[i].constraint = NO_CONSTRAINT;
 
   ++d->sindex;
 
@@ -2686,9 +2679,6 @@ dfastate (state_num s, struct dfa *d, state_num trans[])
           if (d->tokens[pos.index] == MBCSET
               || d->tokens[pos.index] == ANYCHAR)
             {
-              /* MB_CUR_MAX > 1 */
-              if (d->tokens[pos.index] == MBCSET)
-                d->states[s].has_mbcset = true;
               /* ANYCHAR and MBCSET must match with a single character, so we
                  must put it to d->states[s].mbps, which contains the positions
                  which can match with a single character not a byte.  */
@@ -3400,15 +3390,18 @@ skip_remains_mb (struct dfa *d, unsigned char const *p,
    When ALLOW_NL is nonzero, newlines may appear in the matching string.
    If COUNT is non-NULL, increment *COUNT once for each newline processed.
    Finally, if BACKREF is non-NULL set *BACKREF to indicate whether we
-   encountered a back-reference (1) or not (0).  The caller may use this
-   to decide whether to fall back on a backtracking matcher.
-
-   If MULTIBYTE, the input consists of multibyte characters and/or
-   encoding-error bytes.  Otherwise, the input consists of single-byte
-   characters.  */
+   encountered a DFA-unfriendly construct.  The caller may use this to
+   decide whether to fall back on a matcher like regex.  If MULTIBYTE,
+   the input consists of multibyte characters and/or encoding-error bytes.
+   Otherwise, the input consists of single-byte characters.
+   Here is the list of features that make this DFA matcher punt:
+    - [M-N]-range-in-MB-locale: regex is up to 25% faster on [a-z]
+    - back-reference: (.)\1
+    - word-delimiter-in-MB-locale: \<, \>, \b
+    */
 static inline char *
-dfaexec_main (struct dfa *d, char const *begin, char *end,
-             int allow_nl, size_t *count, int *backref, bool multibyte)
+dfaexec_main (struct dfa *d, char const *begin, char *end, int allow_nl,
+             size_t *count, bool multibyte)
 {
   state_num s, s1;              /* Current state.  */
   unsigned char const *p, *mbp; /* Current input character.  */
@@ -3498,16 +3491,6 @@ dfaexec_main (struct dfa *d, char const *begin, char *end,
                  Use a macro to avoid the risk that they diverge.  */
 #define State_transition()                                              \
   do {                                                                  \
-              /* Falling back to the glibc matcher in this case gives   \
-                 better performance (up to 25% better on [a-z], for     \
-                 example) and enables support for collating symbols and \
-                 equivalence classes.  */                               \
-              if (d->states[s].has_mbcset && backref)                   \
-                {                                                       \
-                  *backref = 1;                                         \
-                  goto done;                                            \
-                }                                                       \
-                                                                        \
               /* Can match with a multibyte character (and multi-character \
                  collating element).  Transition table might be updated.  */ \
               s = transit_state (d, s, &p, (unsigned char *) end);      \
@@ -3581,11 +3564,7 @@ dfaexec_main (struct dfa *d, char const *begin, char *end,
       if (d->fails[s])
         {
           if (d->success[s] & sbit[*p])
-            {
-              if (backref)
-                *backref = d->states[s].has_backref;
-              goto done;
-            }
+            goto done;
 
           s1 = s;
           if (multibyte)
@@ -3615,14 +3594,24 @@ static char *
 dfaexec_mb (struct dfa *d, char const *begin, char *end,
             int allow_nl, size_t *count, int *backref)
 {
-  return dfaexec_main (d, begin, end, allow_nl, count, backref, true);
+  return dfaexec_main (d, begin, end, allow_nl, count, true);
 }
 
 static char *
 dfaexec_sb (struct dfa *d, char const *begin, char *end,
             int allow_nl, size_t *count, int *backref)
 {
-  return dfaexec_main (d, begin, end, allow_nl, count, backref, false);
+  return dfaexec_main (d, begin, end, allow_nl, count, false);
+}
+
+/* Always set *BACKREF and return BEGIN.  Use this wrapper for
+   any regexp that uses a construct not supported by this code.  */
+static char *
+dfaexec_noop (struct dfa *d, char const *begin, char *end,
+              int allow_nl, size_t *count, int *backref)
+{
+  *backref = 1;
+  return (char *) begin;
 }
 
 /* Like dfaexec_main (D, BEGIN, END, ALLOW_NL, COUNT, BACKREF, D->multibyte),
@@ -3688,6 +3677,31 @@ dfainit (struct dfa *d)
   d->fast = !d->multibyte;
 }
 
+/* Return true if every construct in D is supported by this DFA matcher.  */
+static bool _GL_ATTRIBUTE_PURE
+dfa_supported (struct dfa const *d)
+{
+  size_t i;
+  for (i = 0; i < d->tindex; i++)
+    {
+      switch (d->tokens[i])
+        {
+        case BEGWORD:
+        case ENDWORD:
+        case LIMWORD:
+        case NOTLIMWORD:
+          if (!d->multibyte)
+            continue;
+          /* fallthrough */
+
+        case BACKREF:
+        case MBCSET:
+          return false;
+        }
+    }
+  return true;
+}
+
 static void
 dfaoptimize (struct dfa *d)
 {
@@ -3785,10 +3799,8 @@ dfassbuild (struct dfa *d)
           if (d->multibyte)
             {
               /* These constraints aren't supported in a multibyte locale.
-                 Ignore them in the superset DFA, and treat them as
-                 backreferences in the main DFA.  */
+                 Ignore them in the superset DFA.  */
               sup->tokens[j++] = EMPTY;
-              d->tokens[i] = BACKREF;
               break;
             }
         default:
@@ -3818,8 +3830,17 @@ dfacomp (char const *s, size_t len, struct dfa *d, int searchflag)
   dfambcache (d);
   dfaparse (s, len, d);
   dfassbuild (d);
-  dfaoptimize (d);
-  dfaanalyze (d, searchflag);
+
+  if (dfa_supported (d))
+    {
+      dfaoptimize (d);
+      dfaanalyze (d, searchflag);
+    }
+  else
+    {
+      d->dfaexec = dfaexec_noop;
+    }
+
   if (d->superset)
     {
       d->fast = true;

-----------------------------------------------------------------------

Summary of changes:
 ChangeLog               |    4 ++
 dfa.c                   |  103 ++++++++++++++++++++++++++++-------------------
 extension/ChangeLog     |    6 +++
 extension/revoutput.3am |    4 +-
 extension/revoutput.c   |   15 ++++---
 5 files changed, 84 insertions(+), 48 deletions(-)


hooks/post-receive
-- 
gawk



reply via email to

[Prev in Thread] Current Thread [Next in Thread]