gawk-diffs
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[gawk-diffs] [SCM] gawk branch, master, updated. gawk-4.1.0-2355-g295eef2


From: Arnold Robbins
Subject: [gawk-diffs] [SCM] gawk branch, master, updated. gawk-4.1.0-2355-g295eef2
Date: Tue, 29 Nov 2016 18:06:31 +0000 (UTC)

This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "gawk".

The branch, master has been updated
       via  295eef206ed65daa9801fc72875b34994b23ca01 (commit)
      from  4931b67d7efa50576cea9f3045cc9d70ea779f2e (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.

- Log -----------------------------------------------------------------
http://git.sv.gnu.org/cgit/gawk.git/commit/?id=295eef206ed65daa9801fc72875b34994b23ca01

commit 295eef206ed65daa9801fc72875b34994b23ca01
Author: Arnold D. Robbins <address@hidden>
Date:   Tue Nov 29 20:06:08 2016 +0200

    Add dfacopysyntax function and use it.

diff --git a/ChangeLog b/ChangeLog
index 051e83e..b2f0e8c 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,9 +1,22 @@
 2016-11-29         Arnold D. Robbins     <address@hidden>
 
+       Remove redundant flag from dfa:
+
        * dfa.c (dfasyntax): Use RE_ICASE instead of DFA_CASE_FOLD.
        * dfa.h (DFA_CASE_FOLD): Removed.
        * re.c (make_regexp): Use RE_ICASE for regex and dfa. Yay!
 
+       Unrelated: Don't have to recompute syntax stuff every time
+       we compile a regexp.
+
+       * dfa.c (dfacopysyntax): New function.
+       (dfaalloc): Zero out the newly allocated memory.
+       * dfa.h (dfacopysyntax): Declare it.
+       * re.c (make_regexp): Declare two static dfaregs, one for
+       with and without ignorecase. Compute the syntax once for each,
+       then use dfacopysyntax to copy the settings when compiling
+       a regexp.
+
 2016-11-28         Arnold D. Robbins     <address@hidden>
 
        Make gawk compile on HP-UX 11.33.
diff --git a/dfa.c b/dfa.c
index cd7dce6..0a23105 100644
--- a/dfa.c
+++ b/dfa.c
@@ -805,6 +805,23 @@ char_context (struct dfa const *dfa, unsigned char c)
   return CTX_NONE;
 }
 
+/* Copy the syntax settings from one dfa instance to another.
+   Saves considerable computation time if compiling many regular expressions
+   based on the same setting.  */
+void
+dfacopysyntax (struct dfa *to, const struct dfa *from)
+{
+  to->dfaexec = from->dfaexec;
+  to->simple_locale = from->simple_locale;
+  to->localeinfo = from->localeinfo;
+
+  to->fast = from->fast;
+
+  to->canychar = from->canychar;
+  to->lex.cur_mb_len = from->lex.cur_mb_len;
+  to->syntax = from->syntax;
+}
+
 /* Set a bit in the charclass for the given wchar_t.  Do nothing if WC
    is represented by a multi-byte sequence.  Even for MB_CUR_MAX == 1,
    this may happen when folding case in weird Turkish locales where
@@ -3999,7 +4016,12 @@ dfamustfree (struct dfamust *dm)
 struct dfa *
 dfaalloc (void)
 {
-  return xmalloc (sizeof (struct dfa));
+  void *p = xmalloc (sizeof (struct dfa));
+  if (p)
+    {
+      memset (p, 0, sizeof (struct dfa));
+    }
+  return p;
 }
 
 /* Initialize DFA.  */
diff --git a/dfa.h b/dfa.h
index 0fd9b2c..c68b4df 100644
--- a/dfa.h
+++ b/dfa.h
@@ -110,6 +110,11 @@ extern struct dfa *dfasuperset (struct dfa const *d) _GL_ATTRIBUTE_PURE;
 /* The DFA is likely to be fast.  */
 extern bool dfaisfast (struct dfa const *) _GL_ATTRIBUTE_PURE;
 
+/* Copy the syntax settings from one dfa instance to another.
+   Saves considerable computation time if compiling many regular expressions
+   based on the same setting.  */
+extern void dfacopysyntax (struct dfa *to, const struct dfa *from);
+
 /* Free the storage held by the components of a struct dfa. */
 extern void dfafree (struct dfa *);
 
diff --git a/re.c b/re.c
index 6c1e360..5be3d17 100644
--- a/re.c
+++ b/re.c
@@ -49,8 +49,8 @@ make_regexp(const char *s, size_t len, bool ignorecase, bool dfa, bool canfatal)
        int c, c2;
        static bool first = true;
        static bool no_dfa = false;
-       reg_syntax_t dfa_syn;
        int i;
+       static struct dfa* dfaregs[2] = { NULL, NULL };
 
        /*
         * The number of bytes in the current multibyte character.
@@ -62,9 +62,9 @@ make_regexp(const char *s, size_t len, bool ignorecase, bool dfa, bool canfatal)
        memset(&mbs, 0, sizeof(mbstate_t)); /* Initialize.  */
 
        if (first) {
-               first = false;
                /* for debugging and testing */
                no_dfa = (getenv("GAWK_NO_DFA") != NULL);
+               /* don't set first to false here, we do it below */
        }
 
        /* always check */
@@ -202,9 +202,14 @@ make_regexp(const char *s, size_t len, bool ignorecase, bool dfa, bool canfatal)
                syn &= ~RE_ICASE;
        }
 
-       dfa_syn = syn;
-       if (ignorecase)
-               dfa_syn |= RE_ICASE;
+       /* initialize dfas to hold syntax */
+       if (first) {
+               first = false;
+               dfaregs[0] = dfaalloc();
+               dfaregs[1] = dfaalloc();
+               dfasyntax(dfaregs[0], & localeinfo, syn, DFA_ANCHOR);
+               dfasyntax(dfaregs[1], & localeinfo, syn | RE_ICASE, DFA_ANCHOR);
+       }
 
        re_set_syntax(syn);
 
@@ -222,7 +227,7 @@ make_regexp(const char *s, size_t len, bool ignorecase, bool dfa, bool canfatal)
        rp->pat.newline_anchor = false; /* don't get \n in middle of string */
        if (dfa && ! no_dfa) {
                rp->dfareg = dfaalloc();
-               dfasyntax(rp->dfareg, & localeinfo, dfa_syn, DFA_ANCHOR);
+               dfacopysyntax(rp->dfareg, dfaregs[ignorecase]);
                dfacomp(buf, len, rp->dfareg, true);
        } else
                rp->dfareg = NULL;

-----------------------------------------------------------------------

Summary of changes:
 ChangeLog |   13 +++++++++++++
 dfa.c     |   24 +++++++++++++++++++++++-
 dfa.h     |    5 +++++
 re.c      |   17 +++++++++++------
 4 files changed, 52 insertions(+), 7 deletions(-)


hooks/post-receive
-- 
gawk



reply via email to

[Prev in Thread] Current Thread [Next in Thread]