[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[gawk-diffs] [SCM] gawk branch, master, updated. gawk-4.1.0-1980-g45ee448
From: |
Arnold Robbins |
Subject: |
[gawk-diffs] [SCM] gawk branch, master, updated. gawk-4.1.0-1980-g45ee448 |
Date: |
Thu, 8 Sep 2016 02:52:02 +0000 (UTC) |
This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "gawk".
The branch, master has been updated
via 45ee4486199d449b9aeaeadcf98523d82f943ec8 (commit)
via ae953e96bdf5f52288bfe4b42c56269764c7ff0d (commit)
via a159bf0b87d8bbb576d736ea54c97d0271166620 (commit)
via c49b94108f48ea56f705d8549988c00289a0e2a2 (commit)
via 40527b84a7064ad233e41457ab0a5504917cecb2 (commit)
from 27abead1f32048b6a98a99a110c8541e2746f3b1 (commit)
Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.
- Log -----------------------------------------------------------------
http://git.sv.gnu.org/cgit/gawk.git/commit/?id=45ee4486199d449b9aeaeadcf98523d82f943ec8
commit 45ee4486199d449b9aeaeadcf98523d82f943ec8
Merge: a159bf0 ae953e9
Author: Arnold D. Robbins <address@hidden>
Date: Thu Sep 8 05:51:53 2016 +0300
Merge branch 'gawk-4.1-stable'
http://git.sv.gnu.org/cgit/gawk.git/commit/?id=a159bf0b87d8bbb576d736ea54c97d0271166620
commit a159bf0b87d8bbb576d736ea54c97d0271166620
Author: Arnold D. Robbins <address@hidden>
Date: Thu Sep 8 05:50:26 2016 +0300
Merge grep dfa.
diff --git a/ChangeLog b/ChangeLog
index ecfb2ff..5542d69 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,8 @@
+2016-09-08 Paul Eggert <address@hidden>
+
+ * dfa.c, dfa.h: Sync with grep.
+ * re.c (make_regexp): Adjust to DFA API changes.
+
2016-09-08 Arnold D. Robbins <address@hidden>
* command.y: Update license text to version 3. Oops.
diff --git a/dfa.c b/dfa.c
index 5d68af2..cf9b2ba 100644
--- a/dfa.c
+++ b/dfa.c
@@ -59,7 +59,6 @@
#define _(str) gettext (str)
#include <wchar.h>
-#include <wctype.h>
#include "xalloc.h"
@@ -363,6 +362,10 @@ struct regex_syntax
/* Flag for case-folding letters into sets. */
bool case_fold;
+ /* True if ^ and $ match only the start and end of data, and do not match
+ end-of-line within data. */
+ bool anchor;
+
/* End-of-line byte in data. */
unsigned char eolbyte;
@@ -782,7 +785,7 @@ unibyte_word_constituent (struct dfa const *dfa, unsigned char c)
static int
char_context (struct dfa const *dfa, unsigned char c)
{
- if (c == dfa->syntax.eolbyte)
+ if (c == dfa->syntax.eolbyte && !dfa->syntax.anchor)
return CTX_NEWLINE;
if (unibyte_word_constituent (dfa, c))
return CTX_LETTER;
@@ -2699,18 +2702,9 @@ dfastate (state_num s, struct dfa *d, state_num trans[])
is to fail miserably. */
if (d->searchflag)
{
- /* Find the state(s) corresponding to the positions of state 0. */
- copy (&d->states[0].elems, &follows);
- separate_contexts = state_separate_contexts (&follows);
- state = state_index (d, &follows, separate_contexts ^ CTX_ANY);
- if (separate_contexts & CTX_NEWLINE)
- state_newline = state_index (d, &follows, CTX_NEWLINE);
- else
- state_newline = state;
- if (separate_contexts & CTX_LETTER)
- state_letter = state_index (d, &follows, CTX_LETTER);
- else
- state_letter = state;
+ state_newline = 0;
+ state_letter = d->min_trcount - 1;
+ state = d->initstate_notbol;
for (i = 0; i < NOTCHAR; ++i)
trans[i] = unibyte_word_constituent (d, i) ? state_letter : state;
@@ -3075,16 +3069,14 @@ transit_state (struct dfa *d, state_num s, unsigned char const **pp,
Both P and MBP must be no larger than END. */
static unsigned char const *
skip_remains_mb (struct dfa *d, unsigned char const *p,
- unsigned char const *mbp, char const *end, wint_t *wcp)
+ unsigned char const *mbp, char const *end)
{
- wint_t wc = WEOF;
+ wint_t wc;
if (d->syntax.never_trail[*p])
return p;
while (mbp < p)
mbp += mbs_to_wchar (&wc, (char const *) mbp,
end - (char const *) mbp, d);
- if (wcp != NULL)
- *wcp = wc;
return mbp;
}
@@ -3141,46 +3133,22 @@ dfaexec_main (struct dfa *d, char const *begin, char *end, bool allow_nl,
for (;;)
{
- if (multibyte)
+ while ((t = trans[s]) != NULL)
{
- while ((t = trans[s]) != NULL)
+ if (s < d->min_trcount)
{
- s1 = s;
-
- if (s < d->min_trcount)
+ if (!multibyte || d->states[s].mbps.nelem == 0)
{
- if (d->min_trcount == 1)
- {
- if (d->states[s].mbps.nelem == 0)
- {
- do
- {
- while (t[*p] == 0)
- p++;
- p = mbp = skip_remains_mb (d, p, mbp, end, NULL);
- }
- while (t[*p] == 0);
- }
- else
- p = mbp = skip_remains_mb (d, p, mbp, end, NULL);
- }
- else
- {
- wint_t wc;
- mbp = skip_remains_mb (d, p, mbp, end, &wc);
-
- /* If d->min_trcount is greater than 1, maybe
- transit to another initial state after skip. */
- if (p < mbp)
- {
- /* It's CTX_LETTER or CTX_NONE. CTX_NEWLINE
- cannot happen, as we assume that a newline
- is always a single byte character. */
- s1 = s = d->initstate_notbol;
- p = mbp;
- }
- }
+ while (t[*p] == s)
+ p++;
}
+ if (multibyte)
+ p = mbp = skip_remains_mb (d, p, mbp, end);
+ }
+
+ if (multibyte)
+ {
+ s1 = s;
if (d->states[s].mbps.nelem == 0
|| d->localeinfo.sbctowc[*p] != WEOF || (char *) p >= end)
@@ -3196,22 +3164,7 @@ dfaexec_main (struct dfa *d, char const *begin, char *end, bool allow_nl,
trans = d->trans;
}
}
- }
- else
- {
- if (s == 0)
- {
- t = trans[s];
- if (t)
- {
- while (t[*p] == 0)
- p++;
- s1 = 0;
- s = t[*p++];
- }
- }
-
- while ((t = trans[s]) != NULL)
+ else
{
s1 = t[*p++];
t = trans[s1];
@@ -3222,6 +3175,11 @@ dfaexec_main (struct dfa *d, char const *begin, char *end, bool allow_nl,
s1 = tmp; /* swap */
break;
}
+ if (s < d->min_trcount)
+ {
+ while (t[*p] == s1)
+ p++;
+ }
s = t[*p++];
}
}
@@ -3239,19 +3197,25 @@ dfaexec_main (struct dfa *d, char const *begin, char *end, bool allow_nl,
nlcount++;
mbp = p;
- s = allow_nl ? d->newlines[s1] : 0;
+ s = (allow_nl ? d->newlines[s1]
+ : d->syntax.sbit[eol] == CTX_NEWLINE ? 0
+ : d->syntax.sbit[eol] == CTX_LETTER ? d->min_trcount - 1
+ : d->initstate_notbol);
}
else if (d->fails[s])
{
- if (d->success[s] & d->syntax.sbit[*p])
+ if ((d->success[s] & d->syntax.sbit[*p])
+ || ((char *) p == end
+ && ACCEPTS_IN_CONTEXT (d->states[s].context, CTX_NEWLINE, s,
+ *d)))
goto done;
+ if (multibyte && s < d->min_trcount)
+ p = mbp = skip_remains_mb (d, p, mbp, end);
+
s1 = s;
if (!multibyte || d->states[s].mbps.nelem == 0
- || (*p == eol && !allow_nl)
- || (*p == '\n' && !(d->syntax.syntax_bits & RE_DOT_NEWLINE))
- || (*p == '\0' && (d->syntax.syntax_bits & RE_DOT_NOT_NULL))
- || (char *) p >= end)
+ || d->localeinfo.sbctowc[*p] != WEOF || (char *) p >= end)
{
/* If a input character does not match ANYCHAR, do it
like a single-byte character. */
@@ -3813,9 +3777,11 @@ dfamust (struct dfa const *d)
bool exact = false;
bool begline = false;
bool endline = false;
+ size_t rj;
bool need_begline = false;
bool need_endline = false;
bool case_fold_unibyte = d->syntax.case_fold && MB_CUR_MAX == 1;
+ struct dfamust *dm;
for (ri = 0; ri < d->tindex; ++ri)
{
@@ -3992,7 +3958,7 @@ dfamust (struct dfa const *d)
}
}
- size_t rj = ri + 2;
+ rj = ri + 2;
if (d->tokens[ri + 1] == CAT)
{
for (; rj < d->tindex - 1; rj += 2)
@@ -4021,7 +3987,7 @@ dfamust (struct dfa const *d)
}
done:;
- struct dfamust *dm = NULL;
+ dm = NULL;
if (*result)
{
dm = xmalloc (sizeof *dm);
@@ -4057,7 +4023,7 @@ dfaalloc (void)
/* Initialize DFA. */
void
dfasyntax (struct dfa *dfa, struct localeinfo const *linfo,
- reg_syntax_t bits, bool fold, unsigned char eol)
+ reg_syntax_t bits, int dfaopts)
{
int i;
memset (dfa, 0, offsetof (struct dfa, dfaexec));
@@ -4070,9 +4036,10 @@ dfasyntax (struct dfa *dfa, struct localeinfo const *linfo,
dfa->canychar = -1;
dfa->lex.cur_mb_len = 1;
dfa->syntax.syntax_bits_set = true;
+ dfa->syntax.case_fold = (dfaopts & DFA_CASE_FOLD) != 0;
+ dfa->syntax.anchor = (dfaopts & DFA_ANCHOR) != 0;
+ dfa->syntax.eolbyte = dfaopts & DFA_EOL_NUL ? '\0' : '\n';
dfa->syntax.syntax_bits = bits;
- dfa->syntax.case_fold = fold;
- dfa->syntax.eolbyte = eol;
for (i = CHAR_MIN; i <= CHAR_MAX; ++i)
{
diff --git a/dfa.h b/dfa.h
index 1fd37ec..8608b10 100644
--- a/dfa.h
+++ b/dfa.h
@@ -26,7 +26,11 @@
#endif /* HAVE_STDBOOL_H */
#include <stddef.h>
-#define _GL_ATTRIBUTE_MALLOC
+#if 3 <= __GNUC__
+# define _GL_ATTRIBUTE_MALLOC __attribute__ ((__malloc__))
+#else
+# define _GL_ATTRIBUTE_MALLOC
+#endif
struct localeinfo; /* See localeinfo.h. */
@@ -50,15 +54,29 @@ struct dfa;
calling dfafree() on it. */
extern struct dfa *dfaalloc (void) _GL_ATTRIBUTE_MALLOC;
+/* DFA options that can be ORed together, for dfasyntax's 4th arg. */
+enum
+ {
+ /* ^ and $ match only the start and end of data, and do not match
+ end-of-line within data. This is always false for grep, but
+ possibly true for other apps. */
+ DFA_ANCHOR = 1 << 0,
+
+ /* Ignore case while matching. */
+ DFA_CASE_FOLD = 1 << 1,
+
+ /* '\0' in data is end-of-line, instead of the traditional '\n'. */
+ DFA_EOL_NUL = 1 << 2
+ };
+
/* Initialize or reinitialize a DFA. This must be called before
any of the routines below. The arguments are:
1. The DFA to operate on.
2. Information about the current locale.
- 3. The syntax bits described earlier in this file.
- 4. The case-folding flag.
- 5. The line terminator. */
+ 3. Syntax bits described in regex.h.
+ 4. Additional DFA options described above. */
extern void dfasyntax (struct dfa *, struct localeinfo const *,
- reg_syntax_t, bool, unsigned char);
+ reg_syntax_t, int);
/* Build and return the struct dfamust from the given struct dfa. */
extern struct dfamust *dfamust (struct dfa const *);
diff --git a/helpers/ChangeLog b/helpers/ChangeLog
index 0958a02..4706223 100644
--- a/helpers/ChangeLog
+++ b/helpers/ChangeLog
@@ -1,3 +1,7 @@
+2016-09-08 Paul Eggert <address@hidden>
+
+ * testdfa.c: Adjust to DFA API changes.
+
2016-08-25 Arnold D. Robbins <address@hidden>
* 4.1.4: Release tar ball made.
diff --git a/helpers/testdfa.c b/helpers/testdfa.c
index 4495e11..fa7715f 100644
--- a/helpers/testdfa.c
+++ b/helpers/testdfa.c
@@ -44,6 +44,7 @@
#define _Noreturn
#define _GL_ATTRIBUTE_PURE
#include "dfa.h"
+#include "localeinfo.h"
const char *regexflags2str(int flags);
char *databuf(int fd);
@@ -71,7 +72,8 @@ void usage(const char *myname)
int main(int argc, char **argv)
{
- int c, ret, try_backref;
+ int c, ret;
+ bool try_backref;
struct re_pattern_buffer pat;
struct re_registers regs;
struct dfa *dfareg;
@@ -84,6 +86,7 @@ int main(int argc, char **argv)
char save;
size_t count = 0;
char *place;
+ struct localeinfo localeinfo;
if (argc < 2)
usage(argv[0]);
@@ -158,7 +161,6 @@ int main(int argc, char **argv)
dfa_syn = syn;
if (ignorecase)
dfa_syn |= RE_ICASE;
- dfasyntax(dfa_syn, ignorecase, '\n');
re_set_syntax(syn);
if ((rerr = re_compile_pattern(pattern, len, & pat)) != NULL) {
@@ -171,6 +173,10 @@ int main(int argc, char **argv)
pat.newline_anchor = false; /* don't get \n in middle of string */
dfareg = dfaalloc();
+ init_localeinfo(&localeinfo);
+ dfasyntax(dfareg, &localeinfo, dfa_syn,
+ ignorecase ? DFA_CASE_FOLD : 0);
+
printf("Calling dfacomp(%s, %d, %p, true)\n",
pattern, (int) len, dfareg);
diff --git a/re.c b/re.c
index 6a100db..69cc50e 100644
--- a/re.c
+++ b/re.c
@@ -227,7 +227,8 @@ make_regexp(const char *s, size_t len, bool ignorecase, bool dfa, bool canfatal)
rp->pat.newline_anchor = false; /* don't get \n in middle of string */
if (dfa && ! no_dfa) {
rp->dfareg = dfaalloc();
- dfasyntax(rp->dfareg, & localeinfo, dfa_syn, ignorecase, '\n');
+ dfasyntax(rp->dfareg, & localeinfo, dfa_syn,
+ ignorecase ? DFA_CASE_FOLD : 0);
dfacomp(buf, len, rp->dfareg, true);
} else
rp->dfareg = NULL;
http://git.sv.gnu.org/cgit/gawk.git/commit/?id=c49b94108f48ea56f705d8549988c00289a0e2a2
commit c49b94108f48ea56f705d8549988c00289a0e2a2
Merge: 27abead 40527b8
Author: Arnold D. Robbins <address@hidden>
Date: Thu Sep 8 05:47:06 2016 +0300
Merge branch 'gawk-4.1-stable'
-----------------------------------------------------------------------
Summary of changes:
ChangeLog | 9 ++++
command.y | 2 +-
dfa.c | 129 ++++++++++++++++++++---------------------------------
dfa.h | 28 +++++++++---
helpers/ChangeLog | 4 ++
helpers/testdfa.c | 10 ++++-
pc/ChangeLog | 4 ++
pc/Makefile.tst | 9 +++-
re.c | 3 +-
9 files changed, 106 insertions(+), 92 deletions(-)
hooks/post-receive
--
gawk
[Prev in Thread] |
Current Thread |
[Next in Thread] |
- [gawk-diffs] [SCM] gawk branch, master, updated. gawk-4.1.0-1980-g45ee448,
Arnold Robbins <=