[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
master 37130fd500f: regex.c: Fix recent regression with mutually_exclusi
From: |
Stefan Monnier |
Subject: |
master 37130fd500f: regex.c: Fix recent regression with mutually_exclusive_p |
Date: |
Tue, 3 Oct 2023 10:13:33 -0400 (EDT) |
branch: master
commit 37130fd500fbf78ff0d0037aa6275f0f70a415dd
Author: Stefan Monnier <monnier@iro.umontreal.ca>
Commit: Stefan Monnier <monnier@iro.umontreal.ca>
regex.c: Fix recent regression with mutually_exclusive_p
The new analysis code ended up increasing the scope of an optimization
a bit too far. Reign it in.
* src/regex-emacs.c (struct mutexcl_data): Add `unconstrained` field.
(mutually_exclusive_one): Use and set it.
(mutually_exclusive_p): Initialize it.
* test/src/regex-emacs-tests.el (regexp-tests-backtrack-optimization):
Add test.
---
src/regex-emacs.c | 41 +++++++++++++++++++++++++++++++++--------
test/src/regex-emacs-tests.el | 9 +++++----
2 files changed, 38 insertions(+), 12 deletions(-)
diff --git a/src/regex-emacs.c b/src/regex-emacs.c
index ffb8891d3a6..95c3366652d 100644
--- a/src/regex-emacs.c
+++ b/src/regex-emacs.c
@@ -3899,6 +3899,7 @@ mutually_exclusive_charset (struct re_pattern_buffer
*bufp, re_char *p1,
struct mutexcl_data {
struct re_pattern_buffer *bufp;
re_char *p1;
+ bool unconstrained;
};
static bool
@@ -3907,7 +3908,32 @@ mutually_exclusive_one (re_char *p2, void *arg)
struct mutexcl_data *data = arg;
switch (*p2)
{
+ case succeed:
+ /* If `p1` matches, `succeed` can still match, so we should return
+ `false`. *BUT* when N iterations of `p1` and N+1 iterations of `p1`
+ match, the `succeed` that comes after N+1 always takes precedence
+ over the one after N because we always prefer a longer match, so
+ the succeed after N can actually be replaced by a "fail" without
+ changing the end result.
+ In this sense, "if `p1` matches, `succeed` can't match".
+ So we can return `true`.
+ *BUT* this only holds if we're sure that the N+1 will indeed succeed,
+ so we need to make sure there is no other matching operator between
+ the exit of the iteration and the `succeed`. */
+ return data->unconstrained;
+
+/* Remember that there may be an empty matching operator on the way.
+ If we return true, this is the "end" of this control flow path,
+ so it can't get in the way of a subsequent `succeed. */
+#define RETURN_CONSTRAIN(v) \
+ { bool tmp = (v); \
+ if (!tmp) \
+ data->unconstrained = false; \
+ return tmp; \
+ }
+
case endline:
+ RETURN_CONSTRAIN (mutually_exclusive_exactn (data->bufp, data->p1, p2));
case exactn:
return mutually_exclusive_exactn (data->bufp, data->p1, p2);
case charset:
@@ -3945,18 +3971,17 @@ mutually_exclusive_one (re_char *p2, void *arg)
return (*data->p1 == categoryspec && data->p1[1] == p2[1]);
case endbuf:
- case succeed:
return true;
case wordbeg:
- return (*data->p1 == notsyntaxspec && data->p1[1] == Sword);
+ RETURN_CONSTRAIN (*data->p1 == notsyntaxspec && data->p1[1] == Sword);
case wordend:
- return (*data->p1 == syntaxspec && data->p1[1] == Sword);
+ RETURN_CONSTRAIN (*data->p1 == syntaxspec && data->p1[1] == Sword);
case symbeg:
- return (*data->p1 == notsyntaxspec
- && (data->p1[1] == Ssymbol || data->p1[1] == Sword));
+ RETURN_CONSTRAIN (*data->p1 == notsyntaxspec
+ && (data->p1[1] == Ssymbol || data->p1[1] == Sword));
case symend:
- return (*data->p1 == syntaxspec
- && (data->p1[1] == Ssymbol || data->p1[1] == Sword));
+ RETURN_CONSTRAIN (*data->p1 == syntaxspec
+ && (data->p1[1] == Ssymbol || data->p1[1] == Sword));
case duplicate:
/* At this point, we know nothing about what this can match, sadly. */
@@ -3976,7 +4001,7 @@ static bool
mutually_exclusive_p (struct re_pattern_buffer *bufp, re_char *p1,
re_char *p2)
{
- struct mutexcl_data data = { bufp, p1 };
+ struct mutexcl_data data = { bufp, p1, true };
return forall_firstchar (bufp, p2, NULL, mutually_exclusive_one, &data);
}
diff --git a/test/src/regex-emacs-tests.el b/test/src/regex-emacs-tests.el
index 621e4dbe2c0..615d905e140 100644
--- a/test/src/regex-emacs-tests.el
+++ b/test/src/regex-emacs-tests.el
@@ -555,10 +555,10 @@ known/benign differences in behavior.")
(defconst regex-tests-PTESTS-whitelist
[
- ;; emacs doesn't see DEL (0x7f) as a [:cntrl:] character
+ ;; Emacs doesn't see DEL (0x7f) as a [:cntrl:] character
138
- ;; emacs doesn't barf on weird ranges such as [b-a], but simply
+ ;; Emacs doesn't barf on weird ranges such as [b-a], but simply
;; fails to match
168
]
@@ -872,14 +872,14 @@ This evaluates the TESTS test cases from glibc."
(should (equal (string-match "\\`\\(?:ab\\)*\\'" "a") nil))
(should (equal (string-match "\\`a\\{2\\}*\\'" "a") nil)))
-(ert-deftest regexp-tests-backtrack-optimization () ;bug#61514
+(ert-deftest regexp-tests-backtrack-optimization ()
;; Make sure we don't use up the regexp stack needlessly.
(with-current-buffer (get-buffer-create "*bug*")
(erase-buffer)
(insert (make-string 1000000 ?x) "=")
(goto-char (point-min))
;; Make sure we do perform the optimization (if we don't, the
- ;; below will burp with regexp-stack-overflow).
+ ;; below will burp with regexp-stack-overflow). ;bug#61514
(should (looking-at "x*=*"))
(should (looking-at "x*\\(=\\|:\\)"))
(should (looking-at "x*\\(=\\|:\\)*"))
@@ -908,6 +908,7 @@ This evaluates the TESTS test cases from glibc."
(should (eq 0 (string-match "\\(ca*\\|ab\\)+d" "cabd")))
(should (string-match "\\(aa*\\|b\\)*c" "ababc"))
(should (string-match " \\sw*\\bfoo" " foo"))
+ (should (string-match ".*\\>" "hello "))
))
(ert-deftest regexp-tests-zero-width-assertion-repetition ()
[Prev in Thread] |
Current Thread |
[Next in Thread] |
- master 37130fd500f: regex.c: Fix recent regression with mutually_exclusive_p,
Stefan Monnier <=