From 4523387ac645d8d6daab07114e29d9386a02450a Mon Sep 17 00:00:00 2001 From: Gregory Heytings Date: Mon, 20 Feb 2023 11:18:30 +0000 Subject: [PATCH] Make the backtracking depth of regexp searches modifiable * src/search.c (syms_of_search) : New variable, replacing the constant variable 'emacs_re_max_failures'. Initialize it with the constant 'max_regexp_max_backtracking_depth'. * src/regex-emacs.h: Replace the external definition of 'emacs_re_max_failures' with the constant 'max_regexp_max_backtracking_depth'. * src/regex-emacs.c (GROW_FAIL_STACK): Use the new variable instead of the constant. Reset it to its maximum value when necessary. * src/emacs.c (main): Use the new constant 'max_regexp_max_backtracking_depth' in the calculations. * lisp/nxml/xmltok.el (xmltok-scan-attributes): Bind 'regexp-max-backtracking-depth' to a small value, and add a comment. Fixes bug#61514. --- lisp/nxml/xmltok.el | 7 ++++++- src/emacs.c | 8 ++++---- src/regex-emacs.c | 26 +++++++++++--------------- src/regex-emacs.h | 7 +++++-- src/search.c | 13 +++++++++++++ 5 files changed, 39 insertions(+), 22 deletions(-) diff --git a/lisp/nxml/xmltok.el b/lisp/nxml/xmltok.el index c36d225c7c9..8201b773e0f 100644 --- a/lisp/nxml/xmltok.el +++ b/lisp/nxml/xmltok.el @@ -731,7 +731,12 @@ xmltok-scan-after-comment-open (defun xmltok-scan-attributes () (let ((recovering nil) - (atts-needing-normalization nil)) + (atts-needing-normalization nil) + ;; Limit the backtracking depth of regexp searches, to fail + ;; with a "Stack overflow in regexp matcher" error instead of + ;; inflooping in looking-at. This assumes that XML attributes + ;; do not use more than about 1 KB characters. See bug#61514. + (regexp-max-backtracking-depth 1000)) (while (cond ((or (looking-at (xmltok-attribute regexp)) ;; use non-greedy group (when (looking-at (concat "[^<>\n]+?" diff --git a/src/emacs.c b/src/emacs.c index 214e2e2a296..d0dca3f03ec 100644 --- a/src/emacs.c +++ b/src/emacs.c @@ -1499,9 +1499,9 @@ main (int argc, char **argv) rlim_t lim = rlim.rlim_cur; /* Approximate the amount regex-emacs.c needs per unit of - emacs_re_max_failures, then add 33% to cover the size of the - smaller stacks that regex-emacs.c successively allocates and - discards on its way to the maximum. */ + max_regexp_max_backtracking_depth, then add 33% to cover the + size of the smaller stacks that regex-emacs.c successively + allocates and discards on its way to the maximum. */ int min_ratio = 20 * sizeof (char *); int ratio = min_ratio + min_ratio / 3; @@ -1514,7 +1514,7 @@ main (int argc, char **argv) if (try_to_grow_stack) { - rlim_t newlim = emacs_re_max_failures * ratio + extra; + rlim_t newlim = max_regexp_max_backtracking_depth * ratio + extra; /* Round the new limit to a page boundary; this is needed for Darwin kernel 15.4.0 (see Bug#23622) and perhaps diff --git a/src/regex-emacs.c b/src/regex-emacs.c index 2dca0d16ad9..931db980e39 100644 --- a/src/regex-emacs.c +++ b/src/regex-emacs.c @@ -868,17 +868,6 @@ print_double_string (re_char *where, re_char *string1, ptrdiff_t size1, space, so it is not a hard limit. */ #define INIT_FAILURE_ALLOC 20 -/* Roughly the maximum number of failure points on the stack. Would be - exactly that if failure always used TYPICAL_FAILURE_SIZE items. - This is a variable only so users of regex can assign to it; we never - change it ourselves. We always multiply it by TYPICAL_FAILURE_SIZE - before using it, so it should probably be a byte-count instead. */ -/* Note that 4400 was enough to cause a crash on Alpha OSF/1, - whose default stack limit is 2mb. In order for a larger - value to work reliably, you have to try to make it accord - with the process stack limit. */ -ptrdiff_t emacs_re_max_failures = 40000; - union fail_stack_elt { re_char *pointer; @@ -912,7 +901,7 @@ #define INIT_FAIL_STACK() \ /* Double the size of FAIL_STACK, up to a limit - which allows approximately 'emacs_re_max_failures' items. + which allows approximately 'Vregexp_max_backtracking_depth' items. Return 1 if succeeds, and 0 if either ran out of memory allocating space for it or it was already too large. @@ -926,16 +915,23 @@ #define INIT_FAIL_STACK() \ #define FAIL_STACK_GROWTH_FACTOR 4 #define GROW_FAIL_STACK(fail_stack) \ - (((fail_stack).size >= emacs_re_max_failures * TYPICAL_FAILURE_SIZE) \ + ((Vregexp_max_backtracking_depth = \ + Vregexp_max_backtracking_depth <= 0 \ + || Vregexp_max_backtracking_depth \ + > max_regexp_max_backtracking_depth \ + ? max_regexp_max_backtracking_depth \ + : Vregexp_max_backtracking_depth), \ + ((fail_stack).size \ + >= Vregexp_max_backtracking_depth * TYPICAL_FAILURE_SIZE) \ ? 0 \ : ((fail_stack).stack \ = REGEX_REALLOCATE ((fail_stack).stack, \ (fail_stack).avail * sizeof (fail_stack_elt_t), \ - min (emacs_re_max_failures * TYPICAL_FAILURE_SIZE, \ + min (Vregexp_max_backtracking_depth * TYPICAL_FAILURE_SIZE, \ ((fail_stack).size * FAIL_STACK_GROWTH_FACTOR)) \ * sizeof (fail_stack_elt_t)), \ ((fail_stack).size \ - = (min (emacs_re_max_failures * TYPICAL_FAILURE_SIZE, \ + = (min (Vregexp_max_backtracking_depth * TYPICAL_FAILURE_SIZE, \ ((fail_stack).size * FAIL_STACK_GROWTH_FACTOR)))), \ 1)) diff --git a/src/regex-emacs.h b/src/regex-emacs.h index 1bc973363e9..9ccc4177487 100644 --- a/src/regex-emacs.h +++ b/src/regex-emacs.h @@ -49,8 +49,11 @@ #define EMACS_REGEX_H 1 TODO: turn into an actual function parameter. */ extern Lisp_Object re_match_object; -/* Roughly the maximum number of failure points on the stack. */ -extern ptrdiff_t emacs_re_max_failures; +/* Maximum value for Vregexp_max_backtracking_depth. This is roughly + the maximum allowed number of failure points on the stack. It + would be exactly that if failure always used TYPICAL_FAILURE_SIZE + items. */ +#define max_regexp_max_backtracking_depth 40000 /* Amount of memory that we can safely stack allocate. */ extern ptrdiff_t emacs_re_safe_alloca; diff --git a/src/search.c b/src/search.c index 0bb52c03eef..fc5d7c2b8e2 100644 --- a/src/search.c +++ b/src/search.c @@ -3431,6 +3431,19 @@ syms_of_search (void) is to bind it with `let' around a small expression. */); Vinhibit_changing_match_data = Qnil; + DEFVAR_INT ("regexp-max-backtracking-depth", Vregexp_max_backtracking_depth, + doc: /* Maximum backtracking depth in a regexp search. + +When the number of pending branches in the search space reaches that +threshold, a regexp search fails with a "Stack overflow in regexp +matcher". Roughly speaking, this is the number of characters to which +a regexp search is limited, with a complex enough regexp. + +Note that this variable will be reset to its default value if it is +set to a non-positive value, or to a higher value than its default +value. */); + Vregexp_max_backtracking_depth = max_regexp_max_backtracking_depth; + defsubr (&Slooking_at); defsubr (&Sposix_looking_at); defsubr (&Sstring_match); -- 2.39.0