emacs-diffs
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Emacs-diffs] master 3fd7491: Optimise more inputs to `regexp-opt' (bug#


From: Mattias Engdegård
Subject: [Emacs-diffs] master 3fd7491: Optimise more inputs to `regexp-opt' (bug#36444)
Date: Thu, 4 Jul 2019 11:18:21 -0400 (EDT)

branch: master
commit 3fd74915121a3eac265170e20bd19b3cde6a2589
Author: Mattias Engdegård <address@hidden>
Commit: Mattias Engdegård <address@hidden>

    Optimise more inputs to `regexp-opt' (bug#36444)
    
    Use a more precise test to determine whether the input to `regexp-opt'
    is safe to optimise when KEEP-ORDER is non-nil, permitting more inputs
    to be optimised than before.  For example, ("good" "goal" "go") is now
    accepted.
    
    * lisp/emacs-lisp/regexp-opt.el (regexp-opt):
    More precise test for whether the list is safe w.r.t. KEEP-ORDER.
    (regexp-opt--contains-prefix): Remove.
    
    * test/lisp/emacs-lisp/regexp-opt-tests.el: Use lexical-binding.
    (regexp-opt-test--permutation, regexp-opt-test--factorial)
    (regexp-opt-test--permutations, regexp-opt-test--match-all)
    (regexp-opt-test--check-perm, regexp-opt-test--explain-perm)
    (regexp-opt-keep-order): Test KEEP-ORDER.
---
 lisp/emacs-lisp/regexp-opt.el            | 46 ++++++++++++------------
 test/lisp/emacs-lisp/regexp-opt-tests.el | 62 +++++++++++++++++++++++++++++++-
 2 files changed, 83 insertions(+), 25 deletions(-)

diff --git a/lisp/emacs-lisp/regexp-opt.el b/lisp/emacs-lisp/regexp-opt.el
index b6104f2..ab52003 100644
--- a/lisp/emacs-lisp/regexp-opt.el
+++ b/lisp/emacs-lisp/regexp-opt.el
@@ -140,21 +140,34 @@ usually more efficient than that of a simplified version:
           (completion-ignore-case nil)
           (completion-regexp-list nil)
           (open (cond ((stringp paren) paren) (paren "\\(")))
-          (sorted-strings (delete-dups
-                           (sort (copy-sequence strings) 'string-lessp)))
           (re
             (cond
              ;; No strings: return an unmatchable regexp.
              ((null strings)
               (concat (or open "\\(?:") regexp-unmatchable "\\)"))
-             ;; If we cannot reorder, give up all attempts at
-             ;; optimisation.  There is room for improvement (Bug#34641).
-             ((and keep-order (regexp-opt--contains-prefix sorted-strings))
-              (concat (or open "\\(?:")
-                      (mapconcat #'regexp-quote strings "\\|")
-                      "\\)"))
+
+             ;; The algorithm will generate a pattern that matches
+             ;; longer strings in the list before shorter.  If the
+             ;; list order matters, then no string must come after a
+             ;; proper prefix of that string.  To check this, verify
+             ;; that a straight or-pattern matches each string
+             ;; entirely.
+             ((and keep-order
+                   (let* ((case-fold-search nil)
+                          (alts (mapconcat #'regexp-quote strings "\\|")))
+                     (and (let ((s strings))
+                            (while (and s
+                                        (string-match alts (car s))
+                                        (= (match-end 0) (length (car s))))
+                              (setq s (cdr s)))
+                            ;; If we exited early, we found evidence that
+                            ;; regexp-opt-group cannot be used.
+                            s)
+                          (concat (or open "\\(?:") alts "\\)")))))
              (t
-              (regexp-opt-group sorted-strings (or open t) (not open))))))
+              (regexp-opt-group
+               (delete-dups (sort (copy-sequence strings) 'string-lessp))
+               (or open t) (not open))))))
       (cond ((eq paren 'words)
             (concat "\\<" re "\\>"))
            ((eq paren 'symbols)
@@ -339,21 +352,6 @@ never matches anything."
              (concat "[" all "]")))))))
 
 
-(defun regexp-opt--contains-prefix (strings)
-  "Whether STRINGS contains a proper prefix of one of its other elements.
-STRINGS must be a list of sorted strings without duplicates."
-  (let ((s strings))
-    ;; In a lexicographically sorted list, a string always immediately
-    ;; succeeds one of its prefixes.
-    (while (and (cdr s)
-                (not (string-equal
-                      (car s)
-                      (substring (cadr s) 0 (min (length (car s))
-                                                 (length (cadr s)))))))
-      (setq s (cdr s)))
-    (cdr s)))
-
-
 (provide 'regexp-opt)
 
 ;;; regexp-opt.el ends here
diff --git a/test/lisp/emacs-lisp/regexp-opt-tests.el 
b/test/lisp/emacs-lisp/regexp-opt-tests.el
index 927de8c..3658964 100644
--- a/test/lisp/emacs-lisp/regexp-opt-tests.el
+++ b/test/lisp/emacs-lisp/regexp-opt-tests.el
@@ -1,4 +1,4 @@
-;;; regexp-opt-tests.el --- Tests for regexp-opt.el
+;;; regexp-opt-tests.el --- Tests for regexp-opt.el  -*- lexical-binding: t -*-
 
 ;; Copyright (C) 2013-2019 Free Software Foundation, Inc.
 
@@ -25,6 +25,66 @@
 
 (require 'regexp-opt)
 
+(defun regexp-opt-test--permutation (n list)
+  "The Nth permutation of LIST, 0 ≤ N < (length LIST)!."
+  (let ((len (length list))
+        (perm-list nil))
+    (dotimes (i len)
+      (let* ((d (- len i))
+             (k (mod n d)))
+        (push (nth k list) perm-list)
+        (setq list (append (butlast list (- (length list) k))
+                           (nthcdr (1+ k) list)))
+        (setq n (/ n d))))
+    (nreverse perm-list)))
+
+(defun regexp-opt-test--factorial (n)
+  "N!"
+  (apply #'* (number-sequence 1 n)))
+
+(defun regexp-opt-test--permutations (list)
+  "All permutations of LIST."
+  (mapcar (lambda (i) (regexp-opt-test--permutation i list))
+          (number-sequence 0 (1- (regexp-opt-test--factorial (length list))))))
+
+(defun regexp-opt-test--match-all (words re)
+  (mapcar (lambda (w) (and (string-match re w)
+                           (match-string 0 w)))
+          words))
+
+(defun regexp-opt-test--check-perm (perm)
+  (let* ((ref-re (mapconcat #'regexp-quote perm "\\|"))
+         (opt-re (regexp-opt perm nil t))
+         (ref (regexp-opt-test--match-all perm ref-re))
+         (opt (regexp-opt-test--match-all perm opt-re)))
+    (equal opt ref)))
+
+(defun regexp-opt-test--explain-perm (perm)
+  (let* ((ref-re (mapconcat #'regexp-quote perm "\\|"))
+         (opt-re (regexp-opt perm nil t))
+         (ref (regexp-opt-test--match-all perm ref-re))
+         (opt (regexp-opt-test--match-all perm opt-re)))
+    (concat "\n"
+            (format "Naïve regexp:     %s\n" ref-re)
+            (format "Optimised regexp: %s\n" opt-re)
+            (format "Got:      %s\n" opt)
+            (format "Expected: %s\n" ref))))
+
+(put 'regexp-opt-test--check-perm 'ert-explainer 
'regexp-opt-test--explain-perm)
+
+(ert-deftest regexp-opt-keep-order ()
+  "Check that KEEP-ORDER works."
+  (dolist (perm (regexp-opt-test--permutations '("abc" "bca" "cab")))
+    (should (regexp-opt-test--check-perm perm)))
+  (dolist (perm (regexp-opt-test--permutations '("abc" "ab" "bca" "bc")))
+    (should (regexp-opt-test--check-perm perm)))
+  (dolist (perm (regexp-opt-test--permutations '("abxy" "cdxy")))
+    (should (regexp-opt-test--check-perm perm)))
+  (dolist (perm (regexp-opt-test--permutations '("afgx" "bfgx" "afgy" "bfgy")))
+    (should (regexp-opt-test--check-perm perm)))
+  (dolist (perm (regexp-opt-test--permutations '("a" "ab" "ac" "abc")))
+    (should (regexp-opt-test--check-perm perm))))
+
 (ert-deftest regexp-opt-charset ()
   (should (equal (regexp-opt-charset '(?a ?b ?a)) "[ab]"))
   (should (equal (regexp-opt-charset '(?D ?d ?B ?a ?b ?C ?7 ?a ?c ?A))



reply via email to

[Prev in Thread] Current Thread [Next in Thread]