bug-gnu-emacs
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

bug#24603: [RFC 16/18] Refactor character class checking; optimise ASCII


From: Michal Nazarewicz
Subject: bug#24603: [RFC 16/18] Refactor character class checking; optimise ASCII case
Date: Sun, 06 Nov 2016 20:26:11 +0100
User-agent: Notmuch/0.19+53~g2e63a09 (http://notmuchmail.org) Emacs/26.0.50.2 (x86_64-unknown-linux-gnu)

On Tue, Oct 04 2016, Eli Zaretskii wrote:
> Thanks.  I think this change will require a benchmark to make sure we
> don't lose too much in terms of performance.

Benchmark and its results included below.

It’s a bit noisy and as all benchmarks of that kind it doesn’t really
measure the real usage, but I think it’s safe to say that things aren’t
getting worse.

---- >8 ------------------------------------------------------------------------

Class      [[:cc:]]   no-case    [^[:cc:]]  no-case  
---------  ---------  ---------  ---------  ---------

==== Add regex character class matching benchmark ====
alnum         59.870     60.148     63.548     64.048
alpha         60.355     60.137     63.333     62.684
digit         27.835     27.648      0.513      0.488
xdigit        27.160     27.320      0.969      0.883
upper         91.027     91.572     39.423     39.595
lower         60.591     61.307     60.332     59.730
word          36.201     36.046    108.118    109.396
punct        110.987    111.683     35.110     35.200
cntrl         27.005     26.756      1.212      1.176
graph         25.694     26.097     75.872     75.711
print         24.783     24.976     76.652     74.921
space        147.210    148.431      1.261      1.252
blank         27.602     27.722      0.373      0.189
ascii         23.243     23.302      4.550      4.486
nonascii       5.448      5.407     90.733     90.410
unibyte       22.986     23.342      4.559      4.655
multibyte      5.508      5.535     92.457     91.163
...all...      1.138      1.030     93.275     93.383

==== Refactor character class checking; optimise ASCII case ====
alnum         54.643     54.301     56.668     56.898
alpha         54.654     54.558     56.134     56.281
digit         26.103     26.044      0.495      0.443
xdigit        25.606     25.690      0.815      0.806
upper         83.269     83.306     36.704     36.487
lower         56.278     55.804     54.872     54.917
word          34.820     55.092     99.577    100.618
punct        103.410    103.465     31.673     31.590
cntrl         25.509     25.274      1.119      1.101
graph         23.593     23.673     69.335     69.481
print         23.003     23.123     69.962     70.132
space        132.224    132.458      1.143      1.120
blank         26.223     26.342      0.193      0.187
ascii         22.329     22.257      4.094      4.082
nonascii       4.910      4.897     84.633     84.515
unibyte       22.866     22.385      4.094      4.078
multibyte      4.913      4.886     95.385     85.341
...all...      0.942      0.936     88.979     88.744

==== Optimise character class matching in regexes ====
alnum         53.338     53.052     56.571     56.434
alpha         53.591     53.350     56.218     56.255
digit         26.266     26.502      0.438      0.438
xdigit        25.793     25.887      0.877      0.876
upper         82.539     82.700     31.994     32.200
lower         55.280     55.040     54.615     54.429
word          33.666     33.530    100.678    101.721
punct        101.714    101.715     31.766     31.620
cntrl         25.669     25.068      1.113      1.114
graph         27.848     28.067     81.669     81.619
print         27.128     28.297     82.326     82.306
space        131.847    132.242      1.124      1.128
blank         26.493     26.607      0.190      0.188
ascii         22.332     22.315      4.379      4.358
nonascii       5.169      5.159     84.872     85.488
unibyte       22.259     22.529      4.374      4.361
multibyte      5.193      5.181     86.421     86.568
...all...      0.945      0.939     92.903     93.209

==== Fix case-fold-search character class matching ====
alnum         53.553     53.527     56.918     56.886
alpha         53.657     53.758     56.541     57.107
digit         26.616     26.641      0.467      0.510
xdigit        27.255     26.271      0.894      0.923
upper         56.608     55.073     55.792     55.422
lower         55.419     55.330     55.486     55.018
word          35.537     35.434    103.414    103.516
punct        105.810    106.618     33.454     33.322
cntrl         25.875     26.020      1.274      1.271
graph         28.011     28.185     82.239     82.245
print         26.935     27.016     99.945     83.213
space        136.774    138.135      1.170      1.159
blank         26.984     26.976      0.192      0.204
ascii         22.365     22.661      4.652      4.652
nonascii       5.759      5.524     85.805     86.403
unibyte       22.568     22.375      4.995      4.909
multibyte      5.729      5.749     84.671     84.396
...all...      0.990      0.978     89.520     89.612

All times in ms; lower is better.

---- >8 ------------------------------------------------------------------------
>From 23d8fe0b093730406b64e0e20207c2fb929f707f Mon Sep 17 00:00:00 2001
From: Michal Nazarewicz <mina86@mina86.com>
Date: Fri, 7 Oct 2016 02:44:30 +0200
Subject: [PATCH] Add regex character class matching benchmark

* test/src/regex-tests.el (regex-tests-benchmark-cc-match): New function
running character class matching benchmark.
---
 test/src/regex-tests.el | 59 +++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 59 insertions(+)

diff --git a/test/src/regex-tests.el b/test/src/regex-tests.el
index fc50344..d0aad97 100644
--- a/test/src/regex-tests.el
+++ b/test/src/regex-tests.el
@@ -98,6 +98,65 @@ regex--test-cc
     (eval `(ert-deftest ,name () ,doc ,(cons 'regex--test-cc test)) t)))
 
 
+(defun regex-tests-benchmark-cc-match ()
+  "Benchmark regex character class matching."
+  (interactive)
+  (let* ((prn (if (called-interactively-p)
+                  'insert
+                (lambda (&rest args) (mapc 'princ args))))
+         (strings
+          (nconc (list
+                  (apply 'string (number-sequence 32 126))
+                  (apply 'string (number-sequence 0 127))
+                  (apply 'unibyte-string (number-sequence 128 255))
+                  (concat (apply 'string (number-sequence 0 255))
+                          (apply 'unibyte-string (number-sequence 128 255)))
+                  (make-string 10000 #x3FFF80)
+                  (make-string 10000 #x3FFFFF))
+                 (mapcar (lambda (ch) (make-string 10000 ch))
+                         (number-sequence 0 256))))
+
+         (ccs '("alnum" "alpha" "digit" "xdigit" "upper" "lower"
+                "word" "punct" "cntrl" "graph" "print" "space" "blank"
+                "ascii" "nonascii" "unibyte" "multibyte"))
+
+         (benchmark-re
+          (lambda (re)
+            (dolist (cf '(nil t))
+              ;; Compile the regex so it ends up in cache.
+              (string-match re "")
+              (let ((res (benchmark-run 10
+                           (dolist (str strings) (string-match re str)))))
+                (funcall prn (format " %10.3f"
+                                     (* (- (nth 0 res) (nth 2 res)) 100))))))))
+
+    (when (called-interactively-p)
+      (switch-to-buffer (get-buffer-create "*Regex Benchmark*"))
+      (delete-region (point-min) (point-max)))
+
+    (funcall prn (format "%-9s  %-9s  %-9s  %-9s  %-9s\n"
+                         "Class" "[[:cc:]]" "no-case"
+                         "[^[:cc:]]" "no-case")
+             (make-string 9 ?-)
+             "  " (make-string 9 ?-) "  " (make-string 9 ?-)
+             "  " (make-string 9 ?-) "  " (make-string 9 ?-) "\n")
+
+    (dolist (cc ccs)
+      (funcall prn (format "%-9s" cc))
+      (dolist (re (list (format "[[:%s:]]" cc)
+                        (format "[^[:%s:]]" cc)))
+        (funcall benchmark-re re))
+      (funcall prn "\n"))
+
+    (funcall prn (format "%-9s" "...all..."))
+    (let ((all-ccs (mapconcat (lambda (cc) (format "[:%s:]" cc)) ccs "")))
+      (funcall benchmark-re (concat "[" all-ccs "]"))
+      (funcall benchmark-re (concat "[^" all-ccs "]")))
+
+    (funcall prn "\n" (make-string 53 ?-)
+             "\nAll times in ms; lower is better.\n")))
+
+
 (defmacro regex-tests-generic-line (comment-char test-file whitelist &rest 
body)
   "Reads a line of the test file TEST-FILE, skipping
 comments (defined by COMMENT-CHAR), and evaluates the tests in
-- 
2.8.0.rc3.226.g39d4020





reply via email to

[Prev in Thread] Current Thread [Next in Thread]