[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
master 8bd233a 3/3: Minor string-search optimisations (bug#43598)
From: Mattias Engdegård
Subject: master 8bd233a 3/3: Minor string-search optimisations (bug#43598)
Date: Sun, 27 Sep 2020 08:45:22 -0400 (EDT)
branch: master
commit 8bd233a7eb6bc4709d0adf0577d30aaf167e75bf
Author: Mattias Engdegård <mattiase@acm.org>
Commit: Mattias Engdegård <mattiase@acm.org>
Minor string-search optimisations (bug#43598)
* src/fns.c (Fstring_search): Perform cheap all-ASCII checks before more
expensive ones. Use a faster loop when searching for non-ASCII
non-raw bytes.
* test/src/fns-tests.el (string-search): Add more test cases.
---
src/fns.c | 59 +++++++++++++++++++++++----------------------------
test/src/fns-tests.el | 22 +++++++++++++++++--
2 files changed, 47 insertions(+), 34 deletions(-)
diff --git a/src/fns.c b/src/fns.c
index 0f76871..f626fe1 100644
--- a/src/fns.c
+++ b/src/fns.c
@@ -5457,16 +5457,11 @@ It should not be used for anything security-related. See
static bool
string_ascii_p (Lisp_Object string)
{
- if (STRING_MULTIBYTE (string))
- return SBYTES (string) == SCHARS (string);
- else
- {
- ptrdiff_t nbytes = SBYTES (string);
- for (ptrdiff_t i = 0; i < nbytes; i++)
- if (SREF (string, i) > 127)
- return false;
- return true;
- }
+ ptrdiff_t nbytes = SBYTES (string);
+ for (ptrdiff_t i = 0; i < nbytes; i++)
+ if (SREF (string, i) > 127)
+ return false;
+ return true;
}
DEFUN ("string-search", Fstring_search, Sstring_search, 2, 3, 0,
@@ -5505,9 +5500,14 @@ Case is always significant and text properties are ignored. */)
haystart = SSDATA (haystack) + start_byte;
haybytes = SBYTES (haystack) - start_byte;
- if (STRING_MULTIBYTE (haystack) == STRING_MULTIBYTE (needle)
- || string_ascii_p (needle)
- || string_ascii_p (haystack))
+ /* We can do a direct byte-string search if both strings have the
+ same multibyteness, or if at least one of them consists of ASCII
+ characters only. */
+ if (STRING_MULTIBYTE (haystack)
+ ? (STRING_MULTIBYTE (needle)
+ || SCHARS (haystack) == SBYTES (haystack) || string_ascii_p (needle))
+ : (!STRING_MULTIBYTE (needle)
+ || SCHARS (needle) == SBYTES (needle) || string_ascii_p (haystack)))
res = memmem (haystart, haybytes,
SSDATA (needle), SBYTES (needle));
else if (STRING_MULTIBYTE (haystack)) /* unibyte needle */
@@ -5521,26 +5521,21 @@ Case is always significant and text properties are ignored. */)
/* The only possible way we can find the multibyte needle in the
unibyte stack (since we know that neither are pure-ASCII) is
if they contain "raw bytes" (and no other non-ASCII chars.) */
- ptrdiff_t chars = SCHARS (needle);
- const unsigned char *src = SDATA (needle);
-
- for (ptrdiff_t i = 0; i < chars; i++)
- {
- int c = string_char_advance (&src);
-
- if (!CHAR_BYTE8_P (c)
- && !ASCII_CHAR_P (c))
- /* Found a char that can't be in the haystack. */
- return Qnil;
- }
+ ptrdiff_t nbytes = SBYTES (needle);
+ for (ptrdiff_t i = 0; i < nbytes; i++)
+ {
+ int c = SREF (needle, i);
+ if (CHAR_BYTE8_HEAD_P (c))
+ i++; /* Skip raw byte. */
+ else if (!ASCII_CHAR_P (c))
+ return Qnil; /* Found a char that can't be in the haystack. */
+ }
- {
- /* "Raw bytes" (aka eighth-bit) are represented differently in
- multibyte and unibyte strings. */
- Lisp_Object uni_needle = Fstring_to_unibyte (needle);
- res = memmem (haystart, haybytes,
- SSDATA (uni_needle), SBYTES (uni_needle));
- }
+ /* "Raw bytes" (aka eighth-bit) are represented differently in
+ multibyte and unibyte strings. */
+ Lisp_Object uni_needle = Fstring_to_unibyte (needle);
+ res = memmem (haystart, haybytes,
+ SSDATA (uni_needle), SBYTES (uni_needle));
}
if (! res)
diff --git a/test/src/fns-tests.el b/test/src/fns-tests.el
index 41969f2..d3c22f9 100644
--- a/test/src/fns-tests.el
+++ b/test/src/fns-tests.el
@@ -913,6 +913,7 @@
(should (equal (string-search "ab\0" "ab") nil))
(should (equal (string-search "ab" "abababab" 3) 4))
(should (equal (string-search "ab" "ababac" 3) nil))
+ (should (equal (string-search "aaa" "aa") nil))
(let ((case-fold-search t))
(should (equal (string-search "ab" "AB") nil)))
@@ -936,14 +937,16 @@
(should (equal (string-search (string-to-multibyte "\377") "ab\377c") 2))
(should (equal (string-search "\303" "aøb") nil))
(should (equal (string-search "\270" "aøb") nil))
- ;; This test currently fails, but it shouldn't!
- ;;(should (equal (string-search "ø" "\303\270") nil))
+ (should (equal (string-search "ø" "\303\270") nil))
+
+ (should (equal (string-search "a\U00010f98z" "a\U00010f98a\U00010f98z") 2))
(should-error (string-search "a" "abc" -1))
(should-error (string-search "a" "abc" 4))
(should-error (string-search "a" "abc" 100000000000))
(should (equal (string-search "a" "aaa" 3) nil))
+ (should (equal (string-search "aa" "aa" 1) nil))
(should (equal (string-search "\0" "") nil))
(should (equal (string-search "" "") 0))
@@ -955,6 +958,21 @@
(should-error (string-search "" "abc" -1))
(should-not (string-search "ø" "foo\303\270"))
+ (should-not (string-search "\303\270" "ø"))
+ (should-not (string-search "\370" "ø"))
+ (should-not (string-search (string-to-multibyte "\370") "ø"))
+ (should-not (string-search "ø" "\370"))
+ (should-not (string-search "ø" (string-to-multibyte "\370")))
+ (should-not (string-search "\303\270" "\370"))
+ (should-not (string-search (string-to-multibyte "\303\270") "\370"))
+ (should-not (string-search "\303\270" (string-to-multibyte "\370")))
+ (should-not (string-search (string-to-multibyte "\303\270")
+ (string-to-multibyte "\370")))
+ (should-not (string-search "\370" "\303\270"))
+ (should-not (string-search (string-to-multibyte "\370") "\303\270"))
+ (should-not (string-search "\370" (string-to-multibyte "\303\270")))
+ (should-not (string-search (string-to-multibyte "\370")
+ (string-to-multibyte "\303\270")))
(should (equal (string-search (string-to-multibyte "o\303\270")
"foo\303\270")
2))
(should (equal (string-search "\303\270" "foo\303\270") 3)))