[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
master 8c56968: Fix searching for multibyte needles in unibyte haystacks
From: Lars Ingebrigtsen
Subject: master 8c56968: Fix searching for multibyte needles in unibyte haystacks
Date: Sat, 26 Sep 2020 20:01:13 -0400 (EDT)
branch: master
commit 8c569683f2ee5d14040f5605fd0570b2eb009c05
Author: Lars Ingebrigtsen <larsi@gnus.org>
Commit: Lars Ingebrigtsen <larsi@gnus.org>
Fix searching for multibyte needles in unibyte haystacks
* src/fns.c (Fstring_search): Make this work better when searching
unibyte haystacks for multibyte needles (bug#43598).
---
src/fns.c | 45 +++++++++++++++++++++++++++++++++++++++++----
test/src/fns-tests.el | 6 +++++-
2 files changed, 46 insertions(+), 5 deletions(-)
diff --git a/src/fns.c b/src/fns.c
index 2fcc282..0f76871 100644
--- a/src/fns.c
+++ b/src/fns.c
@@ -5454,6 +5454,21 @@ It should not be used for anything security-related. See
return make_digest_string (digest, SHA1_DIGEST_SIZE);
}
+static bool
+string_ascii_p (Lisp_Object string)
+{
+ if (STRING_MULTIBYTE (string))
+ return SBYTES (string) == SCHARS (string);
+ else
+ {
+ ptrdiff_t nbytes = SBYTES (string);
+ for (ptrdiff_t i = 0; i < nbytes; i++)
+ if (SREF (string, i) > 127)
+ return false;
+ return true;
+ }
+}
+
DEFUN ("string-search", Fstring_search, Sstring_search, 2, 3, 0,
doc: /* Search for the string NEEDLE in the string HAYSTACK.
The return value is the position of the first occurrence of NEEDLE in
@@ -5490,7 +5505,9 @@ Case is always significant and text properties are
ignored. */)
haystart = SSDATA (haystack) + start_byte;
haybytes = SBYTES (haystack) - start_byte;
- if (STRING_MULTIBYTE (haystack) == STRING_MULTIBYTE (needle))
+ if (STRING_MULTIBYTE (haystack) == STRING_MULTIBYTE (needle)
+ || string_ascii_p (needle)
+ || string_ascii_p (haystack))
res = memmem (haystart, haybytes,
SSDATA (needle), SBYTES (needle));
else if (STRING_MULTIBYTE (haystack)) /* unibyte needle */
@@ -5501,9 +5518,29 @@ Case is always significant and text properties are
ignored. */)
}
else /* unibyte haystack, multibyte needle */
{
- Lisp_Object uni_needle = Fstring_as_unibyte (needle);
- res = memmem (haystart, haybytes,
- SSDATA (uni_needle), SBYTES (uni_needle));
+ /* The only possible way we can find the multibyte needle in the
+ unibyte stack (since we know that neither are pure-ASCII) is
+ if they contain "raw bytes" (and no other non-ASCII chars.) */
+ ptrdiff_t chars = SCHARS (needle);
+ const unsigned char *src = SDATA (needle);
+
+ for (ptrdiff_t i = 0; i < chars; i++)
+ {
+ int c = string_char_advance (&src);
+
+ if (!CHAR_BYTE8_P (c)
+ && !ASCII_CHAR_P (c))
+ /* Found a char that can't be in the haystack. */
+ return Qnil;
+ }
+
+ {
+ /* "Raw bytes" (aka eighth-bit) are represented differently in
+ multibyte and unibyte strings. */
+ Lisp_Object uni_needle = Fstring_to_unibyte (needle);
+ res = memmem (haystart, haybytes,
+ SSDATA (uni_needle), SBYTES (uni_needle));
+ }
}
if (! res)
diff --git a/test/src/fns-tests.el b/test/src/fns-tests.el
index f2e1a26..41969f2 100644
--- a/test/src/fns-tests.el
+++ b/test/src/fns-tests.el
@@ -953,4 +953,8 @@
(should (equal (string-search "" "abc" 3) 3))
(should-error (string-search "" "abc" 4))
(should-error (string-search "" "abc" -1))
- )
+
+ (should-not (string-search "ø" "foo\303\270"))
+ (should (equal (string-search (string-to-multibyte "o\303\270")
"foo\303\270")
+ 2))
+ (should (equal (string-search "\303\270" "foo\303\270") 3)))
[Prev in Thread] |
Current Thread |
[Next in Thread] |
- master 8c56968: Fix searching for multibyte needles in unibyte haystacks,
Lars Ingebrigtsen <=