[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
master cfda663282: Speed up string-to-unibyte
From: Mattias Engdegård
Subject: master cfda663282: Speed up string-to-unibyte
Date: Sun, 10 Jul 2022 12:21:28 -0400 (EDT)
branch: master
commit cfda663282b788972c344e6733a8aa60a3e0f545
Author: Mattias Engdegård <mattiase@acm.org>
Commit: Mattias Engdegård <mattiase@acm.org>
Speed up string-to-unibyte
* src/character.h (str_to_unibyte):
* src/character.c (str_to_unibyte): Remove.
* src/fns.c (Fstring_to_unibyte): Ditch the call to str_to_unibyte and
the unnecessary heap allocation. Write new, faster code.
* test/src/fns-tests.el (fns--string-to-unibyte): New test.
---
src/character.c | 25 -------------------------
src/character.h | 2 --
src/fns.c | 25 +++++++++++++++----------
test/src/fns-tests.el | 15 +++++++++++++++
4 files changed, 30 insertions(+), 37 deletions(-)
diff --git a/src/character.c b/src/character.c
index c1a1b55389..d12df23f8e 100644
--- a/src/character.c
+++ b/src/character.c
@@ -734,31 +734,6 @@ str_as_unibyte (unsigned char *str, ptrdiff_t bytes)
return (to - str);
}
-/* Convert eight-bit chars in SRC (in multibyte form) to the
- corresponding byte and store in DST. CHARS is the number of
- characters in SRC. The value is the number of bytes stored in DST.
- Usually, the value is the same as CHARS, but is less than it if SRC
- contains a non-ASCII, non-eight-bit character. */
-
-ptrdiff_t
-str_to_unibyte (const unsigned char *src, unsigned char *dst, ptrdiff_t chars)
-{
- ptrdiff_t i;
-
- for (i = 0; i < chars; i++)
- {
- int c = string_char_advance (&src);
-
- if (CHAR_BYTE8_P (c))
- c = CHAR_TO_BYTE8 (c);
- else if (! ASCII_CHAR_P (c))
- return i;
- *dst++ = c;
- }
- return i;
-}
-
-
static ptrdiff_t
string_count_byte8 (Lisp_Object string)
{
diff --git a/src/character.h b/src/character.h
index 6ee6bcab20..2ca935ba04 100644
--- a/src/character.h
+++ b/src/character.h
@@ -569,8 +569,6 @@ extern ptrdiff_t str_as_multibyte (unsigned char *,
ptrdiff_t, ptrdiff_t,
ptrdiff_t *);
extern ptrdiff_t str_to_multibyte (unsigned char *, ptrdiff_t, ptrdiff_t);
extern ptrdiff_t str_as_unibyte (unsigned char *, ptrdiff_t);
-extern ptrdiff_t str_to_unibyte (const unsigned char *, unsigned char *,
- ptrdiff_t);
extern ptrdiff_t strwidth (const char *, ptrdiff_t);
extern ptrdiff_t c_string_width (const unsigned char *, ptrdiff_t, int,
ptrdiff_t *, ptrdiff_t *);
diff --git a/src/fns.c b/src/fns.c
index 49d76a0e7c..61ed01eee4 100644
--- a/src/fns.c
+++ b/src/fns.c
@@ -1413,19 +1413,24 @@ an error is signaled. */)
(Lisp_Object string)
{
CHECK_STRING (string);
+ if (!STRING_MULTIBYTE (string))
+ return string;
- if (STRING_MULTIBYTE (string))
+ ptrdiff_t chars = SCHARS (string);
+ Lisp_Object ret = make_uninit_string (chars);
+ unsigned char *src = SDATA (string);
+ unsigned char *dst = SDATA (ret);
+ for (ptrdiff_t i = 0; i < chars; i++)
{
- ptrdiff_t chars = SCHARS (string);
- unsigned char *str = xmalloc (chars);
- ptrdiff_t converted = str_to_unibyte (SDATA (string), str, chars);
-
- if (converted < chars)
- error ("Can't convert the %"pD"dth character to unibyte", converted);
- string = make_unibyte_string ((char *) str, chars);
- xfree (str);
+ unsigned char b = *src++;
+ if (b <= 0x7f)
+ *dst++ = b; /* ASCII */
+ else if (CHAR_BYTE8_HEAD_P (b))
+ *dst++ = 0x80 | (b & 1) << 6 | (*src++ & 0x3f); /* raw byte */
+ else
+ error ("Cannot convert character at index %"pD"d to unibyte", i);
}
- return string;
+ return ret;
}
diff --git a/test/src/fns-tests.el b/test/src/fns-tests.el
index ba56019d4c..0119e31df1 100644
--- a/test/src/fns-tests.el
+++ b/test/src/fns-tests.el
@@ -1344,4 +1344,19 @@
(should (equal (plist-member plist (copy-sequence "a") #'equal)
'("a" "c")))))
+(ert-deftest fns--string-to-unibyte ()
+  (dolist (str '("" "a" "abc" "a\x00\x7fz" "a\xaa\xbbz" "\x80\xdd\xff"))
+    (ert-info ((prin1-to-string str) :prefix "str: ")
+      (should-not (multibyte-string-p str)) ; every literal starts out unibyte
+      (let* ((u (string-to-unibyte str))   ; should be identity
+             (m (string-to-multibyte u))   ; lossless conversion
+             (uu (string-to-unibyte m)))   ; also lossless
+        (should-not (multibyte-string-p u))
+        (should (multibyte-string-p m))
+        (should-not (multibyte-string-p uu))
+        (should (equal str u))              ; unibyte input is returned unchanged
+        (should (equal str uu)))))          ; round trip via multibyte is exact
+  (should-error (string-to-unibyte "å"))    ; non-ASCII, non-raw-byte char
+  (should-error (string-to-unibyte "ABC∀BC")))
+
;;; fns-tests.el ends here
[Prev in Thread] Current Thread [Next in Thread]
- master cfda663282: Speed up string-to-unibyte, Mattias Engdegård <=