[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[no subject]
From: |
Gavin D. Smith |
Date: |
Mon, 23 Oct 2023 14:51:33 -0400 (EDT) |
branch: release/7.1
commit c76bcd0feed005aaf9db28a76f4883f3ae98295b
Author: Gavin Smith <gavinsmith0123@gmail.com>
AuthorDate: Mon Oct 23 19:51:00 2023 +0100
* tp/Texinfo/XS/xspara.c (get_utf8_codepoint):
Wrapper for mbrtowc/btowc.
[_WIN32]: Do not call btowc, as it was tested to be very slow
on MinGW. Report from Eli Zaretskii.
---
ChangeLog | 7 +++++++
tp/Texinfo/XS/xspara.c | 48 ++++++++++++++++++++++++++----------------------
2 files changed, 33 insertions(+), 22 deletions(-)
diff --git a/ChangeLog b/ChangeLog
index e619109f5b..c4379ec56b 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,10 @@
+2023-10-23 Gavin Smith <gavinsmith0123@gmail.com>
+
+ * tp/Texinfo/XS/xspara.c (get_utf8_codepoint):
+ Wrapper for mbrtowc/btowc.
+ [_WIN32]: Do not call btowc, as it was tested to be very slow
+ on MinGW. Report from Eli Zaretskii.
+
2023-10-18 Gavin Smith <gavinsmith0123@gmail.com>
Texinfo 7.1
diff --git a/tp/Texinfo/XS/xspara.c b/tp/Texinfo/XS/xspara.c
index 7c6895a7ff..e1cddcdc2a 100644
--- a/tp/Texinfo/XS/xspara.c
+++ b/tp/Texinfo/XS/xspara.c
@@ -684,6 +684,30 @@ xspara_end (void)
/* characters triggering an end of sentence */
#define end_sentence_characters ".?!"
+/* Wrapper for mbrtowc. Set *PWC and return length of codepoint in bytes. */
+size_t
+get_utf8_codepoint (wchar_t *pwc, const char *mbs, size_t n)
+{
+#ifdef _WIN32
+ /* Use the above implementation of mbrtowc. Do not use btowc, as it
+ does not exist as standard on MS-Windows and was tested to be
+ very slow on MinGW. */
+ return mbrtowc (pwc, mbs, n, NULL);
+#else
+ if (!PRINTABLE_ASCII(*mbs))
+ {
+ return mbrtowc (pwc, mbs, n, NULL);
+ }
+ else
+ {
+ /* Functionally the same as mbrtowc but (tested) slightly quicker. */
+ *pwc = btowc (*mbs);
+ return 1;
+ }
+#endif
+}
+
+
/* Add WORD to paragraph in RESULT, not refilling WORD. If we go past the end
of the line start a new one. TRANSPARENT means that the letters in WORD
are ignored for the purpose of deciding whether a full stop ends a sentence
@@ -730,18 +754,7 @@ xspara__add_next (TEXT *result, char *word, int word_len,
int transparent)
if (!strchr (end_sentence_characters
after_punctuation_characters, *p))
{
- if (!PRINTABLE_ASCII(*p))
- {
- wchar_t wc = L'\0';
- mbrtowc (&wc, p, len, NULL);
- state.last_letter = wc;
- break;
- }
- else
- {
- state.last_letter = btowc (*p);
- break;
- }
+ get_utf8_codepoint (&state.last_letter, p, len);
}
}
}
@@ -1013,16 +1026,7 @@ xspara_add_text (char *text, int len)
}
/************** Not a white space character. *****************/
- if (!PRINTABLE_ASCII(*p))
- {
- char_len = mbrtowc (&wc, p, len, NULL);
- }
- else
- {
- /* Functonally the same as mbrtowc but (tested) slightly quicker. */
- char_len = 1;
- wc = btowc (*p);
- }
+ char_len = get_utf8_codepoint (&wc, p, len);
if ((long) char_len == 0)
break; /* Null character. Shouldn't happen. */