[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
branch master updated: * tp/Texinfo/XS/xspara.c (xspara_add_text): Keep
From: |
Gavin D. Smith |
Subject: |
branch master updated: * tp/Texinfo/XS/xspara.c (xspara_add_text): Keep running total of return values of uc_width and pass it to xspara__add_next. Remove an apparently obsolete comment about the width of accent characters. (xspara__add_next): Take extra argument. If non-negative, use for the column count of the word, rather than using uc_width. This way we avoid calling it twice for every character. |
Date: |
Sun, 12 Nov 2023 07:13:36 -0500 |
This is an automated email from the git hooks/post-receive script.
gavin pushed a commit to branch master
in repository texinfo.
The following commit(s) were added to refs/heads/master by this push:
new 10b96c1995 * tp/Texinfo/XS/xspara.c (xspara_add_text): Keep running
total of return values of uc_width and pass it to xspara__add_next. Remove an
apparently obsolete comment about the width of accent characters.
(xspara__add_next): Take extra argument. If non-negative, use for the column
count of the word, rather than using uc_width. This way we avoid calling it
twice for every character.
10b96c1995 is described below
commit 10b96c19956e9a851b2abda9a65631694ccac6f6
Author: Gavin Smith <gavinsmith0123@gmail.com>
AuthorDate: Sun Nov 12 12:13:14 2023 +0000
* tp/Texinfo/XS/xspara.c (xspara_add_text):
Keep running total of return values of uc_width and pass it to
xspara__add_next. Remove an apparently obsolete comment about
the width of accent characters.
(xspara__add_next): Take extra argument. If non-negative, use for
the column count of the word, rather than using uc_width. This way
we avoid calling it twice for every character.
---
ChangeLog | 10 +++++
tp/Texinfo/XS/xspara.c | 99 +++++++++++++++++++++++++++++---------------------
2 files changed, 67 insertions(+), 42 deletions(-)
diff --git a/ChangeLog b/ChangeLog
index 728ff498e6..9a04e4ffd2 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,13 @@
+2023-11-12 Gavin Smith <gavinsmith0123@gmail.com>
+
+ * tp/Texinfo/XS/xspara.c (xspara_add_text):
+ Keep running total of return values of uc_width and pass it to
+ xspara_add__next. Remove an apparently obsolete comment about
+ the width of accent characters.
+ (xspara__add_next): Take extra argument. If non-negative, use for
+ the column count of the word, rather than using uc_width. This way
+ we avoid calling it twice for every character.
+
2023-11-12 Patrice Dumas <pertusus@free.fr>
* tp/Texinfo/XS/convert/convert_html.c (HTML_COMMAND_STRUCT)
diff --git a/tp/Texinfo/XS/xspara.c b/tp/Texinfo/XS/xspara.c
index f313d03046..09a2ac2d77 100644
--- a/tp/Texinfo/XS/xspara.c
+++ b/tp/Texinfo/XS/xspara.c
@@ -601,9 +601,11 @@ xspara_end (void)
/* Add WORD to paragraph in RESULT, not refilling WORD. If we go past the end
of the line start a new one. TRANSPARENT means that the letters in WORD
are ignored for the purpose of deciding whether a full stop ends a sentence
- or not. */
+ or not. If COL_COUNT is non-negative, it is the number of screen columns
+ taken up by the word. */
void
-xspara__add_next (TEXT *result, char *word, int word_len, int transparent)
+xspara__add_next (TEXT *result, char *word, int word_len,
+ int transparent, int col_count)
{
dTHX;
@@ -650,45 +652,50 @@ xspara__add_next (TEXT *result, char *word, int word_len, int transparent)
}
else
{
- /* Calculate length of multibyte string in characters. */
- int len = 0;
- int left = word_len;
- char32_t w;
- char *p = word;
-
- while (left > 0)
+ if (col_count >= 0)
+ state.word_counter += col_count;
+ else
{
- int columns;
- int char_len;
+ /* Calculate length of multibyte string in characters. */
+ int len = 0;
+ int left = word_len;
+ char32_t w;
+ char *p = word;
- if (PRINTABLE_ASCII(*p))
+ while (left > 0)
{
- len++; p++; left--;
- continue;
- }
+ int columns;
+ int char_len;
- char_len = u8_mbtouc (&w, p, left);
- if (char_len == (size_t) -2) {
- /* unfinished multibyte character */
- char_len = left;
- } else if (char_len == (size_t) -1) {
- /* invalid character */
- char_len = 1;
- } else if (char_len == 0) {
- /* not sure what this means but we must avoid an infinite loop.
- Possibly only happens with invalid strings */
- char_len = 1;
- }
- left -= char_len;
-
- columns = uc_width (w, "UTF-8");
- if (columns > 0)
- len += columns;
-
- p += char_len;
- }
+ if (PRINTABLE_ASCII(*p))
+ {
+ len++; p++; left--;
+ continue;
+ }
+
+ char_len = u8_mbtouc (&w, p, left);
+ if (char_len == (size_t) -2) {
+ /* unfinished multibyte character */
+ char_len = left;
+ } else if (char_len == (size_t) -1) {
+ /* invalid character */
+ char_len = 1;
+ } else if (char_len == 0) {
+ /* not sure what this means but we must avoid an infinite loop.
+ Possibly only happens with invalid strings */
+ char_len = 1;
+ }
+ left -= char_len;
+
+ columns = uc_width (w, "UTF-8");
+ if (columns > 0)
+ len += columns;
+
+ p += char_len;
+ }
+ state.word_counter += len;
+ }
- state.word_counter += len;
if (state.counter != 0
&& state.counter + state.word_counter + state.space_counter
@@ -715,7 +722,7 @@ xspara_add_next (char *text, int text_len, int transparent)
text_reset (&t);
state.end_line_count = 0;
- xspara__add_next (&t, text, text_len, transparent);
+ xspara__add_next (&t, text, text_len, transparent, -1);
return t;
}
@@ -795,6 +802,12 @@ xspara_add_text (char *text, int len)
static TEXT result;
enum text_class type = type_NULL, next_type = type_NULL;
+ /* Column count of next type_regular block, either for type or
+ next_type. We do not have two type_regular blocks in a row so there
+ is no chance of this being overwritten before it is used. It is
+ zeroed when the block is output. */
+ int regular_col_count = 0;
+
dTHX;
text_reset (&result);
@@ -856,12 +869,13 @@ xspara_add_text (char *text, int len)
continue;
}
- /* Note: width == 0 includes accent characters which should not
- properly increase the column count. This is not what the pure
- Perl code does, though. */
+ /* Note: width == 0 includes accent characters. */
width = uc_width (wc, "UTF-8");
if (width == 1 || width == 0)
- next_type = type_regular;
+ {
+ regular_col_count += width;
+ next_type = type_regular;
+ }
else if (width == 2)
{
next_type = type_double_width;
@@ -1020,7 +1034,8 @@ xspara_add_text (char *text, int len)
/*************** Word character ******************************/
else if (type == type_regular)
{
- xspara__add_next (&result, p, q - p, 0);
+ xspara__add_next (&result, p, q - p, 0, regular_col_count);
+ regular_col_count = 0;
/* Now check for an end of sentence. We can iterate backwards
by bytes as all the end-sentence characters or punctuation are
[Prev in Thread] |
Current Thread |
[Next in Thread] |
- branch master updated: * tp/Texinfo/XS/xspara.c (xspara_add_text): Keep running total of return values of uc_width and pass it to xspara__add_next. Remove an apparently obsolete comment about the width of accent characters. (xspara__add_next): Take extra argument. If non-negative, use for the column count of the word, rather than using uc_width. This way we avoid calling it twice for every character.,
Gavin D. Smith <=