texinfo-commits
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

branch master updated: * tp/Texinfo/Convert/Unicode.pm (string_width): Do not reset the counter to zero at a newline


From: Gavin D. Smith
Subject: branch master updated: * tp/Texinfo/Convert/Unicode.pm (string_width): Do not reset the counter to zero at a newline, as this only occurs for malformed input and it is not worth the complication. * tp/Texinfo/XS/main/utils.c (width_multibyte): Make the same change. Reverses change on 2023-12-31.
Date: Sun, 28 Jan 2024 12:08:27 -0500

This is an automated email from the git hooks/post-receive script.

gavin pushed a commit to branch master
in repository texinfo.

The following commit(s) were added to refs/heads/master by this push:
     new 3a2c1ad6e0 * tp/Texinfo/Convert/Unicode.pm (string_width): Do not 
reset the counter to zero at a newline, as this only occurs for malformed input 
and it is not worth the complication. * tp/Texinfo/XS/main/utils.c 
(width_multibyte): Make the same change.  Reverses change on 2023-12-31.
3a2c1ad6e0 is described below

commit 3a2c1ad6e0eb670ef881c1241dd27e38854f3c96
Author: Gavin Smith <gavinsmith0123@gmail.com>
AuthorDate: Sun Jan 28 17:08:19 2024 +0000

    * tp/Texinfo/Convert/Unicode.pm (string_width):
    Do not reset the counter to zero at a newline, as this only
    occurs for malformed input and it is not worth the complication.
    * tp/Texinfo/XS/main/utils.c (width_multibyte): Make the same
    change.  Reverses change on 2023-12-31.
---
 ChangeLog                                          |   8 +++
 tp/Texinfo/Convert/Unicode.pm                      |  20 +++-----
 tp/Texinfo/XS/main/utils.c                         |   6 +--
 .../formats_encodings/at_commands_in_refs.pl       |  56 ++++++++++-----------
 .../at_commands_in_refs_latin1.pl                  |   2 +-
 .../res_info/at_commands_in_refs_latin1.info       | Bin 7995 -> 8002 bytes
 .../formats_encodings/at_commands_in_refs_utf8.pl  |   2 +-
 .../res_info/at_commands_in_refs_utf8.info         | Bin 8392 -> 8399 bytes
 .../unclosed_verb_on_section_line.pl               |   2 +-
 9 files changed, 48 insertions(+), 48 deletions(-)

diff --git a/ChangeLog b/ChangeLog
index 91576a5e24..bb125bb906 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,11 @@
+2024-01-28  Gavin Smith <gavinsmith0123@gmail.com>
+
+       * tp/Texinfo/Convert/Unicode.pm (string_width):
+       Do not reset the counter to zero at a newline, as this only
+       occurs for malformed input and it is not worth the complication.
+       * tp/Texinfo/XS/main/utils.c (width_multibyte): Make the same
+       change.  Reverses change on 2023-12-31.
+
 2024-01-28  Gavin Smith <gavinsmith0123@gmail.com>
 
        * tp/Texinfo/Convert/Plaintext.pm
diff --git a/tp/Texinfo/Convert/Unicode.pm b/tp/Texinfo/Convert/Unicode.pm
index 0d4ca79017..b435452a0e 100644
--- a/tp/Texinfo/Convert/Unicode.pm
+++ b/tp/Texinfo/Convert/Unicode.pm
@@ -1701,16 +1701,14 @@ sub string_width($)
     return length($string);
   }
 
-  if ($string !~ /\n/) {
-    $string =~ s/\p{InFullwidth}/\x{02}/g;
-    $string =~ s/[\p{L}\p{N}\p{P}\p{S}\p{Zs}]/\x{01}/g;
-    $string =~ s/[^\x{01}\x{02}]/\x{00}/g;
-
-    # This sums up the byte values of the bytes in $string, which now are
-    # all either 0, 1 or 2.  This is faster.  The original, more readable
-    # version is below.
-    return unpack("U0%32A*", $string);
-  }
+  $string =~ s/\p{InFullwidth}/\x{02}/g;
+  $string =~ s/[\p{L}\p{N}\p{P}\p{S}\p{Zs}]/\x{01}/g;
+  $string =~ s/[^\x{01}\x{02}]/\x{00}/g;
+
+  # This sums up the byte values of the bytes in $string, which now are
+  # all either 0, 1 or 2.  This is faster.  The original, more readable
+  # version is below.
+  return unpack("U0%32A*", $string);
 
   if (! defined($string)) {
     cluck();
@@ -1721,8 +1719,6 @@ sub string_width($)
       $width += 2;
     } elsif ($character =~ /[\p{L}\p{N}\p{P}\p{S}\p{Zs}]/) {
       $width += 1;
-    } elsif ($character eq "\n") {
-      $width = 0;
     } else {
       # zero width character: \pC (including controls), \pM, \p{Zl}, \p{Zp}
     }
diff --git a/tp/Texinfo/XS/main/utils.c b/tp/Texinfo/XS/main/utils.c
index 877b242e01..39e6825788 100644
--- a/tp/Texinfo/XS/main/utils.c
+++ b/tp/Texinfo/XS/main/utils.c
@@ -227,16 +227,12 @@ to_upper_or_lower_multibyte (const char *text, int lower_or_upper)
   return result;
 }
 
-/* end of line resets the count, same as in perl */
 int
 width_multibyte (const char *text)
 {
   int result;
-  const char *p = strrchr (text, '\n');
-  if (!p)
-    p = text;
   /* TODO error checking? Or cast (uint8_t *) instead of conversion? */
-  uint8_t *u8_text = u8_strconv_from_encoding (p, "UTF-8",
+  uint8_t *u8_text = u8_strconv_from_encoding (text, "UTF-8",
                                                  iconveh_question_mark);
   /* NOTE the libunistring documentation described encoding as
      The encoding argument identifies the encoding (e.g. "ISO-8859-2"
diff --git a/tp/t/results/formats_encodings/at_commands_in_refs.pl b/tp/t/results/formats_encodings/at_commands_in_refs.pl
index 8f6938c544..227c05137b 100644
--- a/tp/t/results/formats_encodings/at_commands_in_refs.pl
+++ b/tp/t/results/formats_encodings/at_commands_in_refs.pl
@@ -13821,7 +13821,7 @@ $result_texts{'at_commands_in_refs'} = 'Top
 
 2     ! 
  .  . ? @
-*********
+*****************
 
 3 @ { } \\ #
 ***********
@@ -15603,7 +15603,7 @@ $result_converted{'plaintext'}->{'at_commands_in_refs'} = 'Top
 
 2     !
 . . ? @
-*******
+**************
 
 3 @ { } \\ #
 ***********
@@ -16759,7 +16759,7 @@ File: ,  Node:     ! . . ? @,  Next: @ { } \\ #,  Prev: { },  Up: Top
 
 2     !
 . . ? @
-*******
+**************
 
 
 File: ,  Node: @ { } \\ #,  Next: LaTeX TeX • , © ... ...,  Prev:     ! . . 
? @,  Up: Top
@@ -16971,31 +16971,31 @@ Tag Table:
 Node: Top27
 Node: { }785
 Node:     ! . . ? @864
-Node: @ { } \\ #961
-Node: LaTeX TeX • , © ... ...1080
-Node: ≡ error→ € ¡ ↦ −1230
-Node: ≥ ≤ →1362
-Node: ª º ⋆ £ ⊣ ¿ ®1460
-Node: ⇒ ° a b a sunny day å1579
-Node: Å æ œ Æ Œ ø Ø ß ł Ł Ð ð Þ þ1736
-Node: ä ẽ î â à é ç ē e̊ e̋ ę1915
-Node: ė ĕ e̲ ẹ ě ȷ e͡e2081
-Node: ı Ḕ Ḉ2211
-Node: “ ” ‘ ’ „ ‚2309
-Node: « » « » ‹ ›2414
-Node: `` \'\' --- -- ` \'2530
-Node: AAA (fff) AAA BBB2654
-Node: CCC (rrr) CCC DDD2794
-Node: the someone <someone@somewher> <no_explain@there>2967
-Node: [f--ile1] [image src="f--ile.png" alt="alt" text="Image 
description\\"\\"\\\\."]3267
-Node:  @ {} . 3616
-Node: cite asis in @w b in r SC *str* t VAR dfn i3819
-Node: env code option samp command file C-x <ESC>4063
-Node: 8.27in4325
-Node: sansserif slanted4459
-Node: indicateurl4583
-Node: _{g}H 3^{rd}4705
-Node: <http://somewhere_aaa> text (url) ls4844
+Node: @ { } \\ #968
+Node: LaTeX TeX • , © ... ...1087
+Node: ≡ error→ € ¡ ↦ −1237
+Node: ≥ ≤ →1369
+Node: ª º ⋆ £ ⊣ ¿ ®1467
+Node: ⇒ ° a b a sunny day å1586
+Node: Å æ œ Æ Œ ø Ø ß ł Ł Ð ð Þ þ1743
+Node: ä ẽ î â à é ç ē e̊ e̋ ę1922
+Node: ė ĕ e̲ ẹ ě ȷ e͡e2088
+Node: ı Ḕ Ḉ2218
+Node: “ ” ‘ ’ „ ‚2316
+Node: « » « » ‹ ›2421
+Node: `` \'\' --- -- ` \'2537
+Node: AAA (fff) AAA BBB2661
+Node: CCC (rrr) CCC DDD2801
+Node: the someone <someone@somewher> <no_explain@there>2974
+Node: [f--ile1] [image src="f--ile.png" alt="alt" text="Image 
description\\"\\"\\\\."]3274
+Node:  @ {} . 3623
+Node: cite asis in @w b in r SC *str* t VAR dfn i3826
+Node: env code option samp command file C-x <ESC>4070
+Node: 8.27in4332
+Node: sansserif slanted4466
+Node: indicateurl4590
+Node: _{g}H 3^{rd}4712
+Node: <http://somewhere_aaa> text (url) ls4851
 
 End Tag Table
 
diff --git a/tp/t/results/formats_encodings/at_commands_in_refs_latin1.pl b/tp/t/results/formats_encodings/at_commands_in_refs_latin1.pl
index 4699c78b33..4c59d7f069 100644
--- a/tp/t/results/formats_encodings/at_commands_in_refs_latin1.pl
+++ b/tp/t/results/formats_encodings/at_commands_in_refs_latin1.pl
@@ -13893,7 +13893,7 @@ Top
 
 2     ! 
  .  . ? @
-*********
+*****************
 
 3 @ { } \\ #
 ***********
diff --git a/tp/t/results/formats_encodings/at_commands_in_refs_latin1/res_info/at_commands_in_refs_latin1.info b/tp/t/results/formats_encodings/at_commands_in_refs_latin1/res_info/at_commands_in_refs_latin1.info
index 7d90659b49..239a8a6f06 100644
Binary files a/tp/t/results/formats_encodings/at_commands_in_refs_latin1/res_info/at_commands_in_refs_latin1.info and b/tp/t/results/formats_encodings/at_commands_in_refs_latin1/res_info/at_commands_in_refs_latin1.info differ
diff --git a/tp/t/results/formats_encodings/at_commands_in_refs_utf8.pl b/tp/t/results/formats_encodings/at_commands_in_refs_utf8.pl
index c96fbc1d30..9745f7a371 100644
--- a/tp/t/results/formats_encodings/at_commands_in_refs_utf8.pl
+++ b/tp/t/results/formats_encodings/at_commands_in_refs_utf8.pl
@@ -13893,7 +13893,7 @@ Top
 
 2     ! 
  .  . ? @
-*********
+*****************
 
 3 @ { } \\ #
 ***********
diff --git a/tp/t/results/formats_encodings/at_commands_in_refs_utf8/res_info/at_commands_in_refs_utf8.info b/tp/t/results/formats_encodings/at_commands_in_refs_utf8/res_info/at_commands_in_refs_utf8.info
index 2bec874b46..8c640e3893 100644
Binary files a/tp/t/results/formats_encodings/at_commands_in_refs_utf8/res_info/at_commands_in_refs_utf8.info and b/tp/t/results/formats_encodings/at_commands_in_refs_utf8/res_info/at_commands_in_refs_utf8.info differ
diff --git a/tp/t/results/invalid_nestings/unclosed_verb_on_section_line.pl b/tp/t/results/invalid_nestings/unclosed_verb_on_section_line.pl
index 4ebc85d3ab..62b098f04c 100644
--- a/tp/t/results/invalid_nestings/unclosed_verb_on_section_line.pl
+++ b/tp/t/results/invalid_nestings/unclosed_verb_on_section_line.pl
@@ -78,7 +78,7 @@ T}';
 $result_texts{'unclosed_verb_on_section_line'} = '1 in section ruc
 
 Now text.
-=========
+=========================
 ';
 
 $result_sectioning{'unclosed_verb_on_section_line'} = {



reply via email to

[Prev in Thread] Current Thread [Next in Thread]