[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
branch master updated: * tp/Texinfo/Convert/HTML.pm (url_protect_url_text) [...]
From: |
Patrice Dumas |
Subject: |
branch master updated: * tp/Texinfo/Convert/HTML.pm (url_protect_url_text) (url_protect_file_text), tp/Texinfo/XS/convert/convert_html.c (url_protect_url_text, url_protect_file_text): replace end of lines by spaces before percent encoding. |
Date: |
Sun, 18 Feb 2024 10:39:21 -0500 |
This is an automated email from the git hooks/post-receive script.
pertusus pushed a commit to branch master
in repository texinfo.
The following commit(s) were added to refs/heads/master by this push:
new d0eab9ad38 * tp/Texinfo/Convert/HTML.pm (url_protect_url_text)
(url_protect_file_text), tp/Texinfo/XS/convert/convert_html.c
(url_protect_url_text, url_protect_file_text): replace end of lines by spaces
before percent encoding.
d0eab9ad38 is described below
commit d0eab9ad38d7bd7f9e34b7f9eb5580c9728ee3a4
Author: Patrice Dumas <pertusus@free.fr>
AuthorDate: Sun Feb 18 16:39:18 2024 +0100
* tp/Texinfo/Convert/HTML.pm (url_protect_url_text)
(url_protect_file_text), tp/Texinfo/XS/convert/convert_html.c
(url_protect_url_text, url_protect_file_text): replace end of lines by
spaces before percent encoding.
---
ChangeLog | 7 ++++
tp/Texinfo/Convert/HTML.pm | 8 ++--
tp/Texinfo/XS/convert/convert_html.c | 54 +++++++++++++++++---------
tp/t/results/html_tests/end_of_line_in_uref.pl | 2 +-
4 files changed, 49 insertions(+), 22 deletions(-)
diff --git a/ChangeLog b/ChangeLog
index 59d3d8a06e..54642d4b74 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,10 @@
+2024-02-18 Patrice Dumas <pertusus@free.fr>
+
+ * tp/Texinfo/Convert/HTML.pm (url_protect_url_text)
+ (url_protect_file_text), tp/Texinfo/XS/convert/convert_html.c
+ (url_protect_url_text, url_protect_file_text): replace end of lines by
+ spaces before percent encoding.
+
2024-02-18 Patrice Dumas <pertusus@free.fr>
* tp/Texinfo/Convert/HTML.pm (_convert_heading_command),
diff --git a/tp/Texinfo/Convert/HTML.pm b/tp/Texinfo/Convert/HTML.pm
index f533f70375..89010b1163 100644
--- a/tp/Texinfo/Convert/HTML.pm
+++ b/tp/Texinfo/Convert/HTML.pm
@@ -9066,13 +9066,13 @@ sub convert_tree($$;$)
# Protect an url, in which characters with specific meaning in url are
# considered to have their specific meaning.
-# TODO turn end of lines to spaces? Currently, an end of line is percent
-# protected, it is most likely not what the author intended. Tested in
-# html_tests.t end_of_line_in_uref
sub url_protect_url_text($$)
{
my $self = shift;
my $input_string = shift;
+ # turn end of lines to spaces, as it is most likely what is expected
+ # rather than a percent encoded end of line.
+ $input_string =~ s/[\n\r]+/ /g;
# percent encode character string. It is better use UTF-8 irrespective
# of the actual charset of the HTML output file, according to the tests done.
my $href = encode("UTF-8", $input_string);
@@ -9088,6 +9088,8 @@ sub url_protect_file_text($$)
{
my $self = shift;
my $input_string = shift;
+ # turn end of lines to spaces, as it is most likely what is expected.
+ $input_string =~ s/[\n\r]+/ /g;
# percent encode character string. It is better use UTF-8 irrespective
# of the actual charset of the HTML output file, according to the tests done.
my $href = encode("UTF-8", $input_string);
diff --git a/tp/Texinfo/XS/convert/convert_html.c b/tp/Texinfo/XS/convert/convert_html.c
index 4fe3e7656f..dab4658b83 100644
--- a/tp/Texinfo/XS/convert/convert_html.c
+++ b/tp/Texinfo/XS/convert/convert_html.c
@@ -2583,21 +2583,30 @@ url_protect_url_text (CONVERTER *self, const char *input_string)
}
else
{
- int i;
- int char_len = 1;
- if (!isascii (*p))
+ int n = strspn (p, "\r\n");
+ if (n)
{
- /* Protect UTF-8 with continuation bytes. */
- while ((p[char_len] & 0xC0) == 0x80)
- char_len++;
+ text_printf (&text, "%%%02x", (unsigned char)' ');
+ p += n;
}
- for (i = 0; i < char_len; i++)
+ else
{
+ int i;
+ int char_len = 1;
+ if (!isascii (*p))
+ {
+ /* Protect UTF-8 with continuation bytes. */
+ while ((p[char_len] & 0xC0) == 0x80)
+ char_len++;
+ }
+ for (i = 0; i < char_len; i++)
+ {
/* the reason for forcing (unsigned char) is that the %x modifier
expects an unsigned int parameter and a char will usually be
promoted to an int when passed to a varargs function */
- text_printf (&text, "%%%02x", (unsigned char)*p);
- p += 1;
+ text_printf (&text, "%%%02x", (unsigned char)*p);
+ p += 1;
+ }
}
}
}
@@ -2642,21 +2651,30 @@ url_protect_file_text (CONVERTER *self, const char *input_string)
}
else
{
- int i;
- int char_len = 1;
- if (!isascii (*p))
+ int n = strspn (p, "\r\n");
+ if (n)
{
- /* Protect UTF-8 with continuation bytes. */
- while ((p[char_len] & 0xC0) == 0x80)
- char_len++;
+ text_printf (&text, "%%%02x", (unsigned char)' ');
+ p += n;
}
- for (i = 0; i < char_len; i++)
+ else
{
+ int i;
+ int char_len = 1;
+ if (!isascii (*p))
+ {
+ /* Protect UTF-8 with continuation bytes. */
+ while ((p[char_len] & 0xC0) == 0x80)
+ char_len++;
+ }
+ for (i = 0; i < char_len; i++)
+ {
/* the reason for forcing (unsigned char) is that the %x modifier
expects an unsigned int parameter and a char will usually be
promoted to an int when passed to a varargs function */
- text_printf (&text, "%%%02x", (unsigned char)*p);
- p += 1;
+ text_printf (&text, "%%%02x", (unsigned char)*p);
+ p += 1;
+ }
}
}
}
diff --git a/tp/t/results/html_tests/end_of_line_in_uref.pl b/tp/t/results/html_tests/end_of_line_in_uref.pl
index 5ddc9f74b0..97bbf79f53 100644
--- a/tp/t/results/html_tests/end_of_line_in_uref.pl
+++ b/tp/t/results/html_tests/end_of_line_in_uref.pl
@@ -92,7 +92,7 @@ $result_converted{'html'}->{'end_of_line_in_uref'} =
'<!DOCTYPE html>
</head>
<body lang="en">
-<p>See the <a class="uref" href="https://gcc.gnu.org/codingconventions.html#Spelling%0aSpelling">terminology and markup</a> section.</p>
+<p>See the <a class="uref" href="https://gcc.gnu.org/codingconventions.html#Spelling%20Spelling">terminology and markup</a> section.</p>
</body>
[Prev in Thread] |
Current Thread |
[Next in Thread] |
- branch master updated: * tp/Texinfo/Convert/HTML.pm (url_protect_url_text) (url_protect_file_text), tp/Texinfo/XS/convert/convert_html.c (url_protect_url_text, url_protect_file_text): replace end of lines by spaces before percent encoding.,
Patrice Dumas <=