[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
branch master updated: Wrapper for u8_strconv_from_encoding
From: Gavin D. Smith
Subject: branch master updated: Wrapper for u8_strconv_from_encoding
Date: Mon, 19 Feb 2024 16:32:58 -0500
This is an automated email from the git hooks/post-receive script.
gavin pushed a commit to branch master
in repository texinfo.
The following commit(s) were added to refs/heads/master by this push:
new 9e3da1f8fb Wrapper for u8_strconv_from_encoding
9e3da1f8fb is described below
commit 9e3da1f8fb0518f6dea8a72ee097f0c2fd5d0b1f
Author: Gavin Smith <gavinsmith0123@gmail.com>
AuthorDate: Mon Feb 19 21:32:28 2024 +0000
Wrapper for u8_strconv_from_encoding
* tp/Texinfo/XS/main/unicode.c (utf8_from_string):
Create wrapper for u8_strconv_from_encoding. This simplifies
the code that calls it as it is always called in the same way,
with a "UTF-8" encoding specified, and allows potentially swapping
out this implementation with e.g. a simple cast. All callers of
u8_strconv_from_encoding updated.
---
ChangeLog | 11 +++++++++
tp/Texinfo/XS/convert/convert_html.c | 13 ++++------
tp/Texinfo/XS/convert/converter.c | 6 ++---
tp/Texinfo/XS/main/manipulate_indices.c | 10 +++-----
tp/Texinfo/XS/main/manipulate_tree.c | 4 ++--
tp/Texinfo/XS/main/node_name_normalization.c | 3 +--
tp/Texinfo/XS/main/unicode.c | 28 +++++++++++-----------
tp/Texinfo/XS/main/unicode.h | 3 +++
tp/Texinfo/XS/main/utils.c | 16 ++++---------
tp/Texinfo/XS/parsetexi/def.c | 7 +++---
.../XS/structuring_transfo/transformations.c | 7 +++---
11 files changed, 51 insertions(+), 57 deletions(-)
diff --git a/ChangeLog b/ChangeLog
index 45f932d352..4f9ace00ea 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,14 @@
+2024-02-19 Gavin Smith <gavinsmith0123@gmail.com>
+
+ Wrapper for u8_strconv_from_encoding
+
+ * tp/Texinfo/XS/main/unicode.c (utf8_from_string):
+ Create wrapper for u8_strconv_from_encoding. This simplifies
+ the code that calls it as it is always called in the same way,
+ with a "UTF-8" encoding specified, and allows potentially swapping
+ out this implementation with e.g. a simple cast. All callers of
+ u8_strconv_from_encoding updated.
+
2024-02-19 Patrice Dumas <pertusus@free.fr>
* tp/Texinfo/Structuring.pm (sectioning_structure),
diff --git a/tp/Texinfo/XS/convert/convert_html.c b/tp/Texinfo/XS/convert/convert_html.c
index dab4658b83..181e7d7b7b 100644
--- a/tp/Texinfo/XS/convert/convert_html.c
+++ b/tp/Texinfo/XS/convert/convert_html.c
@@ -9455,8 +9455,7 @@ css_string_accent (CONVERTER *self, const char *text,
if (!p)
{
/* check if a character matches */
- encoded_u8 = u8_strconv_from_encoding (text, "UTF-8",
- iconveh_question_mark);
+ encoded_u8 = utf8_from_string (text);
next = u8_next (&first_char, encoded_u8);
if (next && (uc_is_general_category (first_char, UC_CATEGORY_L)
/* ASCII digits */
@@ -9481,8 +9480,7 @@ css_string_accent (CONVERTER *self, const char *text,
const uint8_t *remaining;
if (!next)
{
- encoded_u8 = u8_strconv_from_encoding (text, "UTF-8",
- iconveh_question_mark);
+ encoded_u8 = utf8_from_string (text);
next = encoded_u8;
}
remaining = u8_next (&second_char, next);
@@ -9562,9 +9560,7 @@ css_string_accent (CONVERTER *self, const char *text,
/* determine the hexadecimal unicode point of the normalized
character to output in the format expected in CSS strings */
char *next_text;
- uint8_t *encoded_u8 = u8_strconv_from_encoding (
- normalized_accent_text, "UTF-8",
- iconveh_question_mark);
+ uint8_t *encoded_u8 = utf8_from_string (normalized_accent_text);
ucs4_t first_char;
const uint8_t *next = u8_next (&first_char, encoded_u8);
text_printf (&accented_text, "\\%04lX ", first_char);
@@ -12554,8 +12550,7 @@ convert_printindex_command (CONVERTER *self, const enum command_id cmd,
for (i = 0; i < index_sorted->letter_number; i++)
{
char *letter = index_sorted->letter_entries[i].letter;
- uint8_t *encoded_u8 = u8_strconv_from_encoding (letter, "UTF-8",
- iconveh_question_mark);
+ uint8_t *encoded_u8 = utf8_from_string (letter);
ucs4_t next_char;
u8_next (&next_char, encoded_u8);
letter_is_symbol[i]
diff --git a/tp/Texinfo/XS/convert/converter.c b/tp/Texinfo/XS/convert/converter.c
index 546cda2330..8012d58a37 100644
--- a/tp/Texinfo/XS/convert/converter.c
+++ b/tp/Texinfo/XS/convert/converter.c
@@ -1022,8 +1022,7 @@ next_for_tieaccent (const char *text, const char **next)
}
else
{
- uint8_t *encoded_u8 = u8_strconv_from_encoding (text, "UTF-8",
- iconveh_question_mark);
+ uint8_t *encoded_u8 = utf8_from_string (text);
ucs4_t first_char;
u8_next (&first_char, encoded_u8);
free (encoded_u8);
@@ -1099,8 +1098,7 @@ xml_numeric_entity_accent (enum command_id cmd, const char *text)
xasprintf (&accented_char, "%s%s", text,
unicode_diacritics[cmd].text);
normalized_char = normalize_NFC (accented_char);
- encoded_u8 = u8_strconv_from_encoding (normalized_char, "UTF-8",
- iconveh_question_mark);
+ encoded_u8 = utf8_from_string (normalized_char);
next = u8_next (&first_char, encoded_u8);
if (next)
{
diff --git a/tp/Texinfo/XS/main/manipulate_indices.c b/tp/Texinfo/XS/main/manipulate_indices.c
index 5799b98ef5..387833d868 100644
--- a/tp/Texinfo/XS/main/manipulate_indices.c
+++ b/tp/Texinfo/XS/main/manipulate_indices.c
@@ -536,9 +536,7 @@ setup_index_entries_sort_strings (ERROR_MESSAGE_LIST *error_messages,
= &entry_sort_string.sort_string_subentries[k];
/* TODO quite inefficient, only need the first character */
encoded_u8
- = u8_strconv_from_encoding (
- subentry_sort_string->sort_string,
- "UTF-8", iconveh_question_mark);
+ = utf8_from_string (subentry_sort_string->sort_string);
new_len = u8_strmbtouc (&next_char, encoded_u8);
if (new_len > 0
&& uc_is_property (next_char,
UC_PROPERTY_ALPHABETIC))
@@ -1018,8 +1016,7 @@ sort_indices_by_letter (DOCUMENT *document, ERROR_MESSAGE_LIST *error_messages,
= &sortable_index_entries->sortable_entries[j];
char *sort_string
= sortable_entry->sortable_subentries[0].sort_string;
- uint8_t *encoded_u8 = u8_strconv_from_encoding (sort_string, "UTF-8",
- iconveh_question_mark);
+ uint8_t *encoded_u8 = utf8_from_string (sort_string);
uint8_t *current_u8 = encoded_u8;
char *letter_string;
char *upper_letter_string;
@@ -1055,8 +1052,7 @@ sort_indices_by_letter (DOCUMENT *document, ERROR_MESSAGE_LIST *error_messages,
free (letter_string);
norm_letter_string = normalize_NFKD (upper_letter_string);
free (upper_letter_string);
- encoded_u8 = u8_strconv_from_encoding (norm_letter_string, "UTF-8",
- iconveh_question_mark);
+ encoded_u8 = utf8_from_string (norm_letter_string);
free (norm_letter_string);
current_u8 = encoded_u8;
diff --git a/tp/Texinfo/XS/main/manipulate_tree.c b/tp/Texinfo/XS/main/manipulate_tree.c
index 26f7e7eb5c..3003a6be09 100644
--- a/tp/Texinfo/XS/main/manipulate_tree.c
+++ b/tp/Texinfo/XS/main/manipulate_tree.c
@@ -32,6 +32,7 @@
#include "targets.h"
#include "utils.h"
#include "manipulate_tree.h"
+#include "unicode.h"
/* copy_tree from Texinfo::Common */
@@ -878,8 +879,7 @@ protect_text (ELEMENT *current, char *to_protect)
if (current->source_mark_list.number)
{
- u8_text = u8_strconv_from_encoding (p, "UTF-8",
- iconveh_question_mark);
+ u8_text = utf8_from_string (p);
u8_p = u8_text;
current_position = 0;
diff --git a/tp/Texinfo/XS/main/node_name_normalization.c b/tp/Texinfo/XS/main/node_name_normalization.c
index cb4cd57421..3f20b916ec 100644
--- a/tp/Texinfo/XS/main/node_name_normalization.c
+++ b/tp/Texinfo/XS/main/node_name_normalization.c
@@ -191,8 +191,7 @@ protect_unicode_char (const char *text, TEXT *result)
char *str;
/* determine unicode codepoint */
- encoded_u8 = u8_strconv_from_encoding (text, "UTF-8",
- iconveh_question_mark);
+ encoded_u8 = utf8_from_string (text);
next = u8_next (&next_char, encoded_u8);
if (next && *next)
bug ("Something left on next_str/encoded_u8\n");
diff --git a/tp/Texinfo/XS/main/unicode.c b/tp/Texinfo/XS/main/unicode.c
index 704f14041c..38a29d57e8 100644
--- a/tp/Texinfo/XS/main/unicode.c
+++ b/tp/Texinfo/XS/main/unicode.c
@@ -37,21 +37,26 @@
#include "accent_tables_8bit_codepoints.c"
+uint8_t *
+utf8_from_string (const char *text)
+{
+ /* TODO error checking? Or cast (uint8_t *) instead of conversion? */
+ return u8_strconv_from_encoding (text, "UTF-8", iconveh_question_mark);
+}
+
char *
normalize_NFC (const char *text)
{
size_t lengthp;
char *result = 0;
- /* TODO error checking? Or cast (uint8_t *) instead of conversion? */
- uint8_t *encoded_u8 = u8_strconv_from_encoding (text, "UTF-8",
- iconveh_question_mark);
+ uint8_t *encoded_u8 = utf8_from_string (text);
/* +1 to have the terminating NUL included in the string */
uint8_t *normalized_u8 = u8_normalize (UNINORM_NFC, encoded_u8,
u8_strlen (encoded_u8)+1,
NULL, &lengthp);
free (encoded_u8);
- result = u8_strconv_to_encoding (normalized_u8, "UTF-8", iconveh_question_mark);
+ result = utf8_from_string (normalized_u8);
free (normalized_u8);
return result;
}
@@ -62,15 +67,13 @@ normalize_NFKD (const char *text)
size_t lengthp;
char *result = 0;
- /* TODO error checking? Or cast (uint8_t *) instead of conversion? */
- uint8_t *encoded_u8 = u8_strconv_from_encoding (text, "UTF-8",
- iconveh_question_mark);
+ uint8_t *encoded_u8 = utf8_from_string (text);
/* +1 to have the terminating NUL included in the string */
uint8_t *normalized_u8 = u8_normalize (UNINORM_NFKD, encoded_u8,
u8_strlen (encoded_u8)+1,
NULL, &lengthp);
free (encoded_u8);
- result = u8_strconv_to_encoding (normalized_u8, "UTF-8", iconveh_question_mark);
+ result = utf8_from_string (normalized_u8);
free (normalized_u8);
return result;
}
@@ -110,8 +113,7 @@ unicode_accent (const char *text, const ELEMENT *e)
{
/* tieaccent diacritic is naturally and correctly composed
between two characters */
- uint8_t *encoded_u8 = u8_strconv_from_encoding (text, "UTF-8",
- iconveh_question_mark);
+ uint8_t *encoded_u8 = utf8_from_string (text);
const uint8_t *next;
ucs4_t first_char;
next = u8_next (&first_char, encoded_u8);
@@ -133,15 +135,13 @@ unicode_accent (const char *text, const ELEMENT *e)
if (first_char_len < 0)
fatal ("u8_uctomb returns negative value");
first_char_u8[first_char_len] = 0;
- first_char_text = u8_strconv_to_encoding (first_char_u8, "UTF-8",
- iconveh_question_mark);
+ first_char_text = utf8_from_string (first_char_u8);
free (first_char_u8);
text_init (&accented_text);
text_append (&accented_text, first_char_text);
free (first_char_text);
text_append (&accented_text,
unicode_diacritics[e->cmd].text);
- next_text = u8_strconv_to_encoding (next, "UTF-8",
- iconveh_question_mark);
+ next_text = utf8_from_string (next);
text_append (&accented_text, next_text);
free (next_text);
result = normalize_NFC (accented_text.text);
diff --git a/tp/Texinfo/XS/main/unicode.h b/tp/Texinfo/XS/main/unicode.h
index 2bd700b664..7911ea2d33 100644
--- a/tp/Texinfo/XS/main/unicode.h
+++ b/tp/Texinfo/XS/main/unicode.h
@@ -3,6 +3,7 @@
#define UNICODE_H
#include <stddef.h>
+#include <unitypes.h>
#include "tree_types.h"
@@ -78,6 +79,8 @@ typedef struct DIACRITIC_UNICODE {
extern DIACRITIC_UNICODE unicode_diacritics[];
extern COMMAND_UNICODE unicode_character_brace_no_arg_commands[];
+uint8_t *utf8_from_string (const char *text);
+
int unicode_point_decoded_in_encoding (const char *encoding, char *codepoint);
char *normalize_NFC (const char *text);
diff --git a/tp/Texinfo/XS/main/utils.c b/tp/Texinfo/XS/main/utils.c
index 49639f38c0..e6a376eef1 100644
--- a/tp/Texinfo/XS/main/utils.c
+++ b/tp/Texinfo/XS/main/utils.c
@@ -45,6 +45,7 @@
#include "builtin_commands.h"
#include "api_to_perl.h"
#include "utils.h"
+#include "unicode.h"
#define min_level command_structuring_level[CM_chapter]
#define max_level command_structuring_level[CM_subsubsection]
@@ -193,9 +194,7 @@ isascii_upper (int c)
size_t
count_multibyte (const char *text)
{
- /* TODO error checking? Or cast (uint8_t *) instead of conversion? */
- uint8_t *u8_text = u8_strconv_from_encoding (text, "UTF-8",
- iconveh_question_mark);
+ uint8_t *u8_text = utf8_from_string (text);
size_t result = u8_mbsnlen (u8_text, u8_strlen (u8_text));
free (u8_text);
@@ -209,9 +208,7 @@ to_upper_or_lower_multibyte (const char *text, int lower_or_upper)
char *result;
size_t lengthp;
uint8_t *u8_result;
- /* TODO error checking? Or cast (uint8_t *) instead of conversion? */
- uint8_t *u8_text = u8_strconv_from_encoding (text, "UTF-8",
- iconveh_question_mark);
+ uint8_t *u8_text = utf8_from_string (text);
if (lower_or_upper > 0)
/* the + 1 is there to hold the terminating NULL */
u8_result = u8_toupper (u8_text, u8_strlen (u8_text) + 1,
@@ -231,9 +228,7 @@ int
width_multibyte (const char *text)
{
int result;
- /* TODO error checking? Or cast (uint8_t *) instead of conversion? */
- uint8_t *u8_text = u8_strconv_from_encoding (text, "UTF-8",
- iconveh_question_mark);
+ uint8_t *u8_text = utf8_from_string (text);
/* NOTE the libunistring documentation described encoding as
The encoding argument identifies the encoding (e.g. "ISO-8859-2"
for Polish). Looking at the code, it seems that it is only
@@ -250,8 +245,7 @@ width_multibyte (const char *text)
int
word_bytes_len_multibyte (const char *text)
{
- uint8_t *encoded_u8 = u8_strconv_from_encoding (text, "UTF-8",
- iconveh_question_mark);
+ uint8_t *encoded_u8 = utf8_from_string (text);
uint8_t *current_u8 = encoded_u8;
int len = 0;
while (1)
diff --git a/tp/Texinfo/XS/parsetexi/def.c b/tp/Texinfo/XS/parsetexi/def.c
index f46c684d24..21bbee40f4 100644
--- a/tp/Texinfo/XS/parsetexi/def.c
+++ b/tp/Texinfo/XS/parsetexi/def.c
@@ -34,6 +34,7 @@
#include "manipulate_tree.h"
#include "source_marks.h"
#include "commands.h"
+#include "unicode.h"
void
gather_def_item (ELEMENT *current, enum command_id next_command)
@@ -198,8 +199,7 @@ split_delimiters (ELEMENT *current, int starting_idx)
if (e->source_mark_list.number)
{
- u8_text = u8_strconv_from_encoding (p, "UTF-8",
- iconveh_question_mark);
+ u8_text = utf8_from_string (p);
u8_p = u8_text;
current_position = 0;
@@ -284,8 +284,7 @@ split_def_args (ELEMENT *current, int starting_idx)
if (e->source_mark_list.number)
{
- u8_text = u8_strconv_from_encoding (p, "UTF-8",
- iconveh_question_mark);
+ u8_text = utf8_from_string (p);
u8_p = u8_text;
current_position = 0;
diff --git a/tp/Texinfo/XS/structuring_transfo/transformations.c b/tp/Texinfo/XS/structuring_transfo/transformations.c
index 9dac12ee4f..821c6cba4f 100644
--- a/tp/Texinfo/XS/structuring_transfo/transformations.c
+++ b/tp/Texinfo/XS/structuring_transfo/transformations.c
@@ -45,6 +45,7 @@
#include "targets.h"
#include "node_name_normalization.h"
#include "transformations.h"
+#include "unicode.h"
/* in Common.pm */
@@ -105,8 +106,7 @@ protect_first_parenthesis (ELEMENT *element)
if (content->source_mark_list.number)
{
- u8_text = u8_strconv_from_encoding (p, "UTF-8",
- iconveh_question_mark);
+ u8_text = utf8_from_string (p);
u8_p = u8_text;
current_position = 0;
@@ -1368,8 +1368,7 @@ protect_hashchar_at_line_beginning_internal (const char *type,
memset (&(current->source_mark_list), 0,
sizeof (SOURCE_MARK_LIST));
- u8_text = u8_strconv_from_encoding (p, "UTF-8",
- iconveh_question_mark);
+ u8_text = utf8_from_string (p);
u8_p = u8_text;
current_position = 0;
[Prev in Thread] Current Thread [Next in Thread]
- branch master updated: Wrapper for u8_strconv_from_encoding, Gavin D. Smith <=