[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[no subject]
From: |
Patrice Dumas |
Date: |
Tue, 9 Jan 2024 08:08:19 -0500 (EST) |
branch: master
commit 03c9181681c0a98b318ccdce2ef7dfb5314ae14c
Author: Patrice Dumas <pertusus@free.fr>
AuthorDate: Tue Jan 9 14:08:07 2024 +0100
* tp/Texinfo/XS/main/utils.c (copy_strings, set_conf_string)
(set_informative_command_value): add copy_strings and set_conf_string
functions. Use set_conf_string in set_informative_command_value.
* tp/Texinfo/XS/main/convert_to_text.c
(copy_options_for_convert_text): allocate text options fields that can
be reset if converter options are reset.
* tp/Texinfo/XS/main/convert_to_text.c
(copy_options_for_convert_text, text_set_options_encoding)
(text_set_options_encoding_if_not_ascii, text_reset_options_encoding):
remove copy_options_for_convert_text enable_encoding_if_not_ascii
argument. Add text_set_options_encoding,
text_set_options_encoding_if_not_ascii and text_reset_options_encoding
to modify text options encoding field.
* tp/Texinfo/XS/convert/convert_html.c (external_node_href)
(convert_def_line_type, html_prepare_converted_output_info)
(convert_to_html_internal), tp/Texinfo/XS/convert/converter.c
(free_generic_converter), tp/Texinfo/XS/main/converter_types.h
(CONVERTER), tp/Texinfo/XS/main/get_perl_info.c
(converter_initialize): add convert_text_options field to the
CONVERTER, set it in converter_initialize and free it in
free_generic_converter. Use those text options in HTML formatting and
use the new text_*set_options* functions to set and reset the text
options, instead of setting up new text options each time one is used.
* tp/Texinfo/XS/main/get_perl_info.c (converter_initialize),
tp/Texinfo/XS/main/utils.c (set_output_encoding): implement
set_output_encoding without setting the perl specific variable. Set
output encoding early enough to have it available for
copy_options_for_convert_text call, as the corresponding perl function
is called just after.
---
ChangeLog | 36 +++++++++++++++++++
tp/Texinfo/XS/convert/convert_html.c | 60 ++++++++++++++------------------
tp/Texinfo/XS/convert/converter.c | 3 ++
tp/Texinfo/XS/main/convert_to_text.c | 67 ++++++++++++++++++++++++++++--------
tp/Texinfo/XS/main/convert_to_text.h | 8 +++--
tp/Texinfo/XS/main/converter_types.h | 5 +++
tp/Texinfo/XS/main/get_perl_info.c | 7 +++-
tp/Texinfo/XS/main/unicode.c | 4 +--
tp/Texinfo/XS/main/unicode.h | 4 +--
tp/Texinfo/XS/main/utils.c | 54 ++++++++++++++++++++++++++---
tp/Texinfo/XS/main/utils.h | 3 ++
11 files changed, 191 insertions(+), 60 deletions(-)
diff --git a/ChangeLog b/ChangeLog
index 349cd93eb5..d4300b4055 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,39 @@
+2024-01-09 Patrice Dumas <pertusus@free.fr>
+
+ * tp/Texinfo/XS/main/utils.c (copy_strings, set_conf_string)
+ (set_informative_command_value): add copy_strings and set_conf_string
+ functions. Use set_conf_string in set_informative_command_value.
+
+ * tp/Texinfo/XS/main/convert_to_text.c
+ (copy_options_for_convert_text): allocate text options fields that can
+ be reset if converter options are reset.
+
+ * tp/Texinfo/XS/main/convert_to_text.c
+ (copy_options_for_convert_text, text_set_options_encoding)
+ (text_set_options_encoding_if_not_ascii, text_reset_options_encoding):
+ remove copy_options_for_convert_text enable_encoding_if_not_ascii
+ argument. Add text_set_options_encoding,
+ text_set_options_encoding_if_not_ascii and text_reset_options_encoding
+ to modify text options encoding field.
+
+ * tp/Texinfo/XS/convert/convert_html.c (external_node_href)
+ (convert_def_line_type, html_prepare_converted_output_info)
+ (convert_to_html_internal), tp/Texinfo/XS/convert/converter.c
+ (free_generic_converter), tp/Texinfo/XS/main/converter_types.h
+ (CONVERTER), tp/Texinfo/XS/main/get_perl_info.c
+ (converter_initialize): add convert_text_options field to the
+ CONVERTER, set it in converter_initialize and free it in
+ free_generic_converter. Use those text options in HTML formatting and
+ use the new text_*set_options* functions to set and reset the text
+ options, instead of setting up new text options each time one is used.
+
+ * tp/Texinfo/XS/main/get_perl_info.c (converter_initialize),
+ tp/Texinfo/XS/main/utils.c (set_output_encoding): implement
+ set_output_encoding without setting the perl specific variable. Set
+ output encoding early enough to have it available for
+ copy_options_for_convert_text call, as the corresponding perl function
+ is called just after.
+
2024-01-09 Patrice Dumas <pertusus@free.fr>
* tp/Texinfo/Convert/Text.pm (copy_options_for_convert_text): replace
diff --git a/tp/Texinfo/XS/convert/convert_html.c
b/tp/Texinfo/XS/convert/convert_html.c
index 71b3b778b7..c1be81ef30 100644
--- a/tp/Texinfo/XS/convert/convert_html.c
+++ b/tp/Texinfo/XS/convert/convert_html.c
@@ -3083,12 +3083,11 @@ external_node_href (CONVERTER *self, const ELEMENT
*external_node,
enum htmlxref_split_type split_found = htmlxref_split_type_none;
int manual_len;
HTMLXREF_MANUAL *htmlxref_manual;
- TEXT_OPTIONS *text_conv_options = copy_options_for_convert_text (self,
0);
- text_conv_options->code_state = 1;
- manual_name = convert_to_text (manual_content, text_conv_options);
-
- free (text_conv_options);
+ self->convert_text_options->code_state++;
+ manual_name = convert_to_text (manual_content,
+ self->convert_text_options);
+ self->convert_text_options->code_state--;
if (self->conf->IGNORE_REF_TO_TOP_NODE_UP.integer > 0 && !strlen
(target))
{
@@ -14358,10 +14357,7 @@ convert_def_line_type (CONVERTER *self, const enum
element_type type,
{
/* should probably never happen */
char *text;
- TEXT_OPTIONS *text_conv_options
- = copy_options_for_convert_text (self, 0);
- text = convert_to_text (element, text_conv_options);
- free (text_conv_options);
+ text = convert_to_text (element, self->convert_text_options);
format_protect_text (self, text, result);
}
@@ -15473,16 +15469,13 @@ html_prepare_converted_output_info (CONVERTER *self)
{
char *copying_comment;
ELEMENT *tmp = new_element (ET_NONE);
- TEXT_OPTIONS *text_conv_options
- = copy_options_for_convert_text (self, 0);
tmp->contents = self->document->global_commands->copying->contents;
- copying_comment = convert_to_text (tmp, text_conv_options);
+ copying_comment = convert_to_text (tmp, self->convert_text_options);
tmp->contents.list = 0;
destroy_element (tmp);
- free (text_conv_options);
if (copying_comment && strlen (copying_comment) > 0)
{
@@ -17566,13 +17559,11 @@ convert_to_html_internal (CONVERTER *self, const
ELEMENT *element,
if (arg_flags & F_AFT_monospacetext)
{
char *text;
- TEXT_OPTIONS *text_conv_options
- = copy_options_for_convert_text (self, 0);
- text_conv_options->code_state = 1;
-
- text = convert_to_text (arg, text_conv_options);
- free (text_conv_options);
+ self->convert_text_options->code_state++;
+ text = convert_to_text (arg,
+ self->convert_text_options);
+ self->convert_text_options->code_state--;
arg_formatted->formatted[AFT_type_monospacetext]
= text;
@@ -17580,30 +17571,31 @@ convert_to_html_internal (CONVERTER *self, const
ELEMENT *element,
if (arg_flags & F_AFT_filenametext)
{
char *text;
+ self->convert_text_options->code_state++;
/* Always use encoded characters for file names */
- TEXT_OPTIONS *text_conv_options
- = copy_options_for_convert_text (self, 1);
- text_conv_options->code_state = 1;
-
- text = convert_to_text (arg, text_conv_options);
-
- free (text_conv_options);
+ text_set_options_encoding_if_not_ascii (self,
+ self->convert_text_options);
+ text = convert_to_text (arg,
+ self->convert_text_options);
+ text_reset_options_encoding
+ (self->convert_text_options);
+ self->convert_text_options->code_state--;
arg_formatted->formatted[AFT_type_filenametext] =
text;
}
if (arg_flags & F_AFT_url)
{
char *text;
+ self->convert_text_options->code_state++;
/* set the encoding to UTF-8 to always have a string that is
suitable
for percent encoding. */
- TEXT_OPTIONS *text_conv_options
- = copy_options_for_convert_text (self, 1);
- text_conv_options->code_state = 1;
- text_conv_options->encoding = "utf-8";
-
- text = convert_to_text (arg, text_conv_options);
-
- free (text_conv_options);
+ text_set_options_encoding (
+ self->convert_text_options, "utf-8");
+ text = convert_to_text (arg,
+ self->convert_text_options);
+ text_reset_options_encoding
+ (self->convert_text_options);
+ self->convert_text_options->code_state--;
arg_formatted->formatted[AFT_type_url] = text;
}
diff --git a/tp/Texinfo/XS/convert/converter.c
b/tp/Texinfo/XS/convert/converter.c
index 2c0264fa87..55cee45dc2 100644
--- a/tp/Texinfo/XS/convert/converter.c
+++ b/tp/Texinfo/XS/convert/converter.c
@@ -32,6 +32,7 @@
#include "utils.h"
#include "errors.h"
#include "builtin_commands.h"
+#include "convert_to_text.h"
#include "node_name_normalization.h"
#include "convert_utils.h"
#include "translations.h"
@@ -868,6 +869,8 @@ free_generic_converter (CONVERTER *self)
free_output_files_information (&self->output_files_information);
free_output_unit_files (&self->output_unit_files);
+ destroy_text_options (self->convert_text_options);
+
wipe_error_message_list (&self->error_messages);
}
diff --git a/tp/Texinfo/XS/main/convert_to_text.c
b/tp/Texinfo/XS/main/convert_to_text.c
index 313093bad0..4a009dc1bc 100644
--- a/tp/Texinfo/XS/main/convert_to_text.c
+++ b/tp/Texinfo/XS/main/convert_to_text.c
@@ -145,21 +145,18 @@ destroy_text_options (TEXT_OPTIONS *text_options)
tico_option_name(ASCII_GLYPH) \
tico_option_name(TEST)
-/* note that nothing is allocated, except for the TEXT_OPTIONS themselves */
+/* the string and strlist options need to be copied, in case they are
+ deallocated if options are reset */
TEXT_OPTIONS *
-copy_options_for_convert_text (CONVERTER *self,
- int enable_encoding_if_not_ascii)
+copy_options_for_convert_text (CONVERTER *self)
{
TEXT_OPTIONS *options = new_text_options ();
int text_indicator_option;
- if ((self->conf->ENABLE_ENCODING.integer > 0
+ if (self->conf->ENABLE_ENCODING.integer > 0
&& self->conf->OUTPUT_ENCODING_NAME.string)
- || (enable_encoding_if_not_ascii
- && self->conf->OUTPUT_ENCODING_NAME.string
- && strcmp (self->conf->OUTPUT_ENCODING_NAME.string, "us-ascii")))
{
- options->encoding = self->conf->OUTPUT_ENCODING_NAME.string;
+ options->encoding = strdup (self->conf->OUTPUT_ENCODING_NAME.string);
}
#define tico_option_name(name) \
@@ -169,12 +166,11 @@ copy_options_for_convert_text (CONVERTER *self,
TEXT_INDICATOR_CONVERTER_OPTIONS
#undef tico_option_name
- free (options->expanded_formats);
- options->expanded_formats = self->expanded_formats;
+ memcpy (options->expanded_formats, self->expanded_formats,
+ expanded_formats_number () * sizeof (EXPANDED_FORMAT));
- memcpy (&options->include_directories,
- &self->conf->INCLUDE_DIRECTORIES.strlist,
- sizeof (STRING_LIST));
+ copy_strings (&options->include_directories,
+ self->conf->INCLUDE_DIRECTORIES.strlist);
options->other_converter_options = self->conf;
options->converter = self;
@@ -182,6 +178,49 @@ copy_options_for_convert_text (CONVERTER *self,
return options;
}
+void
+text_set_options_encoding_if_not_ascii (CONVERTER *self,
+ TEXT_OPTIONS *text_options)
+{
+ if (self->conf->OUTPUT_ENCODING_NAME.string
+ && strcmp (self->conf->OUTPUT_ENCODING_NAME.string, "us-ascii"))
+ {
+ if (text_options->_saved_enabled_encoding)
+ {
+ fprintf (stderr,
+ "BUG: if_not_ascii _saved_enabled_encoding set: %s / %s\n",
+ text_options->_saved_enabled_encoding,
+ self->conf->OUTPUT_ENCODING_NAME.string);
+ text_options->_saved_enabled_encoding = 0;
+ }
+
+ text_options->_saved_enabled_encoding = text_options->encoding;
+ text_options->encoding = self->conf->OUTPUT_ENCODING_NAME.string;
+ }
+}
+
+/* the caller should ensure that encoding will remain allocated until
+ the next call to text_reset_options_encoding */
+void
+text_set_options_encoding (TEXT_OPTIONS *text_options, char *encoding)
+{
+ if (text_options->_saved_enabled_encoding)
+ {
+ fprintf (stderr, "BUG: _saved_enabled_encoding set: %s / %s\n",
+ text_options->_saved_enabled_encoding, encoding);
+ text_options->_saved_enabled_encoding = 0;
+ }
+ text_options->_saved_enabled_encoding = text_options->encoding;
+ text_options->encoding = encoding;
+}
+
+void
+text_reset_options_encoding (TEXT_OPTIONS *text_options)
+{
+ text_options->encoding = text_options->_saved_enabled_encoding;
+ text_options->_saved_enabled_encoding = 0;
+}
+
static TEXT_OPTIONS text_accents_options;
/* format an accent command and nested accents within as Text. */
@@ -217,7 +256,7 @@ brace_no_arg_command (const ELEMENT *e, TEXT_OPTIONS
*options)
{
char *result = 0;
enum command_id cmd = e->cmd;
- char *encoding = 0;
+ const char *encoding = 0;
if (options->encoding)
encoding = options->encoding;
diff --git a/tp/Texinfo/XS/main/convert_to_text.h
b/tp/Texinfo/XS/main/convert_to_text.h
index 7d72347ba2..82b44bbfe0 100644
--- a/tp/Texinfo/XS/main/convert_to_text.h
+++ b/tp/Texinfo/XS/main/convert_to_text.h
@@ -10,6 +10,7 @@
typedef struct TEXT_OPTIONS {
int set_case;
char *encoding; /* enabled_encoding */
+ char *_saved_enabled_encoding; /* used to keep the main encoding */
int code_state; /* code */
int raw_state;
int sort_string;
@@ -32,7 +33,10 @@ typedef struct TEXT_OPTIONS {
char *convert_to_text (const ELEMENT *root, TEXT_OPTIONS *text_options);
TEXT_OPTIONS *new_text_options (void);
void destroy_text_options (TEXT_OPTIONS *text_options);
-TEXT_OPTIONS *copy_options_for_convert_text (CONVERTER *self,
- int enable_encoding_if_not_ascii);
+TEXT_OPTIONS *copy_options_for_convert_text (CONVERTER *self);
+void text_set_options_encoding_if_not_ascii (CONVERTER *self,
+ TEXT_OPTIONS *text_options);
+void text_set_options_encoding (TEXT_OPTIONS *text_options, char *encoding);
+void text_reset_options_encoding (TEXT_OPTIONS *text_options);
#endif
diff --git a/tp/Texinfo/XS/main/converter_types.h
b/tp/Texinfo/XS/main/converter_types.h
index 1254fb6c27..c6c9e3c119 100644
--- a/tp/Texinfo/XS/main/converter_types.h
+++ b/tp/Texinfo/XS/main/converter_types.h
@@ -30,6 +30,9 @@
/* for interdependency with options_types.h */
struct OPTIONS;
+/* for interdependency with convert_to_text.h */
+struct TEXT_OPTIONS;
+
/* for string information passing to/from perl */
enum sv_string_type {
svt_byte,
@@ -708,6 +711,8 @@ typedef struct CONVERTER {
INDEX_SORTED_BY_LETTER *index_entries_by_letter;
int document_units_descriptor;
+ struct TEXT_OPTIONS *convert_text_options;
+
/* output unit files API */
FILE_NAME_PATH_COUNTER_LIST output_unit_files;
diff --git a/tp/Texinfo/XS/main/get_perl_info.c
b/tp/Texinfo/XS/main/get_perl_info.c
index 20f6603aea..bf12fda410 100644
--- a/tp/Texinfo/XS/main/get_perl_info.c
+++ b/tp/Texinfo/XS/main/get_perl_info.c
@@ -523,7 +523,6 @@ copy_converter_conf_sv (HV *hv, CONVERTER *converter,
}
}
-/* reset output_init_conf. Can be called after it has been modified */
/* Texinfo::Convert::Converter generic initialization for all the converters */
/* Called early, in particuliar before any format specific code has been
called */
@@ -605,6 +604,11 @@ converter_initialize (SV *converter_sv)
get_expanded_formats (hv_in, &converter->expanded_formats);
+ set_output_encoding (converter->conf, converter->document);
+
+ converter->convert_text_options
+ = copy_options_for_convert_text (converter);
+
converter->hv = hv_in;
/* store converter_descriptor in perl converter */
@@ -615,6 +619,7 @@ converter_initialize (SV *converter_sv)
return converter_descriptor;
}
+/* reset output_init_conf. Can be called after it has been modified */
void
reset_output_init_conf (SV *sv_in)
{
diff --git a/tp/Texinfo/XS/main/unicode.c b/tp/Texinfo/XS/main/unicode.c
index 392308c292..8c809e4d30 100644
--- a/tp/Texinfo/XS/main/unicode.c
+++ b/tp/Texinfo/XS/main/unicode.c
@@ -405,7 +405,7 @@ encoded_accents (CONVERTER *self, const char *text, const
ELEMENT_STACK *stack,
/* UNICODE_POINT is a string describing an hexadecimal number with
letters in upper case */
/* returns the index in unicode_to_eight_bit +1 if > 0 */
-int unicode_point_decoded_in_encoding (char *encoding, char *codepoint)
+int unicode_point_decoded_in_encoding (const char *encoding, char *codepoint)
{
if (encoding)
{
@@ -452,7 +452,7 @@ int unicode_point_decoded_in_encoding (char *encoding, char
*codepoint)
}
char *
-unicode_brace_no_arg_command (enum command_id cmd, char *encoding)
+unicode_brace_no_arg_command (enum command_id cmd, const char *encoding)
{
if (unicode_character_brace_no_arg_commands[cmd].text
&& unicode_point_decoded_in_encoding (encoding,
diff --git a/tp/Texinfo/XS/main/unicode.h b/tp/Texinfo/XS/main/unicode.h
index bfcc7b53a0..15b3320a2f 100644
--- a/tp/Texinfo/XS/main/unicode.h
+++ b/tp/Texinfo/XS/main/unicode.h
@@ -77,7 +77,7 @@ typedef struct DIACRITIC_UNICODE {
extern DIACRITIC_UNICODE unicode_diacritics[];
extern COMMAND_UNICODE unicode_character_brace_no_arg_commands[];
-int unicode_point_decoded_in_encoding (char *encoding, char *codepoint);
+int unicode_point_decoded_in_encoding (const char *encoding, char *codepoint);
char *normalize_NFC (const char *text);
char *normalize_NFKD (const char *text);
@@ -88,6 +88,6 @@ char *encoded_accents (CONVERTER *self, const char *text,
char *(*format_accent)(CONVERTER *self, const char *text,
const ELEMENT *element, int set_case),
int set_case);
-char *unicode_brace_no_arg_command (enum command_id cmd, char *encoding);
+char *unicode_brace_no_arg_command (enum command_id cmd, const char *encoding);
#endif
diff --git a/tp/Texinfo/XS/main/utils.c b/tp/Texinfo/XS/main/utils.c
index f8ac34da18..f3e9de84d6 100644
--- a/tp/Texinfo/XS/main/utils.c
+++ b/tp/Texinfo/XS/main/utils.c
@@ -851,6 +851,22 @@ merge_strings (STRING_LIST *strings_list, STRING_LIST
*merged_strings)
strings_list->number += merged_strings->number;
}
+void
+copy_strings (STRING_LIST *dest_list, STRING_LIST *source_list)
+{
+ int i;
+ if (dest_list->number + source_list->number > dest_list->space)
+ {
+ dest_list->space = dest_list->number + source_list->number +5;
+ dest_list->list = realloc (dest_list->list,
+ sizeof (char *) * dest_list->space);
+ }
+ for (i = 0; i < source_list->number; i++)
+ {
+ add_string (source_list->list[i], dest_list);
+ }
+}
+
/* return the index +1, to return 0 if not found */
size_t
find_string (STRING_LIST *strings_list, const char *target)
@@ -936,6 +952,34 @@ destroy_strings_list (STRING_LIST *strings)
}
+
+void
+set_conf_string (OPTION *option, const char *value)
+{
+ if (option->type != GO_char && option->type != GO_bytes)
+ fatal ("set_conf_string bad option type\n");
+
+ if (option->configured > 0)
+ return;
+
+ free (option->string);
+ option->string = strdup (value);
+}
+
+/* In perl, OUTPUT_PERL_ENCODING is set too. Note that if the perl
+ version is called later on, the OUTPUT_PERL_ENCODING value will be re-set */
+void
+set_output_encoding (OPTIONS *customization_information, DOCUMENT *document)
+{
+ if (customization_information
+ && document && document->global_info
+ && document->global_info->input_encoding_name) {
+ set_conf_string (&customization_information->OUTPUT_ENCODING_NAME,
+ document->global_info->input_encoding_name);
+ }
+}
+
+
/* code related to document global info used both in parser and other codes */
void
delete_global_info (GLOBAL_INFO *global_info_ref)
@@ -1078,15 +1122,15 @@ set_informative_command_value (OPTIONS *options, const
ELEMENT *element)
cmd = CM_shortcontents;
option = get_command_option (options, cmd);
- if (option && option->configured <= 0)
+ if (option)
{
if (option->type == GO_integer)
- option->integer = strtoul (value, NULL, 10);
- else
{
- free (option->string);
- option->string = strdup (value);
+ if (option->configured <= 0)
+ option->integer = strtoul (value, NULL, 10);
}
+ else
+ set_conf_string (option, value);
}
}
}
diff --git a/tp/Texinfo/XS/main/utils.h b/tp/Texinfo/XS/main/utils.h
index aa28a5ea0d..4e987e3685 100644
--- a/tp/Texinfo/XS/main/utils.h
+++ b/tp/Texinfo/XS/main/utils.h
@@ -182,6 +182,7 @@ void free_strings_list (STRING_LIST *strings);
void destroy_strings_list (STRING_LIST *strings);
char *add_string (const char *string, STRING_LIST *strings_list);
void merge_strings (STRING_LIST *strings_list, STRING_LIST *merged_strings);
+void copy_strings (STRING_LIST *dest_list, STRING_LIST *source_list);
size_t find_string (STRING_LIST *strings_list, const char *string);
void destroy_accent_stack (ACCENTS_STACK *accent_stack);
@@ -195,6 +196,8 @@ void clear_options (OPTIONS *options);
void free_options (OPTIONS *options);
OPTIONS *new_options (void);
+void set_output_encoding (OPTIONS *customization_information,
+ DOCUMENT *document);
OPTION *get_command_option (OPTIONS *options, enum command_id cmd);
void add_include_directory (char *filename, STRING_LIST *include_dirs_list);
- master updated (ae85f7133c -> 03c9181681), Patrice Dumas, 2024/01/09
- [no subject], Patrice Dumas, 2024/01/09
- [no subject], Patrice Dumas, 2024/01/09
- [no subject], Patrice Dumas, 2024/01/09
- [no subject], Patrice Dumas, 2024/01/09
- [no subject], Patrice Dumas, 2024/01/09
- [no subject], Patrice Dumas, 2024/01/09
- [no subject], Patrice Dumas, 2024/01/09
- [no subject],
Patrice Dumas <=