[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[no subject]
From: |
Patrice Dumas |
Date: |
Wed, 15 Nov 2023 03:37:20 -0500 (EST) |
branch: master
commit ad3205d85fd183bb1ddf8205d7e48ae5465bbde6
Author: Patrice Dumas <pertusus@free.fr>
AuthorDate: Tue Nov 14 23:12:52 2023 +0100
* tp/Texinfo/Convert/HTML.pm (%XS_overrides, %XS_conversion_overrides)
(import): use hashes for association of overridden sub with XS sub.
* tp/Texinfo/Convert/HTML.pm (_XS_format_init, import),
tp/Texinfo/XS/convert/ConvertXS.xs (html_format_init),
tp/Texinfo/XS/convert/convert_html.c (html_converter_initialize)
(html_format_init): add html_format_init function, to be called once to
set up C data that does not need any information on customization,
with code taken from html_converter_initialize. Set up XS interface.
* tp/Texinfo/XS/convert/convert_html.c (convert_to_html_internal):
use current_commands_conversion and not commands_conversion and
current_types_conversion and not types_conversion.
* tp/Texinfo/XS/convert/convert_html.c
(html_default_format_protect_text)
(default_css_string_format_protect_text): implement.
* tp/Texinfo/XS/convert/converter.c
(xml_format_text_with_numeric_entities, xml_protect_text): implement.
---
ChangeLog | 23 ++++++++
tp/Texinfo/Convert/Converter.pm | 12 ++--
tp/Texinfo/Convert/HTML.pm | 109 +++++++++++++++++------------------
tp/Texinfo/XS/convert/ConvertXS.xs | 3 +
tp/Texinfo/XS/convert/convert_html.c | 105 +++++++++++++++++++++++++++------
tp/Texinfo/XS/convert/convert_html.h | 2 +
tp/Texinfo/XS/convert/converter.c | 92 +++++++++++++++++++++++++++++
tp/Texinfo/XS/convert/converter.h | 3 +
8 files changed, 271 insertions(+), 78 deletions(-)
diff --git a/ChangeLog b/ChangeLog
index b1f0d0e300..0a7210931f 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -8,6 +8,29 @@
Report from Ihor Radchenko <yantar92@posteo.net> for Org mode manual.
+2023-11-14 Patrice Dumas <pertusus@free.fr>
+
+ * tp/Texinfo/Convert/HTML.pm (%XS_overrides, %XS_conversion_overrides)
+ (import): use hashes for association of overridden sub with XS sub.
+
+ * tp/Texinfo/Convert/HTML.pm (_XS_format_init, import),
+ tp/Texinfo/XS/convert/ConvertXS.xs (html_format_init),
+ tp/Texinfo/XS/convert/convert_html.c (html_converter_initialize)
+ (html_format_init): add html_format_init function to be called once to
+ setup C data that does not need any information on customization
+ with code from html_converter_initialize. Setup XS interface.
+
+ * tp/Texinfo/XS/convert/convert_html.c (convert_to_html_internal):
+ use current_commands_conversion and not commands_conversion and
+ current_types_conversion and not types_conversion.
+
+ * tp/Texinfo/XS/convert/convert_html.c
+ (html_default_format_protect_text)
+ (default_css_string_format_protect_text): implement.
+
+ * tp/Texinfo/XS/convert/converter.c
+ (xml_format_text_with_numeric_entities, xml_protect_text): implement.
+
2023-11-14 Patrice Dumas <pertusus@free.fr>
* tp/Texinfo/Translations.pm (import), tp/Texinfo/XS/Makefile.am,
diff --git a/tp/Texinfo/Convert/Converter.pm b/tp/Texinfo/Convert/Converter.pm
index 166886a411..2a6e6d2403 100644
--- a/tp/Texinfo/Convert/Converter.pm
+++ b/tp/Texinfo/Convert/Converter.pm
@@ -1570,12 +1570,12 @@ sub get_output_files_XS_unclosed_streams($)
# XML related methods and variables that may be used in different
# XML Converters.
-my $xml_numeric_entity_mdash = '&#'.hex('2014').';';
-my $xml_numeric_entity_ndash = '&#'.hex('2013').';';
-my $xml_numeric_entity_ldquo = '&#'.hex('201C').';';
-my $xml_numeric_entity_rdquo = '&#'.hex('201D').';';
-my $xml_numeric_entity_lsquo = '&#'.hex('2018').';';
-my $xml_numeric_entity_rsquo = '&#'.hex('2019').';';
+my $xml_numeric_entity_mdash = '&#'.hex('2014').';'; #8212
+my $xml_numeric_entity_ndash = '&#'.hex('2013').';'; #8211
+my $xml_numeric_entity_ldquo = '&#'.hex('201C').';'; #8220
+my $xml_numeric_entity_rdquo = '&#'.hex('201D').';'; #8221
+my $xml_numeric_entity_lsquo = '&#'.hex('2018').';'; #8216
+my $xml_numeric_entity_rsquo = '&#'.hex('2019').';'; #8217
sub xml_format_text_with_numeric_entities($$)
{
diff --git a/tp/Texinfo/Convert/HTML.pm b/tp/Texinfo/Convert/HTML.pm
index 0684ac6a30..d4801b32ef 100644
--- a/tp/Texinfo/Convert/HTML.pm
+++ b/tp/Texinfo/Convert/HTML.pm
@@ -92,73 +92,72 @@ my $XS_convert = 0;
$XS_convert = 1 if (defined $ENV{TEXINFO_XS_CONVERT}
and $ENV{TEXINFO_XS_CONVERT} eq '1');
+my %XS_overrides = (
+ "Texinfo::Convert::HTML::_default_format_protect_text"
+ => "Texinfo::MiscXS::default_format_protect_text",
+ "Texinfo::Convert::HTML::_entity_text"
+ => "Texinfo::MiscXS::entity_text",
+);
+
+my %XS_conversion_overrides = (
+ "Texinfo::Convert::HTML::_XS_format_init"
+ => "Texinfo::Convert::ConvertXS::html_format_init",
+ "Texinfo::Convert::HTML::_XS_converter_initialize"
+ => "Texinfo::Convert::ConvertXS::html_converter_initialize_sv",
+ "Texinfo::Convert::HTML::_XS_initialize_output_state"
+ => "Texinfo::Convert::ConvertXS::html_initialize_output_state",
+ "Texinfo::Convert::HTML::_finalize_output_state"
+ => "Texinfo::Convert::ConvertXS::html_finalize_output_state",
+ "Texinfo::Convert::HTML::_new_document_context"
+ => "Texinfo::Convert::ConvertXS::html_new_document_context",
+ "Texinfo::Convert::HTML::_pop_document_context"
+ => "Texinfo::Convert::ConvertXS::html_pop_document_context",
+ "Texinfo::Convert::HTML::_XS_get_index_entries_sorted_by_letter"
+ => "Texinfo::Convert::ConvertXS::get_index_entries_sorted_by_letter",
+ "Texinfo::Convert::HTML::_XS_html_merge_index_entries"
+ => "Texinfo::Convert::ConvertXS::html_merge_index_entries",
+ "Texinfo::Convert::HTML::_prepare_conversion_units"
+ => "Texinfo::Convert::ConvertXS::html_prepare_conversion_units",
+ "Texinfo::Convert::HTML::_prepare_units_directions_files"
+ => "Texinfo::Convert::ConvertXS::html_prepare_units_directions_files",
+ "Texinfo::Convert::HTML::_prepare_output_units_global_targets"
+ => "Texinfo::Convert::ConvertXS::html_prepare_output_units_global_targets",
+ "Texinfo::Convert::HTML::_translate_names"
+ => "Texinfo::Convert::ConvertXS::html_translate_names",
+ "Texinfo::Convert::HTML::_prepare_title_titlepage"
+ => "Texinfo::Convert::ConvertXS::html_prepare_title_titlepage",
+ "Texinfo::Convert::HTML::_html_convert_convert"
+ => "Texinfo::Convert::ConvertXS::html_convert_convert",
+ "Texinfo::Convert::HTML::_html_convert_output"
+ => "Texinfo::Convert::ConvertXS::html_convert_output",
+ #"Texinfo::Convert::HTML::_XS_html_convert_tree"
+ # => "Texinfo::Convert::ConvertXS::html_convert_tree",
+);
+
+# XS function does initialization independent of customization
+sub _XS_format_init()
+{
+}
+
our $module_loaded = 0;
sub import {
if (!$module_loaded) {
- Texinfo::XSLoader::override(
- "Texinfo::Convert::HTML::_default_format_protect_text",
- "Texinfo::MiscXS::default_format_protect_text");
- Texinfo::XSLoader::override(
- "Texinfo::Convert::HTML::_entity_text",
- "Texinfo::MiscXS::entity_text");
+ foreach my $sub (keys %XS_overrides) {
+ Texinfo::XSLoader::override ($sub, $XS_overrides{$sub});
+ }
if ($XS_convert) {
-
- Texinfo::XSLoader::override(
- "Texinfo::Convert::HTML::_XS_converter_initialize",
- "Texinfo::Convert::ConvertXS::html_converter_initialize_sv");
- Texinfo::XSLoader::override(
- "Texinfo::Convert::HTML::_XS_initialize_output_state",
- "Texinfo::Convert::ConvertXS::html_initialize_output_state");
- Texinfo::XSLoader::override(
- "Texinfo::Convert::HTML::_finalize_output_state",
- "Texinfo::Convert::ConvertXS::html_finalize_output_state");
- Texinfo::XSLoader::override(
- "Texinfo::Convert::HTML::_new_document_context",
- "Texinfo::Convert::ConvertXS::html_new_document_context");
- Texinfo::XSLoader::override(
- "Texinfo::Convert::HTML::_pop_document_context",
- "Texinfo::Convert::ConvertXS::html_pop_document_context");
- Texinfo::XSLoader::override(
- "Texinfo::Convert::HTML::_XS_get_index_entries_sorted_by_letter",
- "Texinfo::Convert::ConvertXS::get_index_entries_sorted_by_letter");
- Texinfo::XSLoader::override(
- "Texinfo::Convert::HTML::_XS_html_merge_index_entries",
- "Texinfo::Convert::ConvertXS::html_merge_index_entries");
- Texinfo::XSLoader::override(
- "Texinfo::Convert::HTML::_prepare_conversion_units",
- "Texinfo::Convert::ConvertXS::html_prepare_conversion_units");
- Texinfo::XSLoader::override(
- "Texinfo::Convert::HTML::_prepare_units_directions_files",
- "Texinfo::Convert::ConvertXS::html_prepare_units_directions_files");
- Texinfo::XSLoader::override(
- "Texinfo::Convert::HTML::_prepare_output_units_global_targets",
- "Texinfo::Convert::ConvertXS::html_prepare_output_units_global_targets");
- Texinfo::XSLoader::override(
- "Texinfo::Convert::HTML::_translate_names",
- "Texinfo::Convert::ConvertXS::html_translate_names");
- Texinfo::XSLoader::override(
- "Texinfo::Convert::HTML::_prepare_title_titlepage",
- "Texinfo::Convert::ConvertXS::html_prepare_title_titlepage");
- Texinfo::XSLoader::override(
- "Texinfo::Convert::HTML::_html_convert_convert",
- "Texinfo::Convert::ConvertXS::html_convert_convert");
- Texinfo::XSLoader::override(
- "Texinfo::Convert::HTML::_html_convert_output",
- "Texinfo::Convert::ConvertXS::html_convert_output");
- #Texinfo::XSLoader::override(
- #"Texinfo::Convert::HTML::_XS_html_convert_tree",
- #"Texinfo::Convert::ConvertXS::html_convert_tree");
+ foreach my $sub (keys %XS_conversion_overrides) {
+ Texinfo::XSLoader::override ($sub, $XS_conversion_overrides{$sub});
+ }
+ _XS_format_init();
}
-
$module_loaded = 1;
}
# The usual import method
goto &Exporter::import;
}
-
-
my %nobrace_commands = %Texinfo::Commands::nobrace_commands;
my %line_commands = %Texinfo::Commands::line_commands;
my %nobrace_symbol_text = %Texinfo::Common::nobrace_symbol_text;
diff --git a/tp/Texinfo/XS/convert/ConvertXS.xs
b/tp/Texinfo/XS/convert/ConvertXS.xs
index d61bd0b537..aeec850a6f 100644
--- a/tp/Texinfo/XS/convert/ConvertXS.xs
+++ b/tp/Texinfo/XS/convert/ConvertXS.xs
@@ -175,6 +175,9 @@ text_convert_tree (SV *text_options_in, SV *tree_in,
unused=0)
# HTML
+void
+html_format_init ()
+
int
html_converter_initialize_sv (SV *converter_in, SV
*default_formatting_references, SV *default_css_string_formatting_references,
SV *default_commands_open, SV *default_commands_conversion, SV
*default_css_string_commands_conversion, SV *default_types_open, SV
*default_types_conversion, SV *default_css_string_types_conversion, SV
*default_output_units_conversion)
diff --git a/tp/Texinfo/XS/convert/convert_html.c
b/tp/Texinfo/XS/convert/convert_html.c
index e261f91b95..3f750f7881 100644
--- a/tp/Texinfo/XS/convert/convert_html.c
+++ b/tp/Texinfo/XS/convert/convert_html.c
@@ -2219,6 +2219,75 @@ html_prepare_units_directions_files (CONVERTER *self,
return files_source_info;
}
+
+#define ADDN(str,nr) text_append_n (result, str, nr)
+void
+html_default_format_protect_text (const char *text, TEXT *result)
+{
+ const char *p = text;
+
+ while (*p)
+ {
+ int before_sep_nr = strcspn (p, "<>&\"\f");
+ if (before_sep_nr)
+ {
+ text_append_n (result, p, before_sep_nr);
+ p += before_sep_nr;
+ }
+ if (!*p)
+ break;
+ switch (*p)
+ {
+ case '<':
+ ADDN("&lt;", 4);
+ break;
+ case '>':
+ ADDN("&gt;", 4);
+ break;
+ case '&':
+ ADDN("&amp;", 5);
+ break;
+ case '"':
+ ADDN("&quot;", 6);
+ break;
+ case '\f':
+ ADDN("&#12;", 5);
+ break;
+ }
+ p++;
+ }
+}
+
+void
+default_css_string_format_protect_text (const char *text, TEXT *result)
+{
+ const char *p = text;
+
+ while (*p)
+ {
+ int before_sep_nr = strcspn (p, "\\'");
+ if (before_sep_nr)
+ {
+ text_append_n (result, p, before_sep_nr);
+ p += before_sep_nr;
+ }
+ if (!*p)
+ break;
+ switch (*p)
+ {
+ case '\\':
+ ADDN("\\\\", 2);
+ break;
+ case '\'':
+ ADDN("\\'", 2);
+ break;
+ }
+ p++;
+ }
+}
+
+#undef ADDN
+
static char *
command_conversion (CONVERTER *self, enum command_id cmd,
const ELEMENT *element, HTML_ARGS_FORMATTED
*args_formatted,
@@ -2278,7 +2347,6 @@ type_open (CONVERTER *self, enum element_type type, const
ELEMENT *element)
return 0;
}
-
static void
push_html_formatting_context (HTML_FORMATTING_CONTEXT_STACK *stack,
char *context_name)
@@ -2404,22 +2472,15 @@ reset_translated_special_unit_info_tree (CONVERTER
*self)
}
}
-/* most of the initialization is done by html_converter_initialize_sv
- in get_perl_info, the initialization that do not require information
- from perl is done here. This is called after information from perl
- has been gathered */
+/* set information that is independent of customization, only called once */
void
-html_converter_initialize (CONVERTER *self)
+html_format_init ()
{
int i;
- int nr_special_units;
int nr_default_commands
= sizeof (default_commands_args) / sizeof (default_commands_args[0]);
int max_args = MAX_COMMAND_ARGS_NR;
- /* first set information that is fully independent from information
- coming from perl */
-
for (i = 0; i < nr_default_commands; i++)
{
/* we file the status for specified commands, to distinguish them
@@ -2477,7 +2538,17 @@ html_converter_initialize (CONVERTER *self)
html_commands_data[CM_float].flags |= HF_composition_context;
html_commands_data[CM_sc].flags |= HF_upper_case;
+}
+/* most of the initialization is done by html_converter_initialize_sv
+ in get_perl_info, the initialization that do not require information
+ directly from perl data is done here. This is called after information
+ from perl has been gathered */
+void
+html_converter_initialize (CONVERTER *self)
+{
+ int i;
+ int nr_special_units;
/* initialization needing some information from perl */
nr_special_units = self->special_unit_varieties.number;
@@ -3174,9 +3245,9 @@ convert_to_html_internal (CONVERTER *self, const ELEMENT
*element,
}
if ((element->type
- && self->types_conversion[element->type].status == FRS_status_ignored)
+ && self->current_types_conversion[element->type].status ==
FRS_status_ignored)
|| (cmd
- && self->commands_conversion[cmd].status == FRS_status_ignored))
+ && self->current_commands_conversion[cmd].status ==
FRS_status_ignored))
{
if (self->conf->DEBUG > 0)
{
@@ -3248,7 +3319,7 @@ convert_to_html_internal (CONVERTER *self, const ELEMENT
*element,
self->modified_state |= HMSF_current_root;
}
- if (self->commands_conversion[cmd].status)
+ if (self->current_commands_conversion[cmd].status)
{
int convert_to_latex = 0;
HTML_ARGS_FORMATTED *args_formatted = 0;
@@ -3662,7 +3733,7 @@ convert_to_html_internal (CONVERTER *self, const ELEMENT
*element,
}
/* args are formatted, now format the command itself */
- if (self->commands_conversion[cmd].status)
+ if (self->current_commands_conversion[cmd].status)
{
char *conv_str = command_conversion (self, cmd,
element, args_formatted,
@@ -3786,7 +3857,7 @@ convert_to_html_internal (CONVERTER *self, const ELEMENT
*element,
}
}
- if (self->types_conversion[type].status)
+ if (self->current_types_conversion[type].status)
{
char *conversion_result
= type_conversion (self, type, element,
@@ -3864,8 +3935,8 @@ convert_to_html_internal (CONVERTER *self, const ELEMENT
*element,
{
if (self->conf->DEBUG > 0)
fprintf (stderr, "UNNAMED empty\n");
- if (self->types_conversion[0].status
- && self->types_conversion[0].status != FRS_status_ignored)
+ if (self->current_types_conversion[0].status
+ && self->current_types_conversion[0].status != FRS_status_ignored)
{
char *conversion_result
= type_conversion (self, 0, element, "");
diff --git a/tp/Texinfo/XS/convert/convert_html.h
b/tp/Texinfo/XS/convert/convert_html.h
index 85d39a3648..289fd3fbba 100644
--- a/tp/Texinfo/XS/convert/convert_html.h
+++ b/tp/Texinfo/XS/convert/convert_html.h
@@ -14,6 +14,8 @@ extern char *html_formatting_reference_names[];
extern TRANSLATED_SUI_ASSOCIATION translated_special_unit_info[];
extern const char *special_unit_info_type_names[SUI_type_heading + 1];
+void html_format_init (void);
+
void html_converter_initialize (CONVERTER *self);
void html_initialize_output_state (CONVERTER *self, char *context);
diff --git a/tp/Texinfo/XS/convert/converter.c
b/tp/Texinfo/XS/convert/converter.c
index b413d807ba..dfe58b34f8 100644
--- a/tp/Texinfo/XS/convert/converter.c
+++ b/tp/Texinfo/XS/convert/converter.c
@@ -580,3 +580,95 @@ free_generic_converter (CONVERTER *self)
free_output_files_information (&self->output_files_information);
free_output_unit_files (&self->output_unit_files);
}
+
+
+/* XML conversion functions */
+
+#define ADD(x) text_append_n (result, "&#" #x ";", 7)
+void
+xml_format_text_with_numeric_entities (const char *text, TEXT *result)
+{
+ const char *p;
+ int str_len;
+
+ p = text;
+ while (*p)
+ {
+ int before_sep_nr = strcspn (p, "-'`");
+ if (before_sep_nr)
+ {
+ text_append_n (result, p, before_sep_nr);
+ p += before_sep_nr;
+ }
+ if (!*p)
+ break;
+ str_len = strlen (p);
+ if ((str_len > 1) && (!strncmp (p, "``", 2)))
+ {
+ ADD(8220);
+ p += 2;
+ }
+ else if ((str_len > 1) && (!strncmp (p, "''", 2)))
+ {
+ ADD(8221);
+ p += 2;
+ }
+ else if ((str_len > 2) && !strncmp (p, "---", 3))
+ {
+ ADD(8212);
+ p += 3;
+ }
+ else if ((str_len > 1) && !strncmp (p, "--", 2))
+ {
+ ADD(8211);
+ p += 2;
+ }
+ else
+ {
+ if (*p == '\'')
+ ADD(8217);
+ else if (*p == '`')
+ ADD(8216);
+ p++;
+ }
+ }
+}
+#undef ADD
+
+#define ADDN(str,nr) text_append_n (result, str, nr)
+void
+xml_protect_text (const char *text, TEXT *result)
+{
+ const char *p;
+
+ p = text;
+
+ while (*p)
+ {
+ int before_sep_nr = strcspn (p, "<>&\"\f");
+ if (before_sep_nr)
+ {
+ text_append_n (result, p, before_sep_nr);
+ p += before_sep_nr;
+ }
+ if (!*p)
+ break;
+ switch (*p)
+ {
+ case '<':
+ ADDN("&lt;", 4);
+ break;
+ case '>':
+ ADDN("&gt;", 4);
+ break;
+ case '&':
+ ADDN("&amp;", 5);
+ break;
+ case '"':
+ ADDN("&quot;", 6);
+ break;
+ }
+ p++;
+ }
+}
+#undef ADDN
diff --git a/tp/Texinfo/XS/convert/converter.h
b/tp/Texinfo/XS/convert/converter.h
index 00102ab73a..c864784f47 100644
--- a/tp/Texinfo/XS/convert/converter.h
+++ b/tp/Texinfo/XS/convert/converter.h
@@ -39,4 +39,7 @@ void clear_output_unit_files (FILE_NAME_PATH_COUNTER_LIST
*output_unit_files);
void free_output_unit_files (FILE_NAME_PATH_COUNTER_LIST *output_unit_files);
void free_generic_converter (CONVERTER *self);
+
+
+void xml_format_text_with_numeric_entities (const char *text, TEXT *result);
#endif