[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[no subject]
From: Patrice Dumas
Date: Sat, 25 May 2024 04:03:31 -0400 (EDT)
branch: master
commit 005c7fbf9f4ee75f8f1797ee2e6ae2a2761daae4
Author: Patrice Dumas <pertusus@free.fr>
AuthorDate: Sat May 25 09:54:35 2024 +0200
* tp/Texinfo/XS/main/converter_types.h (CONVERTER): rename seen_ids as
registered_ids. Update users.
* tp/Texinfo/XS/convert/ConvertXS.xs (html_register_id)
(html_id_is_registered), tp/Texinfo/XS/convert/convert_html.c
(html_id_is_registered, html_register_id): add html_id_is_registered
and html_register_id functions for registered_ids access.
* tp/Texinfo/XS/convert/call_html_perl_function.c
(init_registered_ids_hv, is_hv_registered_id, hv_register_id)
(clear_registered_ids_hv, free_registered_ids_hv),
tp/Texinfo/XS/convert/convert_html.c (USE_PERL_HASHMAP)
(html_id_is_registered, html_register_id, html_converter_initialize)
(html_reset_converter, html_free_converter),
tp/Texinfo/XS/main/converter_types.h (CONVERTER): add an alternative
interface to register and retrieve registered ids, using a Perl hash.
If USE_PERL_HASHMAP is defined, the default, the Perl hash map is
used.
---
ChangeLog | 21 ++++++++
tp/TODO | 16 +++---
tp/Texinfo/XS/convert/ConvertXS.xs | 4 +-
tp/Texinfo/XS/convert/call_html_perl_function.c | 42 ++++++++++++++++
tp/Texinfo/XS/convert/call_html_perl_function.h | 8 +++
tp/Texinfo/XS/convert/convert_html.c | 66 +++++++++++++++++++------
tp/Texinfo/XS/convert/convert_html.h | 3 ++
tp/Texinfo/XS/main/converter_types.h | 6 ++-
8 files changed, 143 insertions(+), 23 deletions(-)
diff --git a/ChangeLog b/ChangeLog
index 19a0ea2ea4..f29abbd2fb 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,24 @@
+2024-05-25 Patrice Dumas <pertusus@free.fr>
+
+ * tp/Texinfo/XS/main/converter_types.h (CONVERTER): rename seen_ids as
+ registered_ids. Update users.
+
+ * tp/Texinfo/XS/convert/ConvertXS.xs (html_register_id)
+ (html_id_is_registered), tp/Texinfo/XS/convert/convert_html.c
+ (html_id_is_registered, html_register_id): add html_id_is_registered
+ and html_register_id functions for registered_ids access.
+
+ * tp/Texinfo/XS/convert/call_html_perl_function.c
+ (init_registered_ids_hv, is_hv_registered_id, hv_register_id)
+ (clear_registered_ids_hv, free_registered_ids_hv),
+ tp/Texinfo/XS/convert/convert_html.c (USE_PERL_HASHMAP)
+ (html_id_is_registered, html_register_id, html_converter_initialize)
+ (html_reset_converter, html_free_converter),
+ tp/Texinfo/XS/main/converter_types.h (CONVERTER): add an alternative
+ interface to register and retrieve registered ids, using a Perl hash.
+ If USE_PERL_HASHMAP is defined, the default, the Perl hash map is
+ used.
+
2024-05-24 Patrice Dumas <pertusus@free.fr>
* tp/Texinfo/XS/main/parser_conf.c: add a NOTE in comment explaining
diff --git a/tp/TODO b/tp/TODO
index e1a02c2da1..84350546fa 100644
--- a/tp/TODO
+++ b/tp/TODO
@@ -109,12 +109,16 @@ valgrind --tool=callgrind perl -w texi2any.pl
../doc/texinfo.texi --html
valgrind --tool=callgrind --separate-callers=3 --separate-recs=10 perl -w
texi2any.pl ../doc/texinfo.texi --html
kcachegrind callgrind.out.XXXXXX
-C code could be checked to see if using an hash map implementation,
-by compiling C code as C++ and using the standard C++ library hash map
-could be interesting (Patrice 2023-10-14).
-Could be interesting for find_string:
- unique_target -> find_string
- (and, though much less used output_files_open_out -> find_string)
+
+A Perl hash map is used for fast access to registered ids; see USE_PERL_HASHMAP
+in convert_html.c and the interface in call_html_perl_function.c.
+The C code could be checked to see if more hash maps would be worthwhile:
+should something similar be done for output_files_open_out -> find_string?
+
+If a hash map without a Perl dependency is needed, C++ std::unordered_map
+could be used instead of the Perl hash map, by setting up an interface with
+functions similar to those in call_html_perl_function.c, declared extern "C".
+
For the Texinfo manual with full XS, Perl uses 22% of the time (for html),
now only for code hopefully called once. Calling Perl getSortKey uses about
diff --git a/tp/Texinfo/XS/convert/ConvertXS.xs
b/tp/Texinfo/XS/convert/ConvertXS.xs
index 730a803c1f..e0528bcdf0 100644
--- a/tp/Texinfo/XS/convert/ConvertXS.xs
+++ b/tp/Texinfo/XS/convert/ConvertXS.xs
@@ -577,7 +577,7 @@ html_register_id (SV *converter_in, id)
self = get_sv_converter (converter_in, "html_register_id");
if (self)
/* note that we do not care about having the same id twice */
- add_string (id, &self->seen_ids);
+ html_register_id (self, id);
int html_id_is_registered (SV *converter_in, id)
@@ -588,7 +588,7 @@ int html_id_is_registered (SV *converter_in, id)
CODE:
self = get_sv_converter (converter_in, "html_id_is_registered");
if (self)
- found = find_string (&self->seen_ids, id);
+ found = html_id_is_registered (self, id);
RETVAL = found;
OUTPUT:
RETVAL
diff --git a/tp/Texinfo/XS/convert/call_html_perl_function.c
b/tp/Texinfo/XS/convert/call_html_perl_function.c
index 5904960a12..546d0360f3 100644
--- a/tp/Texinfo/XS/convert/call_html_perl_function.c
+++ b/tp/Texinfo/XS/convert/call_html_perl_function.c
@@ -2313,3 +2313,45 @@ call_button_direction_function (CONVERTER *self,
}
+
+/* Interface with Perl hash map for registered ids */
+
+void
+init_registered_ids_hv (CONVERTER *self)
+{
+ dTHX;
+
+ self->registered_ids_hv = newHV ();
+}
+
+int
+is_hv_registered_id (CONVERTER *self, const char *string)
+{
+ dTHX;
+
+ return hv_exists (self->registered_ids_hv, string, strlen (string));
+}
+
+void
+hv_register_id (CONVERTER *self, const char *string)
+{
+ dTHX;
+
+ hv_store (self->registered_ids_hv, string, strlen (string), newSViv (1), 0);
+}
+
+void
+clear_registered_ids_hv (CONVERTER *self)
+{
+ dTHX;
+
+ hv_clear (self->registered_ids_hv);
+}
+
+void
+free_registered_ids_hv (CONVERTER *self)
+{
+ dTHX;
+
+ hv_undef (self->registered_ids_hv);
+}
diff --git a/tp/Texinfo/XS/convert/call_html_perl_function.h
b/tp/Texinfo/XS/convert/call_html_perl_function.h
index 96e91f0d9a..117fde2bd4 100644
--- a/tp/Texinfo/XS/convert/call_html_perl_function.h
+++ b/tp/Texinfo/XS/convert/call_html_perl_function.h
@@ -143,4 +143,12 @@ FORMATTED_BUTTON_INFO *call_button_direction_function
(CONVERTER *self,
void *formatting_reference_sv,
int direction, const ELEMENT *element);
+
+/* Interface with Perl hash map for registered ids */
+void init_registered_ids_hv (CONVERTER *self);
+int is_hv_registered_id (CONVERTER *self, const char *string);
+void hv_register_id (CONVERTER *self, const char *string);
+void clear_registered_ids_hv (CONVERTER *self);
+void free_registered_ids_hv (CONVERTER *self);
+
#endif
diff --git a/tp/Texinfo/XS/convert/convert_html.c
b/tp/Texinfo/XS/convert/convert_html.c
index 96ad0a4d6b..664ed46e47 100644
--- a/tp/Texinfo/XS/convert/convert_html.c
+++ b/tp/Texinfo/XS/convert/convert_html.c
@@ -58,6 +58,11 @@
#include "api_to_perl.h"
#include "convert_html.h"
+/* Comment out this define to store registered ids in a pure C string list
+ searched linearly instead. Using a Perl hash map is much faster.
+ */
+#define USE_PERL_HASHMAP 1
+
typedef struct ROOT_AND_UNIT {
const OUTPUT_UNIT *output_unit;
const ELEMENT *root;
@@ -279,6 +284,26 @@ static COMMAND_ARGS_SPECIFICATION
command_args_flags[BUILTIN_CMD_NUMBER];
static void convert_to_html_internal (CONVERTER *self, const ELEMENT *e,
TEXT *result, const char *explanation);
+int
+html_id_is_registered (CONVERTER *self, const char *string)
+{
+#ifdef USE_PERL_HASHMAP
+ return is_hv_registered_id (self, string);
+#else
+ return find_string (&self->registered_ids, string);
+#endif
+}
+
+void
+html_register_id (CONVERTER *self, const char *string)
+{
+#ifdef USE_PERL_HASHMAP
+ hv_register_id (self, string);
+#else
+ add_string (string, &self->registered_ids);
+#endif
+}
+
/*
if OUTPUT_UNITS is defined, the first output unit is used if a proper
top output unit is not found.
@@ -1795,7 +1820,7 @@ set_special_units_targets_files (CONVERTER *self, const
char *document_name)
HTML_TARGET *element_target
= add_element_target (self, special_unit->unit_command, target);
element_target->special_unit_filename = filename;
- add_string (target, &self->seen_ids);
+ html_register_id (self, target);
if (target_filename)
{
@@ -1857,7 +1882,7 @@ prepare_associated_special_units_targets (CONVERTER *self)
element_target
= add_element_target (self, special_unit->unit_command, target);
if (target)
- add_string (target, &self->seen_ids);
+ html_register_id (self, target);
if (filename)
element_target->special_unit_filename = filename;
@@ -1938,7 +1963,7 @@ unique_target (CONVERTER *self, const char *target_base)
char *target = strdup (target_base);
while (1)
{
- if (find_string (&self->seen_ids, target))
+ if (html_id_is_registered (self, target))
{
free (target);
xasprintf (&target, "%s-%d", target_base, nr);
@@ -2036,14 +2061,14 @@ new_sectioning_command_target (CONVERTER *self, const
ELEMENT *command)
HTML_TARGET *element_target
= add_element_target (self, command, target);
element_target->section_filename = filename;
- add_string (target, &self->seen_ids);
+ html_register_id (self, target);
free (target);
if (target_contents)
{
element_target->contents_target = target_contents;
- add_string (target_contents, &self->seen_ids);
+ html_register_id (self, target_contents);
}
else
element_target->contents_target = strdup ("");
@@ -2051,7 +2076,7 @@ new_sectioning_command_target (CONVERTER *self, const
ELEMENT *command)
if (target_shortcontents)
{
element_target->shortcontents_target = target_shortcontents;
- add_string (target_shortcontents, &self->seen_ids);
+ html_register_id (self, target_shortcontents);
}
else
element_target->shortcontents_target = strdup ("");
@@ -2151,7 +2176,7 @@ set_root_commands_targets_node_files (CONVERTER *self)
HTML_TARGET *element_target
= add_element_target (self, target_element, target);
element_target->node_filename = node_filename;
- add_string (target, &self->seen_ids);
+ html_register_id (self, target);
free (target);
}
@@ -4779,7 +4804,7 @@ prepare_index_entries_targets (CONVERTER *self)
target_element = main_entry_element;
add_element_target (self, target_element, target);
- add_string (target, &self->seen_ids);
+ html_register_id (self, target);
free (target);
}
@@ -4849,8 +4874,8 @@ prepare_footnotes_targets (CONVERTER *self)
while (1)
{
- if (find_string (&self->seen_ids, footid.text)
- || find_string (&self->seen_ids, docid.text))
+ if (html_id_is_registered (self, footid.text)
+ || html_id_is_registered (self, docid.text))
{
nr++;
if (nr == 0)
@@ -4864,8 +4889,9 @@ prepare_footnotes_targets (CONVERTER *self)
else
break;
}
- add_string (footid.text, &self->seen_ids);
- add_string (docid.text, &self->seen_ids);
+ html_register_id (self, footid.text);
+ html_register_id (self, docid.text);
+
element_target = add_element_target (self, footnote, footid.text);
add_special_target (self, ST_footnote_location, footnote,
docid.text);
@@ -16412,6 +16438,10 @@ html_converter_initialize (CONVERTER *self)
int external_type_open_function = 0;
int external_formatting_function = 0;
+#ifdef USE_PERL_HASHMAP
+ init_registered_ids_hv (self);
+#endif
+
/* initialization needing some information from perl */
nr_special_units = self->special_unit_varieties.number;
@@ -17094,7 +17124,11 @@ html_reset_converter (CONVERTER *self)
reset_translated_special_unit_info_tree (self);
/* targets */
reset_html_targets (self, self->html_targets);
- clear_strings_list (&self->seen_ids);
+#ifdef USE_PERL_HASHMAP
+ clear_registered_ids_hv (self);
+#else
+ clear_strings_list (&self->registered_ids);
+#endif
for (i = 0; i < ST_footnote_location+1; i++)
{
reset_html_targets_list (self, &self->html_special_targets[i]);
@@ -17278,7 +17312,11 @@ html_free_converter (CONVERTER *self)
free (self->html_target_cmds.stack);
- free_strings_list (&self->seen_ids);
+#ifdef USE_PERL_HASHMAP
+ free_registered_ids_hv (self);
+#else
+ free_strings_list (&self->registered_ids);
+#endif
html_free_files_source_info (&self->files_source_info);
diff --git a/tp/Texinfo/XS/convert/convert_html.h
b/tp/Texinfo/XS/convert/convert_html.h
index f3c6dad7da..2b801acdd8 100644
--- a/tp/Texinfo/XS/convert/convert_html.h
+++ b/tp/Texinfo/XS/convert/convert_html.h
@@ -63,6 +63,9 @@ FORMATTING_REFERENCE *new_special_unit_formatting_references
(int special_units_varieties_nr);
char **new_special_unit_info_type (int special_units_varieties_nr);
+int html_id_is_registered (CONVERTER *self, const char *string);
+void html_register_id (CONVERTER *self, const char *string);
+
int html_open_command_update_context (CONVERTER *self,
enum command_id data_cmd);
void html_convert_command_update_context (CONVERTER *self,
diff --git a/tp/Texinfo/XS/main/converter_types.h
b/tp/Texinfo/XS/main/converter_types.h
index 26450edf89..aebd9fd5ad 100644
--- a/tp/Texinfo/XS/main/converter_types.h
+++ b/tp/Texinfo/XS/main/converter_types.h
@@ -775,7 +775,11 @@ typedef struct CONVERTER {
SPECIAL_UNIT_DIRECTION *special_units_direction_name;
ELEMENT **special_unit_info_tree[SUIT_type_heading+1];
SORTED_INDEX_NAMES sorted_index_names;
- STRING_LIST seen_ids;
+ union {
+ STRING_LIST registered_ids;
+ /* actually HV * but we do not want to drag in Perl headers */
+ void *registered_ids_hv;
+ };
/* potentially one target list per command (only for some actually) */
HTML_TARGET_LIST html_targets[BUILTIN_CMD_NUMBER];
HTML_TARGET_LIST html_special_targets[ST_footnote_location+1];