texinfo-commits
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[no subject]


From: Patrice Dumas
Date: Sat, 25 May 2024 04:03:31 -0400 (EDT)

branch: master
commit 005c7fbf9f4ee75f8f1797ee2e6ae2a2761daae4
Author: Patrice Dumas <pertusus@free.fr>
AuthorDate: Sat May 25 09:54:35 2024 +0200

    * tp/Texinfo/XS/main/converter_types.h (CONVERTER): rename seen_ids as
    registered_ids.  Update users.
    
    * tp/Texinfo/XS/convert/ConvertXS.xs (html_register_id)
    (html_id_is_registered), tp/Texinfo/XS/convert/convert_html.c
    (html_id_is_registered, html_register_id): add html_id_is_registered
    and html_register_id functions for registered_ids access.
    
    * tp/Texinfo/XS/convert/call_html_perl_function.c
    (init_registered_ids_hv, is_hv_registered_id, hv_register_id)
    (clear_registered_ids_hv, free_registered_ids_hv),
    tp/Texinfo/XS/convert/convert_html.c (USE_PERL_HASHMAP)
    (html_id_is_registered, html_register_id, html_converter_initialize)
    (html_reset_converter, html_free_converter),
    tp/Texinfo/XS/main/converter_types.h (CONVERTER): add an alternative
    interface to register and retrieve registered ids, using a Perl hash.
    If USE_PERL_HASHMAP is defined, the default, the Perl hash map is
    used.
---
 ChangeLog                                       | 21 ++++++++
 tp/TODO                                         | 16 +++---
 tp/Texinfo/XS/convert/ConvertXS.xs              |  4 +-
 tp/Texinfo/XS/convert/call_html_perl_function.c | 42 ++++++++++++++++
 tp/Texinfo/XS/convert/call_html_perl_function.h |  8 +++
 tp/Texinfo/XS/convert/convert_html.c            | 66 +++++++++++++++++++------
 tp/Texinfo/XS/convert/convert_html.h            |  3 ++
 tp/Texinfo/XS/main/converter_types.h            |  6 ++-
 8 files changed, 143 insertions(+), 23 deletions(-)

diff --git a/ChangeLog b/ChangeLog
index 19a0ea2ea4..f29abbd2fb 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,24 @@
+2024-05-25  Patrice Dumas  <pertusus@free.fr>
+
+       * tp/Texinfo/XS/main/converter_types.h (CONVERTER): rename seen_ids as
+       registered_ids.  Update users.
+
+       * tp/Texinfo/XS/convert/ConvertXS.xs (html_register_id)
+       (html_id_is_registered), tp/Texinfo/XS/convert/convert_html.c
+       (html_id_is_registered, html_register_id): add html_id_is_registered
+       and html_register_id functions for registered_ids access.
+
+       * tp/Texinfo/XS/convert/call_html_perl_function.c
+       (init_registered_ids_hv, is_hv_registered_id, hv_register_id)
+       (clear_registered_ids_hv, free_registered_ids_hv),
+       tp/Texinfo/XS/convert/convert_html.c (USE_PERL_HASHMAP)
+       (html_id_is_registered, html_register_id, html_converter_initialize)
+       (html_reset_converter, html_free_converter),
+       tp/Texinfo/XS/main/converter_types.h (CONVERTER): add an alternative
+       interface to register and retrieve registered ids, using a Perl hash.
+       If USE_PERL_HASHMAP is defined, the default, the Perl hash map is
+       used.
+
 2024-05-24  Patrice Dumas  <pertusus@free.fr>
 
        * tp/Texinfo/XS/main/parser_conf.c: add a NOTE in comment explaining
diff --git a/tp/TODO b/tp/TODO
index e1a02c2da1..84350546fa 100644
--- a/tp/TODO
+++ b/tp/TODO
@@ -109,12 +109,16 @@ valgrind --tool=callgrind perl -w texi2any.pl ../doc/texinfo.texi --html
 valgrind --tool=callgrind --separate-callers=3 --separate-recs=10 perl -w texi2any.pl ../doc/texinfo.texi --html
 kcachegrind callgrind.out.XXXXXX
 
-C code could be checked to see if using an hash map implementation,
-by compiling C code as C++ and using the standard C++ library hash map
-could be interesting (Patrice 2023-10-14).
-Could be interesting for find_string:
- unique_target -> find_string
- (and, though much less used output_files_open_out -> find_string)
+
+A Perl hash map is used for fast access, see USE_PERL_HASHMAP in
+convert_html.c and interface in call_html_perl_function.c.
+C code could be checked to see if using more hash maps could be interesting:
+Do something similar for output_files_open_out -> find_string?
+
+If a hash without Perl dependency is needed, C++ std::unordered_map could
+be used instead of a Perl hash map, by setting up an interface with
+functions similar with the call_html_perl_function.c defined as extern "C".
+
 
 For the Texinfo manual with full XS, Perl uses 22% of the time (for html),
 now only for code hopefully called once.  Calling Perl getSortKey uses about
diff --git a/tp/Texinfo/XS/convert/ConvertXS.xs b/tp/Texinfo/XS/convert/ConvertXS.xs
index 730a803c1f..e0528bcdf0 100644
--- a/tp/Texinfo/XS/convert/ConvertXS.xs
+++ b/tp/Texinfo/XS/convert/ConvertXS.xs
@@ -577,7 +577,7 @@ html_register_id (SV *converter_in, id)
          self = get_sv_converter (converter_in, "html_register_id");
          if (self)
           /* note that we do not care about having the same id twice */
-           add_string (id, &self->seen_ids);
+           html_register_id (self, id);
 
 
 int html_id_is_registered (SV *converter_in, id)
@@ -588,7 +588,7 @@ int html_id_is_registered (SV *converter_in, id)
       CODE:
          self = get_sv_converter (converter_in, "html_id_is_registered");
          if (self)
-           found = find_string (&self->seen_ids, id);
+           found = html_id_is_registered (self, id);
          RETVAL = found;
     OUTPUT:
          RETVAL
diff --git a/tp/Texinfo/XS/convert/call_html_perl_function.c b/tp/Texinfo/XS/convert/call_html_perl_function.c
index 5904960a12..546d0360f3 100644
--- a/tp/Texinfo/XS/convert/call_html_perl_function.c
+++ b/tp/Texinfo/XS/convert/call_html_perl_function.c
@@ -2313,3 +2313,45 @@ call_button_direction_function (CONVERTER *self,
 }
 
 
+
+/* Interface with Perl hash map for registered ids */
+
+void
+init_registered_ids_hv (CONVERTER *self)
+{
+  dTHX;
+
+  self->registered_ids_hv = newHV ();
+}
+
+int
+is_hv_registered_id (CONVERTER *self, const char *string)
+{
+  dTHX;
+
+  return hv_exists (self->registered_ids_hv, string, strlen (string));
+}
+
+void
+hv_register_id (CONVERTER *self, const char *string)
+{
+  dTHX;
+
+  hv_store (self->registered_ids_hv, string, strlen (string), newSViv (1), 0);
+}
+
+void
+clear_registered_ids_hv (CONVERTER *self)
+{
+  dTHX;
+
+  hv_clear (self->registered_ids_hv);
+}
+
+void
+free_registered_ids_hv (CONVERTER *self)
+{
+  dTHX;
+
+  hv_undef (self->registered_ids_hv);
+}
diff --git a/tp/Texinfo/XS/convert/call_html_perl_function.h b/tp/Texinfo/XS/convert/call_html_perl_function.h
index 96e91f0d9a..117fde2bd4 100644
--- a/tp/Texinfo/XS/convert/call_html_perl_function.h
+++ b/tp/Texinfo/XS/convert/call_html_perl_function.h
@@ -143,4 +143,12 @@ FORMATTED_BUTTON_INFO *call_button_direction_function (CONVERTER *self,
                              void *formatting_reference_sv,
                              int direction, const ELEMENT *element);
 
+
+/* Interface with Perl hash map for registered ids */
+void init_registered_ids_hv (CONVERTER *self);
+int is_hv_registered_id (CONVERTER *self, const char *string);
+void hv_register_id (CONVERTER *self, const char *string);
+void clear_registered_ids_hv (CONVERTER *self);
+void free_registered_ids_hv (CONVERTER *self);
+
 #endif
diff --git a/tp/Texinfo/XS/convert/convert_html.c b/tp/Texinfo/XS/convert/convert_html.c
index 96ad0a4d6b..664ed46e47 100644
--- a/tp/Texinfo/XS/convert/convert_html.c
+++ b/tp/Texinfo/XS/convert/convert_html.c
@@ -58,6 +58,11 @@
 #include "api_to_perl.h"
 #include "convert_html.h"
 
+/* comment out to use a string list in pure C instead, with linear search.
+   Using a Perl hash map is much faster.
+ */
+#define USE_PERL_HASHMAP 1
+
 typedef struct ROOT_AND_UNIT {
     const OUTPUT_UNIT *output_unit;
     const ELEMENT *root;
@@ -279,6 +284,26 @@ static COMMAND_ARGS_SPECIFICATION command_args_flags[BUILTIN_CMD_NUMBER];
 static void convert_to_html_internal (CONVERTER *self, const ELEMENT *e,
                                       TEXT *result, const char *explanation);
 
+int
+html_id_is_registered (CONVERTER *self, const char *string)
+{
+#ifdef USE_PERL_HASHMAP
+  return is_hv_registered_id (self, string);
+#else
+  return find_string (&self->registered_ids, string);
+#endif
+}
+
+void
+html_register_id (CONVERTER *self, const char *string)
+{
+#ifdef USE_PERL_HASHMAP
+  hv_register_id (self, string);
+#else
+  add_string (string, &self->registered_ids);
+#endif
+}
+
 /*
  if OUTPUT_UNITS is defined, the first output unit is used if a proper
  top output unit is not found.
@@ -1795,7 +1820,7 @@ set_special_units_targets_files (CONVERTER *self, const char *document_name)
       HTML_TARGET *element_target
         = add_element_target (self, special_unit->unit_command, target);
       element_target->special_unit_filename = filename;
-      add_string (target, &self->seen_ids);
+      html_register_id (self, target);
 
       if (target_filename)
         {
@@ -1857,7 +1882,7 @@ prepare_associated_special_units_targets (CONVERTER *self)
           element_target
            = add_element_target (self, special_unit->unit_command, target);
           if (target)
-            add_string (target, &self->seen_ids);
+            html_register_id (self, target);
           if (filename)
             element_target->special_unit_filename = filename;
 
@@ -1938,7 +1963,7 @@ unique_target (CONVERTER *self, const char *target_base)
   char *target = strdup (target_base);
   while (1)
     {
-      if (find_string (&self->seen_ids, target))
+      if (html_id_is_registered (self, target))
         {
           free (target);
           xasprintf (&target, "%s-%d", target_base, nr);
@@ -2036,14 +2061,14 @@ new_sectioning_command_target (CONVERTER *self, const ELEMENT *command)
   HTML_TARGET *element_target
     = add_element_target (self, command, target);
   element_target->section_filename = filename;
-  add_string (target, &self->seen_ids);
+  html_register_id (self, target);
 
   free (target);
 
   if (target_contents)
     {
       element_target->contents_target = target_contents;
-      add_string (target_contents, &self->seen_ids);
+      html_register_id (self, target_contents);
     }
   else
     element_target->contents_target = strdup ("");
@@ -2051,7 +2076,7 @@ new_sectioning_command_target (CONVERTER *self, const ELEMENT *command)
   if (target_shortcontents)
     {
       element_target->shortcontents_target = target_shortcontents;
-      add_string (target_shortcontents, &self->seen_ids);
+      html_register_id (self, target_shortcontents);
     }
   else
     element_target->shortcontents_target = strdup ("");
@@ -2151,7 +2176,7 @@ set_root_commands_targets_node_files (CONVERTER *self)
           HTML_TARGET *element_target
             = add_element_target (self, target_element, target);
           element_target->node_filename = node_filename;
-          add_string (target, &self->seen_ids);
+          html_register_id (self, target);
 
           free (target);
         }
@@ -4779,7 +4804,7 @@ prepare_index_entries_targets (CONVERTER *self)
                 target_element = main_entry_element;
 
               add_element_target (self, target_element, target);
-              add_string (target, &self->seen_ids);
+              html_register_id (self, target);
 
               free (target);
             }
@@ -4849,8 +4874,8 @@ prepare_footnotes_targets (CONVERTER *self)
 
           while (1)
             {
-              if (find_string (&self->seen_ids, footid.text)
-                    || find_string (&self->seen_ids, docid.text))
+              if (html_id_is_registered (self, footid.text)
+                    || html_id_is_registered (self, docid.text))
                 {
                   nr++;
                   if (nr == 0)
@@ -4864,8 +4889,9 @@ prepare_footnotes_targets (CONVERTER *self)
               else
                 break;
             }
-          add_string (footid.text, &self->seen_ids);
-          add_string (docid.text, &self->seen_ids);
+          html_register_id (self, footid.text);
+          html_register_id (self, docid.text);
+
           element_target = add_element_target (self, footnote, footid.text);
           add_special_target (self, ST_footnote_location, footnote,
                               docid.text);
@@ -16412,6 +16438,10 @@ html_converter_initialize (CONVERTER *self)
   int external_type_open_function = 0;
   int external_formatting_function = 0;
 
+#ifdef USE_PERL_HASHMAP
+  init_registered_ids_hv (self);
+#endif
+
   /* initialization needing some information from perl */
 
   nr_special_units = self->special_unit_varieties.number;
@@ -17094,7 +17124,11 @@ html_reset_converter (CONVERTER *self)
   reset_translated_special_unit_info_tree (self);
   /* targets */
   reset_html_targets (self, self->html_targets);
-  clear_strings_list (&self->seen_ids);
+#ifdef USE_PERL_HASHMAP
+  clear_registered_ids_hv (self);
+#else
+  clear_strings_list (&self->registered_ids);
+#endif
   for (i = 0; i < ST_footnote_location+1; i++)
     {
       reset_html_targets_list (self, &self->html_special_targets[i]);
@@ -17278,7 +17312,11 @@ html_free_converter (CONVERTER *self)
 
   free (self->html_target_cmds.stack);
 
-  free_strings_list (&self->seen_ids);
+#ifdef USE_PERL_HASHMAP
+  free_registered_ids_hv (self);
+#else
+  free_strings_list (&self->registered_ids);
+#endif
 
   html_free_files_source_info (&self->files_source_info);
 
diff --git a/tp/Texinfo/XS/convert/convert_html.h b/tp/Texinfo/XS/convert/convert_html.h
index f3c6dad7da..2b801acdd8 100644
--- a/tp/Texinfo/XS/convert/convert_html.h
+++ b/tp/Texinfo/XS/convert/convert_html.h
@@ -63,6 +63,9 @@ FORMATTING_REFERENCE *new_special_unit_formatting_references
                                       (int special_units_varieties_nr);
 char **new_special_unit_info_type (int special_units_varieties_nr);
 
+int html_id_is_registered (CONVERTER *self, const char *string);
+void html_register_id (CONVERTER *self, const char *string);
+
 int html_open_command_update_context (CONVERTER *self,
                                       enum command_id data_cmd);
 void html_convert_command_update_context (CONVERTER *self,
diff --git a/tp/Texinfo/XS/main/converter_types.h b/tp/Texinfo/XS/main/converter_types.h
index 26450edf89..aebd9fd5ad 100644
--- a/tp/Texinfo/XS/main/converter_types.h
+++ b/tp/Texinfo/XS/main/converter_types.h
@@ -775,7 +775,11 @@ typedef struct CONVERTER {
     SPECIAL_UNIT_DIRECTION *special_units_direction_name;
     ELEMENT **special_unit_info_tree[SUIT_type_heading+1];
     SORTED_INDEX_NAMES sorted_index_names;
-    STRING_LIST seen_ids;
+    union {
+      STRING_LIST registered_ids;
+      /* actually HV * but we do not want to drag in Perl headers */
+      void *registered_ids_hv;
+    };
     /* potentially one target list per command (only for some actually) */
     HTML_TARGET_LIST html_targets[BUILTIN_CMD_NUMBER];
     HTML_TARGET_LIST html_special_targets[ST_footnote_location+1];



reply via email to

[Prev in Thread] Current Thread [Next in Thread]