[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[no subject]
From: Patrice Dumas
Date: Sat, 13 Jan 2024 14:34:24 -0500 (EST)
branch: master
commit 114e10b2a1cb5ee07ae6b9d1228d6d016c9f86e6
Author: Patrice Dumas <pertusus@free.fr>
AuthorDate: Sat Jan 13 20:34:17 2024 +0100
* tp/Texinfo/XS/main/targets.c (set_labels_identifiers_target),
tp/Texinfo/XS/main/tree_types.h (LABEL),
tp/Texinfo/XS/parsetexi/labels.c (register_label): if a label element
is a duplicate, add a reference to the element used in the LABEL
structure used in labels_lists.
* tp/Texinfo/XS/convert/convert_html.c
(set_root_commands_targets_node_files): use labels_lists instead of
identifiers_target to set up targets to process in the document order,
to have less sorting to do later on in sort_cmd_targets.
---
ChangeLog | 13 ++++++
tp/Texinfo/XS/convert/convert_html.c | 74 +++++++++++++++++++++++++++------
tp/Texinfo/XS/main/errors.c | 2 +-
tp/Texinfo/XS/main/errors.h | 2 +-
tp/Texinfo/XS/main/targets.c | 2 +
tp/Texinfo/XS/main/tree_types.h | 2 +
tp/Texinfo/XS/parsetexi/errors_parser.c | 2 +-
tp/Texinfo/XS/parsetexi/errors_parser.h | 3 +-
tp/Texinfo/XS/parsetexi/labels.c | 1 +
9 files changed, 85 insertions(+), 16 deletions(-)
diff --git a/ChangeLog b/ChangeLog
index 26a1192c84..6b8e71008e 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,16 @@
+2024-01-13 Patrice Dumas <pertusus@free.fr>
+
+ * tp/Texinfo/XS/main/targets.c (set_labels_identifiers_target),
+ tp/Texinfo/XS/main/tree_types.h (LABEL),
+ tp/Texinfo/XS/parsetexi/labels.c (register_label): if a label element
+ is a duplicate, add a reference to the element used in the LABEL
+ structure used in labels_lists.
+
+ * tp/Texinfo/XS/convert/convert_html.c
+ (set_root_commands_targets_node_files): use labels_lists instead of
+ identifiers_target to set up targets to process in the document order,
+ to have less sorting to do later on in sort_cmd_targets.
+
2024-01-12 Patrice Dumas <pertusus@free.fr>
* tp/Texinfo/Convert/HTML.pm (_convert_no_arg_command): use
diff --git a/tp/Texinfo/XS/convert/convert_html.c
b/tp/Texinfo/XS/convert/convert_html.c
index 3350caec98..5dbe41f50f 100644
--- a/tp/Texinfo/XS/convert/convert_html.c
+++ b/tp/Texinfo/XS/convert/convert_html.c
@@ -2078,7 +2078,9 @@ set_root_commands_targets_node_files (CONVERTER *self)
if (self->conf->EXTENSION.string)
extension = self->conf->EXTENSION.string;
- LABEL_LIST *label_targets = self->document->identifiers_target;
+ /* use labels_list and not identifiers_target to process in the
+ document order */
+ LABEL_LIST *label_targets = self->document->labels_list;
int i;
for (i = 0; i < label_targets->number; i++)
{
@@ -2086,9 +2088,15 @@ set_root_commands_targets_node_files (CONVERTER *self)
char *target;
char *node_filename;
char *user_node_filename;
+ const ELEMENT *label_element;
+ const ELEMENT *target_element;
LABEL *label = &label_targets->list[i];
- const ELEMENT *target_element = label->element;
- const ELEMENT *label_element = get_label_element (target_element);
+
+ if (!label->identifier || label->reference)
+ continue;
+
+ target_element = label->element;
+ label_element = get_label_element (target_element);
TARGET_FILENAME *target_filename =
normalized_label_id_file (self, label->identifier, label_element);
@@ -3808,7 +3816,6 @@ html_internal_command_tree (CONVERTER *self, const
ELEMENT *command,
else if (command->args.number <= 0
|| command->args.list[0]->contents.number <= 0)
{ /* no argument, nothing to do */
- /* TODO check if possible */
tree->status = tree_added_status_no_tree;
}
else
@@ -4917,12 +4924,50 @@ set_heading_commands_targets (CONVERTER *self)
}
}
-/* It may not be efficient to sort and find back with bsearch
- if there is a small number of elements. However, some target
- elements should already be ordered when they are accessed in
- their order of appearance in the document.
- TODO check in which case it is not true and use another data
- source if possible */
+/* Debugging/optimization helper, used to check to what extent the
+   targets in ELEMENT_TARGETS (the targets list of command CMD) are
+   already ordered according to compare_element_target.
+   Each adjacent pair found out of order is reported on stderr, with the
+   index, the element address (as an integer and as a pointer) and the
+   target string of both entries.
+   Return the number of elements ordered ok with respect to the
+   previous element; 0 if there are fewer than two elements.
+ */
+size_t
+check_targets_order (enum command_id cmd, HTML_TARGET_LIST *element_targets)
+{
+  size_t i;
+  size_t result = 0;
+
+  if (element_targets->number <= 1)
+    return result;
+
+  for (i = 1; i < element_targets->number; i++)
+    {
+      if (compare_element_target (&element_targets->list[i-1],
+                                  &element_targets->list[i]) > 0)
+        {
+          /* There is no printf length modifier for uintptr_t without
+             <inttypes.h>: widen to uintmax_t and print with %ju instead
+             of %ld, which is wrong when long is narrower than a pointer.
+             %p requires a pointer to void, hence the explicit casts. */
+          fprintf (stderr, "no %s %zu %ju %p %s %zu %ju %p %s\n",
+                   builtin_command_name (cmd), i-1,
+                   (uintmax_t)(uintptr_t)element_targets->list[i-1].element,
+                   (const void *)element_targets->list[i-1].element,
+                   element_targets->list[i-1].target,
+                   i,
+                   (uintmax_t)(uintptr_t)element_targets->list[i].element,
+                   (const void *)element_targets->list[i].element,
+                   element_targets->list[i].target);
+        }
+      else
+        result++;
+    }
+  return result;
+}
+
+/* It may not be efficient to sort and find back with bsearch if there is
+ a small number of elements. However, some target elements are more
+ likely to already be ordered when they are accessed in their order of
+ appearance in the document. There is no guarantee, as it is only in the
+ same array that addresses are guaranteed to be increasing. A check done
+ in 2024 with gcc, using check_targets_order, and also looking at the
+ addresses of newly allocated elements, shows that elements are
+ not reliably allocated in order. However, overall, the addresses are
+ more in order when elements are accessed in the document order.
+ For indices, it is not really possible to get them in document order,
+ within an index they are in document order, but not across indices.
+ The other data are in document order, for nodes and similar because
+ the labels list is used instead of identifiers_target on purpose.
+ */
void
sort_cmd_targets (CONVERTER *self)
{
@@ -4934,6 +4979,11 @@ sort_cmd_targets (CONVERTER *self)
if (self->html_targets[cmd].number > 0)
{
HTML_TARGET_LIST *element_targets = &self->html_targets[cmd];
+ /* to check the order
+ size_t ordered_items = check_targets_order (cmd, element_targets);
+ fprintf (stderr, "ORDER %s %zu / %zu\n", builtin_command_name (cmd),
+ ordered_items, element_targets->number -1);
+ */
qsort (element_targets->list,
element_targets->number,
sizeof (HTML_TARGET), compare_element_target);
@@ -7906,7 +7956,7 @@ word_number_more_than_level (const char *text, int level)
int count = 0;
while (*p)
- {
+ {/* FIXME in perl unicode spaces are also matched */
int n = strspn (p, whitespace_chars);
if (n)
{
@@ -8525,7 +8575,7 @@ convert_email_command (CONVERTER *self, const enum
command_id cmd,
text = mail_string;
}
- /* FIXME match unicode spaces in perl */
+ /* FIXME in perl unicode spaces are also matched */
if (!mail || mail[strspn (mail, whitespace_chars)] == '\0')
{
if (text)
diff --git a/tp/Texinfo/XS/main/errors.c b/tp/Texinfo/XS/main/errors.c
index a7c019630f..99eebd875e 100644
--- a/tp/Texinfo/XS/main/errors.c
+++ b/tp/Texinfo/XS/main/errors.c
@@ -235,7 +235,7 @@ void
message_list_line_error_ext (ERROR_MESSAGE_LIST *error_messages,
OPTIONS *conf,
enum error_type type, int continuation,
- SOURCE_INFO *cmd_source_info, const char *format, ...)
+ const SOURCE_INFO *cmd_source_info, const char *format, ...)
{
va_list v;
diff --git a/tp/Texinfo/XS/main/errors.h b/tp/Texinfo/XS/main/errors.h
index 07e8b58c37..f2fdfbbf79 100644
--- a/tp/Texinfo/XS/main/errors.h
+++ b/tp/Texinfo/XS/main/errors.h
@@ -22,7 +22,7 @@ void vmessage_list_line_error (ERROR_MESSAGE_LIST
*error_messages,
void message_list_line_error_ext (ERROR_MESSAGE_LIST *error_messages,
OPTIONS *conf,
enum error_type type, int continuation,
- SOURCE_INFO *cmd_source_info, const char *format, ...);
+ const SOURCE_INFO *cmd_source_info, const char *format, ...);
void message_list_command_error (ERROR_MESSAGE_LIST *error_messages,
OPTIONS *conf,
const ELEMENT *e, const char *format, ...);
diff --git a/tp/Texinfo/XS/main/targets.c b/tp/Texinfo/XS/main/targets.c
index b19d8f97c3..235b3fbb5f 100644
--- a/tp/Texinfo/XS/main/targets.c
+++ b/tp/Texinfo/XS/main/targets.c
@@ -132,6 +132,8 @@ set_labels_identifiers_target (LABEL *list_of_labels,
size_t labels_number)
while (j < targets_number - 1 && targets[j+1].identifier
&& !strcmp (targets[i].identifier, targets[j+1].identifier))
{
+ list_of_labels[targets[j+1].label_number].reference
+ = targets[i].element;
j++;
}
if (j > i)
diff --git a/tp/Texinfo/XS/main/tree_types.h b/tp/Texinfo/XS/main/tree_types.h
index b82ec24fbe..9cef300923 100644
--- a/tp/Texinfo/XS/main/tree_types.h
+++ b/tp/Texinfo/XS/main/tree_types.h
@@ -279,6 +279,8 @@ typedef struct {
size_t label_number;
char *identifier;
ELEMENT *element;
+ /* for a label that is a duplicate, points to the element used in links */
+ const ELEMENT *reference;
} LABEL;
typedef struct {
diff --git a/tp/Texinfo/XS/parsetexi/errors_parser.c
b/tp/Texinfo/XS/parsetexi/errors_parser.c
index 3484b9e861..8d5d348018 100644
--- a/tp/Texinfo/XS/parsetexi/errors_parser.c
+++ b/tp/Texinfo/XS/parsetexi/errors_parser.c
@@ -51,7 +51,7 @@ line_error_internal (enum error_type type, int continuation,
void
line_error_ext (enum error_type type, int continuation,
- SOURCE_INFO *cmd_source_info,
+ const SOURCE_INFO *cmd_source_info,
const char *format, ...)
{
va_list v;
diff --git a/tp/Texinfo/XS/parsetexi/errors_parser.h
b/tp/Texinfo/XS/parsetexi/errors_parser.h
index 7ba06aa5c5..45477720b9 100644
--- a/tp/Texinfo/XS/parsetexi/errors_parser.h
+++ b/tp/Texinfo/XS/parsetexi/errors_parser.h
@@ -14,7 +14,8 @@ void command_warn (const ELEMENT *e, const char *format, ...);
void wipe_errors (void);
void forget_errors (void);
void line_error_ext (enum error_type type, int continuation,
- SOURCE_INFO *cmd_source_info, const char *format, ...);
+ const SOURCE_INFO *cmd_source_info,
+ const char *format, ...);
void bug_message (char *format, ...);
extern ERROR_MESSAGE_LIST error_messages_list;
diff --git a/tp/Texinfo/XS/parsetexi/labels.c b/tp/Texinfo/XS/parsetexi/labels.c
index 7d0ec84e5e..2f2d20518b 100644
--- a/tp/Texinfo/XS/parsetexi/labels.c
+++ b/tp/Texinfo/XS/parsetexi/labels.c
@@ -61,6 +61,7 @@ register_label (ELEMENT *target_element, char *normalized)
labels_list[labels_number].element = target_element;
labels_list[labels_number].label_number = labels_number;
labels_list[labels_number].identifier = normalized;
+ labels_list[labels_number].reference = 0;
labels_number++;
}