[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[no subject]
From: |
Patrice Dumas |
Date: |
Wed, 14 Feb 2024 08:22:51 -0500 (EST) |
branch: master
commit a403b4aee87132234d1d393518e9ac2354592b28
Author: Patrice Dumas <pertusus@free.fr>
AuthorDate: Wed Feb 14 13:50:50 2024 +0100
* tp/Texinfo/Document.pm (sorted_indices_by_letter)
(sorted_indices_by_index), tp/Texinfo/XS/main/document.c
(sorted_indices_by_letter, sorted_indices_by_index): set the document
argument to be the first argument. Update callers.
* tp/Texinfo/Indices.pm (_setup_sortable_index_entries): rename
setup_sortable_index_entries as _setup_sortable_index_entries.
* tp/Texinfo/XS/main/manipulate_indices.c: make internal functions
static.
Update POD documentation.
---
ChangeLog | 15 +++++
tp/Texinfo/Convert/Converter.pm | 8 +--
tp/Texinfo/Document.pm | 64 ++++++++++++++++----
tp/Texinfo/Indices.pm | 101 ++++++++++++++++++++------------
tp/Texinfo/XS/convert/converter.c | 8 +--
tp/Texinfo/XS/main/document.c | 12 ++--
tp/Texinfo/XS/main/document.h | 10 ++--
tp/Texinfo/XS/main/manipulate_indices.c | 10 ++--
8 files changed, 158 insertions(+), 70 deletions(-)
diff --git a/ChangeLog b/ChangeLog
index fdaac82d13..3a4af58ef7 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,18 @@
+2024-02-14 Patrice Dumas <pertusus@free.fr>
+
+ * tp/Texinfo/Document.pm (sorted_indices_by_letter)
+ (sorted_indices_by_index), tp/Texinfo/XS/main/document.c
+ (sorted_indices_by_letter, sorted_indices_by_index): set the document
+ argument to be the first argument. Update callers.
+
+ * tp/Texinfo/Indices.pm (_setup_sortable_index_entries): rename
+ setup_sortable_index_entries as _setup_sortable_index_entries.
+
+ * tp/Texinfo/XS/main/manipulate_indices.c: make internal functions
+ static.
+
+ Update POD documentation.
+
2024-02-14 Patrice Dumas <pertusus@free.fr>
* tp/Texinfo/Indices.pm (_setup_sort_sortable_strings_collator)
diff --git a/tp/Texinfo/Convert/Converter.pm b/tp/Texinfo/Convert/Converter.pm
index 4303d90cec..d54474d012 100644
--- a/tp/Texinfo/Convert/Converter.pm
+++ b/tp/Texinfo/Convert/Converter.pm
@@ -1752,8 +1752,8 @@ sub get_converter_indices_sorted_by_letter($)
}
}
- return Texinfo::Document::sorted_indices_by_letter(undef, $self,
- $self->{'document'},
+ return Texinfo::Document::sorted_indices_by_letter($self->{'document'},
+ undef, $self,
$use_unicode_collation, $locale_lang);
}
}
@@ -1781,8 +1781,8 @@ sub get_converter_indices_sorted_by_index($)
}
}
- return Texinfo::Document::sorted_indices_by_index(undef, $self,
- $self->{'document'},
+ return Texinfo::Document::sorted_indices_by_index($self->{'document'},
+ undef, $self,
$use_unicode_collation, $locale_lang);
}
}
diff --git a/tp/Texinfo/Document.pm b/tp/Texinfo/Document.pm
index cb06d04b4b..6495c3a411 100644
--- a/tp/Texinfo/Document.pm
+++ b/tp/Texinfo/Document.pm
@@ -206,8 +206,10 @@ sub merged_indices($)
return $self->{'merged_indices'};
}
-# TODO document
# call setup_index_entries_sort_strings and cache the result.
+# In general, there is no need to call this function directly,
+# as it is called by Texinfo::Indices::sort_indices_by_*. It may
+# be called in advance, however, if errors need to be collected early.
sub indices_sort_strings($$$;$)
{
my $document = shift;
@@ -227,12 +229,12 @@ sub indices_sort_strings($$$;$)
return $document->{'index_entries_sort_strings'};
}
-# TODO document
+# call Texinfo::Indices::sort_indices_by_letter and cache the result
sub sorted_indices_by_letter($$$$$)
{
+ my $document = shift;
my $registrar = shift;
my $customization_information = shift;
- my $document = shift;
my $use_unicode_collation = shift;
my $locale_lang = shift;
@@ -260,12 +262,12 @@ sub sorted_indices_by_letter($$$$$)
return $document->{'sorted_indices_by_letter'}->{$lang_key};
}
-# TODO document
+# call Texinfo::Indices::sort_indices_by_index and cache the result
sub sorted_indices_by_index($$$$$)
{
+ my $document = shift;
my $registrar = shift;
my $customization_information = shift;
- my $document = shift;
my $use_unicode_collation = shift;
my $locale_lang = shift;
@@ -564,9 +566,8 @@ labels, and the associated value is the corresponding
@-command.
=back
-Information on C<@float> is also available, grouped by type of
-floats, each type corresponding to potential C<@listoffloats>.
-This information is available through the method C<floats_information>.
+Information on C<@float> grouped by type of floats, each type corresponding
+to potential C<@listoffloats> is available through C<floats_information>.
=over
@@ -606,8 +607,7 @@ by a call to
L<register_document_sections_list|/register_document_sections_list
=back
Information about defined indices, indices merging and index entries is
-also available through the C<indices_information> method. Merged document
-indices are available through C<merged_indices>.
+available through C<indices_information>.
=over
@@ -679,7 +679,18 @@ the indices corresponding to the following texinfo
If C<name> is not set, it is set to the index name.
+=back
+
+Merged and sorted document indices are also available. Parsed indices
+are neither merged nor sorted; L<Texinfo::Indices> functions are
+called to merge or sort the indices the first time the following
+methods are called. The results are then associated with the
+document and simply returned.
+
+=over
+
=item $merged_indices = $document->merged_indices()
+X<C<merged_indices>>
Merge indices if needed and return merged indices. The I<$merged_indices>
returned is a hash reference whose keys are the index names and values arrays
@@ -688,6 +699,39 @@ of index entry structures described in L</index_entries>.
L<< C<Texinfo::Indices::merge_indices>|Texinfo::Indices/$merged_indices =
merge_indices($indices_information) >>
is used to merge the indices.
+=item $sorted_indices = $document->sorted_indices_by_index($registrar,
$customization_information, $use_unicode_collation, $locale_lang)
+
+=item $sorted_indices = $document->sorted_indices_by_letter($registrar,
$customization_information, $use_unicode_collation, $locale_lang)
+X<C<sorted_indices_by_index>> X<C<sorted_indices_by_letter>>
+
+C<sorted_indices_by_letter> returns the indices sorted by index and letter,
+while C<sorted_indices_by_index> returns the indices with all entries
+of an index together.
+
+By default, indices are sorted according to the I<Unicode Collation Algorithm>
+defined in the L<Unicode Technical Standard
+#10|http://www.unicode.org/reports/tr10/>, without language-specific collation
+tailoring. If I<$use_unicode_collation> is set to 0, the sorting will not use
+the I<Unicode Collation Algorithm> and simply sort according to the codepoints.
+If I<$locale_lang> is set, the language is used for linguistic tailoring of the
+sorting, if possible.
+
+When sorting by letter, an array reference of letter hash references is
+associated with each index name. Each letter hash reference has two
+keys, a I<letter> key with the letter, and an I<entries> key with an array
+reference of sorted index entries beginning with the letter. The letter
+is a character string suitable for sorting letters, but is not necessarily
+the best to use for output.
+
+When simply sorting, the array of the sorted index entries is associated
+with the index name.
+
+Register errors in I<$registrar> or through I<$customization_information>.
+
+L<<
C<Texinfo::Indices::sort_indices_by_index>|Texinfo::Indices/$index_entries_sorted
= sort_indices_by_index($document, $registrar, $customization_information,
$use_unicode_collation, $locale_lang) >>
+and L<<
C<Texinfo::Indices::sort_indices_by_letter>|Texinfo::Indices/$index_entries_sorted
= sort_indices_by_letter($document, $registrar, $customization_information,
$use_unicode_collation, $locale_lang) >>
+are used to sort the indices, if needed.
+
=back
=head2 Registering document and information in document
diff --git a/tp/Texinfo/Indices.pm b/tp/Texinfo/Indices.pm
index cf04ebfb41..be8f1f0af7 100644
--- a/tp/Texinfo/Indices.pm
+++ b/tp/Texinfo/Indices.pm
@@ -178,7 +178,7 @@ sub index_entry_element_sort_string($$$$;$)
$prefer_reference_element);
$sort_string = Texinfo::Convert::Text::convert_to_text(
$entry_tree_element, $options);
- # FIXME do that for sortas too?
+ # TODO do that for sortas too?
if (defined($main_entry->{'entry_element'}
->{'extra'}->{'index_ignore_chars'})) {
my $ignore_chars = quotemeta($main_entry->{'entry_element'}
@@ -302,6 +302,9 @@ sub _setup_collator($$)
return $collator;
}
+# Not documented since, in general, it should not be called directly but
+# through Texinfo::Document::indices_sort_strings, which caches the result
+# in the document and is itself, in general, called by the sorting functions.
sub setup_index_entries_sort_strings($$$$;$)
{
my $registrar = shift;
@@ -402,7 +405,10 @@ sub setup_index_entries_sort_strings($$$$;$)
return $indices_sort_strings;
}
-# TODO document? Probably not to be called in user-defined code.
+# Returns a hash reference associating the index entries with the strings
+# that were used to sort them.
+# Used in tests, but not documented, as it is unlikely for this function
+# to be of any direct use for users.
sub format_index_entries_sort_strings($)
{
my $indices_sort_strings = shift;
@@ -420,7 +426,7 @@ sub format_index_entries_sort_strings($)
return $index_entries_sort_strings;
}
-sub setup_sortable_index_entries($$)
+sub _setup_sortable_index_entries($$)
{
my $collator = shift;
my $indices_sort_strings = shift;
@@ -474,7 +480,7 @@ sub _setup_sort_sortable_strings_collator($$$$$)
my $collator = _setup_collator($use_unicode_collation, $locale_lang);
my $index_sortable_index_entries
- = setup_sortable_index_entries($collator, $indices_sort_strings);
+ = _setup_sortable_index_entries($collator, $indices_sort_strings);
return ($index_sortable_index_entries, $collator);
}
@@ -682,6 +688,7 @@ sub sort_indices_by_letter($$$;$$)
return $sorted_index_entries;
}
+# Normally called through Texinfo::Document::merged_indices only
sub merge_indices($)
{
my $indices_information = shift;
@@ -718,21 +725,20 @@ Texinfo::Indices - merging and sorting indices from
Texinfo
use Texinfo::Indices qw(merge_indices sort_indices_by_letter
sort_indices_by_index);
- # $document is a parsed Texinfo::Document document, $parser is
- # a Texinfo::Parser object. $config is an object implementing the
- # get_conf() method.
- my $registrar = $parser->registered_errors();
-
+ # $document is a parsed Texinfo::Document document.
my $indices_information = $document->indices_information();
my $merged_index_entries
= merge_indices($indices_information);
+
+ # $registrar is a Texinfo::Report object. $config is an object
+ # implementing the get_conf() method.
my $index_entries_sorted;
if ($sort_by_letter) {
- $index_entries_sorted = sort_indices_by_letter($registrar, $config,
- $merged_index_entries, $indices_information);
+ $index_entries_sorted = sort_indices_by_letter($document, $registrar,
+ $config);
} else {
- $index_entries_sorted = sort_indices_by_index($registrar, $config,
- $merged_index_entries, $indices_information);
+ $index_entries_sorted = sort_indices_by_index($document, $registrar,
+ $config);
}
@@ -743,27 +749,20 @@ Texinfo to other formats. There is no promise of API
stability.
=head1 DESCRIPTION
-C<merge_indices> may be used to merge indices, which may be sorted
-with C<sort_indices_by_index> or C<sort_indices_by_letter>.
+C<merge_indices> may be used to merge indices. Document indices may be sorted
+with C<sort_indices_by_index> or C<sort_indices_by_letter>. Other functions
+deal with formatting of index entries as text or getting information on
+index entries.
+Note that, in general, the functions used to merge or sort indices
+should not be called directly: the corresponding functions
+in L<Texinfo::Document> already call the functions in this module and,
+in addition, cache the result with the document.
=head1 METHODS
No method is exported in the default case.
-Some methods takes a L<Texinfo::Report> C<$registrar> as argument for
-error reporting. Error reporting also require Texinfo customization variables
-information, which means an object implementing the C<get_conf> method, in
-practice the main program configuration or a converter
-(L<Texinfo::Convert::Converter/Getting and setting customization
-variables>). If the C<$registrar> argument is not set, the object used to
-get customization information is assumed to be a converter, and the
-error reporting uses converters error messages reporting functions
-(L<Texinfo::Convert::Converter/Registering error and warning messages>).
-
-Other common input arguments such as indices information
-are obtained from a parsed document, see L<Texinfo::Document>.
-
=over
=item $sort_string = index_entry_element_sort_string($document_info,
$main_entry, $index_entry_element, $options, $prefer_reference_element)
@@ -798,31 +797,48 @@ C<sort_indices_by_letter>.
=item $merged_indices = merge_indices($indices_information)
X<C<merge_indices>>
-Using information returned by L<<
C<Texinfo::Document::indices_information>|Texinfo::Document/$indices_information
= $document->indices_information() >>,
-a structure holding all the index entries by index name is returned,
+Returns a structure holding all the index entries by index name
with all the entries of merged indices merged with those of the indice
-merged into.
+merged into. The I<$indices_information> argument should be a hash reference
+with indices information; it is described in detail in
+L<<
C<Texinfo::Document::indices_information>|Texinfo::Document/$indices_information
= $document->indices_information() >>.
The I<$merged_indices> returned is a hash reference whose
keys are the index names and values arrays of index entry structures
described in details in L<Texinfo::Document/index_entries>.
+In general, this method should not be directly called, instead
+L<< C<Texinfo::Document::merged_indices>|Texinfo::Document/$merged_indices =
$document->merged_indices() >>
+should be called on a document, which calls C<merge_indices> if needed and
+associates the merged indices with the document.
+
=item $option = setup_index_entry_keys_formatting($customization_information)
X<C<setup_index_entry_keys_formatting>>
Return options relevant for index keys sorting for conversion of Texinfo
to text to be output.
-=item ($index_entries_sorted, $index_entries_sort_strings) =
sort_indices_by_index($registrar, $customization_information,
$merged_index_entries, $indices_information)
+=item $index_entries_sorted = sort_indices_by_index($document, $registrar,
$customization_information, $use_unicode_collation, $locale_lang)
-=item ($index_entries_sorted, $index_entries_sort_strings) =
sort_indices_by_letter($registrar, $customization_information,
$merged_index_entries, $indices_information)
+=item $index_entries_sorted = sort_indices_by_letter($document, $registrar,
$customization_information, $use_unicode_collation, $locale_lang)
X<C<sort_indices_by_index>> X<C<sort_indices_by_letter>>
C<sort_indices_by_letter> sorts by index and letter, while
C<sort_indices_by_index> sort all entries of an index together.
+Indices are obtained from I<$document>, and should have been merged
+previously, in general by using
+L<< C<Texinfo::Document::merged_indices>|Texinfo::Document/$merged_indices =
$document->merged_indices() >>.
In both cases, a hash reference with index names as keys
I<$index_entries_sorted>
is returned.
+By default, indices are sorted according to the I<Unicode Collation Algorithm>
+defined in the L<Unicode Technical Standard
+#10|http://www.unicode.org/reports/tr10/>, without language-specific collation
+tailoring. If I<$use_unicode_collation> is set to 0, the sorting will not use
+the I<Unicode Collation Algorithm> and simply sort according to the codepoints.
+If I<$locale_lang> is set, the language is used for linguistic tailoring of the
+sorting, if possible.
+
When sorting by letter, an array reference of letter hash references is
associated with each index name. Each letter hash reference has two
keys, a I<letter> key with the letter, and an I<entries> key with an array
@@ -833,10 +849,23 @@ the best to use for output.
When simply sorting, the array of the sorted index entries is associated
with the index name.
-I<$index_entries_sort_strings> is a hash reference associating the index
-entries with the strings that were used to sort them.
+The I<$registrar> argument can be set to a L<Texinfo::Report> object.
+Error reporting also requires Texinfo customization variables
+information, which means an object implementing the C<get_conf> method, in
+practice the main program configuration or a converter
+(L<Texinfo::Convert::Converter/Getting and setting customization
+variables>) as I<$customization_information> argument.
+If the C<$registrar> argument is not set, the object used to
+get customization information is assumed to be a converter, and the
+error reporting uses converters error messages reporting functions
+(L<Texinfo::Convert::Converter/Registering error and warning messages>).
-Register errors in I<$registrar> or through I<$customization_information>.
+In general, those methods should not be called directly, instead
+L<<
C<Texinfo::Document::sorted_indices_by_index>|Texinfo::Document/$sorted_indices
= $document->sorted_indices_by_index($registrar, $customization_information,
$use_unicode_collation, $locale_lang) >>
+or L<<
C<Texinfo::Document::sorted_indices_by_letter>|Texinfo::Document/$sorted_indices
= $document->sorted_indices_by_letter($registrar, $customization_information,
$use_unicode_collation, $locale_lang) >>
+should be called on a document. These functions call C<sort_indices_by_index>
or
+C<sort_indices_by_letter> if needed and associate the sorted indices to
+the document.
=back
diff --git a/tp/Texinfo/XS/convert/converter.c
b/tp/Texinfo/XS/convert/converter.c
index ded7317d85..546cda2330 100644
--- a/tp/Texinfo/XS/convert/converter.c
+++ b/tp/Texinfo/XS/convert/converter.c
@@ -649,8 +649,8 @@ get_converter_indices_sorted_by_index (CONVERTER *self)
&& self->conf->documentlanguage.string)
collation_language = self->conf->documentlanguage.string;
- return sorted_indices_by_index (&self->error_messages, self->conf,
- self->document,
+ return sorted_indices_by_index (self->document,
+ &self->error_messages, self->conf,
self->conf->USE_UNICODE_COLLATION.integer,
collation_language,
self->conf->XS_STRXFRM_COLLATION_LOCALE.string);
@@ -670,8 +670,8 @@ get_converter_indices_sorted_by_letter (CONVERTER *self)
&& self->conf->documentlanguage.string)
collation_language = self->conf->documentlanguage.string;
- return sorted_indices_by_letter (&self->error_messages, self->conf,
- self->document,
+ return sorted_indices_by_letter (self->document,
+ &self->error_messages, self->conf,
self->conf->USE_UNICODE_COLLATION.integer,
collation_language,
self->conf->XS_STRXFRM_COLLATION_LOCALE.string);
diff --git a/tp/Texinfo/XS/main/document.c b/tp/Texinfo/XS/main/document.c
index 9c7b1a8f24..d8c8dbc932 100644
--- a/tp/Texinfo/XS/main/document.c
+++ b/tp/Texinfo/XS/main/document.c
@@ -223,9 +223,9 @@ find_collation_sorted_indices_by_index (
}
INDEX_SORTED_BY_INDEX *
-sorted_indices_by_index (ERROR_MESSAGE_LIST *error_messages,
- OPTIONS *options, DOCUMENT *document,
- int use_unicode_collation,
+sorted_indices_by_index (DOCUMENT *document,
+ ERROR_MESSAGE_LIST *error_messages,
+ OPTIONS *options, int use_unicode_collation,
const char *collation_language,
const char *collation_locale)
{
@@ -334,9 +334,9 @@ find_collation_sorted_indices_by_letter (
}
INDEX_SORTED_BY_LETTER *
-sorted_indices_by_letter (ERROR_MESSAGE_LIST *error_messages,
- OPTIONS *options, DOCUMENT *document,
- int use_unicode_collation,
+sorted_indices_by_letter (DOCUMENT *document,
+ ERROR_MESSAGE_LIST *error_messages,
+ OPTIONS *options, int use_unicode_collation,
const char *collation_language,
const char *collation_locale)
{
diff --git a/tp/Texinfo/XS/main/document.h b/tp/Texinfo/XS/main/document.h
index 38f57c913b..ac68d84c2c 100644
--- a/tp/Texinfo/XS/main/document.h
+++ b/tp/Texinfo/XS/main/document.h
@@ -36,16 +36,14 @@ const INDICES_SORT_STRINGS *document_indices_sort_strings (
ERROR_MESSAGE_LIST *error_messages,
OPTIONS *options, int prefer_reference_element);
-INDEX_SORTED_BY_INDEX *sorted_indices_by_index (
+INDEX_SORTED_BY_INDEX *sorted_indices_by_index (DOCUMENT *document,
ERROR_MESSAGE_LIST *error_messages,
- OPTIONS *options, DOCUMENT *document,
- int use_unicode_collation,
+ OPTIONS *options, int use_unicode_collation,
const char *collation_language,
const char *collation_locale);
-INDEX_SORTED_BY_LETTER *sorted_indices_by_letter (
+INDEX_SORTED_BY_LETTER *sorted_indices_by_letter (DOCUMENT *document,
ERROR_MESSAGE_LIST *error_messages,
- OPTIONS *options, DOCUMENT *document,
- int use_unicode_collation,
+ OPTIONS *options, int use_unicode_collation,
const char *collation_language,
const char *collation_locale);
diff --git a/tp/Texinfo/XS/main/manipulate_indices.c
b/tp/Texinfo/XS/main/manipulate_indices.c
index 570a90309e..4566f60944 100644
--- a/tp/Texinfo/XS/main/manipulate_indices.c
+++ b/tp/Texinfo/XS/main/manipulate_indices.c
@@ -228,6 +228,8 @@ strip_index_ignore_chars (const char *string, const char
*index_ignore_chars)
return result_text.text;
}
+/* corresponding perl code in Texinfo::Indices */
+
char *
index_entry_element_sort_string (const INDEX_ENTRY *main_entry,
const ELEMENT *index_entry_element,
@@ -572,7 +574,7 @@ setup_index_entries_sort_strings (ERROR_MESSAGE_LIST
*error_messages,
return indices_sort_strings;
}
-INDEX_COLLATOR *
+static INDEX_COLLATOR *
setup_collator (int use_unicode_collation, const char *collation_language,
const char *collation_locale)
{
@@ -618,7 +620,7 @@ setup_collator (int use_unicode_collation, const char
*collation_language,
return result;
}
-INDICES_SORTABLE_ENTRIES *
+static INDICES_SORTABLE_ENTRIES *
setup_sortable_index_entries (INDEX_COLLATOR *collator,
const INDICES_SORT_STRINGS *indices_sort_strings)
{
@@ -816,7 +818,7 @@ compare_sortable_index_entry_wrapper (const void *a, const
void *b)
return compare_sortable_index_entry (sie_a, sie_b);
}
-void
+static void
destroy_indices_sortable_entries (
INDICES_SORTABLE_ENTRIES *indices_sortable_entries)
{
@@ -856,7 +858,7 @@ destroy_indices_sortable_entries (
}
}
-void
+static void
destroy_collator (INDEX_COLLATOR *collator)
{
if (collator)