[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[no subject]
From: |
Patrice Dumas |
Date: |
Fri, 9 Feb 2024 03:19:54 -0500 (EST) |
branch: master
commit aadea406922f9c2be5b9c95dd789d436be4951ae
Author: Patrice Dumas <pertusus@free.fr>
AuthorDate: Fri Feb 9 09:16:50 2024 +0100
* tp/Texinfo/Indices.pm (setup_index_entries_sort_strings)
(setup_sortable_index_entries, sort_indices_by_index)
(sort_indices_by_letter): split setup_index_entries_sort_strings out
of setup_sortable_index_entries, to setup sort strings.
setup_sortable_index_entries takes now sort strings structure in
argument and use a collator to setup the same structure as before.
Remove _index_entry_element_sort_string_key.
setup_index_entries_sort_strings store sort strings structure in the
document if passed as argument. Update most callers to pass a
document.
---
ChangeLog | 13 ++
tp/Texinfo/Document.pm | 6 +-
tp/Texinfo/Indices.pm | 159 ++++++++++++---------
tp/t/results/indices/double_seeentry_seealso.pl | 22 ---
tp/t/results/indices/empty_string_index_entry.pl | 64 ---------
.../indices/ignored_sort_char_empty_entries.pl | 36 -----
tp/t/results/indices/sorted_subentries.pl | 22 ---
tp/t/test_utils.pl | 2 +-
8 files changed, 111 insertions(+), 213 deletions(-)
diff --git a/ChangeLog b/ChangeLog
index 994f2b8812..8ba89602ff 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,16 @@
+2024-02-09 Patrice Dumas <pertusus@free.fr>
+
+ * tp/Texinfo/Indices.pm (setup_index_entries_sort_strings)
+ (setup_sortable_index_entries, sort_indices_by_index)
+ (sort_indices_by_letter): split setup_index_entries_sort_strings out
+ of setup_sortable_index_entries, to setup sort strings.
+ setup_sortable_index_entries takes now sort strings structure in
+ argument and use a collator to setup the same structure as before.
+ Remove _index_entry_element_sort_string_key.
+ setup_index_entries_sort_strings store sort strings structure in the
+ document if passed as argument. Update most callers to pass a
+ document.
+
2024-02-08 Patrice Dumas <pertusus@free.fr>
* tp/Texinfo/Document.pm (sorted_indices_by_letter)
diff --git a/tp/Texinfo/Document.pm b/tp/Texinfo/Document.pm
index f23b571f04..8064dfe3be 100644
--- a/tp/Texinfo/Document.pm
+++ b/tp/Texinfo/Document.pm
@@ -235,7 +235,8 @@ sub sorted_indices_by_letter($$$$$)
$customization_information,
$use_unicode_collation, $locale_lang,
$document->merged_indices(),
- $document->indices_information());
+ $document->indices_information(),
+ $document);
}
return $document->{'sorted_indices_by_letter'}->{$lang_key};
}
@@ -271,7 +272,8 @@ sub sorted_indices_by_index($$$$$)
$customization_information,
$use_unicode_collation, $locale_lang,
$document->merged_indices(),
- $document->indices_information());
+ $document->indices_information(),
+ $document);
}
return $document->{'sorted_indices_by_index'}->{$lang_key};
}
diff --git a/tp/Texinfo/Indices.pm b/tp/Texinfo/Indices.pm
index ffc2e41034..7b5c757039 100644
--- a/tp/Texinfo/Indices.pm
+++ b/tp/Texinfo/Indices.pm
@@ -191,29 +191,6 @@ sub index_entry_element_sort_string($$$$;$)
return $sort_string;
}
-# $DOCUMENT_INFO is used in XS to retrieve the document.
-sub _index_entry_element_sort_string_key($$$$$;$)
-{
- my $document_info = shift;
- my $main_entry = shift;
- my $index_entry_element = shift;
- my $options = shift;
- my $collator = shift;
- my $prefer_reference_element = shift;
-
- my $sort_string = index_entry_element_sort_string ($document_info,
- $main_entry, $index_entry_element,
- $options, $prefer_reference_element);
-
- # This avoids varying results depending on whether the string is
- # represented internally in UTF-8. See 'the "Unicode bug"' in the
- # "perlunicode" man page.
- utf8::upgrade($sort_string);
- my $sort_key = $collator->getSortKey(uc($sort_string));
-
- return ($sort_string, $sort_key);
-}
-
# This is a stub for the Unicode::Collate module. Although this module is
# a core Perl module, some distributions may install a stripped-down Perl
# that doesn't include it, so providing this fall-back allows texi2any
@@ -325,24 +302,20 @@ sub _setup_collator($$)
return $collator;
}
-# There is no strict need for document information in Perl, however, in XS
-# it is needed to retrieve the Tree elements in the C structures.
-# $CUSTOMIZATION_INFORMATION is used as the source of document
-# information. It should already be set if it is a converter based
-# on Texinfo::Convert::Converter, but otherwise it should be set by
-# the caller, setting 'document_descriptor' to document->document_descriptor().
-sub setup_sortable_index_entries($$$$$)
+sub setup_index_entries_sort_strings($$$$;$$)
{
my $registrar = shift;
my $customization_information = shift;
- my $collator = shift;
my $index_entries = shift;
my $indices_information = shift;
+ my $document = shift;
+ my $prefer_reference_element = shift;
- my $index_sortable_index_entries;
- my $index_entries_sort_strings = {};
- return $index_sortable_index_entries, $index_entries_sort_strings
- unless ($index_entries);
+ return undef unless ($index_entries);
+
+ if ($document and $document->{'index_entries_sort_strings'}) {
+ return $document->{'index_entries_sort_strings'};
+ }
# convert index entries to sort string using unicode when possible
# independently of input and output encodings
@@ -354,9 +327,9 @@ sub setup_sortable_index_entries($$$$$)
#$convert_text_options->{'INCLUDE_DIRECTORIES'}
# = $customization_information->get_conf('INCLUDE_DIRECTORIES');
- $index_sortable_index_entries = {};
+ my $indices_sort_strings = {};
foreach my $index_name (keys(%$index_entries)) {
- my $sortable_index_entries = [];
+ my $index_entries_sort_strings = [];
foreach my $index_entry (@{$index_entries->{$index_name}}) {
my $entry_index_name = $index_entry->{'index_name'};
my $main_entry_element = $index_entry->{'entry_element'};
@@ -364,13 +337,12 @@ sub setup_sortable_index_entries($$$$$)
if ($in_code) {
Texinfo::Convert::Text::set_options_code($convert_text_options);
}
- my ($entry_sort_string, $entry_sort_key)
- = _index_entry_element_sort_string_key($customization_information,
- $index_entry, $main_entry_element,
- $convert_text_options, $collator);
+ my $entry_sort_string
+ = index_entry_element_sort_string ($customization_information,
+ $index_entry, $main_entry_element,
+ $convert_text_options, $prefer_reference_element);
my $non_empty_index_subentries = 0;
my @entry_sort_strings;
- my @entry_sort_keys;
if ($entry_sort_string !~ /\S/) {
my $entry_cmdname = $main_entry_element->{'cmdname'};
$entry_cmdname
@@ -382,10 +354,8 @@ sub setup_sortable_index_entries($$$$$)
$entry_cmdname),
$main_entry_element->{'source_info'});
push @entry_sort_strings, '';
- push @entry_sort_keys, '';
} else {
push @entry_sort_strings, $entry_sort_string;
- push @entry_sort_keys, $entry_sort_key;
$non_empty_index_subentries++;
}
my $subentry_nr = 0;
@@ -393,10 +363,9 @@ sub setup_sortable_index_entries($$$$$)
while ($subentry->{'extra'} and $subentry->{'extra'}->{'subentry'}) {
$subentry_nr ++;
$subentry = $subentry->{'extra'}->{'subentry'};
- my ($subentry_sort_string, $sort_subentry_key)
- =
_index_entry_element_sort_string_key($customization_information,
- $index_entry, $subentry, $convert_text_options,
- $collator);
+ my $subentry_sort_string
+ = index_entry_element_sort_string($customization_information,
+ $index_entry, $subentry, $convert_text_options);
if ($subentry_sort_string !~ /\S/) {
my $entry_cmdname = $main_entry_element->{'cmdname'};
$entry_cmdname
@@ -408,41 +377,90 @@ sub setup_sortable_index_entries($$$$$)
$subentry_nr, $entry_cmdname),
$main_entry_element->{'source_info'});
push @entry_sort_strings, '';
- push @entry_sort_keys, '';
} else {
push @entry_sort_strings, $subentry_sort_string;
- push @entry_sort_keys, $sort_subentry_key;
$non_empty_index_subentries++;
}
}
if ($non_empty_index_subentries > 0) {
- my @keys_and_alpha;
+ my $subentries_alpha_strings;
for (my $i = 0; $i < scalar (@entry_sort_strings); $i++) {
my $alpha = 0;
if ($entry_sort_strings[$i] =~ /^[[:alpha:]]/) {
$alpha = 1;
}
- push @keys_and_alpha, [$entry_sort_keys[$i], $alpha];
+ push @$subentries_alpha_strings,
+ {'sort_string' => $entry_sort_strings[$i], 'alpha' => $alpha};
}
- my $sortable_entry = {'entry' => $index_entry,
- 'keys' => \@keys_and_alpha,
- 'entry_keys' => \@entry_sort_strings,
- 'number' => $index_entry->{'entry_number'},
+ push @{$index_entries_sort_strings}, {'entry' => $index_entry,
+ 'sort_strings' => $subentries_alpha_strings,
+ 'number' => $index_entry->{'entry_number'},
'index_name' => $entry_index_name};
- push @{$sortable_index_entries}, $sortable_entry;
}
if ($in_code) {
Texinfo::Convert::Text::reset_options_code($convert_text_options);
}
- $index_entries_sort_strings->{$index_entry}
- = join(', ', @entry_sort_strings);
+ }
+ $indices_sort_strings->{$index_name} = $index_entries_sort_strings;
+ }
+
+ if ($document) {
+ $document->{'index_entries_sort_strings'} = $indices_sort_strings;
+ }
+ return $indices_sort_strings;
+}
+
+# There is no strict need for document information in Perl, however, in XS
+# it is needed to retrieve the Tree elements in the C structures.
+# $CUSTOMIZATION_INFORMATION is used as the source of document
+# information. It should already be set if it is a converter based
+# on Texinfo::Convert::Converter, but otherwise it should be set by
+# the caller, setting 'document_descriptor' to document->document_descriptor().
+sub setup_sortable_index_entries($$)
+{
+ my $collator = shift;
+ my $indices_sort_strings = shift;
+
+ my $index_sortable_index_entries;
+ my $index_entries_sort_strings = {};
+ return $index_sortable_index_entries, $index_entries_sort_strings
+ unless ($indices_sort_strings);
+
+ $index_sortable_index_entries = {};
+ foreach my $index_name (keys(%$indices_sort_strings)) {
+ my $sortable_index_entries = [];
+ foreach my $index_entry (@{$indices_sort_strings->{$index_name}}) {
+ my @keys_and_alpha;
+ foreach my $sort_string_alpha (@{$index_entry->{'sort_strings'}}) {
+ my $sort_string = $sort_string_alpha->{'sort_string'};
+ # TODO $sort_string is never used directly to sort anymore, so
+ # it is possible that utf8::upgrade is not needed anymore. To be safe,
+ # we can keep it until we only support perl > 5.12.
+ # This avoids varying results depending on whether the string is
+ # represented internally in UTF-8. See 'the "Unicode bug"' in the
+ # "perlunicode" man page.
+ utf8::upgrade($sort_string);
+ my $sort_key = $collator->getSortKey(uc($sort_string));
+ push @keys_and_alpha, [$sort_key, $sort_string_alpha->{'alpha'}];
+
+ }
+ my $sortable_entry = {'entry' => $index_entry->{'entry'},
+ 'keys' => \@keys_and_alpha,
+ 'entry_strings_alpha' =>
$index_entry->{'sort_strings'},
+ 'number' => $index_entry->{'number'},
+ 'index_name' => $index_entry->{'index_name'}};
+ push @{$sortable_index_entries}, $sortable_entry;
+ $index_entries_sort_strings->{$index_entry->{'entry'}}
+ = join(', ', map {$_->{'sort_string'}}
+ @{$index_entry->{'sort_strings'}});
}
$index_sortable_index_entries->{$index_name} = $sortable_index_entries;
}
+
return ($index_sortable_index_entries, $index_entries_sort_strings);
}
-sub sort_indices_by_index($$$$$$)
+sub sort_indices_by_index($$$$$$;$)
{
my $registrar = shift;
my $customization_information = shift;
@@ -450,13 +468,17 @@ sub sort_indices_by_index($$$$$$)
my $locale_lang = shift;
my $index_entries = shift;
my $indices_information = shift;
+ my $document = shift;
+
+ my $indices_sort_strings = setup_index_entries_sort_strings($registrar,
+ $customization_information, $index_entries,
+ $indices_information, $document);
my $collator = _setup_collator($use_unicode_collation, $locale_lang);
my $sorted_index_entries;
my ($index_sortable_index_entries, $index_entries_sort_strings)
- = setup_sortable_index_entries($registrar, $customization_information,
- $collator, $index_entries, $indices_information);
+ = setup_sortable_index_entries($collator, $indices_sort_strings);
if (!$index_sortable_index_entries) {
return ($sorted_index_entries, $index_entries_sort_strings);
@@ -588,7 +610,7 @@ sub index_entry_first_letter_text_or_command($;$)
}
}
-sub sort_indices_by_letter($$$$$$)
+sub sort_indices_by_letter($$$$$$;$)
{
my $registrar = shift;
my $customization_information = shift;
@@ -596,13 +618,17 @@ sub sort_indices_by_letter($$$$$$)
my $locale_lang = shift;
my $index_entries = shift;
my $indices_information = shift;
+ my $document = shift;
+
+ my $indices_sort_strings = setup_index_entries_sort_strings($registrar,
+ $customization_information, $index_entries,
+ $indices_information, $document);
my $collator = _setup_collator($use_unicode_collation, $locale_lang);
my $sorted_index_entries;
my ($index_sortable_index_entries, $index_entries_sort_strings)
- = setup_sortable_index_entries($registrar, $customization_information,
- $collator, $index_entries, $indices_information);
+ = setup_sortable_index_entries($collator, $indices_sort_strings);
if (!$index_sortable_index_entries) {
return ($sorted_index_entries, $index_entries_sort_strings);
@@ -613,7 +639,8 @@ sub sort_indices_by_letter($$$$$$)
my $sortable_index_entries = $index_sortable_index_entries->{$index_name};
my $index_letter_hash = {};
foreach my $sortable_entry (@{$sortable_index_entries}) {
- my $entry_key = $sortable_entry->{'entry_keys'}->[0];
+ my $entry_key
+ = $sortable_entry->{'entry_strings_alpha'}->[0]->{'sort_string'};
# the following line leads to each accented letter being separate
# $letter = uc(substr($entry_key, 0, 1));
diff --git a/tp/t/results/indices/double_seeentry_seealso.pl
b/tp/t/results/indices/double_seeentry_seealso.pl
index e61b486cdc..c33240b8e3 100644
--- a/tp/t/results/indices/double_seeentry_seealso.pl
+++ b/tp/t/results/indices/double_seeentry_seealso.pl
@@ -727,17 +727,6 @@ coding: utf-8
End:
';
-$result_converted_errors{'info'}->{'double_seeentry_seealso'} = [
- {
- 'error_line' => 'warning: empty index key in @cindex
-',
- 'line_nr' => 10,
- 'text' => 'empty index key in @cindex',
- 'type' => 'warning'
- }
-];
-
-
$result_converted{'plaintext'}->{'double_seeentry_seealso'} = 'top
***
@@ -810,17 +799,6 @@
$result_converted{'html_text'}->{'double_seeentry_seealso'} = '<div class="top-l
</div>
';
-$result_converted_errors{'html_text'}->{'double_seeentry_seealso'} = [
- {
- 'error_line' => 'warning: empty index key in @cindex
-',
- 'line_nr' => 10,
- 'text' => 'empty index key in @cindex',
- 'type' => 'warning'
- }
-];
-
-
$result_converted{'xml'}->{'double_seeentry_seealso'} = '<node name="Top"
spaces=" "><nodename>top</nodename><nodenext automatic="on">node
index</nodenext></node>
<top spaces=" "><sectiontitle>top</sectiontitle>
diff --git a/tp/t/results/indices/empty_string_index_entry.pl
b/tp/t/results/indices/empty_string_index_entry.pl
index 75359bda95..ecd8bc84ea 100644
--- a/tp/t/results/indices/empty_string_index_entry.pl
+++ b/tp/t/results/indices/empty_string_index_entry.pl
@@ -684,38 +684,6 @@ coding: utf-8
End:
';
-$result_converted_errors{'info'}->{'empty_string_index_entry'} = [
- {
- 'error_line' => 'warning: empty index key in @findex
-',
- 'line_nr' => 4,
- 'text' => 'empty index key in @findex',
- 'type' => 'warning'
- },
- {
- 'error_line' => 'warning: empty index key in @item
-',
- 'line_nr' => 7,
- 'text' => 'empty index key in @item',
- 'type' => 'warning'
- },
- {
- 'error_line' => 'warning: empty index key in @defun
-',
- 'line_nr' => 10,
- 'text' => 'empty index key in @defun',
- 'type' => 'warning'
- },
- {
- 'error_line' => 'warning: empty index key in @defunx
-',
- 'line_nr' => 11,
- 'text' => 'empty index key in @defunx',
- 'type' => 'warning'
- }
-];
-
-
$result_converted{'plaintext'}->{'empty_string_index_entry'} = '‘’
@@ -750,38 +718,6 @@
$result_converted{'html_text'}->{'empty_string_index_entry'} = '<a class="node-i
';
-$result_converted_errors{'html_text'}->{'empty_string_index_entry'} = [
- {
- 'error_line' => 'warning: empty index key in @findex
-',
- 'line_nr' => 4,
- 'text' => 'empty index key in @findex',
- 'type' => 'warning'
- },
- {
- 'error_line' => 'warning: empty index key in @item
-',
- 'line_nr' => 7,
- 'text' => 'empty index key in @item',
- 'type' => 'warning'
- },
- {
- 'error_line' => 'warning: empty index key in @defun
-',
- 'line_nr' => 10,
- 'text' => 'empty index key in @defun',
- 'type' => 'warning'
- },
- {
- 'error_line' => 'warning: empty index key in @defunx
-',
- 'line_nr' => 11,
- 'text' => 'empty index key in @defunx',
- 'type' => 'warning'
- }
-];
-
-
$result_converted{'xml'}->{'empty_string_index_entry'} = '<node name="Top"
spaces=" "><nodename>Top</nodename><nodenext
automatic="on">chap</nodenext></node>
<node name="chap" spaces=" "><nodename>chap</nodename><nodeprev
automatic="on">Top</nodeprev></node>
diff --git a/tp/t/results/indices/ignored_sort_char_empty_entries.pl
b/tp/t/results/indices/ignored_sort_char_empty_entries.pl
index b9b6bebd66..fcaf4bd44c 100644
--- a/tp/t/results/indices/ignored_sort_char_empty_entries.pl
+++ b/tp/t/results/indices/ignored_sort_char_empty_entries.pl
@@ -490,24 +490,6 @@ coding: utf-8
End:
';
-$result_converted_errors{'info'}->{'ignored_sort_char_empty_entries'} = [
- {
- 'error_line' => 'warning: empty index key in @findex
-',
- 'line_nr' => 8,
- 'text' => 'empty index key in @findex',
- 'type' => 'warning'
- },
- {
- 'error_line' => 'warning: empty index key in @findex
-',
- 'line_nr' => 9,
- 'text' => 'empty index key in @findex',
- 'type' => 'warning'
- }
-];
-
-
$result_converted{'plaintext'}->{'ignored_sort_char_empty_entries'} = '* Menu:
@@ -563,24 +545,6 @@
$result_converted{'html_text'}->{'ignored_sort_char_empty_entries'} = '
</div>
';
-$result_converted_errors{'html_text'}->{'ignored_sort_char_empty_entries'} = [
- {
- 'error_line' => 'warning: empty index key in @findex
-',
- 'line_nr' => 8,
- 'text' => 'empty index key in @findex',
- 'type' => 'warning'
- },
- {
- 'error_line' => 'warning: empty index key in @findex
-',
- 'line_nr' => 9,
- 'text' => 'empty index key in @findex',
- 'type' => 'warning'
- }
-];
-
-
$result_converted{'xml'}->{'ignored_sort_char_empty_entries'} = '<set
name="txiindexhyphenignore" line=" txiindexhyphenignore"></set>
diff --git a/tp/t/results/indices/sorted_subentries.pl
b/tp/t/results/indices/sorted_subentries.pl
index e4b3374294..bd330c8e32 100644
--- a/tp/t/results/indices/sorted_subentries.pl
+++ b/tp/t/results/indices/sorted_subentries.pl
@@ -2115,17 +2115,6 @@ coding: utf-8
End:
';
-$result_converted_errors{'info'}->{'sorted_subentries'} = [
- {
- 'error_line' => 'warning: empty index sub entry 1 key in @cindex
-',
- 'line_nr' => 16,
- 'text' => 'empty index sub entry 1 key in @cindex',
- 'type' => 'warning'
- }
-];
-
-
$result_converted{'plaintext'}->{'sorted_subentries'} = '1 one
*****
@@ -2256,17 +2245,6 @@ $result_converted{'html_text'}->{'sorted_subentries'} =
'<div class="top-level-e
</div>
';
-$result_converted_errors{'html_text'}->{'sorted_subentries'} = [
- {
- 'error_line' => 'warning: empty index sub entry 1 key in @cindex
-',
- 'line_nr' => 16,
- 'text' => 'empty index sub entry 1 key in @cindex',
- 'type' => 'warning'
- }
-];
-
-
$result_converted{'xml'}->{'sorted_subentries'} = '<node name="Top" spaces="
"><nodename>Top</nodename><nodenext automatic="on">chapter one</nodenext></node>
<top><sectiontitle></sectiontitle>
diff --git a/tp/t/test_utils.pl b/tp/t/test_utils.pl
index d9aad8cf4e..b07797c720 100644
--- a/tp/t/test_utils.pl
+++ b/tp/t/test_utils.pl
@@ -1192,7 +1192,7 @@ sub test($$)
$main_configuration,
$use_unicode_collation, $locale_lang,
$merged_index_entries,
- $indices_information);
+ $indices_information, $document);
$indices_sorted_sort_strings = {};
foreach my $index_name (keys(%$sorted_index_entries)) {
# index entries sort strings sorted in the order of the index entries