[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[no subject]
From: |
Patrice Dumas |
Date: |
Fri, 3 May 2024 17:39:12 -0400 (EDT) |
branch: master
commit 32c59a42bf5308acdff78fb8f5b575f860cfb8c7
Author: Patrice Dumas <pertusus@free.fr>
AuthorDate: Fri May 3 22:04:26 2024 +0200
* util/txixml2texi.pl: handle linemacro.
* util/txixml2texi.pl (_encode): convert upon printing instead of
using binmode, which is unreliable unless it is applied right when the
handle is opened and the encoding layer is never changed afterwards.
---
ChangeLog | 8 +++++
util/txixml2texi.pl | 102 +++++++++++++++++++++++++++++-----------------------
2 files changed, 66 insertions(+), 44 deletions(-)
diff --git a/ChangeLog b/ChangeLog
index ce68152869..d196df2452 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,11 @@
+2024-05-03 Patrice Dumas <pertusus@free.fr>
+
+ * util/txixml2texi.pl: handle linemacro.
+
+ * util/txixml2texi.pl (_encode): convert upon printing instead of
+ using binmode, which is unreliable unless it is applied right when the
+ handle is opened and the encoding layer is never changed afterwards.
+
2024-05-03 Patrice Dumas <pertusus@free.fr>
* util/texinfo.dtd: add defbracketed, defblock, defline and
diff --git a/util/txixml2texi.pl b/util/txixml2texi.pl
index 592ee84ae4..745dfecdaa 100755
--- a/util/txixml2texi.pl
+++ b/util/txixml2texi.pl
@@ -127,7 +127,7 @@ my %elements_end_attributes = (
# keys are markup elements. If the element is associated to one @-command
# only, the value is a string, the corresponding @-command formatted.
-# If the element is associated to more than one element, the value is a
+# If the element is associated to more than one @-command, the value is a
# hash to select the command based on an attribute value. The key of the
# hash attribute is an attribute name and the value is another hash
# reference which associates an attribute value to the formatted @-command
@@ -251,6 +251,17 @@ sub unprotect_spaces($)
}
}
+my $perl_encoding;
+sub _encode($)
+{
+ my $input_text = shift;
+ if (defined($perl_encoding)) {
+ return Encode::encode($perl_encoding, $input_text);
+ } else {
+ return $input_text;
+ }
+}
+
my $eat_space = 0;
my $skip_comment = 0;
my @commands_with_args_stack;
@@ -288,7 +299,8 @@ while ($reader->read) {
or $reader->nodeType() eq XML_READER_TYPE_SIGNIFICANT_WHITESPACE
) {
if ($reader->hasValue()) {
- print $reader->value();
+ my $value = $reader->value();
+ print _encode($value);
}
}
my $name = $reader->name;
@@ -312,44 +324,43 @@ while ($reader->read) {
if ($reader->hasAttributes()) {
if (defined($reader->getAttribute('type'))) {
my $command = $accent_type_command{$reader->getAttribute('type')};
- print "\@${command}${spaces_after_command}"
+ print _encode("\@${command}${spaces_after_command}")
if (defined($command));
}
if (!(defined($reader->getAttribute('bracketed'))
and $reader->getAttribute('bracketed') eq 'off')) {
- print '{';
+ print _encode('{');
}
} else {
- print '{';
+ print _encode('{');
}
} elsif (exists $element_at_commands{$name}) {
if (!ref($element_at_commands{$name})) {
- print $element_at_commands{$name};
+ print _encode($element_at_commands{$name});
} else {
my ($attribute) = keys(%{$element_at_commands{$name}});
if ($reader->hasAttributes()
and defined($reader->getAttribute($attribute))) {
- print
- $element_at_commands{$name}->{$attribute}->{
- $reader->getAttribute($attribute)};
+ print _encode($element_at_commands{$name}->{$attribute}->{
+ $reader->getAttribute($attribute)});
}
}
} elsif (exists($Texinfo::Commands::brace_commands{$name})) {
- print "\@${name}${spaces_after_command}".'{';
+ print _encode("\@${name}${spaces_after_command}".'{');
if ($name eq 'verb' and $reader->hasAttributes()
and defined($reader->getAttribute('delimiter'))) {
- print $reader->getAttribute('delimiter');
+ print _encode($reader->getAttribute('delimiter'));
}
- print "$spaces";
+ print _encode($spaces);
} elsif (exists($Texinfo::Commands::block_commands{$name})) {
- print "\@$name";
- if ($name eq 'macro' or $name eq 'rmacro') {
+ print _encode("\@$name");
+ if ($name eq 'macro' or $name eq 'rmacro' or $name eq 'linemacro') {
if ($reader->hasAttributes() and
defined($reader->getAttribute('line'))) {
- print $reader->getAttribute('line');
+ print _encode($reader->getAttribute('line'));
}
} else {
# leading spaces are already in the line attribute for (r)macro
- print "$spaces";
+ print _encode($spaces);
}
} elsif (defined($Texinfo::Commands::line_commands{$name})
or defined($Texinfo::Commands::nobrace_commands{$name})
@@ -360,17 +371,20 @@ while ($reader->read) {
}
if ($name eq 'documentencoding' and $reader->hasAttributes()
and defined($reader->getAttribute('encoding'))) {
- my $perl_encoding
+ my $resolved_encoding
= Encode::resolve_alias($reader->getAttribute('encoding'));
- if (defined($perl_encoding)) {
+ if (defined($resolved_encoding)) {
+ $perl_encoding = $resolved_encoding;
if ($debug) {
- print STDERR "Using encoding $perl_encoding\n";
+ print STDERR "Using encoding $resolved_encoding\n";
}
- binmode(STDOUT, ":encoding($perl_encoding)");
+ # prefer encoding with _encode, as using binmode is unreliable
+ # unless set at the file opening and never changed again.
+ #binmode(STDOUT, ":encoding($resolved_encoding)");
}
}
- print "\@$name$spaces";
+ print _encode("\@$name$spaces");
if ($reader->hasAttributes() and defined($reader->getAttribute('line')))
{
my $line = $reader->getAttribute('line');
$line =~ s/\\\\/\x{1F}/g;
@@ -379,7 +393,7 @@ while ($reader->read) {
$line =~ s/\x{1F}/\\/g;
# FIXME needed?
#$line =~ s/\\v/\x{000B}/g;
- print $line;
+ print _encode($line);
}
my $specific_line = (defined($Texinfo::Commands::line_commands{$name})
and $Texinfo::Commands::line_commands{$name} eq 'specific');
@@ -414,7 +428,7 @@ while ($reader->read) {
if ($commands_with_args_stack[-1]->[1] < $arg_element_index) {
while ($commands_with_args_stack[-1]->[1] < $arg_element_index) {
$commands_with_args_stack[-1]->[1]++;
- print ',';
+ print _encode(',');
}
} elsif ($commands_with_args_stack[-1]->[1] > 0) {
# the index is already at or above the argument index. Either it is
@@ -422,7 +436,7 @@ while ($reader->read) {
if ($variadic_elements{$name}
and defined($variadic_elements{$name}->{$command})) {
$commands_with_args_stack[-1]->[1]++;
- print ',';
+ print _encode(',');
# a debug consistency check
my $variadic_arg_index = $variadic_elements{$name}->{$command};
if ($commands_with_args_stack[-1]->[1] < $variadic_arg_index) {
@@ -439,7 +453,7 @@ while ($reader->read) {
if ($debug);
}
}
- print "$spaces";
+ print _encode($spaces);
} elsif ($ignored_elements{$name}) {
my $keep_indexterm = 0;
if ($name eq 'indexterm') {
@@ -461,14 +475,14 @@ while ($reader->read) {
if (defined($reader->getAttribute('command'))
and (not (defined($reader->getAttribute('automatic'))
and $reader->getAttribute('automatic') eq 'on'))) {
- print '@'.$reader->getAttribute('command');
+ print _encode('@'.$reader->getAttribute('command'));
}
}
} elsif ($name eq 'infoenclose') {
if ($reader->hasAttributes()
and defined($reader->getAttribute('command'))) {
my $command = $reader->getAttribute('command');
- print "\@${command}${spaces_after_command}".'{'."$spaces";
+ print _encode("\@${command}${spaces_after_command}".'{'.$spaces);
}
# def* automatic
} elsif ($reader->hasAttributes()
@@ -485,11 +499,11 @@ while ($reader->read) {
if ($reader->hasAttributes()) {
if (defined($reader->getAttribute('bracketed'))
and $reader->getAttribute('bracketed') eq 'on') {
- print '{'."$spaces";
+ print _encode('{'.$spaces);
}
# menus 'star' and following spaces
if (defined($reader->getAttribute('leadingtext'))) {
- print $reader->getAttribute('leadingtext');
+ print _encode($reader->getAttribute('leadingtext'));
}
}
if ($Texinfo::Commands::block_commands{$name}
@@ -498,8 +512,8 @@ while ($reader->read) {
and defined($reader->getAttribute('commandarg'))) {
# happens when formatting command argument is missing and there
# are no spaces.
- print ' ' if ($spaces eq '');
- print '@'.$reader->getAttribute('commandarg');
+ print _encode(' ') if ($spaces eq '');
+ print _encode('@'.$reader->getAttribute('commandarg'));
}
} elsif ($reader->nodeType() eq XML_READER_TYPE_END_ELEMENT) {
if ($Texinfo::Convert::TexinfoMarkup::commands_args_elements{$name}) {
@@ -514,19 +528,19 @@ while ($reader->read) {
if ($reader->hasAttributes()) {
if (defined($reader->getAttribute('bracketed'))
and $reader->getAttribute('bracketed') eq 'on') {
- print "$trailingspaces";
+ print _encode($trailingspaces);
# such that spaces are not prepended below when prepended
# for elements without bracketed attribute below
$trailingspaces = '';
- print '}';
+ print _encode('}');
}
}
if (exists ($Texinfo::Commands::brace_commands{$name})) {
if ($name eq 'verb' and $reader->hasAttributes()
and defined($reader->getAttribute('delimiter'))) {
- print $reader->getAttribute('delimiter');
+ print _encode($reader->getAttribute('delimiter'));
}
- print '}';
+ print _encode('}');
} elsif (exists($Texinfo::Commands::block_commands{$name})) {
my $end_spaces;
if ($reader->hasAttributes()
@@ -534,41 +548,41 @@ while ($reader->read) {
$end_spaces = $reader->getAttribute('endspaces');
}
$end_spaces = ' ' if (!defined($end_spaces) or $end_spaces eq '');
- print "\@end".$end_spaces."$name";
+ print _encode("\@end".$end_spaces.$name);
} elsif (defined($Texinfo::Commands::line_commands{$name})
or defined($Texinfo::Commands::nobrace_commands{$name})) {
if ($Texinfo::Commands::root_commands{$name} and $name ne 'node') {
$eat_space = 1;
}
- print "$trailingspaces";
+ print _encode($trailingspaces);
} elsif ($elements_end_attributes{$name}) {
if ($name eq 'accent') {
if ($reader->hasAttributes()) {
if (!(defined($reader->getAttribute('bracketed'))
and $reader->getAttribute('bracketed') eq 'off')) {
- print '}';
+ print _encode('}');
}
} else {
- print '}';
+ print _encode('}');
}
} elsif ($reader->hasAttributes()
and defined($reader->getAttribute('separator'))) {
print $reader->getAttribute('separator');
- print "$trailingspaces";
+ print _encode($trailingspaces);
}
} elsif ($name eq 'infoenclose') {
- print "$trailingspaces".'}';
+ print _encode($trailingspaces.'}');
} elsif ($eat_space_elements{$name}) {
$eat_space = 1;
} else {
print STDERR "END UNKNOWN $name\n" if ($debug);
- print "$trailingspaces";
+ print _encode($trailingspaces);
}
} elsif ($reader->nodeType() eq XML_READER_TYPE_ENTITY_REFERENCE) {
# for some reason XML_READER_TYPE_ENTITY is never emitted
# or $reader->nodeType() eq XML_READER_TYPE_ENTITY) {
if (defined($entity_texts{$name})) {
- print $entity_texts{$name};
+ print _encode($entity_texts{$name});
}
} elsif ($reader->nodeType() eq XML_READER_TYPE_COMMENT) {
my $comment;
@@ -577,7 +591,7 @@ while ($reader->read) {
$comment =~ s/^ (comment|c)//;
my $command = $1;
$comment =~ s/ $//;
- print "\@${command}$comment" unless ($skip_comment);
+ print _encode("\@${command}$comment") unless ($skip_comment);
}
} elsif ($reader->nodeType() eq XML_READER_TYPE_DOCUMENT_TYPE) {
$eat_space = 1;