texinfo-commits
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[no subject]


From: Patrice Dumas
Date: Fri, 3 May 2024 17:39:12 -0400 (EDT)

branch: master
commit 32c59a42bf5308acdff78fb8f5b575f860cfb8c7
Author: Patrice Dumas <pertusus@free.fr>
AuthorDate: Fri May 3 22:04:26 2024 +0200

    * util/txixml2texi.pl: handle linemacro.
    
    * util/txixml2texi.pl (_encode): convert upon printing instead of
    using binmode, which is unreliable unless it is set right when
    opening the file and the filter is never changed afterwards.
---
 ChangeLog           |   8 +++++
 util/txixml2texi.pl | 102 +++++++++++++++++++++++++++++-----------------------
 2 files changed, 66 insertions(+), 44 deletions(-)

diff --git a/ChangeLog b/ChangeLog
index ce68152869..d196df2452 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,11 @@
+2024-05-03  Patrice Dumas  <pertusus@free.fr>
+
+       * util/txixml2texi.pl: handle linemacro.
+
+       * util/txixml2texi.pl (_encode): convert upon printing instead of
+       using binmode, which is unreliable if not used right on when opening
+       and never changing the filter.
+
 2024-05-03  Patrice Dumas  <pertusus@free.fr>
 
        * util/texinfo.dtd: add defbracketed, defblock, defline and
diff --git a/util/txixml2texi.pl b/util/txixml2texi.pl
index 592ee84ae4..745dfecdaa 100755
--- a/util/txixml2texi.pl
+++ b/util/txixml2texi.pl
@@ -127,7 +127,7 @@ my %elements_end_attributes = (
 
 # keys are markup elements.  If the element is associated to one @-command
 # only, the value is a string, the corresponding @-command formatted.
-# If the element is associated to more than one element, the value is a
+# If the element is associated to more than one @-command, the value is a
 # hash to select the command based on an attribute value.  They key of the
 # hash attribute is an attribute name and the value is another hash
 # reference which associates an attribute value to the formatted @-command
@@ -251,6 +251,17 @@ sub unprotect_spaces($)
   }
 }
 
+my $perl_encoding;
+sub _encode($)
+{
+  my $input_text = shift;
+  if (defined($perl_encoding)) {
+    return Encode::encode($perl_encoding, $input_text);
+  } else {
+    return $input_text;
+  }
+}
+
 my $eat_space = 0;
 my $skip_comment = 0;
 my @commands_with_args_stack;
@@ -288,7 +299,8 @@ while ($reader->read) {
       or $reader->nodeType() eq XML_READER_TYPE_SIGNIFICANT_WHITESPACE
      ) {
     if ($reader->hasValue()) {
-      print $reader->value();
+      my $value = $reader->value();
+      print _encode($value);
     }
   }
   my $name = $reader->name;
@@ -312,44 +324,43 @@ while ($reader->read) {
       if ($reader->hasAttributes()) {
         if (defined($reader->getAttribute('type'))) {
           my $command = $accent_type_command{$reader->getAttribute('type')};
-          print "\@${command}${spaces_after_command}"
+          print _encode("\@${command}${spaces_after_command}")
             if (defined($command));
         }
         if (!(defined($reader->getAttribute('bracketed'))
               and $reader->getAttribute('bracketed') eq 'off')) {
-          print '{';
+          print _encode('{');
         }
       } else {
-        print '{';
+        print _encode('{');
       }
     } elsif (exists $element_at_commands{$name}) {
       if (!ref($element_at_commands{$name})) {
-        print $element_at_commands{$name};
+        print _encode($element_at_commands{$name});
       } else {
         my ($attribute) = keys(%{$element_at_commands{$name}});
         if ($reader->hasAttributes()
             and defined($reader->getAttribute($attribute))) {
-          print
-            $element_at_commands{$name}->{$attribute}->{
-                             $reader->getAttribute($attribute)};
+          print _encode($element_at_commands{$name}->{$attribute}->{
+                             $reader->getAttribute($attribute)});
         }
       }
     } elsif (exists($Texinfo::Commands::brace_commands{$name})) {
-      print "\@${name}${spaces_after_command}".'{';
+      print _encode("\@${name}${spaces_after_command}".'{');
       if ($name eq 'verb' and $reader->hasAttributes()
           and defined($reader->getAttribute('delimiter'))) {
-        print $reader->getAttribute('delimiter');
+        print _encode($reader->getAttribute('delimiter'));
       }
-      print "$spaces";
+      print _encode($spaces);
     } elsif (exists($Texinfo::Commands::block_commands{$name})) {
-      print "\@$name";
-      if ($name eq 'macro' or $name eq 'rmacro') {
+      print _encode("\@$name");
+      if ($name eq 'macro' or $name eq 'rmacro' or $name eq 'linemacro') {
         if ($reader->hasAttributes() and defined($reader->getAttribute('line'))) {
-          print $reader->getAttribute('line');
+          print _encode($reader->getAttribute('line'));
         }
       } else {
         # leading spaces are already in the line attribute for (r)macro
-        print "$spaces";
+        print _encode($spaces);
       }
     } elsif (defined($Texinfo::Commands::line_commands{$name})
              or defined($Texinfo::Commands::nobrace_commands{$name})
@@ -360,17 +371,20 @@ while ($reader->read) {
       }
       if ($name eq 'documentencoding' and $reader->hasAttributes()
           and defined($reader->getAttribute('encoding'))) {
-        my $perl_encoding
+        my $resolved_encoding
           = Encode::resolve_alias($reader->getAttribute('encoding'));
 
-        if (defined($perl_encoding)) {
+        if (defined($resolved_encoding)) {
+          $perl_encoding = $resolved_encoding;
           if ($debug) {
-            print STDERR "Using encoding $perl_encoding\n";
+            print STDERR "Using encoding $resolved_encoding\n";
           }
-          binmode(STDOUT, ":encoding($perl_encoding)");
+          # prefer encoding with _encode, as using binmode is unreliable
+          # unless set at the file opening and never changed again.
+          #binmode(STDOUT, ":encoding($resolved_encoding)");
         }
       }
-      print "\@$name$spaces";
+      print _encode("\@$name$spaces");
       if ($reader->hasAttributes() and defined($reader->getAttribute('line'))) {
         my $line = $reader->getAttribute('line');
         $line =~ s/\\\\/\x{1F}/g;
@@ -379,7 +393,7 @@ while ($reader->read) {
         $line =~ s/\x{1F}/\\/g;
         # FIXME needed?
         #$line =~ s/\\v/\x{000B}/g;
-        print $line;
+        print _encode($line);
       }
       my $specific_line = (defined($Texinfo::Commands::line_commands{$name})
                 and $Texinfo::Commands::line_commands{$name} eq 'specific');
@@ -414,7 +428,7 @@ while ($reader->read) {
       if ($commands_with_args_stack[-1]->[1] < $arg_element_index) {
         while ($commands_with_args_stack[-1]->[1] < $arg_element_index) {
           $commands_with_args_stack[-1]->[1]++;
-          print ',';
+          print _encode(',');
         }
       } elsif ($commands_with_args_stack[-1]->[1] > 0) {
         # the index is already at or above the argument index.  Either it is
@@ -422,7 +436,7 @@ while ($reader->read) {
         if ($variadic_elements{$name}
             and defined($variadic_elements{$name}->{$command})) {
           $commands_with_args_stack[-1]->[1]++;
-          print ',';
+          print _encode(',');
           # a debug consistency check
           my $variadic_arg_index = $variadic_elements{$name}->{$command};
           if ($commands_with_args_stack[-1]->[1] < $variadic_arg_index) {
@@ -439,7 +453,7 @@ while ($reader->read) {
                        if ($debug);
         }
       }
-      print "$spaces";
+      print _encode($spaces);
     } elsif ($ignored_elements{$name}) {
       my $keep_indexterm = 0;
       if ($name eq 'indexterm') {
@@ -461,14 +475,14 @@ while ($reader->read) {
         if (defined($reader->getAttribute('command'))
             and (not (defined($reader->getAttribute('automatic'))
                       and $reader->getAttribute('automatic') eq 'on'))) {
-          print '@'.$reader->getAttribute('command');
+          print _encode('@'.$reader->getAttribute('command'));
         }
       }
     } elsif ($name eq 'infoenclose') {
       if ($reader->hasAttributes()
           and defined($reader->getAttribute('command'))) {
         my $command = $reader->getAttribute('command');
-        print "\@${command}${spaces_after_command}".'{'."$spaces";
+        print _encode("\@${command}${spaces_after_command}".'{'.$spaces);
       }
     # def* automatic
     } elsif ($reader->hasAttributes()
@@ -485,11 +499,11 @@ while ($reader->read) {
     if ($reader->hasAttributes()) {
       if (defined($reader->getAttribute('bracketed'))
           and $reader->getAttribute('bracketed') eq 'on') {
-        print '{'."$spaces";
+        print _encode('{'.$spaces);
       }
       # menus 'star' and following spaces
       if (defined($reader->getAttribute('leadingtext'))) {
-        print $reader->getAttribute('leadingtext');
+        print _encode($reader->getAttribute('leadingtext'));
       }
     }
     if ($Texinfo::Commands::block_commands{$name}
@@ -498,8 +512,8 @@ while ($reader->read) {
         and defined($reader->getAttribute('commandarg'))) {
       # happens when formatting command argument is missing and there
       # are no spaces.
-      print ' ' if ($spaces eq '');
-      print '@'.$reader->getAttribute('commandarg');
+      print _encode(' ') if ($spaces eq '');
+      print _encode('@'.$reader->getAttribute('commandarg'));
     }
   } elsif ($reader->nodeType() eq XML_READER_TYPE_END_ELEMENT) {
     if ($Texinfo::Convert::TexinfoMarkup::commands_args_elements{$name}) {
@@ -514,19 +528,19 @@ while ($reader->read) {
     if ($reader->hasAttributes()) {
       if (defined($reader->getAttribute('bracketed'))
           and $reader->getAttribute('bracketed') eq 'on') {
-        print "$trailingspaces";
+        print _encode($trailingspaces);
         # such that spaces are not prepended below when prepended
         # for elements without bracketed attribute below
         $trailingspaces = '';
-        print '}';
+        print _encode('}');
       }
     }
     if (exists ($Texinfo::Commands::brace_commands{$name})) {
       if ($name eq 'verb' and $reader->hasAttributes()
           and defined($reader->getAttribute('delimiter'))) {
-        print $reader->getAttribute('delimiter');
+        print _encode($reader->getAttribute('delimiter'));
       }
-      print '}';
+      print _encode('}');
     } elsif (exists($Texinfo::Commands::block_commands{$name})) {
       my $end_spaces;
       if ($reader->hasAttributes()
@@ -534,41 +548,41 @@ while ($reader->read) {
         $end_spaces = $reader->getAttribute('endspaces');
       }
       $end_spaces = ' ' if (!defined($end_spaces) or $end_spaces eq '');
-      print "\@end".$end_spaces."$name";
+      print _encode("\@end".$end_spaces.$name);
     } elsif (defined($Texinfo::Commands::line_commands{$name})
              or defined($Texinfo::Commands::nobrace_commands{$name})) {
       if ($Texinfo::Commands::root_commands{$name} and $name ne 'node') {
         $eat_space = 1;
       }
-      print "$trailingspaces";
+      print _encode($trailingspaces);
     } elsif ($elements_end_attributes{$name}) {
       if ($name eq 'accent') {
         if ($reader->hasAttributes()) {
           if (!(defined($reader->getAttribute('bracketed'))
                 and $reader->getAttribute('bracketed') eq 'off')) {
-            print '}';
+            print _encode('}');
           }
         } else {
-          print '}';
+          print _encode('}');
         }
       } elsif ($reader->hasAttributes()
                and defined($reader->getAttribute('separator'))) {
         print $reader->getAttribute('separator');
-        print "$trailingspaces";
+        print _encode($trailingspaces);
       }
     } elsif ($name eq 'infoenclose') {
-      print "$trailingspaces".'}';
+      print _encode($trailingspaces.'}');
     } elsif ($eat_space_elements{$name}) {
       $eat_space = 1;
     } else {
       print STDERR "END UNKNOWN $name\n" if ($debug);
-      print "$trailingspaces";
+      print _encode($trailingspaces);
     }
   } elsif ($reader->nodeType() eq XML_READER_TYPE_ENTITY_REFERENCE) {
     # for some reason XML_READER_TYPE_ENTITY is never emitted
     #       or $reader->nodeType() eq XML_READER_TYPE_ENTITY) {
     if (defined($entity_texts{$name})) {
-      print $entity_texts{$name};
+      print _encode($entity_texts{$name});
     }
   } elsif ($reader->nodeType() eq XML_READER_TYPE_COMMENT) {
     my $comment;
@@ -577,7 +591,7 @@ while ($reader->read) {
       $comment =~ s/^ (comment|c)//;
       my $command = $1;
       $comment =~ s/ $//;
-      print "\@${command}$comment" unless ($skip_comment);
+      print _encode("\@${command}$comment") unless ($skip_comment);
     }
   } elsif ($reader->nodeType() eq XML_READER_TYPE_DOCUMENT_TYPE) {
     $eat_space = 1;



reply via email to

[Prev in Thread] Current Thread [Next in Thread]