texinfo-commits
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

branch master updated: * tp/Texinfo/XS/parsetexi/Parsetexi.pm (parse_texi_file): rename $input_file_name as $decoded_input_file_path.


From: Patrice Dumas
Subject: branch master updated: * tp/Texinfo/XS/parsetexi/Parsetexi.pm (parse_texi_file): rename $input_file_name as $decoded_input_file_path.
Date: Mon, 27 May 2024 02:43:46 -0400

This is an automated email from the git hooks/post-receive script.

pertusus pushed a commit to branch master
in repository texinfo.

The following commit(s) were added to refs/heads/master by this push:
     new 767c7302c6 * tp/Texinfo/XS/parsetexi/Parsetexi.pm (parse_texi_file): rename $input_file_name as $decoded_input_file_path.
767c7302c6 is described below

commit 767c7302c63e7c8c006333a8572e67f4380803d8
Author: Patrice Dumas <pertusus@free.fr>
AuthorDate: Mon May 27 08:43:33 2024 +0200

    * tp/Texinfo/XS/parsetexi/Parsetexi.pm (parse_texi_file): rename
    $input_file_name as $decoded_input_file_path.
    
    * tp/Texinfo/XS/parsetexi/Parsetexi.xs (parse_file),
    tp/Texinfo/XS/parsetexi/api.c (parse_file),
    tp/Texinfo/XS/parsetexi/input.c (input_push_file): rename filename as
    input_file_path.
    
    * tp/Texinfo/XS/parsetexi/Parsetexi.pm (parse_texi_file),
    tp/Texinfo/XS/parsetexi/Parsetexi.xs (parse_file),
    tp/Texinfo/XS/parsetexi/api.c (parse_file, parse_file_path): pass
    only the input file path to parse_file.  Determine the directory in
    parse_file using the new function parse_file_path.
    
    * tp/Texinfo/XS/parsetexi/input.c: add some const.
    
    Changes in comments.
---
 ChangeLog                            | 20 +++++++++++
 tp/Texinfo/XS/main/utils.c           |  4 ---
 tp/Texinfo/XS/parsetexi/Parsetexi.pm | 13 +++----
 tp/Texinfo/XS/parsetexi/Parsetexi.xs |  8 ++---
 tp/Texinfo/XS/parsetexi/api.c        | 67 ++++++++++++++++++++----------------
 tp/Texinfo/XS/parsetexi/api.h        |  3 +-
 tp/Texinfo/XS/parsetexi/input.c      | 24 +++++++------
 tp/Texinfo/XS/parsetexi/parser.c     | 11 +++---
 8 files changed, 87 insertions(+), 63 deletions(-)

diff --git a/ChangeLog b/ChangeLog
index b4e444360d..46df760dc1 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,23 @@
+2024-05-27  Patrice Dumas  <pertusus@free.fr>
+
+       * tp/Texinfo/XS/parsetexi/Parsetexi.pm (parse_texi_file): rename
+       $input_file_name as $decoded_input_file_path.
+
+       * tp/Texinfo/XS/parsetexi/Parsetexi.xs (parse_file),
+       tp/Texinfo/XS/parsetexi/api.c (parse_file),
+       tp/Texinfo/XS/parsetexi/input.c (input_push_file): rename filename as
+       input_file_path.
+
+       * tp/Texinfo/XS/parsetexi/Parsetexi.pm (parse_texi_file),
+       tp/Texinfo/XS/parsetexi/Parsetexi.xs (parse_file),
+       tp/Texinfo/XS/parsetexi/api.c (parse_file, parse_file_path): pass
+       only the input file path to parse_file.  Determine the directory in
+       parse_file using the new function parse_file_path.
+
+       * tp/Texinfo/XS/parsetexi/input.c: add some const.
+
+       Changes in comments.
+
 2024-05-26  Patrice Dumas  <pertusus@free.fr>
 
        * tp/Texinfo/XS/main/DocumentXS.xs (document_errors): warn if a
diff --git a/tp/Texinfo/XS/main/utils.c b/tp/Texinfo/XS/main/utils.c
index 73963d8815..41d5f7ae93 100644
--- a/tp/Texinfo/XS/main/utils.c
+++ b/tp/Texinfo/XS/main/utils.c
@@ -755,10 +755,6 @@ collapse_spaces (const char *text)
    The filename of the line directive is returned.
    The line number value is in OUT_LINE_NO.
    RETVAL value is 1 for valid line directive, 0 otherwise.
-
-   TODO would be good to have line const, but it is not possible
-   because of strtoul and because of the transient modification to
-   have a \0.
 */
 char *
 parse_line_directive (const char *line, int *retval, int *out_line_no)
diff --git a/tp/Texinfo/XS/parsetexi/Parsetexi.pm b/tp/Texinfo/XS/parsetexi/Parsetexi.pm
index 93a790d4a6..641ae98633 100644
--- a/tp/Texinfo/XS/parsetexi/Parsetexi.pm
+++ b/tp/Texinfo/XS/parsetexi/Parsetexi.pm
@@ -216,20 +216,17 @@ sub parse_texi_file ($$)
   # the file is already a byte string, taken as is from the command
   # line.  The encoding was detected as COMMAND_LINE_ENCODING, but
   # it is not useful for the XS parser.
-  # TODO instead of using fileparse here, reimplement fileparse
-  # in XS, or use a file name parsing code from somewhere else?
-  my ($basename, $directories, $suffix) = fileparse($input_file_path);
-  my $document_descriptor = parse_file($self, $input_file_path,
-                                       $basename, $directories);
+  my ($input_file_name, $directories, $suffix) = fileparse($input_file_path);
+  my $document_descriptor = parse_file($self, $input_file_path);
   if (!$document_descriptor) {
     my $parser_registrar = $self->{'registrar'};
-    my $input_file_name = $input_file_path;
+    my $decoded_input_file_path = $input_file_path;
     my $encoding = $self->{'conf'}->{'COMMAND_LINE_ENCODING'};
     if (defined($encoding)) {
-      $input_file_name = decode($encoding, $input_file_path);
+      $decoded_input_file_path = decode($encoding, $input_file_path);
     }
     $parser_registrar->document_error($self,
-       sprintf(__("could not open %s: %s"), $input_file_name, $!));
+       sprintf(__("could not open %s: %s"), $decoded_input_file_path, $!));
     return undef;
   }
 
diff --git a/tp/Texinfo/XS/parsetexi/Parsetexi.xs b/tp/Texinfo/XS/parsetexi/Parsetexi.xs
index 24cb70390f..f3a331598a 100644
--- a/tp/Texinfo/XS/parsetexi/Parsetexi.xs
+++ b/tp/Texinfo/XS/parsetexi/Parsetexi.xs
@@ -74,13 +74,11 @@ register_parser_conf (SV *parser)
 
 # file path, can be in any encoding
 int
-parse_file (SV *parser, filename, input_file_name, input_directory)
-        char *filename = (char *)SvPVbyte_nolen ($arg);
-        char *input_file_name = (char *)SvPVbyte_nolen ($arg);
-        char *input_directory = (char *)SvPVbyte_nolen ($arg);
+parse_file (SV *parser, input_file_path)
+        char *input_file_path = (char *)SvPVbyte_nolen ($arg);
       CODE:
         apply_sv_parser_conf (parser);
-        RETVAL = parse_file (filename, input_file_name, input_directory);
+        RETVAL = parse_file (input_file_path);
       OUTPUT:
         RETVAL
 
diff --git a/tp/Texinfo/XS/parsetexi/api.c b/tp/Texinfo/XS/parsetexi/api.c
index 0808281640..50c0f55ff2 100644
--- a/tp/Texinfo/XS/parsetexi/api.c
+++ b/tp/Texinfo/XS/parsetexi/api.c
@@ -120,59 +120,68 @@ reset_parser (int local_debug_output)
   reset_parser_conf ();
 }
 
+/* RESULT should be an array of size two.  Upon return, it holds
+   the file name in the first position and directory, if any, in
+   the second position.  The file name and directory should be
+   freed.
+ */
+static void
+parse_file_path (const char *input_file_path, char **result)
+{
+  /* Strip off a leading directory path, by looking for the last
+     '/' in input_file_path. */
+  const char *p = 0;
+  const char *q = strchr (input_file_path, '/');
+  while (q)
+    {
+      p = q;
+      q = strchr (q + 1, '/');
+    }
+
+  if (p)
+    {
+      result[0] = strdup (p + 1);
+      result[1] = strndup (input_file_path, (p - input_file_path) + 1);
+    }
+  else
+    {
+      result[0] = strdup (input_file_path);
+      /* FIXME or strdup ("") */
+      result[1] = 0;
+    }
+}
+
 /* Determine directory path based on file name.
    Return a DOCUMENT_DESCRIPTOR that can be used to retrieve the
    tree and document obtained by parsing FILENAME.
    Used for parse_texi_file. */
 int
-parse_file (const char *filename, const char *input_file_name,
-            const char *input_directory)
+parse_file (const char *input_file_path)
 {
   int document_descriptor;
-  /*
-  char *p, *q;
-   */
   GLOBAL_INFO *global_info;
+  char *input_file_name_and_directory[2];
 
   int status;
 
   initialize_parsing ();
 
-  status = input_push_file (filename);
+  status = input_push_file (input_file_path);
   if (status)
     {
       remove_document_descriptor (parsed_document->descriptor);
       return 0;
     }
 
+  parse_file_path (input_file_path, input_file_name_and_directory);
+
   global_info = &parsed_document->global_info;
 
   free (global_info->input_file_name);
   free (global_info->input_directory);
-  global_info->input_file_name = strdup (input_file_name);
-  global_info->input_directory = strdup (input_directory);
+  global_info->input_file_name = input_file_name_and_directory[0];
+  global_info->input_directory = input_file_name_and_directory[1];
 
-  /* Strip off a leading directory path, by looking for the last
-     '/' in filename. */
-  /* The following is not needed, it is already done in the
-     main program */
-  /*
-  p = 0;
-  q = strchr (filename, '/');
-  while (q)
-    {
-      p = q;
-      q = strchr (q + 1, '/');
-    }
-
-  if (p)
-    {
-      char saved = *p;
-      *p = '\0';
-      add_include_directory (filename, &parser_include_directories);
-      *p = saved;
-    }
-  */
   document_descriptor = parse_texi_document ();
 
   return document_descriptor;
diff --git a/tp/Texinfo/XS/parsetexi/api.h b/tp/Texinfo/XS/parsetexi/api.h
index 6f72e76e85..3af555d9ae 100644
--- a/tp/Texinfo/XS/parsetexi/api.h
+++ b/tp/Texinfo/XS/parsetexi/api.h
@@ -4,8 +4,7 @@
 
 #include "tree_types.h"
 
-int parse_file (const char *filename, const char *input_file_name,
-                                      const char *input_directory);
+int parse_file (const char *input_file_path);
 int parse_piece (const char *, int line_nr);
 int parse_string (const char *, int line_nr);
 int parse_text (const char *, int line_nr);
diff --git a/tp/Texinfo/XS/parsetexi/input.c b/tp/Texinfo/XS/parsetexi/input.c
index ebae9122fc..d67c46e824 100644
--- a/tp/Texinfo/XS/parsetexi/input.c
+++ b/tp/Texinfo/XS/parsetexi/input.c
@@ -41,10 +41,10 @@ typedef struct {
 
     FILE *file;
     SOURCE_INFO source_info;
-    char *input_file_path; /* for IN_file type, the full input file path */
+    const char *input_file_path; /* for IN_file type, the full input file path */
 
     char *text;  /* Input text to be parsed as Texinfo. */
-    char *ptext; /* How far we are through 'text'.  Used to split 'text'
+    const char *ptext; /* How far we are through 'text'.  Used to split 'text'
                     into lines. */
     char *value_flag; /* value flag if the input text is a @value
                          expansion */
@@ -290,7 +290,8 @@ next_text (ELEMENT *current)
 
       switch (input->type)
         {
-          char *p, *new;
+          const char *p;
+          char *new;
         case IN_text:
           /*
           debug_nonl ("IN_TEXT '"); debug_print_protected_string (input->ptext);
@@ -552,18 +553,18 @@ parser_locate_include_file (const char *filename)
 
 /* Try to open a file called FILENAME */
 int
-input_push_file (const char *filename)
+input_push_file (const char *input_file_path)
 {
   FILE *stream = 0;
-  char *p, *q;
+  const char *p, *q;
   char *base_filename;
-  char *stored_file_path;
+  const char *stored_file_path;
 
-  if (!strcmp (filename, "-"))
+  if (!strcmp (input_file_path, "-"))
     stream = stdin;
   else
     {
-      stream = fopen (filename, "r");
+      stream = fopen (input_file_path, "r");
       if (!stream)
         return errno;
     }
@@ -577,20 +578,21 @@ input_push_file (const char *filename)
 
   /* Strip off a leading directory path. */
   p = 0;
-  q = strchr (filename, '/');
+  q = strchr (input_file_path, '/');
   while (q)
     {
       p = q;
       q = strchr (q + 1, '/');
     }
+
   if (p)
     {
       base_filename = save_string (p+1);
-      stored_file_path = save_string (filename);
+      stored_file_path = save_string (input_file_path);
     }
   else
     {
-      base_filename = save_string (filename);
+      base_filename = save_string (input_file_path);
       stored_file_path = base_filename;
     }
 
diff --git a/tp/Texinfo/XS/parsetexi/parser.c b/tp/Texinfo/XS/parsetexi/parser.c
index 81633b7418..a11f19f73a 100644
--- a/tp/Texinfo/XS/parsetexi/parser.c
+++ b/tp/Texinfo/XS/parsetexi/parser.c
@@ -530,10 +530,13 @@ parse_texi_document (void)
 
   document_descriptor = parse_texi (document_root, before_node_section);
 
-  /* TODO the document information often use more memory than needed,
-     when space > number.  We could realloc here the diverse structures
-     to number. No need to do it in parse_texi, it should only be
-     truely interesting for a whole document.
+  /* TODO the document structure lists use more memory than needed
+     when space > number.  We could realloc here the diverse lists
+     to hold only what is strictly needed.  There is probably a trade off
+     with speed, and this unused memory is probably small compared to the
+     memory used by the Texinfo tree.
+     No need to do it in parse_texi, it should be relevant for whole
+     documents only.
    */
 
   rearrange_tree_beginning (before_node_section, document_descriptor);



reply via email to

[Prev in Thread] Current Thread [Next in Thread]