[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[no subject]
From: |
Patrice Dumas |
Date: |
Sat, 28 Oct 2023 20:22:03 -0400 (EDT) |
branch: master
commit 4ed1b83e5371665d3b3ebcce4ed04af5b129625b
Author: Patrice Dumas <pertusus@free.fr>
AuthorDate: Sun Oct 29 02:22:03 2023 +0200
* tp/Texinfo/XS/convert/ConvertXS.xs (plain_texinfo_convert_tree)
(text_convert_tree): use newSVpv_utf8.
* tp/Texinfo/XS/main/builtin_commands.c (element_builtin_cmd): return
0 if the e->cmd value is outside of the command id tables
boundaries.
* tp/Texinfo/XS/parsetexi/parser.c (process_remaining_on_line): use
lookup_extra_string.
* tp/Texinfo/XS/convert/converter.c,
tp/Texinfo/XS/convert/indices_in_conversion.c,
tp/Texinfo/XS/main/build_perl_info.c,
tp/Texinfo/XS/main/command_stack.c, tp/Texinfo/XS/parsetexi/api.c,
tp/Texinfo/XS/parsetexi/parser.c: update comments.
* tp/Texinfo/XS/parsetexi/parser.c: reindent.
---
ChangeLog | 20 ++++++
man/pod2texi.1 | 2 +-
tp/Texinfo/XS/convert/ConvertXS.xs | 6 +-
tp/Texinfo/XS/convert/converter.c | 2 +-
tp/Texinfo/XS/convert/indices_in_conversion.c | 3 +-
tp/Texinfo/XS/main/build_perl_info.c | 14 ++---
tp/Texinfo/XS/main/builtin_commands.c | 6 ++
tp/Texinfo/XS/main/command_stack.c | 8 ++-
tp/Texinfo/XS/parsetexi/api.c | 18 ++++--
tp/Texinfo/XS/parsetexi/parser.c | 89 +++++++++++++++------------
10 files changed, 104 insertions(+), 64 deletions(-)
diff --git a/ChangeLog b/ChangeLog
index 27cdd0ca0d..add7ae825e 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,23 @@
+2023-10-28 Patrice Dumas <pertusus@free.fr>
+
+ * tp/Texinfo/XS/convert/ConvertXS.xs (plain_texinfo_convert_tree)
+ (text_convert_tree): use newSVpv_utf8.
+
+ * tp/Texinfo/XS/main/builtin_commands.c (element_builtin_cmd): return
+ 0 if the e->cmd value is outside of the command id tables
+ boundaries.
+
+ * tp/Texinfo/XS/parsetexi/parser.c (process_remaining_on_line): use
+ lookup_extra_string.
+
+ * tp/Texinfo/XS/convert/converter.c,
+ tp/Texinfo/XS/convert/indices_in_conversion.c,
+ tp/Texinfo/XS/main/build_perl_info.c,
+ tp/Texinfo/XS/main/command_stack.c, tp/Texinfo/XS/parsetexi/api.c,
+ tp/Texinfo/XS/parsetexi/parser.c: update comments.
+
+ * tp/Texinfo/XS/parsetexi/parser.c: reindent.
+
2023-10-28 Patrice Dumas <pertusus@free.fr>
* tp/Texinfo/XS/main/api_to_perl.c (call_switch_to_global_locale)
diff --git a/man/pod2texi.1 b/man/pod2texi.1
index 62bb5c7f74..17f3cdab17 100644
--- a/man/pod2texi.1
+++ b/man/pod2texi.1
@@ -133,7 +133,7 @@
.\" ========================================================================
.\"
.IX Title "POD2TEXI 1"
-.TH POD2TEXI 1 "2023-10-20" "perl" "User Contributed Perl Documentation"
+.TH POD2TEXI 1 "2023-10-25" "perl" "User Contributed Perl Documentation"
.\" For nroff, turn off justification. Always turn off hyphenation; it makes
.\" way too many mistakes in technical documents.
.if n .ad l
diff --git a/tp/Texinfo/XS/convert/ConvertXS.xs
b/tp/Texinfo/XS/convert/ConvertXS.xs
index 30104d0a80..c4caa7c8eb 100644
--- a/tp/Texinfo/XS/convert/ConvertXS.xs
+++ b/tp/Texinfo/XS/convert/ConvertXS.xs
@@ -56,9 +56,8 @@ plain_texinfo_convert_tree (tree_in)
if (document)
{
char *result = plain_texinfo_convert (document);
- RETVAL = newSVpv (result, strlen(result));
+ RETVAL = newSVpv_utf8 (result, 0);
free (result);
- SvUTF8_on (RETVAL);
}
else
RETVAL = newSV(0);
@@ -81,9 +80,8 @@ text_convert_tree (text_options_in, tree_in, unused=0)
{
/* text_options is destroyed in text_convert */
char *result = text_convert (document, text_options);
- RETVAL = newSVpv (result, strlen(result));
+ RETVAL = newSVpv_utf8 (result, 0);
free (result);
- SvUTF8_on (RETVAL);
}
else
{
diff --git a/tp/Texinfo/XS/convert/converter.c
b/tp/Texinfo/XS/convert/converter.c
index cb29766772..a5e4e48fba 100644
--- a/tp/Texinfo/XS/convert/converter.c
+++ b/tp/Texinfo/XS/convert/converter.c
@@ -43,7 +43,7 @@ retrieve_converter (int converter_descriptor)
return 0;
}
-/* descriptor starts at 1, 0 is an error */
+/* descriptor starts at 1, 0 is not found or an error */
size_t
register_converter (CONVERTER *converter)
{
diff --git a/tp/Texinfo/XS/convert/indices_in_conversion.c
b/tp/Texinfo/XS/convert/indices_in_conversion.c
index 3ac7a135c3..e5607961a1 100644
--- a/tp/Texinfo/XS/convert/indices_in_conversion.c
+++ b/tp/Texinfo/XS/convert/indices_in_conversion.c
@@ -27,6 +27,7 @@
#include "unicode.h"
#include "indices_in_conversion.h"
+/* corresponding perl code in Texinfo::Structuring */
MERGED_INDEX **
merge_indices (INDEX **index_names)
@@ -88,7 +89,7 @@ merge_indices (INDEX **index_names)
return merged_indices;
}
-/* in Texinfo::Common */
+/* corresponding perl code in Texinfo::Common */
ELEMENT *
index_content_element (ELEMENT *element, int prefer_reference_element)
diff --git a/tp/Texinfo/XS/main/build_perl_info.c
b/tp/Texinfo/XS/main/build_perl_info.c
index 2d06695cba..27a50e0dec 100644
--- a/tp/Texinfo/XS/main/build_perl_info.c
+++ b/tp/Texinfo/XS/main/build_perl_info.c
@@ -55,6 +55,12 @@
#define LOCALEDIR DATADIR "/locale"
+ /* TODO the following NOTE could be obsolete, as this code is now part
+ of a library that is not linked against Gnulib. However, XS dynamic
+ shared objects link against both the library this code is part of and
+ another library that does not use perl headers and does not link against
+ perl libraries but links against Gnulib. */
+
/* NOTE: Do not call 'malloc' or 'free' in any function called in this file.
Since this file (build_perl_info.c) includes the Perl headers,
we get the Perl redefinitions, which we do not want, as we don't use
@@ -1049,9 +1055,6 @@ get_errors (ERROR_MESSAGE* error_list, size_t
error_number)
/* Return Texinfo::Document perl object corresponding to the
C document structure corresponding to DOCUMENT_DESCRIPTOR.
- If NO_CLEAN_PERl_REFS is set, do not remove the pointers to perl
- tree elements in C tree elements. It should normally only be set for
- a tree that does not change anymore and will not be rebuilt.
If NO_STORE is set, destroy the C document.
*/
SV *
@@ -1244,7 +1247,6 @@ output_unit_to_perl_hash (OUTPUT_UNIT *output_unit)
if (output_unit->unit_filename)
{
- /* FIXME check if utf8 or binary */
sv = newSVpv_utf8 (output_unit->unit_filename,
strlen (output_unit->unit_filename));
STORE("unit_filename");
@@ -1276,10 +1278,6 @@ output_unit_to_perl_hash (OUTPUT_UNIT *output_unit)
unit_sv = newRV_inc ((SV *) output_unit->hv);
/* set the tree element associated_unit */
- /* TODO is it an issue if already set?
- hv_delete (element_hv, "associated_unit", strlen ("associated_unit"),
- G_DISCARD);
- */
hv_store (element_hv, "associated_unit", strlen ("associated_unit"),
unit_sv, 0);
}
diff --git a/tp/Texinfo/XS/main/builtin_commands.c
b/tp/Texinfo/XS/main/builtin_commands.c
index 7d2b912052..65e0cd1ec5 100644
--- a/tp/Texinfo/XS/main/builtin_commands.c
+++ b/tp/Texinfo/XS/main/builtin_commands.c
@@ -95,7 +95,13 @@ element_builtin_cmd (ELEMENT *e)
fprintf (stderr, "BUG: element_builtin_cmd: unexpected %s; add code?\n",
debug_str);
free (debug_str);
+ /* The e->cmd value is outside of the command id tables, so it is
+ likely that it would lead to incorrect memory accesses if it were
+ returned */
+ /*
return e->cmd;
+ */
+ return 0;
}
/* should never reach here */
return 0;
diff --git a/tp/Texinfo/XS/main/command_stack.c
b/tp/Texinfo/XS/main/command_stack.c
index eb1b873a95..8a02a42efc 100644
--- a/tp/Texinfo/XS/main/command_stack.c
+++ b/tp/Texinfo/XS/main/command_stack.c
@@ -65,6 +65,8 @@ top_command (COMMAND_STACK *stack)
return stack->stack[stack->top - 1];
}
+
+/* stack of command or type ids */
void
push_command_or_type (COMMAND_OR_TYPE_STACK *stack, enum command_id cmd,
enum element_type type)
@@ -113,7 +115,8 @@ top_command_or_type (COMMAND_OR_TYPE_STACK *stack)
return &stack->stack[stack->top - 1];
}
-
+
+/* stack of strings */
void
push_string_stack_string (STRING_STACK *stack, char *string)
{
@@ -148,7 +151,8 @@ top_string_stack (STRING_STACK *stack)
return stack->stack[stack->top - 1];
}
-
+
+/* stack of monospace contexts */
static void
push_monospace_context (MONOSPACE_CONTEXT_STACK *stack,
enum monospace_context mono_ctx)
diff --git a/tp/Texinfo/XS/parsetexi/api.c b/tp/Texinfo/XS/parsetexi/api.c
index 6e62d5fef4..acec0ded23 100644
--- a/tp/Texinfo/XS/parsetexi/api.c
+++ b/tp/Texinfo/XS/parsetexi/api.c
@@ -55,9 +55,12 @@
#ifdef ENABLE_NLS
-/* Use the uninstalled locales dir. Currently unused.
- The texinfo.mo files are not actually created here, only the
- texinfo_document.mo files, which aren't used by parsetexi. */
+/* Use the uninstalled locales dir for translated strings. Currently unused.
+ Note that if this code is used to find LocaleData, it would be better to
+ use it for the texinfo_document domain and not for the texinfo/PACKAGE
+ text domain.
+ Note that the LocaleData directory is passed to XS code and used for
+ bindtextdomain in the main/translations.c configure function.
+ This code may still be relevant for some future C only code */
static void
find_locales_dir (char *builddir)
{
@@ -74,7 +77,8 @@ find_locales_dir (char *builddir)
strerror (errno));
}
else
- {
+ { /* FIXME LocaleData does not contain the texinfo/PACKAGE text domain
+ translations, but the texinfo_document domain */
bindtextdomain (PACKAGE, s);
free (s);
closedir (dir);
@@ -160,7 +164,8 @@ reset_parser (int local_debug_output)
}
/* Determine directory path based on file name.
- Set ROOT to root of tree obtained by parsing FILENAME.
+ Return a DOCUMENT_DESCRIPTOR that can be used to retrieve the
+ tree and document obtained by parsing FILENAME.
Used for parse_texi_file. */
int
parse_file (char *filename, char *input_file_name, char *input_directory)
@@ -214,7 +219,8 @@ parse_text (char *string, int line_nr)
return document_descriptor;
}
-/* Set ROOT to root of tree obtained by parsing the Texinfo code in STRING.
+/* Set DOCUMENT_DESCRIPTOR to the value corresponding to the tree
+ obtained by parsing the Texinfo code in STRING.
STRING should be a UTF-8 buffer. Used for parse_texi_line. */
int
parse_string (char *string, int line_nr)
diff --git a/tp/Texinfo/XS/parsetexi/parser.c b/tp/Texinfo/XS/parsetexi/parser.c
index 2b1120d00a..cfa8f027bd 100644
--- a/tp/Texinfo/XS/parsetexi/parser.c
+++ b/tp/Texinfo/XS/parsetexi/parser.c
@@ -75,7 +75,7 @@ looking_at (char *s1, char *s2)
}
/* Look for a sequence of alphanumeric characters or hyphens, where the
- first isn't a hyphen. This is the format of (non-single-character) Texinfo
+ first isn't a hyphen. This is the format of (non-single-character) Texinfo
commands, but is also used elsewhere. Return value to be freed by caller.
*PTR is advanced past the read name. Return 0 if name is invalid. */
char *
@@ -444,6 +444,9 @@ wipe_parser_global_info (void)
memset (&global_commands, 0, sizeof (global_commands));
}
+/* setup a Texinfo tree with document_root as root and before_node_section
+ as first content. Used for all the trees except for those obtained by
+ parse_texi_line/parse_string. */
ELEMENT *
setup_document_root_and_before_node_section ()
{
@@ -574,8 +577,8 @@ begin_paragraph_p (ELEMENT *current)
&& in_paragraph_context (current_context ());
}
-/* If in a context where paragraphs are to be started, start a new
- paragraph. */
+/* If in a context where paragraphs are to be started, start a new
+ paragraph. */
ELEMENT *
begin_paragraph (ELEMENT *current)
{
@@ -1421,14 +1424,16 @@ process_remaining_on_line (ELEMENT **current_inout,
char **line_inout)
/* Else check if line is "@end ..." for current command. */
else
{
+ /* element used as is_end_current_command argument. */
ELEMENT *top_stack_raw_element;
enum command_id top_stack_cmd = raw_block_stack_top ();
if (top_stack_cmd == CM_NONE)
- {
+ {/* current is the first command */
top_stack_raw_element = current;
}
else
{
+ /* create a temporary element based on the top stack cmd command id */
top_stack_raw_element = new_element (ET_NONE);
top_stack_raw_element->cmd = top_stack_cmd;
}
@@ -1471,7 +1476,8 @@ process_remaining_on_line (ELEMENT **current_inout, char
**line_inout)
"macro `%s' previously defined", name);
line_error_ext (MSG_warning, 0,
&macro->element->source_info,
- "here is the previous definition of
`%s'", name);
+ "here is the previous definition of `%s'",
+ name);
}
else if (!(existing & USER_COMMAND_BIT))
{
@@ -1488,13 +1494,13 @@ process_remaining_on_line (ELEMENT **current_inout,
char **line_inout)
}
}
debug ("CLOSED raw %s", command_name(end_cmd));
- /* start a new line for the @end line (without the first
spaces on
- the line that have already been put in a raw container).
- This is normally done at the beginning of a line, but not
here,
- as we directly got the line. As the @end is processed
just below,
- an empty line will not appear in the output, but it is
needed to
- avoid a duplicate warning on @end not appearing at the
beginning
- of the line */
+ /* start a new line for the @end line (without the first spaces on
+ the line that have already been put in a raw container).
+ This is normally done at the beginning of a line, but not here,
+ as we directly got the line. As the @end is processed just
below,
+ an empty line will not appear in the output, but it is needed to
+ avoid a duplicate warning on @end not appearing at the beginning
+ of the line */
e = new_element (ET_empty_line);
add_to_element_contents (current, e);
@@ -1503,6 +1509,7 @@ process_remaining_on_line (ELEMENT **current_inout, char
**line_inout)
else
pop_raw_block_stack();
}
+ /* a temporary element was created based on the top stack cmd, remove */
if (top_stack_cmd != CM_NONE)
destroy_element (top_stack_raw_element);
}
@@ -1591,12 +1598,10 @@ process_remaining_on_line (ELEMENT **current_inout,
char **line_inout)
/* Check if parent element is 'verb' */
else if (current->parent && current->parent->cmd == CM_verb)
{
- char *delimiter;
char *q;
- KEY_PAIR *k_delimiter;
- k_delimiter = lookup_info (current->parent, "delimiter");
- delimiter = (char *)k_delimiter->value;
+ char *delimiter = lookup_extra_string (current->parent, "delimiter");
+
if (strcmp (delimiter, ""))
{
/* Look forward for the delimiter character followed by a close
@@ -1693,8 +1698,8 @@ process_remaining_on_line (ELEMENT **current_inout, char
**line_inout)
n = strspn (line, whitespace_chars_except_newline);
text_append_n (&e_empty_line->text, line, n);
line += n;
- /* It is important to let the processing continue from here, such that
- the @end is catched and handled below, as the condition has not
changed */
+ /* It is important to let the processing continue from here, such that
+ the @end is caught and handled below, as the condition has not changed
*/
} /* ignored raw format */
/* Skip empty lines. If we reach the end of input, continue in case there
@@ -1826,7 +1831,7 @@ process_remaining_on_line (ELEMENT **current_inout, char
**line_inout)
{
line_warn (
"value call nested too deeply "
- "(set MAX_MACRO_CALL_NESTING to override; current
value %d)",
+ "(set MAX_MACRO_CALL_NESTING to override; current value
%d)",
conf.max_macro_call_nesting);
free (flag);
if (spaces_element)
@@ -1845,7 +1850,8 @@ process_remaining_on_line (ELEMENT **current_inout, char
**line_inout)
= new_source_mark (SM_type_value_expansion);
value_source_mark->status = SM_status_start;
value_source_mark->line = strdup(value);
- sm_value_element = new_value_element (cmd, flag,
spaces_element);
+ sm_value_element = new_value_element (cmd, flag,
+ spaces_element);
value_source_mark->element = sm_value_element;
register_source_mark (current, value_source_mark);
@@ -1894,10 +1900,8 @@ process_remaining_on_line (ELEMENT **current_inout, char
**line_inout)
NOTE the last element in the current command contents is an element that
is transiently in the tree, and is put in the info hash by
gather_spaces_after_cmd_before_arg. It could therefore be possible
- to accept an @comment here and put it in this element. It would not
- necessarily be a good idea, as it would mean having an element
- in the info hash that holds something more complex than text and source
- marks.
+ to accept an @comment here and put it in this element, but we do
+ not want to complicate the tree.
*/
if (command_flags(current) & CF_brace && (cmd || command))
@@ -1919,7 +1923,7 @@ process_remaining_on_line (ELEMENT **current_inout, char
**line_inout)
This is possible only if the command read was already an alias
resolving to cmd and not to a non alias command. In turn,
this is possible if there was an error at the time of alias
- definition (because the alias was defined recursively).
+ definition (because the alias was defined recursively to itself).
*/
|| (command_data(cmd).flags & CF_ALIAS))
{
@@ -2051,7 +2055,7 @@ process_remaining_on_line (ELEMENT **current_inout, char
**line_inout)
}
}
/* special case for accent commands, use following character except @
- * as argument */
+ as argument */
else if ((command_flags(current) & CF_accent)
&& *line != '@')
{
@@ -2251,7 +2255,8 @@ process_remaining_on_line (ELEMENT **current_inout, char
**line_inout)
current = end_preformatted (current, 0, 0);
}
- /* cannot check parent before closing paragraph/preformatted */
+ /* done here and not above because it is not possible to check the parent
+ before closing paragraph/preformatted */
if (cmd == CM_item && item_line_parent (current))
data_cmd = CM_item_LINE;
@@ -2295,7 +2300,8 @@ process_remaining_on_line (ELEMENT **current_inout, char
**line_inout)
"spaces_at_end" if followed by spaces only when the
index or subentry command is done. */
{
- isolate_trailing_space (current,
ET_internal_spaces_before_brace_in_index);
+ isolate_trailing_space (current,
+
ET_internal_spaces_before_brace_in_index);
}
}
@@ -2526,15 +2532,15 @@ store_document (ELEMENT *root)
doc_global_info->input_directory
= strdup (global_info.input_directory);
#define COPY_GLOBAL_ARRAY(type,cmd) \
- doc_global_##type->cmd.contents.list = 0; \
- doc_global_##type->cmd.contents.number = 0; \
- doc_global_##type->cmd.contents.space = 0; \
- if (global_##type.cmd.contents.number > 0) \
+ doc_global_##type->cmd.contents.list = 0; \
+ doc_global_##type->cmd.contents.number = 0; \
+ doc_global_##type->cmd.contents.space = 0; \
+ if (global_##type.cmd.contents.number > 0) \
{ \
- for (i = 0; i < global_##type.cmd.contents.number; i++) \
+ for (i = 0; i < global_##type.cmd.contents.number; i++) \
{ \
- ELEMENT *e = contents_child_by_index (&global_##type.cmd, i);
\
- add_to_contents_as_array (&doc_global_##type->cmd, e); \
+ ELEMENT *e = contents_child_by_index (&global_##type.cmd, i); \
+ add_to_contents_as_array (&doc_global_##type->cmd, e); \
} \
}
COPY_GLOBAL_ARRAY(info,dircategory_direntry);
@@ -2584,8 +2590,8 @@ store_document (ELEMENT *root)
return document_descriptor;
}
-/* Pass in a root of "Texinfo tree". Starting point for adding
- to the tree is current_elt. Returns a stored document_descriptor */
+/* Pass in a ROOT_ELT root of "Texinfo tree". Starting point for adding
+ to the tree is CURRENT_ELT. Returns a stored DOCUMENT_DESCRIPTOR */
int
parse_texi (ELEMENT *root_elt, ELEMENT *current_elt)
{
@@ -2691,13 +2697,13 @@ parse_texi (ELEMENT *root_elt, ELEMENT *current_elt)
ELEMENT *dummy;
current = close_commands (current, CM_NONE, &dummy, CM_NONE);
- /* Make sure we are at the very top - we could have stopped at the "top"
- element, with "document_root" still to go. (This happens if the file
+ /* Make sure we are at the very top - we could have stopped at the "top"
+ element, with "document_root" still to go. (This happens if the file
didn't end with "@bye".) */
while (current->parent)
current = current->parent;
}
-
+
if (current_context () != ct_NONE)
fatal ("context_stack not empty at the end");
@@ -2735,7 +2741,8 @@ parse_texi (ELEMENT *root_elt, ELEMENT *current_elt)
if (input_number > 0)
fprintf (stderr, "BUG: at end, input_number > 0: %d\n", input_number);
- /* update merged_in for merging hapening after first index merge */
+ /* update merged_in. Only needed for merging happening after first
+ index merge */
resolve_indices_merged_in ();
identifiers_target