[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: Proposed interface changes for "parse.error custom"
From: |
Akim Demaille |
Subject: |
Re: Proposed interface changes for "parse.error custom" |
Date: |
Mon, 9 Mar 2020 07:15:18 +0100 |
Hi Adrian, hi all,
I'm struggling to get a good API, but I think that if drop
yysyntax_error_arguments, then we need two yyexpected_tokens functions.
Working on completion in the c/bistromathic example was enlightening for me. I
believe that push parsers are the only place where yyexpected_tokens makes
sense in itself (as opposed to being part of yysyntax_error_arguments). In
that case, there is no lookahead needed: only the current configuration (in the
sense of "global state": all the stacks, etc.) of the parser is needed. In the
push parser, this is captured in the yypstate struct.
So in the push parser, we need
> int res = yypstate_expected_tokens (ps, tokens, ntokens);
which only depends on the parser state (ps). It does not make sense to have it
depend on yyparse_context_t, as was the case before the appended wip-patch:
> yyparse_context_t yyctx = {ps, YYEMPTY, &lloc};
> int res = yyexpected_tokens (&yyctx, tokens, ntokens);
To see this in action, have a look at c/bistromathic, especially the
"expected_tokens" function (to which you give a line to parse, and it returns
the possible continuations).
Yet in order to customize the error messages, you need to give the user both
the parser state (pstate), and facts about the lookahead, so there something
like "yyparse_context_t" makes sense. In which case either we provide another
expected-tokens function that takes a parse-context, or we force the user to
write something like
res = yypstate_expected_tokens (yycontext_pstate (ctx), arg, argn);
I'm also wondering whether we should always have a pstate structure, even for
traditional pull parsers. If it makes no measurable difference in performance,
it would simplify a lot the implementation (e.g., the LAC function actually
just need the pstate).
I would very much like to have feedback, ideas, suggestions, about this.
Thanks in advance!
commit d3c153cae63df6a85bfb24185d3199ae23c2aa0b
Author: Akim Demaille <address@hidden>
Date: Sun Mar 8 17:31:28 2020 +0100
wip: yacc.c: yypstate_expected_tokens
diff --git a/data/skeletons/yacc.c b/data/skeletons/yacc.c
index 7f92a200..37515697 100644
--- a/data/skeletons/yacc.c
+++ b/data/skeletons/yacc.c
@@ -856,7 +856,15 @@ int yydebug;
#ifndef YYMAXDEPTH
# define YYMAXDEPTH ]b4_stack_depth_max[
-#endif]b4_lac_if([[
+#endif]b4_push_if([[
+struct yypstate
+ {]b4_declare_parser_state_variables[
+ /* Whether this instance has not started parsing yet. */
+ int yynew;
+ };]b4_pure_if([], [[
+
+static char yypstate_allocated = 0;]])])[
+]b4_lac_if([[
/* Given a state stack such that *YYBOTTOM is its bottom, such that
*YYTOP is either its top or is YYTOP_EMPTY to indicate an empty
@@ -1109,22 +1117,26 @@ yy_lac (yy_state_t *yyesa, yy_state_t **yyes,
]b4_parse_error_case([simple], [],
[[typedef struct
-{
- yy_state_t *yyssp;
- int yytoken;]b4_locations_if([[
- YYLTYPE *yylloc;]])[]b4_lac_if([[
+{]b4_push_if([[
+ yypstate* yyps;]], [[
+ yy_state_t *yyssp;]b4_lac_if([[
yy_state_t *yyesa;
yy_state_t **yyes;
- YYPTRDIFF_T *yyes_capacity;]])[
+ YYPTRDIFF_T *yyes_capacity;]])])[
+ int yytoken;]b4_locations_if([[
+ YYLTYPE *yylloc;]])[
} yyparse_context_t;
/* Put in YYARG at most YYARGN of the expected tokens given the
current YYCTX, and return the number of tokens stored in YYARG. If
YYARG is null, return the number of expected tokens (guaranteed to
- be less than YYNTOKENS). */
+ be less than YYNTOKENS). */]b4_push_if([[
+static int
+yypstate_expected_tokens (yypstate *yyps,
+ int yyarg[], int yyargn)]], [[
static int
yyexpected_tokens (const yyparse_context_t *yyctx,
- int yyarg[], int yyargn)
+ int yyarg[], int yyargn)]])[
{
/* Actual size of YYARG. */
int yycount = 0;
@@ -1134,8 +1146,9 @@ yyexpected_tokens (const yyparse_context_t *yyctx,
if (yyx != YYTERROR && yyx != YYUNDEFTOK)
{
{
- int yy_lac_status = yy_lac (yyctx->yyesa, yyctx->yyes,
yyctx->yyes_capacity,
- yyctx->yyssp, yyx);
+ int yy_lac_status
+ = yy_lac (]b4_push_if([[yyps->yyesa, &yyps->yyes,
&yyps->yyes_capacity, yyps->yyssp, yyx]],
+ [[yyctx->yyesa, yyctx->yyes,
yyctx->yyes_capacity, yyctx->yyssp, yyx]])[);
if (yy_lac_status == 2)
return -2;
if (yy_lac_status == 1)
@@ -1148,7 +1161,7 @@ yyexpected_tokens (const yyparse_context_t *yyctx,
else
yyarg[yycount++] = yyx;
}]],
-[[ int yyn = yypact[+*yyctx->yyssp];
+[[ int yyn = yypact@{+*]b4_push_if([yyps], [yyctx])[->yyssp@};
if (!yypact_value_is_default (yyn))
{
/* Start YYX at -YYN if negative to avoid negative indexes in
@@ -1174,6 +1187,15 @@ yyexpected_tokens (const yyparse_context_t *yyctx,
return yycount;
}
+]b4_push_if([[
+/* Similar to the previous function. */
+static int
+yyexpected_tokens (const yyparse_context_t *yyctx,
+ int yyarg[], int yyargn)
+{
+ return yypstate_expected_tokens (yyctx->yyps, yyarg, yyargn);
+}]])[
+
static int
yysyntax_error_arguments (const yyparse_context_t *yyctx,
int yyarg[], int yyargn)
@@ -1430,14 +1452,7 @@ yysyntax_error (YYPTRDIFF_T *yymsg_alloc, char **yymsg,
]b4_pure_if([], [b4_declare_scanner_communication_variables])[
-]b4_push_if([[
-struct yypstate
- {]b4_declare_parser_state_variables[
- /* Whether this instance has not started parsing yet. */
- int yynew;
- };]b4_pure_if([], [[
-
-static char yypstate_allocated = 0;]])b4_pull_if([[
+]b4_push_if([b4_pull_if([[
int
yyparse (]m4_ifset([b4_parse_param], [b4_formals(b4_parse_param)], [void])[)
@@ -1873,7 +1888,7 @@ yyerrlab:
[custom],
[[ {
yyparse_context_t yyctx
- = {yyssp, yytoken]b4_locations_if([[, &yylloc]])[]b4_lac_if([[,
yyesa, &yyes, &yyes_capacity]])[};]b4_lac_if([[
+ = {]b4_push_if([[yyps]], [[yyssp]b4_lac_if([[, yyesa, &yyes,
&yyes_capacity]])])[, yytoken]b4_locations_if([[, &yylloc]])[};]b4_lac_if([[
if (yychar != YYEMPTY)
YY_LAC_ESTABLISH;]])[
if (yyreport_syntax_error (&yyctx]m4_ifset([b4_parse_param],
@@ -1885,7 +1900,7 @@ yyerrlab:
[[ {
char const *yymsgp = YY_("syntax error");
yyparse_context_t yyctx
- = {yyssp, yytoken]b4_locations_if([[, &yylloc]])[]b4_lac_if([[,
yyesa, &yyes, &yyes_capacity]])[};
+ = {]b4_push_if([[yyps]], [[yyssp]b4_lac_if([[, yyesa, &yyes,
&yyes_capacity]])])[, yytoken]b4_locations_if([[, &yylloc]])[};
int yysyntax_error_status;]b4_lac_if([[
if (yychar != YYEMPTY)
YY_LAC_ESTABLISH;]])[
diff --git a/examples/c/bistromathic/parse.y b/examples/c/bistromathic/parse.y
index 02bd93f1..3ec4d7c8 100644
--- a/examples/c/bistromathic/parse.y
+++ b/examples/c/bistromathic/parse.y
@@ -349,9 +349,7 @@ expected_tokens (const char *input,
} while (status == YYPUSH_MORE);
// Then query for the accepted tokens at this point.
- yyparse_context_t yyctx
- = {ps->yyssp, YYEMPTY, &lloc, ps->yyesa, &ps->yyes, &ps->yyes_capacity};
- int res = yyexpected_tokens (&yyctx, tokens, ntokens);
+ int res = yypstate_expected_tokens (ps, tokens, ntokens);
yypstate_delete (ps);
return res;
}
- Re: Proposed interface changes for "parse.error custom",
Akim Demaille <=