[Top][All Lists]
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Complain about hidden left recursion in GLR parsing
From: |
Sylvain Schmitz |
Subject: |
Complain about hidden left recursion in GLR parsing |
Date: |
Mon, 13 Mar 2006 15:09:00 +0100 |
User-agent: |
Mozilla/5.0 (Macintosh; U; PPC Mac OS X Mach-O; en-US; rv:1.7) Gecko/20040616 |
Hi,
I recently read in
<http://wiki.di.uminho.pt/wiki/pub/Joao/--TwikiJoaoPublications/technicalReport.pdf>
that bison did not handle hidden left recursion in GLR mode, and indeed,
it loops on the provided example.
Here is a simple modification so that bison complains about hidden
left recursion, avoiding such cases until someone into GLR parsing
implements one of the existing solutions.
--
Hope that helps,
Sylvain
--- bison/src/closure.c 2005-12-10 00:51:25.000000000 +0100
+++ bison-cvs20060222/src/closure.c 2006-03-13 14:48:15.000000000 +0100
@@ -29,9 +29,11 @@
#include <quotearg.h>
#include "closure.h"
+#include "complain.h"
#include "derives.h"
#include "getargs.h"
#include "gram.h"
+#include "nullable.h"
#include "reader.h"
#include "symtab.h"
@@ -44,6 +46,7 @@
/* internal data. See comments before set_fderives and set_firsts. */
static bitsetv fderives = NULL;
static bitsetv firsts = NULL;
+static bitsetv next_vars = NULL;
/* Retrieve the FDERIVES/FIRSTS sets of the nonterminals numbered Var. */
#define FDERIVES(Var) fderives[(Var) - ntokens]
@@ -120,6 +123,10 @@
| symbols 8 3 20, the symbol 8 can be the beginning of the data for |
| symbol 5, so the bit [8 - ntokens] in first[5 - ntokens] (= FIRST |
| (5)) is set. |
+| |
+| NEXT_VARS is similar but also takes nullable symbols into |
+| account. It is used for the detection of hidden left recursion |
+| in GLR parsing. |
`------------------------------------------------------------------*/
static void
@@ -128,23 +135,63 @@
symbol_number i, j;
firsts = bitsetv_create (nvars, nvars, BITSET_FIXED);
+ next_vars = bitsetv_create (nvars, nvars, BITSET_FIXED);
for (i = ntokens; i < nsyms; i++)
for (j = 0; derives[i - ntokens][j]; ++j)
{
item_number sym = derives[i - ntokens][j]->rhs[0];
if (ISVAR (sym))
- bitset_set (FIRSTS (i), sym - ntokens);
+ {
+ int k = 1;
+ bitset_set (FIRSTS (i), sym - ntokens);
+ bitset_set (next_vars [i - ntokens], sym - ntokens);
+ /* Keep going in the rule right hand side if the current
+ symbol is nullable. */
+ while (nullable[sym - ntokens]
+ && (sym = derives[i - ntokens][j]->rhs[k]) >= 0
+ && ISVAR (sym))
+ {
+ bitset_set (next_vars [i - ntokens], sym - ntokens);
+ k++;
+ }
+ if (ISVAR (sym))
+ bitset_set (next_vars [i - ntokens], sym - ntokens);
+ }
}
if (trace_flag & trace_sets)
bitsetv_matrix_dump (stderr, "RTC: Firsts Input", firsts);
- bitsetv_reflexive_transitive_closure (firsts);
+ bitsetv_transitive_closure (firsts);
+ bitsetv_transitive_closure (next_vars);
+
+ if (glr_parser)
+ {
+ for (i = ntokens; i < nsyms; i++)
+ {
+ /* Compare the results of the two closures: if there is a
+ recursion appearing in NEXT_VARS that did not appear in
+ FIRSTS, then it is a hidden left recursion. */
+ if (bitset_test (next_vars[i - ntokens], i - ntokens)
+ && !bitset_test (FIRSTS (i), i - ntokens))
+ {
+ complain_at (symbols[i]->location,
+ _("hidden left recursion on nonterminal: %s"),
+ symbols[i]->tag);
+ }
+ }
+ }
+ for (i = ntokens; i < nsyms; i++)
+ {
+ bitset_set (FIRSTS (i), i - ntokens);
+ }
if (trace_flag & trace_sets)
bitsetv_matrix_dump (stderr, "RTC: Firsts Output", firsts);
if (trace_flag & trace_sets)
print_firsts ();
+
+ bitsetv_free (next_vars);
}
/*-------------------------------------------------------------------.
%glr-parser
%{
#include <stdio.h>
char *yystring;
int nbs;
int yylex (void);
void yyerror (const char *);
%}
%%
s:
a s 'b' { nbs++; }
| 'x'
;
a:
/* empty */
;
%%
int
yylex (void)
{
return *yystring++;
}
void
yyerror (const char *msg)
{
fprintf (stderr, "%s\n", msg);
}
int
main (int argc, char *argv[])
{
int ret;
nbs = 0;
yystring = argv[1];
ret = yyparse ();
if (!ret)
printf ("Number of b's: %d\n", nbs);
return ret;
}
- Complain about hidden left recursion in GLR parsing,
Sylvain Schmitz <=