[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[PATCH 1/5] State-item pair graph generation
From: |
Vincent Imbimbo |
Subject: |
[PATCH 1/5] State-item pair graph generation |
Date: |
Tue, 12 May 2020 22:23:50 -0400 |
---
src/lssi.c | 367 ++++++++++++++++++++++++++++++
src/lssi.h | 56 +++++
src/state-item.c | 563 +++++++++++++++++++++++++++++++++++++++++++++++
src/state-item.h | 100 +++++++++
4 files changed, 1086 insertions(+)
create mode 100644 src/lssi.c
create mode 100644 src/lssi.h
create mode 100644 src/state-item.c
create mode 100644 src/state-item.h
diff --git a/src/lssi.c b/src/lssi.c
new file mode 100644
index 00000000..c4c51b32
--- /dev/null
+++ b/src/lssi.c
@@ -0,0 +1,367 @@
+/* Lookahead sensative state item searches for counterexample generation
+
+ Copyright (C) 2019-2020 Free Software Foundation, Inc.
+
+ This file is part of Bison, the GNU Compiler Compiler.
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+#include <gl_linked_list.h>
+#include <gl_xlist.h>
+#include <stdlib.h>
+#include "lssi.h"
+#include "nullable.h"
+#include "getargs.h"
+
+// lookahead sensative state item
+struct lssi;
+typedef struct lssi
+{
+ state_item_number si;
+ struct lssi *parent;
+ // this is the precise lookahead set (follow_L from the CupEx paper)
+ bitset lookahead;
+ bool free_lookahead;
+} lssi;
+
+lssi *
+new_lssi (state_item_number si, lssi *p, bitset l, bool free_l)
+{
+ lssi *ret = xmalloc (sizeof (lssi));
+ ret->si = si;
+ ret->parent = p;
+ ret->lookahead = l;
+ ret->free_lookahead = free_l;
+ return ret;
+}
+
+void
+lssi_free (lssi *sn)
+{
+ if (sn == NULL)
+ return;
+ if (sn->free_lookahead)
+ bitset_free (sn->lookahead);
+ free (sn);
+}
+
+static size_t
+lssi_hasher (lssi *sn, size_t max)
+{
+ size_t hash = sn->si;
+ bitset_iterator biter;
+ symbol_number syn;
+ BITSET_FOR_EACH (biter, sn->lookahead, syn, 0)
+ hash += syn;
+ return hash % max;
+}
+
+static bool
+lssi_comparator (lssi *s1, lssi *s2)
+{
+ if (s1->si == s2->si)
+ {
+ if (s1->lookahead == s2->lookahead)
+ return true;
+ return bitset_equal_p (s1->lookahead, s2->lookahead);
+ }
+ return false;
+}
+
+static inline bool
+append_lssi (lssi *sn, Hash_table *visited, gl_list_t queue)
+{
+ if (hash_lookup (visited, sn))
+ {
+ sn->free_lookahead = false;
+ lssi_free (sn);
+ return false;
+ }
+ if (!hash_insert (visited, sn))
+ xalloc_die ();
+ gl_list_add_last (queue, sn);
+ return true;
+}
+
+static void
+lssi_print (lssi *l)
+{
+ print_state_item (state_items + l->si, stdout);
+ if (l->lookahead)
+ {
+ printf ("FOLLOWL = { ");
+ bitset_iterator biter;
+ symbol_number sin;
+ BITSET_FOR_EACH (biter, l->lookahead, sin, 0)
+ printf ("%s, \n", symbols[sin]->tag);
+ puts ("}");
+ }
+}
+
+/**
+ * Compute the set of state-items that can reach the given conflict item via
+ * a combination of transitions or production steps.
+ */
+bitset
+eligible_state_items (state_item *target)
+{
+ bitset result = bitset_create (nstate_items, BITSET_FIXED);
+ gl_list_t queue =
+ gl_list_create (GL_LINKED_LIST, NULL, NULL, NULL, true, 1,
+ (const void **) &target);
+ while (gl_list_size (queue) > 0)
+ {
+ state_item *si = (state_item *) gl_list_get_at (queue, 0);
+ gl_list_remove_at (queue, 0);
+ if (bitset_test (result, si - state_items))
+ continue;
+ bitset_set (result, si - state_items);
+ // search all reverse edges.
+ bitset rsi = si_revs[si - state_items];
+ bitset_iterator biter;
+ state_item_number sin;
+ BITSET_FOR_EACH (biter, rsi, sin, 0)
+ gl_list_add_last (queue, &state_items[sin]);
+ }
+ gl_list_free (queue);
+ return result;
+}
+
+/**
+ * Compute the shortest lookahead-sensitive path from the start state to
+ * this conflict. If optimized is true, only consider parser states
+ * that can reach the conflict state.
+ */
+gl_list_t
+shortest_path_from_start (state_item_number target, symbol_number next_sym)
+{
+ bitset eligible = eligible_state_items (&state_items[target]);
+ Hash_table *visited = hash_initialize (32,
+ NULL,
+ (Hash_hasher) lssi_hasher,
+ (Hash_comparator) lssi_comparator,
+ (Hash_data_freer) lssi_free);
+ bitset il = bitset_create (nsyms, BITSET_FIXED);
+ bitset_set (il, 0);
+ lssi *init = new_lssi (0, NULL, il, true);
+ gl_list_t queue = gl_list_create (GL_LINKED_LIST, NULL, NULL,
+ NULL,
+ true, 1, (const void **) &init);
+ // breadth-first search
+ bool finished = false;
+ lssi *n;
+ while (gl_list_size (queue) > 0)
+ {
+ n = (lssi *) gl_list_get_at (queue, 0);
+ gl_list_remove_at (queue, 0);
+ state_item_number last = n->si;
+ if (target == last && bitset_test (n->lookahead, next_sym))
+ {
+ finished = true;
+ break;
+ }
+ // Transitions don't change follow_L
+ if (si_trans[last] >= 0)
+ {
+ state_item_number nextSI = si_trans[last];
+ if (bitset_test (eligible, nextSI))
+ {
+ lssi *next = new_lssi (nextSI, n, n->lookahead, false);
+ append_lssi (next, visited, queue);
+ }
+ }
+ // For production steps, follow_L is based on the symbol after the
+ // non-terminal being produced.
+ // if no such symbol exists, follow_L is unchanged
+ // if the symbol is a terminal, follow_L only contains that terminal
+ // if the symbol is not nullable, follow_L is its FIRSTS set
+ // if the symbol is nullable, follow_L is its FIRSTS set unioned with
+ // this logic applied to the next symbol in the rule
+ bitset p = si_prods_lookup (last);
+ if (p)
+ {
+ state_item si = state_items[last];
+ // Compute follow_L as above
+ bitset lookahead = bitset_create (nsyms, BITSET_FIXED);
+ item_number *pos = si.item + 1;
+ for (; !item_number_is_rule_number (*pos); ++pos)
+ {
+ item_number it = *pos;
+ if (ISTOKEN (it))
+ {
+ bitset_set (lookahead, it);
+ break;
+ }
+ else
+ {
+ bitset_union (lookahead, lookahead, FIRSTS (it));
+ if (!nullable[it - ntokens])
+ break;
+ }
+ }
+ if (item_number_is_rule_number (*pos))
+ bitset_union (lookahead, n->lookahead, lookahead);
+
+ bool lookahead_used = false;
+ // Try all possible production steps within this parser state.
+ bitset_iterator biter;
+ state_item_number nextSI;
+ BITSET_FOR_EACH (biter, p, nextSI, 0)
+ {
+ if (!bitset_test (eligible, nextSI))
+ continue;
+ lssi *next = new_lssi (nextSI, n, lookahead,
+ !lookahead_used);
+ lookahead_used = append_lssi (next, visited, queue)
+ || lookahead_used;
+ }
+ if (!lookahead_used)
+ bitset_free (lookahead);
+ }
+ }
+ if (!finished)
+ {
+ gl_list_free (queue);
+ fputs ("Cannot find shortest path to conflict state.", stderr);
+ exit (1);
+ }
+ gl_list_t ret =
+ gl_list_create_empty (GL_LINKED_LIST, NULL, NULL, NULL, true);
+ for (lssi *sn = n; sn != NULL; sn = sn->parent)
+ gl_list_add_first (ret, state_items + sn->si);
+
+ hash_free (visited);
+ gl_list_free (queue);
+
+ if (trace_flag & trace_cex)
+ {
+ puts ("REDUCE ITEM PATH:");
+ gl_list_iterator_t it = gl_list_iterator (ret);
+ const void *sip;
+ while (gl_list_iterator_next (&it, &sip, NULL))
+ print_state_item ((state_item *) sip, stdout);
+ }
+ return ret;
+}
+
+/**
+ * Determine if the given terminal is in the given symbol set or can begin
+ * a nonterminal in the given symbol set.
+ */
+bool
+intersect_symbol (symbol_number sym, bitset syms)
+{
+ if (!syms)
+ return true;
+ bitset_iterator biter;
+ symbol_number sn;
+ BITSET_FOR_EACH (biter, syms, sn, 0)
+ {
+ if (sym == sn)
+ return true;
+ if (ISVAR (sn) && bitset_test (FIRSTS (sn), sym))
+ return true;
+ }
+ return false;
+}
+
+/**
+ * Determine if any symbol in ts is in syms
+ * or can begin a nonterminal syms.
+ */
+bool
+intersect (bitset ts, bitset syms)
+{
+ if (!syms || !ts)
+ return true;
+ bitset_iterator biter;
+ symbol_number sn;
+ BITSET_FOR_EACH (biter, syms, sn, 0)
+ {
+ if (bitset_test (ts, sn))
+ return true;
+ if (ISVAR (sn) && !bitset_disjoint_p (ts, FIRSTS (sn)))
+ return true;
+ }
+ return false;
+}
+
+
+/**
+ * Compute a list of state_items that have a production to n with respect
+ * to its lookahead
+ */
+gl_list_t
+lssi_reverse_production (const state_item *si, bitset lookahead)
+{
+ gl_list_t result =
+ gl_list_create_empty (GL_LINKED_LIST, NULL, NULL, NULL, true);
+ if (SI_TRANSITION (si))
+ return result;
+ // A production step was made to the current lalr_item.
+ // Check that the next symbol in the parent lalr_item is
+ // compatible with the lookahead.
+ bitset_iterator biter;
+ state_item_number sin;
+ BITSET_FOR_EACH (biter, si_revs[si - state_items], sin, 0)
+ {
+ state_item *prevsi = state_items + sin;
+ if (!production_allowed (prevsi, si))
+ continue;
+ bitset prev_lookahead = prevsi->lookahead;
+ if (item_number_is_rule_number (*(prevsi->item)))
+ {
+ // reduce item
+ // Check that some lookaheads can be preserved.
+ if (!intersect (prev_lookahead, lookahead))
+ continue;
+ }
+ else
+ {
+ // shift item
+ if (lookahead)
+ {
+ // Check that lookahead is compatible with the first
+ // possible symbols in the rest of the production.
+ // Alternatively, if the rest of the production is
+ // nullable, the lookahead must be compatible with
+ // the lookahead of the corresponding item.
+ bool applicable = false;
+ bool nlable = true;
+ for (item_number *pos = prevsi->item + 1;
+ !applicable && nlable && item_number_is_symbol_number (*pos);
+ ++pos)
+ {
+ symbol_number next_sym = item_number_as_symbol_number (*pos);
+ if (ISTOKEN (next_sym))
+ {
+ applicable = intersect_symbol (next_sym, lookahead);
+ nlable = false;
+ }
+ else
+ {
+ applicable = intersect (FIRSTS (next_sym), lookahead);
+ if (!applicable)
+ nlable = nullable[next_sym - ntokens];
+ }
+ }
+ if (!applicable && !nlable)
+ continue;
+ }
+ }
+ gl_list_add_last (result, prevsi);
+ }
+ return result;
+}
diff --git a/src/lssi.h b/src/lssi.h
new file mode 100644
index 00000000..c3a205ca
--- /dev/null
+++ b/src/lssi.h
@@ -0,0 +1,56 @@
+/* Lookahead sensative state item searches for counterexample generation
+
+ Copyright (C) 2019-2020 Free Software Foundation, Inc.
+
+ This file is part of Bison, the GNU Compiler Compiler.
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#ifndef LSSI_H
+#define LSSI_H
+
+#include "state-item.h"
+/*
+ All state-item graph nodes should also include a precise follow set
(follow_L).
+ However, ignoring follow_L saves a lot of memory and is a pretty good
approximation.
+ These functions exist to enforce restrictions caused by follow_L sets.
+ */
+
+/*
+ * find shortest lookahead-sensitive path of state-items to target such that
+ * next_sym is in the follow_L set of target in that position.
+*/
+gl_list_t shortest_path_from_start (state_item_number target,
+ symbol_number next_sym);
+
+/**
+ * Determine if the given terminal is in the given symbol set or can begin
+ * a nonterminal in the given symbol set.
+ */
+bool intersect_symbol (symbol_number sym, bitset syms);
+
+/**
+ * Determine if any symbol in ts is in syms
+ * or can begin with a nonterminal in syms.
+ */
+bool intersect (bitset ts, bitset syms);
+
+/**
+ * Compute a set of sequences of state-items that can make production steps
+ * to this state-item such that the resulting possible lookahead symbols are
+ * as given.
+ */
+gl_list_t lssi_reverse_production (const state_item *si, bitset lookahead);
+
+#endif /* LSSI_H */
diff --git a/src/state-item.c b/src/state-item.c
new file mode 100644
index 00000000..69cb923d
--- /dev/null
+++ b/src/state-item.c
@@ -0,0 +1,563 @@
+/* Counterexample Generation Search Nodes
+
+ Copyright (C) 2019-2020 Free Software Foundation, Inc.
+
+ This file is part of Bison, the GNU Compiler Compiler.
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#include <config.h>
+#include <stdlib.h>
+#include <time.h>
+#include <assert.h>
+#include <gl_linked_list.h>
+#include <gl_xlist.h>
+#include "state-item.h"
+#include "closure.h"
+#include "nullable.h"
+#include "getargs.h"
+
+size_t nstate_items;
+state_item_number *state_item_map;
+state_item *state_items;
+
+state_item_number *si_trans;
+bitsetv si_revs;
+Hash_table *si_prods;
+
+// hash functions for index -> bitset hash maps
+typedef struct
+{
+ int key;
+ bitset l;
+} hash_pair;
+
+static size_t
+hash_pair_hasher (const hash_pair *sl, size_t max)
+{
+ return sl->key % max;
+}
+
+static bool
+hash_pair_comparator (const hash_pair *l, const hash_pair *r)
+{
+ return l->key == r->key;
+}
+
+static void
+hash_pair_free (hash_pair *hp)
+{
+ bitset_free (hp->l);
+ free (hp);
+}
+
+static bitset
+hash_pair_lookup (Hash_table *tab, int key)
+{
+ hash_pair *l = xmalloc (sizeof (hash_pair));
+ l->key = key;
+ hash_pair *hp = (hash_pair *) hash_lookup (tab, l);
+ if (!hp)
+ return NULL;
+ return hp->l;
+}
+
+static void
+hash_pair_insert (Hash_table *tab, int key, bitset val)
+{
+ hash_pair *hp = xmalloc (sizeof (hash_pair));
+ hp->key = key;
+ hp->l = val;
+ if (!hash_insert (tab, hp))
+ xalloc_die ();
+}
+
+static void
+hash_pair_remove (Hash_table *tab, int key)
+{
+ hash_pair *hp = xmalloc (sizeof (hash_pair));
+ hp->key = key;
+ hash_delete (tab, hp);
+}
+
+/* return a state_item from a state's id and the offset of the item
+ within the state.
+ */
+state_item *
+state_item_lookup (state_number s, state_item_number off)
+{
+ return &state_items[state_item_index_lookup (s, off)];
+}
+
+static inline void
+state_item_set (state_item_number sidx, state *s, item_number off)
+{
+ state_item *si = state_items + sidx;
+ si->state = s;
+ si->item = &ritem[off];
+ si->lookahead = NULL;
+ si_trans[sidx] = -1;
+}
+
+/**
+ * Initialize state_items set
+ */
+static void
+init_state_items ()
+{
+ nstate_items = 0;
+ bitsetv production_items = bitsetv_create (nstates, nritems, BITSET_SPARSE);
+ for (int i = 0; i < nstates; ++i)
+ {
+ state *s = states[i];
+ nstate_items += s->nitems;
+ closure (s->items, s->nitems);
+ for (size_t j = 0; j < nitemset; ++j)
+ if (itemset[j] > 0
+ && item_number_is_rule_number (ritem[itemset[j] - 1]))
+ {
+ bitset_set (production_items[i], itemset[j]);
+ ++nstate_items;
+ }
+ }
+ state_item_map = xnmalloc (nstates + 1, sizeof (state_item_number));
+ state_items = xnmalloc (nstate_items, sizeof (state_item));
+ si_trans = xnmalloc (nstate_items, sizeof (state_item_number));
+ si_revs = bitsetv_create (nstate_items, nstate_items, BITSET_SPARSE);
+ state_item_number sidx = 0;
+ for (int i = 0; i < nstates; ++i)
+ {
+ state_item_map[i] = sidx;
+ int rule_search_idx = 0;
+ state *s = states[i];
+ reductions *red = s->reductions;
+ for (int j = 0; j < s->nitems; ++j)
+ {
+ state_item_set (sidx, s, s->items[j]);
+ state_item *si = state_items + sidx;
+ const rule *r = item_rule (si->item);
+ if (red->rules[rule_search_idx] < r)
+ ++rule_search_idx;
+ if (rule_search_idx < red->num && r == red->rules[rule_search_idx])
+ {
+ bitsetv lookahead = red->lookahead_tokens;
+ if (lookahead)
+ si->lookahead = lookahead[rule_search_idx];
+ }
+ ++sidx;
+ }
+ bitset_iterator biter;
+ item_number off;
+ BITSET_FOR_EACH (biter, production_items[i], off, 0)
+ {
+ state_item_set (sidx, s, off);
+ if (item_number_is_rule_number (ritem[off]))
+ {
+ bitsetv lookahead = red->lookahead_tokens;
+ if (lookahead)
+ state_items[sidx].lookahead = lookahead[rule_search_idx];
+ ++rule_search_idx;
+ }
+ ++sidx;
+ }
+
+ }
+ state_item_map[nstates] = nstate_items;
+}
+
+static size_t
+state_sym_hasher (const void *st, size_t max)
+{
+ return ((state *) st)->accessing_symbol % max;
+}
+
+static bool
+state_sym_comparator (const void *s1, const void *s2)
+{
+ return ((state *) s1)->accessing_symbol == ((state *) s2)->accessing_symbol;
+}
+
+static state *
+state_sym_lookup (symbol_number sym, Hash_table *h)
+{
+ state *s = xmalloc (sizeof (state));
+ s->accessing_symbol = sym;
+ return hash_lookup (h, s);
+}
+
+static void
+init_trans ()
+{
+ for (state_number i = 0; i < nstates; ++i)
+ {
+ // generate a hash set that maps from accepting symbols to the states
+ // this state transitions to
+ state *s = states[i];
+ transitions *t = s->transitions;
+ Hash_table *transitions
+ = hash_initialize (t->num, NULL, (Hash_hasher) state_sym_hasher,
+ (Hash_comparator) state_sym_comparator, NULL);
+ for (int j = 0; j < t->num; ++j)
+ if (!TRANSITION_IS_DISABLED (t, j))
+ if (!hash_insert (transitions, t->states[j]))
+ xalloc_die ();
+ for (int j = state_item_map[i]; j < state_item_map[i + 1]; ++j)
+ {
+ item_number *item = state_items[j].item;
+ if (item_number_is_rule_number (*item))
+ continue;
+ state *dst = state_sym_lookup (*item, transitions);
+ if (!dst)
+ continue;
+ // find the item in the destination state that corresponds
+ // to the transition of item
+ for (int k = 0; k < dst->nitems; ++k)
+ {
+ if (item + 1 == ritem + dst->items[k])
+ {
+ state_item_number dstSI =
+ state_item_index_lookup (dst->number, k);
+
+ si_trans[j] = dstSI;
+ bitset_set (si_revs[dstSI], j);
+ break;
+ }
+ }
+ }
+ }
+}
+
+bitset
+si_prods_lookup (state_item_number si)
+{
+ return hash_pair_lookup (si_prods, si);
+}
+
+static void
+init_prods ()
+{
+ si_prods = hash_initialize (nstate_items,
+ NULL,
+ (Hash_hasher) hash_pair_hasher,
+ (Hash_comparator) hash_pair_comparator,
+ (Hash_data_freer) hash_pair_free);
+ for (int i = 0; i < nstates; ++i)
+ {
+ state *state = states[i];
+ // closure_map is a hash map from nonterminals to a set
+ // of the items that produce those nonterminals
+ Hash_table *closure_map
+ = hash_initialize (nsyms - ntokens, NULL,
+ (Hash_hasher) hash_pair_hasher,
+ (Hash_comparator) hash_pair_comparator,
+ NULL);
+
+ // Add the nitems of state to skip to the production portion
+ // of that state's state_items
+ for (int j = state_item_map[i] + state->nitems;
+ j < state_item_map[i + 1]; ++j)
+ {
+ state_item *src = state_items + j;
+ item_number *item = src->item;
+ symbol_number lhs = item_rule (item)->lhs->number;
+ bitset itms = hash_pair_lookup (closure_map, lhs);
+ if (!itms)
+ {
+ itms = bitset_create (nstate_items, BITSET_SPARSE);
+ hash_pair_insert (closure_map, lhs, itms);
+ }
+ bitset_set (itms, j);
+ }
+ // For each item with a dot followed by a nonterminal,
+ // try to create a production edge.
+ for (int j = state_item_map[i]; j < state_item_map[i + 1]; ++j)
+ {
+ state_item *src = state_items + j;
+ item_number item = *(src->item);
+ // Skip reduce items and items with terminals after the dot
+ if (item_number_is_rule_number (item) || ISTOKEN (item))
+ continue;
+ symbol_number sym = item_number_as_symbol_number (item);
+ bitset lb = hash_pair_lookup (closure_map, sym);
+ if (lb)
+ {
+ bitset copy = bitset_create (nstate_items, BITSET_SPARSE);
+ bitset_copy (copy, lb);
+ hash_pair *prod_hp = xmalloc (sizeof (hash_pair));
+ prod_hp->key = j;
+ prod_hp->l = copy;
+ //update prods
+ if (!hash_insert (si_prods, prod_hp))
+ xalloc_die ();
+
+ //update revs
+ bitset_iterator biter;
+ state_item_number prod;
+ BITSET_FOR_EACH (biter, copy, prod, 0)
+ bitset_set (si_revs[prod], j);
+ }
+ }
+
+ }
+}
+
+/* Since lookaheads are only generated for reductions,
+ we need to propogate lookahead sets backwards as
+ the searches require each state_item to have a lookahead.
+ */
+static inline void
+gen_lookaheads ()
+{
+ for (state_item_number i = 0; i < nstate_items; ++i)
+ {
+ state_item *si = state_items + i;
+ if (item_number_is_symbol_number (*(si->item)) || !si->lookahead)
+ continue;
+
+ bitset lookahead = si->lookahead;
+ gl_list_t queue =
+ gl_list_create (GL_LINKED_LIST, NULL, NULL, NULL, true, 1,
+ (const void **) &si);
+
+ // For each reduction item, traverse through all state_items
+ // accessible through reverse transition steps, and set their
+ // lookaheads to the reduction items lookahead
+ while (gl_list_size (queue) > 0)
+ {
+ state_item *prev = (state_item *) gl_list_get_at (queue, 0);
+ gl_list_remove_at (queue, 0);
+ prev->lookahead = lookahead;
+ if (SI_TRANSITION (prev))
+ {
+ bitset rsi = si_revs[prev - state_items];
+ bitset_iterator biter;
+ state_item_number sin;
+ BITSET_FOR_EACH (biter, rsi, sin, 0)
+ gl_list_add_first (queue, &state_items[sin]);
+ }
+ }
+ }
+}
+
+bitsetv firsts = NULL;
+
+void
+init_firsts (void)
+{
+ firsts = bitsetv_create (nvars, nsyms, BITSET_FIXED);
+ for (rule_number i = 0; i < nrules; ++i)
+ {
+ rule *r = rules + i;
+ item_number *n = r->rhs;
+ // Iterate through nullable nonterminals to try to find a terminal.
+ while (item_number_is_symbol_number (*n) && ISVAR (*n)
+ && nullable[*n - ntokens])
+ ++n;
+ if (item_number_is_rule_number (*n) || ISVAR (*n))
+ continue;
+
+ symbol_number lhs = r->lhs->number;
+ bitset_set (FIRSTS (lhs), *n);
+ }
+ bool change = true;
+ while (change)
+ {
+ change = false;
+ for (rule_number i = 0; i < nrules; ++i)
+ {
+ rule *r = rules + i;
+ symbol_number lhs = r->lhs->number;
+ bitset f_lhs = FIRSTS (lhs);
+ for (item_number *n = r->rhs;
+ item_number_is_symbol_number (*n) &&
+ ISVAR (*n);
+ ++n)
+ {
+ bitset f = FIRSTS (*n);
+ if (!bitset_subset_p (f_lhs, f))
+ {
+ change = true;
+ bitset_union (f_lhs, f_lhs, f);
+ }
+ if (!nullable[*n - ntokens])
+ break;
+ }
+ }
+ }
+}
+
+static inline void
+disable_state_item (state_item_number sin)
+{
+ si_trans[sin] = -2;
+ hash_pair_remove (si_prods, sin);
+}
+
+/*
+ To make searches more efficient, we can prune away paths that are
+ caused by disabled transitions.
+ */
+static void
+prune_disabled_paths (void)
+{
+ for (int i = nstate_items - 1; i >= 0; --i)
+ {
+ state_item *si = state_items + i;
+ if (si_trans[i] == -1 && item_number_is_symbol_number (*si->item))
+ {
+ // disable the transitions out of i
+ for (state_item_number j = si_trans[i]; j != -1; j = si_trans[j])
+ disable_state_item (j);
+
+ gl_list_t queue =
+ gl_list_create (GL_LINKED_LIST, NULL, NULL, NULL, true, 1,
+ (const void **) &si);
+
+ // For each disabled transition, traverse through all state_items
+ // accessible through reverse transition steps, and set their
+ // lookaheads to the reduction items lookahead
+ while (gl_list_size (queue) > 0)
+ {
+ const state_item *prev = gl_list_get_at (queue, 0);
+ gl_list_remove_at (queue, 0);
+ state_item_number prev_num = prev - state_items;
+ disable_state_item (prev_num);
+
+ bitset rsi = si_revs[prev_num];
+ bitset_iterator biter;
+ state_item_number sin;
+ BITSET_FOR_EACH (biter, rsi, sin, 0)
+ {
+ if (SI_TRANSITION (prev))
+ gl_list_add_first (queue, &state_items[sin]);
+ else
+ {
+ bitset p = si_prods_lookup (sin);
+ if (p)
+ bitset_reset (p, prev_num);
+ }
+ }
+ }
+ }
+ }
+}
+
+void
+print_state_item (const state_item *si, FILE *out)
+{
+ fprintf (out, "%d:", si->state->number);
+ item_print (si->item, NULL, out);
+ putc ('\n', out);
+}
+
+/**
+ * Report set counts and the state_item graph if trace is enabled
+ */
+static void
+state_items_report (void)
+{
+ printf ("# state items: %zu\n", nstate_items);
+ for (state_number i = 0; i < nstates; ++i)
+ {
+ printf ("State %d:\n", i);
+ for (int j = state_item_map[i]; j < state_item_map[i + 1]; ++j)
+ {
+ item_print (state_items[j].item, NULL, stdout);
+ puts ("");
+ if (si_trans[j] >= 0)
+ {
+ fputs (" -> ", stdout);
+ print_state_item (state_items + si_trans[j], stdout);
+ }
+
+ bitset sets[2] = { si_prods_lookup (j), si_revs[j] };
+ const char *txt[2] = { " => ", " <- " };
+ for (int seti = 0; seti < 2; ++seti)
+ {
+ bitset b = sets[seti];
+ if (b)
+ {
+ bitset_iterator biter;
+ state_item_number sin;
+ BITSET_FOR_EACH (biter, b, sin, 0)
+ {
+ fputs (txt[seti], stdout);
+ print_state_item (state_items + sin, stdout);
+ }
+ }
+ }
+ puts ("");
+ }
+ }
+ printf ("FIRSTS\n");
+ for (symbol_number i = ntokens; i < nsyms; ++i)
+ {
+ printf (" %s firsts\n", symbols[i]->tag);
+ bitset_iterator iter;
+ symbol_number j;
+ BITSET_FOR_EACH (iter, FIRSTS (i), j, 0)
+ printf (" %s\n", symbols[j]->tag);
+ }
+ puts ("\n");
+}
+
+void
+state_items_init (void)
+{
+ time_t start = time (NULL);
+ init_state_items ();
+ init_trans ();
+ init_prods ();
+ gen_lookaheads ();
+ init_firsts ();
+ prune_disabled_paths ();
+ if (trace_flag & trace_cex)
+ {
+ printf ("init: %f\n", difftime (time (NULL), start));
+ state_items_report ();
+ }
+}
+
+void
+state_items_free (void)
+{
+ hash_free (si_prods);
+ bitsetv_free (si_revs);
+ free (si_trans);
+ free (state_items);
+ bitsetv_free (firsts);
+}
+
+/**
+ * Determine, using precedence and associativity, whether the next
+ * production is allowed from the current production.
+ */
+bool
+production_allowed (const state_item *si, const state_item *next)
+{
+ sym_content *s1 = item_rule (si->item)->lhs;
+ sym_content *s2 = item_rule (next->item)->lhs;
+ int prec1 = s1->prec;
+ int prec2 = s2->prec;
+ if (prec1 >= 0 && prec2 >= 0)
+ {
+ // Do not expand if lower precedence.
+ if (prec1 > prec2)
+ return false;
+ // Do not expand if same precedence, but left-associative.
+ if (prec1 == prec2 && s1->assoc == left_assoc)
+ return false;
+ }
+ return true;
+}
diff --git a/src/state-item.h b/src/state-item.h
new file mode 100644
index 00000000..50996cd2
--- /dev/null
+++ b/src/state-item.h
@@ -0,0 +1,100 @@
+/* Counterexample Generation Search Nodes
+
+ Copyright (C) 2019-2020 Free Software Foundation, Inc.
+
+ This file is part of Bison, the GNU Compiler Compiler.
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>. */
+
+#ifndef STATE_ITEM_H
+#define STATE_ITEM_H
+
+#include <hash.h>
+#include <gl_list.h>
+#include <bitsetv.h>
+#include "gram.h"
+#include "state.h"
+
+/*
+ Initializes a graph connecting (state, production item) pairs to
+ pairs they can make a transition or production step to. This graph
+ is used to search for paths that represent counterexamples of some
+ conflict.
+
+ state_items is an array of state state-item pairs ordered by state.
+ state_item_map maps state numbers to the first item which corresponds
+ to it in the array. A state's portion in state_items begins with its
+ items in the same order as it was in the state. This is then followed by
+ productions from the closure of the state in order by rule.
+
+ There are two type of edges in this graph transitions and productions.
+ Transitions are the same as transitions from the parser except edges
+ are only between items from the same rule. These are stored as an
+ array "si_trans" (as most items will have transitions) which are indexed the
+ same way as state_items.
+
+ Productions are edges from items with a nonterminal after the dot to
+ the production of that nonterminal in the same state. These edges are
+ stored as a hash map "si_prods" from a state_item to a set of what productions
+ it goes from/to
+
+ The inverses of these edges are stored in an array of bitsets, "si_revs."
+ A state-item that begins with a dot will have reverse production edges,
+ and all others will have reverse transition edges.
+
+ */
+
+#define SI_DISABLED(sin) (si_trans[sin] == -2)
+#define SI_PRODUCTION(si) ((si) == state_items || *((si)->item - 1) < 0)
+#define SI_TRANSITION(si) ((si) != state_items && *((si)->item - 1) >= 0)
+
+typedef int state_item_number;
+
+typedef struct
+{
+ state *state;
+ item_number *item;
+ bitset lookahead;
+} state_item;
+
+extern bitsetv firsts;
+#define FIRSTS(sym) firsts[(sym) - ntokens]
+
+extern size_t nstate_items;
+extern state_item_number *state_item_map;
+
+/** Array mapping state_item_numbers to state_items */
+extern state_item *state_items;
+
+/** state-item graph edges */
+extern state_item_number *si_trans;
+extern bitsetv si_revs;
+
+state_item *state_item_lookup (state_number s, state_item_number off);
+
+static inline state_item_number
+state_item_index_lookup (state_number s, state_item_number off)
+{
+ return state_item_map[s] + off;
+}
+
+bitset si_prods_lookup (state_item_number si);
+
+void state_items_init (void);
+void print_state_item (const state_item *si, FILE *out);
+void state_items_free (void);
+
+bool production_allowed (const state_item *si, const state_item *next);
+
+#endif /* STATE_ITEM_H */
--
2.20.1 (Apple Git-117)
- [PATCH 0/5] Conflict Counterexample Generation, Vincent Imbimbo, 2020/05/12
- [PATCH 1/5] State-item pair graph generation,
Vincent Imbimbo <=
- [PATCH 2/5] Parse simulator, Vincent Imbimbo, 2020/05/12
- [PATCH 3/5] Counterexample search, Vincent Imbimbo, 2020/05/12
- [PATCH 4/5] counterexample generation integration, Vincent Imbimbo, 2020/05/12
- [PATCH 5/5] counterexample test suite, Vincent Imbimbo, 2020/05/12
- Re: [PATCH 0/5] Conflict Counterexample Generation, Akim Demaille, 2020/05/13