bison-patches
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[PATCH 1/5] State-item pair graph generation


From: Vincent Imbimbo
Subject: [PATCH 1/5] State-item pair graph generation
Date: Tue, 12 May 2020 22:23:50 -0400

---
 src/lssi.c       | 367 ++++++++++++++++++++++++++++++
 src/lssi.h       |  56 +++++
 src/state-item.c | 563 +++++++++++++++++++++++++++++++++++++++++++++++
 src/state-item.h | 100 +++++++++
 4 files changed, 1086 insertions(+)
 create mode 100644 src/lssi.c
 create mode 100644 src/lssi.h
 create mode 100644 src/state-item.c
 create mode 100644 src/state-item.h

diff --git a/src/lssi.c b/src/lssi.c
new file mode 100644
index 00000000..c4c51b32
--- /dev/null
+++ b/src/lssi.c
@@ -0,0 +1,367 @@
+/* Lookahead sensative state item searches for counterexample generation
+ 
+ Copyright (C) 2019-2020 Free Software Foundation, Inc.
+ 
+ This file is part of Bison, the GNU Compiler Compiler.
+ 
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+ 
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ GNU General Public License for more details.
+ 
+ You should have received a copy of the GNU General Public License
+ along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
+
+#include <config.h>
+#include <gl_linked_list.h>
+#include <gl_xlist.h>
+#include <stdlib.h>
+#include "lssi.h"
+#include "nullable.h"
+#include "getargs.h"
+
+// lookahead sensative state item
+struct lssi;
+typedef struct lssi
+{
+  state_item_number si;
+  struct lssi *parent;
+  // this is the precise lookahead set (follow_L from the CupEx paper)
+  bitset lookahead;
+  bool free_lookahead;
+} lssi;
+
+lssi *
+new_lssi (state_item_number si, lssi *p, bitset l, bool free_l)
+{
+  lssi *ret = xmalloc (sizeof (lssi));
+  ret->si = si;
+  ret->parent = p;
+  ret->lookahead = l;
+  ret->free_lookahead = free_l;
+  return ret;
+}
+
+void
+lssi_free (lssi *sn)
+{
+  if (sn == NULL)
+    return;
+  if (sn->free_lookahead)
+    bitset_free (sn->lookahead);
+  free (sn);
+}
+
+static size_t
+lssi_hasher (lssi *sn, size_t max)
+{
+  size_t hash = sn->si;
+  bitset_iterator biter;
+  symbol_number syn;
+  BITSET_FOR_EACH (biter, sn->lookahead, syn, 0)
+    hash += syn;
+  return hash % max;
+}
+
+static bool
+lssi_comparator (lssi *s1, lssi *s2)
+{
+  if (s1->si == s2->si)
+    {
+      if (s1->lookahead == s2->lookahead)
+        return true;
+      return bitset_equal_p (s1->lookahead, s2->lookahead);
+    }
+  return false;
+}
+
+static inline bool
+append_lssi (lssi *sn, Hash_table *visited, gl_list_t queue)
+{
+  if (hash_lookup (visited, sn))
+    {
+      sn->free_lookahead = false;
+      lssi_free (sn);
+      return false;
+    }
+  if (!hash_insert (visited, sn))
+    xalloc_die ();
+  gl_list_add_last (queue, sn);
+  return true;
+}
+
+static void
+lssi_print (lssi *l)
+{
+  print_state_item (state_items + l->si, stdout);
+  if (l->lookahead)
+    {
+      printf ("FOLLOWL = { ");
+      bitset_iterator biter;
+      symbol_number sin;
+      BITSET_FOR_EACH (biter, l->lookahead, sin, 0)
+        printf ("%s, \n", symbols[sin]->tag);
+      puts ("}");
+    }
+}
+
+/**
+ * Compute the set of state-items that can reach the given conflict item via
+ * a combination of transitions or production steps.
+ */
+bitset
+eligible_state_items (state_item *target)
+{
+  bitset result = bitset_create (nstate_items, BITSET_FIXED);
+  gl_list_t queue =
+    gl_list_create (GL_LINKED_LIST, NULL, NULL, NULL, true, 1,
+                    (const void **) &target);
+  while (gl_list_size (queue) > 0)
+    {
+      state_item *si = (state_item *) gl_list_get_at (queue, 0);
+      gl_list_remove_at (queue, 0);
+      if (bitset_test (result, si - state_items))
+        continue;
+      bitset_set (result, si - state_items);
+      // search all reverse edges.
+      bitset rsi = si_revs[si - state_items];
+      bitset_iterator biter;
+      state_item_number sin;
+      BITSET_FOR_EACH (biter, rsi, sin, 0)
+        gl_list_add_last (queue, &state_items[sin]);
+    }
+  gl_list_free (queue);
+  return result;
+}
+
+/**
+ * Compute the shortest lookahead-sensitive path from the start state to
+ * this conflict. If optimized is true, only consider parser states
+ * that can reach the conflict state.
+ */
+gl_list_t
+shortest_path_from_start (state_item_number target, symbol_number next_sym)
+{
+  bitset eligible = eligible_state_items (&state_items[target]);
+  Hash_table *visited = hash_initialize (32,
+                                         NULL,
+                                         (Hash_hasher) lssi_hasher,
+                                         (Hash_comparator) lssi_comparator,
+                                         (Hash_data_freer) lssi_free);
+  bitset il = bitset_create (nsyms, BITSET_FIXED);
+  bitset_set (il, 0);
+  lssi *init = new_lssi (0, NULL, il, true);
+  gl_list_t queue = gl_list_create (GL_LINKED_LIST, NULL, NULL,
+                                    NULL,
+                                    true, 1, (const void **) &init);
+  // breadth-first search
+  bool finished = false;
+  lssi *n;
+  while (gl_list_size (queue) > 0)
+    {
+      n = (lssi *) gl_list_get_at (queue, 0);
+      gl_list_remove_at (queue, 0);
+      state_item_number last = n->si;
+      if (target == last && bitset_test (n->lookahead, next_sym))
+        {
+          finished = true;
+          break;
+        }
+      // Transitions don't change follow_L
+      if (si_trans[last] >= 0)
+        {
+          state_item_number nextSI = si_trans[last];
+          if (bitset_test (eligible, nextSI))
+            {
+              lssi *next = new_lssi (nextSI, n, n->lookahead, false);
+              append_lssi (next, visited, queue);
+            }
+        }
+      // For production steps, follow_L is based on the symbol after the
+      // non-terminal being produced.
+      // if no such symbol exists, follow_L is unchanged
+      // if the symbol is a terminal, follow_L only contains that terminal
+      // if the symbol is not nullable, follow_L is its FIRSTS set
+      // if the symbol is nullable, follow_L is its FIRSTS set unioned with
+      // this logic applied to the next symbol in the rule
+      bitset p = si_prods_lookup (last);
+      if (p)
+        {
+          state_item si = state_items[last];
+          // Compute follow_L as above
+          bitset lookahead = bitset_create (nsyms, BITSET_FIXED);
+          item_number *pos = si.item + 1;
+          for (; !item_number_is_rule_number (*pos); ++pos)
+            {
+              item_number it = *pos;
+              if (ISTOKEN (it))
+                {
+                  bitset_set (lookahead, it);
+                  break;
+                }
+              else
+                {
+                  bitset_union (lookahead, lookahead, FIRSTS (it));
+                  if (!nullable[it - ntokens])
+                    break;
+                }
+            }
+          if (item_number_is_rule_number (*pos))
+            bitset_union (lookahead, n->lookahead, lookahead);
+
+          bool lookahead_used = false;
+          // Try all possible production steps within this parser state.
+          bitset_iterator biter;
+          state_item_number nextSI;
+          BITSET_FOR_EACH (biter, p, nextSI, 0)
+            {
+              if (!bitset_test (eligible, nextSI))
+                continue;
+              lssi *next = new_lssi (nextSI, n, lookahead,
+                                     !lookahead_used);
+              lookahead_used = append_lssi (next, visited, queue)
+                               || lookahead_used;
+            }
+          if (!lookahead_used)
+            bitset_free (lookahead);
+        }
+    }
+  if (!finished)
+    {
+      gl_list_free (queue);
+      fputs ("Cannot find shortest path to conflict state.", stderr);
+      exit (1);
+    }
+  gl_list_t ret =
+    gl_list_create_empty (GL_LINKED_LIST, NULL, NULL, NULL, true);
+  for (lssi *sn = n; sn != NULL; sn = sn->parent)
+    gl_list_add_first (ret, state_items + sn->si);
+
+  hash_free (visited);
+  gl_list_free (queue);
+
+  if (trace_flag & trace_cex)
+    {
+      puts ("REDUCE ITEM PATH:");
+      gl_list_iterator_t it = gl_list_iterator (ret);
+      const void *sip;
+      while (gl_list_iterator_next (&it, &sip, NULL))
+        print_state_item ((state_item *) sip, stdout);
+    }
+  return ret;
+}
+
+/**
+ * Determine if the given terminal is in the given symbol set or can begin
+ * a nonterminal in the given symbol set.
+ */
+bool
+intersect_symbol (symbol_number sym, bitset syms)
+{
+  if (!syms)
+    return true;
+  bitset_iterator biter;
+  symbol_number sn;
+  BITSET_FOR_EACH (biter, syms, sn, 0)
+    {
+      if (sym == sn)
+        return true;
+      if (ISVAR (sn) && bitset_test (FIRSTS (sn), sym))
+        return true;
+    }
+  return false;
+}
+
+/**
+ * Determine if any symbol in ts is in syms
+ * or can begin a nonterminal syms.
+ */
+bool
+intersect (bitset ts, bitset syms)
+{
+  if (!syms || !ts)
+    return true;
+  bitset_iterator biter;
+  symbol_number sn;
+  BITSET_FOR_EACH (biter, syms, sn, 0)
+    {
+      if (bitset_test (ts, sn))
+        return true;
+      if (ISVAR (sn) && !bitset_disjoint_p (ts, FIRSTS (sn)))
+        return true;
+    }
+  return false;
+}
+
+
+/**
+ * Compute a list of state_items that have a production to n with respect
+ * to its lookahead
+ */
+gl_list_t
+lssi_reverse_production (const state_item *si, bitset lookahead)
+{
+  gl_list_t result =
+    gl_list_create_empty (GL_LINKED_LIST, NULL, NULL, NULL, true);
+  if (SI_TRANSITION (si))
+    return result;
+  // A production step was made to the current lalr_item.
+  // Check that the next symbol in the parent lalr_item is
+  // compatible with the lookahead.
+  bitset_iterator biter;
+  state_item_number sin;
+  BITSET_FOR_EACH (biter, si_revs[si - state_items], sin, 0)
+  {
+    state_item *prevsi = state_items + sin;
+    if (!production_allowed (prevsi, si))
+      continue;
+    bitset prev_lookahead = prevsi->lookahead;
+    if (item_number_is_rule_number (*(prevsi->item)))
+      {
+        // reduce item
+        // Check that some lookaheads can be preserved.
+        if (!intersect (prev_lookahead, lookahead))
+          continue;
+      }
+    else
+      {
+        // shift item
+        if (lookahead)
+          {
+            // Check that lookahead is compatible with the first
+            // possible symbols in the rest of the production.
+            // Alternatively, if the rest of the production is
+            // nullable, the lookahead must be compatible with
+            // the lookahead of the corresponding item.
+            bool applicable = false;
+            bool nlable = true;
+            for (item_number *pos = prevsi->item + 1;
+                 !applicable && nlable && item_number_is_symbol_number (*pos);
+                 ++pos)
+              {
+                symbol_number next_sym = item_number_as_symbol_number (*pos);
+                if (ISTOKEN (next_sym))
+                  {
+                    applicable = intersect_symbol (next_sym, lookahead);
+                    nlable = false;
+                  }
+                else
+                  {
+                    applicable = intersect (FIRSTS (next_sym), lookahead);
+                    if (!applicable)
+                      nlable = nullable[next_sym - ntokens];
+                  }
+              }
+            if (!applicable && !nlable)
+              continue;
+          }
+      }
+    gl_list_add_last (result, prevsi);
+  }
+  return result;
+}
diff --git a/src/lssi.h b/src/lssi.h
new file mode 100644
index 00000000..c3a205ca
--- /dev/null
+++ b/src/lssi.h
@@ -0,0 +1,56 @@
+/* Lookahead sensative state item searches for counterexample generation
+ 
+ Copyright (C) 2019-2020 Free Software Foundation, Inc.
+ 
+ This file is part of Bison, the GNU Compiler Compiler.
+ 
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+ 
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ GNU General Public License for more details.
+ 
+ You should have received a copy of the GNU General Public License
+ along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
+
+#ifndef LSSI_H
+#define LSSI_H
+
+#include "state-item.h"
+/*
+  All state-item graph nodes should also include a precise follow set 
(follow_L).
+  However, ignoring follow_L saves a lot of memory and is a pretty good 
approximation.
+  These functions exist to enforce restrictions caused by follow_L sets.
+ */
+
+/*
+ * find shortest lookahead-sensitive path of state-items to target such that
+ * next_sym is in the follow_L set of target in that position.
+*/
+gl_list_t shortest_path_from_start (state_item_number target,
+                                    symbol_number next_sym);
+
+/**
+ * Determine if the given terminal is in the given symbol set or can begin
+ * a nonterminal in the given symbol set.
+ */
+bool intersect_symbol (symbol_number sym, bitset syms);
+
+/**
+ * Determine if any symbol in ts is in syms
+ * or can begin with a nonterminal in syms.
+ */
+bool intersect (bitset ts, bitset syms);
+
+/**
+ * Compute a set of sequences of state-items that can make production steps
+ * to this state-item such that the resulting possible lookahead symbols are
+ * as given.
+ */
+gl_list_t lssi_reverse_production (const state_item *si, bitset lookahead);
+
+#endif /* LSSI_H */
diff --git a/src/state-item.c b/src/state-item.c
new file mode 100644
index 00000000..69cb923d
--- /dev/null
+++ b/src/state-item.c
@@ -0,0 +1,563 @@
+/* Counterexample Generation Search Nodes
+ 
+ Copyright (C) 2019-2020 Free Software Foundation, Inc.
+ 
+ This file is part of Bison, the GNU Compiler Compiler.
+ 
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+ 
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ GNU General Public License for more details.
+ 
+ You should have received a copy of the GNU General Public License
+ along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
+
+#include <config.h>
+#include <stdlib.h>
+#include <time.h>
+#include <assert.h>
+#include <gl_linked_list.h>
+#include <gl_xlist.h>
+#include "state-item.h"
+#include "closure.h"
+#include "nullable.h"
+#include "getargs.h"
+
+size_t nstate_items;
+state_item_number *state_item_map;
+state_item *state_items;
+
+state_item_number *si_trans;
+bitsetv si_revs;
+Hash_table *si_prods;
+
+// hash functions for index -> bitset hash maps
+typedef struct
+{
+  int key;
+  bitset l;
+} hash_pair;
+
+static size_t
+hash_pair_hasher (const hash_pair *sl, size_t max)
+{
+  return sl->key % max;
+}
+
+static bool
+hash_pair_comparator (const hash_pair *l, const hash_pair *r)
+{
+  return l->key == r->key;
+}
+
+static void
+hash_pair_free (hash_pair *hp)
+{
+  bitset_free (hp->l);
+  free (hp);
+}
+
+static bitset
+hash_pair_lookup (Hash_table *tab, int key)
+{
+  hash_pair *l = xmalloc (sizeof (hash_pair));
+  l->key = key;
+  hash_pair *hp = (hash_pair *) hash_lookup (tab, l);
+  if (!hp)
+    return NULL;
+  return hp->l;
+}
+
+static void
+hash_pair_insert (Hash_table *tab, int key, bitset val)
+{
+  hash_pair *hp = xmalloc (sizeof (hash_pair));
+  hp->key = key;
+  hp->l = val;
+  if (!hash_insert (tab, hp))
+    xalloc_die ();
+}
+
+static void
+hash_pair_remove (Hash_table *tab, int key)
+{
+  hash_pair *hp = xmalloc (sizeof (hash_pair));
+  hp->key = key;
+  hash_delete (tab, hp);
+}
+
+/* return a state_item from a state's id and the offset of the item
+  within the state.
+ */
+state_item *
+state_item_lookup (state_number s, state_item_number off)
+{
+  return &state_items[state_item_index_lookup (s, off)];
+}
+
+static inline void
+state_item_set (state_item_number sidx, state *s, item_number off)
+{
+  state_item *si = state_items + sidx;
+  si->state = s;
+  si->item = &ritem[off];
+  si->lookahead = NULL;
+  si_trans[sidx] = -1;
+}
+
+/**
+ * Initialize state_items set
+ */
+static void
+init_state_items ()
+{
+  nstate_items = 0;
+  bitsetv production_items = bitsetv_create (nstates, nritems, BITSET_SPARSE);
+  for (int i = 0; i < nstates; ++i)
+    {
+      state *s = states[i];
+      nstate_items += s->nitems;
+      closure (s->items, s->nitems);
+      for (size_t j = 0; j < nitemset; ++j)
+        if (itemset[j] > 0
+            && item_number_is_rule_number (ritem[itemset[j] - 1]))
+          {
+            bitset_set (production_items[i], itemset[j]);
+            ++nstate_items;
+          }
+    }
+  state_item_map = xnmalloc (nstates + 1, sizeof (state_item_number));
+  state_items = xnmalloc (nstate_items, sizeof (state_item));
+  si_trans = xnmalloc (nstate_items, sizeof (state_item_number));
+  si_revs = bitsetv_create (nstate_items, nstate_items, BITSET_SPARSE);
+  state_item_number sidx = 0;
+  for (int i = 0; i < nstates; ++i)
+    {
+      state_item_map[i] = sidx;
+      int rule_search_idx = 0;
+      state *s = states[i];
+      reductions *red = s->reductions;
+      for (int j = 0; j < s->nitems; ++j)
+        {
+          state_item_set (sidx, s, s->items[j]);
+          state_item *si = state_items + sidx;
+          const rule *r = item_rule (si->item);
+          if (red->rules[rule_search_idx] < r)
+            ++rule_search_idx;
+          if (rule_search_idx < red->num && r == red->rules[rule_search_idx])
+            {
+              bitsetv lookahead = red->lookahead_tokens;
+              if (lookahead)
+                si->lookahead = lookahead[rule_search_idx];
+            }
+          ++sidx;
+        }
+      bitset_iterator biter;
+      item_number off;
+      BITSET_FOR_EACH (biter, production_items[i], off, 0)
+        {
+          state_item_set (sidx, s, off);
+          if (item_number_is_rule_number (ritem[off]))
+            {
+              bitsetv lookahead = red->lookahead_tokens;
+              if (lookahead)
+                state_items[sidx].lookahead = lookahead[rule_search_idx];
+              ++rule_search_idx;
+            }
+          ++sidx;
+        }
+
+    }
+  state_item_map[nstates] = nstate_items;
+}
+
+static size_t
+state_sym_hasher (const void *st, size_t max)
+{
+  return ((state *) st)->accessing_symbol % max;
+}
+
+static bool
+state_sym_comparator (const void *s1, const void *s2)
+{
+  return ((state *) s1)->accessing_symbol == ((state *) s2)->accessing_symbol;
+}
+
+static state *
+state_sym_lookup (symbol_number sym, Hash_table *h)
+{
+  state *s = xmalloc (sizeof (state));
+  s->accessing_symbol = sym;
+  return hash_lookup (h, s);
+}
+
+static void
+init_trans ()
+{
+  for (state_number i = 0; i < nstates; ++i)
+    {
+      // generate a hash set that maps from accepting symbols to the states
+      // this state transitions to
+      state *s = states[i];
+      transitions *t = s->transitions;
+      Hash_table *transitions
+        = hash_initialize (t->num, NULL, (Hash_hasher) state_sym_hasher,
+                           (Hash_comparator) state_sym_comparator, NULL);
+      for (int j = 0; j < t->num; ++j)
+        if (!TRANSITION_IS_DISABLED (t, j))
+          if (!hash_insert (transitions, t->states[j]))
+            xalloc_die ();
+      for (int j = state_item_map[i]; j < state_item_map[i + 1]; ++j)
+        {
+          item_number *item = state_items[j].item;
+          if (item_number_is_rule_number (*item))
+            continue;
+          state *dst = state_sym_lookup (*item, transitions);
+          if (!dst)
+            continue;
+          // find the item in the destination state that corresponds
+          // to the transition of item
+          for (int k = 0; k < dst->nitems; ++k)
+            {
+              if (item + 1 == ritem + dst->items[k])
+                {
+                  state_item_number dstSI =
+                    state_item_index_lookup (dst->number, k);
+
+                  si_trans[j] = dstSI;
+                  bitset_set (si_revs[dstSI], j);
+                  break;
+                }
+            }
+        }
+    }
+}
+
+bitset
+si_prods_lookup (state_item_number si)
+{
+  return hash_pair_lookup (si_prods, si);
+}
+
+static void
+init_prods ()
+{
+  si_prods = hash_initialize (nstate_items,
+                              NULL,
+                              (Hash_hasher) hash_pair_hasher,
+                              (Hash_comparator) hash_pair_comparator,
+                              (Hash_data_freer) hash_pair_free);
+  for (int i = 0; i < nstates; ++i)
+    {
+      state *state = states[i];
+      // closure_map is a hash map from nonterminals to a set
+      // of the items that produce those nonterminals
+      Hash_table *closure_map
+        = hash_initialize (nsyms - ntokens, NULL,
+                           (Hash_hasher) hash_pair_hasher,
+                           (Hash_comparator) hash_pair_comparator,
+                           NULL);
+
+      // Add the nitems of state to skip to the production portion
+      // of that state's state_items
+      for (int j = state_item_map[i] + state->nitems;
+           j < state_item_map[i + 1]; ++j)
+        {
+          state_item *src = state_items + j;
+          item_number *item = src->item;
+          symbol_number lhs = item_rule (item)->lhs->number;
+          bitset itms = hash_pair_lookup (closure_map, lhs);
+          if (!itms)
+            {
+              itms = bitset_create (nstate_items, BITSET_SPARSE);
+              hash_pair_insert (closure_map, lhs, itms);
+            }
+          bitset_set (itms, j);
+        }
+      // For each item with a dot followed by a nonterminal,
+      // try to create a production edge.
+      for (int j = state_item_map[i]; j < state_item_map[i + 1]; ++j)
+        {
+          state_item *src = state_items + j;
+          item_number item = *(src->item);
+          // Skip reduce items and items with terminals after the dot
+          if (item_number_is_rule_number (item) || ISTOKEN (item))
+            continue;
+          symbol_number sym = item_number_as_symbol_number (item);
+          bitset lb = hash_pair_lookup (closure_map, sym);
+          if (lb)
+            {
+              bitset copy = bitset_create (nstate_items, BITSET_SPARSE);
+              bitset_copy (copy, lb);
+              hash_pair *prod_hp = xmalloc (sizeof (hash_pair));
+              prod_hp->key = j;
+              prod_hp->l = copy;
+              //update prods
+              if (!hash_insert (si_prods, prod_hp))
+                xalloc_die ();
+
+              //update revs
+              bitset_iterator biter;
+              state_item_number prod;
+              BITSET_FOR_EACH (biter, copy, prod, 0)
+                bitset_set (si_revs[prod], j);
+            }
+        }
+
+    }
+}
+
+/* Since lookaheads are only generated for reductions,
+  we need to propogate lookahead sets backwards as
+  the searches require each state_item to have a lookahead.
+ */
+static inline void
+gen_lookaheads ()
+{
+  for (state_item_number i = 0; i < nstate_items; ++i)
+    {
+      state_item *si = state_items + i;
+      if (item_number_is_symbol_number (*(si->item)) || !si->lookahead)
+        continue;
+
+      bitset lookahead = si->lookahead;
+      gl_list_t queue =
+        gl_list_create (GL_LINKED_LIST, NULL, NULL, NULL, true, 1,
+                        (const void **) &si);
+
+      // For each reduction item, traverse through all state_items
+      // accessible through reverse transition steps, and set their
+      // lookaheads to the reduction items lookahead
+      while (gl_list_size (queue) > 0)
+        {
+          state_item *prev = (state_item *) gl_list_get_at (queue, 0);
+          gl_list_remove_at (queue, 0);
+          prev->lookahead = lookahead;
+          if (SI_TRANSITION (prev))
+            {
+              bitset rsi = si_revs[prev - state_items];
+              bitset_iterator biter;
+              state_item_number sin;
+              BITSET_FOR_EACH (biter, rsi, sin, 0)
+                gl_list_add_first (queue, &state_items[sin]);
+            }
+        }
+    }
+}
+
+bitsetv firsts = NULL;
+
+void
+init_firsts (void)
+{
+  firsts = bitsetv_create (nvars, nsyms, BITSET_FIXED);
+  for (rule_number i = 0; i < nrules; ++i)
+    {
+      rule *r = rules + i;
+      item_number *n = r->rhs;
+      // Iterate through nullable nonterminals to try to find a terminal.
+      while (item_number_is_symbol_number (*n) && ISVAR (*n)
+             && nullable[*n - ntokens])
+        ++n;
+      if (item_number_is_rule_number (*n) || ISVAR (*n))
+        continue;
+
+      symbol_number lhs = r->lhs->number;
+      bitset_set (FIRSTS (lhs), *n);
+    }
+  bool change = true;
+  while (change)
+    {
+      change = false;
+      for (rule_number i = 0; i < nrules; ++i)
+        {
+          rule *r = rules + i;
+          symbol_number lhs = r->lhs->number;
+          bitset f_lhs = FIRSTS (lhs);
+          for (item_number *n = r->rhs;
+               item_number_is_symbol_number (*n) &&
+                 ISVAR (*n);
+               ++n)
+            {
+              bitset f = FIRSTS (*n);
+              if (!bitset_subset_p (f_lhs, f))
+                {
+                  change = true;
+                  bitset_union (f_lhs, f_lhs, f);
+                }
+              if (!nullable[*n - ntokens])
+                break;
+            }
+        }
+    }
+}
+
+static inline void
+disable_state_item (state_item_number sin)
+{
+  si_trans[sin] = -2;
+  hash_pair_remove (si_prods, sin);
+}
+
+/*
+ To make searches more efficient, we can prune away paths that are
+ caused by disabled transitions.
+ */
+static void
+prune_disabled_paths (void)
+{
+  for (int i = nstate_items - 1; i >= 0; --i)
+    {
+      state_item *si = state_items + i;
+      if (si_trans[i] == -1 && item_number_is_symbol_number (*si->item))
+        {
+          // disable the transitions out of i
+          for (state_item_number j = si_trans[i]; j != -1; j = si_trans[j])
+            disable_state_item (j);
+
+          gl_list_t queue =
+            gl_list_create (GL_LINKED_LIST, NULL, NULL, NULL, true, 1,
+                            (const void **) &si);
+
+          // For each disabled transition, traverse through all state_items
+          // accessible through reverse transition steps, and set their
+          // lookaheads to the reduction items lookahead
+          while (gl_list_size (queue) > 0)
+            {
+              const state_item *prev = gl_list_get_at (queue, 0);
+              gl_list_remove_at (queue, 0);
+              state_item_number prev_num = prev - state_items;
+              disable_state_item (prev_num);
+
+              bitset rsi = si_revs[prev_num];
+              bitset_iterator biter;
+              state_item_number sin;
+              BITSET_FOR_EACH (biter, rsi, sin, 0)
+              {
+                if (SI_TRANSITION (prev))
+                  gl_list_add_first (queue, &state_items[sin]);
+                else
+                  {
+                    bitset p = si_prods_lookup (sin);
+                    if (p)
+                      bitset_reset (p, prev_num);
+                  }
+              }
+            }
+        }
+    }
+}
+
+void
+print_state_item (const state_item *si, FILE *out)
+{
+  fprintf (out, "%d:", si->state->number);
+  item_print (si->item, NULL, out);
+  putc ('\n', out);
+}
+
+/**
+ * Report set counts and the state_item graph if trace is enabled
+ */
+static void
+state_items_report (void)
+{
+  printf ("# state items: %zu\n", nstate_items);
+  for (state_number i = 0; i < nstates; ++i)
+    {
+      printf ("State %d:\n", i);
+      for (int j = state_item_map[i]; j < state_item_map[i + 1]; ++j)
+        {
+          item_print (state_items[j].item, NULL, stdout);
+          puts ("");
+          if (si_trans[j] >= 0)
+            {
+              fputs ("    -> ", stdout);
+              print_state_item (state_items + si_trans[j], stdout);
+            }
+
+          bitset sets[2] = { si_prods_lookup (j), si_revs[j] };
+          const char *txt[2] = { "    => ", "    <- " };
+          for (int seti = 0; seti < 2; ++seti)
+            {
+              bitset b = sets[seti];
+              if (b)
+                {
+                  bitset_iterator biter;
+                  state_item_number sin;
+                  BITSET_FOR_EACH (biter, b, sin, 0)
+                    {
+                      fputs (txt[seti], stdout);
+                      print_state_item (state_items + sin, stdout);
+                    }
+                }
+            }
+          puts ("");
+        }
+    }
+  printf ("FIRSTS\n");
+  for (symbol_number i = ntokens; i < nsyms; ++i)
+    {
+      printf ("  %s firsts\n", symbols[i]->tag);
+      bitset_iterator iter;
+      symbol_number j;
+      BITSET_FOR_EACH (iter, FIRSTS (i), j, 0)
+        printf ("    %s\n", symbols[j]->tag);
+    }
+  puts ("\n");
+}
+
+void
+state_items_init (void)
+{
+  time_t start = time (NULL);
+  init_state_items ();
+  init_trans ();
+  init_prods ();
+  gen_lookaheads ();
+  init_firsts ();
+  prune_disabled_paths ();
+  if (trace_flag & trace_cex)
+    {
+      printf ("init: %f\n", difftime (time (NULL), start));
+      state_items_report ();
+    }
+}
+
+void
+state_items_free (void)
+{
+  hash_free (si_prods);
+  bitsetv_free (si_revs);
+  free (si_trans);
+  free (state_items);
+  bitsetv_free (firsts);
+}
+
+/**
+ * Determine, using precedence and associativity, whether the next
+ * production is allowed from the current production.
+ */
+bool
+production_allowed (const state_item *si, const state_item *next)
+{
+  sym_content *s1 = item_rule (si->item)->lhs;
+  sym_content *s2 = item_rule (next->item)->lhs;
+  int prec1 = s1->prec;
+  int prec2 = s2->prec;
+  if (prec1 >= 0 && prec2 >= 0)
+    {
+      // Do not expand if lower precedence.
+      if (prec1 > prec2)
+        return false;
+      // Do not expand if same precedence, but left-associative.
+      if (prec1 == prec2 && s1->assoc == left_assoc)
+        return false;
+    }
+    return true;
+}
diff --git a/src/state-item.h b/src/state-item.h
new file mode 100644
index 00000000..50996cd2
--- /dev/null
+++ b/src/state-item.h
@@ -0,0 +1,100 @@
+/* Counterexample Generation Search Nodes
+ 
+ Copyright (C) 2019-2020 Free Software Foundation, Inc.
+ 
+ This file is part of Bison, the GNU Compiler Compiler.
+ 
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+ 
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ GNU General Public License for more details.
+ 
+ You should have received a copy of the GNU General Public License
+ along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
+
+#ifndef STATE_ITEM_H
+#define STATE_ITEM_H
+
+#include <hash.h>
+#include <gl_list.h>
+#include <bitsetv.h>
+#include "gram.h"
+#include "state.h"
+
+/*
+ Initializes a graph connecting (state, production item) pairs to
+ pairs they can make a transition or production step to. This graph
+ is used to search for paths that represent counterexamples of some
+ conflict.
+ 
+ state_items is an array of state state-item pairs ordered by state.
+ state_item_map maps state numbers to the first item which corresponds
+ to it in the array. A state's portion in state_items begins with its
+ items in the same order as it was in the state. This is then followed by
+ productions from the closure of the state in order by rule.
+ 
+ There are two type of edges in this graph transitions and productions.
+ Transitions are the same as transitions from the parser except edges
+ are only between items from the same rule. These are stored as an
+ array "si_trans" (as most items will have transitions) which are indexed the
+ same way as state_items.
+ 
+ Productions are edges from items with a nonterminal after the dot to
+ the production of that nonterminal in the same state. These edges are
+ stored as a hash map "si_prods" from a state_item to a set of what productions
+ it goes from/to
+ 
+ The inverses of these edges are stored in an array of bitsets, "si_revs."
+ A state-item that begins with a dot will have reverse production edges,
+ and all others will have reverse transition edges.
+ 
+ */
+
+#define SI_DISABLED(sin) (si_trans[sin] == -2)
+#define SI_PRODUCTION(si) ((si) == state_items || *((si)->item - 1) < 0)
+#define SI_TRANSITION(si) ((si) != state_items && *((si)->item - 1) >= 0)
+
+typedef int state_item_number;
+
+typedef struct
+{
+  state *state;
+  item_number *item;
+  bitset lookahead;
+} state_item;
+
+extern bitsetv firsts;
+#define FIRSTS(sym) firsts[(sym) - ntokens]
+
+extern size_t nstate_items;
+extern state_item_number *state_item_map;
+
+/** Array mapping state_item_numbers to state_items */
+extern state_item *state_items;
+
+/** state-item graph edges */
+extern state_item_number *si_trans;
+extern bitsetv si_revs;
+
+state_item *state_item_lookup (state_number s, state_item_number off);
+
+static inline state_item_number
+state_item_index_lookup (state_number s, state_item_number off)
+{
+  return state_item_map[s] + off;
+}
+
+bitset si_prods_lookup (state_item_number si);
+
+void state_items_init (void);
+void print_state_item (const state_item *si, FILE *out);
+void state_items_free (void);
+
+bool production_allowed (const state_item *si, const state_item *next);
+
+#endif /* STATE_ITEM_H */
-- 
2.20.1 (Apple Git-117)




reply via email to

[Prev in Thread] Current Thread [Next in Thread]