bison-patches
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[PATCH 2/5] Parse simulator


From: Vincent Imbimbo
Subject: [PATCH 2/5] Parse simulator
Date: Tue, 12 May 2020 22:23:51 -0400

---
 src/derivation.c       | 107 ++++++++
 src/derivation.h       |  47 ++++
 src/parse-simulation.c | 554 +++++++++++++++++++++++++++++++++++++++++
 src/parse-simulation.h | 131 ++++++++++
 4 files changed, 839 insertions(+)
 create mode 100644 src/derivation.c
 create mode 100644 src/derivation.h
 create mode 100644 src/parse-simulation.c
 create mode 100644 src/parse-simulation.h

diff --git a/src/derivation.c b/src/derivation.c
new file mode 100644
index 00000000..b845ba89
--- /dev/null
+++ b/src/derivation.c
@@ -0,0 +1,107 @@
+/* Counterexample derivation trees
+ 
+ Copyright (C) 2019-2020 Free Software Foundation, Inc.
+ 
+ This file is part of Bison, the GNU Compiler Compiler.
+ 
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+ 
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ GNU General Public License for more details.
+ 
+ You should have received a copy of the GNU General Public License
+ along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
+
+#include <config.h>
+#include "derivation.h"
+#include "system.h"
+
+static derivation d_dot = { -1, NULL };
+
+const derivation *
+derivation_dot (void)
+{
+  return &d_dot;
+}
+
+derivation *
+derivation_new (symbol_number sym, gl_list_t children)
+{
+  derivation *deriv = xmalloc (sizeof (derivation));
+  deriv->sym = sym;
+  deriv->children = children;
+  return deriv;
+}
+
+void
+derivation_free (derivation *d)
+{
+  if (d && d != &d_dot)
+    {
+      if (d->children)
+        gl_list_free (d->children);
+      free (d);
+    }
+}
+
+size_t
+derivation_size (const derivation *deriv)
+{
+  if (!deriv->children)
+    return 1;
+  int size = 1;
+  gl_list_iterator_t it = gl_list_iterator (deriv->children);
+  derivation *child;
+  while (gl_list_iterator_next (&it, (const void **) &child, NULL))
+    size += derivation_size (child);
+  return size;
+}
+
+// these are used rarely enough that I don't think they should be interned.
+void
+derivation_print (const derivation *deriv, FILE *f)
+{
+  if (deriv == &d_dot)
+    {
+      fputs (" • ", f);
+      return;
+    }
+  symbol *sym = symbols[deriv->sym];
+  if (!deriv->children)
+    {
+      fprintf (f, " %s ", sym->tag);
+      return;
+    }
+  gl_list_iterator_t it = gl_list_iterator (deriv->children);
+  derivation *child;
+  fprintf (f, " %s ::=[", sym->tag);
+  while (gl_list_iterator_next (&it, (const void **) &child, NULL))
+    derivation_print (child, f);
+  fputs ("] ", f);
+}
+
+void
+derivation_print_leaves (const derivation *deriv, FILE *f)
+{
+  if (deriv == &d_dot)
+    {
+      fputs (" • ", f);
+      return;
+    }
+  if (!deriv->children)
+    {
+      symbol *sym = symbols[deriv->sym];
+      fprintf (f, " %s ", sym->tag);
+      return;
+    }
+
+  gl_list_iterator_t it = gl_list_iterator (deriv->children);
+  derivation *child;
+  while (gl_list_iterator_next (&it, (const void **) &child, NULL))
+    derivation_print_leaves (child, f);
+}
diff --git a/src/derivation.h b/src/derivation.h
new file mode 100644
index 00000000..c89d2f41
--- /dev/null
+++ b/src/derivation.h
@@ -0,0 +1,47 @@
+/* Counterexample derivation trees
+ 
+ Copyright (C) 2019-2020 Free Software Foundation, Inc.
+ 
+ This file is part of Bison, the GNU Compiler Compiler.
+ 
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+ 
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ GNU General Public License for more details.
+ 
+ You should have received a copy of the GNU General Public License
+ along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
+
+#ifndef DERIVATION_H
+#define DERIVATION_H
+
+#include <gl_list.h>
+#include "gram.h"
+
+/*
+ Derivations are trees of symbols such that each non terminal's
+ children are symbols that produce that nonterminal if they are
+ relevant to the counterexample. The leaves of a derivation form
+ a counterexample when printed.
+ */
+
+typedef struct derivation
+{
+  symbol_number sym;
+  gl_list_t children;
+} derivation;
+
+derivation *derivation_new (symbol_number sym, gl_list_t children);
+size_t derivation_size (const derivation *deriv);
+void derivation_print (const derivation *deriv, FILE *f);
+void derivation_print_leaves (const derivation *deriv, FILE *f);
+void derivation_free (derivation *deriv);
+
+const derivation *derivation_dot (void);
+
+#endif /* DERIVATION_H */
diff --git a/src/parse-simulation.c b/src/parse-simulation.c
new file mode 100644
index 00000000..bb307760
--- /dev/null
+++ b/src/parse-simulation.c
@@ -0,0 +1,554 @@
+/* Parser simulator for unifying counterexample search
+ 
+ Copyright (C) 2019-2020 Free Software Foundation, Inc.
+ 
+ This file is part of Bison, the GNU Compiler Compiler.
+ 
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+ 
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ GNU General Public License for more details.
+ 
+ You should have received a copy of the GNU General Public License
+ along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
+
+#include <config.h>
+#include <gl_linked_list.h>
+#include <gl_xlist.h>
+#include <stdlib.h>
+#include "parse-simulation.h"
+#include "nullable.h"
+#include "lssi.h"
+
+typedef struct
+{
+  // elements newly added in this chunk
+  gl_list_t contents;
+  // properties of the linked list this chunk represents
+  const void *head_elt;
+  const void *tail_elt;
+  size_t total_size;
+} ps_chunk;
+
+struct parse_state;
+typedef struct parse_state
+{
+  // path of state-items the parser has traversed
+  ps_chunk state_items;
+  // list of derivations of the symbols
+  ps_chunk derivs;
+  struct parse_state *parent;
+  int reference_count;
+  // incremented during productions,
+  // decremented during reductions
+  int depth;
+  // whether the contents of the chunks should be
+  // prepended or appended to the list the chunks
+  // represent
+  bool prepend;
+  // causes chunk contents to be freed when the
+  // reference count is one. Used when only the chunk metadata
+  // will be needed.
+  bool free_contents_early;
+} parse_state;
+
+
+static void
+ps_chunk_prepend (ps_chunk *chunk, const void *element)
+{
+  gl_list_add_first (chunk->contents, element);
+  chunk->head_elt = element;
+  ++chunk->total_size;
+  if (!chunk->tail_elt)
+    chunk->tail_elt = element;
+}
+
+static void
+ps_chunk_append (ps_chunk *chunk, const void *element)
+{
+  gl_list_add_last (chunk->contents, element);
+  chunk->tail_elt = element;
+  ++chunk->total_size;
+  if (!chunk->head_elt)
+    chunk->head_elt = element;
+}
+
+static int allocs = 0;
+static int frees = 0;
+
+static parse_state *
+empty_parse_state (void)
+{
+  parse_state *ret = xcalloc (1, sizeof (parse_state));
+  ret->state_items.contents = gl_list_create_empty (GL_LINKED_LIST, NULL,
+                                                    NULL, NULL, true);
+  ret->derivs.contents = gl_list_create_empty (GL_LINKED_LIST, NULL,
+                                               NULL, NULL, true);
+  ++allocs;
+  return ret;
+}
+
+parse_state *
+new_parse_state (const state_item *si)
+{
+  parse_state *ret = empty_parse_state ();
+  ps_chunk_append (&ret->state_items, si);
+  ps_chunk_append (&ret->derivs, derivation_dot ());
+  return ret;
+}
+
+static parse_state *
+copy_parse_state (bool prepend, parse_state *parent)
+{
+  parse_state *ret = xmalloc (sizeof (parse_state));
+  memcpy (ret, parent, sizeof (parse_state));
+  ret->state_items.contents = gl_list_create_empty (GL_LINKED_LIST, NULL,
+                                                    NULL, NULL, true);
+  ret->derivs.contents = gl_list_create_empty (GL_LINKED_LIST, NULL,
+                                               NULL, NULL, true);
+  ret->parent = parent;
+  ret->prepend = prepend;
+  ret->reference_count = 0;
+  ret->free_contents_early = false;
+  ++parent->reference_count;
+  ++allocs;
+  return ret;
+}
+
+bool
+parse_state_derivation_completed (const parse_state *ps)
+{
+  return ps->derivs.total_size == 1;
+}
+
+const derivation *
+parse_state_derivation (const parse_state *ps)
+{
+  return ps->derivs.head_elt;
+}
+
+const state_item *
+parse_state_head (const parse_state *ps)
+{
+  return ps->state_items.head_elt;
+}
+
+const state_item *
+parse_state_tail (const parse_state *ps)
+{
+  return ps->state_items.tail_elt;
+}
+
+int
+parse_state_length (const parse_state *ps)
+{
+  return ps->state_items.total_size;
+}
+
+int
+parse_state_depth (const parse_state *ps)
+{
+  return ps->depth;
+}
+
+void
+parse_state_retain (parse_state *ps)
+{
+  ++ps->reference_count;
+}
+
+void
+parse_state_free_contents_early (parse_state *ps)
+{
+  ps->free_contents_early = true;
+}
+
+void
+parse_state_retain_deriv (parse_state *ps)
+{
+  ps->derivs.contents = NULL;
+}
+
+void
+free_parse_state (parse_state *ps)
+{
+  if (ps == NULL)
+    return;
+  --ps->reference_count;
+  // need to keep the parse state around
+  // for visited, but its contents can be freed
+  if ((ps->reference_count == 1 && ps->free_contents_early) ||
+      (ps->reference_count == 0 && !ps->free_contents_early))
+    {
+      if (ps->state_items.contents)
+        gl_list_free (ps->state_items.contents);
+      if (ps->derivs.contents)
+        gl_list_free (ps->derivs.contents);
+      free_parse_state (ps->parent);
+    }
+  if (ps->reference_count <= 0)
+    {
+      free (ps);
+      ++frees;
+    }
+}
+
+size_t
+parse_state_hasher (const parse_state *ps, size_t max)
+{
+  const ps_chunk *sis = &ps->state_items;
+  return ((state_item *) sis->head_elt - state_items +
+          (state_item *) sis->tail_elt - state_items + sis->total_size) % max;
+}
+
+bool
+parse_state_comparator (const parse_state *ps1, const parse_state *ps2)
+{
+  const ps_chunk *sis1 = &ps1->state_items;
+  const ps_chunk *sis2 = &ps2->state_items;
+  return sis1->head_elt == sis2->head_elt &&
+    sis1->tail_elt == sis2->tail_elt && sis1->total_size == sis2->total_size;
+}
+
+
+void
+parse_state_completed_steps (const parse_state *ps, int *shifts, int 
*productions)
+{
+  // traverse to the root parse_state,
+  // which will have a list of all completed productions.
+  const parse_state *root_ps = ps;
+  while (root_ps->parent)
+    root_ps = root_ps->parent;
+
+  gl_list_t sis = root_ps->state_items.contents;
+  int count = 0;
+  gl_list_iterator_t it = gl_list_iterator (sis);
+  state_item *last = NULL;
+  state_item *next = NULL;
+  while (gl_list_iterator_next (&it, (const void **) &next, NULL))
+    {
+      if (last && last->state == next->state)
+        ++count;
+      last = next;
+    }
+  *productions = count;
+  *shifts = root_ps->state_items.total_size - count;
+}
+
+// takes an array of n gl_lists and flattens them into two list
+// based off of the index split
+static void
+list_flatten_and_split (gl_list_t *l, gl_list_t *rets, int split, int n)
+{
+  int ret_index = 0;
+  int ret_array = 0;
+  for (int i = 0; i < n; ++i)
+    {
+      gl_list_iterator_t it = gl_list_iterator (l[i]);
+      gl_list_t l;
+      while (gl_list_iterator_next (&it, (const void **) &l, NULL))
+        {
+          if (!l)
+            continue;
+          gl_list_iterator_t it2 = gl_list_iterator (l);
+          void *si;
+          while (gl_list_iterator_next (&it2, (const void **) &si, NULL))
+            {
+              if (ret_index++ == split)
+                ++ret_array;
+              if (rets[ret_array])
+                gl_list_add_last (rets[ret_array], si);
+            }
+        }
+    }
+}
+
+// Emulates a reduction on a parse state by popping some amount of
+// derivations and state_items off of the parse_state and returning
+// the result in ret. Returns the derivation of what's popped.
+static gl_list_t
+parser_pop (parse_state *ps, int deriv_index,
+            int si_index, parse_state *ret)
+{
+  // prepend sis, append sis, prepend derivs, append derivs
+  gl_list_t chunks[4];
+  for (int i = 0; i < 4; ++i)
+    chunks[i] = gl_list_create_empty (GL_LINKED_LIST, NULL, NULL, NULL, 1);
+  for (parse_state *pn = ps; pn != NULL; pn = pn->parent)
+    {
+      if (pn->prepend)
+        {
+          gl_list_add_last (chunks[0], pn->state_items.contents);
+          gl_list_add_last (chunks[2], pn->derivs.contents);
+        }
+      else
+        {
+          gl_list_add_first (chunks[1], pn->state_items.contents);
+          gl_list_add_first (chunks[3], pn->derivs.contents);
+        }
+    }
+  gl_list_t popped_derivs =
+    gl_list_create_empty (GL_LINKED_LIST, NULL, NULL, NULL, 1);
+  gl_list_t ret_chunks[4] = { ret->state_items.contents, NULL,
+    ret->derivs.contents, popped_derivs
+  };
+  list_flatten_and_split (chunks, ret_chunks, si_index, 2);
+  list_flatten_and_split (chunks + 2, ret_chunks + 2, deriv_index, 2);
+  size_t s_size = gl_list_size (ret->state_items.contents);
+  ret->state_items.total_size = s_size;
+  if (s_size > 0)
+    {
+      ret->state_items.tail_elt = gl_list_get_at (ret->state_items.contents,
+                                                  s_size - 1);
+      ret->state_items.head_elt =
+        gl_list_get_at (ret->state_items.contents, 0);
+    }
+  else
+    {
+      ret->state_items.tail_elt = NULL;
+      ret->state_items.head_elt = NULL;
+    }
+  size_t d_size = gl_list_size (ret->derivs.contents);
+  ret->derivs.total_size = d_size;
+  if (d_size > 0)
+    {
+      ret->derivs.tail_elt = gl_list_get_at (ret->derivs.contents,
+                                             d_size - 1);
+      ret->derivs.head_elt = gl_list_get_at (ret->derivs.contents, 0);
+    }
+  else
+    {
+      ret->derivs.tail_elt = NULL;
+      ret->derivs.head_elt = NULL;
+    }
+  for (int i = 0; i < 4; ++i)
+    gl_list_free (chunks[i]);
+  return popped_derivs;
+}
+
+void
+parse_state_lists (parse_state *ps, gl_list_t *state_items,
+                   gl_list_t *derivs)
+{
+  parse_state *temp = empty_parse_state ();
+  size_t si_size = ps->state_items.total_size;
+  size_t deriv_size = ps->derivs.total_size;
+  parser_pop (ps, si_size, deriv_size, temp);
+  *state_items = temp->state_items.contents;
+  *derivs = temp->derivs.contents;
+  // prevent the return lists from being freed
+  temp->state_items.contents = NULL;
+  temp->derivs.contents = NULL;
+  free_parse_state (temp);
+}
+
+/**
+ * Compute the parse states that result from taking a transition on
+ * nullable symbols whenever possible from the given state_item.
+ */
+void
+nullable_closure (parse_state *ps, state_item *si, gl_list_t states)
+{
+  parse_state *current_ps = ps;
+  state_item_number prev_sin = si - state_items;
+  for (state_item_number sin = si_trans[prev_sin];
+       sin != -1; prev_sin = sin, sin = si_trans[sin])
+    {
+      state_item *psi = state_items + prev_sin;
+      symbol_number sp = item_number_as_symbol_number (*psi->item);
+      if (ISTOKEN (sp) || !nullable[sp - ntokens])
+        break;
+
+      state_item *nsi = state_items + sin;
+      current_ps = copy_parse_state (false, current_ps);
+      ps_chunk_append (&current_ps->state_items, nsi);
+      ps_chunk_append (&current_ps->derivs, derivation_new (sp, NULL));
+      ++current_ps->reference_count;
+      gl_list_add_last (states, current_ps);
+    }
+}
+
+gl_list_t
+simulate_transition (parse_state *ps)
+{
+  const state_item *si = ps->state_items.tail_elt;
+  symbol_number sym = item_number_as_symbol_number (*si->item);
+  // Transition on the same next symbol, taking nullable
+  // symbols into account.
+  gl_list_t result =
+    gl_list_create_empty (GL_LINKED_LIST, NULL, NULL,
+                          (gl_listelement_dispose_fn)free_parse_state,
+                          true);
+  state_item_number si_next = si_trans[si - state_items];
+  // check for disabled transition, shouldn't happen
+  // as any state_items that lead to these should be
+  // disabled.
+  if (si_next < 0)
+    return result;
+  parse_state *next_ps = copy_parse_state (false, ps);
+  ps_chunk_append (&next_ps->state_items, state_items + si_next);
+  ps_chunk_append (&next_ps->derivs, derivation_new (sym, NULL));
+  ++next_ps->reference_count;
+  gl_list_add_last (result, next_ps);
+
+  nullable_closure (next_ps, state_items + si_next, result);
+  return result;
+}
+
+/**
+ * Determine if the given symbols are equal or their first sets
+ * intersect.
+ */
+static bool
+compatible (symbol_number sym1, symbol_number sym2)
+{
+  if (sym1 == sym2)
+    return true;
+  if (ISTOKEN (sym1) && ISVAR (sym2))
+    return bitset_test (FIRSTS (sym2), sym1);
+  else if (ISVAR (sym1) && ISTOKEN (sym2))
+    return bitset_test (FIRSTS (sym1), sym2);
+  else if (ISVAR (sym1) && ISVAR (sym2))
+    return !bitset_disjoint_p (FIRSTS (sym1), FIRSTS (sym2));
+  else
+    return false;
+}
+
+gl_list_t
+simulate_production (parse_state *ps, symbol_number compat_sym)
+{
+  gl_list_t result =
+    gl_list_create_empty (GL_LINKED_LIST, NULL, NULL,
+                          (gl_listelement_dispose_fn)free_parse_state,
+                          true);
+  const state_item *si = parse_state_tail (ps);
+  bitset prod = si_prods_lookup (si - state_items);
+  if (prod)
+    {
+      bitset_iterator biter;
+      state_item_number sin;
+      BITSET_FOR_EACH (biter, prod, sin, 0)
+        {
+          // Take production step only if lhs is not nullable and
+          // if first rhs symbol is compatible with compat_sym
+          state_item *next = state_items + sin;
+          item_number *itm1 = next->item;
+          if (!compatible (*itm1, compat_sym) || !production_allowed (si, 
next))
+            continue;
+          parse_state *next_ps = copy_parse_state (false, ps);
+          ps_chunk_append (&next_ps->state_items, next);
+          ++next_ps->reference_count;
+          gl_list_add_last (result, next_ps);
+          if (next_ps->depth >= 0)
+            ++next_ps->depth;
+          nullable_closure (next_ps, next, result);
+        }
+    }
+  return result;
+}
+
+// simulates a reduction on the given parse state, conflict_item is the
+// item associated with ps's conflict. symbol_set is a lookahead set this
+// reduction must be compatible with
+gl_list_t
+simulate_reduction (parse_state *ps, int rule_len, bitset symbol_set)
+{
+  gl_list_t result =
+    gl_list_create_empty (GL_LINKED_LIST, NULL, NULL,
+                          (gl_listelement_dispose_fn)free_parse_state,
+                          true);
+
+  int s_size = ps->state_items.total_size;
+  int d_size = ps->derivs.total_size;
+  if (ps->depth >= 0)
+    d_size--;                   // account for dot
+  parse_state *new_root = empty_parse_state ();
+  gl_list_t popped_derivs = parser_pop (ps, d_size - rule_len,
+                                        s_size - rule_len - 1,
+                                        new_root);
+
+  // update derivation
+  state_item *si = (state_item *) ps->state_items.tail_elt;
+  const rule *r = item_rule (si->item);
+  symbol_number lhs = r->lhs->number;
+  derivation *deriv = derivation_new (lhs, popped_derivs);
+  --new_root->depth;
+  ps_chunk_append (&new_root->derivs, deriv);
+
+  if (s_size != rule_len + 1)
+    {
+      state_item *tail = (state_item *) new_root->state_items.tail_elt;
+      ps_chunk_append (&new_root->state_items,
+                       state_items + si_trans[tail - state_items]);
+      ++new_root->reference_count;
+      gl_list_add_last (result, new_root);
+    }
+  else
+    {
+      // The head state_item is a production item, so we need to prepend
+      // with possible source state-items.
+      const state_item *head = ps->state_items.head_elt;
+      gl_list_t prev = lssi_reverse_production (head, symbol_set);
+      gl_list_iterator_t it = gl_list_iterator (prev);
+      state_item *psis;
+      while (gl_list_iterator_next (&it, (const void **) &psis, NULL))
+        {
+          //Prepend the result from the reverse production
+          parse_state *copy = copy_parse_state (true, new_root);
+          ps_chunk_prepend (&copy->state_items, psis);
+
+          // Append the left hand side to the end of the parser state
+          copy = copy_parse_state (false, copy);
+          ps_chunk *sis = &copy->state_items;
+          const state_item *tail = sis->tail_elt;
+          ps_chunk_append (sis, state_items + si_trans[tail - state_items]);
+          ++copy->reference_count;
+          gl_list_add_last (result, copy);
+          nullable_closure (copy, (state_item *) sis->tail_elt, result);
+        }
+      gl_list_free (prev);
+    }
+  return result;
+}
+
+gl_list_t
+parser_prepend (parse_state *ps)
+{
+  gl_list_t result = gl_list_create_empty (GL_LINKED_LIST, NULL, NULL,
+                                           (gl_listelement_dispose_fn)
+                                           free_parse_state,
+                                           true);
+  const state_item *head = ps->state_items.head_elt;
+  bitset prev = si_revs[head - state_items];
+  symbol_number prepend_sym =
+    item_number_as_symbol_number (*(head->item - 1));
+  bitset_iterator biter;
+  state_item_number sin;
+  BITSET_FOR_EACH (biter, prev, sin, 0)
+  {
+    parse_state *copy = copy_parse_state (true, ps);
+    copy->reference_count++;
+    ps_chunk_prepend (&copy->state_items, state_items + sin);
+    if (SI_TRANSITION (head))
+      ps_chunk_prepend (&copy->derivs, derivation_new (prepend_sym, NULL));
+    gl_list_add_last (result, copy);
+  }
+  return result;
+}
+
+void
+print_parse_state (parse_state *ps)
+{
+  printf ("(size %zu depth %d rc %d)\n",
+          ps->state_items.total_size, ps->depth, ps->reference_count);
+  print_state_item (ps->state_items.head_elt, stdout);
+  print_state_item (ps->state_items.tail_elt, stdout);
+  if (ps->derivs.total_size > 0)
+    derivation_print (ps->derivs.head_elt, stdout);
+  putc ('\n', stdout);
+}
diff --git a/src/parse-simulation.h b/src/parse-simulation.h
new file mode 100644
index 00000000..7bb596fd
--- /dev/null
+++ b/src/parse-simulation.h
@@ -0,0 +1,131 @@
+/* Parser simulator for unifying counterexample search
+ 
+ Copyright (C) 2019-2020 Free Software Foundation, Inc.
+ 
+ This file is part of Bison, the GNU Compiler Compiler.
+ 
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+ 
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ GNU General Public License for more details.
+ 
+ You should have received a copy of the GNU General Public License
+ along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
+
+#ifndef PARSE_SIMULATION_H
+#define PARSE_SIMULATION_H
+
+#include <stdio.h>
+#include <gl_xlist.h>
+#include "state-item.h"
+#include "derivation.h"
+
+/*
+  Simulating states of the parser:
+  Each state is an array of state-items and an array of derivations.
+  Each consecutive state-item represents a transition/goto or production,
+  and the derivations are the dereivation trees associated with the symbols
+  transitioned on each step. In more detail:
+ 
+  Parse states are stored as a tree. Each new parse state contains two 
"chunks,"
+  one corresponding to its state-items and the other corresponding to its 
derivations.
+  Chunks only have new elements which weren't present in its parent.
+  Each chunk also stores the head, tail, and total_size of the list it 
represents.
+  So if a parse state was to be copied it retains the list metadata but its
+  contents are empty.
+ 
+  A transition gets the state-item which the last state-item of the parse state
+  transitions to. This is appended to the state-item list, and a derivation 
with
+  just the symbol being transitioned on is appended to the derivation list.
+  A production appends the new state-item, but does not have a derivation
+  associated with it.
+ 
+  A reduction looks at the rule of the last state-item in the state, and pops
+  the last few state-items that make up the rhs of the rule along with their
+  derivations. The derivations become the derivation of the lhs which is then
+  shifted over.
+ 
+  Effectively, everytime a derivation is appended, it represents a shift in
+  the parser. So a parse state that contains
+   start: A . B C D
+   start: A B C D .
+  and the state-items in between will represent a parser that has BCD on the
+  parse stack.
+ 
+  However, the above example cannot be reduced, as it's missing A.
+  Since we start at a state-item that can have a dot in the middle of a rule,
+  it's necessary to support a prepend operation. Luckily the prepend operations
+  are very similar to transitions and productions with the difference being 
that
+  they operate on the head of the state-item list instead of the tail.
+ 
+  A production
+  A transition gets the state-item which the last state-item of the parse state
+  transitions to. This is appended to the state-item list, and a derivation 
with
+  just the symbol being transitioned on is appended to the derivation list.
+
+ */
+
+typedef struct parse_state parse_state;
+
+parse_state *new_parse_state (const state_item *conflict);
+
+size_t parse_state_hasher (const parse_state *ps, size_t max);
+
+bool parse_state_comparator (const parse_state *ps1, const parse_state *ps2);
+
+/* Memory management */
+
+void parse_state_retain (parse_state *ps);
+/* This allows a parse_state to free its contents list
+ * when its reference count reaches 1. This is used to
+ * free memory while the parse state is in a hash set. */
+void parse_state_free_contents_early (parse_state *ps);
+void parse_state_retain_deriv (parse_state *ps);
+void free_parse_state (parse_state *ps);
+
+/* counts the amount of shift and production steps in this parse state */
+void parse_state_completed_steps (const parse_state *ps, int *shifts, int 
*productions);
+
+/* parse state getters */
+bool parse_state_derivation_completed (const parse_state *ps);
+const derivation *parse_state_derivation (const parse_state *ps);
+const state_item *parse_state_head (const parse_state *ps);
+const state_item *parse_state_tail (const parse_state *ps);
+int parse_state_length (const parse_state *ps);
+int parse_state_depth (const parse_state *ps);
+
+/* returns the linked lists that the parse state is supposed to represent */
+void parse_state_lists (parse_state *ps, gl_list_t *state_items,
+                        gl_list_t *derivs);
+
+/* various functions that return a list of states based off of
+ * whatever operation is simulated. After whatever operation, every possible
+ * transition on nullable nonterminals will be added to the returned list. */
+
+/* Look at the tail state-item of the parse state and transition on the symbol
+ * after its dot. The symbol gets added to derivs, and the resulting state-item
+ * is appended to state-items. */
+gl_list_t simulate_transition (parse_state *ps);
+
+/* Look at all of the productions for the non-terminal following the dot in 
the tail
+ * state-item. Appends to state-items each production state-item which may 
start with
+ * compat_sym. */
+gl_list_t simulate_production (parse_state *ps, symbol_number compat_sym);
+
+/* Removes the last rule_len state-items along with their derivations. A new 
state-item is
+ * appended representing the goto after the reduction. A derivation for the 
non-terminal that
+ * was just reduced is appended which consists of the list of derivations that 
were just removed. */
+gl_list_t simulate_reduction (parse_state *ps, int rule_len,
+                              bitset symbol_set);
+
+/* Generate states with a state-item prepended for each state-item that has a
+ * transition or production step to ps's head. */
+gl_list_t parser_prepend (parse_state *ps);
+
+void print_parse_state (parse_state *ps);
+#endif /* PARSE_SIMULATION_H */
-- 
2.20.1 (Apple Git-117)




reply via email to

[Prev in Thread] Current Thread [Next in Thread]