bug-bison
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[PATCH 3/3] yacc: use the most appropriate integral type for state numbe


From: Akim Demaille
Subject: [PATCH 3/3] yacc: use the most appropriate integral type for state numbers
Date: Sun, 29 Sep 2019 18:34:11 +0200

Currently we properly use the "best" integral type for tables,
including those storing state numbers.  However the variables for
state numbers used in yyparse (and its dependencies such as
yy_stack_print) still use int16_t invariably.  As a consequence, very
large models overflow these variables.

Let's use the "best" type for these variables too.  It turns out that
we can still use 16 bits for twice larger automata: stick to unsigned
types.

Reported by Tom Kramer.
https://lists.gnu.org/archive/html/bug-bison/2019-09/msg00018.html

* data/skeletons/yacc.c (yy_state_num): Be computed from YYNSTATES.
Adjust an assertion: yystate can no longer be negative, and compilers
warn about that.
* tests/linear: New.
* tests/torture.at (State number type): New.
Use it.
---
 THANKS                |  1 +
 data/skeletons/yacc.c |  5 +--
 tests/linear          | 94 +++++++++++++++++++++++++++++++++++++++++++
 tests/local.mk        |  2 +-
 tests/torture.at      | 43 +++++++++++++++++---
 5 files changed, 135 insertions(+), 10 deletions(-)
 create mode 100755 tests/linear

diff --git a/THANKS b/THANKS
index 2cdd9b0b..569ba173 100644
--- a/THANKS
+++ b/THANKS
@@ -179,6 +179,7 @@ Tim Landscheidt           address@hidden
 Tim Van Holder            address@hidden
 Tobias Frost              address@hidden
 Todd Freed                address@hidden
+Tom Kramer                address@hidden
 Tom Lane                  address@hidden
 Tom Tromey                address@hidden
 Tomasz Kłoczko            address@hidden
diff --git a/data/skeletons/yacc.c b/data/skeletons/yacc.c
index c15f73d4..74ef76d6 100644
--- a/data/skeletons/yacc.c
+++ b/data/skeletons/yacc.c
@@ -432,8 +432,7 @@ typedef short yytype_int16;
 
 
 /* State numbers. */
-typedef yytype_int16 yy_state_num;
-
+typedef ]b4_int_type(0, m4_eval(b4_states_number - 1))[ yy_state_num;
 
 #ifndef YY_
 # if defined YYENABLE_NLS && YYENABLE_NLS
@@ -1505,7 +1504,7 @@ yynewstate:
 `--------------------------------------------------------------------*/
 yysetstate:
   YYDPRINTF ((stderr, "Entering state %d\n", yystate));
-  YY_ASSERT (0 <= yystate && yystate < YYNSTATES);
+  YY_ASSERT (yystate < YYNSTATES);
   *yyssp = yystate;
 
   if (yyss + yystacksize - 1 <= yyssp)
diff --git a/tests/linear b/tests/linear
new file mode 100755
index 00000000..d522bd82
--- /dev/null
+++ b/tests/linear
@@ -0,0 +1,94 @@
+#! /usr/bin/env ruby
+
+# Build a grammar whose LALR(1) parser has a given number of states.
+# Useful to test edge cases (e.g., 256 and 257 states, etc.).
+
+class Linear
+  def initialize(states)
+    @states = states - 4
+    @cols = Math.sqrt(@states).to_i
+    @lines = @states / @cols
+    @rest = @states % @cols
+
+    @n = @lines * (@cols - 1) + (@rest == 0 ? 1 : @rest == 1 ? 2 : @rest)
+  end
+
+  def nterms
+    last = @lines + ([0, 1].include?(@rest) ? 0 : 1)
+    (0...last).map { |i| "t#{i}" }.join(' ')
+  end
+
+  def rules
+    res = (0...@lines).map { |i| "t#{i}:#{' N' * (@cols - 1)}" }.join("\n")
+    case @rest
+    when 0
+      res += ' N'
+    when 1
+      res += ' N N'
+    else
+      res += "\nt#{@lines}:#{" N" * @rest}"
+    end
+    res
+  end
+
+  def to_s
+    puts <<~EOF
+      // states: #{@states}
+      // cols: #{@cols}
+      // lines: #{@lines}
+      // rest: #{@rest}
+      // n: #{@n}
+
+      %code {
+        #include <stdio.h>
+        #include <stdlib.h>
+
+        static int yylex (void);
+        static void yyerror (const char *msg);
+       }
+
+      %debug
+      %define api.value.type union
+      %define parse.lac full
+      %define parse.error verbose
+      %printer { fprintf (yyo, "%ld", $$); } <long>
+      %token <long> N
+
+      %%
+
+      exp: #{nterms}
+
+      #{rules}
+
+      %%
+
+      static
+      int yylex (void)
+      {
+        static long count = 0;
+        if (count++ < #{@n})
+          {
+            yylval.N = count;
+            return N;
+          }
+        else
+          return 0;
+      }
+
+      static
+      void yyerror (const char *msg)
+      {
+        fprintf (stderr, "%s\\n", msg);
+      }
+
+      int
+      main (int argc, const char **argv)
+      {
+        yydebug = !!getenv ("YYDEBUG");
+        return yyparse ();
+      }
+      EOF
+  end
+end
+
+puts Linear.new(ARGV[0].to_i).to_s
diff --git a/tests/local.mk b/tests/local.mk
index 9a2b4d51..9f55e552 100644
--- a/tests/local.mk
+++ b/tests/local.mk
@@ -15,7 +15,7 @@
 ## You should have received a copy of the GNU General Public License
 ## along with this program.  If not, see <http://www.gnu.org/licenses/>.
 
-EXTRA_DIST += $(TESTSUITE_AT) %D%/testsuite %D%/testsuite.h
+EXTRA_DIST += %D%/linear $(TESTSUITE_AT) %D%/testsuite %D%/testsuite.h
 
 DISTCLEANFILES       += %D%/atconfig $(check_SCRIPTS)
 MAINTAINERCLEANFILES += $(TESTSUITE)
diff --git a/tests/torture.at b/tests/torture.at
index b91e059a..29cbbdd3 100644
--- a/tests/torture.at
+++ b/tests/torture.at
@@ -233,7 +233,7 @@ AT_DATA_HORIZONTAL_GRAMMAR([input.y], [1000])
 # Ask for 200 MiB, which should be plenty even on a 64-bit host.
 AT_INCREASE_DATA_SIZE(204000)
 
-AT_BISON_CHECK_NO_XML([-v -o input.c input.y])
+AT_BISON_CHECK_NO_XML([-o input.c input.y])
 AT_COMPILE([input])
 AT_PARSER_CHECK([input])
 
@@ -241,6 +241,42 @@ AT_CLEANUP
 
 
 
+## ------------------- ##
+## State number type.  ##
+## ------------------- ##
+
+# AT_TEST(NUM-STATES, TYPE)
+# -------------------------
+# Check that automaton with NUM-STATES uses TYPE has state number type.
+# Check that parser works.
+
+m4_pushdef([AT_TEST],
+[AT_SETUP([State number type: $1 states])
+
+AT_BISON_OPTION_PUSHDEFS
+
+AT_CHECK([ruby $abs_top_srcdir/tests/linear $1 >input.y])
+AT_FULL_COMPILE([input])
+AT_CHECK([grep 'define YYNSTATES  *$1' input.c], [], [ignore])
+AT_CHECK([grep 'typedef $2 yy_state_num' input.c], [], [ignore])
+AT_PARSER_CHECK([input])
+
+AT_BISON_OPTION_POPDEFS
+AT_CLEANUP])
+
+AT_TEST(  [256], [yytype_uint8])
+AT_TEST(  [257], [yytype_uint16])
+AT_TEST([65536], [yytype_uint16])
+AT_TEST([65537], [unsigned])
+
+m4_popdef([AT_TEST])
+
+
+
+## ------------------------ ##
+## Many lookahead tokens.   ##
+## ------------------------ ##
+
 # AT_DATA_LOOKAHEAD_TOKENS_GRAMMAR(FILE-NAME, SIZE)
 # --------------------------------------------------
 # Create FILE-NAME, containing a self checking parser for a grammar
@@ -340,11 +376,6 @@ mv stdout $1
 AT_BISON_OPTION_POPDEFS
 ])
 
-
-## ------------------------ ##
-## Many lookahead tokens.   ##
-## ------------------------ ##
-
 AT_SETUP([Many lookahead tokens])
 
 AT_DATA_LOOKAHEAD_TOKENS_GRAMMAR([input.y], [1000])
-- 
2.23.0




reply via email to

[Prev in Thread] Current Thread [Next in Thread]