Currently we properly use the "best" integral type for tables,
including those storing state numbers.  However the variables for
state numbers used in yyparse (and its dependencies such as
yy_stack_print) still use int16_t invariably.  As a consequence, very
large models overflow these variables.

Let's use the "best" type for these variables too.  It turns out that
we can still use 16 bits for twice larger automata: stick to unsigned
types.

Reported by Tom Kramer.
https://lists.gnu.org/archive/html/bug-bison/2019-09/msg00018.html

* data/skeletons/yacc.c (yy_state_num): Be computed from YYNSTATES.
Adjust an assertion: yystate can no longer be negative, and compilers
warn about that.
* tests/linear: New.
* tests/torture.at (State number type): New.
Use it.
---
 THANKS                |  1 +
 data/skeletons/yacc.c |  5 +--
 tests/linear          | 94 +++++++++++++++++++++++++++++++++++++++++++
 tests/local.mk        |  2 +-
 tests/torture.at      | 43 +++++++++++++++++---
 5 files changed, 135 insertions(+), 10 deletions(-)
 create mode 100755 tests/linear

diff --git a/THANKS b/THANKS
index 2cdd9b0b..569ba173 100644
--- a/THANKS
+++ b/THANKS
@@ -179,6 +179,7 @@ Tim Landscheidt           t...@tim-landscheidt.de
 Tim Van Holder            tim.van.hol...@pandora.be
 Tobias Frost              t...@debian.org
 Todd Freed                todd.fr...@gmail.com
+Tom Kramer                kra...@nist.gov
 Tom Lane                  t...@sss.pgh.pa.us
 Tom Tromey                tro...@cygnus.com
 Tomasz Kłoczko            kloczko.tom...@gmail.com
diff --git a/data/skeletons/yacc.c b/data/skeletons/yacc.c
index c15f73d4..74ef76d6 100644
--- a/data/skeletons/yacc.c
+++ b/data/skeletons/yacc.c
@@ -432,8 +432,7 @@ typedef short yytype_int16;
 
 
 /* State numbers. */
-typedef yytype_int16 yy_state_num;
-
+typedef ]b4_int_type(0, m4_eval(b4_states_number - 1))[ yy_state_num;
 
 #ifndef YY_
 # if defined YYENABLE_NLS && YYENABLE_NLS
@@ -1505,7 +1504,7 @@ yynewstate:
 `--------------------------------------------------------------------*/
 yysetstate:
   YYDPRINTF ((stderr, "Entering state %d\n", yystate));
-  YY_ASSERT (0 <= yystate && yystate < YYNSTATES);
+  YY_ASSERT (yystate < YYNSTATES);
   *yyssp = yystate;
 
   if (yyss + yystacksize - 1 <= yyssp)
diff --git a/tests/linear b/tests/linear
new file mode 100755
index 00000000..d522bd82
--- /dev/null
+++ b/tests/linear
@@ -0,0 +1,94 @@
+#! /usr/bin/env ruby
+
+# Build a grammar whose LALR(1) parser has a given number of states.
+# Useful to test edge cases (e.g., 256 and 257 states, etc.).
+
+class Linear
+  def initialize(states)
+    @states = states - 4
+    @cols = Math.sqrt(@states).to_i
+    @lines = @states / @cols
+    @rest = @states % @cols
+
+    @n = @lines * (@cols - 1) + (@rest == 0 ? 1 : @rest == 1 ? 2 : @rest)
+  end
+
+  def nterms
+    last = @lines + ([0, 1].include?(@rest) ? 0 : 1)
+    (0...last).map { |i| "t#{i}" }.join(' ')
+  end
+
+  def rules
+    res = (0...@lines).map { |i| "t#{i}:#{' N' * (@cols - 1)}" }.join("\n")
+    case @rest
+    when 0
+      res += ' N'
+    when 1
+      res += ' N N'
+    else
+      res += "\nt#{@lines}:#{" N" * @rest}"
+    end
+    res
+  end
+
+  def to_s
+    puts <<~EOF
+      // states: #{@states}
+      // cols: #{@cols}
+      // lines: #{@lines}
+      // rest: #{@rest}
+      // n: #{@n}
+
+      %code {
+        #include <stdio.h>
+        #include <stdlib.h>
+
+        static int yylex (void);
+        static void yyerror (const char *msg);
+       }
+
+      %debug
+      %define api.value.type union
+      %define parse.lac full
+      %define parse.error verbose
+      %printer { fprintf (yyo, "%ld", $$); } <long>
+      %token <long> N
+
+      %%
+
+      exp: #{nterms}
+
+      #{rules}
+
+      %%
+
+      static
+      int yylex (void)
+      {
+        static long count = 0;
+        if (count++ < #{@n})
+          {
+            yylval.N = count;
+            return N;
+          }
+        else
+          return 0;
+      }
+
+      static
+      void yyerror (const char *msg)
+      {
+        fprintf (stderr, "%s\\n", msg);
+      }
+
+      int
+      main (int argc, const char **argv)
+      {
+        yydebug = !!getenv ("YYDEBUG");
+        return yyparse ();
+      }
+      EOF
+  end
+end
+
+puts Linear.new(ARGV[0].to_i).to_s
diff --git a/tests/local.mk b/tests/local.mk
index 9a2b4d51..9f55e552 100644
--- a/tests/local.mk
+++ b/tests/local.mk
@@ -15,7 +15,7 @@
 ## You should have received a copy of the GNU General Public License
 ## along with this program.  If not, see <http://www.gnu.org/licenses/>.
 
-EXTRA_DIST += $(TESTSUITE_AT) %D%/testsuite %D%/testsuite.h
+EXTRA_DIST += %D%/linear $(TESTSUITE_AT) %D%/testsuite %D%/testsuite.h
 
 DISTCLEANFILES       += %D%/atconfig $(check_SCRIPTS)
 MAINTAINERCLEANFILES += $(TESTSUITE)
diff --git a/tests/torture.at b/tests/torture.at
index b91e059a..29cbbdd3 100644
--- a/tests/torture.at
+++ b/tests/torture.at
@@ -233,7 +233,7 @@ AT_DATA_HORIZONTAL_GRAMMAR([input.y], [1000])
 # Ask for 200 MiB, which should be plenty even on a 64-bit host.
 AT_INCREASE_DATA_SIZE(204000)
 
-AT_BISON_CHECK_NO_XML([-v -o input.c input.y])
+AT_BISON_CHECK_NO_XML([-o input.c input.y])
 AT_COMPILE([input])
 AT_PARSER_CHECK([input])
 
@@ -241,6 +241,42 @@ AT_CLEANUP
 
 
 
+## ------------------- ##
+## State number type.  ##
+## ------------------- ##
+
+# AT_TEST(NUM-STATES, TYPE)
+# -------------------------
+# Check that automaton with NUM-STATES uses TYPE has state number type.
+# Check that parser works.
+
+m4_pushdef([AT_TEST],
+[AT_SETUP([State number type: $1 states])
+
+AT_BISON_OPTION_PUSHDEFS
+
+AT_CHECK([ruby $abs_top_srcdir/tests/linear $1 >input.y])
+AT_FULL_COMPILE([input])
+AT_CHECK([grep 'define YYNSTATES  *$1' input.c], [], [ignore])
+AT_CHECK([grep 'typedef $2 yy_state_num' input.c], [], [ignore])
+AT_PARSER_CHECK([input])
+
+AT_BISON_OPTION_POPDEFS
+AT_CLEANUP])
+
+AT_TEST(  [256], [yytype_uint8])
+AT_TEST(  [257], [yytype_uint16])
+AT_TEST([65536], [yytype_uint16])
+AT_TEST([65537], [unsigned])
+
+m4_popdef([AT_TEST])
+
+
+
+## ------------------------ ##
+## Many lookahead tokens.   ##
+## ------------------------ ##
+
 # AT_DATA_LOOKAHEAD_TOKENS_GRAMMAR(FILE-NAME, SIZE)
 # --------------------------------------------------
 # Create FILE-NAME, containing a self checking parser for a grammar
@@ -340,11 +376,6 @@ mv stdout $1
 AT_BISON_OPTION_POPDEFS
 ])
 
-
-## ------------------------ ##
-## Many lookahead tokens.   ##
-## ------------------------ ##
-
 AT_SETUP([Many lookahead tokens])
 
 AT_DATA_LOOKAHEAD_TOKENS_GRAMMAR([input.y], [1000])
-- 
2.23.0


Reply via email to