Currently we properly use the "best" integral type for tables, including those storing state numbers. However the variables for state numbers used in yyparse (and its dependencies such as yy_stack_print) still use int16_t invariably. As a consequence, very large models overflow these variables.
Let's use the "best" type for these variables too. It turns out that we can still use 16 bits for twice larger automata: stick to unsigned types. Reported by Tom Kramer. https://lists.gnu.org/archive/html/bug-bison/2019-09/msg00018.html * data/skeletons/yacc.c (yy_state_num): Be computed from YYNSTATES. Adjust an assertion: yystate can no longer be negative, and compilers warn about that. * tests/linear: New. * tests/torture.at (State number type): New. Use it. --- THANKS | 1 + data/skeletons/yacc.c | 5 +-- tests/linear | 94 +++++++++++++++++++++++++++++++++++++++++++ tests/local.mk | 2 +- tests/torture.at | 43 +++++++++++++++++--- 5 files changed, 135 insertions(+), 10 deletions(-) create mode 100755 tests/linear diff --git a/THANKS b/THANKS index 2cdd9b0b..569ba173 100644 --- a/THANKS +++ b/THANKS @@ -179,6 +179,7 @@ Tim Landscheidt t...@tim-landscheidt.de Tim Van Holder tim.van.hol...@pandora.be Tobias Frost t...@debian.org Todd Freed todd.fr...@gmail.com +Tom Kramer kra...@nist.gov Tom Lane t...@sss.pgh.pa.us Tom Tromey tro...@cygnus.com Tomasz Kłoczko kloczko.tom...@gmail.com diff --git a/data/skeletons/yacc.c b/data/skeletons/yacc.c index c15f73d4..74ef76d6 100644 --- a/data/skeletons/yacc.c +++ b/data/skeletons/yacc.c @@ -432,8 +432,7 @@ typedef short yytype_int16; /* State numbers. */ -typedef yytype_int16 yy_state_num; - +typedef ]b4_int_type(0, m4_eval(b4_states_number - 1))[ yy_state_num; #ifndef YY_ # if defined YYENABLE_NLS && YYENABLE_NLS @@ -1505,7 +1504,7 @@ yynewstate: `--------------------------------------------------------------------*/ yysetstate: YYDPRINTF ((stderr, "Entering state %d\n", yystate)); - YY_ASSERT (0 <= yystate && yystate < YYNSTATES); + YY_ASSERT (yystate < YYNSTATES); *yyssp = yystate; if (yyss + yystacksize - 1 <= yyssp) diff --git a/tests/linear b/tests/linear new file mode 100755 index 00000000..d522bd82 --- /dev/null +++ b/tests/linear @@ -0,0 +1,94 @@ +#! /usr/bin/env ruby + +# Build a grammar whose LALR(1) parser has a given number of states. +# Useful to test edge cases (e.g., 256 and 257 states, etc.). + +class Linear + def initialize(states) + @states = states - 4 + @cols = Math.sqrt(@states).to_i + @lines = @states / @cols + @rest = @states % @cols + + @n = @lines * (@cols - 1) + (@rest == 0 ? 1 : @rest == 1 ? 2 : @rest) + end + + def nterms + last = @lines + ([0, 1].include?(@rest) ? 0 : 1) + (0...last).map { |i| "t#{i}" }.join(' ') + end + + def rules + res = (0...@lines).map { |i| "t#{i}:#{' N' * (@cols - 1)}" }.join("\n") + case @rest + when 0 + res += ' N' + when 1 + res += ' N N' + else + res += "\nt#{@lines}:#{" N" * @rest}" + end + res + end + + def to_s + puts <<~EOF + // states: #{@states} + // cols: #{@cols} + // lines: #{@lines} + // rest: #{@rest} + // n: #{@n} + + %code { + #include <stdio.h> + #include <stdlib.h> + + static int yylex (void); + static void yyerror (const char *msg); + } + + %debug + %define api.value.type union + %define parse.lac full + %define parse.error verbose + %printer { fprintf (yyo, "%ld", $$); } <long> + %token <long> N + + %% + + exp: #{nterms} + + #{rules} + + %% + + static + int yylex (void) + { + static long count = 0; + if (count++ < #{@n}) + { + yylval.N = count; + return N; + } + else + return 0; + } + + static + void yyerror (const char *msg) + { + fprintf (stderr, "%s\\n", msg); + } + + int + main (int argc, const char **argv) + { + yydebug = !!getenv ("YYDEBUG"); + return yyparse (); + } + EOF + end +end + +puts Linear.new(ARGV[0].to_i).to_s diff --git a/tests/local.mk b/tests/local.mk index 9a2b4d51..9f55e552 100644 --- a/tests/local.mk +++ b/tests/local.mk @@ -15,7 +15,7 @@ ## You should have received a copy of the GNU General Public License ## along with this program. If not, see <http://www.gnu.org/licenses/>. -EXTRA_DIST += $(TESTSUITE_AT) %D%/testsuite %D%/testsuite.h +EXTRA_DIST += %D%/linear $(TESTSUITE_AT) %D%/testsuite %D%/testsuite.h DISTCLEANFILES += %D%/atconfig $(check_SCRIPTS) MAINTAINERCLEANFILES += $(TESTSUITE) diff --git a/tests/torture.at b/tests/torture.at index b91e059a..29cbbdd3 100644 --- a/tests/torture.at +++ b/tests/torture.at @@ -233,7 +233,7 @@ AT_DATA_HORIZONTAL_GRAMMAR([input.y], [1000]) # Ask for 200 MiB, which should be plenty even on a 64-bit host. AT_INCREASE_DATA_SIZE(204000) -AT_BISON_CHECK_NO_XML([-v -o input.c input.y]) +AT_BISON_CHECK_NO_XML([-o input.c input.y]) AT_COMPILE([input]) AT_PARSER_CHECK([input]) @@ -241,6 +241,42 @@ AT_CLEANUP +## ------------------- ## +## State number type. ## +## ------------------- ## + +# AT_TEST(NUM-STATES, TYPE) +# ------------------------- +# Check that automaton with NUM-STATES uses TYPE has state number type. +# Check that parser works. + +m4_pushdef([AT_TEST], +[AT_SETUP([State number type: $1 states]) + +AT_BISON_OPTION_PUSHDEFS + +AT_CHECK([ruby $abs_top_srcdir/tests/linear $1 >input.y]) +AT_FULL_COMPILE([input]) +AT_CHECK([grep 'define YYNSTATES *$1' input.c], [], [ignore]) +AT_CHECK([grep 'typedef $2 yy_state_num' input.c], [], [ignore]) +AT_PARSER_CHECK([input]) + +AT_BISON_OPTION_POPDEFS +AT_CLEANUP]) + +AT_TEST( [256], [yytype_uint8]) +AT_TEST( [257], [yytype_uint16]) +AT_TEST([65536], [yytype_uint16]) +AT_TEST([65537], [unsigned]) + +m4_popdef([AT_TEST]) + + + +## ------------------------ ## +## Many lookahead tokens. ## +## ------------------------ ## + # AT_DATA_LOOKAHEAD_TOKENS_GRAMMAR(FILE-NAME, SIZE) # -------------------------------------------------- # Create FILE-NAME, containing a self checking parser for a grammar @@ -340,11 +376,6 @@ mv stdout $1 AT_BISON_OPTION_POPDEFS ]) - -## ------------------------ ## -## Many lookahead tokens. ## -## ------------------------ ## - AT_SETUP([Many lookahead tokens]) AT_DATA_LOOKAHEAD_TOKENS_GRAMMAR([input.y], [1000]) -- 2.23.0