This commit:

> Le 29 sept. 2019 à 18:34, Akim Demaille <[email protected]> a écrit :
> 
> * data/skeletons/yacc.c (yy_state_num): Be computed from YYNSTATES.
> Adjust an assertion: yystate can no longer be negative, and compilers
> warn about that.
> * tests/linear: New.
> * tests/torture.at (State number type): New.
> Use it.

Was not enough.  It worked, but triggered many warnings with some compilers, 
and also failed to GCC 4.6 because it generated files with #line with too big a 
number of line for GCC...

Eventually, this commit converged to this one, that passes on the CI.  The main 
differences are:
- we use two different types: yy_state_num for arrays, and int for scalars
- yy_state_num "saturated" to int, not to unsigned.


commit 2ca6b719674228fa2f77f6627e78111a757b83cc
Author: Akim Demaille <[email protected]>
Date:   Sat Sep 28 13:48:35 2019 +0200

    yacc: use the most appropriate integral type for state numbers
    
    Currently we properly use the "best" integral type for tables,
    including those storing state numbers.  However the variables for
    state numbers used in yyparse (and its dependencies such as
    yy_stack_print) still use int16_t invariably.  As a consequence, very
    large models overflow these variables.
    
    Let's use the "best" type for these variables too.  It turns out that
    we can still use 16 bits for twice larger automata: stick to unsigned
    types.
    
    However using 'unsigned' when 16 bits are not enough is troublesome
    and generates tons of warnings about signedness issues.  Instead,
    let's use 'int'.
    
    Reported by Tom Kramer.
    https://lists.gnu.org/archive/html/bug-bison/2019-09/msg00018.html
    
    * data/skeletons/yacc.c (b4_state_num_type): New.
    (yy_state_num): Be computed from YYNSTATES.
    * tests/linear: New.
    * tests/torture.at (State number type): New.
    Use it.

diff --git a/THANKS b/THANKS
index 2cdd9b0b..569ba173 100644
--- a/THANKS
+++ b/THANKS
@@ -179,6 +179,7 @@ Tim Landscheidt           [email protected]
 Tim Van Holder            [email protected]
 Tobias Frost              [email protected]
 Todd Freed                [email protected]
+Tom Kramer                [email protected]
 Tom Lane                  [email protected]
 Tom Tromey                [email protected]
 Tomasz Kłoczko            [email protected]
diff --git a/data/skeletons/yacc.c b/data/skeletons/yacc.c
index 4f68f408..057cc7a0 100644
--- a/data/skeletons/yacc.c
+++ b/data/skeletons/yacc.c
@@ -127,6 +127,18 @@ m4_define([b4_int_type],
 
                                                [int])])
 
+# b4_state_num_type(MIN, MAX)
+# ---------------------------
+# Likewise, but prefer 'int' to 'unsigned' for large integers.
+m4_define([b4_state_num_type],
+[m4_if(b4_ints_in($@,      [0],   [255]), [1], [yytype_uint8],
+       b4_ints_in($@,   [-128],   [127]), [1], [yytype_int8],
+
+       b4_ints_in($@,      [0], [65535]), [1], [yytype_uint16],
+       b4_ints_in($@, [-32768], [32767]), [1], [yytype_int16],
+
+                                               [int])])
+
 
 ## ----------------- ##
 ## Semantic Values.  ##
@@ -432,7 +444,7 @@ typedef short yytype_int16;
 
 
 /* State numbers. */
-typedef yytype_int16 yy_state_num;
+typedef ]b4_state_num_type(0, m4_eval(b4_states_number - 1))[ yy_state_num;
 
 
 #ifndef YY_
@@ -740,10 +752,7 @@ do {                                                       
               \
 {
   YYFPRINTF (stderr, "Stack now");
   for (; yybottom <= yytop; yybottom++)
-    {
-      int yybot = *yybottom;
-      YYFPRINTF (stderr, " %d", yybot);
-    }
+    YYFPRINTF (stderr, " %u", (unsigned) *yybottom);
   YYFPRINTF (stderr, "\n");
 }
 
@@ -985,7 +994,7 @@ yy_lac (yy_state_num *yyesa, yy_state_num **yyes,
         }
       else
         {
-          yyrule = yytable[yyrule];
+          yyrule = (int) yytable[yyrule];
           if (yytable_value_is_error (yyrule))
             {
               YYDPRINTF ((stderr, " Err\n"));
@@ -1022,11 +1031,10 @@ yy_lac (yy_state_num *yyesa, yy_state_num **yyes,
         int yystate;
         {
           const int yylhs = yyr1[yyrule] - YYNTOKENS;
-          const int yyi = yypgoto[yylhs] + *yyesp;
-          yystate = ((yy_state_num)
-                     (0 <= yyi && yyi <= YYLAST && yycheck[yyi] == *yyesp
-                      ? yytable[yyi]
-                      : yydefgoto[yylhs]));
+          const int yyi = yypgoto[yylhs] + (int) *yyesp;
+          yystate = (0 <= yyi && yyi <= YYLAST && yycheck[yyi] == *yyesp
+                     ? (int) yytable[yyi]
+                     : yydefgoto[yylhs]);
         }
         if (yyesp == yyes_prev)
           {
@@ -1046,7 +1054,7 @@ yy_lac (yy_state_num *yyesa, yy_state_num **yyes,
               }
             *++yyesp = (yy_state_num) yystate;
           }
-        YYDPRINTF ((stderr, " G%d", (int) yystate));
+        YYDPRINTF ((stderr, " G%d", yystate));
       }
     }
 }]])[
@@ -1639,7 +1647,7 @@ yyread_pushed_token:]])[
       goto yydefault;
     }]], [[
     goto yydefault;]])[
-  yyn = yytable[yyn];
+  yyn = (int) yytable[yyn];
   if (yyn <= 0)
     {
       if (yytable_value_is_error (yyn))
@@ -1661,7 +1669,7 @@ yyread_pushed_token:]])[
   yychar = YYEMPTY;]b4_lac_if([[
   YY_LAC_DISCARD ("shift");]])[
 
-  yystate = (yy_state_num) yyn;
+  yystate = yyn;
   YY_IGNORE_MAYBE_UNINITIALIZED_BEGIN
   *++yyvsp = yylval;
   YY_IGNORE_MAYBE_UNINITIALIZED_END]b4_locations_if([
@@ -1741,9 +1749,9 @@ yyreduce:
      number reduced by.  */
   {
     const int yylhs = yyr1[yyn] - YYNTOKENS;
-    const int yyi = yypgoto[yylhs] + *yyssp;
+    const int yyi = yypgoto[yylhs] + (int) *yyssp;
     yystate = (0 <= yyi && yyi <= YYLAST && yycheck[yyi] == *yyssp
-               ? yytable[yyi]
+               ? (int) yytable[yyi]
                : yydefgoto[yylhs]);
   }
 
@@ -1859,7 +1867,7 @@ yyerrlab1:
           yyn += YYTERROR;
           if (0 <= yyn && yyn <= YYLAST && yycheck[yyn] == YYTERROR)
             {
-              yyn = yytable[yyn];
+              yyn = (int) yytable[yyn];
               if (0 < yyn)
                 break;
             }
@@ -1894,7 +1902,7 @@ yyerrlab1:
   /* Shift the error token.  */
   YY_SYMBOL_PRINT ("Shifting", yystos[yyn], yyvsp, yylsp);
 
-  yystate = (yy_state_num) yyn;
+  yystate = yyn;
   goto yynewstate;
 
 
diff --git a/tests/linear b/tests/linear
new file mode 100755
index 00000000..fb5512be
--- /dev/null
+++ b/tests/linear
@@ -0,0 +1,94 @@
+#! /usr/bin/env ruby
+
+# Build a grammar whose LALR(1) parser has a given number of states.
+# Useful to test edge cases (e.g., 256 and 257 states, etc.).
+
+class Linear
+  def initialize(states)
+    @states = states - 4
+    @cols = Math.sqrt(@states).to_i
+    @lines = @states / @cols
+    @rest = @states % @cols
+
+    @n = @lines * (@cols - 1) + (@rest == 0 ? 1 : @rest == 1 ? 2 : @rest)
+  end
+
+  def nterms
+    last = @lines + ([0, 1].include?(@rest) ? 0 : 1)
+    (0...last).map { |i| "t#{i}" }.join(' ')
+  end
+
+  def rules
+    res = (0...@lines).map { |i| "t#{i}:#{' N' * (@cols - 1)}" }.join("\n")
+    case @rest
+    when 0
+      res += ' N'
+    when 1
+      res += ' N N'
+    else
+      res += "\nt#{@lines}:#{" N" * @rest}"
+    end
+    res
+  end
+
+  def to_s
+    puts <<~EOF
+      // states: #{@states}
+      // cols: #{@cols}
+      // lines: #{@lines}
+      // rest: #{@rest}
+      // n: #{@n}
+
+      %code {
+        #include <stdio.h>
+        #include <stdlib.h>
+
+        static int yylex (void);
+        static void yyerror (const char *msg);
+       }
+
+      %debug
+      %define api.value.type union
+      %define parse.lac full
+      %define parse.error verbose
+      %printer { fprintf (yyo, "%ld", $$); } <long>
+      %token <long> N
+
+      %%
+
+      exp: #{nterms}
+
+      #{rules}
+
+      %%
+
+      static
+      int yylex (void)
+      {
+        static long count = 0;
+        if (count++ < #{@n})
+          {
+            yylval.N = count;
+            return N;
+          }
+        else
+          return 0;
+      }
+
+      static
+      void yyerror (const char *msg)
+      {
+        fprintf (stderr, "%s\\n", msg);
+      }
+
+      int
+      main (void)
+      {
+        yydebug = !!getenv ("YYDEBUG");
+        return yyparse ();
+      }
+      EOF
+  end
+end
+
+puts Linear.new(ARGV[0].to_i).to_s
diff --git a/tests/local.mk b/tests/local.mk
index 9a2b4d51..9f55e552 100644
--- a/tests/local.mk
+++ b/tests/local.mk
@@ -15,7 +15,7 @@
 ## You should have received a copy of the GNU General Public License
 ## along with this program.  If not, see <http://www.gnu.org/licenses/>.
 
-EXTRA_DIST += $(TESTSUITE_AT) %D%/testsuite %D%/testsuite.h
+EXTRA_DIST += %D%/linear $(TESTSUITE_AT) %D%/testsuite %D%/testsuite.h
 
 DISTCLEANFILES       += %D%/atconfig $(check_SCRIPTS)
 MAINTAINERCLEANFILES += $(TESTSUITE)
diff --git a/tests/torture.at b/tests/torture.at
index b91e059a..952add73 100644
--- a/tests/torture.at
+++ b/tests/torture.at
@@ -233,7 +233,7 @@ AT_DATA_HORIZONTAL_GRAMMAR([input.y], [1000])
 # Ask for 200 MiB, which should be plenty even on a 64-bit host.
 AT_INCREASE_DATA_SIZE(204000)
 
-AT_BISON_CHECK_NO_XML([-v -o input.c input.y])
+AT_BISON_CHECK_NO_XML([-o input.c input.y])
 AT_COMPILE([input])
 AT_PARSER_CHECK([input])
 
@@ -241,6 +241,43 @@ AT_CLEANUP
 
 
 
+## ------------------- ##
+## State number type.  ##
+## ------------------- ##
+
+# AT_TEST(NUM-STATES, TYPE)
+# -------------------------
+# Check that automaton with NUM-STATES uses TYPE has state number type.
+# Check that parser works.
+
+m4_pushdef([AT_TEST],
+[AT_SETUP([State number type: $1 states])
+
+AT_BISON_OPTION_PUSHDEFS
+
+AT_CHECK([ruby $abs_top_srcdir/tests/linear $1 >input.y || { echo "ruby does 
not work"; exit 77; }])
+# Old versions of GCC reject large values given to #line.
+AT_FULL_COMPILE([input], [], [], [], [--no-line])
+AT_CHECK([grep 'define YYNSTATES  *$1' input.c], [], [ignore])
+AT_CHECK([grep 'typedef $2 yy_state_num' input.c], [], [ignore])
+AT_PARSER_CHECK([input])
+
+AT_BISON_OPTION_POPDEFS
+AT_CLEANUP])
+
+AT_TEST(  [256], [yytype_uint8])
+AT_TEST(  [257], [yytype_uint16])
+AT_TEST([65536], [yytype_uint16])
+AT_TEST([65537], [int])
+
+m4_popdef([AT_TEST])
+
+
+
+## ------------------------ ##
+## Many lookahead tokens.   ##
+## ------------------------ ##
+
 # AT_DATA_LOOKAHEAD_TOKENS_GRAMMAR(FILE-NAME, SIZE)
 # --------------------------------------------------
 # Create FILE-NAME, containing a self checking parser for a grammar
@@ -340,11 +377,6 @@ mv stdout $1
 AT_BISON_OPTION_POPDEFS
 ])
 
-
-## ------------------------ ##
-## Many lookahead tokens.   ##
-## ------------------------ ##
-
 AT_SETUP([Many lookahead tokens])
 
 AT_DATA_LOOKAHEAD_TOKENS_GRAMMAR([input.y], [1000])


Reply via email to