On Tue, 19 Jul 2005, Joel E. Denny wrote: > Should yychar be reliable in semantic actions when declaring %glr-parser?
I've installed this patch, which fixes the GLR lookahead bugs I've encountered and adds some related test cases. Although my original post was mainly about deterministic operation, this patch also addresses nondeterministic operation. This is a complex patch. More pairs of eyes would be helpful. Joel 2006-01-06 Joel E. Denny <[EMAIL PROTECTED]> * data/glr.c (yyGLRStateSet): Add yybool* yylookaheadStatuses member to use during nondeterministic operation to track which stacks have actually needed the current lookahead. (yyinitStateSet, yyfreeStateSet, yyremoveDeletes, yysplitStack): Allocate, deallocate, resize, and otherwise shuffle space for yylookaheadStatuses in parallel with yystates member of yyGLRStateSet. (yysplitStack, yyprocessOneStack, yyparse): Set lookahead status appropriately during nondeterministic operation. (yySemanticOption): Add int yyrawchar, YYSTYPE yyval, and YYLTYPE yyloc members to store the current lookahead to be used by the deferred user action. (yyaddDeferredAction): Add size_t yyk parameter specifying the stack from which the RHS is taken. Set the lookahead members of the new yySemanticOption according to the lookahead status for stack yyk. (yyglrShiftDefer, yyglrReduce): Pass yyk parameter on to yyaddDeferredAction. (yyresolveAction): Set yychar, yylval, and yylloc to the lookahead members of yySemanticOption before invoking yyuserAction, and then set them back to their current values afterward. (yyparse): Set yychar = YYEMPTY where yytoken = YYEMPTY. (yyreportAmbiguity): Add /*ARGSUSED*/ to pacify lint. * tests/glr-regression.at: Remove `.' from the ends of recent test case titles for consistency. (Leaked merged semantic value if user action cuts parse): In order to suppress lint warnings, use arguments in merge function, and assign char value < 128 in main. (Incorrect lookahead during deterministic GLR): New test case. (Incorrect lookahead during nondeterministic GLR): New test case. Index: data/glr.c =================================================================== RCS file: /sources/bison/bison/data/glr.c,v retrieving revision 1.155 diff -p -u -r1.155 glr.c --- data/glr.c 6 Jan 2006 20:09:31 -0000 1.155 +++ data/glr.c 6 Jan 2006 20:25:55 -0000 @@ -761,6 +761,11 @@ struct yyGLRState { struct yyGLRStateSet { yyGLRState** yystates; + /** During nondeterministic operation, yylookaheadStatuses tracks which + * stacks have actually needed the current lookahead. During deterministic + * operation, yylookaheadStatuses[0] is not maintained since it would merely + * duplicate yychar != YYEMPTY. */ + yybool* yylookaheadStatuses; size_t yysize, yycapacity; }; @@ -771,6 +776,10 @@ struct yySemanticOption { yyRuleNum yyrule; /** The last RHS state in the list of states to be reduced. */ yyGLRState* yystate; + /** The lookahead for this reduction. */ + int yyrawchar; + YYSTYPE yyval; + YYLTYPE yyloc; /** Next sibling in chain of options. To facilitate merging, * options are chained in decreasing order by address. */ yySemanticOption* yynext; @@ -1092,14 +1101,24 @@ yynewGLRStackItem (yyGLRStack* yystackp, return yynewItem; } +/** Stack #K = the stack from which RHS is taken. This might not be the stack + * containing STATE, to which the deferred action is added. */ static void -yyaddDeferredAction (yyGLRStack* yystackp, yyGLRState* yystate, +yyaddDeferredAction (yyGLRStack* yystackp, size_t yyk, yyGLRState* yystate, yyGLRState* rhs, yyRuleNum yyrule) { yySemanticOption* yynewOption = &yynewGLRStackItem (yystackp, yyfalse)->yyoption; yynewOption->yystate = rhs; yynewOption->yyrule = yyrule; + if (yystackp->yytops.yylookaheadStatuses[yyk]) + { + yynewOption->yyrawchar = yychar; + yynewOption->yyval = yylval; + yynewOption->yyloc = yylloc; + } + else + yynewOption->yyrawchar = YYEMPTY; yynewOption->yynext = yystate->yysemantics.yyfirstVal; yystate->yysemantics.yyfirstVal = yynewOption; @@ -1118,12 +1137,20 @@ yyinitStateSet (yyGLRStateSet* yyset) if (! yyset->yystates) return yyfalse; yyset->yystates[0] = NULL; + yyset->yylookaheadStatuses = + (yybool*) YYMALLOC (16 * sizeof yyset->yylookaheadStatuses[0]); + if (! yyset->yylookaheadStatuses) + { + YYFREE (yyset->yystates); + return yyfalse; + } return yytrue; } static void yyfreeStateSet (yyGLRStateSet* yyset) { YYFREE (yyset->yystates); + YYFREE (yyset->yylookaheadStatuses); } /** Initialize STACK to a single empty stack, with total maximum @@ -1270,6 +1297,13 @@ yyremoveDeletes (yyGLRStack* yystackp) else { yystackp->yytops.yystates[yyj] = yystackp->yytops.yystates[yyi]; + /* In the current implementation, it's unnecessary to copy + yystackp->yytops.yylookaheadStatuses[yyi] since, after + yyremoveDeletes returns, the parser immediately either enters + deterministic operation or shifts a token. However, it doesn't + hurt, and the code might evolve to need it. */ + yystackp->yytops.yylookaheadStatuses[yyj] = + yystackp->yytops.yylookaheadStatuses[yyi]; if (yyj != yyi) { YYDPRINTF ((stderr, "Rename stack %lu -> %lu.\n", @@ -1318,7 +1352,7 @@ yyglrShiftDefer (yyGLRStack* yystackp, s yystackp->yytops.yystates[yyk] = yynewState; /* Invokes YY_RESERVE_GLRSTACK. */ - yyaddDeferredAction (yystackp, yynewState, rhs, yyrule); + yyaddDeferredAction (yystackp, yyk, yynewState, rhs, yyrule); } /** Pop the symbols consumed by reduction #RULE from the top of stack @@ -1470,7 +1504,7 @@ yyglrReduce (yyGLRStack* yystackp, size_ { if (yyp->yylrState == yynewLRState && yyp->yypred == yys) { - yyaddDeferredAction (yystackp, yyp, yys0, yyrule); + yyaddDeferredAction (yystackp, yyk, yyp, yys0, yyrule); yymarkStackDeleted (yystackp, yyk); YYDPRINTF ((stderr, "Merging stack %lu into stack %lu.\n", (unsigned long int) yyk, @@ -1497,6 +1531,7 @@ yysplitStack (yyGLRStack* yystackp, size if (yystackp->yytops.yysize >= yystackp->yytops.yycapacity) { yyGLRState** yynewStates; + yybool* yynewLookaheadStatuses; if (! ((yystackp->yytops.yycapacity <= (YYSIZEMAX / (2 * sizeof yynewStates[0]))) && (yynewStates = @@ -1505,9 +1540,17 @@ yysplitStack (yyGLRStack* yystackp, size * sizeof yynewStates[0]))))) yyMemoryExhausted (yystackp); yystackp->yytops.yystates = yynewStates; + if (! (yynewLookaheadStatuses = + (yybool*) YYREALLOC (yystackp->yytops.yylookaheadStatuses, + ((yystackp->yytops.yycapacity) + * sizeof yynewLookaheadStatuses[0])))) + yyMemoryExhausted (yystackp); + yystackp->yytops.yylookaheadStatuses = yynewLookaheadStatuses; } yystackp->yytops.yystates[yystackp->yytops.yysize] = yystackp->yytops.yystates[yyk]; + yystackp->yytops.yylookaheadStatuses[yystackp->yytops.yysize] + = yystackp->yytops.yylookaheadStatuses[yyk]; yystackp->yytops.yysize += 1; return yystackp->yytops.yysize-1; } @@ -1647,6 +1690,10 @@ yyresolveAction (yySemanticOption* yyopt { yyGLRStackItem yyrhsVals[YYMAXRHS + YYMAXLEFT + 1]; int yynrhs; + int yychar_current; + YYSTYPE yylval_current; + YYLTYPE yylloc_current; + YYRESULTTAG yyresult; yynrhs = yyrhsLength (yyopt->yyrule); YYCHK (yyresolveStates (yyopt->yystate, yynrhs, yystackp]b4_user_args[)); @@ -1654,9 +1701,19 @@ yyresolveAction (yySemanticOption* yyopt if (yynrhs == 0) /* Set default location. */ yyrhsVals[YYMAXRHS + YYMAXLEFT - 1].yystate.yyloc = yyopt->yystate->yyloc;]])[ - return yyuserAction (yyopt->yyrule, yynrhs, - yyrhsVals + YYMAXRHS + YYMAXLEFT - 1, - yyvalp, yylocp, yystackp]b4_user_args[); + yychar_current = yychar; + yylval_current = yylval; + yylloc_current = yylloc; + yychar = yyopt->yyrawchar; + yylval = yyopt->yyval; + yylloc = yyopt->yyloc; + yyresult = yyuserAction (yyopt->yyrule, yynrhs, + yyrhsVals + YYMAXRHS + YYMAXLEFT - 1, + yyvalp, yylocp, yystackp]b4_user_args[); + yychar = yychar_current; + yylval = yylval_current; + yylloc = yylloc_current; + return yyresult; } #if YYDEBUG @@ -1710,7 +1767,7 @@ yyreportTree (yySemanticOption* yyx, int static void yyreportAmbiguity (yySemanticOption* yyx0, yySemanticOption* yyx1, yyGLRStack* yystackp]b4_pure_formals[) __attribute__ ((__noreturn__)); -static void +/*ARGSUSED*/ static void yyreportAmbiguity (yySemanticOption* yyx0, yySemanticOption* yyx1, yyGLRStack* yystackp]b4_pure_formals[) { @@ -1885,6 +1942,7 @@ yyprocessOneStack (yyGLRStack* yystackp, } else { + yystackp->yytops.yylookaheadStatuses[yyk] = yytrue; if (*yytokenp == YYEMPTY) { YYDPRINTF ((stderr, "Reading a token: ")); @@ -2147,6 +2205,7 @@ yyrecoverSyntaxError (yyGLRStack* yystac YYDPRINTF ((stderr, "Starting parse\n")); + yychar = YYEMPTY; yytoken = YYEMPTY; yylval = yyval_default; ]b4_location_if([ @@ -2221,7 +2280,10 @@ b4_syncline([EMAIL PROTECTED]@], [EMAIL PROTECTED]@])])dnl { YY_SYMBOL_PRINT ("Shifting", yytoken, &yylval, &yylloc); if (yytoken != YYEOF) - yytoken = YYEMPTY; + { + yychar = YYEMPTY; + yytoken = YYEMPTY; + } yyposn += 1; yyglrShift (&yystack, 0, yyaction, yyposn, &yylval, &yylloc); if (0 < yystack.yyerrState) @@ -2244,6 +2306,9 @@ b4_syncline([EMAIL PROTECTED]@], [EMAIL PROTECTED]@])])dnl size_t yys; size_t yyn = yystack.yytops.yysize; + for (yys = 0; yys < yyn; yys += 1) + yystackp->yytops.yylookaheadStatuses[yys] = yychar != YYEMPTY; + /* yyprocessOneStack returns one of three things: - An error flag. If the caller is yyprocessOneStack, it @@ -2274,6 +2339,7 @@ b4_syncline([EMAIL PROTECTED]@], [EMAIL PROTECTED]@])])dnl before the loop to make sure the user destructor for yylval isn't called twice. */ yytoken_to_shift = yytoken; + yychar = YYEMPTY; yytoken = YYEMPTY; yyposn += 1; for (yys = 0; yys < yyn; yys += 1) Index: tests/glr-regression.at =================================================================== RCS file: /sources/bison/bison/tests/glr-regression.at,v retrieving revision 1.24 diff -p -u -r1.24 glr-regression.at --- tests/glr-regression.at 6 Jan 2006 01:07:37 -0000 1.24 +++ tests/glr-regression.at 6 Jan 2006 20:25:55 -0000 @@ -816,7 +816,7 @@ AT_CLEANUP ## Corrupted semantic options if user action cuts parse. ## ## ------------------------------------------------------------------------- ## -AT_SETUP([Corrupted semantic options if user action cuts parse.]) +AT_SETUP([Corrupted semantic options if user action cuts parse]) AT_DATA_GRAMMAR([glr-regr10.y], [[ @@ -858,7 +858,7 @@ main (void) { int index; for (index = 0; index < GARBAGE_SIZE; index+=1) - garbage[index] = 132; + garbage[index] = 108; return yyparse (); } ]]) @@ -877,7 +877,7 @@ AT_CLEANUP ## Undesirable destructors if user action cuts parse. ## ## ------------------------------------------------------------------------- ## -AT_SETUP([Undesirable destructors if user action cuts parse.]) +AT_SETUP([Undesirable destructors if user action cuts parse]) AT_DATA_GRAMMAR([glr-regr11.y], [[ @@ -943,7 +943,7 @@ AT_CLEANUP ## Leaked merged semantic value if user action cuts parse. ## ## ------------------------------------------------------------------------- ## -AT_SETUP([Leaked merged semantic value if user action cuts parse.]) +AT_SETUP([Leaked merged semantic value if user action cuts parse]) AT_DATA_GRAMMAR([glr-regr12.y], [[ @@ -974,7 +974,8 @@ static int merge (YYSTYPE s1, YYSTYPE s2) { /* Not invoked. */ - return 0; + char dummy = s1.dummy + s2.dummy; + return dummy; } static void @@ -1010,3 +1011,350 @@ AT_COMPILE([glr-regr12]) AT_CHECK([[./glr-regr12]], 0, [], []) AT_CLEANUP + + +## ------------------------------------------------------------------------- ## +## Incorrect lookahead during deterministic GLR. See ## +## <http://lists.gnu.org/archive/html/help-bison/2005-07/msg00017.html>. ## +## ------------------------------------------------------------------------- ## + +AT_SETUP([Incorrect lookahead during deterministic GLR]) + +AT_DATA_GRAMMAR([glr-regr13.y], +[[ +/* Tests: + - Defaulted state with initial yychar: yychar == YYEMPTY. + - Nondefaulted state: yychar != YYEMPTY. + - Defaulted state after lookahead: yychar != YYEMPTY. + - Defaulted state after shift: yychar == YYEMPTY. */ + +%{ + #include <stdio.h> + static void yyerror (char const *); + static int yylex (void); + static void print_look_ahead (char const *); + #define USE(value) +%} + +%union { char value; } +%type <value> 'a' 'b' +%glr-parser +%locations + +%% + +start: + defstate_init defstate_shift 'b' { + USE ($3); + print_look_ahead ("start <- defstate_init defstate_shift 'b'"); + } + ; +defstate_init: + { + print_look_ahead ("defstate_init <- empty string"); + } + ; +defstate_shift: + nondefstate defstate_look 'a' { + USE ($3); + print_look_ahead ("defstate_shift <- nondefstate defstate_look 'a'"); + } + ; +defstate_look: + { + print_look_ahead ("defstate_look <- empty string"); + } + ; +nondefstate: + { + print_look_ahead ("nondefstate <- empty string"); + } + | 'b' { + USE ($1); + print_look_ahead ("nondefstate <- 'b'"); + } + ; + +%% + +static void +yyerror (char const *msg) +{ + fprintf (stderr, "%s\n", msg); +} + +static int +yylex (void) +{ + static char const *input = "ab"; + static int index = 0; + yylloc.first_line = yylloc.last_line = 1; + yylloc.first_column = yylloc.last_column = index+1; + yylval.value = input[index] + 'A' - 'a'; + return input[index++]; +} + +static void +print_look_ahead (char const *reduction) +{ + printf ("%s:\n yychar=", reduction); + if (yychar == YYEMPTY) + printf ("YYEMPTY"); + else if (yychar == YYEOF) + printf ("YYEOF"); + else + { + printf ("'%c', yylval='", yychar); + if (yylval.value > ' ') + printf ("%c", yylval.value); + printf ("', yylloc=(%d,%d),(%d,%d)", + yylloc.first_line, yylloc.first_column, + yylloc.last_line, yylloc.last_column); + } + printf ("\n"); +} + +int +main (void) +{ + yychar = '#'; /* Not a token in the grammar. */ + yylval.value = '!'; + return yyparse (); +} +]]) + +AT_CHECK([[bison -t -o glr-regr13.c glr-regr13.y]], 0, [], []) +AT_COMPILE([glr-regr13]) + +AT_CHECK([[./glr-regr13]], 0, +[defstate_init <- empty string: + yychar=YYEMPTY +nondefstate <- empty string: + yychar='a', yylval='A', yylloc=(1,1),(1,1) +defstate_look <- empty string: + yychar='a', yylval='A', yylloc=(1,1),(1,1) +defstate_shift <- nondefstate defstate_look 'a': + yychar=YYEMPTY +start <- defstate_init defstate_shift 'b': + yychar=YYEMPTY +], []) + +AT_CLEANUP + + +## ------------------------------------------------------------------------- ## +## Incorrect lookahead during nondeterministic GLR. ## +## ------------------------------------------------------------------------- ## + +AT_SETUP([Incorrect lookahead during nondeterministic GLR]) + +AT_DATA_GRAMMAR([glr-regr14.y], +[[ +/* Tests: + - Conflicting actions (split-off parse, which copies lookahead status, + which is necessarily yytrue) and nonconflicting actions (non-split-off + parse) for nondefaulted state: yychar != YYEMPTY. + - Merged deferred actions (lookahead status and RHS from different stack + than the target state) and nonmerged deferred actions (same stack). + - Defaulted state after lookahead: yychar != YYEMPTY. + - Defaulted state after shift: yychar == YYEMPTY. + - yychar != YYEMPTY but lookahead status is yyfalse (a previous stack has + seen the lookahead but current stack has not). + - Exceeding stack capacity (stack explosion), and thus reallocating + lookahead status array. + Note that it does not seem possible to see the initial yychar value during + nondeterministic operation since: + - In order to preserve the initial yychar, only defaulted states may be + entered. + - If only defaulted states are entered, there are no conflicts, so + nondeterministic operation does not start. */ + +%union { char value; } + +%{ + #include <stdio.h> + static void yyerror (char const *); + static int yylex (void); + static void print_look_ahead (char const *); + static char merge (union YYSTYPE, union YYSTYPE); + #define USE(value) +%} + +%type <value> 'a' 'b' 'c' 'd' stack_explosion +%glr-parser +%locations + +%% + +start: + merge 'c' stack_explosion { + USE ($2); USE ($3); + print_look_ahead ("start <- merge 'c' stack_explosion"); + } + ; + +/* When merging the 2 deferred actions, the lookahead statuses are + different. */ +merge: + nonconflict1 'a' 'b' nonconflict2 %dprec 1 { + USE ($2); USE ($3); + print_look_ahead ("merge <- nonconflict1 'a' 'b' nonconflict2"); + } + | conflict defstate_look 'a' nonconflict2 'b' defstate_shift %dprec 2 { + USE ($3); USE ($5); + print_look_ahead ("merge <- conflict defstate_look 'a' nonconflict2 'b'" + " defstate_shift"); + } + ; + +nonconflict1: + { + print_look_ahead ("nonconflict1 <- empty string"); + } + ; +nonconflict2: + { + print_look_ahead ("nonconflict2 <- empty string"); + } + | 'a' { + USE ($1); + print_look_ahead ("nonconflict2 <- 'a'"); + } + ; +conflict: + { + print_look_ahead ("conflict <- empty string"); + } + ; +defstate_look: + { + print_look_ahead ("defstate_look <- empty string"); + } + ; + +/* yychar != YYEMPTY but lookahead status is yyfalse. */ +defstate_shift: + { + print_look_ahead ("defstate_shift <- empty string"); + } + ; + +stack_explosion: + { $$ = '\0'; } + | alt1 stack_explosion %merge<merge> { $$ = $2; } + | alt2 stack_explosion %merge<merge> { $$ = $2; } + | alt3 stack_explosion %merge<merge> { $$ = $2; } + ; +alt1: + 'd' no_look { + USE ($1); + if (yychar != 'd' && yychar != YYEOF) + { + fprintf (stderr, "Incorrect lookahead during stack explosion.\n"); + } + } + ; +alt2: + 'd' no_look { + USE ($1); + if (yychar != 'd' && yychar != YYEOF) + { + fprintf (stderr, "Incorrect lookahead during stack explosion.\n"); + } + } + ; +alt3: + 'd' no_look { + USE ($1); + if (yychar != 'd' && yychar != YYEOF) + { + fprintf (stderr, "Incorrect lookahead during stack explosion.\n"); + } + } + ; +no_look: + { + if (yychar != YYEMPTY) + { + fprintf (stderr, + "Found lookahead where shouldn't during stack explosion.\n"); + } + } + ; + +%% + +static void +yyerror (char const *msg) +{ + fprintf (stderr, "%s\n", msg); +} + +static int +yylex (void) +{ + static char const *input = "abcdddd"; + static int index = 0; + yylloc.first_line = yylloc.last_line = 1; + yylloc.first_column = yylloc.last_column = index+1; + yylval.value = input[index] + 'A' - 'a'; + return input[index++]; +} + +static void +print_look_ahead (char const *reduction) +{ + printf ("%s:\n yychar=", reduction); + if (yychar == YYEMPTY) + printf ("YYEMPTY"); + else if (yychar == YYEOF) + printf ("YYEOF"); + else + { + printf ("'%c', yylval='", yychar); + if (yylval.value > ' ') + printf ("%c", yylval.value); + printf ("', yylloc=(%d,%d),(%d,%d)", + yylloc.first_line, yylloc.first_column, + yylloc.last_line, yylloc.last_column); + } + printf ("\n"); +} + +static char +merge (union YYSTYPE s1, union YYSTYPE s2) +{ + char dummy = s1.value + s2.value; + return dummy; +} + +int +main (void) +{ + yychar = '#'; /* Not a token in the grammar. */ + yylval.value = '!'; + return yyparse (); +} +]]) + +AT_CHECK([[bison -t -o glr-regr14.c glr-regr14.y]], 0, [], +[glr-regr14.y: conflicts: 3 reduce/reduce +]) +AT_COMPILE([glr-regr14]) + +AT_CHECK([[./glr-regr14]], 0, +[conflict <- empty string: + yychar='a', yylval='A', yylloc=(1,1),(1,1) +defstate_look <- empty string: + yychar='a', yylval='A', yylloc=(1,1),(1,1) +nonconflict2 <- empty string: + yychar='b', yylval='B', yylloc=(1,2),(1,2) +defstate_shift <- empty string: + yychar=YYEMPTY +merge <- conflict defstate_look 'a' nonconflict2 'b' defstate_shift: + yychar=YYEMPTY +start <- merge 'c' stack_explosion: + yychar=YYEOF +], []) + +AT_CLEANUP _______________________________________________ Help-bison@gnu.org http://lists.gnu.org/mailman/listinfo/help-bison