Hi all, please find attached a patch for ticket 2550: "Lemon parser may accept input before EOF is seen."
I'm sending it to the list because I do not seem to be able to pass the abbreviated Turing test that guards attaching files to the ticket. :( The patch appears to work here, where several Lemon-generated parsers are used in a context with heavy regression tests. Patch hereby released to the public. Enjoy. Vincent.
Index: lemon.c =================================================================== --- lemon.c (revision 3121) +++ lemon.c (working copy) @@ -42,6 +42,7 @@ /********** From the file "build.h" ************************************/ void FindRulePrecedences(); void FindFirstSets(); +void FindStartSymbol(); void FindStates(); void FindLinks(); void FindFollowSets(); @@ -244,6 +245,7 @@ char *tokentype; /* Type of terminal symbols in the parser stack */ char *vartype; /* The default type of non-terminal symbols */ char *start; /* Name of the start symbol for the grammar */ + struct symbol *startsym; /* Start symbol for the grammar */ char *stacksize; /* Size of the parser stack */ char *include; /* Code to put at the start of the C file */ int includeln; /* Line number for start of include code */ @@ -660,19 +662,12 @@ return; } -/* Compute all LR(0) states for the grammar. Links -** are added to between some states so that the LR(1) follow sets -** can be computed later. -*/ -PRIVATE struct state *getstate(/* struct lemon * */); /* forward reference */ -void FindStates(lemp) +void FindStartSymbol(lemp) struct lemon *lemp; { struct symbol *sp; struct rule *rp; - Configlist_init(); - /* Find the start symbol */ if( lemp->start ){ sp = Symbol_find(lemp->start); @@ -704,10 +699,23 @@ } } + lemp->startsym = sp; +} + +/* Compute all LR(0) states for the grammar. Links +** are added to between some states so that the LR(1) follow sets +** can be computed later. 
+*/ +PRIVATE struct state *getstate(/* struct lemon * */); /* forward reference */ +void FindStates(lemp) +struct lemon *lemp; +{ + struct rule *rp; + /* The basis configuration set for the first state ** is all rules which have the start symbol as their ** left-hand side */ - for(rp=sp->rule; rp; rp=rp->nextlhs){ + for(rp=lemp->startsym->rule; rp; rp=rp->nextlhs){ struct config *newcfp; newcfp = Configlist_addbasis(rp,0); SetAdd(newcfp->fws,0); @@ -921,7 +929,6 @@ int i,j; struct config *cfp; struct state *stp; - struct symbol *sp; struct rule *rp; /* Add all of the reduce actions @@ -944,16 +951,10 @@ } /* Add the accepting token */ - if( lemp->start ){ - sp = Symbol_find(lemp->start); - if( sp==0 ) sp = lemp->rule->lhs; - }else{ - sp = lemp->rule->lhs; - } /* Add to the first state (which is always the starting state of the ** finite state machine) an action to ACCEPT if the lookahead is the ** start nonterminal. */ - Action_add(&lemp->sorted[0]->ap,ACCEPT,sp,0); + Action_add(&lemp->sorted[0]->ap,ACCEPT,lemp->startsym,0); /* Resolve conflicts */ for(i=0; i<lemp->nstate; i++){ @@ -1461,6 +1462,11 @@ ** nonterminal */ FindFirstSets(&lem); + Configlist_init(); + + /* Determine the start symbol */ + FindStartSymbol(&lem); + /* Compute all LR(0) states. Also record follow-set propagation ** links so that the follow-set can be computed later */ lem.nstate = 0; @@ -3941,6 +3947,7 @@ } if( ap->type!=REDUCE ) continue; rp = ap->x.rp; + if (rp->lhs == lemp->startsym) continue; /* do not default a start rule to ensure reduction only on EOF */ if( rp==rbest ) continue; n = 1; for(ap2=ap->next; ap2; ap2=ap2->next){
----------------------------------------------------------------------------- To unsubscribe, send email to [EMAIL PROTECTED] -----------------------------------------------------------------------------