Attached is a modified version of p9p yacc that
supports the Go grammar.  I'll be sending a
version of Plan 9 yacc later today.

The following is a description of the changes.

  1. The %error-verbose directive is ignored.

  2. A description of the final grammar is
     printed before the state descriptions
     in y.output.

  3. Character-literal token names are now
     stored in the quoted 'x' form instead of
     being prefixed with a space.

  4. The YYEMPTY define is now used to clear
     the lookahead token (instead of an explicit
     negative one).

  5. Make yychar and yystate globals so they
     can be inspected by external code.

  6. Support C++-style // comments in actions.

  7. Add a usage message.

  8. Fix a few uses of sprint and strcpy.


I've also sent out a changeset to the Go
development list which adds support for
using Plan 9 yacc to generate the special
errors.

One tiny nit is that Plan 9 uses the name
yytoknames for debugging where Bison uses
yytname.  I've just used sed for this.

Any questions?
  Anthony
diff -r 44a7194d00cf lib/yaccpar
--- a/lib/yaccpar       Sat Nov 12 11:52:10 2011 -0800
+++ b/lib/yaccpar       Tue Nov 15 13:10:13 2011 -0800
@@ -2,7 +2,7 @@
 #define YYERROR                goto yyerrlab
 #define YYACCEPT       return(0)
 #define YYABORT                return(1)
-#define        yyclearin       yychar = -1
+#define        yyclearin       yychar = YYEMPTY
 #define        yyerrok         yyerrflag = 0
 
 #ifdef yydebug
@@ -51,6 +51,8 @@
        return x;
 }
 
+long yychar;
+
 static long
 #ifdef YYARG
 yylex1(struct Yyarg *yyarg)
@@ -58,7 +60,6 @@
 yylex1(void)
 #endif
 {
-       long yychar;
        const long *t3p;
        int c;
 
@@ -68,6 +69,7 @@
        yychar = yylex();
 #endif
        if(yychar <= 0) {
+               yychar = 0;
                c = yytok1[0];
                goto out;
        }
@@ -99,6 +101,8 @@
        return c;
 }
 
+int yystate;
+
 int
 #ifdef YYARG
 yyparse(struct Yyarg *yyarg)
@@ -112,8 +116,8 @@
                int     yys;
        } yys[YYMAXDEPTH], *yyp, *yypt;
        const short *yyxi;
-       int yyj, yym, yystate, yyn, yyg;
-       long yychar;
+       int yyj, yym, yyn, yyg;
+       long yyc;
 #ifndef YYARG
        YYSTYPE save1, save2;
        int save3, save4;
@@ -125,7 +129,8 @@
 #endif
 
        yystate = 0;
-       yychar = -1;
+       yychar = YYEMPTY;
+       yyc = YYEMPTY;
        yynerrs = 0;
        yyerrflag = 0;
        yyp = &yys[-1];
@@ -151,7 +156,7 @@
 yystack:
        /* put a state and value onto the stack */
        if(yydebug >= 4)
-               fprint(2, "char %s in %s", yytokname(yychar), 
yystatname(yystate));
+               fprint(2, "char %s in %s", yytokname(yyc), yystatname(yystate));
 
        yyp++;
        if(yyp >= &yys[YYMAXDEPTH]) {
@@ -165,18 +170,19 @@
        yyn = yypact[yystate];
        if(yyn <= YYFLAG)
                goto yydefault; /* simple state */
-       if(yychar < 0)
+       if(yyc < 0)
 #ifdef YYARG
-               yychar = yylex1(yyarg);
+               yyc = yylex1(yyarg);
 #else
-               yychar = yylex1();
+               yyc = yylex1();
 #endif
-       yyn += yychar;
+       yyn += yyc;
        if(yyn < 0 || yyn >= YYLAST)
                goto yydefault;
        yyn = yyact[yyn];
-       if(yychk[yyn] == yychar) { /* valid shift */
-               yychar = -1;
+       if(yychk[yyn] == yyc) { /* valid shift */
+               yyc = YYEMPTY;
+               yychar = YYEMPTY;
                yyval = yylval;
                yystate = yyn;
                if(yyerrflag > 0)
@@ -188,11 +194,11 @@
        /* default state action */
        yyn = yydef[yystate];
        if(yyn == -2) {
-               if(yychar < 0)
+               if(yyc < 0)
 #ifdef YYARG
-               yychar = yylex1(yyarg);
+                       yyc = yylex1(yyarg);
 #else
-               yychar = yylex1();
+                       yyc = yylex1();
 #endif
 
                /* look through exception table */
@@ -201,21 +207,24 @@
                                break;
                for(yyxi += 2;; yyxi += 2) {
                        yyn = yyxi[0];
-                       if(yyn < 0 || yyn == yychar)
+                       if(yyn < 0 || yyn == yyc)
                                break;
                }
                yyn = yyxi[1];
-               if(yyn < 0)
+               if(yyn < 0) {
+                       yyc = YYEMPTY;
+                       yychar = YYEMPTY;
                        goto ret0;
+               }
        }
        if(yyn == 0) {
                /* error ... attempt to resume parsing */
                switch(yyerrflag) {
                case 0:   /* brand new error */
                        yyerror("syntax error");
-                       if(yydebug >= 1) {
+                       if(yydebug >= 2) {
                                fprint(2, "%s", yystatname(yystate));
-                               fprint(2, "saw %s\n", yytokname(yychar));
+                               fprint(2, "saw %s\n", yytokname(yyc));
                        }
                        goto yyerrlab;
                yyerrlab:
@@ -245,10 +254,11 @@
 
                case 3:  /* no shift yet; clobber input char */
                        if(yydebug >= 2)
-                               fprint(2, "error recovery discards %s\n", 
yytokname(yychar));
-                       if(yychar == YYEOFCODE)
+                               fprint(2, "error recovery discards %s\n", 
yytokname(yyc));
+                       if(yyc == YYEOFCODE)
                                goto ret1;
-                       yychar = -1;
+                       yyc = YYEMPTY;
+                       yychar = YYEMPTY;
                        goto yynewstate;   /* try again in the same state */
                }
        }
diff -r 44a7194d00cf src/cmd/yacc.c
--- a/src/cmd/yacc.c    Sat Nov 12 11:52:10 2011 -0800
+++ b/src/cmd/yacc.c    Tue Nov 15 13:10:13 2011 -0800
@@ -92,6 +92,7 @@
        TYPEDEF,
        TYPENAME,
        UNION,
+       IGNORE,
 
        ENDFILE         = 0,
 
@@ -319,6 +320,9 @@
        "token",        TERM,
        "type",         TYPEDEF,
        "union",        UNION,
+
+       /* ignored bison directives */
+       "error-verbose",        IGNORE,
        0,
 };
 
@@ -330,6 +334,7 @@
 char*  writem(int*);
 char*  symnam(int);
 void   summary(void);
+void   grammar(void);
 void   error(char*, ...);
 void   aryfil(int*, int, int);
 int    setunion(int*, int*);
@@ -388,6 +393,7 @@
        cempty();               /* make a table of which nonterminals can match 
the empty string */
        cpfir();                /* make a table of firsts of nonterminals */
        stagen();               /* generate the states */
+       grammar();
        output();               /* write the states and the tables */
        go2out();
        hideprod();
@@ -531,14 +537,14 @@
                ;
        p = prdptr[-*p];
        q = chcopy(sarr, nontrst[*p-NTBASE].name);
-       q = chcopy(q, ": ");
+       q = chcopy(q, ":");
        for(;;) {
                *q = ' ';
                p++;
-               if(p == pp)
-                       *q = '.';
                q++;
                *q = '\0';
+               if(p == pp)
+                       q = chcopy(q, ". ");
                i = *p;
                if(i <= 0)
                        break;
@@ -550,7 +556,7 @@
        /* an item calling for a reduction */
        i = *pp;
        if(i < 0 ) {
-               q = chcopy(q, "    (");
+               q = chcopy(q, "   (");
                sprint(q, "%d)", -i);
        }
        return sarr;
@@ -562,12 +568,41 @@
 char*
 symnam(int i)
 {
-       char* cp;
+       return (i >= NTBASE)? nontrst[i-NTBASE].name: tokset[i].name;
+}
 
-       cp = (i >= NTBASE)? nontrst[i-NTBASE].name: tokset[i].name;
-       if(*cp == ' ')
-               cp++;
-       return cp;
+/*
+ * output the grammar rules on y.output
+ */
+void
+grammar(void)
+{
+       int i, j, n, prev;
+       int *p;
+
+       if(foutput == 0)
+               return;
+
+       Bprint(foutput, "\nGrammar\n");
+       prev = 0;
+       n = 0;
+       PLOOP(0, i) {
+               p = prdptr[i];
+               j = 0;
+               if(p[0] == prev)
+                       Bprint(foutput, "\t%d \t|", n++);
+               else {
+                       Bprint(foutput, "\n\t%d %s:", n++, symnam(p[0]));
+                       if(p[1] <= 0)
+                               Bprint(foutput, " /* empty */");
+               }
+               for(j = 1; p[j] > 0; j++) {
+                       Bprint(foutput, " %s", symnam(p[j]));
+               }
+               Bputc(foutput, '\n');
+               prev = p[0];
+       }
+       Bprint(foutput, "\n");
 }
 
 /*
@@ -1173,10 +1208,17 @@
 }
 
 void
+usage(void)
+{
+       fprint(2, "usage: yacc [-Dn] [-vdS] [-o outputfile] [-s stem] 
grammar\n");
+       exits("usage");
+}
+
+void
 setup(int argc, char *argv[])
 {
        long c, t;
-       int i, j, fd, lev, ty, ytab, *p;
+       int i, j, lev, ty, ytab, *p;
        int vflag, dflag, stem;
        char actnm[8], *stemc, *s, dirbuf[128];
        Biobuf *fout;
@@ -1195,7 +1237,7 @@
                vflag++;
                break;
        case 'D':
-               yydebug = ARGF();
+               yydebug = EARGF(usage());
                break;
        case 'a':
                yyarg = 1;
@@ -1208,7 +1250,7 @@
                break;
        case 'o':
                ytab++;
-               ytabc = ARGF();
+               ytabc = EARGF(usage());
                break;
        case 's':
                stem++;
@@ -1221,18 +1263,11 @@
                error("illegal option: %c", ARGC());
        }ARGEND
        openup(stemc, dflag, vflag, ytab, ytabc);
-       fout = dflag?fdefine:ftable;
-       if(yyarg){
+       if(yyarg)
                Bprint(ftable, "#define\tYYARG\t1\n\n");
-       }
-       if((fd = mkstemp(ttempname)) >= 0){
-               tempname = ttempname;
-               ftemp = Bfdopen(fd, OWRITE);
-       }
-       if((fd = mkstemp(tactname)) >= 0){
-               actname = tactname;
-               faction = Bfdopen(fd, OWRITE);
-       }
+
+       ftemp = Bopen(tempname = mktemp(ttempname), OWRITE);
+       faction = Bopen(actname = mktemp(tactname), OWRITE);
        if(ftemp == 0 || faction == 0)
                error("cannot open temp file");
        if(argc < 1)
@@ -1375,6 +1410,10 @@
                t = gettok();
                continue;
 
+       case IGNORE:
+               t = gettok();
+               continue;
+
        default:
                error("syntax error");
        }
@@ -1396,8 +1435,11 @@
                Bprint(ftable, "YYSTYPE yylval;\n");
                Bprint(ftable, "YYSTYPE yyval;\n");
        }else{
-               if(dflag)
+               fout = ftable;
+               if(dflag){
+                       fout = fdefine;
                        Bprint(ftable, "#include \"%s.%s\"\n\n", stemc, FILED);
+               }
                Bprint(fout, "struct Yyarg {\n");
                Bprint(fout, "\tint\tyynerrs;\n");
                Bprint(fout, "\tint\tyyerrflag;\n");
@@ -1406,6 +1448,7 @@
                Bprint(fout, "\tYYSTYPE\tyylval;\n");
                Bprint(fout, "};\n\n");
        }
+
        prdptr[0] = mem;
 
        /* added production */
@@ -1552,6 +1595,7 @@
        Bterm(faction);
        Bprint(ftable, "#define YYEOFCODE %d\n", 1);
        Bprint(ftable, "#define YYERRCODE %d\n", 2);
+       Bprint(ftable, "#define YYEMPTY (%d)\n", -2);
 }
 
 /*
@@ -1581,17 +1625,17 @@
 
        /* establish value for token */
        /* single character literal */
-       if(s[0] == ' ') {
+       if(s[0] == '\'') {
                val = chartorune(&rune, &s[1]);
-               if(s[val+1] == 0) {
+               if(s[val+1] == '\'') {
                        val = rune;
                        goto out;
                }
        }
 
        /* escape sequence */
-       if(s[0] == ' ' && s[1] == '\\') {
-               if(s[3] == 0) {
+       if(s[0] == '\'' && s[1] == '\\') {
+               if(s[3] == '\'') {
                        /* single character escape sequence */
                        switch(s[2]) {
                        case 'n':       val = '\n'; break;
@@ -1625,6 +1669,7 @@
        val = extval++;
 
 out:
+       //print("%s = %d\n", s, val);
        tokset[ntokens].value = val;
        toklev[ntokens] = 0;
        return ntokens;
@@ -1642,7 +1687,7 @@
        for(i=ndefout; i<=ntokens; i++) {
                /* non-literals */
                c = tokset[i].name[0];
-               if(c != ' ' && c != '$') {
+               if(c != '\'' && c != '$') {
                        Bprint(ftable, "#define %s      %d\n",
                                tokset[i].name, tokset[i].value);
                        if(fdefine)
@@ -1737,7 +1782,7 @@
        case '"':
        case '\'':
                match = c;
-               tokname[0] = ' ';
+               tokname[0] = '\'';
                i = 1;
                for(;;) {
                        c = Bgetrune(finput);
@@ -1756,6 +1801,8 @@
                        if(i < NAMESIZE)
                                i += c;
                }
+               tokname[i] = '\'';
+               i++;
                break;
 
        case '%':
@@ -1847,7 +1894,7 @@
 {
        int i;
 
-       if(s[0] == ' ')
+       if(s[0] == '\'')
                t = 0;
        TLOOP(i)
                if(!strcmp(s, tokset[i].name))
@@ -1915,6 +1962,7 @@
 void
 cpycode(void)
 {
+
        long c;
 
        c = Bgetrune(finput);
@@ -1956,17 +2004,22 @@
 
        /* i is the number of lines skipped */
        i = 0;
-       if(Bgetrune(finput) != '*')
+       c = Bgetrune(finput);
+       if(c == '/'){                   /* C++ //: skip to end of line */
+               while((c = Bgetrune(finput)) != Beof)
+                       if(c == '\n')
+                               return 1;
+       }else if(c == '*'){             /* normal C comment */
+               while((c = Bgetrune(finput)) != Beof) {
+                       while(c == '*')
+                               if((c = Bgetrune(finput)) == '/')
+                                       return i;
+                       if(c == '\n')
+                               i++;
+               }
+       }else
                error("illegal comment");
-       c = Bgetrune(finput);
-       while(c != Beof) {
-               while(c == '*')
-                       if((c=Bgetrune(finput)) == '/')
-                               return i;
-               if(c == '\n')
-                       i++;
-               c = Bgetrune(finput);
-       }
+
        error("EOF inside comment");
        return 0;
 }
@@ -2094,22 +2147,30 @@
                /* look for comments */
                Bputrune(faction, c);
                c = Bgetrune(finput);
-               if(c != '*')
+               switch(c) {
+               case '/':
+                       while(c != Beof) {
+                               if(c == '\n')
+                                       goto swt;
+                               Bputrune(faction, c);
+                               c = Bgetrune(finput);
+                       }
+                       break;
+               case '*':
+                       while(c != Beof) {
+                               while(c == '*') {
+                                       Bputrune(faction, c);
+                                       if((c = Bgetrune(finput)) == '/')
+                                               goto lcopy;
+                               }
+                               Bputrune(faction, c);
+                               if(c == '\n')
+                                       lineno++;
+                               c = Bgetrune(faction);
+                       }
+                       break;
+               default:
                        goto swt;
-
-               /* it really is a comment */
-               Bputrune(faction, c);
-               c = Bgetrune(finput);
-               while(c >= 0) {
-                       while(c == '*') {
-                               Bputrune(faction, c);
-                               if((c=Bgetrune(finput)) == '/')
-                                       goto lcopy;
-                       }
-                       Bputrune(faction, c);
-                       if(c == '\n')
-                               lineno++;
-                       c = Bgetrune(finput);
                }
                error("EOF inside comment");
 
@@ -2158,26 +2219,26 @@
        char buf[256];
 
        if(vflag) {
-               sprint(buf, "%s.%s", stem, FILEU);
+               snprint(buf, sizeof buf, "%s.%s", stem, FILEU);
                foutput = Bopen(buf, OWRITE);
                if(foutput == 0)
                        error("cannot open %s", buf);
        }
        if(yydebug) {
-               sprint(buf, "%s.%s", stem, FILEDEBUG);
+               snprint(buf, sizeof buf, "%s.%s", stem, FILEDEBUG);
                if((fdebug = Bopen(buf, OWRITE)) == 0)
                        error("can't open %s", buf);
        }
        if(dflag) {
-               sprint(buf, "%s.%s", stem, FILED);
+               snprint(buf, sizeof buf, "%s.%s", stem, FILED);
                fdefine = Bopen(buf, OWRITE);
                if(fdefine == 0)
                        error("can't create %s", buf);
        }
        if(ytab == 0)
-               sprint(buf, "%s.%s", stem, OFILE);
+               snprint(buf, sizeof buf, "%s.%s", stem, OFILE);
        else
-               strcpy(buf, ytabc);
+               strecpy(buf, buf+sizeof buf, ytabc);
        ftable = Bopen(buf, OWRITE);
        if(ftable == 0)
                error("cannot open table file %s", buf);

Reply via email to