# HG changeset patch # User Anthony Martin # Date 1322743628 -3600 # Node ID 3dd2bfb35f8163fdd89f8e17bed639d706f35107 # Parent f23a243e5c73e63b226208799180a0aa6cb95242 yacc: fixes and support for the Go grammar This description is an elaboration of the one in my mail sent to 9fans†. There were two small bugs in the patch that I posted. I've tested this change on all the .y files in /sys/src/cmd and it appears to work fine. 1. The %error-verbose directive implemented by GNU Bison is ignored. 2. A description of the final grammar is printed before the state descriptions in y.output. This is needed to produce the fancy error messages that Russ added to the Go compiler. 3. The 'x' format for character literals is now used instead of prefixing with a space. This was the easiest way to appease the Go code in src/cmd/gc/lex.c:/yytfix. I also think it makes error messages look nicer. 4. The YYEMPTY define is now used to clear the lookahead token (instead of an explicit -1). Bison uses -2 for this purpose, and the Go compiler depends on this value to determine the correct line number for parse errors. 5. yychar and yystate are now globals so they can be inspected by external code. This is the second part of the change to support Go's fancy error messages. 6. C++-style comments are handled in actions. Also, Russ already submitted the changes I made to the error generation script in the Go repository (in changeset 7a3dc6d535b7). 
† http://9fans.net/archive/2011/11/81 R=nixiedev, rminnich CC=nix-dev http://codereview.appspot.com/5415048 Committer: Noah Evans diff -r f23a243e5c73 -r 3dd2bfb35f81 sys/lib/yaccpar --- a/sys/lib/yaccpar Thu Dec 01 11:19:13 2011 +0100 +++ b/sys/lib/yaccpar Thu Dec 01 13:47:08 2011 +0100 @@ -1,5 +1,6 @@ #define YYFLAG -1000 -#define yyclearin yychar = -1 +#define YYERROR goto yyerrlab +#define yyclearin yychar = YYEMPTY #define yyerrok yyerrflag = 0 #ifdef yydebug @@ -42,15 +43,17 @@ return x; } -long +int yychar; + +int yylex1(void) { - long yychar; long *t3p; int c; yychar = yylex(); if(yychar <= 0) { + yychar = 0; c = yytok1[0]; goto out; } @@ -82,6 +85,8 @@ return c; } +int yystate; + int yyparse(void) { @@ -91,8 +96,8 @@ int yys; } yys[YYMAXDEPTH], *yyp, *yypt; short *yyxi; - int yyj, yym, yystate, yyn, yyg; - long yychar; + int yyj, yym, yyn, yyg; + int yyc; YYSTYPE save1, save2; int save3, save4; @@ -102,7 +107,8 @@ save4 = yyerrflag; yystate = 0; - yychar = -1; + yychar = YYEMPTY; + yyc = YYEMPTY; yynerrs = 0; yyerrflag = 0; yyp = &yys[-1]; @@ -126,7 +132,7 @@ yystack: /* put a state and value onto the stack */ if(yydebug >= 4) - fprint(2, "char %s in %s", yytokname(yychar), yystatname(yystate)); + fprint(2, "char %s in %s", yytokname(yyc), yystatname(yystate)); yyp++; if(yyp >= &yys[YYMAXDEPTH]) { @@ -140,14 +146,15 @@ yyn = yypact[yystate]; if(yyn <= YYFLAG) goto yydefault; /* simple state */ - if(yychar < 0) - yychar = yylex1(); - yyn += yychar; + if(yyc < 0) + yyc = yylex1(); + yyn += yyc; if(yyn < 0 || yyn >= YYLAST) goto yydefault; yyn = yyact[yyn]; - if(yychk[yyn] == yychar) { /* valid shift */ - yychar = -1; + if(yychk[yyn] == yyc) { /* valid shift */ + yyc = YYEMPTY; + yychar = YYEMPTY; yyval = yylval; yystate = yyn; if(yyerrflag > 0) @@ -159,8 +166,8 @@ /* default state action */ yyn = yydef[yystate]; if(yyn == -2) { - if(yychar < 0) - yychar = yylex1(); + if(yyc < 0) + yyc = yylex1(); /* look through exception table */ for(yyxi=yyexca;; yyxi+=2) 
@@ -168,23 +175,26 @@ break; for(yyxi += 2;; yyxi += 2) { yyn = yyxi[0]; - if(yyn < 0 || yyn == yychar) + if(yyn < 0 || yyn == yyc) break; } yyn = yyxi[1]; - if(yyn < 0) + if(yyn < 0) { + yychar = YYEMPTY; goto ret0; + } } if(yyn == 0) { /* error ... attempt to resume parsing */ switch(yyerrflag) { case 0: /* brand new error */ yyerror("syntax error"); + if(yydebug >= 2) { + fprint(2, "%s", yystatname(yystate)); + fprint(2, "saw %s\n", yytokname(yyc)); + } +yyerrlab: yynerrs++; - if(yydebug >= 1) { - fprint(2, "%s", yystatname(yystate)); - fprint(2, "saw %s\n", yytokname(yychar)); - } case 1: case 2: /* incompletely recovered error ... try again */ @@ -210,10 +220,11 @@ case 3: /* no shift yet; clobber input char */ if(yydebug >= 2) - fprint(2, "error recovery discards %s\n", yytokname(yychar)); - if(yychar == YYEOFCODE) + fprint(2, "error recovery discards %s\n", yytokname(yyc)); + if(yyc == YYEOFCODE) goto ret1; - yychar = -1; + yyc = YYEMPTY; + yychar = YYEMPTY; goto yynewstate; /* try again in the same state */ } } diff -r f23a243e5c73 -r 3dd2bfb35f81 sys/lib/yaccpars --- a/sys/lib/yaccpars Thu Dec 01 11:19:13 2011 +0100 +++ b/sys/lib/yaccpars Thu Dec 01 13:47:08 2011 +0100 @@ -2,7 +2,7 @@ #define YYERROR goto yyerrlab #define YYACCEPT return(0) #define YYABORT return(1) -#define yyclearin yychar = -1 +#define yyclearin yychar = YYEMPTY #define yyerrok yyerrflag = 0 #ifdef yydebug @@ -42,15 +42,17 @@ return x; } -long +int yychar; + +int yylex1(void) { - long yychar; long *t3p; int c; yychar = yylex(); if(yychar <= 0) { + yychar = 0; c = yytok1[0]; goto out; } @@ -82,6 +84,8 @@ return c; } +int yystate; + int yyparse(void) { @@ -91,10 +95,10 @@ int yys; } yys[YYMAXDEPTH], *yyp, *yypt; short *yyxi; - int yyj, yym, yystate, yyn, yyg; + int yyj, yym, yyn, yyg; + int yyc; YYSTYPE save1, save2; int save3, save4; - long yychar; save1 = yylval; save2 = yyval; @@ -102,7 +106,8 @@ save4 = yyerrflag; yystate = 0; - yychar = -1; + yychar = YYEMPTY; + yyc = YYEMPTY; 
yynerrs = 0; yyerrflag = 0; yyp = &yys[-1]; @@ -126,7 +131,7 @@ yystack: /* put a state and value onto the stack */ if(yydebug >= 4) - printf("char %s in %s", yytokname(yychar), yystatname(yystate)); + printf("char %s in %s", yytokname(yyc), yystatname(yystate)); yyp++; if(yyp >= &yys[YYMAXDEPTH]) { @@ -140,14 +145,15 @@ yyn = yypact[yystate]; if(yyn <= YYFLAG) goto yydefault; /* simple state */ - if(yychar < 0) - yychar = yylex1(); - yyn += yychar; + if(yyc < 0) + yyc = yylex1(); + yyn += yyc; if(yyn < 0 || yyn >= YYLAST) goto yydefault; yyn = yyact[yyn]; - if(yychk[yyn] == yychar) { /* valid shift */ - yychar = -1; + if(yychk[yyn] == yyc) { /* valid shift */ + yyc = YYEMPTY; + yychar = YYEMPTY; yyval = yylval; yystate = yyn; if(yyerrflag > 0) @@ -159,8 +165,8 @@ /* default state action */ yyn = yydef[yystate]; if(yyn == -2) { - if(yychar < 0) - yychar = yylex1(); + if(yyc < 0) + yyc = yylex1(); /* look through exception table */ for(yyxi=yyexca;; yyxi+=2) @@ -168,21 +174,23 @@ break; for(yyxi += 2;; yyxi += 2) { yyn = yyxi[0]; - if(yyn < 0 || yyn == yychar) + if(yyn < 0 || yyn == yyc) break; } yyn = yyxi[1]; - if(yyn < 0) + if(yyn < 0) { + yychar = YYEMPTY; goto ret0; + } } if(yyn == 0) { /* error ... 
attempt to resume parsing */ switch(yyerrflag) { case 0: /* brand new error */ yyerror("syntax error"); - if(yydebug >= 1) { + if(yydebug >= 2) { printf("%s", yystatname(yystate)); - printf("saw %s\n", yytokname(yychar)); + printf("saw %s\n", yytokname(yyc)); } yyerrlab: yynerrs++; @@ -210,11 +218,12 @@ goto ret1; case 3: /* no shift yet; clobber input char */ - if(yydebug >= YYEOFCODE) - printf("error recovery discards %s\n", yytokname(yychar)); - if(yychar == YYEOFCODE) + if(yydebug >= 2) + printf("error recovery discards %s\n", yytokname(yyc)); + if(yyc == YYEOFCODE) goto ret1; - yychar = -1; + yyc = YYEMPTY; + yychar = YYEMPTY; goto yynewstate; /* try again in the same state */ } } diff -r f23a243e5c73 -r 3dd2bfb35f81 sys/src/cmd/yacc.c --- a/sys/src/cmd/yacc.c Thu Dec 01 11:19:13 2011 +0100 +++ b/sys/src/cmd/yacc.c Thu Dec 01 13:47:08 2011 +0100 @@ -91,13 +91,14 @@ TYPEDEF, TYPENAME, UNION, + IGNORE, ENDFILE = 0, EMPTY = 1, WHOKNOWS = 0, OK = 1, - NOMORE = -1000, + NOMORE = -1000 }; /* macros for getting associativity and precedence levels */ @@ -316,6 +317,9 @@ "token", TERM, "type", TYPEDEF, "union", UNION, + + /* ignored bison directives */ + "error-verbose", IGNORE, 0, }; @@ -327,6 +331,7 @@ char* writem(int*); char* symnam(int); void summary(void); +void grammar(void); void error(char*, ...); void aryfil(int*, int, int); int setunion(int*, int*); @@ -382,6 +387,7 @@ cempty(); /* make a table of which nonterminals can match the empty string */ cpfir(); /* make a table of firsts of nonterminals */ stagen(); /* generate the states */ + grammar(); output(); /* write the states and the tables */ go2out(); hideprod(); @@ -401,7 +407,7 @@ finput = Bopen(parser, OREAD); if(finput == 0) - error("cannot find parser %s", parser); + error("cannot open parser %s: %r", parser); warray("yyr1", levprd, nprod); aryfil(temp1, nprod, 0); PLOOP(1, i) @@ -525,14 +531,14 @@ ; p = prdptr[-*p]; q = chcopy(sarr, nontrst[*p-NTBASE].name); - q = chcopy(q, ": "); + q = chcopy(q, 
":"); for(;;) { *q = ' '; p++; - if(p == pp) - *q = '.'; q++; *q = '\0'; + if(p == pp) + q = chcopy(q, ". "); i = *p; if(i <= 0) break; @@ -544,7 +550,7 @@ /* an item calling for a reduction */ i = *pp; if(i < 0 ) { - q = chcopy(q, " ("); + q = chcopy(q, " ("); sprint(q, "%d)", -i); } return sarr; @@ -556,12 +562,40 @@ char* symnam(int i) { - char* cp; + return (i >= NTBASE)? nontrst[i-NTBASE].name: tokset[i].name; +} - cp = (i >= NTBASE)? nontrst[i-NTBASE].name: tokset[i].name; - if(*cp == ' ') - cp++; - return cp; +/* + * output the grammar rules on y.output + */ +void +grammar(void) +{ + int i, j, n, prev; + int *p; + + if(foutput == 0) + return; + + Bprint(foutput, "\nGrammar\n"); + prev = 0; + n = 0; + PLOOP(0, i) { + p = prdptr[i]; + if(p[0] == prev) + Bprint(foutput, "\t%d \t|", n++); + else { + Bprint(foutput, "\n\t%d %s:", n++, symnam(p[0])); + if(p[1] <= 0) + Bprint(foutput, " /* empty */"); + } + for(j = 1; p[j] > 0; j++) { + Bprint(foutput, " %s", symnam(p[j])); + } + Bputc(foutput, '\n'); + prev = p[0]; + } + Bprint(foutput, "\n"); } /* @@ -614,10 +648,13 @@ void error(char *s, ...) 
{ + va_list arg; nerrors++; fprint(2, "\n fatal error:"); - fprint(2, s, (&s)[1]); + va_start(arg, s); + vfprint(2, s, arg); + va_end(arg); fprint(2, ", %s:%d\n", infile, lineno); if(!fatfl) return; @@ -1357,6 +1394,10 @@ t = gettok(); continue; + case IGNORE: + t = gettok(); + continue; + default: error("syntax error"); } @@ -1520,6 +1561,7 @@ Bterm(faction); Bprint(ftable, "#define YYEOFCODE %d\n", 1); Bprint(ftable, "#define YYERRCODE %d\n", 2); + Bprint(ftable, "#define YYEMPTY (%d)\n", -2); } /* @@ -1549,17 +1591,17 @@ /* establish value for token */ /* single character literal */ - if(s[0] == ' ') { + if(s[0] == '\'') { val = chartorune(&rune, &s[1]); - if(s[val+1] == 0) { + if(s[val] != '\\' && s[val+1] == '\'') { val = rune; goto out; } } /* escape sequence */ - if(s[0] == ' ' && s[1] == '\\') { - if(s[3] == 0) { + if(s[0] == '\'' && s[1] == '\\') { + if(s[3] == '\'') { /* single character escape sequence */ switch(s[2]) { case 'n': val = '\n'; break; @@ -1610,7 +1652,7 @@ for(i=ndefout; i<=ntokens; i++) { /* non-literals */ c = tokset[i].name[0]; - if(c != ' ' && c != '$') { + if(c != '\'' && c != '$') { Bprint(ftable, "#define %s %d\n", tokset[i].name, tokset[i].value); if(fdefine) @@ -1705,7 +1747,7 @@ case '"': case '\'': match = c; - tokname[0] = ' '; + tokname[0] = '\''; i = 1; for(;;) { c = Bgetrune(finput); @@ -1724,6 +1766,8 @@ if(i < NAMESIZE) i += c; } + tokname[i] = '\''; + i++; break; case '%': @@ -1815,7 +1859,7 @@ { int i; - if(s[0] == ' ') + if(s[0] == '\'') t = 0; TLOOP(i) if(!strcmp(s, tokset[i].name)) @@ -2059,22 +2103,30 @@ /* look for comments */ Bputrune(faction, c); c = Bgetrune(finput); - if(c != '*') + switch(c) { + case '/': + while(c != Beof) { + if(c == '\n') + goto swt; + Bputrune(faction, c); + c = Bgetrune(finput); + } + break; + case '*': + while(c != Beof) { + while(c == '*') { + Bputrune(faction, c); + if((c = Bgetrune(finput)) == '/') + goto lcopy; + } + if(c == '\n') + lineno++; + Bputrune(faction, c); + c = 
Bgetrune(finput); + } + break; + default: goto swt; - - /* it really is a comment */ - Bputrune(faction, c); - c = Bgetrune(finput); - while(c >= 0) { - while(c == '*') { - Bputrune(faction, c); - if((c=Bgetrune(finput)) == '/') - goto lcopy; - } - Bputrune(faction, c); - if(c == '\n') - lineno++; - c = Bgetrune(finput); } error("EOF inside comment"); @@ -2635,7 +2687,7 @@ case '$': break; default: - error("bad tempfile"); + error("bad tempfile %s", tempname); } break; }