Why didn't I think of this before?  symbolName should be a method of SymbolKind.
* data/skeletons/lalr1.java (YYParser::yysymbolName): Move as... * data/skeletons/java.m4 (SymbolKind::getName): this. Make the table a static final table, not a local variable. Adjust dependencies. * doc/bison.texi (Java Parser Interface): Document i18n. (Java Parser Context Interface): Document SymbolKind. * examples/java/calc/Calc.y, tests/local.at: Adjust. --- TODO | 25 +++++++------ data/skeletons/java.m4 | 63 ++++++++++++++++++++++++++++++--- data/skeletons/lalr1.java | 74 +++------------------------------------ doc/bison.texi | 72 ++++++++++++++++++++++++++++--------- examples/java/calc/Calc.y | 4 +-- tests/local.at | 34 +++++++++--------- 6 files changed, 151 insertions(+), 121 deletions(-) diff --git a/TODO b/TODO index a1dec320..73b89db6 100644 --- a/TODO +++ b/TODO @@ -1,8 +1,21 @@ * Bison 3.6 +** Questions +*** Java +- Should i18n be part of the Lexer? Currently it's a static method of + Lexer. + +- is there a migration path that would allow to use TokenKinds in + yylex? + +*** D +- is there a way to attach yysymbol_name to the enum itself? As we did + in Java. + +- It would be better to have TokenKind as return value. Can we use + reflection to support both output types? + ** Documentation -- yyexpected_tokens/expected_tokens/expectedTokens in all the languages. - YYERRCODE, YYUNDEF, YYEOF -- i18n in Java - symbol.type_get should be kind_get, and it's not documented. - YYERRCODE and "end of file" and translation @@ -11,9 +24,6 @@ You can explicitly specify the numeric code for a token type... The token numbered as 0. -Therefore each time the scanner returns an (external) token number, -it must be mapped to the (internal) symbol number. - ** Java: EOF We should be able to redefine EOF like we do in C. 
@@ -120,11 +130,6 @@ https://www.cs.tufts.edu/~nr/cs257/archive/clinton-jefferey/lr-error-messages.pd https://research.swtch.com/yyerror http://gallium.inria.fr/~fpottier/publis/fpottier-reachability-cc2016.pdf -* D -** yylex -It would be better to have TokenKind as return value. Can we use reflexion -to support both output types? - * Modernization Fix data/skeletons/yacc.c so that it defines YYPTRDIFF_T properly for modern and older C++ compilers. Currently the code defaults to defining it to diff --git a/data/skeletons/java.m4 b/data/skeletons/java.m4 index d09890a1..ba44ce8e 100644 --- a/data/skeletons/java.m4 +++ b/data/skeletons/java.m4 @@ -174,10 +174,10 @@ m4_define([b4_declare_symbol_enum], { ]b4_symbol_foreach([b4_symbol_enum])[ - private final int code_; + private final int yycode_; SymbolKind (int n) { - this.code_ = n; + this.yycode_ = n; } private static final SymbolKind[] values_ = { @@ -185,13 +185,66 @@ m4_define([b4_declare_symbol_enum], ], b4_symbol_numbers)[ }; - static final SymbolKind get (int code) { + static final SymbolKind get(int code) { return values_[code]; } - public final int getCode () { - return this.code_; + public final int getCode() { + return this.yycode_; } + +]b4_parse_error_bmatch( +[simple\|verbose], +[[ /* Return YYSTR after stripping away unnecessary quotes and + backslashes, so that it's suitable for yyerror. The heuristic is + that double-quoting is unnecessary unless the string contains an + apostrophe, a comma, or backslash (other than backslash-backslash). + YYSTR is taken from yytname. */ + private static String yytnamerr_(String yystr) + { + if (yystr.charAt (0) == '"') + { + StringBuffer yyr = new StringBuffer(); + strip_quotes: for (int i = 1; i < yystr.length(); i++) + switch (yystr.charAt(i)) + { + case '\'': + case ',': + break strip_quotes; + + case '\\': + if (yystr.charAt(++i) != '\\') + break strip_quotes; + /* Fall through. 
*/ + default: + yyr.append(yystr.charAt(i)); + break; + + case '"': + return yyr.toString(); + } + } + return yystr; + } + + /* YYTNAME[SYMBOL-NUM] -- String name of the symbol SYMBOL-NUM. + First, the terminals, then, starting at \a YYNTOKENS_, nonterminals. */ + ]b4_typed_parser_table_define([String], [tname], [b4_tname])[ + + /* The user-facing name of this symbol. */ + public final String getName() { + return yytnamerr_(yytname_[yycode_]); + } +]], +[custom\|detailed], +[[ /* YYNAMES_[SYMBOL-NUM] -- String name of the symbol SYMBOL-NUM. + First, the terminals, then, starting at \a YYNTOKENS_, nonterminals. */ + ]b4_typed_parser_table_define([String], [names], [b4_symbol_names])[ + + /* The user-facing name of this symbol. */ + public final String getName() { + return yynames_[yycode_]; + }]])[ }; ]])]) diff --git a/data/skeletons/lalr1.java b/data/skeletons/lalr1.java index e5b0f729..813c7757 100644 --- a/data/skeletons/lalr1.java +++ b/data/skeletons/lalr1.java @@ -507,7 +507,7 @@ import java.text.MessageFormat; b4_locations_if([, Object yylocationp])[) { yycdebug (s + (yykind.getCode () < YYNTOKENS_ ? " token " : " nterm ") - + yysymbolName (yykind) + " ("]b4_locations_if([ + + yykind.getName() + " ("]b4_locations_if([ + yylocationp + ": "])[ + (yyvaluep == null ? "(null)" : yyvaluep.toString ()) + ")"); }]])[ @@ -880,7 +880,7 @@ b4_dollar_popdef[]dnl /** * The symbol kind of the lookahead token. */ - public SymbolKind getToken () + public final SymbolKind getToken () { return yytoken; } @@ -890,7 +890,7 @@ b4_dollar_popdef[]dnl /** * The location of the lookahead. */ - public ]b4_location_type[ getLocation () + public final ]b4_location_type[ getLocation () { return yylocation; } @@ -937,15 +937,6 @@ b4_dollar_popdef[]dnl } return yycount - yyoffset; } - - /** - * The user-facing name of the symbol whose (internal) number is - * YYSYMBOL. No bounds checking. 
- */ - static String yysymbolName (SymbolKind yysymbol) - { - return ]b4_parser_class[.yysymbolName (yysymbol); - } } ]b4_parse_error_bmatch( @@ -1005,7 +996,7 @@ b4_dollar_popdef[]dnl int yycount = yysyntaxErrorArguments (yyctx, yyarg, argmax); String[] yystr = new String[yycount]; for (int yyi = 0; yyi < yycount; ++yyi) - yystr[yyi] = yysymbolName (yyarg[yyi]); + yystr[yyi] = yyarg[yyi].getName(); String yyformat; switch (yycount) { @@ -1049,63 +1040,6 @@ b4_dollar_popdef[]dnl ]b4_parser_tables_define[ -]b4_parse_error_bmatch( - [simple\|verbose], -[[ /* Return YYSTR after stripping away unnecessary quotes and - backslashes, so that it's suitable for yyerror. The heuristic is - that double-quoting is unnecessary unless the string contains an - apostrophe, a comma, or backslash (other than backslash-backslash). - YYSTR is taken from yytname. */ - private static String yytnamerr_ (String yystr) - { - if (yystr.charAt (0) == '"') - { - StringBuffer yyr = new StringBuffer (); - strip_quotes: for (int i = 1; i < yystr.length (); i++) - switch (yystr.charAt (i)) - { - case '\'': - case ',': - break strip_quotes; - - case '\\': - if (yystr.charAt(++i) != '\\') - break strip_quotes; - /* Fall through. */ - default: - yyr.append (yystr.charAt (i)); - break; - - case '"': - return yyr.toString (); - } - } - return yystr; - } - - /* YYTNAME[SYMBOL-NUM] -- String name of the symbol SYMBOL-NUM. - First, the terminals, then, starting at \a YYNTOKENS_, nonterminals. */ - ]b4_typed_parser_table_define([String], [tname], [b4_tname])[ - - /* The user-facing name of the symbol whose (internal) number is - YYSYMBOL. No bounds checking. */ - static String yysymbolName (SymbolKind yysymbol) - { - return yytnamerr_ (yytname_[yysymbol.getCode ()]); - } -]], - [custom\|detailed], -[[ /* The user-facing name of the symbol whose (internal) number is - YYSYMBOL. No bounds checking. 
*/ - static String yysymbolName (SymbolKind yysymbol) - { - String[] yy_sname = - { - ]b4_symbol_names[ - }; - return yy_sname[yysymbol.getCode ()]; - }]])[ - ]b4_parse_trace_if([[ ]b4_integral_parser_table_define([rline], [b4_rline], [[YYRLINE[YYN] -- Source line where rule number YYN was defined.]])[ diff --git a/doc/bison.texi b/doc/bison.texi index c7084032..abf49b5d 100644 --- a/doc/bison.texi +++ b/doc/bison.texi @@ -13125,6 +13125,22 @@ or nonzero, full tracing. Identify the Bison version and skeleton used to generate this parser. @end deftypecv +If you enabled token internationalization (@pxref{Token I18n}), you must +provide the parser with the following function: + +@deftypecv {Static Method} {YYParser} {String} {i18n} (@code{string} @var{s}) +Return the translation of @var{s} in the user's language. As an example: + +@example +%code @{ + static ResourceBundle myResources + = ResourceBundle.getBundle("domain-name"); + static final String i18n(String s) @{ + return myResources.getString(s); + @} +@} +@end example +@end deftypecv @node Java Parser Context Interface @subsection Java Parser Context Interface @@ -13132,9 +13148,35 @@ Identify the Bison version and skeleton used to generate this parser. The parser context provides information to build error reports when you invoke @samp{%define parse.error custom}. +@defcv {Type} {YYParser} {SymbolKind} +An enum that includes all the grammar symbols, tokens and nonterminals. Its +enumerators are forged from the symbol names: + +@example +public enum SymbolKind +@{ + S_YYEOF(0), /* "end of file" */ + S_YYERROR(1), /* error */ + S_YYUNDEF(2), /* "invalid token" */ + S_BANG(3), /* "!" */ + S_PLUS(4), /* "+" */ + S_MINUS(5), /* "-" */ + [...] + S_NUM(13), /* "number" */ + S_NEG(14), /* NEG */ + S_YYACCEPT(15), /* $accept */ + S_input(16), /* input */ + S_line(17); /* line */ +@}; +@end example +@end defcv + +@deftypemethod {YYParser.SymbolKind} {String} getName () +The name of this symbol, possibly translated. 
+@end deftypemethod + @deftypemethod {YYParser.Context} {YYParser.SymbolKind} getToken () -The kind of the lookahead. Maybe return @code{null} when there is no -lookahead. +The kind of the lookahead. Return @code{null} iff there is no lookahead. @end deftypemethod @deftypemethod {YYParser.Context} {YYParser.Location} getLocation () @@ -13143,14 +13185,12 @@ The location of the lookahead. @deftypemethod {YYParser.Context} {int} getExpectedTokens (@code{YYParser.SymbolKind[]} @var{argv}, @code{int} @var{argc}) Fill @var{argv} with the expected tokens, which never includes -@code{YYSYMBOL_YYEMPTY}, @code{YYSYMBOL_YYERROR}, or -@code{YYSYMBOL_YYUNDEF}. +@code{SymbolKind.S_YYERROR}, or @code{SymbolKind.S_YYUNDEF}. Never put more than @var{argc} elements into @var{argv}, and on success return the effective number of tokens stored in @var{argv}. Return 0 if there are more than @var{argc} expected tokens, yet fill @var{argv} up to -@var{argc}. When LAC is enabled, may return a negative number on errors, -such as @code{YYENOMEM} on memory exhaustion. +@var{argc}. If @var{argv} is null, return the size needed to store all the possible values, which is always less than @code{YYNTOKENS}. @@ -13227,28 +13267,28 @@ Declarations}), then the parser no longer passes syntax error messages to Whether it uses @code{yyerror} is up to the user. -Here is a typical example of a reporting function. +Here is an example of a reporting function (@pxref{Java Parser Context +Interface}). @example -public void yyreportSyntaxError (YYParser.Context ctx) -@{ - System.err.print (ctx.getLocation () + ": syntax error"); +public void reportSyntaxError(YYParser.Context ctx) @{ + System.err.print(ctx.getLocation() + ": syntax error"); // Report the expected tokens. 
@{ final int TOKENMAX = 5; YYParser.SymbolKind[] arg = new YYParser.SymbolKind[TOKENMAX]; - int n = ctx.getExpectedTokens (arg, TOKENMAX); + int n = ctx.getExpectedTokens(arg, TOKENMAX); for (int i = 0; i < n; ++i) - System.err.print ((i == 0 ? ": expected " : " or ") - + ctx.yysymbolName (arg[i])); + System.err.print((i == 0 ? ": expected " : " or ") + + arg[i].getName()); @} // Report the unexpected token which triggered the error. @{ - YYParser.SymbolKind lookahead = ctx.getToken (); + YYParser.SymbolKind lookahead = ctx.getToken(); if (lookahead != null) - System.err.print (" before " + ctx.yysymbolName (lookahead)); + System.err.print(" before " + lookahead.getName()); @} - System.err.println (""); + System.err.println(""); @} @end example @end deftypemethod diff --git a/examples/java/calc/Calc.y b/examples/java/calc/Calc.y index 948c7f1c..8070e0c1 100644 --- a/examples/java/calc/Calc.y +++ b/examples/java/calc/Calc.y @@ -121,12 +121,12 @@ class CalcLexer implements Calc.Lexer { int n = ctx.getExpectedTokens(arg, TOKENMAX); for (int i = 0; i < n; ++i) System.err.print((i == 0 ? ": expected " : " or ") - + ctx.yysymbolName(arg[i])); + + arg[i].getName()); } { Calc.SymbolKind lookahead = ctx.getToken(); if (lookahead != null) - System.err.print(" before " + ctx.yysymbolName(lookahead)); + System.err.print(" before " + lookahead.getName()); } System.err.println(""); } diff --git a/tests/local.at b/tests/local.at index 7d63142c..d3ded8f4 100644 --- a/tests/local.at +++ b/tests/local.at @@ -958,13 +958,13 @@ class PositionReader extends BufferedReader { # FIXME: We should not hard-code "Calc". 
m4_define([AT_YYERROR_DEFINE(java)], [AT_LOCATION_IF([[public void yyerror (Calc.Location l, String m) - {]m4_bmatch(m4_defn([AT_PARSE_PARAMS]), [nerrs],[[ + {]m4_bmatch(m4_defn([AT_PARSE_PARAMS]), [nerrs], [[ ++global_nerrs; ++*nerrs;]])[ if (l == null) - System.err.println (m); + System.err.println(m); else - System.err.println (l + ": " + m); + System.err.println(l + ": " + m); } ]], [[ public void yyerror (String m) @@ -976,27 +976,25 @@ m4_define([AT_YYERROR_DEFINE(java)], ]])[ ]AT_ERROR_CUSTOM_IF([[ - public void reportSyntaxError (Calc.Context ctx) - { - System.err.print (]AT_LOCATION_IF([[ctx.getLocation () + ": "]] - + )["syntax error"); + public void reportSyntaxError(Calc.Context ctx) { + System.err.print(]AT_LOCATION_IF([[ctx.getLocation() + ": " + + ]])["syntax error"); { - Calc.SymbolKind token = ctx.getToken (); + Calc.SymbolKind token = ctx.getToken(); if (token != null) - System.err.print (" on token @<:@" + ctx.yysymbolName (token) + "@:>@"); + System.err.print(" on token @<:@" + token.getName() + "@:>@"); } { Calc.SymbolKind[] arg = new Calc.SymbolKind[ctx.NTOKENS]; - int n = ctx.getExpectedTokens (arg, ctx.NTOKENS); - if (0 < n) - { - System.err.print (" (expected:"); - for (int i = 0; i < n; ++i) - System.err.print (" @<:@" + ctx.yysymbolName (arg[i]) + "@:>@"); - System.err.print (")"); - } + int n = ctx.getExpectedTokens(arg, ctx.NTOKENS); + if (0 < n) { + System.err.print(" (expected:"); + for (int i = 0; i < n; ++i) + System.err.print(" @<:@" + arg[i].getName() + "@:>@"); + System.err.print(")"); + } } - System.err.println (""); + System.err.println(""); } ]]) ]) -- 2.26.0
