Hi Akim, nice patch series! Thanks for the great work. Being able to translate token names will definitely be useful, and I am looking forward to using that functionality.
I just took a short glance over the patches and maybe I am missing something, but: Where is the `_` function used by lalr1.cc defined? Am I supposed to put a "#define _(T) myTranslateFunc(T)" into my grammar or is it already defined somewhere? Cheers, Adrian On 29/12/2018, 18:42, "bison-patches on behalf of Akim Demaille" <[email protected] on behalf of [email protected]> wrote: In addition to %token NUM "number" accept %token NUM _("number") in which case the token will be translated in error messages. Do not use _() in the output if there are no translatable tokens. * src/symtab.h, src/symtab.c (symbol): Add a 'translatable' member. * src/parse-gram.y (TSTRING): New token. (string_as_id.opt): Replace with... (alias): this. Use it. * src/scan-gram.l (SC_ESCAPED_TSTRING): New start conditions, to match TSTRINGs. * src/output.c (prepare_symbols): Define b4_translatable if there are translatable strings. * data/skeletons/glr.c, data/skeletons/lalr1.cc, * data/skeletons/yacc.c (yytnamerr): Receive b4_translatable, and use it. --- data/skeletons/glr.c | 16 +++++++++++----- data/skeletons/lalr1.cc | 11 +++++++++-- data/skeletons/yacc.c | 12 ++++++++++-- src/output.c | 28 ++++++++++++++++++++++++++-- src/parse-gram.y | 28 ++++++++++++++++++---------- src/scan-gram.l | 25 ++++++++++++++++++++----- src/symtab.c | 3 ++- src/symtab.h | 7 ++++++- 8 files changed, 102 insertions(+), 28 deletions(-) diff --git a/data/skeletons/glr.c b/data/skeletons/glr.c index 02438887..99321cd7 100644 --- a/data/skeletons/glr.c +++ b/data/skeletons/glr.c @@ -541,11 +541,17 @@ typedef int yySymbol; /** A printable representation of TOKEN. */ static inline const char* yytokenName (yySymbol yytoken) -{ +{]m4_ifdef([b4_translatable], [[ + /* YYTRANSLATABLE[SYMBOL-NUM] -- Whether YYTNAME[SYMBOL-NUM] is + internationalizable. 
*/ + static ]b4_int_type_for([b4_translate])[ yytranslatable[] = + { + ]b4_translatable[ + };]])[ if (yytoken == YYEMPTY) - return ""; - else - return yytname[yytoken]; + return "";]m4_ifdef([b4_translatable], [[ + return yytranslatable[yytoken] ? _(yytname[yytoken]) : yytname[yytoken];]], [[ + return yytname[yytoken];]])[ } #endif @@ -575,7 +581,7 @@ yystpcpy (char *yydest, const char *yysrc) /* Copy to YYRES the name of YYTOKEN. If YYRES is null, do not copy; instead, return the length of what the result would have been. */ static size_t -yytnamerr (char *yyres, int yytoken) +yytnamerr (char *yyres, yySymbol yytoken) { const char *yystr = yytokenName (yytoken); if (! yyres) diff --git a/data/skeletons/lalr1.cc b/data/skeletons/lalr1.cc index 7944c4e7..d3d9ff37 100644 --- a/data/skeletons/lalr1.cc +++ b/data/skeletons/lalr1.cc @@ -511,8 +511,15 @@ m4_if(b4_prefix, [yy], [], std::string ]b4_parser_class_name[::yytnamerr_ (int yytoken) - { - return yytname_[yytoken]; + {]m4_ifdef([b4_translatable], [[ + // YYTRANSLATABLE[TOKEN-NUM] -- Whether YYTNAME[TOKEN-NUM] is + // internationalizable. */ + static ]b4_int_type_for([b4_translate])[ yytranslatable[] = + { + ]b4_translatable[ + }; + return yytranslatable[yytoken] ? _(yytname_[yytoken]) : yytname_[yytoken];]], [[ + return yytname_[yytoken];]])[ } ]])[ diff --git a/data/skeletons/yacc.c b/data/skeletons/yacc.c index fd10a004..e508633d 100644 --- a/data/skeletons/yacc.c +++ b/data/skeletons/yacc.c @@ -1045,8 +1045,16 @@ yy_lac (yytype_int16 *yyesa, yytype_int16 **yyes, instead, return the length of what the result would have been. */ static YYSIZE_T yytnamerr (char *yyres, int yytoken) -{ - const char *yystr = yytname[yytoken]; +{]m4_ifdef([b4_translatable], [[ + /* YYTRANSLATABLE[SYMBOL-NUM] -- Whether YYTNAME[SYMBOL-NUM] is + internationalizable. */ + static ]b4_int_type_for([b4_translate])[ yytranslatable[] = + { + ]b4_translatable[ + }; + const char *yystr + = yytranslatable[yytoken] ? 
_(yytname[yytoken]) : yytname[yytoken];]], [[ + const char *yystr = yytname[yytoken];]])[ if (! yyres) return yystrlen (yystr); diff --git a/src/output.c b/src/output.c index a90e8266..eeeb3b81 100644 --- a/src/output.c +++ b/src/output.c @@ -54,6 +54,10 @@ static char *relocate_buffer = NULL; | result of formatting the FIRST and then TABLE_DATA[BEGIN..END[ (of | | TYPE), and to the muscle NAME_max, the max value of the | | TABLE_DATA. | +| | +| For the typical case of outputting a complete table from 0, pass | +| TABLE[0] as FIRST, and 1 as BEGIN. For instance | +| muscle_insert_base_table ("pact", base, base[0], 1, nstates); | `-------------------------------------------------------------------*/ @@ -156,7 +160,7 @@ prepare_symbols (void) token_translations[0], 1, max_user_token_number + 1); - /* tname -- token names. */ + /* tname -- symbol names. */ { /* We assume that the table will be output starting at column 2. */ int j = 2; @@ -193,6 +197,26 @@ prepare_symbols (void) muscle_insert ("tname", obstack_finish0 (&format_obstack)); } + /* translatable -- whether a token is translatable. */ + { + bool translatable = false; + for (int i = 0; i < ntokens; ++i) + if (symbols[i]->translatable) + { + translatable = true; + break; + } + if (translatable) + { + int *values = xnmalloc (nsyms, sizeof *values); + for (int i = 0; i < ntokens; ++i) + values[i] = symbols[i]->translatable; + muscle_insert_int_table ("translatable", values, + values[0], 1, ntokens); + free (values); + } + } + /* Output YYTOKNUM. */ { int *values = xnmalloc (ntokens, sizeof *values); @@ -230,7 +254,7 @@ prepare_rules (void) prhs[r] = i; /* RHS of the rule R. */ for (item_number *rhsp = rules[r].rhs; 0 <= *rhsp; ++rhsp) - rhs[i++] = *rhsp; + rhs[i++] = *rhsp; /* Separator in RHS. 
*/ rhs[i++] = -1; diff --git a/src/parse-gram.y b/src/parse-gram.y index 366dccb5..f99cb052 100644 --- a/src/parse-gram.y +++ b/src/parse-gram.y @@ -120,6 +120,7 @@ /* Define the tokens together with their human representation. */ %token GRAM_EOF 0 "end of file" %token STRING "string" + TSTRING "translatable string" %token PERCENT_TOKEN "%token" %token PERCENT_NTERM "%nterm" @@ -186,8 +187,8 @@ %type <unsigned char> CHAR %printer { fputs (char_name ($$), yyo); } <unsigned char> -%type <char*> "{...}" "%?{...}" "%{...%}" EPILOGUE STRING -%printer { fputs (quotearg_style (c_quoting_style, $$), yyo); } STRING +%type <char*> "{...}" "%?{...}" "%{...%}" EPILOGUE STRING TSTRING +%printer { fputs (quotearg_style (c_quoting_style, $$), yyo); } STRING TSTRING %printer { fprintf (yyo, "{\n%s\n}", $$); } <char*> %type <uniqstr> BRACKETED_ID ID ID_COLON PERCENT_FLAG TAG tag tag.opt variable @@ -462,7 +463,7 @@ tag: `-----------------------*/ // A non empty list of possibly tagged symbols for %nterm. -// +// // Can easily be defined like symbol_decls but restricted to ID, but // using token_decls allows to reudce the number of rules, and also to // make nicer error messages on "%nterm 'a'" or '%nterm FOO "foo"'. @@ -497,7 +498,7 @@ token_decl.1: // One symbol declaration for %token or %nterm. token_decl: - id int.opt[num] string_as_id.opt[alias] + id int.opt[num] alias { $$ = $id; symbol_class_set ($id, current_class, @id, true); @@ -514,6 +515,19 @@ int.opt: | INT ; +%type <symbol*> alias; +alias: + %empty { $$ = NULL; } +| string_as_id { $$ = $1; } +| TSTRING + { + $$ = symbol_get (quotearg_style (c_quoting_style, $1), @1); + symbol_class_set ($$, token_sym, @1, false); + $$->translatable = true; + } +; + + /*-------------------------------------. | token_decls_for_prec (%left, etc.). 
| `-------------------------------------*/ @@ -727,12 +741,6 @@ string_as_id: } ; -%type <symbol*> string_as_id.opt; -string_as_id.opt: - %empty { $$ = NULL; } -| string_as_id -; - epilogue.opt: %empty | "%%" EPILOGUE diff --git a/src/scan-gram.l b/src/scan-gram.l index 5fe0fc4e..c2ada035 100644 --- a/src/scan-gram.l +++ b/src/scan-gram.l @@ -108,8 +108,8 @@ static void unexpected_newline (boundary, char const *); %} /* A C-like comment in directives/rules. */ %x SC_YACC_COMMENT - /* Strings and characters in directives/rules. */ -%x SC_ESCAPED_STRING SC_ESCAPED_CHARACTER + /* Characters and strings in directives/rules. */ +%x SC_ESCAPED_CHARACTER SC_ESCAPED_STRING SC_ESCAPED_TSTRING /* A identifier was just read in directives/rules. Special state to capture the sequence 'identifier :'. */ %x SC_AFTER_IDENTIFIER @@ -309,6 +309,7 @@ eqopt ([[:space:]]*=)? /* Strings. */ "\"" token_start = loc->start; BEGIN SC_ESCAPED_STRING; + "_(\"" token_start = loc->start; BEGIN SC_ESCAPED_TSTRING; /* Prologue. */ "%{" code_start = loc->start; BEGIN SC_PROLOGUE; @@ -369,7 +370,7 @@ eqopt ([[:space:]]*=)? | added value. | `--------------------------------------------------------------*/ -<SC_ESCAPED_CHARACTER,SC_ESCAPED_STRING,SC_TAG> +<SC_ESCAPED_CHARACTER,SC_ESCAPED_STRING,SC_ESCAPED_TSTRING,SC_TAG> { \0 complain (loc, complaint, _("invalid null character")); } @@ -529,6 +530,20 @@ eqopt ([[:space:]]*=)? "\n" unexpected_newline (token_start, "\""); } +<SC_ESCAPED_TSTRING> +{ + "\")" { + STRING_FINISH; + BEGIN INITIAL; + loc->start = token_start; + complain (loc, Wyacc, + _("POSIX Yacc does not support string literals")); + RETURN_VALUE (TSTRING, last_string); + } + <<EOF>> unexpected_eof (token_start, "\""); + "\n" unexpected_newline (token_start, "\""); +} + /*----------------------------------------------------------. | Scanning a Bison character literal, decoding its escapes. | | The initial quote is already eaten. | @@ -591,7 +606,7 @@ eqopt ([[:space:]]*=)? 
| Decode escaped characters. | `----------------------------*/ -<SC_ESCAPED_STRING,SC_ESCAPED_CHARACTER> +<SC_ESCAPED_CHARACTER,SC_ESCAPED_STRING,SC_ESCAPED_TSTRING> { \\[0-7]{1,3} { verify (UCHAR_MAX < ULONG_MAX); @@ -787,7 +802,7 @@ eqopt ([[:space:]]*=)? | By default, grow the string obstack with the input. | `-----------------------------------------------------*/ -<SC_COMMENT,SC_LINE_COMMENT,SC_BRACED_CODE,SC_PREDICATE,SC_PROLOGUE,SC_EPILOGUE,SC_STRING,SC_CHARACTER,SC_ESCAPED_STRING,SC_ESCAPED_CHARACTER>. | +<SC_COMMENT,SC_LINE_COMMENT,SC_BRACED_CODE,SC_PREDICATE,SC_PROLOGUE,SC_EPILOGUE,SC_STRING,SC_CHARACTER,SC_ESCAPED_CHARACTER,SC_ESCAPED_STRING,SC_ESCAPED_TSTRING>. | <SC_COMMENT,SC_LINE_COMMENT,SC_BRACED_CODE,SC_PREDICATE,SC_PROLOGUE,SC_EPILOGUE>\n STRING_GROW; %% diff --git a/src/symtab.c b/src/symtab.c index 2409f41c..72630cd0 100644 --- a/src/symtab.c +++ b/src/symtab.c @@ -98,6 +98,7 @@ symbol_new (uniqstr tag, location loc) _("POSIX Yacc forbids dashes in symbol names: %s"), tag); res->tag = tag; + res->translatable = false; res->location = loc; res->location_of_lhs = false; res->alias = NULL; @@ -874,7 +875,7 @@ dummy_symbol_get (location loc) } bool -symbol_is_dummy (const symbol *sym) +symbol_is_dummy (symbol const *sym) { return sym->tag[0] == '@' || (sym->tag[0] == '$' && sym->tag[1] == '@'); } diff --git a/src/symtab.h b/src/symtab.h index a63b904e..4d8f4321 100644 --- a/src/symtab.h +++ b/src/symtab.h @@ -88,6 +88,9 @@ struct symbol /** The key, name of the symbol. */ uniqstr tag; + /** Whether this symbol is translatable. */ + bool translatable; + /** The "defining" location. */ location location; @@ -111,6 +114,8 @@ struct symbol struct sym_content { + /** The main symbol that denotes this content (it contains the + possible alias). */ symbol *symbol; /** Its \c \%type. @@ -173,7 +178,7 @@ symbol *dummy_symbol_get (location loc); void symbol_print (symbol const *s, FILE *f); /** Is this a dummy nonterminal? 
*/ -bool symbol_is_dummy (const symbol *sym); +bool symbol_is_dummy (symbol const *sym); /** The name of the code_props type: "\%destructor" or "\%printer". */ char const *code_props_type_string (code_props_type kind); -- 2.20.0
