Re: [PATCH 5/8] parsers: support translatable token aliases

Adrian Vogelsgesang Wed, 02 Jan 2019 00:53:27 -0800

Hi Akim,

Nice patch series! Thanks for the great work.
Being able to translate token names will definitely be useful, and I am looking 
forward to using that functionality.


I have only had a quick look at the patches, so maybe I am missing something, 
but:
Where is the `_` function used by lalr1.cc defined?
Am I supposed to put a "#define _(T) myTranslateFunc(T)" into my grammar or is 
it already defined somewhere?

Cheers,
Adrian

On 29/12/2018, 18:42, "bison-patches on behalf of Akim Demaille" 
<[email protected] on behalf of 
[email protected]> wrote:

    In addition to
    
        %token NUM "number"
    
    accept
    
        %token NUM _("number")
    
    in which case the token will be translated in error messages.
    Do not use _() in the output if there are no translatable tokens.
    
    * src/symtab.h, src/symtab.c (symbol): Add a 'translatable' member.
    * src/parse-gram.y (TSTRING): New token.
    (string_as_id.opt): Replace with...
    (alias): this.
    Use it.
    * src/scan-gram.l (SC_ESCAPED_TSTRING): New start condition, to match
    TSTRINGs.
    * src/output.c (prepare_symbols): Define b4_translatable if there are
    translatable strings.
    
    * data/skeletons/glr.c, data/skeletons/lalr1.cc,
    * data/skeletons/yacc.c (yytnamerr): Receive b4_translatable, and use it.
    ---
     data/skeletons/glr.c    | 16 +++++++++++-----
     data/skeletons/lalr1.cc | 11 +++++++++--
     data/skeletons/yacc.c   | 12 ++++++++++--
     src/output.c            | 28 ++++++++++++++++++++++++++--
     src/parse-gram.y        | 28 ++++++++++++++++++----------
     src/scan-gram.l         | 25 ++++++++++++++++++++-----
     src/symtab.c            |  3 ++-
     src/symtab.h            |  7 ++++++-
     8 files changed, 102 insertions(+), 28 deletions(-)
    
    diff --git a/data/skeletons/glr.c b/data/skeletons/glr.c
    index 02438887..99321cd7 100644
    --- a/data/skeletons/glr.c
    +++ b/data/skeletons/glr.c
    @@ -541,11 +541,17 @@ typedef int yySymbol;
     /** A printable representation of TOKEN.  */
     static inline const char*
     yytokenName (yySymbol yytoken)
    -{
    +{]m4_ifdef([b4_translatable], [[
    +  /* YYTRANSLATABLE[SYMBOL-NUM] -- Whether YYTNAME[SYMBOL-NUM] is
    +     internationalizable.  */
    +  static ]b4_int_type_for([b4_translate])[ yytranslatable[] =
    +  {
    +  ]b4_translatable[
    +  };]])[
       if (yytoken == YYEMPTY)
    -    return "";
    -  else
    -    return yytname[yytoken];
    +    return "";]m4_ifdef([b4_translatable], [[
    +  return yytranslatable[yytoken] ? _(yytname[yytoken]) : yytname[yytoken];]], [[
    +  return yytname[yytoken];]])[
     }
     #endif
     
    @@ -575,7 +581,7 @@ yystpcpy (char *yydest, const char *yysrc)
     /* Copy to YYRES the name of YYTOKEN.  If YYRES is null, do not copy;
        instead, return the length of what the result would have been.  */
     static size_t
    -yytnamerr (char *yyres, int yytoken)
    +yytnamerr (char *yyres, yySymbol yytoken)
     {
       const char *yystr = yytokenName (yytoken);
       if (! yyres)
    diff --git a/data/skeletons/lalr1.cc b/data/skeletons/lalr1.cc
    index 7944c4e7..d3d9ff37 100644
    --- a/data/skeletons/lalr1.cc
    +++ b/data/skeletons/lalr1.cc
    @@ -511,8 +511,15 @@ m4_if(b4_prefix, [yy], [],
     
       std::string
       ]b4_parser_class_name[::yytnamerr_ (int yytoken)
    -  {
    -    return yytname_[yytoken];
    +  {]m4_ifdef([b4_translatable], [[
    +    // YYTRANSLATABLE[TOKEN-NUM] -- Whether YYTNAME[TOKEN-NUM] is
    +    // internationalizable.
    +    static ]b4_int_type_for([b4_translate])[ yytranslatable[] =
    +    {
    +  ]b4_translatable[
    +    };
    +    return yytranslatable[yytoken] ? _(yytname_[yytoken]) : yytname_[yytoken];]], [[
    +    return yytname_[yytoken];]])[
       }
     ]])[
     
    diff --git a/data/skeletons/yacc.c b/data/skeletons/yacc.c
    index fd10a004..e508633d 100644
    --- a/data/skeletons/yacc.c
    +++ b/data/skeletons/yacc.c
    @@ -1045,8 +1045,16 @@ yy_lac (yytype_int16 *yyesa, yytype_int16 **yyes,
        instead, return the length of what the result would have been.  */
     static YYSIZE_T
     yytnamerr (char *yyres, int yytoken)
    -{
    -  const char *yystr = yytname[yytoken];
    +{]m4_ifdef([b4_translatable], [[
    +  /* YYTRANSLATABLE[SYMBOL-NUM] -- Whether YYTNAME[SYMBOL-NUM] is
    +     internationalizable.  */
    +  static ]b4_int_type_for([b4_translate])[ yytranslatable[] =
    +  {
    +  ]b4_translatable[
    +  };
    +  const char *yystr
    +    = yytranslatable[yytoken] ? _(yytname[yytoken]) : yytname[yytoken];]], [[
    +  const char *yystr = yytname[yytoken];]])[
       if (! yyres)
         return yystrlen (yystr);
     
    diff --git a/src/output.c b/src/output.c
    index a90e8266..eeeb3b81 100644
    --- a/src/output.c
    +++ b/src/output.c
    @@ -54,6 +54,10 @@ static char *relocate_buffer = NULL;
     | result of formatting the FIRST and then TABLE_DATA[BEGIN..END[ (of |
     | TYPE), and to the muscle NAME_max, the max value of the            |
     | TABLE_DATA.                                                        |
    +|                                                                    |
    +| For the typical case of outputting a complete table from 0, pass   |
    +| TABLE[0] as FIRST, and 1 as BEGIN.  For instance                   |
    +| muscle_insert_base_table ("pact", base, base[0], 1, nstates);      |
     `-------------------------------------------------------------------*/
     
     
    @@ -156,7 +160,7 @@ prepare_symbols (void)
                                          token_translations[0],
                                          1, max_user_token_number + 1);
     
    -  /* tname -- token names.  */
    +  /* tname -- symbol names.  */
       {
         /* We assume that the table will be output starting at column 2. */
         int j = 2;
    @@ -193,6 +197,26 @@ prepare_symbols (void)
         muscle_insert ("tname", obstack_finish0 (&format_obstack));
       }
     
    +  /* translatable -- whether a token is translatable. */
    +  {
    +    bool translatable = false;
    +    for (int i = 0; i < ntokens; ++i)
    +      if (symbols[i]->translatable)
    +        {
    +          translatable = true;
    +          break;
    +        }
    +    if (translatable)
    +      {
    +        int *values = xnmalloc (nsyms, sizeof *values);
    +        for (int i = 0; i < ntokens; ++i)
    +          values[i] = symbols[i]->translatable;
    +        muscle_insert_int_table ("translatable", values,
    +                                 values[0], 1, ntokens);
    +        free (values);
    +      }
    +  }
    +
       /* Output YYTOKNUM. */
       {
         int *values = xnmalloc (ntokens, sizeof *values);
    @@ -230,7 +254,7 @@ prepare_rules (void)
           prhs[r] = i;
           /* RHS of the rule R. */
           for (item_number *rhsp = rules[r].rhs; 0 <= *rhsp; ++rhsp)
    -   rhs[i++] = *rhsp;
    +        rhs[i++] = *rhsp;
           /* Separator in RHS. */
           rhs[i++] = -1;
     
    diff --git a/src/parse-gram.y b/src/parse-gram.y
    index 366dccb5..f99cb052 100644
    --- a/src/parse-gram.y
    +++ b/src/parse-gram.y
    @@ -120,6 +120,7 @@
     /* Define the tokens together with their human representation.  */
     %token GRAM_EOF 0 "end of file"
     %token STRING     "string"
    +       TSTRING    "translatable string"
     
     %token PERCENT_TOKEN       "%token"
     %token PERCENT_NTERM       "%nterm"
    @@ -186,8 +187,8 @@
     %type <unsigned char> CHAR
     %printer { fputs (char_name ($$), yyo); } <unsigned char>
     
    -%type <char*> "{...}" "%?{...}" "%{...%}" EPILOGUE STRING
    -%printer { fputs (quotearg_style (c_quoting_style, $$), yyo); } STRING
    +%type <char*> "{...}" "%?{...}" "%{...%}" EPILOGUE STRING TSTRING
    +%printer { fputs (quotearg_style (c_quoting_style, $$), yyo); } STRING TSTRING
     %printer { fprintf (yyo, "{\n%s\n}", $$); } <char*>
     
     %type <uniqstr> BRACKETED_ID ID ID_COLON PERCENT_FLAG TAG tag tag.opt variable
    @@ -462,7 +463,7 @@ tag:
     `-----------------------*/
     
     // A non empty list of possibly tagged symbols for %nterm.
    -// 
    +//
     // Can easily be defined like symbol_decls but restricted to ID, but
     // using token_decls allows to reudce the number of rules, and also to
     // make nicer error messages on "%nterm 'a'" or '%nterm FOO "foo"'.
    @@ -497,7 +498,7 @@ token_decl.1:
     
     // One symbol declaration for %token or %nterm.
     token_decl:
    -  id int.opt[num] string_as_id.opt[alias]
    +  id int.opt[num] alias
         {
           $$ = $id;
           symbol_class_set ($id, current_class, @id, true);
    @@ -514,6 +515,19 @@ int.opt:
     | INT
     ;
     
    +%type <symbol*> alias;
    +alias:
    +  %empty         { $$ = NULL; }
    +| string_as_id   { $$ = $1; }
    +| TSTRING
    +    {
    +      $$ = symbol_get (quotearg_style (c_quoting_style, $1), @1);
    +      symbol_class_set ($$, token_sym, @1, false);
    +      $$->translatable = true;
    +    }
    +;
    +
    +
     /*-------------------------------------.
     | token_decls_for_prec (%left, etc.).  |
     `-------------------------------------*/
    @@ -727,12 +741,6 @@ string_as_id:
         }
     ;
     
    -%type <symbol*> string_as_id.opt;
    -string_as_id.opt:
    -  %empty             { $$ = NULL; }
    -| string_as_id
    -;
    -
     epilogue.opt:
       %empty
     | "%%" EPILOGUE
    diff --git a/src/scan-gram.l b/src/scan-gram.l
    index 5fe0fc4e..c2ada035 100644
    --- a/src/scan-gram.l
    +++ b/src/scan-gram.l
    @@ -108,8 +108,8 @@ static void unexpected_newline (boundary, char const *);
     %}
      /* A C-like comment in directives/rules. */
     %x SC_YACC_COMMENT
    - /* Strings and characters in directives/rules. */
    -%x SC_ESCAPED_STRING SC_ESCAPED_CHARACTER
    + /* Characters and strings in directives/rules. */
    +%x SC_ESCAPED_CHARACTER SC_ESCAPED_STRING SC_ESCAPED_TSTRING
      /* A identifier was just read in directives/rules.  Special state
         to capture the sequence 'identifier :'. */
     %x SC_AFTER_IDENTIFIER
    @@ -309,6 +309,7 @@ eqopt    ([[:space:]]*=)?
     
       /* Strings. */
       "\""        token_start = loc->start; BEGIN SC_ESCAPED_STRING;
    +  "_(\""      token_start = loc->start; BEGIN SC_ESCAPED_TSTRING;
     
       /* Prologue. */
       "%{"        code_start = loc->start; BEGIN SC_PROLOGUE;
    @@ -369,7 +370,7 @@ eqopt    ([[:space:]]*=)?
       | added value.                                                  |
       `--------------------------------------------------------------*/
     
    -<SC_ESCAPED_CHARACTER,SC_ESCAPED_STRING,SC_TAG>
    +<SC_ESCAPED_CHARACTER,SC_ESCAPED_STRING,SC_ESCAPED_TSTRING,SC_TAG>
     {
       \0        complain (loc, complaint, _("invalid null character"));
     }
    @@ -529,6 +530,20 @@ eqopt    ([[:space:]]*=)?
       "\n"      unexpected_newline (token_start, "\"");
     }
     
    +<SC_ESCAPED_TSTRING>
    +{
    +  "\")" {
    +    STRING_FINISH;
    +    BEGIN INITIAL;
    +    loc->start = token_start;
    +    complain (loc, Wyacc,
    +              _("POSIX Yacc does not support string literals"));
    +    RETURN_VALUE (TSTRING, last_string);
    +  }
    +  <<EOF>>   unexpected_eof (token_start, "\"");
    +  "\n"      unexpected_newline (token_start, "\"");
    +}
    +
       /*----------------------------------------------------------.
       | Scanning a Bison character literal, decoding its escapes. |
       | The initial quote is already eaten.                       |
    @@ -591,7 +606,7 @@ eqopt    ([[:space:]]*=)?
       | Decode escaped characters.  |
       `----------------------------*/
     
    -<SC_ESCAPED_STRING,SC_ESCAPED_CHARACTER>
    +<SC_ESCAPED_CHARACTER,SC_ESCAPED_STRING,SC_ESCAPED_TSTRING>
     {
       \\[0-7]{1,3} {
         verify (UCHAR_MAX < ULONG_MAX);
    @@ -787,7 +802,7 @@ eqopt    ([[:space:]]*=)?
       | By default, grow the string obstack with the input.  |
       `-----------------------------------------------------*/
     
    -<SC_COMMENT,SC_LINE_COMMENT,SC_BRACED_CODE,SC_PREDICATE,SC_PROLOGUE,SC_EPILOGUE,SC_STRING,SC_CHARACTER,SC_ESCAPED_STRING,SC_ESCAPED_CHARACTER>. |
    +<SC_COMMENT,SC_LINE_COMMENT,SC_BRACED_CODE,SC_PREDICATE,SC_PROLOGUE,SC_EPILOGUE,SC_STRING,SC_CHARACTER,SC_ESCAPED_CHARACTER,SC_ESCAPED_STRING,SC_ESCAPED_TSTRING>. |
       <SC_COMMENT,SC_LINE_COMMENT,SC_BRACED_CODE,SC_PREDICATE,SC_PROLOGUE,SC_EPILOGUE>\n    STRING_GROW;
     
     %%
    diff --git a/src/symtab.c b/src/symtab.c
    index 2409f41c..72630cd0 100644
    --- a/src/symtab.c
    +++ b/src/symtab.c
    @@ -98,6 +98,7 @@ symbol_new (uniqstr tag, location loc)
                   _("POSIX Yacc forbids dashes in symbol names: %s"), tag);
     
       res->tag = tag;
    +  res->translatable = false;
       res->location = loc;
       res->location_of_lhs = false;
       res->alias = NULL;
    @@ -874,7 +875,7 @@ dummy_symbol_get (location loc)
     }
     
     bool
    -symbol_is_dummy (const symbol *sym)
    +symbol_is_dummy (symbol const *sym)
     {
       return sym->tag[0] == '@' || (sym->tag[0] == '$' && sym->tag[1] == '@');
     }
    diff --git a/src/symtab.h b/src/symtab.h
    index a63b904e..4d8f4321 100644
    --- a/src/symtab.h
    +++ b/src/symtab.h
    @@ -88,6 +88,9 @@ struct symbol
       /** The key, name of the symbol.  */
       uniqstr tag;
     
    +  /** Whether this symbol is translatable. */
    +  bool translatable;
    +
       /** The "defining" location.  */
       location location;
     
    @@ -111,6 +114,8 @@ struct symbol
     
     struct sym_content
     {
    +  /** The main symbol that denotes this content (it contains the
    +      possible alias). */
       symbol *symbol;
     
       /** Its \c \%type.
    @@ -173,7 +178,7 @@ symbol *dummy_symbol_get (location loc);
     void symbol_print (symbol const *s, FILE *f);
     
     /** Is this a dummy nonterminal?  */
    -bool symbol_is_dummy (const symbol *sym);
    +bool symbol_is_dummy (symbol const *sym);
     
     /** The name of the code_props type: "\%destructor" or "\%printer".  */
     char const *code_props_type_string (code_props_type kind);
    -- 
    2.20.0

Re: [PATCH 5/8] parsers: support translatable token aliases

Reply via email to