Hello! The following 4 commits add internationalisation support and remove yytnamerr, the special handling of $end, and some imports that became unnecessary because of the internationalisation (std.conv is now imported at global scope, as it is no longer used only by lookahead correction).
În mar., 8 dec. 2020 la 16:31, Staniloiu Eduard <[email protected]> a scris: > This can definitely be done with introspection. > I wrote the gist of it below: > ``` > static if (!is(typeof(YY_))) { > version(YYENABLE_NLS) > { > version(ENABLE_NLS) > { > alias YY_ = partial!(dgettext, "bison-runtime"); > } > } > static if (!is(typeof(YY_))) > { > pragma(inline, true) > string YY_(string msg) { return msg; } > } > } > static if (!is(typeof(N_))) > { > pragma(inline, true) > string N_(string msg) { return msg; } > } > ``` > To use YYENABLE_NLS and ENABLE_NLS, one would use the `-version` compiler > option: > `-version=YYENABLE_NLS -version=ENABLE_NLS` > > I used Edi's suggestion from above. > >> This looks like yytnamerr, something the old skeletons have to live with >> that does not make sense in D. In the other skeletons, this is triggered >> with "parse.error verbose" that you should not support. Support only >> "parse.error detailed". You should remove all this. >> >> Done using yysymbol_name . > In C, for instance, i18n is handled in yysymbol_name. >> >> You should really eliminate all this code, even in the other cases. >> I suggest that first you change this commit to not add this piece >> of code for i18n, and in an later commit, you remove this from the >> other cases too. > > Done. > > else if (yystr == "$end") >> > - { >> > - put(sink, "end of input"); >> > + {]b4_has_translations_if([[ >> > + put(sink, _("end of input"));]],[[ >> > + put(sink, "end of input");]])[ >> > return; >> >> This is hairy, and is no longer needed at all. Java no longer >> uses that either. The translation of $end is provided by bison itself, >> like any other token, the skeleton should not play with it at all. >> Have a look at the other skeletons. >> > Done. > >> > +/** >> > + * Handle error message internationalisation >> >> Please end your comments with a period. >> > Done. 
> >> > + */ >> > +extern(C) char* dgettext(const char*, const char*); >> > +string YY_(const char* s) >> > +{ >> > + char* res = dgettext("bison-runtime", s); >> > + return to!string(res); >> >> Why this intermediate variable? Also, can't you declare dcgettext >> within the function itself? I like scopes to be as small as possible. >> > The C import only works at global scope. I removed the variable. > >> > +} >> > +]b4_has_translations_if([ >> > +/** >> > + * User error message internationalisation >> > + */ >> > +extern(C) char* gettext(const char*); >> > +string _(string s) >> > +{ >> > + char* res = gettext(s.ptr); >> > + return to!string(res); >> >> Ditto. >> > Done. > >> > +} >> > +])[ >> > /** >> > * A Bison parser, automatically generated from >> <tt>]m4_bpatsubst(b4_file_name, [^"\(.*\)"$], [\1])[</tt>. >> > * >> > @@ -704,20 +725,41 @@ m4_popdef([b4_at_dollar])])dnl >> > immutable int argmax = 5; >> > SymbolKind[] yyarg = new SymbolKind[argmax]; >> > int yycount = yysyntaxErrorArguments(yyctx, yyarg, argmax); >> > - string res = "syntax error, unexpected "; >> > - res ~= format!"%s"(yyarg[0]); >> > - if (yycount < argmax + 1) >> > + string[] yystr = new string[yycount]; >> > + for (int yyi = 0; yyi < yycount; yyi++) >> > + yystr[yyi] = format!"%s"(yyarg[yyi]); >> > + string res, yyformat; >> > + import std.string; >> > + switch (yycount) >> > { >> > - for (int yyi = 1; yyi < yycount; yyi++) >> > - { >> > - res ~= yyi == 1 ? ", expecting " : " or "; >> > - res ~= format!"%s"(SymbolKind(yyarg[yyi])); >> > - } >> > + case 1: >> > + yyformat = YY_("syntax error, unexpected %s"); >> > + res = format(yyformat, yystr[0]); >> > + break; >> >> I don't see the point of using format twice: once to convert the symbol >> kinds to strings, and then to build the full error message. How about >> not using yystr at all? >> > I removed it. > > +The internationalization in D is very simmilar to the one in C. 
The D >> > +parser uses gettext for user messages and dgettext for Bison messages. >> >> Use @code for function names. But I'm not sure we should go into such >> details here. >> >> > +If gettext is not present on your system, >> >> Use Title Case for package names, but... >> >> > install it and re-run Bison's >> > +configuration. >> >> The manual is about an installed Bison. Configuration and installation >> is irrelevant here. >> >> > + >> > +For enabling internationalisation, import bindtextdomain and textdomain >> >> Use @code for function names. I feel unconformable with "For enabling" >> instead of "To enable", but I'm not a native. >> >> > +from C: >> > + >> > +@example >> > +extern(C) char* bindtextdomain(const char* domainname, const char* >> dirname); >> > +extern(C) char* textdomain(const char* domainname); >> > +@end example >> > + >> > +The main function should load the translation catalogues, similarly to >> the >> > +@file{c/bistromathic} example: >> > + >> > +@example >> > +int main() >> > +@{ >> > + import core.stdc.locale; >> > + >> > + // Set up internationalization. >> > + setlocale(LC_ALL, ""); >> > + // Use Bison's standard translation catalogue for error messages >> > + // (the generated messages). >> > + bindtextdomain("bison-runtime", BISON_LOCALEDIR); >> > + // For the translation catalogue of your own project, use the >> > + // name of your project. >> > + bindtextdomain("bison", LOCALEDIR); >> > + textdomain("bison"); >> > + >> > + // usual main content >> > + ... >> > +@} >> > +@end example >> > + >> > +String aliases may be marked for internationalization (@pxref{Token >> > +I18n}): >> >> We should avoid repeating in each language the common bits. I don't >> these bits are needed it, is it? >> > I modified the documentation. 
> > +%code { >> > +]AT_TOKEN_TRANSLATE_IF([[ >> > + static string _(string s) >> > + { >> > + if (s == "end of input") >> > + return "end of file"; >> > + else if (s == "number") >> > + return "nombre"; >> > + return s; >> >> I personally prefer that we maintain the "else if" chain, instead of >> relying on return to break the execution flow. Make it more >> functional-style >> and add the last else. Currently the style is even inconsistent, with >> just >> one else. >> >> And if D supports 'switch' over string, well, even better :) >> > I used a switch. Thank you for all the help, Akim and Edi! Adela
From a0bf2dff3f6a1bc2ee1a9986a1d327d592b89015 Mon Sep 17 00:00:00 2001 From: Adela Vais <[email protected]> Date: Sat, 28 Nov 2020 21:09:39 +0200 Subject: [PATCH for Dlang support 1/4] d: add internationalisation support The D parser implements this feature similarly to the C parser, by using Gettext. Functions gettext() and dgettext() are imported using extern(C). The internationalisation uses yysymbol_name to report the name of the SymbolKinds. * data/skeletons/d.m4 (SymbolKind.toString.yytranslatable, SymbolKind.toString.yysymbol_name: New), data/skeletons/lalr1.d: Here. * doc/bison.texi: Document it. * tests/calc.at: Test it. --- data/skeletons/d.m4 | 25 ++++++++++++--- data/skeletons/lalr1.d | 69 ++++++++++++++++++++++++++++++++++++------ doc/bison.texi | 36 ++++++++++++++++++++++ tests/calc.at | 19 +++++++++++- 4 files changed, 134 insertions(+), 15 deletions(-) diff --git a/data/skeletons/d.m4 b/data/skeletons/d.m4 index 331c06a8..8dc76e2a 100644 --- a/data/skeletons/d.m4 +++ b/data/skeletons/d.m4 @@ -192,7 +192,12 @@ b4_symbol_foreach([b4_token_enum])dnl } ]) - +# b4_symbol_translate(STRING) +# --------------------------- +# Used by "bison" in the array of symbol names to mark those that +# require translation. +m4_define([b4_symbol_translate], +[[_($1)]]) ## -------------- ## ## Symbol kinds. ## @@ -252,8 +257,20 @@ m4_define([b4_declare_symbol_enum], final void toString(W)(W sink) const if (isOutputRange!(W, char)) { + immutable string[] yy_sname = @{ + ]b4_symbol_names[ + @};]b4_has_translations_if([[ + /* YYTRANSLATABLE[SYMBOL-NUM] -- Whether YY_SNAME[SYMBOL-NUM] is + internationalizable. 
*/ + immutable ]b4_int_type_for([b4_translatable])[[] yytranslatable = @{ + ]b4_translatable[ + @}; + + if (yycode_ < yyntokens_ && yytranslatable[yycode_] > 0) + put(sink, _(yy_sname[yycode_])); + else + put(sink, yy_sname[yycode_]);]], [[ string yystr = yytname_[yycode_]; - if (yystr[0] == '"') { strip_quotes: @@ -280,9 +297,7 @@ m4_define([b4_declare_symbol_enum], { put(sink, "end of input"); return; - } - - put(sink, yystr); + }]])[ } } ]]) diff --git a/data/skeletons/lalr1.d b/data/skeletons/lalr1.d index fd0038d6..78338774 100644 --- a/data/skeletons/lalr1.d +++ b/data/skeletons/lalr1.d @@ -40,7 +40,40 @@ version(D_Version2) { ]b4_user_post_prologue[ ]b4_percent_code_get([[imports]])[ import std.format; +import std.conv; +/** + * Handle error message internationalisation. + */ +static if (!is(typeof(YY_))) { + version(YYENABLE_NLS) + { + version(ENABLE_NLS) + { + extern(C) char* dgettext(const char*, const char*); + string YY_(const char* s) + { + return to!string(dgettext("bison-runtime", s)); + } + } + } + static if (!is(typeof(YY_))) + { + pragma(inline, true) + string YY_(string msg) { return msg; } + } +} + +]b4_has_translations_if([ +/** + * User error message internationalisation. + */ +extern(C) char* gettext(const char*); +string _(string s) +{ + return to!string(gettext(s.ptr)); +} +])[ /** * A Bison parser, automatically generated from <tt>]m4_bpatsubst(b4_file_name, [^"\(.*\)"$], [\1])[</tt>. * @@ -680,20 +713,38 @@ m4_popdef([b4_at_dollar])])dnl immutable int argmax = 5; SymbolKind[] yyarg = new SymbolKind[argmax]; int yycount = yysyntaxErrorArguments(yyctx, yyarg, argmax); - string res = "syntax error, unexpected "; - res ~= format!"%s"(yyarg[0]); - if (yycount < argmax + 1) + string res, yyformat; + import std.string; + switch (yycount) { - for (int yyi = 1; yyi < yycount; yyi++) - { - res ~= yyi == 1 ? 
", expecting " : " or "; - res ~= format!"%s"(SymbolKind(yyarg[yyi])); - } + case 1: + yyformat = YY_("syntax error, unexpected %s"); + res = format(yyformat, yyarg[0]); + break; + case 2: + yyformat = YY_("syntax error, unexpected %s, expecting %s"); + res = format(yyformat, yyarg[0], yyarg[1]); + break; + case 3: + yyformat = YY_("syntax error, unexpected %s, expecting %s or %s"); + res = format(yyformat, yyarg[0], yyarg[1], yyarg[2]); + break; + case 4: + yyformat = YY_("syntax error, unexpected %s, expecting %s or %s or %s"); + res = format(yyformat, yyarg[0], yyarg[1], yyarg[2], yyarg[3]); + break; + case 5: + yyformat = YY_("syntax error, unexpected %s, expecting %s or %s or %s or %s"); + res = format(yyformat, yyarg[0], yyarg[1], yyarg[2], yyarg[3], yyarg[4]); + break; + default: + res = YY_("syntax error"); + break; } yyerror(]b4_locations_if([yyctx.getLocation(), ])[res); }]], [[simple]], [[ - yyerror(]b4_locations_if([yyctx.getLocation(), ])["syntax error");]])[ + yyerror(]b4_locations_if([yyctx.getLocation(), ])[YY_("syntax error"));]])[ } ]b4_parse_error_bmatch( diff --git a/doc/bison.texi b/doc/bison.texi index 0a55f343..b3510c48 100644 --- a/doc/bison.texi +++ b/doc/bison.texi @@ -13953,6 +13953,42 @@ or nonzero, full tracing. Identify the Bison version and skeleton used to generate this parser. @end deftypecv +The internationalization in D is very simmilar to the one in C. The D +parser uses @code{gettext} for user messages and @code{dgettext} for +Bison messages. 
+ +To enable internationalisation, compile using +@code{-version ENABLE_NLS -version YYENABLE_NLS} and import +@code{bindtextdomain} and @code{textdomain} from C: + +@example +extern(C) char* bindtextdomain(const char* domainname, const char* dirname); +extern(C) char* textdomain(const char* domainname); +@end example + +The main function should load the translation catalogues, similarly to the +@file{c/bistromathic} example: + +@example +int main() +@{ + import core.stdc.locale; + + // Set up internationalization. + setlocale(LC_ALL, ""); + // Use Bison's standard translation catalogue for error messages + // (the generated messages). + bindtextdomain("bison-runtime", BISON_LOCALEDIR); + // For the translation catalogue of your own project, use the + // name of your project. + bindtextdomain("bison", LOCALEDIR); + textdomain("bison"); + + // usual main content + ... +@} +@end example + @node D Parser Context Interface @subsection D Parser Context Interface The parser context provides information to build error reports when you diff --git a/tests/calc.at b/tests/calc.at index b1428316..3d319645 100644 --- a/tests/calc.at +++ b/tests/calc.at @@ -654,6 +654,23 @@ m4_define([_AT_DATA_CALC_Y(d)], }; %printer { fprintf (yyo, "%d", $$); } <ival>; +%code { +]AT_TOKEN_TRANSLATE_IF([[ + static string _(string s) + { + switch (s) + { + case "end of input": + return "end of file"; + case "number": + return "nombre"; + default: + return s; + } + } +]])[ +} + /* Bison Declarations */ %token EOF 0 ]AT_TOKEN_TRANSLATE_IF([_("end of file")], ["end of input"])[ %token <ival> NUM "number" @@ -665,7 +682,7 @@ m4_define([_AT_DATA_CALC_Y(d)], STAR "*" SLASH "/" POW "^" - EOL "\n" + EOL "'\\n'" LPAR "(" RPAR ")" NOT "!" -- 2.17.1
From ebf41272ba1a18dc5f0b1c5422b945b5d47af265 Mon Sep 17 00:00:00 2001 From: Adela Vais <[email protected]> Date: Tue, 5 Jan 2021 17:20:11 +0200 Subject: [PATCH for Dlang support 4/4] d: remove unnecessary imports * data/skeletons/lalr1.d: Here. --- data/skeletons/lalr1.d | 5 ----- 1 file changed, 5 deletions(-) diff --git a/data/skeletons/lalr1.d b/data/skeletons/lalr1.d index 78338774..1a92106b 100644 --- a/data/skeletons/lalr1.d +++ b/data/skeletons/lalr1.d @@ -392,7 +392,6 @@ b4_user_union_members } ]b4_parse_trace_if([[ - import std.conv : to; yy_symbol_print ("-> $$ =", to!SymbolKind (yyr1_[yyn]), yyval]b4_locations_if([, yyloc])[);]])[ yystack.pop (yylen); @@ -674,7 +673,6 @@ m4_popdef([b4_at_dollar])])dnl /* Shift the error token. */]b4_lac_if([[ yylacDiscard("error recovery");]])[]b4_parse_trace_if([[ - import std.conv : to; yy_symbol_print ("Shifting", to!SymbolKind (yystos_[yyn]), yylval]b4_locations_if([, yyloc])[);]])[ yystate = yyn; yystack.push (yyn, yylval]b4_locations_if([, yyloc])[); @@ -714,7 +712,6 @@ m4_popdef([b4_at_dollar])])dnl SymbolKind[] yyarg = new SymbolKind[argmax]; int yycount = yysyntaxErrorArguments(yyctx, yyarg, argmax); string res, yyformat; - import std.string; switch (yycount) { case 1: @@ -892,7 +889,6 @@ m4_popdef([b4_at_dollar])])dnl destroy(yylacStack); // Reduce until we encounter a shift and thereby accept the token. ]b4_parse_trace_if([[ - import std.conv; yycdebug("LAC: checking lookahead " ~ format("%s", yytoken) ~ ":");]])[ int lacTop = 0; while (true) @@ -1083,7 +1079,6 @@ m4_popdef([b4_at_dollar])])dnl yyrule - 1, yylno)); /* The symbols being reduced. */ - import std.conv : to; for (int yyi = 0; yyi < yynrhs; yyi++) yy_symbol_print (format(" $%d =", yyi + 1), to!SymbolKind (yystos_[yystack.stateAt(yynrhs - (yyi + 1))]), -- 2.17.1
From adeab9963c5b5fca90e45cc057b8cbe70c74aa99 Mon Sep 17 00:00:00 2001 From: Adela Vais <[email protected]> Date: Tue, 5 Jan 2021 16:47:29 +0200 Subject: [PATCH for Dlang support 2/4] d: remove special case from SymbolKind.toString The translation is handled by Bison itself. * data/skeletons/d.m4: Here. --- data/skeletons/d.m4 | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/data/skeletons/d.m4 b/data/skeletons/d.m4 index 8dc76e2a..8856ccc5 100644 --- a/data/skeletons/d.m4 +++ b/data/skeletons/d.m4 @@ -292,12 +292,7 @@ m4_define([b4_declare_symbol_enum], case '"': return; } - } - else if (yystr == "$end") - { - put(sink, "end of input"); - return; - }]])[ + }]])[ } } ]]) -- 2.17.1
From 5a71673fce587a2d2db06d4c7f0e60a272e50060 Mon Sep 17 00:00:00 2001 From: Adela Vais <[email protected]> Date: Tue, 5 Jan 2021 16:48:26 +0200 Subject: [PATCH for Dlang support 3/4] d: remove yytnamerr usage It is a backwards-compatible feature for the other parsers. D should not support this option. * data/skeletons/d.m4: Here. --- data/skeletons/d.m4 | 30 +----------------------------- 1 file changed, 1 insertion(+), 29 deletions(-) diff --git a/data/skeletons/d.m4 b/data/skeletons/d.m4 index 8856ccc5..ee17a2f3 100644 --- a/data/skeletons/d.m4 +++ b/data/skeletons/d.m4 @@ -243,12 +243,6 @@ m4_define([b4_declare_symbol_enum], yycode_ = code; } - /* YYTNAME[SYMBOL-NUM] -- String name of the symbol SYMBOL-NUM. - First, the terminals, then, starting at \a YYNTOKENS_, nonterminals. */ - static immutable string[] yytname_ = @{ - ]b4_tname[ - @}; - /* Return YYSTR after stripping away unnecessary quotes and backslashes, so that it's suitable for yyerror. The heuristic is that double-quoting is unnecessary unless the string contains an @@ -270,29 +264,7 @@ m4_define([b4_declare_symbol_enum], put(sink, _(yy_sname[yycode_])); else put(sink, yy_sname[yycode_]);]], [[ - string yystr = yytname_[yycode_]; - if (yystr[0] == '"') - { - strip_quotes: - for (int i = 1; i < yystr.length; i++) - switch (yystr[i]) - { - case '\'': - case ',': - break strip_quotes; - - case '\\': - if (yystr[++i] != '\\') - break strip_quotes; - goto default; - default: - put(sink, yystr[i]); - break; - - case '"': - return; - } - }]])[ + put(sink, yy_sname[yycode_]);]])[ } } ]]) -- 2.17.1
