Dear Bison maintainers,

The attached patch implements a solution to a limitation that Bison
imposes on C++ developers: the %union construct can only contain
Plain Old Data (POD) types if you need different types of semantic
values. This means that you cannot use classes with constructors or
destructors, so you have to use pointers to classes and implement your
own memory management (with limited help from the %destructor clause,
which is only invoked in the case of parsing errors, not to destroy
unused (non)terminals inside an action).

A solution without Bison changes is to define YYSTYPE as a polymorphic
C++ class such as boost::variant (whose objects can hold a value of any
one of a number of types; see http://www.boost.org/libs/variant/ ).
The downside of this approach is that you lose Bison's built-in type
handling (the union fields between angle brackets, which are
automatically selected). The risk associated with a polymorphic type is
that an object of type A is assigned to a nonterminal in one action,
and that object is then accessed as type B in another action.

I have implemented full support for boost::variant AND Bison type
selection. You can still build and use Bison without Boost if you
don't use this feature (the generated code will then be identical).
Attached is a patch for Bison 2.3 (tested) and the same patch adapted
for current Bison CVS (untested, but it could work). The attached file
"variant-test.yy" is a grammar example and contains a full explanation
of the principles and the usage.

If you don't want to read the whole file, this is the usage in short:
* Use the clause %define "variant" "int, std::string, ANY OTHER TYPE"
  to define all types that are being used.
* In %token and %type clauses, directly write the type instead of a
  union field. Examples are "%token <int> NUMBER" or
  "%type <std::string> text".
* In actions, the dollar constructs ($$, $1, $2, ...) are to be
  interpreted as references to objects of the right type. The result
  ($$) will have been default-constructed at the beginning of the
  action (no implicit $$=$1).
* The function yylex will be called with a yylval of type
  boost::variant<...>.
* No %destructor is needed anymore.

I'd like to see the patch incorporated in future Bison releases, so
that we do not have to maintain and upgrade this patch for each new
Bison release. We gladly contribute it to the community, which also
fulfills the GPL license requirements. Could you commit the patch
and/or comment on the code?

Kind regards,

Michiel De Wilde
==
Michiel De Wilde
R&D Engineer
EEsof EDA
Agilent Technologies
--- bison/data/c++.m4.orig	2006-03-12 15:45:27.000000000 +0100
+++ bison/data/c++.m4	2007-06-14 14:12:56.287740000 +0200
@@ -54,7 +54,7 @@
 # --------------------
 # Expansion of $<TYPE>$.
 m4_define([b4_lhs_value],
-[(yyval[]m4_ifval([$1], [.$1]))])
+m4_ifdef([b4_variant],[(m4_ifval([$1], [boost::get<$1>(])yyval[]m4_ifval([$1], [)]))],[(yyval[]m4_ifval([$1], [.$1]))]))
 
 
 # b4_rhs_value(RULE-LENGTH, NUM, [TYPE])
@@ -62,8 +62,8 @@
 # Expansion of $<TYPE>NUM, where the current rule has RULE-LENGTH
 # symbols on RHS.
 m4_define([b4_rhs_value],
-[(yysemantic_stack_@{($1) - ($2)@}m4_ifval([$3], [.$3]))])
+m4_ifdef([b4_variant],[(m4_ifval([$3], [boost::get<$3>(])yysemantic_stack_@{($1) - ($2)@}m4_ifval([$3], [)]))],[(yysemantic_stack_@{($1) - ($2)@}m4_ifval([$3], [.$3]))]))
 
 # b4_lhs_location()
 # -----------------
--- bison/data/c.m4.orig	2006-05-21 06:48:47.000000000 +0200
+++ bison/data/c.m4	2007-06-14 14:09:20.068589000 +0200
@@ -417,8 +417,8 @@
 # -------------------------------------------------
 m4_define([b4_symbol_actions],
 [m4_pushdef([b4_dollar_dollar],
-   [m4_ifval([$6], [(yyvaluep->$6)], [(*yyvaluep)])])dnl
+   [m4_ifval([$6], m4_ifdef([b4_variant],[(boost::get<$6>(*yyvaluep))],[(yyvaluep->$6)]), [(*yyvaluep)])])dnl
 m4_pushdef([b4_at_dollar], [(*yylocationp)])dnl
       case $4: /* $3 */
 b4_syncline([$2], [$1])
--- bison/data/lalr1.cc.orig	2006-05-15 08:13:53.000000000 +0200
+++ bison/data/lalr1.cc	2007-06-14 14:37:20.720670000 +0200
@@ -44,7 +44,8 @@
 # define PARSER_HEADER_H
 
 #include <string>
-#include <iostream>
+#include <iostream>]m4_ifdef([b4_variant],[
+#include <boost/variant.hpp>])[
 #include "stack.hh"
 
 namespace ]b4_namespace[
@@ -108,13 +109,14 @@
   public:
     /// Symbol semantic values.
 #ifndef YYSTYPE
-]m4_ifdef([b4_stype],
+]m4_ifdef([b4_variant],
+[    typedef boost::variant<b4_variant> semantic_type;],m4_ifdef([b4_stype],
 [    union semantic_type
 b4_stype
 /* Line __line__ of lalr1.cc.  */
 b4_syncline([@oline@], [@ofile@])
 	;],
-[    typedef int semantic_type;])[
+[    typedef int semantic_type;]))[
 #else
     typedef YYSTYPE semantic_type;
 #endif
@@ -560,8 +562,11 @@
        stacks with a primary value.  */
     yystate_stack_ = state_stack_type (0);
     yysemantic_stack_ = semantic_stack_type (0);
-    yylocation_stack_ = location_stack_type (0);
-    yysemantic_stack_.push (yylval);
+    yylocation_stack_ = location_stack_type (0);]m4_ifdef([b4_variant],[
+    /* we use swapping to avoid copying large amounts of data */
+    yysemantic_stack_.push (semantic_type());
+    yysemantic_stack_@{0@}.swap(yylval);],[
+    yysemantic_stack_.push (yylval);])[
     yylocation_stack_.push (yylloc);
 
     /* New state.  */
@@ -627,8 +632,11 @@
     /* Discard the token being shifted unless it is eof.  */
     if (yychar != yyeof_)
       yychar = yyempty_;
-
-    yysemantic_stack_.push (yylval);
+]m4_ifdef([b4_variant],[
+    /* we use swapping to avoid copying large amounts of data */
+    yysemantic_stack_.push (semantic_type());
+    yysemantic_stack_@{0@}.swap(yylval);],[
+    yysemantic_stack_.push (yylval);])[
     yylocation_stack_.push (yylloc);
 
     /* Count tokens shifted since error; after three, turn off error
@@ -652,7 +660,15 @@
   | yyreduce -- Do a reduction.  |
   `-----------------------------*/
   yyreduce:
-    yylen = yyr2_[yyn];
+    yylen = yyr2_[yyn];]m4_ifdef([b4_variant],[
+    /* Variants are always initialized to an empty instance of the correct type */
+    /* The default $$=$1 rule is NOT applied when using variants */
+    switch (yyn)
+      {
+]b4_variant_initializations[        default:
+          yyval = semantic_type();
+          break;
+      }],[
     /* If YYLEN is nonzero, implement the default value of the action:
        `$$ = $1'.  Otherwise, use the top of the stack.
 
@@ -660,9 +676,9 @@
        This behavior is undocumented and Bison
        users should not rely upon it.  */
     if (yylen)
-      yyval = yysemantic_stack_[yylen - 1];
+      yyval = yysemantic_stack_@{yylen - 1@};
     else
-      yyval = yysemantic_stack_[0];
+      yyval = yysemantic_stack_@{0@};])[
 
     {
       slice<location_type, location_stack_type> slice (yylocation_stack_, yylen);
@@ -681,8 +697,11 @@
     yypop_ (yylen);
     yylen = 0;
     YY_STACK_PRINT ();
-
-    yysemantic_stack_.push (yyval);
+]m4_ifdef([b4_variant],[
+    /* we use swapping to avoid copying large amounts of data */
+    yysemantic_stack_.push (semantic_type());
+    yysemantic_stack_@{0@}.swap(yyval);],[
+    yysemantic_stack_.push (yyval);])[
     yylocation_stack_.push (yyloc);
 
     /* Shift the result of the reduction.  */
@@ -789,9 +808,12 @@
     yyerror_range[1] = yylloc;
     // Using YYLLOC is tempting, but would change the location of
     // the look-ahead.  YYLOC is available though.
-    YYLLOC_DEFAULT (yyloc, (yyerror_range - 1), 2);
-    yysemantic_stack_.push (yylval);
+    YYLLOC_DEFAULT (yyloc, (yyerror_range - 1), 2);]m4_ifdef([b4_variant],[
+    /* we use swapping to avoid copying large amounts of data */
+    yysemantic_stack_.push (semantic_type());
+    yysemantic_stack_@{0@}.swap(yylval);],[
+    yysemantic_stack_.push (yylval);])[
     yylocation_stack_.push (yyloc);
 
     /* Shift the error token.  */
--- bison/src/output.c.orig	2006-05-14 22:40:35.000000000 +0200
+++ bison/src/output.c	2007-06-14 14:16:52.296829000 +0200
@@ -280,6 +280,26 @@
 
 
 
+/*-----------------------------------------------------.
+| Output the code for variant initializations to OUT.  |
+`-----------------------------------------------------*/
+
+static void
+variant_initializations_output (FILE *out)
+{
+  rule_number r;
+
+  fputs ("m4_define([b4_variant_initializations], \n[[", out);
+  for (r = 0; r < nrules; ++r)
+    if (rules[r].lhs->type_name)
+      {
+	fprintf (out, "        case %d:\n", r + 1);
+	fprintf (out, "          yyval = %s();\n          break;\n\n",
+		 rules[r].lhs->type_name);
+      }
+  fputs ("]])\n\n", out);
+}
+
 /*---------------------------------.
 | Output the user actions to OUT.  |
 `---------------------------------*/
@@ -554,6 +574,7 @@
   /* Output the definitions of all the muscles.  */
   fputs ("m4_init()\n", out);
 
+  variant_initializations_output (out);
   user_actions_output (out);
   merger_output (out);
   token_definitions_output (out);
Index: bison/data/c++.m4
===================================================================
RCS file: /sources/bison/bison/data/c++.m4,v
retrieving revision 1.15
diff -u -r1.15 c++.m4
--- bison/data/c++.m4	30 Jan 2007 08:01:29 -0000	1.15
+++ bison/data/c++.m4	15 Jun 2007 06:53:12 -0000
@@ -59,7 +59,7 @@
 # --------------------
 # Expansion of $<TYPE>$.
 m4_define([b4_lhs_value],
-[(yyval[]m4_ifval([$1], [.$1]))])
+m4_ifdef([b4_variant],[(m4_ifval([$1], [boost::get<$1>(])yyval[]m4_ifval([$1], [)]))],[(yyval[]m4_ifval([$1], [.$1]))]))
 
 
 # b4_rhs_value(RULE-LENGTH, NUM, [TYPE])
@@ -67,7 +67,7 @@
 # Expansion of $<TYPE>NUM, where the current rule has RULE-LENGTH
 # symbols on RHS.
 m4_define([b4_rhs_value],
-[(yysemantic_stack_@{($1) - ($2)@}m4_ifval([$3], [.$3]))])
+m4_ifdef([b4_variant],[(m4_ifval([$3], [boost::get<$3>(])yysemantic_stack_@{($1) - ($2)@}m4_ifval([$3], [)]))],[(yysemantic_stack_@{($1) - ($2)@}m4_ifval([$3], [.$3]))]))
 
 # b4_lhs_location()
 # -----------------
Index: bison/data/c.m4
===================================================================
RCS file: /sources/bison/bison/data/c.m4,v
retrieving revision 1.71
diff -u -r1.71 c.m4
--- bison/data/c.m4	17 Jan 2007 08:36:07 -0000	1.71
+++ bison/data/c.m4	15 Jun 2007 06:53:12 -0000
@@ -355,7 +355,7 @@
 # -------------------------------------------------
 m4_define([b4_symbol_actions],
 [m4_pushdef([b4_dollar_dollar],
-   [m4_ifval([$6], [(yyvaluep->$6)], [(*yyvaluep)])])dnl
+   [m4_ifval([$6], m4_ifdef([b4_variant],[(boost::get<$6>(*yyvaluep))],[(yyvaluep->$6)]), [(*yyvaluep)])])dnl
 m4_pushdef([b4_at_dollar], [(*yylocationp)])dnl
       case $4: /* $3 */
 b4_syncline([$2], [$1])
Index: bison/data/lalr1.cc
===================================================================
RCS file: /sources/bison/bison/data/lalr1.cc,v
retrieving revision 1.157
diff -u -r1.157 lalr1.cc
--- bison/data/lalr1.cc	23 Jan 2007 12:24:27 -0000	1.157
+++ bison/data/lalr1.cc	15 Jun 2007 06:53:13 -0000
@@ -50,7 +50,8 @@
 ]b4_percent_code_get([[requires]])[
 
 #include <string>
-#include <iostream>
+#include <iostream>]m4_ifdef([b4_variant],[
+#include <boost/variant.hpp>])[
 #include "stack.hh"
 
 namespace ]b4_namespace[
@@ -107,14 +108,15 @@
   public:
     /// Symbol semantic values.
 #ifndef YYSTYPE
-]m4_ifdef([b4_stype],
+]m4_ifdef([b4_variant],
+[    typedef boost::variant<b4_variant> semantic_type;],m4_ifdef([b4_stype],
 [    union semantic_type
     {
 b4_user_stype
     };],
 [m4_if(b4_tag_seen_flag, 0,
 [[    typedef int semantic_type;]],
-[[    typedef YYSTYPE semantic_type;]])])[
+[[    typedef YYSTYPE semantic_type;]])]))[
 #else
     typedef YYSTYPE semantic_type;
 #endif
@@ -565,8 +567,11 @@
        stacks with a primary value.  */
     yystate_stack_ = state_stack_type (0);
     yysemantic_stack_ = semantic_stack_type (0);
-    yylocation_stack_ = location_stack_type (0);
-    yysemantic_stack_.push (yylval);
+    yylocation_stack_ = location_stack_type (0);]m4_ifdef([b4_variant],[
+    /* we use swapping to avoid copying large amounts of data */
+    yysemantic_stack_.push (semantic_type());
+    yysemantic_stack_@{0@}.swap(yylval);],[
+    yysemantic_stack_.push (yylval);])[
     yylocation_stack_.push (yylloc);
 
     /* New state.  */
@@ -632,8 +637,11 @@
 
     /* Discard the token being shifted.  */
     yychar = yyempty_;
-
-    yysemantic_stack_.push (yylval);
+]m4_ifdef([b4_variant],[
+    /* we use swapping to avoid copying large amounts of data */
+    yysemantic_stack_.push (semantic_type());
+    yysemantic_stack_@{0@}.swap(yylval);],[
+    yysemantic_stack_.push (yylval);])[
     yylocation_stack_.push (yylloc);
 
     /* Count tokens shifted since error; after three, turn off error
@@ -657,7 +665,15 @@
   | yyreduce -- Do a reduction.  |
   `-----------------------------*/
   yyreduce:
-    yylen = yyr2_[yyn];
+    yylen = yyr2_[yyn];]m4_ifdef([b4_variant],[
+    /* Variants are always initialized to an empty instance of the correct type */
+    /* The default $$=$1 rule is NOT applied when using variants */
+    switch (yyn)
+      {
+]b4_variant_initializations[        default:
+          yyval = semantic_type();
+          break;
+      }],[
     /* If YYLEN is nonzero, implement the default value of the action:
        `$$ = $1'.  Otherwise, use the top of the stack.
 
@@ -665,9 +681,9 @@
        This behavior is undocumented and Bison
        users should not rely upon it.  */
     if (yylen)
-      yyval = yysemantic_stack_[yylen - 1];
+      yyval = yysemantic_stack_@{yylen - 1@};
     else
-      yyval = yysemantic_stack_[0];
+      yyval = yysemantic_stack_@{0@};])[
 
     {
       slice<location_type, location_stack_type> slice (yylocation_stack_, yylen);
@@ -684,8 +700,11 @@
     yypop_ (yylen);
     yylen = 0;
     YY_STACK_PRINT ();
-
-    yysemantic_stack_.push (yyval);
+]m4_ifdef([b4_variant],[
+    /* we use swapping to avoid copying large amounts of data */
+    yysemantic_stack_.push (semantic_type());
+    yysemantic_stack_@{0@}.swap(yyval);],[
+    yysemantic_stack_.push (yyval);])[
     yylocation_stack_.push (yyloc);
 
     /* Shift the result of the reduction.  */
@@ -789,8 +808,11 @@
     yyerror_range[1] = yylloc;
     // Using YYLLOC is tempting, but would change the location of
     // the lookahead.  YYLOC is available though.
-    YYLLOC_DEFAULT (yyloc, (yyerror_range - 1), 2);
-    yysemantic_stack_.push (yylval);
+    YYLLOC_DEFAULT (yyloc, (yyerror_range - 1), 2);]m4_ifdef([b4_variant],[
+    /* we use swapping to avoid copying large amounts of data */
+    yysemantic_stack_.push (semantic_type());
+    yysemantic_stack_@{0@}.swap(yylval);],[
+    yysemantic_stack_.push (yylval);])[
     yylocation_stack_.push (yyloc);
 
     /* Shift the error token.  */
Index: bison/src/output.c
===================================================================
RCS file: /sources/bison/bison/src/output.c,v
retrieving revision 1.267
diff -u -r1.267 output.c
--- bison/src/output.c	18 Jan 2007 02:18:17 -0000	1.267
+++ bison/src/output.c	15 Jun 2007 06:53:13 -0000
@@ -280,6 +280,26 @@
 
 
 
+/*-----------------------------------------------------.
+| Output the code for variant initializations to OUT.  |
+`-----------------------------------------------------*/
+
+static void
+variant_initializations_output (FILE *out)
+{
+  rule_number r;
+
+  fputs ("m4_define([b4_variant_initializations], \n[[", out);
+  for (r = 0; r < nrules; ++r)
+    if (rules[r].lhs->type_name)
+      {
+	fprintf (out, "        case %d:\n", r + 1);
+	fprintf (out, "          yyval = %s();\n          break;\n\n",
+		 rules[r].lhs->type_name);
+      }
+  fputs ("]])\n\n", out);
+}
+
 /*---------------------------------.
 | Output the user actions to OUT.  |
 `---------------------------------*/
@@ -532,6 +552,7 @@
   /* Output the definitions of all the muscles.  */
   fputs ("m4_init()\n", out);
 
+  variant_initializations_output (out);
   user_actions_output (out);
   merger_output (out);
   token_definitions_output (out);

Attachment: variant-test.yy
Description: Binary data

Reply via email to