Re: Convert planner's AggInfo and AggTransInfo to Nodes

Tom Lane Tue, 19 Jul 2022 12:24:21 -0700

Peter Eisentraut <[email protected]> writes:
> On 18.07.22 18:08, Tom Lane wrote:
>> I'm kind of tempted to mount an effort to get rid of as many of
>> pathnodes.h's "read_write_ignore" annotations as possible.  Some are
>> necessary to prevent infinite recursion, and others represent considered
>> judgments that they'd bloat node dumps more than they're worth --- but
>> I think quite a lot of them arose from plain laziness about updating
>> outfuncs.c.  With the infrastructure we have now, that's no longer
>> a good reason.


> That was my impression as well, and I agree it would be good to sort 
> that out.

I had a go at doing this, and ended up with something that seems
reasonable for now (attached).  The thing that'd have to be done to
make additional progress is to convert a lot of partitioning-related
structs into full Nodes.  That seems like it might possibly be
worth doing, but I don't feel like doing it.  I doubt that making
planner node dumps smarter is a sufficient excuse for that anyway.
(But possibly if we then larded related code with castNode() and
sibling macros, there'd be enough of a gain in type-safety to
justify it?)

I learned a couple of interesting things along the way:

* I'd thought we already had outfuncs support for writing an array
of node pointers.  We don't, but it's easily added.  I chose to
write the array with parenthesis decoration, mainly because that
eases moving around it in emacs.

* WRITE_OID_ARRAY and WRITE_BOOL_ARRAY needed extension to handle a null
array pointer.  I think we should make all the WRITE_FOO_ARRAY macros
work alike, so I added that to all of them.  I first tried to make the
rest work like WRITE_INDEX_ARRAY, but that failed because readfuncs.c
isn't expecting "<>" for an empty array; it's expecting nothing at
all.  (Note there is no readfuncs equivalent to WRITE_INDEX_ARRAY.)
What I've done here is to change WRITE_INDEX_ARRAY to work like the
others and print nothing for an empty array, but I wonder if now
wouldn't be a good time to redefine the serialized representation
to be more robust.  I'm imagining "<>" for a NULL array pointer and
"(item item item)" otherwise, allowing a cross-check that we're
getting the right number of items.

* gen_node_support.pl was being insufficiently careful about parsing
type names, so I tightened its regexes a bit.

                        regards, tom lane

diff --git a/src/backend/nodes/gen_node_support.pl b/src/backend/nodes/gen_node_support.pl
index f3309c3000..bee48696c7 100644
--- a/src/backend/nodes/gen_node_support.pl
+++ b/src/backend/nodes/gen_node_support.pl
@@ -441,6 +441,8 @@ foreach my $infile (@ARGV)
 					$type =~ s/\s*$//;
 					# strip space between type and "*" (pointer) */
 					$type =~ s/\s+\*$/*/;
+					# strip space between type and "**" (array of pointers) */
+					$type =~ s/\s+\*\*$/**/;
 
 					die
 					  "$infile:$lineno: cannot parse data type in \"$line\"\n"
@@ -745,8 +747,8 @@ _equal${n}(const $n *a, const $n *b)
 				  unless $equal_ignore || $t eq 'CoercionForm';
 			}
 		}
-		# scalar type pointer
-		elsif ($t =~ /(\w+)\*/ and elem $1, @scalar_types)
+		# arrays of scalar types
+		elsif ($t =~ /^(\w+)\*$/ and elem $1, @scalar_types)
 		{
 			my $tt = $1;
 			if (!defined $array_size_field)
@@ -780,13 +782,14 @@ _equal${n}(const $n *a, const $n *b)
 			print $eff "\tCOMPARE_SCALAR_FIELD($f);\n" unless $equal_ignore;
 		}
 		# node type
-		elsif ($t =~ /(\w+)\*/ and elem $1, @node_types)
+		elsif (($t =~ /^(\w+)\*$/ or $t =~ /^struct\s+(\w+)\*$/)
+			and elem $1, @node_types)
 		{
 			print $cff "\tCOPY_NODE_FIELD($f);\n"    unless $copy_ignore;
 			print $eff "\tCOMPARE_NODE_FIELD($f);\n" unless $equal_ignore;
 		}
 		# array (inline)
-		elsif ($t =~ /\w+\[/)
+		elsif ($t =~ /^\w+\[\w+\]$/)
 		{
 			print $cff "\tCOPY_ARRAY_FIELD($f);\n"    unless $copy_ignore;
 			print $eff "\tCOMPARE_ARRAY_FIELD($f);\n" unless $equal_ignore;
@@ -894,11 +897,16 @@ _read${n}(void)
 		my @a = @{ $node_type_info{$n}->{field_attrs}{$f} };
 
 		# extract per-field attributes
-		my $read_write_ignore = 0;
+		my $array_size_field;
 		my $read_as_field;
+		my $read_write_ignore = 0;
 		foreach my $a (@a)
 		{
-			if ($a =~ /^read_as\(([\w.]+)\)$/)
+			if ($a =~ /^array_size\(([\w.]+)\)$/)
+			{
+				$array_size_field = $1;
+			}
+			elsif ($a =~ /^read_as\(([\w.]+)\)$/)
 			{
 				$read_as_field = $1;
 			}
@@ -1015,19 +1023,10 @@ _read${n}(void)
 			print $off "\tWRITE_ENUM_FIELD($f, $t);\n";
 			print $rff "\tREAD_ENUM_FIELD($f, $t);\n" unless $no_read;
 		}
-		# arrays
-		elsif ($t =~ /(\w+)(\*|\[)/ and elem $1, @scalar_types)
+		# arrays of scalar types
+		elsif ($t =~ /^(\w+)(\*|\[\w+\])$/ and elem $1, @scalar_types)
 		{
 			my $tt = uc $1;
-			my $array_size_field;
-			foreach my $a (@a)
-			{
-				if ($a =~ /^array_size\(([\w.]+)\)$/)
-				{
-					$array_size_field = $1;
-					last;
-				}
-			}
 			if (!defined $array_size_field)
 			{
 				die "no array size defined for $n.$f of type $t\n";
@@ -1080,11 +1079,38 @@ _read${n}(void)
 			  . "\t\toutBitmapset(str, NULL);\n";
 		}
 		# node type
-		elsif ($t =~ /(\w+)\*/ and elem $1, @node_types)
+		elsif (($t =~ /^(\w+)\*$/ or $t =~ /^struct\s+(\w+)\*$/)
+			and elem $1, @node_types)
 		{
 			print $off "\tWRITE_NODE_FIELD($f);\n";
 			print $rff "\tREAD_NODE_FIELD($f);\n" unless $no_read;
 		}
+		# arrays of node pointers (currently supported for write only)
+		elsif (($t =~ /^(\w+)\*\*$/ or $t =~ /^struct\s+(\w+)\*\*$/)
+		       and elem $1, @node_types)
+		{
+			if (!defined $array_size_field)
+			{
+				die "no array size defined for $n.$f of type $t\n";
+			}
+			if ($node_type_info{$n}->{field_types}{$array_size_field} eq
+				'List*')
+			{
+				print $off
+				  "\tWRITE_NODE_ARRAY($f, list_length(node->$array_size_field));\n";
+				print $rff
+				  "\tREAD_NODE_ARRAY($f, list_length(local_node->$array_size_field));\n"
+				  unless $no_read;
+			}
+			else
+			{
+				print $off
+				  "\tWRITE_NODE_ARRAY($f, node->$array_size_field);\n";
+				print $rff
+				  "\tREAD_NODE_ARRAY($f, local_node->$array_size_field);\n"
+				  unless $no_read;
+			}
+		}
 		elsif ($t eq 'struct CustomPathMethods*'
 			|| $t eq 'struct CustomScanMethods*')
 		{
diff --git a/src/backend/nodes/outfuncs.c b/src/backend/nodes/outfuncs.c
index 2b85f97f39..01d70a75e4 100644
--- a/src/backend/nodes/outfuncs.c
+++ b/src/backend/nodes/outfuncs.c
@@ -96,49 +96,63 @@ static void outChar(StringInfo str, char c);
 	(appendStringInfoString(str, " :" CppAsString(fldname) " "), \
 	 outBitmapset(str, node->fldname))
 
+#define WRITE_NODE_ARRAY(fldname, len) \
+	do { \
+		appendStringInfoString(str, " :" CppAsString(fldname) " "); \
+		if (node->fldname) \
+		{ \
+			appendStringInfoChar(str, '('); \
+			for (int i = 0; i < len; i++) \
+			{ \
+				appendStringInfoChar(str, ' '); \
+				outNode(str, node->fldname[i]); \
+			} \
+			appendStringInfoChar(str, ')'); \
+		} \
+		else \
+			appendStringInfoString(str, "<>"); \
+	} while(0)
+
 #define WRITE_ATTRNUMBER_ARRAY(fldname, len) \
 	do { \
 		appendStringInfoString(str, " :" CppAsString(fldname) " "); \
-		for (int i = 0; i < len; i++) \
-			appendStringInfo(str, " %d", node->fldname[i]); \
+		if (node->fldname) \
+			for (int i = 0; i < len; i++) \
+				appendStringInfo(str, " %d", node->fldname[i]); \
 	} while(0)
 
 #define WRITE_OID_ARRAY(fldname, len) \
 	do { \
 		appendStringInfoString(str, " :" CppAsString(fldname) " "); \
-		for (int i = 0; i < len; i++) \
-			appendStringInfo(str, " %u", node->fldname[i]); \
+		if (node->fldname) \
+			for (int i = 0; i < len; i++) \
+				appendStringInfo(str, " %u", node->fldname[i]); \
 	} while(0)
 
-/*
- * This macro supports the case that the field is NULL.  For the other array
- * macros, that is currently not needed.
- */
 #define WRITE_INDEX_ARRAY(fldname, len) \
 	do { \
 		appendStringInfoString(str, " :" CppAsString(fldname) " "); \
 		if (node->fldname) \
 			for (int i = 0; i < len; i++) \
 				appendStringInfo(str, " %u", node->fldname[i]); \
-		else \
-			appendStringInfoString(str, "<>"); \
 	} while(0)
 
 #define WRITE_INT_ARRAY(fldname, len) \
 	do { \
 		appendStringInfoString(str, " :" CppAsString(fldname) " "); \
-		for (int i = 0; i < len; i++) \
-			appendStringInfo(str, " %d", node->fldname[i]); \
+		if (node->fldname) \
+			for (int i = 0; i < len; i++) \
+				appendStringInfo(str, " %d", node->fldname[i]); \
 	} while(0)
 
 #define WRITE_BOOL_ARRAY(fldname, len) \
 	do { \
 		appendStringInfoString(str, " :" CppAsString(fldname) " "); \
-		for (int i = 0; i < len; i++) \
-			appendStringInfo(str, " %s", booltostr(node->fldname[i])); \
+		if (node->fldname) \
+			for (int i = 0; i < len; i++) \
+				appendStringInfo(str, " %s", booltostr(node->fldname[i])); \
 	} while(0)
 
-
 #define booltostr(x)  ((x) ? "true" : "false")
 
 
diff --git a/src/include/nodes/pathnodes.h b/src/include/nodes/pathnodes.h
index e650af5ff2..1f95e46a58 100644
--- a/src/include/nodes/pathnodes.h
+++ b/src/include/nodes/pathnodes.h
@@ -89,7 +89,7 @@ typedef enum UpperRelationKind
  * planned.
  *
  * Not all fields are printed.  (In some cases, there is no print support for
- * the field type.)
+ * the field type; in others, doing so would lead to infinite recursion.)
  *----------
  */
 typedef struct PlannerGlobal
@@ -177,7 +177,8 @@ typedef struct PlannerGlobal
  * either here or in that header, whichever is read first.
  *
  * Not all fields are printed.  (In some cases, there is no print support for
- * the field type.)
+ * the field type; in others, doing so would lead to infinite recursion or
+ * bloat dump output more than seems useful.)
  *----------
  */
 #ifndef HAVE_PLANNERINFO_TYPEDEF
@@ -220,14 +221,15 @@ struct PlannerInfo
 	 * does not correspond to a base relation, such as a join RTE or an
 	 * unreferenced view RTE; or if the RelOptInfo hasn't been made yet.
 	 */
-	struct RelOptInfo **simple_rel_array pg_node_attr(read_write_ignore);
+	struct RelOptInfo **simple_rel_array pg_node_attr(array_size(simple_rel_array_size));
 	/* allocated size of array */
-	int			simple_rel_array_size pg_node_attr(read_write_ignore);
+	int			simple_rel_array_size;
 
 	/*
 	 * simple_rte_array is the same length as simple_rel_array and holds
 	 * pointers to the associated rangetable entries.  Using this is a shade
-	 * faster than using rt_fetch(), mostly due to fewer indirections.
+	 * faster than using rt_fetch(), mostly due to fewer indirections.  (Not
+	 * printed because it'd be redundant with parse->rtable.)
 	 */
 	RangeTblEntry **simple_rte_array pg_node_attr(read_write_ignore);
 
@@ -237,7 +239,7 @@ struct PlannerInfo
 	 * child_relid, or NULL if the rel is not an appendrel child.  The array
 	 * itself is not allocated if append_rel_list is empty.
 	 */
-	struct AppendRelInfo **append_rel_array pg_node_attr(read_write_ignore);
+	struct AppendRelInfo **append_rel_array pg_node_attr(array_size(simple_rel_array_size));
 
 	/*
 	 * all_baserels is a Relids set of all base relids (but not "other"
@@ -274,7 +276,7 @@ struct PlannerInfo
 	 * automatically added to the join_rel_level[join_cur_level] list.
 	 * join_rel_level is NULL if not in use.
 	 */
-	/* lists of join-relation RelOptInfos */
+	/* lists of join-relation RelOptInfos (too bulky to print) */
 	List	  **join_rel_level pg_node_attr(read_write_ignore);
 	/* index of list being extended */
 	int			join_cur_level;
@@ -403,8 +405,8 @@ struct PlannerInfo
 	/*
 	 * Fields filled during create_plan() for use in setrefs.c
 	 */
-	/* for GroupingFunc fixup */
-	AttrNumber *grouping_map pg_node_attr(array_size(update_colnos), read_write_ignore);
+	/* for GroupingFunc fixup (can't print: array length not known here) */
+	AttrNumber *grouping_map pg_node_attr(read_write_ignore);
 	/* List of MinMaxAggInfos */
 	List	   *minmax_aggs;
 
@@ -458,7 +460,7 @@ struct PlannerInfo
 	/* PARAM_EXEC ID for the work table */
 	int			wt_param_id;
 	/* a path for non-recursive term */
-	struct Path *non_recursive_path pg_node_attr(read_write_ignore);
+	struct Path *non_recursive_path;
 
 	/*
 	 * These fields are workspace for createplan.c
@@ -470,7 +472,9 @@ struct PlannerInfo
 
 	/*
 	 * These fields are workspace for setrefs.c.  Each is an array
-	 * corresponding to glob->subplans.
+	 * corresponding to glob->subplans.  (We could probably teach
+	 * gen_node_support.pl how to determine the array length, but it doesn't
+	 * seem worth the trouble, so just mark them read_write_ignore.)
 	 */
 	bool	   *isAltSubplan pg_node_attr(read_write_ignore);
 	bool	   *isUsedSubplan pg_node_attr(read_write_ignore);
@@ -928,16 +932,17 @@ typedef struct RelOptInfo
 	 * Number of partitions; -1 if not yet set; in case of a join relation 0
 	 * means it's considered unpartitioned
 	 */
-	int			nparts pg_node_attr(read_write_ignore);
+	int			nparts;
 	/* Partition bounds */
 	struct PartitionBoundInfoData *boundinfo pg_node_attr(read_write_ignore);
 	/* True if partition bounds were created by partition_bounds_merge() */
 	bool		partbounds_merged;
 	/* Partition constraint, if not the root */
-	List	   *partition_qual pg_node_attr(read_write_ignore);
+	List	   *partition_qual;
 
 	/*
 	 * Array of RelOptInfos of partitions, stored in the same order as bounds
+	 * (don't print, too bulky)
 	 */
 	struct RelOptInfo **part_rels pg_node_attr(read_write_ignore);
 
@@ -948,6 +953,12 @@ typedef struct RelOptInfo
 	Bitmapset  *live_parts;
 	/* Relids set of all partition relids */
 	Relids		all_partrels;
+
+	/*
+	 * These arrays are of length partkey->partnatts, which we don't have at
+	 * hand, so don't try to print
+	 */
+
 	/* Non-nullable partition key expressions */
 	List	  **partexprs pg_node_attr(read_write_ignore);
 	/* Nullable partition key expressions */
@@ -1042,30 +1053,26 @@ struct IndexOptInfo
 	int			nkeycolumns;
 
 	/*
-	 * array fields aren't really worth the trouble to print
+	 * table column numbers of index's columns (both key and included
+	 * columns), or 0 for expression columns
 	 */
-
-	/*
-	 * column numbers of index's attributes both key and included columns, or
-	 * 0
-	 */
-	int		   *indexkeys pg_node_attr(read_write_ignore);
+	int		   *indexkeys pg_node_attr(array_size(ncolumns));
 	/* OIDs of collations of index columns */
-	Oid		   *indexcollations pg_node_attr(read_write_ignore);
+	Oid		   *indexcollations pg_node_attr(array_size(nkeycolumns));
 	/* OIDs of operator families for columns */
-	Oid		   *opfamily pg_node_attr(read_write_ignore);
+	Oid		   *opfamily pg_node_attr(array_size(nkeycolumns));
 	/* OIDs of opclass declared input data types */
-	Oid		   *opcintype pg_node_attr(read_write_ignore);
+	Oid		   *opcintype pg_node_attr(array_size(nkeycolumns));
 	/* OIDs of btree opfamilies, if orderable */
-	Oid		   *sortopfamily pg_node_attr(read_write_ignore);
+	Oid		   *sortopfamily pg_node_attr(array_size(nkeycolumns));
 	/* is sort order descending? */
-	bool	   *reverse_sort pg_node_attr(read_write_ignore);
+	bool	   *reverse_sort pg_node_attr(array_size(nkeycolumns));
 	/* do NULLs come first in the sort order? */
-	bool	   *nulls_first pg_node_attr(read_write_ignore);
+	bool	   *nulls_first pg_node_attr(array_size(nkeycolumns));
 	/* opclass-specific options for columns */
 	bytea	  **opclassoptions pg_node_attr(read_write_ignore);
 	/* which index cols can be returned in an index-only scan? */
-	bool	   *canreturn pg_node_attr(read_write_ignore);
+	bool	   *canreturn pg_node_attr(array_size(ncolumns));
 	/* OID of the access method (in pg_am) */
 	Oid			relam;
 
@@ -1098,19 +1105,19 @@ struct IndexOptInfo
 
 	/*
 	 * Remaining fields are copied from the index AM's API struct
-	 * (IndexAmRoutine).  We don't bother to dump them.
+	 * (IndexAmRoutine).
 	 */
-	bool		amcanorderbyop pg_node_attr(read_write_ignore);
-	bool		amoptionalkey pg_node_attr(read_write_ignore);
-	bool		amsearcharray pg_node_attr(read_write_ignore);
-	bool		amsearchnulls pg_node_attr(read_write_ignore);
+	bool		amcanorderbyop;
+	bool		amoptionalkey;
+	bool		amsearcharray;
+	bool		amsearchnulls;
 	/* does AM have amgettuple interface? */
-	bool		amhasgettuple pg_node_attr(read_write_ignore);
+	bool		amhasgettuple;
 	/* does AM have amgetbitmap interface? */
-	bool		amhasgetbitmap pg_node_attr(read_write_ignore);
-	bool		amcanparallel pg_node_attr(read_write_ignore);
+	bool		amhasgetbitmap;
+	bool		amcanparallel;
 	/* does AM have ammarkpos interface? */
-	bool		amcanmarkpos pg_node_attr(read_write_ignore);
+	bool		amcanmarkpos;
 	/* AM's cost estimator */
 	/* Rather than include amapi.h here, we declare amcostestimate like this */
 	void		(*amcostestimate) () pg_node_attr(read_write_ignore);
@@ -1184,12 +1191,9 @@ typedef struct StatisticExtInfo
 	Oid			statOid;
 
 	/* includes child relations */
-	bool		inherit pg_node_attr(read_write_ignore);
+	bool		inherit;
 
-	/*
-	 * back-link to statistic's table; don't print, infinite recursion on plan
-	 * tree dump
-	 */
+	/* back-link to statistic's table; don't print, else infinite recursion */
 	RelOptInfo *rel pg_node_attr(read_write_ignore);
 
 	/* statistics kind of this entry */

Re: Convert planner's AggInfo and AggTransInfo to Nodes

Reply via email to