From 1a9de062aad7b3d909c291d420edd1a2e45db461 Mon Sep 17 00:00:00 2001
From: amit <amitlangote09@gmail.com>
Date: Thu, 18 Jul 2019 10:22:31 +0900
Subject: [PATCH 1/4] Some cosmetic improvements to partitionwise join code

---
 src/backend/optimizer/path/joinrels.c | 18 +++++--
 src/backend/optimizer/util/plancat.c  | 20 ++++----
 src/backend/optimizer/util/relnode.c  | 92 ++++++++++++++++++++++-------------
 src/include/nodes/pathnodes.h         | 36 ++++++++++----
 4 files changed, 109 insertions(+), 57 deletions(-)

diff --git a/src/backend/optimizer/path/joinrels.c b/src/backend/optimizer/path/joinrels.c
index 6a480ab764..6c0904b695 100644
--- a/src/backend/optimizer/path/joinrels.c
+++ b/src/backend/optimizer/path/joinrels.c
@@ -1575,8 +1575,11 @@ build_child_join_sjinfo(PlannerInfo *root, SpecialJoinInfo *parent_sjinfo,
 }
 
 /*
- * Returns true if there exists an equi-join condition for each pair of
- * partition keys from given relations being joined.
+ * have_partkey_equi_join
+ *
+ * Returns true if there exist equi-join conditions involving pairs
+ * of matching partition keys of the relations being joined for all
+ * partition keys.
  */
 bool
 have_partkey_equi_join(RelOptInfo *joinrel,
@@ -1692,8 +1695,15 @@ have_partkey_equi_join(RelOptInfo *joinrel,
 }
 
 /*
- * Find the partition key from the given relation matching the given
- * expression. If found, return the index of the partition key, else return -1.
+ * match_expr_to_partition_keys
+ *
+ * Tries to match an expression to one of the nullable or non-nullable
+ * partition keys.  If a match is found, returns the matched key's
+ * ordinal position; returns -1 if the expression could not be matched
+ * to any of the keys.
+ *
+ * strict_op must be true if the expression will be compared with the
+ * partition key using a strict operator.
  */
 static int
 match_expr_to_partition_keys(Expr *expr, RelOptInfo *rel, bool strict_op)
diff --git a/src/backend/optimizer/util/plancat.c b/src/backend/optimizer/util/plancat.c
index e5f9e04d65..c85d321202 100644
--- a/src/backend/optimizer/util/plancat.c
+++ b/src/backend/optimizer/util/plancat.c
@@ -2248,9 +2248,8 @@ find_partition_scheme(PlannerInfo *root, Relation relation)
 /*
  * set_baserel_partition_key_exprs
  *
- * Builds partition key expressions for the given base relation and sets them
- * in given RelOptInfo.  Any single column partition keys are converted to Var
- * nodes.  All Var nodes are restamped with the relid of given relation.
+ * Builds partition key expressions for the given base relation and sets
+ * rel->partexprs.
  */
 static void
 set_baserel_partition_key_exprs(Relation relation,
@@ -2298,17 +2297,20 @@ set_baserel_partition_key_exprs(Relation relation,
 			lc = lnext(partkey->partexprs, lc);
 		}
 
+		/* Base relations have a single expression per key. */
 		partexprs[cnt] = list_make1(partexpr);
 	}
 
+	/*
+	 * For base relations, we assume that the partition keys are non-nullable,
+	 * although they are nullable in principle; list and hash partitioned
+	 * tables may contain nulls in the partition key(s), for example.
+	 * Assuming non-nullability is okay for the considerations of partition
+	 * pruning, because pruning is never performed with non-strict operators.
+	 */
 	rel->partexprs = partexprs;
 
-	/*
-	 * A base relation can not have nullable partition key expressions. We
-	 * still allocate array of empty expressions lists to keep partition key
-	 * expression handling code simple. See build_joinrel_partition_info() and
-	 * match_expr_to_partition_keys().
-	 */
+	/* Assigning NIL for each key means there are no nullable keys. */
 	rel->nullable_partexprs = (List **) palloc0(sizeof(List *) * partnatts);
 }
 
diff --git a/src/backend/optimizer/util/relnode.c b/src/backend/optimizer/util/relnode.c
index 03e02423b2..e30aa692d7 100644
--- a/src/backend/optimizer/util/relnode.c
+++ b/src/backend/optimizer/util/relnode.c
@@ -29,6 +29,7 @@
 #include "optimizer/tlist.h"
 #include "partitioning/partbounds.h"
 #include "utils/hsearch.h"
+#include "utils/lsyscache.h"
 
 
 typedef struct JoinHashEntry
@@ -58,6 +59,9 @@ static void add_join_rel(PlannerInfo *root, RelOptInfo *joinrel);
 static void build_joinrel_partition_info(RelOptInfo *joinrel,
 										 RelOptInfo *outer_rel, RelOptInfo *inner_rel,
 										 List *restrictlist, JoinType jointype);
+static void set_joinrel_partition_key_exprs(RelOptInfo *joinrel,
+								RelOptInfo *outer_rel, RelOptInfo *inner_rel,
+								JoinType jointype);
 static void build_child_join_reltarget(PlannerInfo *root,
 									   RelOptInfo *parentrel,
 									   RelOptInfo *childrel,
@@ -1607,18 +1611,18 @@ find_param_path_info(RelOptInfo *rel, Relids required_outer)
 
 /*
  * build_joinrel_partition_info
- *		If the two relations have same partitioning scheme, their join may be
- *		partitioned and will follow the same partitioning scheme as the joining
- *		relations. Set the partition scheme and partition key expressions in
- *		the join relation.
+ *		Checks if the two relations being joined can use partitionwise join
+ *		and, if so, initializes the partitioning information of the
+ *		resulting partitioned join relation.
+ *
+ * This will set part_scheme and partition key expressions (partexprs and
+ * nullable_partexprs) if required.
  */
 static void
 build_joinrel_partition_info(RelOptInfo *joinrel, RelOptInfo *outer_rel,
 							 RelOptInfo *inner_rel, List *restrictlist,
 							 JoinType jointype)
 {
-	int			partnatts;
-	int			cnt;
 	PartitionScheme part_scheme;
 
 	/* Nothing to do if partitionwise join technique is disabled. */
@@ -1685,11 +1689,8 @@ build_joinrel_partition_info(RelOptInfo *joinrel, RelOptInfo *outer_rel,
 	 */
 	joinrel->part_scheme = part_scheme;
 	joinrel->boundinfo = outer_rel->boundinfo;
-	partnatts = joinrel->part_scheme->partnatts;
-	joinrel->partexprs = (List **) palloc0(sizeof(List *) * partnatts);
-	joinrel->nullable_partexprs =
-		(List **) palloc0(sizeof(List *) * partnatts);
 	joinrel->nparts = outer_rel->nparts;
+	set_joinrel_partition_key_exprs(joinrel, outer_rel, inner_rel, jointype);
 	joinrel->part_rels =
 		(RelOptInfo **) palloc0(sizeof(RelOptInfo *) * joinrel->nparts);
 
@@ -1699,32 +1700,31 @@ build_joinrel_partition_info(RelOptInfo *joinrel, RelOptInfo *outer_rel,
 	Assert(outer_rel->consider_partitionwise_join);
 	Assert(inner_rel->consider_partitionwise_join);
 	joinrel->consider_partitionwise_join = true;
+}
+
+/*
+ * set_joinrel_partition_key_exprs
+ *		Initialize partition key expressions
+ */
+static void
+set_joinrel_partition_key_exprs(RelOptInfo *joinrel,
+								RelOptInfo *outer_rel, RelOptInfo *inner_rel,
+								JoinType jointype)
+{
+	int		partnatts;
+	int		cnt;
+
+	Assert(joinrel->part_scheme != NULL);
+
+	partnatts = joinrel->part_scheme->partnatts;
+	joinrel->partexprs = (List **) palloc0(sizeof(List *) * partnatts);
+	joinrel->nullable_partexprs =
+		(List **) palloc0(sizeof(List *) * partnatts);
 
 	/*
-	 * Construct partition keys for the join.
-	 *
-	 * An INNER join between two partitioned relations can be regarded as
-	 * partitioned by either key expression.  For example, A INNER JOIN B ON
-	 * A.a = B.b can be regarded as partitioned on A.a or on B.b; they are
-	 * equivalent.
-	 *
-	 * For a SEMI or ANTI join, the result can only be regarded as being
-	 * partitioned in the same manner as the outer side, since the inner
-	 * columns are not retained.
-	 *
-	 * An OUTER join like (A LEFT JOIN B ON A.a = B.b) may produce rows with
-	 * B.b NULL. These rows may not fit the partitioning conditions imposed on
-	 * B.b. Hence, strictly speaking, the join is not partitioned by B.b and
-	 * thus partition keys of an OUTER join should include partition key
-	 * expressions from the OUTER side only.  However, because all
-	 * commonly-used comparison operators are strict, the presence of nulls on
-	 * the outer side doesn't cause any problem; they can't match anything at
-	 * future join levels anyway.  Therefore, we track two sets of
-	 * expressions: those that authentically partition the relation
-	 * (partexprs) and those that partition the relation with the exception
-	 * that extra nulls may be present (nullable_partexprs).  When the
-	 * comparison operator is strict, the latter is just as good as the
-	 * former.
+	 * Join type determines which partition keys are assumed by the resulting
+	 * join relation.  Note that these keys are to be considered when checking
+	 * if any further joins involving this joinrel may be partitioned.
 	 */
 	for (cnt = 0; cnt < partnatts; cnt++)
 	{
@@ -1738,18 +1738,36 @@ build_joinrel_partition_info(RelOptInfo *joinrel, RelOptInfo *outer_rel,
 
 		switch (jointype)
 		{
+			/*
+			 * Join relation resulting from an INNER join may be regarded as
+			 * partitioned by either of inner and outer relation keys.  For
+			 * example, A INNER JOIN B ON A.a = B.b can be regarded as
+			 * partitioned on either A.a or B.b.
+			 */
 			case JOIN_INNER:
 				partexpr = list_concat_copy(outer_expr, inner_expr);
 				nullable_partexpr = list_concat_copy(outer_null_expr,
 													 inner_null_expr);
 				break;
 
+			/*
+			 * Join relation resulting from a SEMI or ANTI join may be
+			 * regarded as partitioned on the outer relation keys, since the
+			 * inner columns are omitted from the output.
+			 */
 			case JOIN_SEMI:
 			case JOIN_ANTI:
 				partexpr = list_copy(outer_expr);
 				nullable_partexpr = list_copy(outer_null_expr);
 				break;
 
+			/*
+			 * Join relation resulting from a LEFT OUTER JOIN likewise may be
+			 * regarded as partitioned on the (non-nullable) outer relation
+			 * keys.  The inner (nullable) relation keys are okay as partition
+			 * keys for further joins as long as they involve strict join
+			 * operators.
+			 */
 			case JOIN_LEFT:
 				partexpr = list_copy(outer_expr);
 				nullable_partexpr = list_concat_copy(inner_expr,
@@ -1758,6 +1776,12 @@ build_joinrel_partition_info(RelOptInfo *joinrel, RelOptInfo *outer_rel,
 												inner_null_expr);
 				break;
 
+			/*
+			 * For FULL OUTER JOINs, both relations are nullable, so the
+			 * resulting join relation may be regarded as partitioned on
+			 * either of inner and outer relation keys, but only for joins
+			 * that involve strict join operators.
+			 */
 			case JOIN_FULL:
 				nullable_partexpr = list_concat_copy(outer_expr,
 													 inner_expr);
diff --git a/src/include/nodes/pathnodes.h b/src/include/nodes/pathnodes.h
index 23a06d718e..80a5cb77f4 100644
--- a/src/include/nodes/pathnodes.h
+++ b/src/include/nodes/pathnodes.h
@@ -584,16 +584,32 @@ typedef struct PartitionSchemeData *PartitionScheme;
  *								 this relation that are partitioned tables
  *								 themselves, in hierarchical order
  *
- * Note: A base relation always has only one set of partition keys, but a join
- * relation may have as many sets of partition keys as the number of relations
- * being joined. partexprs and nullable_partexprs are arrays containing
- * part_scheme->partnatts elements each. Each of these elements is a list of
- * partition key expressions.  For a base relation each list in partexprs
- * contains only one expression and nullable_partexprs is not populated. For a
- * join relation, partexprs and nullable_partexprs contain partition key
- * expressions from non-nullable and nullable relations resp. Lists at any
- * given position in those arrays together contain as many elements as the
- * number of joining relations.
+ * Notes on partition key expressions (partexprs and nullable_partexprs):
+ *
+ * Partition key expressions will be used to spot references to the partition
+ * keys of the relation in the expressions of a given query so as to apply
+ * various partitioning-based optimizations to certain query constructs.  For
+ * example, pruning unnecessary partitions of a table using baserestrictinfo
+ * clauses that contain partition keys, or converting a join between two
+ * partitioned relations into a series of joins between pairs of their
+ * constituent partitions if the joined rows follow the same partitioning
+ * as the relations being joined.
+ *
+ * The partexprs and nullable_partexprs arrays each contain
+ * part_scheme->partnatts elements.  Each of the elements is a list of
+ * partition key expressions.  For partitioned *base* relations, there is one
+ * expression in every list, whereas for partitioned *join* relations, there
+ * can be as many as the number of component relations.
+ *
+ * nullable_partexprs is populated only in partitioned *join* relations,
+ * that is, if any of their component relations are nullable due to OUTER JOIN
+ * considerations.  It contains only the expressions of the nullable component
+ * relations, while those of the non-nullable relations are present in
+ * partexprs.  For the considerations of partitionwise join, nullable partition
+ * keys can be considered to partition the underlying relation in the same
+ * manner as the non-nullable partition keys do, as long as the join operator
+ * is strict, because those null-valued keys can't be joined further, thus
+ * preserving the partitioning.
  *----------
  */
 typedef enum RelOptKind
-- 
2.11.0

