Re: missing indexes in indexlist with partitioned tables

Arne Roland Sat, 01 Oct 2022 09:34:48 -0700

On Tue, Sep 20, 2022 at 4:53 PM David Rowley <dgrowle...@gmail.com> wrote:
> Arne sent me an off-list
> message to say he's planning on working on the patch that uses the
> existing field instead of the new one he originally added. Let's hold
> off for that patch.


I wouldn't say, I explicitly stated that. But I ended up doing something, that 
resulted in the attached patch. :)


For my own sanity I greped one last time for the usage of indexlist.

Most of the (untouched) usages have comments that, they are only called for 
baserels/plain tables.

Namely all but the cluster of partitioned tables. I had to reread that section. 
There we are just traversing the tree and omitting partitioned tables.


There is now a test section in join.sql for partitioned tables, that tests very 
similar to the

baserel case. That's more thorough, than what I originally went for.

Further feedback would be appreciated!


Regards
Arne

From 290252bab5837c1a6f42bd53cf788c8696d5d0ec Mon Sep 17 00:00:00 2001
From: Arne Roland <arne.rol...@index.de>
Date: Sat, 1 Oct 2022 18:14:34 +0200
Subject: [PATCH v8_indexlist_contains_partitioned_indexes] v8

---
 src/backend/optimizer/util/plancat.c         | 226 ++++++++++---------
 src/backend/storage/buffer/bufmgr.c          |   6 +-
 src/backend/utils/adt/selfuncs.c             |   4 +
 src/test/regress/expected/inherit.out        |   6 +
 src/test/regress/expected/join.out           | 124 +++++++++-
 src/test/regress/expected/partition_join.out |  30 +--
 src/test/regress/sql/inherit.sql             |   2 +
 src/test/regress/sql/join.sql                |  69 +++++-
 src/test/regress/sql/partition_join.sql      |   6 +-
 9 files changed, 348 insertions(+), 125 deletions(-)

diff --git a/src/backend/optimizer/util/plancat.c b/src/backend/optimizer/util/plancat.c
index 6d5718ee4c..7545ae862d 100644
--- a/src/backend/optimizer/util/plancat.c
+++ b/src/backend/optimizer/util/plancat.c
@@ -106,10 +106,12 @@ static void set_baserel_partition_constraint(Relation relation,
  * cases these are left as zeroes, but sometimes we need to compute attr
  * widths here, and we may as well cache the results for costsize.c.
  *
- * If inhparent is true, all we need to do is set up the attr arrays:
- * the RelOptInfo actually represents the appendrel formed by an inheritance
- * tree, and so the parent rel's physical size and index information isn't
- * important for it.
+ * If inhparent is true, we don't care about physical size, index am
+ * and estimates.
+ * We still need index uniqueness for join removal.
+ * When it comes to the path, the RelOptInfo actually represents
+ * the appendrel formed by an inheritance tree, and so the parent
+ * rel's physical size isn't important for it.
  */
 void
 get_relation_info(PlannerInfo *root, Oid relationObjectId, bool inhparent,
@@ -157,10 +159,12 @@ get_relation_info(PlannerInfo *root, Oid relationObjectId, bool inhparent,
 
 	/*
 	 * Make list of indexes.  Ignore indexes on system catalogs if told to.
-	 * Don't bother with indexes for an inheritance parent, either.
+	 * Don't bother with indexes from traditional inheritance parents.
+	 * We care about partitioned indexes for join pruning,
+	 * even if we don't have any am for them.
 	 */
-	if (inhparent ||
-		(IgnoreSystemIndexes && IsSystemRelation(relation)))
+	if ((inhparent && relation->rd_rel->relkind != RELKIND_PARTITIONED_TABLE)
+		|| (IgnoreSystemIndexes && IsSystemRelation(relation)))
 		hasindex = false;
 	else
 		hasindex = relation->rd_rel->relhasindex;
@@ -191,8 +195,8 @@ get_relation_info(PlannerInfo *root, Oid relationObjectId, bool inhparent,
 			IndexAmRoutine *amroutine;
 			IndexOptInfo *info;
 			int			ncolumns,
-						nkeycolumns;
-			int			i;
+						nkeycolumns,
+			            i;
 
 			/*
 			 * Extract info from the relation descriptor for the index.
@@ -213,16 +217,6 @@ get_relation_info(PlannerInfo *root, Oid relationObjectId, bool inhparent,
 				continue;
 			}
 
-			/*
-			 * Ignore partitioned indexes, since they are not usable for
-			 * queries.
-			 */
-			if (indexRelation->rd_rel->relkind == RELKIND_PARTITIONED_INDEX)
-			{
-				index_close(indexRelation, NoLock);
-				continue;
-			}
-
 			/*
 			 * If the index is valid, but cannot yet be used, ignore it; but
 			 * mark the plan we are generating as transient. See
@@ -267,108 +261,133 @@ get_relation_info(PlannerInfo *root, Oid relationObjectId, bool inhparent,
 
 			info->relam = indexRelation->rd_rel->relam;
 
-			/* We copy just the fields we need, not all of rd_indam */
-			amroutine = indexRelation->rd_indam;
-			info->amcanorderbyop = amroutine->amcanorderbyop;
-			info->amoptionalkey = amroutine->amoptionalkey;
-			info->amsearcharray = amroutine->amsearcharray;
-			info->amsearchnulls = amroutine->amsearchnulls;
-			info->amcanparallel = amroutine->amcanparallel;
-			info->amhasgettuple = (amroutine->amgettuple != NULL);
-			info->amhasgetbitmap = amroutine->amgetbitmap != NULL &&
-				relation->rd_tableam->scan_bitmap_next_block != NULL;
-			info->amcanmarkpos = (amroutine->ammarkpos != NULL &&
-								  amroutine->amrestrpos != NULL);
-			info->amcostestimate = amroutine->amcostestimate;
-			Assert(info->amcostestimate != NULL);
-
-			/* Fetch index opclass options */
-			info->opclassoptions = RelationGetIndexAttOptions(indexRelation, true);
-
 			/*
-			 * Fetch the ordering information for the index, if any.
+			 * We don't have am for partitioned indexes, therefore we skip
+			 * gathering the info. This is ok, because we don't use
+			 * partitioned indexes in paths. We resolve inheritance before
+			 * generating paths.
 			 */
-			if (info->relam == BTREE_AM_OID)
+			if (indexRelation->rd_rel->relkind != RELKIND_PARTITIONED_INDEX)
 			{
+				/* We copy just the fields we need, not all of rd_indam */
+				amroutine = indexRelation->rd_indam;
+				info->amcanorderbyop = amroutine->amcanorderbyop;
+				info->amoptionalkey = amroutine->amoptionalkey;
+				info->amsearcharray = amroutine->amsearcharray;
+				info->amsearchnulls = amroutine->amsearchnulls;
+				info->amcanparallel = amroutine->amcanparallel;
+				info->amhasgettuple = (amroutine->amgettuple != NULL);
+				info->amhasgetbitmap = amroutine->amgetbitmap != NULL &&
+					relation->rd_tableam->scan_bitmap_next_block != NULL;
+				info->amcanmarkpos = (amroutine->ammarkpos != NULL &&
+					amroutine->amrestrpos != NULL);
+				info->amcostestimate = amroutine->amcostestimate;
+				Assert(info->amcostestimate != NULL);
+
+				/* Fetch index opclass options */
+				info->opclassoptions = RelationGetIndexAttOptions(indexRelation, true);
+
 				/*
-				 * If it's a btree index, we can use its opfamily OIDs
-				 * directly as the sort ordering opfamily OIDs.
+				 * Fetch the ordering information for the index, if any.
 				 */
-				Assert(amroutine->amcanorder);
-
-				info->sortopfamily = info->opfamily;
-				info->reverse_sort = (bool *) palloc(sizeof(bool) * nkeycolumns);
-				info->nulls_first = (bool *) palloc(sizeof(bool) * nkeycolumns);
-
-				for (i = 0; i < nkeycolumns; i++)
+				if (info->relam == BTREE_AM_OID)
 				{
-					int16		opt = indexRelation->rd_indoption[i];
+					/*
+					 * If it's a btree index, we can use its opfamily OIDs
+					 * directly as the sort ordering opfamily OIDs.
+					 */
+					Assert(amroutine->amcanorder);
 
-					info->reverse_sort[i] = (opt & INDOPTION_DESC) != 0;
-					info->nulls_first[i] = (opt & INDOPTION_NULLS_FIRST) != 0;
-				}
-			}
-			else if (amroutine->amcanorder)
-			{
-				/*
-				 * Otherwise, identify the corresponding btree opfamilies by
-				 * trying to map this index's "<" operators into btree.  Since
-				 * "<" uniquely defines the behavior of a sort order, this is
-				 * a sufficient test.
-				 *
-				 * XXX This method is rather slow and also requires the
-				 * undesirable assumption that the other index AM numbers its
-				 * strategies the same as btree.  It'd be better to have a way
-				 * to explicitly declare the corresponding btree opfamily for
-				 * each opfamily of the other index type.  But given the lack
-				 * of current or foreseeable amcanorder index types, it's not
-				 * worth expending more effort on now.
-				 */
-				info->sortopfamily = (Oid *) palloc(sizeof(Oid) * nkeycolumns);
-				info->reverse_sort = (bool *) palloc(sizeof(bool) * nkeycolumns);
-				info->nulls_first = (bool *) palloc(sizeof(bool) * nkeycolumns);
+					info->sortopfamily = info->opfamily;
+					info->reverse_sort = (bool *)palloc(sizeof(bool) * nkeycolumns);
+					info->nulls_first = (bool *)palloc(sizeof(bool) * nkeycolumns);
 
-				for (i = 0; i < nkeycolumns; i++)
-				{
-					int16		opt = indexRelation->rd_indoption[i];
-					Oid			ltopr;
-					Oid			btopfamily;
-					Oid			btopcintype;
-					int16		btstrategy;
-
-					info->reverse_sort[i] = (opt & INDOPTION_DESC) != 0;
-					info->nulls_first[i] = (opt & INDOPTION_NULLS_FIRST) != 0;
-
-					ltopr = get_opfamily_member(info->opfamily[i],
-												info->opcintype[i],
-												info->opcintype[i],
-												BTLessStrategyNumber);
-					if (OidIsValid(ltopr) &&
-						get_ordering_op_properties(ltopr,
-												   &btopfamily,
-												   &btopcintype,
-												   &btstrategy) &&
-						btopcintype == info->opcintype[i] &&
-						btstrategy == BTLessStrategyNumber)
+					for (i = 0; i < nkeycolumns; i++)
 					{
-						/* Successful mapping */
-						info->sortopfamily[i] = btopfamily;
+						int16		opt = indexRelation->rd_indoption[i];
+
+						info->reverse_sort[i] = (opt & INDOPTION_DESC) != 0;
+						info->nulls_first[i] = (opt & INDOPTION_NULLS_FIRST) != 0;
 					}
-					else
+				}
+				else if (amroutine->amcanorder)
+				{
+					/*
+					 * Otherwise, identify the corresponding btree opfamilies by
+					 * trying to map this index's "<" operators into btree.  Since
+					 * "<" uniquely defines the behavior of a sort order, this is
+					 * a sufficient test.
+					 *
+					 * XXX This method is rather slow and also requires the
+					 * undesirable assumption that the other index AM numbers its
+					 * strategies the same as btree.  It'd be better to have a way
+					 * to explicitly declare the corresponding btree opfamily for
+					 * each opfamily of the other index type.  But given the lack
+					 * of current or foreseeable amcanorder index types, it's not
+					 * worth expending more effort on now.
+					 */
+					info->sortopfamily = (Oid *)palloc(sizeof(Oid) * nkeycolumns);
+					info->reverse_sort = (bool *)palloc(sizeof(bool) * nkeycolumns);
+					info->nulls_first = (bool *)palloc(sizeof(bool) * nkeycolumns);
+
+					for (i = 0; i < nkeycolumns; i++)
 					{
-						/* Fail ... quietly treat index as unordered */
-						info->sortopfamily = NULL;
-						info->reverse_sort = NULL;
-						info->nulls_first = NULL;
-						break;
+						int16		opt = indexRelation->rd_indoption[i];
+						Oid			ltopr;
+						Oid			btopfamily;
+						Oid			btopcintype;
+						int16		btstrategy;
+
+						info->reverse_sort[i] = (opt & INDOPTION_DESC) != 0;
+						info->nulls_first[i] = (opt & INDOPTION_NULLS_FIRST) != 0;
+
+						ltopr = get_opfamily_member(info->opfamily[i],
+							info->opcintype[i],
+							info->opcintype[i],
+							BTLessStrategyNumber);
+						if (OidIsValid(ltopr) &&
+							get_ordering_op_properties(ltopr,
+								&btopfamily,
+								&btopcintype,
+								&btstrategy) &&
+							btopcintype == info->opcintype[i] &&
+							btstrategy == BTLessStrategyNumber)
+						{
+							/* Successful mapping */
+							info->sortopfamily[i] = btopfamily;
+						}
+						else
+						{
+							/* Fail ... quietly treat index as unordered */
+							info->sortopfamily = NULL;
+							info->reverse_sort = NULL;
+							info->nulls_first = NULL;
+							break;
+						}
 					}
 				}
+				else
+				{
+					info->sortopfamily = NULL;
+					info->reverse_sort = NULL;
+					info->nulls_first = NULL;
+				}
 			}
 			else
 			{
 				info->sortopfamily = NULL;
 				info->reverse_sort = NULL;
 				info->nulls_first = NULL;
+
+				info->amcanorderbyop = false;
+				info->amoptionalkey = false;
+				info->amsearcharray = false;
+				info->amsearchnulls = false;
+				info->amcanparallel = false;
+				info->amhasgettuple = false;
+				info->amhasgetbitmap = false;
+				info->amcanmarkpos = false;
+				info->amcostestimate = NULL;
 			}
 
 			/*
@@ -415,7 +434,8 @@ get_relation_info(PlannerInfo *root, Oid relationObjectId, bool inhparent,
 					info->tuples = rel->tuples;
 			}
 
-			if (info->relam == BTREE_AM_OID)
+			if (info->relam == BTREE_AM_OID &&
+				indexRelation->rd_rel->relkind != RELKIND_PARTITIONED_INDEX)
 			{
 				/* For btrees, get tree height while we have the index open */
 				info->tree_height = _bt_getrootheight(indexRelation);
diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c
index 5b0e531f97..91df65414b 100644
--- a/src/backend/storage/buffer/bufmgr.c
+++ b/src/backend/storage/buffer/bufmgr.c
@@ -2971,9 +2971,11 @@ RelationGetNumberOfBlocksInFork(Relation relation, ForkNumber forkNum)
 		return smgrnblocks(RelationGetSmgr(relation), forkNum);
 	}
 	else
-		Assert(false);
+	{
+		Assert(relation->rd_rel->relkind == RELKIND_PARTITIONED_INDEX);
+	}
 
-	return 0;					/* keep compiler quiet */
+	return 0;
 }
 
 /*
diff --git a/src/backend/utils/adt/selfuncs.c b/src/backend/utils/adt/selfuncs.c
index 1808388397..f1a97132bd 100644
--- a/src/backend/utils/adt/selfuncs.c
+++ b/src/backend/utils/adt/selfuncs.c
@@ -5995,6 +5995,10 @@ get_actual_variable_range(PlannerInfo *root, VariableStatData *vardata,
 	rte = root->simple_rte_array[rel->relid];
 	Assert(rte->rtekind == RTE_RELATION);
 
+	/* ignore partitioned tables.  Any indexes here are not real indexes */
+	if (rte->relkind == RELKIND_PARTITIONED_TABLE)
+		return false;
+
 	/* Search through the indexes to see if any match our problem */
 	foreach(lc, rel->indexlist)
 	{
diff --git a/src/test/regress/expected/inherit.out b/src/test/regress/expected/inherit.out
index 2d49e765de..c1619754a7 100644
--- a/src/test/regress/expected/inherit.out
+++ b/src/test/regress/expected/inherit.out
@@ -721,6 +721,12 @@ select * from d;
  32 | one | two | three
 (1 row)
 
+DROP TABLE a CASCADE;
+NOTICE:  drop cascades to 4 other objects
+DETAIL:  drop cascades to table b
+drop cascades to table c
+drop cascades to table d
+drop cascades to table z
 -- The above verified that we can change the type of a multiply-inherited
 -- column; but we should reject that if any definition was inherited from
 -- an unrelated parent.
diff --git a/src/test/regress/expected/join.out b/src/test/regress/expected/join.out
index 2ed2e542a4..a5f1aa577b 100644
--- a/src/test/regress/expected/join.out
+++ b/src/test/regress/expected/join.out
@@ -4588,7 +4588,8 @@ select a.q2, b.q1
 reset enable_hashjoin;
 reset enable_nestloop;
 --
--- test join removal
+-- test join removal for plain tables
+-- we have almost the smare tests for partitioned tables below
 --
 begin;
 CREATE TEMP TABLE a (id int PRIMARY KEY, b_id int);
@@ -4722,6 +4723,127 @@ select 1 from (select a.id FROM a left join b on a.b_id = b.id) q,
          Filter: (a.id = i)
 (4 rows)
 
+rollback;
+--
+-- test join removal for partitioned tables
+-- this is analog to the non partitioned case above
+--
+begin;
+CREATE TABLE a (id int PRIMARY KEY, b_id int) partition by range(id);
+CREATE TABLE a_m partition of a for values from (0) to (10) partition by range(id);
+CREATE TABLE a_c partition of a_m for values from (0) to (10);
+CREATE TABLE b (id int PRIMARY KEY, c_id int) partition by range(id);
+CREATE TABLE b_m partition of b for values from (0) to (10) partition by range(id);
+CREATE TABLE b_c partition of b_m for values from (0) to (10);
+CREATE TABLE c (id int PRIMARY KEY) partition by range(id);
+CREATE TABLE c_m partition of c for values from (0) to (10) partition by range(id);
+CREATE TABLE c_c partition of c_m for values from (0) to (10);
+CREATE TABLE d (a int, b int) partition by range(a);
+CREATE TABLE d_c partition of d for values from (0) to (10);
+INSERT INTO a VALUES (0, 0), (1, NULL);
+INSERT INTO b VALUES (0, 0), (1, NULL);
+INSERT INTO c VALUES (0), (1);
+INSERT INTO d VALUES (1,3), (2,2), (3,1);
+-- all three cases should be optimizable into a simple seqscan
+explain (costs off) SELECT a.* FROM a LEFT JOIN b ON a.b_id = b.id;
+    QUERY PLAN     
+-------------------
+ Seq Scan on a_c a
+(1 row)
+
+explain (costs off) SELECT b.* FROM b LEFT JOIN c ON b.c_id = c.id;
+    QUERY PLAN     
+-------------------
+ Seq Scan on b_c b
+(1 row)
+
+explain (costs off)
+  SELECT a.* FROM a LEFT JOIN (b left join c on b.c_id = c.id)
+  ON (a.b_id = b.id);
+    QUERY PLAN     
+-------------------
+ Seq Scan on a_c a
+(1 row)
+
+-- check optimization of outer join within another special join
+explain (costs off)
+select id from a where id in (
+	select b.id from b left join c on b.id = c.id
+);
+          QUERY PLAN           
+-------------------------------
+ Hash Join
+   Hash Cond: (a.id = b.id)
+   ->  Seq Scan on a_c a
+   ->  Hash
+         ->  Seq Scan on b_c b
+(5 rows)
+
+-- check that join removal works for a left join when joining a subquery
+-- that is guaranteed to be unique by its GROUP BY clause
+explain (costs off)
+select d.* from d left join (select * from b group by b.id, b.c_id) s
+  on d.a = s.id and d.b = s.c_id;
+    QUERY PLAN     
+-------------------
+ Seq Scan on d_c d
+(1 row)
+
+-- similarly, but keying off a DISTINCT clause
+explain (costs off)
+select d.* from d left join (select distinct * from b) s
+  on d.a = s.id and d.b = s.c_id;
+    QUERY PLAN     
+-------------------
+ Seq Scan on d_c d
+(1 row)
+
+-- join removal is not possible when the GROUP BY contains a column that is
+-- not in the join condition.  See above for further notes about this.
+explain (costs off)
+select d.* from d left join (select * from b group by b.id, b.c_id) s
+  on d.a = s.id;
+                   QUERY PLAN                   
+------------------------------------------------
+ Merge Right Join
+   Merge Cond: (b.id = d.a)
+   ->  Group
+         Group Key: b.id
+         ->  Index Scan using b_c_pkey on b_c b
+   ->  Sort
+         Sort Key: d.a
+         ->  Seq Scan on d_c d
+(8 rows)
+
+-- similarly, but keying off a DISTINCT clause
+explain (costs off)
+select d.* from d left join (select distinct * from b) s
+  on d.a = s.id;
+                 QUERY PLAN                  
+---------------------------------------------
+ Merge Left Join
+   Merge Cond: (d.a = s.id)
+   ->  Sort
+         Sort Key: d.a
+         ->  Seq Scan on d_c d
+   ->  Sort
+         Sort Key: s.id
+         ->  Subquery Scan on s
+               ->  HashAggregate
+                     Group Key: b.id, b.c_id
+                     ->  Seq Scan on b_c b
+(11 rows)
+
+-- check join removal works when uniqueness of the join condition is enforced
+-- by a UNION
+explain (costs off)
+select d.* from d left join (select id from a union select id from b) s
+  on d.a = s.id;
+    QUERY PLAN     
+-------------------
+ Seq Scan on d_c d
+(1 row)
+
 rollback;
 create temp table parent (k int primary key, pd int);
 create temp table child (k int unique, cd int);
diff --git a/src/test/regress/expected/partition_join.out b/src/test/regress/expected/partition_join.out
index 03926a8413..c854228482 100644
--- a/src/test/regress/expected/partition_join.out
+++ b/src/test/regress/expected/partition_join.out
@@ -4852,46 +4852,46 @@ SELECT t1.*, t2.* FROM alpha t1 INNER JOIN beta t2 ON (t1.a = t2.a AND t1.b = t2
 (8 rows)
 
 -- partitionwise join with fractional paths
-CREATE TABLE fract_t (id BIGINT, PRIMARY KEY (id)) PARTITION BY RANGE (id);
+CREATE TABLE fract_t (id BIGINT, c INT, PRIMARY KEY (id)) PARTITION BY RANGE (id);
 CREATE TABLE fract_t0 PARTITION OF fract_t FOR VALUES FROM ('0') TO ('1000');
 CREATE TABLE fract_t1 PARTITION OF fract_t FOR VALUES FROM ('1000') TO ('2000');
 -- insert data
-INSERT INTO fract_t (id) (SELECT generate_series(0, 1999));
+INSERT INTO fract_t (id,c) SELECT i,i FROM generate_series(0, 1999) i;
 ANALYZE fract_t;
--- verify plan; nested index only scans
+-- verify plan; nested index scans
 SET max_parallel_workers_per_gather = 0;
 SET enable_partitionwise_join = on;
 EXPLAIN (COSTS OFF)
 SELECT * FROM fract_t x LEFT JOIN fract_t y USING (id) ORDER BY id ASC LIMIT 10;
-                              QUERY PLAN                               
------------------------------------------------------------------------
+                            QUERY PLAN                            
+------------------------------------------------------------------
  Limit
    ->  Merge Append
          Sort Key: x.id
          ->  Merge Left Join
                Merge Cond: (x_1.id = y_1.id)
-               ->  Index Only Scan using fract_t0_pkey on fract_t0 x_1
-               ->  Index Only Scan using fract_t0_pkey on fract_t0 y_1
+               ->  Index Scan using fract_t0_pkey on fract_t0 x_1
+               ->  Index Scan using fract_t0_pkey on fract_t0 y_1
          ->  Merge Left Join
                Merge Cond: (x_2.id = y_2.id)
-               ->  Index Only Scan using fract_t1_pkey on fract_t1 x_2
-               ->  Index Only Scan using fract_t1_pkey on fract_t1 y_2
+               ->  Index Scan using fract_t1_pkey on fract_t1 x_2
+               ->  Index Scan using fract_t1_pkey on fract_t1 y_2
 (11 rows)
 
 EXPLAIN (COSTS OFF)
 SELECT * FROM fract_t x LEFT JOIN fract_t y USING (id) ORDER BY id DESC LIMIT 10;
-                                   QUERY PLAN                                   
---------------------------------------------------------------------------------
+                                QUERY PLAN                                 
+---------------------------------------------------------------------------
  Limit
    ->  Merge Append
          Sort Key: x.id DESC
          ->  Nested Loop Left Join
-               ->  Index Only Scan Backward using fract_t0_pkey on fract_t0 x_1
-               ->  Index Only Scan using fract_t0_pkey on fract_t0 y_1
+               ->  Index Scan Backward using fract_t0_pkey on fract_t0 x_1
+               ->  Index Scan using fract_t0_pkey on fract_t0 y_1
                      Index Cond: (id = x_1.id)
          ->  Nested Loop Left Join
-               ->  Index Only Scan Backward using fract_t1_pkey on fract_t1 x_2
-               ->  Index Only Scan using fract_t1_pkey on fract_t1 y_2
+               ->  Index Scan Backward using fract_t1_pkey on fract_t1 x_2
+               ->  Index Scan using fract_t1_pkey on fract_t1 y_2
                      Index Cond: (id = x_2.id)
 (11 rows)
 
diff --git a/src/test/regress/sql/inherit.sql b/src/test/regress/sql/inherit.sql
index 195aedb5ff..ee55fc1ec8 100644
--- a/src/test/regress/sql/inherit.sql
+++ b/src/test/regress/sql/inherit.sql
@@ -208,6 +208,8 @@ insert into d values('test','one','two','three');
 alter table a alter column aa type integer using bit_length(aa);
 select * from d;
 
+DROP TABLE a CASCADE;
+
 -- The above verified that we can change the type of a multiply-inherited
 -- column; but we should reject that if any definition was inherited from
 -- an unrelated parent.
diff --git a/src/test/regress/sql/join.sql b/src/test/regress/sql/join.sql
index 27e7e741a1..df7fad22bc 100644
--- a/src/test/regress/sql/join.sql
+++ b/src/test/regress/sql/join.sql
@@ -1579,7 +1579,8 @@ reset enable_hashjoin;
 reset enable_nestloop;
 
 --
--- test join removal
+-- test join removal for plain tables
+-- we have almost the smare tests for partitioned tables below
 --
 
 begin;
@@ -1648,6 +1649,72 @@ select 1 from (select a.id FROM a left join b on a.b_id = b.id) q,
 
 rollback;
 
+--
+-- test join removal for partitioned tables
+-- this is analog to the non partitioned case above
+--
+
+begin;
+
+CREATE TABLE a (id int PRIMARY KEY, b_id int) partition by range(id);
+CREATE TABLE a_m partition of a for values from (0) to (10) partition by range(id);
+CREATE TABLE a_c partition of a_m for values from (0) to (10);
+CREATE TABLE b (id int PRIMARY KEY, c_id int) partition by range(id);
+CREATE TABLE b_m partition of b for values from (0) to (10) partition by range(id);
+CREATE TABLE b_c partition of b_m for values from (0) to (10);
+CREATE TABLE c (id int PRIMARY KEY) partition by range(id);
+CREATE TABLE c_m partition of c for values from (0) to (10) partition by range(id);
+CREATE TABLE c_c partition of c_m for values from (0) to (10);
+CREATE TABLE d (a int, b int) partition by range(a);
+CREATE TABLE d_c partition of d for values from (0) to (10);
+INSERT INTO a VALUES (0, 0), (1, NULL);
+INSERT INTO b VALUES (0, 0), (1, NULL);
+INSERT INTO c VALUES (0), (1);
+INSERT INTO d VALUES (1,3), (2,2), (3,1);
+
+-- all three cases should be optimizable into a simple seqscan
+explain (costs off) SELECT a.* FROM a LEFT JOIN b ON a.b_id = b.id;
+explain (costs off) SELECT b.* FROM b LEFT JOIN c ON b.c_id = c.id;
+explain (costs off)
+  SELECT a.* FROM a LEFT JOIN (b left join c on b.c_id = c.id)
+  ON (a.b_id = b.id);
+
+-- check optimization of outer join within another special join
+explain (costs off)
+select id from a where id in (
+	select b.id from b left join c on b.id = c.id
+);
+
+-- check that join removal works for a left join when joining a subquery
+-- that is guaranteed to be unique by its GROUP BY clause
+explain (costs off)
+select d.* from d left join (select * from b group by b.id, b.c_id) s
+  on d.a = s.id and d.b = s.c_id;
+
+-- similarly, but keying off a DISTINCT clause
+explain (costs off)
+select d.* from d left join (select distinct * from b) s
+  on d.a = s.id and d.b = s.c_id;
+
+-- join removal is not possible when the GROUP BY contains a column that is
+-- not in the join condition.  See above for further notes about this.
+explain (costs off)
+select d.* from d left join (select * from b group by b.id, b.c_id) s
+  on d.a = s.id;
+
+-- similarly, but keying off a DISTINCT clause
+explain (costs off)
+select d.* from d left join (select distinct * from b) s
+  on d.a = s.id;
+
+-- check join removal works when uniqueness of the join condition is enforced
+-- by a UNION
+explain (costs off)
+select d.* from d left join (select id from a union select id from b) s
+  on d.a = s.id;
+
+rollback;
+
 create temp table parent (k int primary key, pd int);
 create temp table child (k int unique, cd int);
 insert into parent values (1, 10), (2, 20), (3, 30);
diff --git a/src/test/regress/sql/partition_join.sql b/src/test/regress/sql/partition_join.sql
index 67f506361f..009184d348 100644
--- a/src/test/regress/sql/partition_join.sql
+++ b/src/test/regress/sql/partition_join.sql
@@ -1144,15 +1144,15 @@ SELECT t1.*, t2.* FROM alpha t1 INNER JOIN beta t2 ON (t1.a = t2.a AND t1.b = t2
 SELECT t1.*, t2.* FROM alpha t1 INNER JOIN beta t2 ON (t1.a = t2.a AND t1.b = t2.b AND t1.c = t2.c) WHERE ((t1.b >= 100 AND t1.b < 110) OR (t1.b >= 200 AND t1.b < 210)) AND ((t2.b >= 100 AND t2.b < 110) OR (t2.b >= 200 AND t2.b < 210)) AND t1.c IN ('0004', '0009') ORDER BY t1.a, t1.b;
 
 -- partitionwise join with fractional paths
-CREATE TABLE fract_t (id BIGINT, PRIMARY KEY (id)) PARTITION BY RANGE (id);
+CREATE TABLE fract_t (id BIGINT, c INT, PRIMARY KEY (id)) PARTITION BY RANGE (id);
 CREATE TABLE fract_t0 PARTITION OF fract_t FOR VALUES FROM ('0') TO ('1000');
 CREATE TABLE fract_t1 PARTITION OF fract_t FOR VALUES FROM ('1000') TO ('2000');
 
 -- insert data
-INSERT INTO fract_t (id) (SELECT generate_series(0, 1999));
+INSERT INTO fract_t (id,c) SELECT i,i FROM generate_series(0, 1999) i;
 ANALYZE fract_t;
 
--- verify plan; nested index only scans
+-- verify plan; nested index scans
 SET max_parallel_workers_per_gather = 0;
 SET enable_partitionwise_join = on;
 
-- 
2.35.3

Re: missing indexes in indexlist with partitioned tables

Reply via email to