From ab4461126f1ae642dab7fc36507ad2d91f88478f Mon Sep 17 00:00:00 2001
From: amitlan <amitlangote09@gmail.com>
Date: Wed, 28 Sep 2022 16:37:55 +0900
Subject: [PATCH v5 4/4] Teach ri_LookupKeyInPkRel() to pass
 omit_detached_snapshot

Now that the RI triggers that need to look up PK rows in a
partitioned table can manipulate partitions directly through
ExecGetLeafPartitionForKey(), the snapshot being passed to omit or
include detach-pending partitions can also now be passed explicitly,
rather than using ActiveSnapshot for that purpose.

For the detach-pending partitions to be correctly omitted or included
from the consideration of PK row lookup, the PartitionDesc machinery
needs to see the latest snapshot.  Pushing the latest snapshot to be
the ActiveSnapshot as is done presently meant that even the scans that
should NOT be using the latest snapshot also end up using one to
time-qualify table/partition rows.  That led to incorrect results of
PK lookups over partitioned tables running under REPEATABLE READ
isolation; 00cb86e75d added a test that demonstrates this bug.

To fix, do not force-push the latest snapshot in the cases of PK
lookup over partitioned tables (as was being done by passing
detectNewRows=true to ri_PerformCheck()), but rather make
ri_LookupKeyInPkRel() pass the latest snapshot directly to
PartitionDirectoryLookup() through its new omit_detached_snapshot
parameter.

The buggy output in src/test/isolation/expected/fk-snapshot.out
of the relevant test case that was added by 00cb86e75d has been
changed to the correct output.
---
 src/backend/executor/execPartition.c        | 12 +++++++++++-
 src/backend/partitioning/partdesc.c         |  6 ++----
 src/backend/utils/adt/ri_triggers.c         | 16 ++++++----------
 src/include/executor/execPartition.h        |  1 +
 src/test/isolation/expected/fk-snapshot.out |  4 ++--
 src/test/isolation/specs/fk-snapshot.spec   |  5 +----
 6 files changed, 23 insertions(+), 21 deletions(-)

diff --git a/src/backend/executor/execPartition.c b/src/backend/executor/execPartition.c
index c90f07c433..65cd365a8b 100644
--- a/src/backend/executor/execPartition.c
+++ b/src/backend/executor/execPartition.c
@@ -1607,6 +1607,14 @@ get_partition_for_tuple(PartitionKey key,
  *
  * Any intermediate parent tables encountered on the way to finding the leaf
  * partition are locked using 'lockmode' when opening.
+ *
+ * In 'omit_detached_snapshot' a caller can specify the snapshot to pass to
+ * PartitionDirectoryLookup() that in turn passes it down to the code that
+ * scans the pg_inherits catalog when building the partition descriptor from
+ * scratch.  Any detach-pending partitions are omitted from the considerations
+ * of this function if the DETACH operation appears committed to *this*
+ * snapshot.
+
  *
  * Returns NULL if no leaf partition is found for the key.
  *
@@ -1624,6 +1632,7 @@ ExecGetLeafPartitionForKey(Relation root_rel, int key_natts,
 						   const AttrNumber *key_attnums,
 						   Datum *key_vals, char *key_nulls,
 						   Oid root_idxoid, int lockmode,
+						   Snapshot omit_detached_snapshot,
 						   Oid *leaf_idxoid)
 {
 	Relation	rel = root_rel;
@@ -1709,7 +1718,8 @@ ExecGetLeafPartitionForKey(Relation root_rel, int key_natts,
 
 		/* Get the PartitionDesc using the partition directory machinery.  */
 		partdir = CreatePartitionDirectory(CurrentMemoryContext, true);
-		partdesc = PartitionDirectoryLookup(partdir, rel, NULL);
+		partdesc = PartitionDirectoryLookup(partdir, rel,
+											omit_detached_snapshot);
 
 		/* Find the partition for the key. */
 		partidx = get_partition_for_tuple(partkey, partdesc, partkey_vals,
diff --git a/src/backend/partitioning/partdesc.c b/src/backend/partitioning/partdesc.c
index 863b04c17d..4bfa4076bd 100644
--- a/src/backend/partitioning/partdesc.c
+++ b/src/backend/partitioning/partdesc.c
@@ -444,8 +444,6 @@ CreatePartitionDirectory(MemoryContext mcxt, bool omit_detached)
  * result of that visibility check, such a partition is either included in
  * the returned PartitionDesc, considering it not yet detached, or omitted
  * from it, considering it detached.
- * XXX - currently unused, because we don't have any callers of this that
- * would like to pass a snapshot that is not ActiveSnapshot.
  */
 PartitionDesc
 PartitionDirectoryLookup(PartitionDirectory pdir, Relation rel,
@@ -464,8 +462,8 @@ PartitionDirectoryLookup(PartitionDirectory pdir, Relation rel,
 		 */
 		RelationIncrementReferenceCount(rel);
 		pde->rel = rel;
-		Assert(omit_detached_snapshot == NULL);
-		if (pdir->omit_detached && ActiveSnapshotSet())
+		if (pdir->omit_detached &&
+			omit_detached_snapshot == NULL && ActiveSnapshotSet())
 			omit_detached_snapshot = GetActiveSnapshot();
 		pde->pd = RelationGetPartitionDescExt(rel, pdir->omit_detached,
 											  omit_detached_snapshot);
diff --git a/src/backend/utils/adt/ri_triggers.c b/src/backend/utils/adt/ri_triggers.c
index 9894bc4951..f0a33df9c9 100644
--- a/src/backend/utils/adt/ri_triggers.c
+++ b/src/backend/utils/adt/ri_triggers.c
@@ -434,17 +434,11 @@ RI_FKey_check(TriggerData *trigdata)
 							 &qkey, fk_rel, pk_rel);
 	}
 
-	/*
-	 * Now check that foreign key exists in PK table
-	 *
-	 * XXX detectNewRows must be true when a partitioned table is on the
-	 * referenced side.  The reason is that our snapshot must be fresh in
-	 * order for the hack in find_inheritance_children() to work.
-	 */
+	/* Now check that foreign key exists in PK table */
 	ri_PerformCheck(riinfo, &qkey, qplan,
 					fk_rel, pk_rel,
 					NULL, newslot,
-					pk_rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE,
+					false,
 					CMD_SELECT);
 
 	table_close(pk_rel, RowShareLock);
@@ -2679,8 +2673,9 @@ ri_LookupKeyInPkRel(struct RI_Plan *plan,
 		Oid		leaf_idxoid;
 
 		/*
-		 * Note that this relies on the latest snapshot having been pushed by
-		 * the caller to be the ActiveSnapshot.  The PartitionDesc machinery
+		 * Pass the latest snapshot for omit_detached_snapshot so that any
+		 * detach-pending partitions are correctly omitted or included from
+		 * the considerations of this lookup.  The PartitionDesc machinery
 		 * that runs as part of this will need to use the snapshot to determine
 		 * whether to omit or include any detach-pending partition based on the
 		 * whether the pg_inherits row that marks it as detach-pending is
@@ -2690,6 +2685,7 @@ ri_LookupKeyInPkRel(struct RI_Plan *plan,
 												 riinfo->pk_attnums,
 												 pk_vals, pk_nulls,
 												 idxoid, RowShareLock,
+												 GetLatestSnapshot(),
 												 &leaf_idxoid);
 
 		/*
diff --git a/src/include/executor/execPartition.h b/src/include/executor/execPartition.h
index cbe1d996e6..18c6b676f6 100644
--- a/src/include/executor/execPartition.h
+++ b/src/include/executor/execPartition.h
@@ -36,6 +36,7 @@ extern Relation ExecGetLeafPartitionForKey(Relation root_rel,
 										   const AttrNumber *key_attnums,
 										   Datum *key_vals, char *key_nulls,
 										   Oid root_idxoid, int lockmode,
+										   Snapshot omit_detached_snapshot,
 										   Oid *leaf_idxoid);
 
 
diff --git a/src/test/isolation/expected/fk-snapshot.out b/src/test/isolation/expected/fk-snapshot.out
index 5faf80d6ce..22752cc742 100644
--- a/src/test/isolation/expected/fk-snapshot.out
+++ b/src/test/isolation/expected/fk-snapshot.out
@@ -47,12 +47,12 @@ a
 
 step s2ifn2: INSERT INTO fk_noparted VALUES (2);
 step s2c: COMMIT;
+ERROR:  insert or update on table "fk_noparted" violates foreign key constraint "fk_noparted_a_fkey"
 step s2sfn: SELECT * FROM fk_noparted;
 a
 -
 1
-2
-(2 rows)
+(1 row)
 
 
 starting permutation: s1brc s2brc s2ip2 s1sp s2c s1sp s1ifp2 s2brc s2sfp s1c s1sfp s2ifn2 s2c s2sfn
diff --git a/src/test/isolation/specs/fk-snapshot.spec b/src/test/isolation/specs/fk-snapshot.spec
index 378507fbc3..64d27f29c3 100644
--- a/src/test/isolation/specs/fk-snapshot.spec
+++ b/src/test/isolation/specs/fk-snapshot.spec
@@ -46,10 +46,7 @@ step s2sfn	{ SELECT * FROM fk_noparted; }
 # inserting into referencing tables in transaction-snapshot mode
 # PK table is non-partitioned
 permutation s1brr s2brc s2ip2 s1sp s2c s1sp s1ifp2 s1c s1sfp
-# PK table is partitioned: buggy, because s2's serialization transaction can
-# see the uncommitted row thanks to the latest snapshot taken for
-# partition lookup to work correctly also ends up getting used by the PK index
-# scan
+# PK table is partitioned
 permutation s2ip2 s2brr s1brc s1ifp2 s2sfp s1c s2sfp s2ifn2 s2c s2sfn
 
 # inserting into referencing tables in up-to-date snapshot mode
-- 
2.35.3

