>From 8be88eaebd1beeb98ebaa49b3053009c6a0c6ed0 Mon Sep 17 00:00:00 2001
From: David Zhang <david.zhang@highgo.ca>
Date: Thu, 17 Nov 2022 12:26:25 -0800
Subject: [PATCH 4/4] support global unique index insert and update

---
 src/backend/access/nbtree/nbtinsert.c  |  30 +++++-
 src/backend/access/nbtree/nbtree.c     | 123 ++++++++++++++++++++++++-
 src/include/access/nbtree.h            |   5 +
 src/test/regress/expected/indexing.out |  41 +++++++++
 src/test/regress/sql/indexing.sql      |  20 ++++
 5 files changed, 213 insertions(+), 6 deletions(-)

diff --git a/src/backend/access/nbtree/nbtinsert.c b/src/backend/access/nbtree/nbtinsert.c
index f6f4af8bfe..ad95726ea9 100644
--- a/src/backend/access/nbtree/nbtinsert.c
+++ b/src/backend/access/nbtree/nbtinsert.c
@@ -34,7 +34,7 @@ static BTStack _bt_search_insert(Relation rel, BTInsertState insertstate);
 static TransactionId _bt_check_unique(Relation rel, BTInsertState insertstate,
 									  Relation heapRel,
 									  IndexUniqueCheck checkUnique, bool *is_unique,
-									  uint32 *speculativeToken);
+									  uint32 *speculativeToken, Relation origHeapRel);
 static OffsetNumber _bt_findinsertloc(Relation rel,
 									  BTInsertState insertstate,
 									  bool checkingunique,
@@ -73,6 +73,11 @@ static BlockNumber *_bt_deadblocks(Page page, OffsetNumber *deletable,
 								   int *nblocks);
 static inline int _bt_blk_cmp(const void *arg1, const void *arg2);
 
+TransactionId _bt_check_unique_gi(Relation rel, BTInsertState insertstate,
+								  Relation heapRel,
+								  IndexUniqueCheck checkUnique, bool *is_unique,
+								  uint32 *speculativeToken, Relation origHeapRel);
+
 /*
  *	_bt_doinsert() -- Handle insertion of a single index tuple in the tree.
  *
@@ -206,7 +211,7 @@ search:
 		uint32		speculativeToken;
 
 		xwait = _bt_check_unique(rel, &insertstate, heapRel, checkUnique,
-								 &is_unique, &speculativeToken);
+								 &is_unique, &speculativeToken, NULL);
 
 		if (unlikely(TransactionIdIsValid(xwait)))
 		{
@@ -379,6 +384,15 @@ _bt_search_insert(Relation rel, BTInsertState insertstate)
 					  NULL);
 }
 
+TransactionId
+_bt_check_unique_gi(Relation rel, BTInsertState insertstate, Relation heapRel,
+					IndexUniqueCheck checkUnique, bool *is_unique,
+					uint32 *speculativeToken, Relation origHeapRel)
+{
+	/* Exported pass-through so nbtree.c can reach the static _bt_check_unique(). */
+	return _bt_check_unique(rel, insertstate, heapRel, checkUnique, is_unique, speculativeToken, origHeapRel);
+}
+
 /*
  *	_bt_check_unique() -- Check for violation of unique index constraint
  *
@@ -405,7 +419,7 @@ _bt_search_insert(Relation rel, BTInsertState insertstate)
 static TransactionId
 _bt_check_unique(Relation rel, BTInsertState insertstate, Relation heapRel,
 				 IndexUniqueCheck checkUnique, bool *is_unique,
-				 uint32 *speculativeToken)
+				 uint32 *speculativeToken, Relation origHeapRel)
 {
 	IndexTuple	itup = insertstate->itup;
 	IndexTuple	curitup = NULL;
@@ -560,6 +574,7 @@ _bt_check_unique(Relation rel, BTInsertState insertstate, Relation heapRel,
 													   &all_dead))
 				{
 					TransactionId xwait;
+					bool		idx_fetch_result;
 
 					/*
 					 * It is a duplicate. If we are only doing a partial
@@ -613,8 +628,13 @@ _bt_check_unique(Relation rel, BTInsertState insertstate, Relation heapRel,
 					 * entry.
 					 */
 					htid = itup->t_tid;
-					if (table_index_fetch_tuple_check(heapRel, &htid,
-													  SnapshotSelf, NULL))
+					if (origHeapRel)
+						idx_fetch_result = table_index_fetch_tuple_check(origHeapRel, &htid,
+																		 SnapshotSelf, NULL);
+					else
+						idx_fetch_result = table_index_fetch_tuple_check(heapRel, &htid,
+																		 SnapshotSelf, NULL);
+					if (idx_fetch_result)
 					{
 						/* Normal case --- it's still live */
 					}
diff --git a/src/backend/access/nbtree/nbtree.c b/src/backend/access/nbtree/nbtree.c
index b52eca8f38..84dc58ba38 100644
--- a/src/backend/access/nbtree/nbtree.c
+++ b/src/backend/access/nbtree/nbtree.c
@@ -23,6 +23,8 @@
 #include "access/relscan.h"
 #include "access/xlog.h"
 #include "access/xloginsert.h"
+#include "access/table.h"
+#include "catalog/partition.h"
 #include "commands/progress.h"
 #include "commands/vacuum.h"
 #include "miscadmin.h"
@@ -34,9 +36,11 @@
 #include "storage/ipc.h"
 #include "storage/lmgr.h"
 #include "storage/smgr.h"
+#include "storage/predicate.h"
 #include "utils/builtins.h"
 #include "utils/index_selfuncs.h"
 #include "utils/memutils.h"
+#include "partitioning/partdesc.h"
 
 
 /*
@@ -86,7 +90,9 @@ static BTVacuumPosting btreevacuumposting(BTVacState *vstate,
 										  IndexTuple posting,
 										  OffsetNumber updatedoffset,
 										  int *nremaining);
-
+static void btinsert_check_unique_gi(IndexTuple itup, Relation idxRel,
+									 Relation heapRel,
+									 IndexUniqueCheck checkUnique);
 
 /*
  * Btree handler function: return IndexAmRoutine with access method parameters
@@ -177,6 +183,118 @@ btbuildempty(Relation index)
 	smgrimmedsync(RelationGetSmgr(index), INIT_FORKNUM);
 }
 
+/*
+ *	btinsert_check_unique_gi() -- cross-partition uniqueness check.
+ *
+ *		After btinsert() has put itup into global index idxRel, probe the
+ *		global indexes of all sibling partitions for a conflicting key.
+ *		NOTE(review): is_unique is never read -- OK for UNIQUE_CHECK_PARTIAL?
+ */
+static void
+btinsert_check_unique_gi(IndexTuple itup, Relation idxRel,
+						 Relation heapRel, IndexUniqueCheck checkUnique)
+{
+	bool		is_unique = false;
+	BTScanInsert itup_key;
+	Oid			parentId;
+	Relation	parentTbl;
+	PartitionDesc partDesc;
+	int			i;
+	int			nparts;
+	Oid		   *partOids;
+
+	/* Ordinary indexes need no cross-partition work; skip the key build. */
+	if (idxRel->rd_rel->relkind != RELKIND_GLOBAL_INDEX)
+		return;
+	itup_key = _bt_mkscankey(idxRel, itup);
+	if (itup_key->anynullkeys)
+	{
+		pfree(itup_key);		/* keys flagged anynullkeys are not checked */
+		return;
+	}
+	itup_key->scantid = NULL;
+
+	parentId = heapRel->rd_rel->relispartition ?
+		get_partition_parent(idxRel->rd_index->indrelid, false) : InvalidOid;
+	parentTbl = table_open(parentId, AccessShareLock);
+	partDesc = RelationGetPartitionDesc(parentTbl, true);
+	nparts = partDesc->nparts;
+	partOids = palloc(sizeof(Oid) * nparts);
+	memcpy(partOids, partDesc->oids, sizeof(Oid) * nparts);
+
+	for (i = 0; i < nparts; i++)
+	{
+		Relation	hRel;
+		List	   *childidxs;
+		ListCell   *cell;
+
+		/* the local partition's index was handled by _bt_doinsert() */
+		if (partOids[i] == heapRel->rd_rel->oid)
+			continue;
+
+		hRel = table_open(partOids[i], AccessShareLock);
+		childidxs = RelationGetIndexList(hRel);
+		foreach(cell, childidxs)
+		{
+			Relation	iRel = index_open(lfirst_oid(cell), AccessShareLock);
+
+			/* NOTE(review): matches ANY other global index on the sibling */
+			if (iRel->rd_rel->relkind == RELKIND_GLOBAL_INDEX &&
+				iRel->rd_rel->oid != idxRel->rd_rel->oid)
+			{
+				BTStack		stack;
+				uint32		speculativeToken;
+				BTInsertStateData insertstate;
+				TransactionId xwait = InvalidTransactionId;
+
+				insertstate.itup = itup;
+				insertstate.itemsz = MAXALIGN(IndexTupleSize(itup));
+				insertstate.itup_key = itup_key;
+				insertstate.bounds_valid = false;
+				insertstate.buf = InvalidBuffer;
+				insertstate.postingoff = 0;
+
+		search_global:
+				stack = _bt_search(iRel, insertstate.itup_key,
+								   &insertstate.buf, BT_READ, NULL);
+				/* origHeapRel = heapRel, the heap that holds our new tuple */
+				xwait = _bt_check_unique_gi(iRel, &insertstate,
+											hRel, checkUnique, &is_unique,
+											&speculativeToken, heapRel);
+				if (unlikely(TransactionIdIsValid(xwait)))
+				{
+					/* Have to wait for the other guy ... */
+					if (BufferIsValid(insertstate.buf))
+					{
+						_bt_relbuf(iRel, insertstate.buf);
+						insertstate.buf = InvalidBuffer;
+					}
+					/* speculative insertions get their own wait call */
+					if (speculativeToken)
+						SpeculativeInsertionWait(xwait, speculativeToken);
+					else
+						XactLockTableWait(xwait, iRel, &itup->t_tid, XLTW_InsertIndex);
+					/* start over... */
+					if (stack)
+						_bt_freestack(stack);
+					goto search_global;
+				}
+				if (BufferIsValid(insertstate.buf))
+					_bt_relbuf(iRel, insertstate.buf);
+				if (stack)
+					_bt_freestack(stack);
+			}
+			index_close(iRel, AccessShareLock);
+		}
+		list_free(childidxs);
+		table_close(hRel, AccessShareLock);
+	}
+
+	pfree(partOids);
+	table_close(parentTbl, AccessShareLock);
+	pfree(itup_key);
+}
+
 /*
  *	btinsert() -- insert an index tuple into a btree.
  *
@@ -199,6 +317,9 @@ btinsert(Relation rel, Datum *values, bool *isnull,
 
 	result = _bt_doinsert(rel, itup, checkUnique, indexUnchanged, heapRel);
 
+	if (checkUnique != UNIQUE_CHECK_NO)
+		btinsert_check_unique_gi(itup, rel, heapRel, checkUnique);
+
 	pfree(itup);
 
 	return result;
diff --git a/src/include/access/nbtree.h b/src/include/access/nbtree.h
index 8e4f6864e5..19761a4c31 100644
--- a/src/include/access/nbtree.h
+++ b/src/include/access/nbtree.h
@@ -1284,4 +1284,9 @@ extern IndexBuildResult *btbuild(Relation heap, Relation index,
 								 struct IndexInfo *indexInfo);
 extern void _bt_parallel_build_main(dsm_segment *seg, shm_toc *toc);
 
+extern TransactionId _bt_check_unique_gi(Relation rel, BTInsertState insertstate,
+										 Relation heapRel,
+										 IndexUniqueCheck checkUnique, bool *is_unique,
+										 uint32 *speculativeToken, Relation origHeapRel);
+
 #endif							/* NBTREE_H */
diff --git a/src/test/regress/expected/indexing.out b/src/test/regress/expected/indexing.out
index 58de14c037..f0102fabe6 100644
--- a/src/test/regress/expected/indexing.out
+++ b/src/test/regress/expected/indexing.out
@@ -1459,6 +1459,47 @@ unique, btree, for table "public.gidxpart"
 Partitions: gidxpart1_b_idx,
             gidxpart2_b_idx
 
+-- cross-partition uniqueness check for insert and update
+insert into gidxpart values (1, 1, 'first');
+insert into gidxpart values (11, 11, 'eleventh');
+insert into gidxpart values (2, 11, 'duplicated (b)=(11) on other partition');
+ERROR:  duplicate key value violates unique constraint "gidxpart2_b_idx"
+DETAIL:  Key (b)=(11) already exists.
+insert into gidxpart values (12, 1, 'duplicated (b)=(1) on other partition');
+ERROR:  duplicate key value violates unique constraint "gidxpart1_b_idx"
+DETAIL:  Key (b)=(1) already exists.
+insert into gidxpart values (2, 120, 'second');
+insert into gidxpart values (12, 2, 'twelfth');
+update gidxpart set b=2 where a=2;
+ERROR:  duplicate key value violates unique constraint "gidxpart2_b_idx"
+DETAIL:  Key (b)=(2) already exists.
+update gidxpart set b=1 where a=12;
+ERROR:  duplicate key value violates unique constraint "gidxpart1_b_idx"
+DETAIL:  Key (b)=(1) already exists.
+update gidxpart set b=12 where a=12;
+update gidxpart set b=2 where a=2;
+select * from gidxpart;
+ a  | b  |    c     
+----+----+----------
+  1 |  1 | first
+  2 |  2 | second
+ 11 | 11 | eleventh
+ 12 | 12 | twelfth
+(4 rows)
+
+-- cross-partition uniqueness check applys to newly created partition
+create table gidxpart3 partition of gidxpart for values from (100) to (200);
+select relname, relkind from pg_class where relname = 'gidxpart3_b_idx';
+     relname     | relkind 
+-----------------+---------
+ gidxpart3_b_idx | g
+(1 row)
+
+insert into gidxpart values (150, 11, 'duplicated (b)=(11) on other partition');
+ERROR:  duplicate key value violates unique constraint "gidxpart2_b_idx"
+DETAIL:  Key (b)=(11) already exists.
+insert into gidxpart values (150, 13, 'no duplicate b');
+-- clean up global index tests
 drop index gidx_u;
 drop table gidxpart;
 -- Test the cross-partition uniqueness with non-partition key with global unique index
diff --git a/src/test/regress/sql/indexing.sql b/src/test/regress/sql/indexing.sql
index 78649bb5ca..42ee1ce19f 100644
--- a/src/test/regress/sql/indexing.sql
+++ b/src/test/regress/sql/indexing.sql
@@ -769,6 +769,26 @@ create unique index gidx_u on gidxpart using btree(b) global;
 select relname, relhasindex, relkind from pg_class where relname like '%gidx%' order by oid;
 \d+ gidxpart
 \d+ gidx_u
+-- cross-partition uniqueness check for insert and update
+insert into gidxpart values (1, 1, 'first');
+insert into gidxpart values (11, 11, 'eleventh');
+insert into gidxpart values (2, 11, 'duplicated (b)=(11) on other partition');
+insert into gidxpart values (12, 1, 'duplicated (b)=(1) on other partition');
+insert into gidxpart values (2, 120, 'second');
+insert into gidxpart values (12, 2, 'twelfth');
+update gidxpart set b=2 where a=2;
+update gidxpart set b=1 where a=12;
+update gidxpart set b=12 where a=12;
+update gidxpart set b=2 where a=2;
+select * from gidxpart;
+
+-- cross-partition uniqueness check applys to newly created partition
+create table gidxpart3 partition of gidxpart for values from (100) to (200);
+select relname, relkind from pg_class where relname = 'gidxpart3_b_idx';
+insert into gidxpart values (150, 11, 'duplicated (b)=(11) on other partition');
+insert into gidxpart values (150, 13, 'no duplicate b');
+
+-- clean up global index tests
 drop index gidx_u;
 drop table gidxpart;
 
-- 
2.17.1

