On Mon, Jun 15, 2020 at 09:37:42PM +0900, Michael Paquier wrote:
> On Mon, Jun 15, 2020 at 08:15:05PM +0800, 李杰(慎追) wrote:
> > As shown above, an error occurred while creating an index in the second 
> > partition. 
> > It can be clearly seen that the index of the partitioned table is invalid 
> > and the index of the first partition is normal, the second partition is 
> > invalid, 
> > and the Third Partition index does not exist at all.
> 
> That's a problem.  I really think that we should make the steps of the
> concurrent operation consistent across all relations, meaning that all
> the indexes should be created as invalid for all the parts of the
> partition tree, including partitioned tables as well as their
> partitions, in the same transaction.  Then a second new transaction
> gets used for the index build, followed by a third one for the
> validation that switches the indexes to become valid.

Note that the mentioned problem wasn't serious: there was missing index on
child table, therefor the parent index was invalid, as intended.  However I
agree that it's not nice that the command can fail so easily and leave behind
some indexes created successfully and some failed some not created at all.

But I took your advice initially creating invalid inds.

On Tue, Jun 16, 2020 at 10:02:21AM +0900, Michael Paquier wrote:
> CIC is an operation that exists while allowing read and writes to
> still happen in parallel, so that's fine IMO if it takes time.  Now it
> is true that it may not scale well so we should be careful with the
> approach taken.  Also, I think that the case of REINDEX would require
> less work overall because we already have some code in place to gather
> multiple indexes from one or more relations and work on these
> consistently, all at once.

I'm not sure if by reindex you mean my old 0002.  That's now 0001, so if it can
be simplified somehow, that's great..

That gave me the idea to layer CIC on top of Reindex, since I think it does
exactly what's needed.

-- 
Justin
>From 3c465187a190ecfe21dc24dd36e64cbf304cbd17 Mon Sep 17 00:00:00 2001
From: Justin Pryzby <pryz...@telsasoft.com>
Date: Sun, 7 Jun 2020 15:59:54 -0500
Subject: [PATCH v4 1/3] Implement REINDEX of partitioned tables/indexes

---
 doc/src/sgml/ref/reindex.sgml              |   5 -
 src/backend/catalog/index.c                |   7 +-
 src/backend/commands/indexcmds.c           | 119 +++++++++++++--------
 src/backend/tcop/utility.c                 |   6 +-
 src/include/commands/defrem.h              |   6 +-
 src/test/regress/expected/create_index.out |  18 ++--
 src/test/regress/sql/create_index.sql      |  13 ++-
 7 files changed, 106 insertions(+), 68 deletions(-)

diff --git a/doc/src/sgml/ref/reindex.sgml b/doc/src/sgml/ref/reindex.sgml
index aac5d5be23..e2e32b3ba0 100644
--- a/doc/src/sgml/ref/reindex.sgml
+++ b/doc/src/sgml/ref/reindex.sgml
@@ -258,11 +258,6 @@ REINDEX [ ( <replaceable class="parameter">option</replaceable> [, ...] ) ] { IN
    can always reindex anything.
   </para>
 
-  <para>
-   Reindexing partitioned tables or partitioned indexes is not supported.
-   Each individual partition can be reindexed separately instead.
-  </para>
-
   <refsect2 id="sql-reindex-concurrently" xreflabel="Rebuilding Indexes Concurrently">
    <title>Rebuilding Indexes Concurrently</title>
 
diff --git a/src/backend/catalog/index.c b/src/backend/catalog/index.c
index 1be27eec52..7abd7f5cfc 100644
--- a/src/backend/catalog/index.c
+++ b/src/backend/catalog/index.c
@@ -3660,12 +3660,7 @@ reindex_relation(Oid relid, int flags, int options)
 	 */
 	rel = table_open(relid, ShareLock);
 
-	/*
-	 * This may be useful when implemented someday; but that day is not today.
-	 * For now, avoid erroring out when called in a multi-table context
-	 * (REINDEX SCHEMA) and happen to come across a partitioned table.  The
-	 * partitions may be reindexed on their own anyway.
-	 */
+	/* Avoid erroring out */
 	if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
 	{
 		ereport(WARNING,
diff --git a/src/backend/commands/indexcmds.c b/src/backend/commands/indexcmds.c
index 2baca12c5f..df3e567acb 100644
--- a/src/backend/commands/indexcmds.c
+++ b/src/backend/commands/indexcmds.c
@@ -88,7 +88,8 @@ static List *ChooseIndexColumnNames(List *indexElems);
 static void RangeVarCallbackForReindexIndex(const RangeVar *relation,
 											Oid relId, Oid oldRelId, void *arg);
 static bool ReindexRelationConcurrently(Oid relationOid, int options);
-static void ReindexPartitionedIndex(Relation parentIdx);
+static void ReindexPartitionedRel(Oid reloid, int options, bool concurrent,
+		bool isTopLevel);
 static void update_relispartition(Oid relationId, bool newval);
 static bool CompareOpclassOptions(Datum *opts1, Datum *opts2, int natts);
 
@@ -2420,11 +2421,10 @@ ChooseIndexColumnNames(List *indexElems)
  *		Recreate a specific index.
  */
 void
-ReindexIndex(RangeVar *indexRelation, int options, bool concurrent)
+ReindexIndex(RangeVar *indexRelation, int options, bool concurrent, bool isTopLevel)
 {
 	struct ReindexIndexCallbackState state;
 	Oid			indOid;
-	Relation	irel;
 	char		persistence;
 
 	/*
@@ -2445,22 +2445,10 @@ ReindexIndex(RangeVar *indexRelation, int options, bool concurrent)
 									  RangeVarCallbackForReindexIndex,
 									  &state);
 
-	/*
-	 * Obtain the current persistence of the existing index.  We already hold
-	 * lock on the index.
-	 */
-	irel = index_open(indOid, NoLock);
-
-	if (irel->rd_rel->relkind == RELKIND_PARTITIONED_INDEX)
-	{
-		ReindexPartitionedIndex(irel);
-		return;
-	}
-
-	persistence = irel->rd_rel->relpersistence;
-	index_close(irel, NoLock);
-
-	if (concurrent && persistence != RELPERSISTENCE_TEMP)
+	persistence = get_rel_persistence(indOid);
+	if (get_rel_relkind(indOid) == RELKIND_PARTITIONED_INDEX)
+		ReindexPartitionedRel(indOid, options, concurrent, isTopLevel);
+	else if (concurrent && persistence != RELPERSISTENCE_TEMP)
 		ReindexRelationConcurrently(indOid, options);
 	else
 		reindex_index(indOid, false, persistence,
@@ -2542,7 +2530,7 @@ RangeVarCallbackForReindexIndex(const RangeVar *relation,
  *		Recreate all indexes of a table (and of its toast table, if any)
  */
 Oid
-ReindexTable(RangeVar *relation, int options, bool concurrent)
+ReindexTable(RangeVar *relation, int options, bool concurrent, bool isTopLevel)
 {
 	Oid			heapOid;
 	bool		result;
@@ -2560,7 +2548,9 @@ ReindexTable(RangeVar *relation, int options, bool concurrent)
 									   0,
 									   RangeVarCallbackOwnsTable, NULL);
 
-	if (concurrent && get_rel_persistence(heapOid) != RELPERSISTENCE_TEMP)
+	if (get_rel_relkind(heapOid) == RELKIND_PARTITIONED_TABLE)
+		ReindexPartitionedRel(heapOid, options, concurrent, isTopLevel);
+	else if (concurrent && get_rel_persistence(heapOid) != RELPERSISTENCE_TEMP)
 	{
 		result = ReindexRelationConcurrently(heapOid, options);
 
@@ -2688,11 +2678,8 @@ ReindexMultipleTables(const char *objectName, ReindexObjectType objectKind,
 		 * Only regular tables and matviews can have indexes, so ignore any
 		 * other kind of relation.
 		 *
-		 * It is tempting to also consider partitioned tables here, but that
-		 * has the problem that if the children are in the same schema, they
-		 * would be processed twice.  Maybe we could have a separate list of
-		 * partitioned tables, and expand that afterwards into relids,
-		 * ignoring any duplicates.
+		 * Partitioned tables/indexes are skipped but matching leaf
+		 * partitions are processed.
 		 */
 		if (classtuple->relkind != RELKIND_RELATION &&
 			classtuple->relkind != RELKIND_MATVIEW)
@@ -2805,8 +2792,7 @@ ReindexMultipleTables(const char *objectName, ReindexObjectType objectKind,
  * view.  For tables and materialized views, all its indexes will be rebuilt,
  * excluding invalid indexes and any indexes used in exclusion constraints,
  * but including its associated toast table indexes.  For indexes, the index
- * itself will be rebuilt.  If 'relationOid' belongs to a partitioned table
- * then we issue a warning to mention these are not yet supported.
+ * itself will be rebuilt.
  *
  * The locks taken on parent tables and involved indexes are kept until the
  * transaction is committed, at which point a session lock is taken on each
@@ -3010,13 +2996,6 @@ ReindexRelationConcurrently(Oid relationOid, int options)
 				MemoryContextSwitchTo(oldcontext);
 				break;
 			}
-		case RELKIND_PARTITIONED_TABLE:
-			/* see reindex_relation() */
-			ereport(WARNING,
-					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
-					 errmsg("REINDEX of partitioned tables is not yet implemented, skipping \"%s\"",
-							get_rel_name(relationOid))));
-			return false;
 		default:
 			/* Return error if type of relation is not supported */
 			ereport(ERROR,
@@ -3478,17 +3457,71 @@ ReindexRelationConcurrently(Oid relationOid, int options)
 }
 
 /*
- *	ReindexPartitionedIndex
- *		Reindex each child of the given partitioned index.
- *
- * Not yet implemented.
+ *	ReindexPartitionedRel
+ *		Reindex each child of the given partitioned relation.
  */
 static void
-ReindexPartitionedIndex(Relation parentIdx)
+ReindexPartitionedRel(Oid reloid, int options, bool concurrent, bool isTopLevel)
 {
-	ereport(ERROR,
-			(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
-			 errmsg("REINDEX is not yet implemented for partitioned indexes")));
+	MemoryContext oldcontext, reindex_context;
+	List		*inhoids;
+	ListCell	 *lc;
+
+	/*
+	 * This cannot run inside a user transaction block; if
+	 * we were inside a transaction, then its commit- and
+	 * start-transaction-command calls would not have the
+	 * intended effect!
+	 */
+	PreventInTransactionBlock(isTopLevel,
+		"REINDEX of partitioned relations"); // XXX
+
+	/*
+	 * Create list of children in longlived context, since we process each
+	 * child in a separate transaction
+	 */
+	reindex_context = AllocSetContextCreate(PortalContext, "Reindex",
+			ALLOCSET_DEFAULT_SIZES);
+	oldcontext = MemoryContextSwitchTo(reindex_context);
+	inhoids = find_all_inheritors(reloid, NoLock, NULL);
+	MemoryContextSwitchTo(oldcontext);
+
+	foreach (lc, inhoids)
+	{
+		Oid	inhrelid = lfirst_oid(lc);
+
+		PopActiveSnapshot();
+		CommitTransactionCommand();
+
+		StartTransactionCommand();
+		/* functions in indexes may want a snapshot set */
+		PushActiveSnapshot(GetTransactionSnapshot());
+
+		switch (get_rel_relkind(inhrelid))
+		{
+		case RELKIND_PARTITIONED_INDEX:
+		case RELKIND_PARTITIONED_TABLE:
+			/*
+			 * We have a full list of direct and indirect children, so skip
+			 * partitioned relations and just handle their children.
+			 */
+			continue;
+		case RELKIND_INDEX:
+			reindex_index(inhrelid, false, get_rel_persistence(inhrelid),
+						  options | REINDEXOPT_REPORT_PROGRESS);
+			break;
+		case RELKIND_RELATION:
+			(void) reindex_relation(inhrelid,
+									  REINDEX_REL_PROCESS_TOAST |
+									  REINDEX_REL_CHECK_CONSTRAINTS,
+									  options | REINDEXOPT_REPORT_PROGRESS);
+			break;
+		}
+	}
+
+	PopActiveSnapshot();
+	CommitTransactionCommand();
+	StartTransactionCommand();
 }
 
 /*
diff --git a/src/backend/tcop/utility.c b/src/backend/tcop/utility.c
index 9b0c376c8c..fd6bc65c18 100644
--- a/src/backend/tcop/utility.c
+++ b/src/backend/tcop/utility.c
@@ -926,10 +926,12 @@ standard_ProcessUtility(PlannedStmt *pstmt,
 				switch (stmt->kind)
 				{
 					case REINDEX_OBJECT_INDEX:
-						ReindexIndex(stmt->relation, stmt->options, stmt->concurrent);
+						ReindexIndex(stmt->relation, stmt->options,
+								stmt->concurrent, isTopLevel);
 						break;
 					case REINDEX_OBJECT_TABLE:
-						ReindexTable(stmt->relation, stmt->options, stmt->concurrent);
+						ReindexTable(stmt->relation, stmt->options,
+								stmt->concurrent, isTopLevel);
 						break;
 					case REINDEX_OBJECT_SCHEMA:
 					case REINDEX_OBJECT_SYSTEM:
diff --git a/src/include/commands/defrem.h b/src/include/commands/defrem.h
index c26a102b17..df32f5b201 100644
--- a/src/include/commands/defrem.h
+++ b/src/include/commands/defrem.h
@@ -34,8 +34,10 @@ extern ObjectAddress DefineIndex(Oid relationId,
 								 bool check_not_in_use,
 								 bool skip_build,
 								 bool quiet);
-extern void ReindexIndex(RangeVar *indexRelation, int options, bool concurrent);
-extern Oid	ReindexTable(RangeVar *relation, int options, bool concurrent);
+extern void ReindexIndex(RangeVar *indexRelation, int options, bool concurrent,
+						bool isTopLevel);
+extern Oid	ReindexTable(RangeVar *relation, int options, bool concurrent,
+						bool isTopLevel);
 extern void ReindexMultipleTables(const char *objectName, ReindexObjectType objectKind,
 								  int options, bool concurrent);
 extern char *makeObjectName(const char *name1, const char *name2,
diff --git a/src/test/regress/expected/create_index.out b/src/test/regress/expected/create_index.out
index e3e6634d7e..61b4a30db4 100644
--- a/src/test/regress/expected/create_index.out
+++ b/src/test/regress/expected/create_index.out
@@ -2196,18 +2196,20 @@ SELECT relid, parentrelid, level FROM pg_partition_tree('concur_reindex_part_ind
  concur_reindex_part_index_0_2 | concur_reindex_part_index_0 |     2
 (5 rows)
 
--- REINDEX fails for partitioned indexes
+-- REINDEX for partitioned indexes
+REINDEX INDEX concur_reindex_part_index;
+REINDEX INDEX CONCURRENTLY concur_reindex_part_index;
+REINDEX INDEX concur_reindex_part_index_0;
+REINDEX INDEX CONCURRENTLY concur_reindex_part_index_0;
 REINDEX INDEX concur_reindex_part_index_10;
-ERROR:  REINDEX is not yet implemented for partitioned indexes
 REINDEX INDEX CONCURRENTLY concur_reindex_part_index_10;
-ERROR:  REINDEX is not yet implemented for partitioned indexes
--- REINDEX is a no-op for partitioned tables
+-- REINDEX for partitioned tables
+REINDEX TABLE concur_reindex_part_10;
+REINDEX TABLE CONCURRENTLY concur_reindex_part_10;
+REINDEX TABLE concur_reindex_part_0;
+REINDEX TABLE CONCURRENTLY concur_reindex_part_0;
 REINDEX TABLE concur_reindex_part_10;
-WARNING:  REINDEX of partitioned tables is not yet implemented, skipping "concur_reindex_part_10"
-NOTICE:  table "concur_reindex_part_10" has no indexes to reindex
 REINDEX TABLE CONCURRENTLY concur_reindex_part_10;
-WARNING:  REINDEX of partitioned tables is not yet implemented, skipping "concur_reindex_part_10"
-NOTICE:  table "concur_reindex_part_10" has no indexes that can be reindexed concurrently
 SELECT relid, parentrelid, level FROM pg_partition_tree('concur_reindex_part_index')
   ORDER BY relid, level;
              relid             |         parentrelid         | level 
diff --git a/src/test/regress/sql/create_index.sql b/src/test/regress/sql/create_index.sql
index f3667bacdc..a751dd5caa 100644
--- a/src/test/regress/sql/create_index.sql
+++ b/src/test/regress/sql/create_index.sql
@@ -903,12 +903,21 @@ CREATE INDEX concur_reindex_part_index_0_2 ON ONLY concur_reindex_part_0_2 (c1);
 ALTER INDEX concur_reindex_part_index_0 ATTACH PARTITION concur_reindex_part_index_0_2;
 SELECT relid, parentrelid, level FROM pg_partition_tree('concur_reindex_part_index')
   ORDER BY relid, level;
--- REINDEX fails for partitioned indexes
+-- REINDEX for partitioned indexes
+REINDEX INDEX concur_reindex_part_index;
+REINDEX INDEX CONCURRENTLY concur_reindex_part_index;
+REINDEX INDEX concur_reindex_part_index_0;
+REINDEX INDEX CONCURRENTLY concur_reindex_part_index_0;
 REINDEX INDEX concur_reindex_part_index_10;
 REINDEX INDEX CONCURRENTLY concur_reindex_part_index_10;
--- REINDEX is a no-op for partitioned tables
+-- REINDEX for partitioned tables
 REINDEX TABLE concur_reindex_part_10;
 REINDEX TABLE CONCURRENTLY concur_reindex_part_10;
+REINDEX TABLE concur_reindex_part_0;
+REINDEX TABLE CONCURRENTLY concur_reindex_part_0;
+REINDEX TABLE concur_reindex_part_10;
+REINDEX TABLE CONCURRENTLY concur_reindex_part_10;
+
 SELECT relid, parentrelid, level FROM pg_partition_tree('concur_reindex_part_index')
   ORDER BY relid, level;
 -- REINDEX should preserve dependencies of partition tree.
-- 
2.17.0

>From e7b233c7d76732f5cdc55f325483de0ffc4f053a Mon Sep 17 00:00:00 2001
From: Justin Pryzby <pryz...@telsasoft.com>
Date: Sat, 6 Jun 2020 17:42:23 -0500
Subject: [PATCH v4 2/3] Allow CREATE INDEX CONCURRENTLY on partitioned table

Note, this effectively reverts 050098b14, so take care to not reintroduce the
bug it fixed.
---
 doc/src/sgml/ref/create_index.sgml     |   9 --
 src/backend/commands/indexcmds.c       | 109 +++++++++++++++++++------
 src/test/regress/expected/indexing.out |  57 ++++++++++++-
 src/test/regress/sql/indexing.sql      |  16 +++-
 4 files changed, 150 insertions(+), 41 deletions(-)

diff --git a/doc/src/sgml/ref/create_index.sgml b/doc/src/sgml/ref/create_index.sgml
index 33aa64e81d..c780dc9547 100644
--- a/doc/src/sgml/ref/create_index.sgml
+++ b/doc/src/sgml/ref/create_index.sgml
@@ -657,15 +657,6 @@ Indexes:
     cannot.
    </para>
 
-   <para>
-    Concurrent builds for indexes on partitioned tables are currently not
-    supported.  However, you may concurrently build the index on each
-    partition individually and then finally create the partitioned index
-    non-concurrently in order to reduce the time where writes to the
-    partitioned table will be locked out.  In this case, building the
-    partitioned index is a metadata only operation.
-   </para>
-
   </refsect2>
  </refsect1>
 
diff --git a/src/backend/commands/indexcmds.c b/src/backend/commands/indexcmds.c
index df3e567acb..291046cb16 100644
--- a/src/backend/commands/indexcmds.c
+++ b/src/backend/commands/indexcmds.c
@@ -653,17 +653,6 @@ DefineIndex(Oid relationId,
 	partitioned = rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE;
 	if (partitioned)
 	{
-		/*
-		 * Note: we check 'stmt->concurrent' rather than 'concurrent', so that
-		 * the error is thrown also for temporary tables.  Seems better to be
-		 * consistent, even though we could do it on temporary table because
-		 * we're not actually doing it concurrently.
-		 */
-		if (stmt->concurrent)
-			ereport(ERROR,
-					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
-					 errmsg("cannot create index on partitioned table \"%s\" concurrently",
-							RelationGetRelationName(rel))));
 		if (stmt->excludeOpNames)
 			ereport(ERROR,
 					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
@@ -1098,6 +1087,9 @@ DefineIndex(Oid relationId,
 		if (pd->nparts != 0)
 			flags |= INDEX_CREATE_INVALID;
 	}
+	else if (concurrent && OidIsValid(parentIndexId))
+		/* Initial concurrent build index partition as invalid */
+		flags |= INDEX_CREATE_INVALID;
 
 	if (stmt->deferrable)
 		constr_flags |= INDEX_CONSTR_CREATE_DEFERRABLE;
@@ -1140,6 +1132,10 @@ DefineIndex(Oid relationId,
 		CreateComments(indexRelationId, RelationRelationId, 0,
 					   stmt->idxcomment);
 
+	/* save lockrelid and locktag for below */
+	heaprelid = rel->rd_lockInfo.lockRelId;
+	SET_LOCKTAG_RELATION(heaplocktag, heaprelid.dbId, heaprelid.relId);
+
 	if (partitioned)
 	{
 		/*
@@ -1150,8 +1146,17 @@ DefineIndex(Oid relationId,
 		 */
 		if (!stmt->relation || stmt->relation->inh)
 		{
+			/*
+			 * Need to close the relation before recursing into children, so
+			 * copy needed data into a longlived context.
+			 */
+
+			MemoryContext	ind_context = AllocSetContextCreate(PortalContext, "CREATE INDEX",
+					ALLOCSET_DEFAULT_SIZES);
+			MemoryContext	oldcontext = MemoryContextSwitchTo(ind_context);
 			PartitionDesc partdesc = RelationGetPartitionDesc(rel);
 			int			nparts = partdesc->nparts;
+			char		*relname = pstrdup(RelationGetRelationName(rel));
 			Oid		   *part_oids = palloc(sizeof(Oid) * nparts);
 			bool		invalidate_parent = false;
 			TupleDesc	parentDesc;
@@ -1161,8 +1166,10 @@ DefineIndex(Oid relationId,
 										 nparts);
 
 			memcpy(part_oids, partdesc->oids, sizeof(Oid) * nparts);
+			parentDesc = CreateTupleDescCopy(RelationGetDescr(rel));
+			table_close(rel, NoLock);
+			MemoryContextSwitchTo(oldcontext);
 
-			parentDesc = RelationGetDescr(rel);
 			opfamOids = palloc(sizeof(Oid) * numberOfKeyAttributes);
 			for (i = 0; i < numberOfKeyAttributes; i++)
 				opfamOids[i] = get_opclass_family(classObjectId[i]);
@@ -1197,18 +1204,20 @@ DefineIndex(Oid relationId,
 						ereport(ERROR,
 								(errcode(ERRCODE_WRONG_OBJECT_TYPE),
 								 errmsg("cannot create unique index on partitioned table \"%s\"",
-										RelationGetRelationName(rel)),
+										relname),
 								 errdetail("Table \"%s\" contains partitions that are foreign tables.",
-										   RelationGetRelationName(rel))));
+										   relname)));
 
 					table_close(childrel, lockmode);
 					continue;
 				}
 
+				oldcontext = MemoryContextSwitchTo(ind_context);
 				childidxs = RelationGetIndexList(childrel);
 				attmap =
 					build_attrmap_by_name(RelationGetDescr(childrel),
 										  parentDesc);
+				MemoryContextSwitchTo(oldcontext);
 
 				foreach(cell, childidxs)
 				{
@@ -1334,10 +1343,16 @@ DefineIndex(Oid relationId,
 								createdConstraintId,
 								is_alter_table, check_rights, check_not_in_use,
 								skip_build, quiet);
+					if (concurrent)
+					{
+						PushActiveSnapshot(GetTransactionSnapshot());
+						invalidate_parent = true;
+					}
 				}
 
-				pgstat_progress_update_param(PROGRESS_CREATEIDX_PARTITIONS_DONE,
-											 i + 1);
+				if (!concurrent)
+					pgstat_progress_update_param(PROGRESS_CREATEIDX_PARTITIONS_DONE,
+												 i + 1);
 				free_attrmap(attmap);
 			}
 
@@ -1364,23 +1379,72 @@ DefineIndex(Oid relationId,
 				table_close(pg_index, RowExclusiveLock);
 				heap_freetuple(newtup);
 			}
+		} else
+			table_close(rel, NoLock);
+
+		if (concurrent)
+		{
+			List	*childs;
+			ListCell *lc;
+			int		npart = 0;
+
+			/* Reindex invalid child indexes created earlier */
+			MemoryContext	ind_context = AllocSetContextCreate(PortalContext, "CREATE INDEX",
+					ALLOCSET_DEFAULT_SIZES);
+			MemoryContext	oldcontext = MemoryContextSwitchTo(ind_context);
+			childs = find_inheritance_children(indexRelationId, NoLock);
+			MemoryContextSwitchTo(oldcontext);
+
+			/* Make the catalog changes visible to get_partition_parent */
+			CommandCounterIncrement();
+
+			foreach (lc, childs)
+			{
+				Oid			indexrelid = lfirst_oid(lc);
+				Relation	cldidx;
+				bool		isvalid;
+
+				if (!OidIsValid(parentIndexId))
+					pgstat_progress_update_param(PROGRESS_CREATEIDX_PARTITIONS_DONE,
+												 npart++);
+
+				cldidx = index_open(indexrelid, ShareUpdateExclusiveLock);
+				isvalid = cldidx->rd_index->indisvalid;
+				index_close(cldidx, NoLock);
+				if (isvalid)
+					continue;
+
+				/* This may be a partitioned index, which is fine too */
+				ReindexRelationConcurrently(indexrelid, 0);
+				PushActiveSnapshot(GetTransactionSnapshot());
+			}
+
+			PopActiveSnapshot();
+			CommitTransactionCommand();
+			StartTransactionCommand();
+
+			/*
+			 * CIC needs to mark a partitioned index as VALID, which itself
+			 * requires setting READY, which is unset for CIC (even though
+			 * it's meaningless for an index without storage).
+			 */
+			index_set_state_flags(indexRelationId, INDEX_CREATE_SET_READY);
+			CommandCounterIncrement();
+			index_set_state_flags(indexRelationId, INDEX_CREATE_SET_VALID);
 		}
 
 		/*
 		 * Indexes on partitioned tables are not themselves built, so we're
 		 * done here.
 		 */
-		table_close(rel, NoLock);
 		if (!OidIsValid(parentIndexId))
 			pgstat_progress_end_command();
 		return address;
 	}
 
+	table_close(rel, NoLock);
 	if (!concurrent)
 	{
-		/* Close the heap and we're done, in the non-concurrent case */
-		table_close(rel, NoLock);
-
 		/* If this is the top-level index, we're done. */
 		if (!OidIsValid(parentIndexId))
 			pgstat_progress_end_command();
@@ -1388,11 +1452,6 @@ DefineIndex(Oid relationId,
 		return address;
 	}
 
-	/* save lockrelid and locktag for below, then close rel */
-	heaprelid = rel->rd_lockInfo.lockRelId;
-	SET_LOCKTAG_RELATION(heaplocktag, heaprelid.dbId, heaprelid.relId);
-	table_close(rel, NoLock);
-
 	/*
 	 * For a concurrent build, it's important to make the catalog entries
 	 * visible to other transactions before we start to build the index. That
diff --git a/src/test/regress/expected/indexing.out b/src/test/regress/expected/indexing.out
index f78865ef81..1ae58e110f 100644
--- a/src/test/regress/expected/indexing.out
+++ b/src/test/regress/expected/indexing.out
@@ -50,11 +50,60 @@ select relname, relkind, relhassubclass, inhparent::regclass
 (8 rows)
 
 drop table idxpart;
--- Some unsupported features
+-- CIC on partitioned table
 create table idxpart (a int, b int, c text) partition by range (a);
-create table idxpart1 partition of idxpart for values from (0) to (10);
-create index concurrently on idxpart (a);
-ERROR:  cannot create index on partitioned table "idxpart" concurrently
+create table idxpart1 partition of idxpart for values from (0) to (10) partition by range(a);
+create table idxpart11 partition of idxpart1 for values from (0) to (10) partition by range(a);
+create table idxpart2 partition of idxpart for values from (10) to (20);
+insert into idxpart2 values(10),(10); -- not unique
+create index concurrently on idxpart (a); -- partitioned
+create index concurrently on idxpart1 (a); -- partitioned and partition
+create index concurrently on idxpart11 (a); -- partitioned and partition, with no leaves
+create index concurrently on idxpart2 (a); -- leaf
+create unique index concurrently on idxpart (a); -- partitioned, unique failure
+ERROR:  could not create unique index "idxpart2_a_idx2"
+DETAIL:  Key (a)=(10) is duplicated.
+\d idxpart
+        Partitioned table "public.idxpart"
+ Column |  Type   | Collation | Nullable | Default 
+--------+---------+-----------+----------+---------
+ a      | integer |           |          | 
+ b      | integer |           |          | 
+ c      | text    |           |          | 
+Partition key: RANGE (a)
+Indexes:
+    "idxpart_a_idx" btree (a)
+    "idxpart_a_idx1" UNIQUE, btree (a) INVALID
+Number of partitions: 2 (Use \d+ to list them.)
+
+\d idxpart1
+        Partitioned table "public.idxpart1"
+ Column |  Type   | Collation | Nullable | Default 
+--------+---------+-----------+----------+---------
+ a      | integer |           |          | 
+ b      | integer |           |          | 
+ c      | text    |           |          | 
+Partition of: idxpart FOR VALUES FROM (0) TO (10)
+Partition key: RANGE (a)
+Indexes:
+    "idxpart1_a_idx" btree (a)
+    "idxpart1_a_idx1" btree (a)
+    "idxpart1_a_idx2" UNIQUE, btree (a)
+Number of partitions: 1 (Use \d+ to list them.)
+
+\d idxpart2
+              Table "public.idxpart2"
+ Column |  Type   | Collation | Nullable | Default 
+--------+---------+-----------+----------+---------
+ a      | integer |           |          | 
+ b      | integer |           |          | 
+ c      | text    |           |          | 
+Partition of: idxpart FOR VALUES FROM (10) TO (20)
+Indexes:
+    "idxpart2_a_idx" btree (a)
+    "idxpart2_a_idx1" btree (a)
+    "idxpart2_a_idx2" UNIQUE, btree (a) INVALID
+
 drop table idxpart;
 -- Verify bugfix with query on indexed partitioned table with no partitions
 -- https://postgr.es/m/20180124162006.pmapfiznhgngwtjf@alvherre.pgsql
diff --git a/src/test/regress/sql/indexing.sql b/src/test/regress/sql/indexing.sql
index 35d159f41b..d908ee6d17 100644
--- a/src/test/regress/sql/indexing.sql
+++ b/src/test/regress/sql/indexing.sql
@@ -29,10 +29,20 @@ select relname, relkind, relhassubclass, inhparent::regclass
 	where relname like 'idxpart%' order by relname;
 drop table idxpart;
 
--- Some unsupported features
+-- CIC on partitioned table
 create table idxpart (a int, b int, c text) partition by range (a);
-create table idxpart1 partition of idxpart for values from (0) to (10);
-create index concurrently on idxpart (a);
+create table idxpart1 partition of idxpart for values from (0) to (10) partition by range(a);
+create table idxpart11 partition of idxpart1 for values from (0) to (10) partition by range(a);
+create table idxpart2 partition of idxpart for values from (10) to (20);
+insert into idxpart2 values(10),(10); -- not unique
+create index concurrently on idxpart (a); -- partitioned
+create index concurrently on idxpart1 (a); -- partitioned and partition
+create index concurrently on idxpart11 (a); -- partitioned and partition, with no leaves
+create index concurrently on idxpart2 (a); -- leaf
+create unique index concurrently on idxpart (a); -- partitioned, unique failure
+\d idxpart
+\d idxpart1
+\d idxpart2
 drop table idxpart;
 
 -- Verify bugfix with query on indexed partitioned table with no partitions
-- 
2.17.0

>From 39a4f6c6db0838026769d34855f3f2e2553a2fb1 Mon Sep 17 00:00:00 2001
From: Justin Pryzby <pryz...@telsasoft.com>
Date: Sun, 7 Jun 2020 16:58:42 -0500
Subject: [PATCH v4 3/3] Implement CLUSTER of partitioned table..

This requires either specification of a partitioned index on which to cluster,
or that an partitioned index was previously set clustered.
---
 src/backend/commands/cluster.c        | 139 +++++++++++++++++++-------
 src/bin/psql/tab-complete.c           |   1 +
 src/test/regress/expected/cluster.out |  23 ++++-
 src/test/regress/sql/cluster.sql      |  12 ++-
 4 files changed, 133 insertions(+), 42 deletions(-)

diff --git a/src/backend/commands/cluster.c b/src/backend/commands/cluster.c
index 04d12a7ece..7409c17b0b 100644
--- a/src/backend/commands/cluster.c
+++ b/src/backend/commands/cluster.c
@@ -33,6 +33,7 @@
 #include "catalog/namespace.h"
 #include "catalog/objectaccess.h"
 #include "catalog/pg_am.h"
+#include "catalog/pg_inherits.h"
 #include "catalog/toasting.h"
 #include "commands/cluster.h"
 #include "commands/progress.h"
@@ -72,6 +73,9 @@ static void copy_table_data(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex,
 							bool verbose, bool *pSwapToastByContent,
 							TransactionId *pFreezeXid, MultiXactId *pCutoffMulti);
 static List *get_tables_to_cluster(MemoryContext cluster_context);
+static List *get_tables_to_cluster_partitioned(MemoryContext cluster_context,
+		Oid indexOid);
+static void cluster_multiple_rels(List *rvs, int options);
 
 
 /*---------------------------------------------------------------------------
@@ -124,14 +128,6 @@ cluster(ClusterStmt *stmt, bool isTopLevel)
 					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
 					 errmsg("cannot cluster temporary tables of other sessions")));
 
-		/*
-		 * Reject clustering a partitioned table.
-		 */
-		if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
-			ereport(ERROR,
-					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
-					 errmsg("cannot cluster a partitioned table")));
-
 		if (stmt->indexname == NULL)
 		{
 			ListCell   *index;
@@ -169,8 +165,37 @@ cluster(ClusterStmt *stmt, bool isTopLevel)
 		/* close relation, keep lock till commit */
 		table_close(rel, NoLock);
 
-		/* Do the job. */
-		cluster_rel(tableOid, indexOid, stmt->options);
+		if (rel->rd_rel->relkind != RELKIND_PARTITIONED_TABLE)
+		{
+			/* Do the job. */
+			cluster_rel(tableOid, indexOid, stmt->options);
+		} else {
+			List	   *rvs;
+			MemoryContext cluster_context;
+
+			/* Check index directly since cluster_rel isn't called for partitioned table */
+			check_index_is_clusterable(rel, indexOid, true, AccessExclusiveLock);
+
+			/* Refuse to hold strong locks in a user transaction */
+			PreventInTransactionBlock(isTopLevel, "CLUSTER");
+
+			cluster_context = AllocSetContextCreate(PortalContext,
+												"Cluster",
+												ALLOCSET_DEFAULT_SIZES);
+
+			rvs = get_tables_to_cluster_partitioned(cluster_context, indexOid);
+			cluster_multiple_rels(rvs, stmt->options);
+
+			/* Start a new transaction for the cleanup work. */
+			StartTransactionCommand();
+
+			rel = table_open(tableOid, ShareUpdateExclusiveLock);
+			mark_index_clustered(rel, indexOid, true);
+			table_close(rel, NoLock);
+
+			/* Clean up working storage */
+			MemoryContextDelete(cluster_context);
+		}
 	}
 	else
 	{
@@ -180,7 +205,6 @@ cluster(ClusterStmt *stmt, bool isTopLevel)
 		 */
 		MemoryContext cluster_context;
 		List	   *rvs;
-		ListCell   *rv;
 
 		/*
 		 * We cannot run this form of CLUSTER inside a user transaction block;
@@ -204,25 +228,7 @@ cluster(ClusterStmt *stmt, bool isTopLevel)
 		 */
 		rvs = get_tables_to_cluster(cluster_context);
 
-		/* Commit to get out of starting transaction */
-		PopActiveSnapshot();
-		CommitTransactionCommand();
-
-		/* Ok, now that we've got them all, cluster them one by one */
-		foreach(rv, rvs)
-		{
-			RelToCluster *rvtc = (RelToCluster *) lfirst(rv);
-
-			/* Start a new transaction for each relation. */
-			StartTransactionCommand();
-			/* functions in indexes may want a snapshot set */
-			PushActiveSnapshot(GetTransactionSnapshot());
-			/* Do the job. */
-			cluster_rel(rvtc->tableOid, rvtc->indexOid,
-						stmt->options | CLUOPT_RECHECK);
-			PopActiveSnapshot();
-			CommitTransactionCommand();
-		}
+		cluster_multiple_rels(rvs, stmt->options);
 
 		/* Start a new transaction for the cleanup work. */
 		StartTransactionCommand();
@@ -483,12 +489,6 @@ mark_index_clustered(Relation rel, Oid indexOid, bool is_internal)
 	Relation	pg_index;
 	ListCell   *index;
 
-	/* Disallow applying to a partitioned table */
-	if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
-		ereport(ERROR,
-				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
-				 errmsg("cannot mark index clustered in partitioned table")));
-
 	/*
 	 * If the index is already marked clustered, no need to do anything.
 	 */
@@ -1557,3 +1557,70 @@ get_tables_to_cluster(MemoryContext cluster_context)
 
 	return rvs;
 }
+
+/*
+ * Return a List of tables and associated index, where each index is a
+ * partition of the given index
+ */
+static List *
+get_tables_to_cluster_partitioned(MemoryContext cluster_context, Oid indexOid)
+{
+	List		*inhoids;
+	ListCell	*lc;
+	List		*rvs = NIL;
+
+	MemoryContext old_context = MemoryContextSwitchTo(cluster_context);
+
+	inhoids = find_all_inheritors(indexOid, NoLock, NULL);
+	foreach(lc, inhoids)
+	{
+		Oid		indexrelid = lfirst_oid(lc);
+		Oid		relid = IndexGetRelation(indexrelid, false);
+		RelToCluster	*rvtc;
+
+		/*
+		 * We have a full list of direct and indirect children, so skip
+		 * partitioned tables and just handle their children.
+		 */
+		if (get_rel_relkind(relid) == RELKIND_PARTITIONED_TABLE)
+			continue;
+
+		rvtc = (RelToCluster *) palloc(sizeof(RelToCluster));
+		rvtc->tableOid = relid;
+		rvtc->indexOid = indexrelid;
+		rvs = lappend(rvs, rvtc);
+	}
+
+	MemoryContextSwitchTo(old_context);
+	return rvs;
+}
+
+/* Cluster each relation in a separate transaction */
+static void
+cluster_multiple_rels(List *rvs, int options)
+{
+		ListCell *lc;
+
+		/* Commit to get out of starting transaction */
+		PopActiveSnapshot();
+		CommitTransactionCommand();
+
+		/* Ok, now that we've got them all, cluster them one by one */
+		foreach(lc, rvs)
+		{
+			RelToCluster *rvtc = (RelToCluster *) lfirst(lc);
+
+			/* Start a new transaction for each relation. */
+			StartTransactionCommand();
+
+			/* functions in indexes may want a snapshot set */
+			PushActiveSnapshot(GetTransactionSnapshot());
+
+			/* Do the job. */
+			cluster_rel(rvtc->tableOid, rvtc->indexOid,
+						options | CLUOPT_RECHECK);
+
+			PopActiveSnapshot();
+			CommitTransactionCommand();
+		}
+}
diff --git a/src/bin/psql/tab-complete.c b/src/bin/psql/tab-complete.c
index c4af40bfa9..e68808218a 100644
--- a/src/bin/psql/tab-complete.c
+++ b/src/bin/psql/tab-complete.c
@@ -587,6 +587,7 @@ static const SchemaQuery Query_for_list_of_clusterables = {
 	.catname = "pg_catalog.pg_class c",
 	.selcondition =
 	"c.relkind IN (" CppAsString2(RELKIND_RELATION) ", "
+	CppAsString2(RELKIND_PARTITIONED_TABLE) ", "
 	CppAsString2(RELKIND_MATVIEW) ")",
 	.viscondition = "pg_catalog.pg_table_is_visible(c.oid)",
 	.namespace = "c.relnamespace",
diff --git a/src/test/regress/expected/cluster.out b/src/test/regress/expected/cluster.out
index bdae8fe00c..21b28fde6e 100644
--- a/src/test/regress/expected/cluster.out
+++ b/src/test/regress/expected/cluster.out
@@ -439,13 +439,28 @@ select * from clstr_temp;
 
 drop table clstr_temp;
 RESET SESSION AUTHORIZATION;
--- Check that partitioned tables cannot be clustered
+-- Check that partitioned tables can be clustered
 CREATE TABLE clstrpart (a int) PARTITION BY RANGE (a);
+CREATE TABLE clstrpart1 PARTITION OF clstrpart FOR VALUES FROM (1)TO(10) PARTITION BY RANGE (a);
+CREATE TABLE clstrpart11 PARTITION OF clstrpart1 FOR VALUES FROM (1)TO(10);
+CREATE TABLE clstrpart12 PARTITION OF clstrpart1 FOR VALUES FROM (10)TO(20) PARTITION BY RANGE(a);
+CREATE TABLE clstrpart2 PARTITION OF clstrpart FOR VALUES FROM (20)TO(30);
 CREATE INDEX clstrpart_idx ON clstrpart (a);
-ALTER TABLE clstrpart CLUSTER ON clstrpart_idx;
-ERROR:  cannot mark index clustered in partitioned table
+ALTER TABLE clstrpart SET WITHOUT CLUSTER;
 CLUSTER clstrpart USING clstrpart_idx;
-ERROR:  cannot cluster a partitioned table
+CLUSTER clstrpart1 USING clstrpart1_a_idx; -- partition which is itself partitioned
+CLUSTER clstrpart12 USING clstrpart12_a_idx; -- partition which is itself partitioned, no childs
+CLUSTER clstrpart2 USING clstrpart2_a_idx; -- leaf
+\d clstrpart
+       Partitioned table "public.clstrpart"
+ Column |  Type   | Collation | Nullable | Default 
+--------+---------+-----------+----------+---------
+ a      | integer |           |          | 
+Partition key: RANGE (a)
+Indexes:
+    "clstrpart_idx" btree (a) CLUSTER
+Number of partitions: 2 (Use \d+ to list them.)
+
 DROP TABLE clstrpart;
 -- Test CLUSTER with external tuplesorting
 create table clstr_4 as select * from tenk1;
diff --git a/src/test/regress/sql/cluster.sql b/src/test/regress/sql/cluster.sql
index 188183647c..4a76848213 100644
--- a/src/test/regress/sql/cluster.sql
+++ b/src/test/regress/sql/cluster.sql
@@ -196,11 +196,19 @@ drop table clstr_temp;
 
 RESET SESSION AUTHORIZATION;
 
--- Check that partitioned tables cannot be clustered
+-- Check that partitioned tables can be clustered
 CREATE TABLE clstrpart (a int) PARTITION BY RANGE (a);
+CREATE TABLE clstrpart1 PARTITION OF clstrpart FOR VALUES FROM (1)TO(10) PARTITION BY RANGE (a);
+CREATE TABLE clstrpart11 PARTITION OF clstrpart1 FOR VALUES FROM (1)TO(10);
+CREATE TABLE clstrpart12 PARTITION OF clstrpart1 FOR VALUES FROM (10)TO(20) PARTITION BY RANGE(a);
+CREATE TABLE clstrpart2 PARTITION OF clstrpart FOR VALUES FROM (20)TO(30);
 CREATE INDEX clstrpart_idx ON clstrpart (a);
-ALTER TABLE clstrpart CLUSTER ON clstrpart_idx;
+ALTER TABLE clstrpart SET WITHOUT CLUSTER;
 CLUSTER clstrpart USING clstrpart_idx;
+CLUSTER clstrpart1 USING clstrpart1_a_idx; -- partition which is itself partitioned
+CLUSTER clstrpart12 USING clstrpart12_a_idx; -- partition which is itself partitioned, no childs
+CLUSTER clstrpart2 USING clstrpart2_a_idx; -- leaf
+\d clstrpart
 DROP TABLE clstrpart;
 
 -- Test CLUSTER with external tuplesorting
-- 
2.17.0

Reply via email to