On Sat, Dec 10, 2022 at 12:18:32PM +0400, Ilya Gladyshev wrote:
> Hi,
> 
> I have noticed that progress reporting for CREATE INDEX of partitioned
> tables seems to be working poorly for nested partitioned tables. In
> particular, it overwrites total and done partitions count when it
> recurses down to child partitioned tables and it only reports top-level
> partitions. So it's not hard to see something like this during CREATE
> INDEX now:
> 
> postgres=# select partitions_total, partitions_done from
> pg_stat_progress_create_index ;
>  partitions_total | partitions_done 
> ------------------+-----------------
>                 1 |               2
> (1 row)

Yeah.  I didn't verify, but it looks like this is a bug going to back to
v12.  As you said, when called recursively, DefineIndex() clobbers the
number of completed partitions.

Maybe DefineIndex() could flatten the list of partitions.  But I don't
think that can work easily with iteration rather than recursion.

Could you check what I've written as a counter-proposal ?

As long as we're changing partitions_done to include nested
sub-partitions, it seems to me like we should exclude intermediate
"catalog-only" partitioned indexes, and count only physical leaf
partitions.  Should it alo exclude any children with matching indexes,
which will also be catalog-only changes?  Probably not.

The docs say:
|When creating an index on a partitioned table, this column is set to the
|total number of partitions on which the index is to be created. This
|field is 0 during a REINDEX.

> I changed current behaviour to report the total number of partitions in
> the inheritance tree and fixed recursion in the attached patch. I used
> a static variable to keep the counter to avoid ABI breakage of
> DefineIndex, so that we could backpatch this to previous versions.

I wrote a bunch of assertions for this, which seems to have uncovered an
similar issue with COPY progress reporting, dating to 8a4f618e7.  I'm
not sure the assertions are okay.  I imagine they may break other
extensions, as with file_fdw.

-- 
Justin
>From b8077babf9a101f9d1bf41dd1ad866d2ea38b603 Mon Sep 17 00:00:00 2001
From: Justin Pryzby <pryz...@telsasoft.com>
Date: Sat, 10 Dec 2022 16:17:50 -0600
Subject: [PATCH] fix progress reporting of nested, partitioned indexes..

---
 src/backend/commands/indexcmds.c              | 51 +++++++++++--
 src/backend/utils/activity/backend_progress.c | 72 +++++++++++++++++++
 2 files changed, 118 insertions(+), 5 deletions(-)

diff --git a/src/backend/commands/indexcmds.c b/src/backend/commands/indexcmds.c
index b5b860c3abf..6caa1f94ed7 100644
--- a/src/backend/commands/indexcmds.c
+++ b/src/backend/commands/indexcmds.c
@@ -482,6 +482,26 @@ WaitForOlderSnapshots(TransactionId limitXmin, bool progress)
 	}
 }
 
+/*
+ * Count the number of direct and indirect leaf partitions, excluding foreign
+ * tables.
+ */
+static int
+num_leaf_partitions(Oid relid)
+{
+	int		nleaves = 0;
+	List	*childs = find_all_inheritors(relid, NoLock, NULL);
+	ListCell *lc;
+
+	foreach(lc, childs)
+	{
+		Oid	partrelid = lfirst_oid(lc);
+		if (RELKIND_HAS_STORAGE(get_rel_relkind(partrelid)))
+			nleaves++;
+	}
+
+	return nleaves;
+}
 
 /*
  * DefineIndex
@@ -1212,14 +1232,22 @@ DefineIndex(Oid relationId,
 		partdesc = RelationGetPartitionDesc(rel, true);
 		if ((!stmt->relation || stmt->relation->inh) && partdesc->nparts > 0)
 		{
-			int			nparts = partdesc->nparts;
+			int			nparts = partdesc->nparts; /* only direct children */
+			int			nparts_done = 0; /* direct and indirect children */
 			Oid		   *part_oids = palloc_array(Oid, nparts);
 			bool		invalidate_parent = false;
 			Relation	parentIndex;
 			TupleDesc	parentDesc;
 
-			pgstat_progress_update_param(PROGRESS_CREATEIDX_PARTITIONS_TOTAL,
-										 nparts);
+			if (!OidIsValid(parentIndexId))
+			{
+				/*
+				 * Report the number of leaf partitions (excluding foreign
+				 * tables), not just direct children.
+				 */
+				pgstat_progress_update_param(PROGRESS_CREATEIDX_PARTITIONS_TOTAL,
+											 num_leaf_partitions(relationId));
+			}
 
 			/* Make a local copy of partdesc->oids[], just for safety */
 			memcpy(part_oids, partdesc->oids, sizeof(Oid) * nparts);
@@ -1431,8 +1459,21 @@ DefineIndex(Oid relationId,
 										   child_save_sec_context);
 				}
 
-				pgstat_progress_update_param(PROGRESS_CREATEIDX_PARTITIONS_DONE,
-											 i + 1);
+				if (!OidIsValid(parentIndexId))
+				{
+					/*
+					 * Report progress only when processing top-level indexes.
+					 * When recursing, the called functions wouldn't know the
+					 * current number of partitions which were processed.
+					 * After recursing, increment by the number of children.
+					 * This works also for physical/leaf partitions.
+					 */
+					nparts_done += num_leaf_partitions(childRelid);
+
+					pgstat_progress_update_param(PROGRESS_CREATEIDX_PARTITIONS_DONE,
+												 nparts_done);
+				}
+
 				free_attrmap(attmap);
 			}
 
diff --git a/src/backend/utils/activity/backend_progress.c b/src/backend/utils/activity/backend_progress.c
index f29199725b7..1db1332ebe6 100644
--- a/src/backend/utils/activity/backend_progress.c
+++ b/src/backend/utils/activity/backend_progress.c
@@ -10,6 +10,7 @@
  */
 #include "postgres.h"
 
+#include "commands/progress.h"
 #include "port/atomics.h"		/* for memory barriers */
 #include "utils/backend_progress.h"
 #include "utils/backend_status.h"
@@ -37,6 +38,75 @@ pgstat_progress_start_command(ProgressCommandType cmdtype, Oid relid)
 	PGSTAT_END_WRITE_ACTIVITY(beentry);
 }
 
+static void
+pgstat_progress_asserts(void)
+{
+	volatile PgBackendStatus *beentry = MyBEEntry;
+	volatile int64		*a = beentry->st_progress_param;
+
+	switch (beentry->st_progress_command)
+	{
+	case PROGRESS_COMMAND_VACUUM:
+		Assert(a[PROGRESS_VACUUM_HEAP_BLKS_SCANNED] ==
+				a[PROGRESS_VACUUM_TOTAL_HEAP_BLKS]);
+		Assert(a[PROGRESS_VACUUM_HEAP_BLKS_VACUUMED] ==
+				a[PROGRESS_VACUUM_TOTAL_HEAP_BLKS]);
+		Assert(a[PROGRESS_VACUUM_NUM_DEAD_TUPLES] <=
+				a[PROGRESS_VACUUM_MAX_DEAD_TUPLES]);
+		break;
+
+	case PROGRESS_COMMAND_ANALYZE:
+		Assert(a[PROGRESS_ANALYZE_BLOCKS_DONE] ==
+				a[PROGRESS_ANALYZE_BLOCKS_TOTAL]);
+		Assert(a[PROGRESS_ANALYZE_EXT_STATS_COMPUTED] <=
+				a[PROGRESS_ANALYZE_EXT_STATS_TOTAL]);
+		Assert(a[PROGRESS_ANALYZE_CHILD_TABLES_DONE] ==
+				a[PROGRESS_ANALYZE_CHILD_TABLES_TOTAL]);
+		break;
+
+	case PROGRESS_COMMAND_CLUSTER:
+		Assert(a[PROGRESS_CLUSTER_HEAP_BLKS_SCANNED] ==
+				a[PROGRESS_CLUSTER_TOTAL_HEAP_BLKS]);
+		break;
+
+	case PROGRESS_COMMAND_CREATE_INDEX:
+		Assert(a[PROGRESS_CREATEIDX_TUPLES_DONE] <=
+				a[PROGRESS_CREATEIDX_TUPLES_TOTAL]);
+		Assert(a[PROGRESS_CREATEIDX_PARTITIONS_DONE] ==
+				a[PROGRESS_CREATEIDX_PARTITIONS_TOTAL]);
+		break;
+
+	case PROGRESS_COMMAND_BASEBACKUP:
+		/* progress is optional */
+		if (a[PROGRESS_BASEBACKUP_BACKUP_TOTAL] >= 0)
+		{
+			Assert(a[PROGRESS_BASEBACKUP_BACKUP_STREAMED] ==
+					a[PROGRESS_BASEBACKUP_BACKUP_TOTAL]);
+			Assert(a[PROGRESS_BASEBACKUP_TBLSPC_STREAMED] ==
+					a[PROGRESS_BASEBACKUP_TBLSPC_TOTAL]);
+		}
+		break;
+
+	case PROGRESS_COMMAND_COPY:
+#if 0
+// This currently fails file_fdw tests, since pgstat_prorgress evidently fails
+// to support simultaneous copy commands, as happens during JOIN.
+		/* bytes progress is not available in all cases */
+		if (a[PROGRESS_COPY_BYTES_TOTAL] > 0)
+			// Assert(a[PROGRESS_COPY_BYTES_PROCESSED] <= a[PROGRESS_COPY_BYTES_TOTAL]);
+			if (a[PROGRESS_COPY_BYTES_PROCESSED] > a[PROGRESS_COPY_BYTES_TOTAL])
+				elog(WARNING, "PROGRESS_COPY_BYTES_PROCESSED %ld %ld",
+							a[PROGRESS_COPY_BYTES_PROCESSED],
+							a[PROGRESS_COPY_BYTES_TOTAL]);
+
+		break;
+#endif
+
+	case PROGRESS_COMMAND_INVALID:
+		break; /* Do nothing */
+	}
+}
+
 /*-----------
  * pgstat_progress_update_param() -
  *
@@ -105,6 +175,8 @@ pgstat_progress_end_command(void)
 	if (beentry->st_progress_command == PROGRESS_COMMAND_INVALID)
 		return;
 
+	pgstat_progress_asserts();
+
 	PGSTAT_BEGIN_WRITE_ACTIVITY(beentry);
 	beentry->st_progress_command = PROGRESS_COMMAND_INVALID;
 	beentry->st_progress_command_target = InvalidOid;
-- 
2.25.1

Reply via email to