From fee4f5882971cc353e0b68f6974ab7af0e344ddd Mon Sep 17 00:00:00 2001
From: Mats Kindahl <mats@timescale.com>
Date: Thu, 12 Sep 2024 18:14:04 +0200
Subject: [PATCH] Add function to compute vacuum attribute statistics

Refactor analyze.c so that the code that determines which columns to
analyze and builds the VacAttrStats array for them is a separate
function, analyze_compute_vacattrstats(), which is available for
extensions to use to set up their own ReadStream for analyze.
---
 src/backend/commands/analyze.c | 120 ++++++++++++++++++---------------
 src/include/commands/vacuum.h  |   2 +
 2 files changed, 69 insertions(+), 53 deletions(-)

diff --git a/src/backend/commands/analyze.c b/src/backend/commands/analyze.c
index c590a2adc35..8a402ad15c6 100644
--- a/src/backend/commands/analyze.c
+++ b/src/backend/commands/analyze.c
@@ -269,6 +269,72 @@ analyze_rel(Oid relid, RangeVar *relation,
 	pgstat_progress_end_command();
 }
 
+/*
+ * analyze_compute_vacattrstats() -- determine which columns to analyze
+ *
+ * va_cols lists column names; NIL means every column of onerel.  Unknown or
+ * repeated names raise an error (system attributes are rejected at lookup; a
+ * repeat would make a conflicting pg_statistic update).  NULL results from
+ * examine_attribute() are dropped.  Returns the length of *vacattrstats_out.
+ */
+int
+analyze_compute_vacattrstats(Relation onerel, List *va_cols, VacAttrStats ***vacattrstats_out)
+{
+	int			tcnt,
+				i,
+				attr_cnt;
+	VacAttrStats **vacattrstats;
+
+	if (va_cols != NIL)
+	{
+		Bitmapset  *unique_cols = NULL;	/* attnums already listed */
+		ListCell   *le;
+
+		vacattrstats = (VacAttrStats **) palloc(list_length(va_cols) *
+												sizeof(VacAttrStats *));
+		tcnt = 0;
+		foreach(le, va_cols)
+		{
+			char	   *col = strVal(lfirst(le));
+
+			i = attnameAttNum(onerel, col, false);
+			if (i == InvalidAttrNumber)
+				ereport(ERROR,
+						(errcode(ERRCODE_UNDEFINED_COLUMN),
+						 errmsg("column \"%s\" of relation \"%s\" does not exist",
+								col, RelationGetRelationName(onerel))));
+			if (bms_is_member(i, unique_cols))
+				ereport(ERROR,
+						(errcode(ERRCODE_DUPLICATE_COLUMN),
+						 errmsg("column \"%s\" of relation \"%s\" appears more than once",
+								col, RelationGetRelationName(onerel))));
+			unique_cols = bms_add_member(unique_cols, i);
+
+			vacattrstats[tcnt] = examine_attribute(onerel, i, NULL);
+			if (vacattrstats[tcnt] != NULL) /* NULL result: slot is reused */
+				tcnt++;
+		}
+		attr_cnt = tcnt;		/* entries actually stored */
+	}
+	else
+	{
+		attr_cnt = onerel->rd_att->natts;
+		vacattrstats = (VacAttrStats **)
+			palloc(attr_cnt * sizeof(VacAttrStats *));
+		tcnt = 0;
+		for (i = 1; i <= attr_cnt; i++) /* attribute numbers start at 1 */
+		{
+			vacattrstats[tcnt] = examine_attribute(onerel, i, NULL);
+			if (vacattrstats[tcnt] != NULL) /* NULL result: slot is reused */
+				tcnt++;
+		}
+		attr_cnt = tcnt;		/* entries actually stored */
+	}
+
+	*vacattrstats_out = vacattrstats;	/* array palloc'd above */
+	return attr_cnt;
+}
+
 /*
  *	do_analyze_rel() -- analyze one relation, recursively or not
  *
@@ -353,59 +419,7 @@ do_analyze_rel(Relation onerel, VacuumParams *params,
 		starttime = GetCurrentTimestamp();
 	}
 
-	/*
-	 * Determine which columns to analyze
-	 *
-	 * Note that system attributes are never analyzed, so we just reject them
-	 * at the lookup stage.  We also reject duplicate column mentions.  (We
-	 * could alternatively ignore duplicates, but analyzing a column twice
-	 * won't work; we'd end up making a conflicting update in pg_statistic.)
-	 */
-	if (va_cols != NIL)
-	{
-		Bitmapset  *unique_cols = NULL;
-		ListCell   *le;
-
-		vacattrstats = (VacAttrStats **) palloc(list_length(va_cols) *
-												sizeof(VacAttrStats *));
-		tcnt = 0;
-		foreach(le, va_cols)
-		{
-			char	   *col = strVal(lfirst(le));
-
-			i = attnameAttNum(onerel, col, false);
-			if (i == InvalidAttrNumber)
-				ereport(ERROR,
-						(errcode(ERRCODE_UNDEFINED_COLUMN),
-						 errmsg("column \"%s\" of relation \"%s\" does not exist",
-								col, RelationGetRelationName(onerel))));
-			if (bms_is_member(i, unique_cols))
-				ereport(ERROR,
-						(errcode(ERRCODE_DUPLICATE_COLUMN),
-						 errmsg("column \"%s\" of relation \"%s\" appears more than once",
-								col, RelationGetRelationName(onerel))));
-			unique_cols = bms_add_member(unique_cols, i);
-
-			vacattrstats[tcnt] = examine_attribute(onerel, i, NULL);
-			if (vacattrstats[tcnt] != NULL)
-				tcnt++;
-		}
-		attr_cnt = tcnt;
-	}
-	else
-	{
-		attr_cnt = onerel->rd_att->natts;
-		vacattrstats = (VacAttrStats **)
-			palloc(attr_cnt * sizeof(VacAttrStats *));
-		tcnt = 0;
-		for (i = 1; i <= attr_cnt; i++)
-		{
-			vacattrstats[tcnt] = examine_attribute(onerel, i, NULL);
-			if (vacattrstats[tcnt] != NULL)
-				tcnt++;
-		}
-		attr_cnt = tcnt;
-	}
+	attr_cnt = analyze_compute_vacattrstats(onerel, va_cols, &vacattrstats);
 
 	/*
 	 * Open all indexes of the relation, and see if there are any analyzable
diff --git a/src/include/commands/vacuum.h b/src/include/commands/vacuum.h
index 759f9a87d38..f456734855d 100644
--- a/src/include/commands/vacuum.h
+++ b/src/include/commands/vacuum.h
@@ -378,6 +378,8 @@ extern void parallel_vacuum_main(dsm_segment *seg, shm_toc *toc);
 extern void analyze_rel(Oid relid, RangeVar *relation,
 						VacuumParams *params, List *va_cols, bool in_outer_xact,
 						BufferAccessStrategy bstrategy);
+extern int	analyze_compute_vacattrstats(Relation onerel, List *va_cols,
+										 VacAttrStats ***vacattrstats_out);
 extern bool std_typanalyze(VacAttrStats *stats);
 
 /* in utils/misc/sampling.c --- duplicate of declarations in utils/sampling.h */
-- 
2.43.0

