Re: [PATCH] pg_dump: Do not dump statistics for excluded tables

2023-12-27 Thread Tom Lane
Rian McGuire  writes:
> I've attached a patch against master that addresses a small bug in pg_dump.
> Previously, pg_dump would include CREATE STATISTICS statements for
> tables that were excluded from the dump, causing reload to fail if any
> excluded tables had extended statistics.

I agree that's a bug ...

> The patch skips the creation of the StatsExtInfo if the associated
> table does not have the DUMP_COMPONENT_DEFINITION flag set. This is
> similar to how getPublicationTables behaves if a table is excluded.

... but I don't like the details of this patch (and I'm not too
thrilled with the implementation of getPublicationTables, either).
The style in pg_dump is to put such decisions into separate
policy-setting subroutines.  Also, skipping creation of the
DumpableObject altogether is the wrong thing because it'd prevent
pg_dump from tracing or reasoning about dependencies involving the
stats object, which can be relevant even if the object itself isn't
dumped --- this is why all the other data-collection subroutines
operate as they do.  getPublicationTables can probably get away
with its low-rent approach given that publication membership isn't
represented by pg_depend entries, but it's far from clear that it'll
never be an issue for stats.

So I think it needs to be more like the attached.
(I did use your test case verbatim.)

regards, tom lane

diff --git a/src/bin/pg_dump/pg_dump.c b/src/bin/pg_dump/pg_dump.c
index 8c0b5486b9..050a831226 100644
--- a/src/bin/pg_dump/pg_dump.c
+++ b/src/bin/pg_dump/pg_dump.c
@@ -2046,6 +2046,26 @@ selectDumpablePublicationObject(DumpableObject *dobj, Archive *fout)
 		DUMP_COMPONENT_ALL : DUMP_COMPONENT_NONE;
 }
 
+/*
+ * selectDumpableStatisticsObject: policy-setting subroutine
+ *		Mark an extended statistics object as to be dumped or not
+ *
+ * We dump an extended statistics object if the schema it's in and the table
+ * it's for are being dumped.  (This'll need more thought if statistics
+ * objects ever support cross-table stats.)
+ */
+static void
+selectDumpableStatisticsObject(StatsExtInfo *sobj, Archive *fout)
+{
+	if (checkExtensionMembership(>dobj, fout))
+		return;	/* extension membership overrides all else */
+
+	sobj->dobj.dump = sobj->dobj.namespace->dobj.dump_contains;
+	if (sobj->stattable == NULL ||
+		!(sobj->stattable->dobj.dump & DUMP_COMPONENT_DEFINITION))
+		sobj->dobj.dump = DUMP_COMPONENT_NONE;
+}
+
 /*
  * selectDumpableObject: policy-setting subroutine
  *		Mark a generic dumpable object as to be dumped or not
@@ -7291,6 +7311,7 @@ getExtendedStatistics(Archive *fout)
 	int			i_stxname;
 	int			i_stxnamespace;
 	int			i_stxowner;
+	int			i_stxrelid;
 	int			i_stattarget;
 	int			i;
 
@@ -7302,11 +7323,11 @@ getExtendedStatistics(Archive *fout)
 
 	if (fout->remoteVersion < 13)
 		appendPQExpBufferStr(query, "SELECT tableoid, oid, stxname, "
-			 "stxnamespace, stxowner, (-1) AS stxstattarget "
+			 "stxnamespace, stxowner, stxrelid, (-1) AS stxstattarget "
 			 "FROM pg_catalog.pg_statistic_ext");
 	else
 		appendPQExpBufferStr(query, "SELECT tableoid, oid, stxname, "
-			 "stxnamespace, stxowner, stxstattarget "
+			 "stxnamespace, stxowner, stxrelid, stxstattarget "
 			 "FROM pg_catalog.pg_statistic_ext");
 
 	res = ExecuteSqlQuery(fout, query->data, PGRES_TUPLES_OK);
@@ -7318,6 +7339,7 @@ getExtendedStatistics(Archive *fout)
 	i_stxname = PQfnumber(res, "stxname");
 	i_stxnamespace = PQfnumber(res, "stxnamespace");
 	i_stxowner = PQfnumber(res, "stxowner");
+	i_stxrelid = PQfnumber(res, "stxrelid");
 	i_stattarget = PQfnumber(res, "stxstattarget");
 
 	statsextinfo = (StatsExtInfo *) pg_malloc(ntups * sizeof(StatsExtInfo));
@@ -7332,10 +7354,12 @@ getExtendedStatistics(Archive *fout)
 		statsextinfo[i].dobj.namespace =
 			findNamespace(atooid(PQgetvalue(res, i, i_stxnamespace)));
 		statsextinfo[i].rolname = getRoleName(PQgetvalue(res, i, i_stxowner));
+		statsextinfo[i].stattable =
+			findTableByOid(atooid(PQgetvalue(res, i, i_stxrelid)));
 		statsextinfo[i].stattarget = atoi(PQgetvalue(res, i, i_stattarget));
 
 		/* Decide whether we want to dump it */
-		selectDumpableObject(&(statsextinfo[i].dobj), fout);
+		selectDumpableStatisticsObject(&(statsextinfo[i]), fout);
 	}
 
 	PQclear(res);
diff --git a/src/bin/pg_dump/pg_dump.h b/src/bin/pg_dump/pg_dump.h
index 2fe3cbed9a..673ca5c92d 100644
--- a/src/bin/pg_dump/pg_dump.h
+++ b/src/bin/pg_dump/pg_dump.h
@@ -423,7 +423,8 @@ typedef struct _indexAttachInfo
 typedef struct _statsExtInfo
 {
 	DumpableObject dobj;
-	const char *rolname;
+	const char *rolname;		/* owner */
+	TableInfo  *stattable;		/* link to table the stats are for */
 	int			stattarget;		/* statistics target */
 } StatsExtInfo;
 
diff --git a/src/bin/pg_dump/t/002_pg_dump.pl b/src/bin/pg_dump/t/002_pg_dump.pl
index eb3ec534b4..a671603cd2 100644
--- a/src/bin/pg_dump/t/002_pg_dump.pl
+++ b/src/bin/pg_dump/t/002_pg_dump.pl
@@ -3742,14 +3742,15 @@ my %tests = (
 	

[PATCH] pg_dump: Do not dump statistics for excluded tables

2023-12-26 Thread Rian McGuire

Hi hackers,

I've attached a patch against master that addresses a small bug in pg_dump.

Previously, pg_dump would include CREATE STATISTICS statements for
tables that were excluded from the dump, causing reload to fail if any
excluded tables had extended statistics.

The patch skips the creation of the StatsExtInfo if the associated
table does not have the DUMP_COMPONENT_DEFINITION flag set. This is
similar to how getPublicationTables behaves if a table is excluded.

I've covered this with a regression test by altering one of the CREATE
STATISTICS examples to work with the existing 'exclude_test_table'
run. Without the fix, that causes the test to fail with:
# Failed test 'exclude_test_table: should not dump CREATE STATISTICS
extended_stats_no_options'
# at t/002_pg_dump.pl line 4934.

Regards,
RianFrom 0fe06338728981fa727e3e6b99247741deda75fb Mon Sep 17 00:00:00 2001
From: Rian McGuire 
Date: Wed, 27 Dec 2023 13:09:31 +1100
Subject: [PATCH] pg_dump: Do not dump statistics for excluded tables

Previously, pg_dump would include CREATE STATISTICS statements for
tables that were excluded from the dump, causing reload to fail.
---
 src/bin/pg_dump/pg_dump.c| 43 +++-
 src/bin/pg_dump/t/002_pg_dump.pl |  5 ++--
 2 files changed, 34 insertions(+), 14 deletions(-)

diff --git a/src/bin/pg_dump/pg_dump.c b/src/bin/pg_dump/pg_dump.c
index 8c0b5486b9..c67ed416e9 100644
--- a/src/bin/pg_dump/pg_dump.c
+++ b/src/bin/pg_dump/pg_dump.c
@@ -7288,11 +7288,13 @@ getExtendedStatistics(Archive *fout)
int ntups;
int i_tableoid;
int i_oid;
+   int i_stxrelid;
int i_stxname;
int i_stxnamespace;
int i_stxowner;
int i_stattarget;
-   int i;
+   int i,
+   j;
 
/* Extended statistics were new in v10 */
if (fout->remoteVersion < 10)
@@ -7301,11 +7303,11 @@ getExtendedStatistics(Archive *fout)
query = createPQExpBuffer();
 
if (fout->remoteVersion < 13)
-   appendPQExpBufferStr(query, "SELECT tableoid, oid, stxname, "
+   appendPQExpBufferStr(query, "SELECT tableoid, oid, stxrelid, 
stxname, "
 "stxnamespace, 
stxowner, (-1) AS stxstattarget "
 "FROM 
pg_catalog.pg_statistic_ext");
else
-   appendPQExpBufferStr(query, "SELECT tableoid, oid, stxname, "
+   appendPQExpBufferStr(query, "SELECT tableoid, oid, stxrelid, 
stxname, "
 "stxnamespace, 
stxowner, stxstattarget "
 "FROM 
pg_catalog.pg_statistic_ext");
 
@@ -7315,27 +7317,44 @@ getExtendedStatistics(Archive *fout)
 
i_tableoid = PQfnumber(res, "tableoid");
i_oid = PQfnumber(res, "oid");
+   i_stxrelid = PQfnumber(res, "stxrelid");
i_stxname = PQfnumber(res, "stxname");
i_stxnamespace = PQfnumber(res, "stxnamespace");
i_stxowner = PQfnumber(res, "stxowner");
i_stattarget = PQfnumber(res, "stxstattarget");
 
+   /* this allocation may be more than we need */
statsextinfo = (StatsExtInfo *) pg_malloc(ntups * sizeof(StatsExtInfo));
+   j = 0;
 
for (i = 0; i < ntups; i++)
{
-   statsextinfo[i].dobj.objType = DO_STATSEXT;
-   statsextinfo[i].dobj.catId.tableoid = atooid(PQgetvalue(res, i, 
i_tableoid));
-   statsextinfo[i].dobj.catId.oid = atooid(PQgetvalue(res, i, 
i_oid));
-   AssignDumpId([i].dobj);
-   statsextinfo[i].dobj.name = pg_strdup(PQgetvalue(res, i, 
i_stxname));
-   statsextinfo[i].dobj.namespace =
+   Oid stxrelid;
+   TableInfo  *tbinfo;
+
+   /*
+* Only dump extended statistics if we're going to dump the 
definition
+* of the table that they're associated with.
+*/
+   stxrelid = atoi(PQgetvalue(res, i, i_stxrelid));
+   tbinfo = findTableByOid(stxrelid);
+   if (tbinfo == NULL || !(tbinfo->dobj.dump & 
DUMP_COMPONENT_DEFINITION))
+   continue;
+
+   statsextinfo[j].dobj.objType = DO_STATSEXT;
+   statsextinfo[j].dobj.catId.tableoid = atooid(PQgetvalue(res, i, 
i_tableoid));
+   statsextinfo[j].dobj.catId.oid = atooid(PQgetvalue(res, i, 
i_oid));
+   AssignDumpId([j].dobj);
+   statsex