> > > I too was thinking of network timeouts (and in such a case I thought > that even if PQsendQuery was run successfully, the following > PQgetResult would have to block for a long time). And yes, that > situation can occur for normal queries, but in such a query, the extra > functions call is done only when aborting the remote transaction, in a > way libpq functions don't have to wait for a network timeout. >
I've copied the ERROR-behavior that was already done in postgresGetAnalyzeInfoForForeignTable(), which I assume will be to your liking. Structural error checks (how many columns does the result set have, etc) are now elog errors, bad PQresultStatus-es are now errors as well. The warnings when the query ran fine but no data was found are now NOTICE level. > I forgot to mention this, but when we have reltuples = 0 for v14 or > later, the patch tries to import both relstats and attstats, but in > that case I think it would be OK to do so only for the former for the > consistency with the normal processing. What do you think about that? > I've updated the logic so that we don't try to fetch attrstats if the reltuples is 0 or -1, and though the comments still mention the difference in server version, the code behaves the same in new versions and old. My thinking is that either value means "you are not going to find attstats" and our reaction is the same either way. I've also moved the column matching - finding which rows of the attstats result set match to which columns in the local table - up into the fetch portion, something you had mentioned wanting to see as well. This resulted in some significant refactoring, but overall I think you will find the changes for the better.
From aff24e106b88de903f4a1b563485a7adee9438ad Mon Sep 17 00:00:00 2001 From: Corey Huinker <[email protected]> Date: Sat, 17 Jan 2026 16:39:45 -0500 Subject: [PATCH v14 1/3] Add FDW functions for importing optimizer statistics. Add the the function StatisticsAreImportable which is used as a lightweight preliminary scan to determine if the remote table could potentially have statistics that could be imported instead of fetching a row sample from the remote table, which can be expensive. Also add the function ImportStatistics which attempts to actually fetch those statistics from the remote server, and if successful import them into the local statistics catalog. --- src/include/foreign/fdwapi.h | 8 ++++ src/backend/commands/analyze.c | 68 ++++++++++++++++++++++++---------- 2 files changed, 56 insertions(+), 20 deletions(-) diff --git a/src/include/foreign/fdwapi.h b/src/include/foreign/fdwapi.h index 96b6f692d2a..59beeae4f45 100644 --- a/src/include/foreign/fdwapi.h +++ b/src/include/foreign/fdwapi.h @@ -157,6 +157,12 @@ typedef bool (*AnalyzeForeignTable_function) (Relation relation, AcquireSampleRowsFunc *func, BlockNumber *totalpages); +typedef bool (*StatisticsAreImportable_function) (Relation relation); + +typedef bool (*ImportStatistics_function) (Relation relation, + List *va_cols, + int elevel); + typedef List *(*ImportForeignSchema_function) (ImportForeignSchemaStmt *stmt, Oid serverOid); @@ -255,6 +261,8 @@ typedef struct FdwRoutine /* Support functions for ANALYZE */ AnalyzeForeignTable_function AnalyzeForeignTable; + StatisticsAreImportable_function StatisticsAreImportable; + ImportStatistics_function ImportStatistics; /* Support functions for IMPORT FOREIGN SCHEMA */ ImportForeignSchema_function ImportForeignSchema; diff --git a/src/backend/commands/analyze.c b/src/backend/commands/analyze.c index 53adac9139b..f175c0d8ed7 100644 --- a/src/backend/commands/analyze.c +++ b/src/backend/commands/analyze.c @@ -113,6 +113,9 @@ analyze_rel(Oid relid, RangeVar 
*relation, int elevel; AcquireSampleRowsFunc acquirefunc = NULL; BlockNumber relpages = 0; + FdwRoutine *fdwroutine = NULL; + bool can_import_stats = false; + bool stats_imported = false; /* Select logging level */ if (params.options & VACOPT_VERBOSE) @@ -195,27 +198,16 @@ analyze_rel(Oid relid, RangeVar *relation, else if (onerel->rd_rel->relkind == RELKIND_FOREIGN_TABLE) { /* - * For a foreign table, call the FDW's hook function to see whether it - * supports analysis. + * For a foreign table, call the FDW's hook functions to see whether + * it supports statistics import or analysis. */ - FdwRoutine *fdwroutine; - bool ok = false; fdwroutine = GetFdwRoutineForRelation(onerel, false); - if (fdwroutine->AnalyzeForeignTable != NULL) - ok = fdwroutine->AnalyzeForeignTable(onerel, - &acquirefunc, - &relpages); - - if (!ok) - { - ereport(WARNING, - (errmsg("skipping \"%s\" --- cannot analyze this foreign table", - RelationGetRelationName(onerel)))); - relation_close(onerel, ShareUpdateExclusiveLock); - return; - } + if (fdwroutine->ImportStatistics != NULL && + fdwroutine->StatisticsAreImportable != NULL && + fdwroutine->StatisticsAreImportable(onerel)) + can_import_stats = true; } else if (onerel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE) { @@ -247,10 +239,46 @@ analyze_rel(Oid relid, RangeVar *relation, PROGRESS_ANALYZE_STARTED_BY_MANUAL); /* - * Do the normal non-recursive ANALYZE. We can skip this for partitioned - * tables, which don't contain any rows. + * If this table can import statistics, attempt to do so. */ - if (onerel->rd_rel->relkind != RELKIND_PARTITIONED_TABLE) + if (can_import_stats) + { + Assert(onerel->rd_rel->relkind == RELKIND_FOREIGN_TABLE); + Assert(fdwroutine != NULL); + Assert(fdwroutine->ImportStatistics != NULL); + stats_imported = fdwroutine->ImportStatistics(onerel, va_cols, elevel); + } + + /* + * Foreign tables that were not able to import stats will resort to + * regular sampling. 
+ */ + if ((onerel->rd_rel->relkind == RELKIND_FOREIGN_TABLE) + && !stats_imported) + { + bool ok = false; + + if (fdwroutine->AnalyzeForeignTable != NULL) + ok = fdwroutine->AnalyzeForeignTable(onerel, + &acquirefunc, + &relpages); + + if (!ok) + { + ereport(WARNING, + errmsg("skipping \"%s\" -- cannot analyze this foreign table.", + RelationGetRelationName(onerel))); + relation_close(onerel, ShareUpdateExclusiveLock); + return; + } + } + + /* + * Do the normal non-recursive ANALYZE. We can skip this for partitioned + * tables and foreign tables that successfully imported statistics. + */ + if ((onerel->rd_rel->relkind != RELKIND_PARTITIONED_TABLE) + && !stats_imported) do_analyze_rel(onerel, params, va_cols, acquirefunc, relpages, false, in_outer_xact, elevel); base-commit: 0123ce131fca454009439dfa3b2266d1d40737d7 -- 2.53.0
From 3178ba57a67ce9f63718f16fd9272866f42405b7 Mon Sep 17 00:00:00 2001 From: Corey Huinker <[email protected]> Date: Sat, 17 Jan 2026 19:06:05 -0500 Subject: [PATCH v14 2/3] Add remote statistics fetching to postgres_fdw. This adds the ability to fetch and import statistics from a remote server table rather than fetching the data or data sample from that table. This is managed via a new option, fetch_stats, which is available at the server level and table level. The global default is true, but this can be overridden at the server level, which in turn can be overridden at the table level. If fetch_stats is true, and the remote table is an actual table or materialized view that can hold statistics, and the local table does not have any extended statistics objects built upon it, then the remote database will first be queried for relation statistics and attribute statistics similar to the method used in pg_dump. If there are remote statistics for all of the columns in the local table, then those statistics will be imported using pg_restore_relation_statistics and pg_restore_attribute_statistics functions similar to the way pg_restore uses them. If fetch_stats is false, or if the fetched statistics were missing or otherwise inadequate, then the ANALYZE operation will issue a WARNING and then fall back to regular table sampling to generate local statistics. 
--- src/backend/commands/analyze.c | 3 +- doc/src/sgml/postgres-fdw.sgml | 25 +- .../postgres_fdw/expected/postgres_fdw.out | 17 +- contrib/postgres_fdw/option.c | 5 + contrib/postgres_fdw/postgres_fdw.c | 910 ++++++++++++++++++ contrib/postgres_fdw/sql/postgres_fdw.sql | 13 +- 6 files changed, 969 insertions(+), 4 deletions(-) diff --git a/src/backend/commands/analyze.c b/src/backend/commands/analyze.c index f175c0d8ed7..3128485578d 100644 --- a/src/backend/commands/analyze.c +++ b/src/backend/commands/analyze.c @@ -239,7 +239,8 @@ analyze_rel(Oid relid, RangeVar *relation, PROGRESS_ANALYZE_STARTED_BY_MANUAL); /* - * If this table can import statistics, attempt to do so. + * If this table can import statistics, attempt to do so. If statistics + * import fails, then we will fall back to table sampling. */ if (can_import_stats) { diff --git a/doc/src/sgml/postgres-fdw.sgml b/doc/src/sgml/postgres-fdw.sgml index de69ddcdebc..d7c0dc8ed14 100644 --- a/doc/src/sgml/postgres-fdw.sgml +++ b/doc/src/sgml/postgres-fdw.sgml @@ -332,7 +332,7 @@ OPTIONS (ADD password_required 'false'); </para> <para> - The following option controls how such an <command>ANALYZE</command> + The following options control how such an <command>ANALYZE</command> operation behaves: </para> @@ -364,6 +364,29 @@ OPTIONS (ADD password_required 'false'); </listitem> </varlistentry> + <varlistentry> + <term><literal>fetch_stats</literal> (<type>boolean</type>)</term> + <listitem> + <para> + This option, which can be specified for a foreign table or a foreign + server, determines if <command>ANALYZE</command> on a foreign table + will instead attempt to fetch the existing relation and attribute + statistics from the remote table, and if all of the attributes being + analyzed have statistics in the remote table, then it will import + those statistics directly using + <function>pg_restore_relation_stats</function> and + <function>pg_restore_attribute_stats</function>. 
This option is only + useful if the remote relation is one that can have regular statistics + (tables and materialized views). If the foreign table is a partition + of a partitioned table, analyzing the partitioned table will still + result in row sampling on the foreign table regardless of this setting, + though direct analysis of the foreign table would have attempted to + fetch remote statistics first. + The default is <literal>true</literal>. + </para> + </listitem> + </varlistentry> + </variablelist> </sect3> diff --git a/contrib/postgres_fdw/expected/postgres_fdw.out b/contrib/postgres_fdw/expected/postgres_fdw.out index 0f5271d476e..a76f1278538 100644 --- a/contrib/postgres_fdw/expected/postgres_fdw.out +++ b/contrib/postgres_fdw/expected/postgres_fdw.out @@ -252,6 +252,7 @@ SELECT c3, c4 FROM ft1 ORDER BY c3, c1 LIMIT 1; -- should work again -- Now we should be able to run ANALYZE. -- To exercise multiple code paths, we use local stats on ft1 -- and remote-estimate mode on ft2. +ALTER SERVER loopback OPTIONS (ADD fetch_stats 'false'); ANALYZE ft1; ALTER FOREIGN TABLE ft2 OPTIONS (use_remote_estimate 'true'); -- =================================================================== @@ -4559,7 +4560,8 @@ REINDEX TABLE reind_fdw_parent; -- ok REINDEX TABLE CONCURRENTLY reind_fdw_parent; -- ok DROP TABLE reind_fdw_parent; -- =================================================================== --- conversion error +-- conversion error, will generate a WARNING for imported stats and an +-- error on locally computed stats. 
-- =================================================================== ALTER FOREIGN TABLE ft1 ALTER COLUMN c8 TYPE int; SELECT * FROM ft1 ftx(x1,x2,x3,x4,x5,x6,x7,x8) WHERE x1 = 1; -- ERROR @@ -11481,6 +11483,13 @@ CREATE FOREIGN TABLE async_p2 PARTITION OF async_pt FOR VALUES FROM (2000) TO (3 SERVER loopback2 OPTIONS (table_name 'base_tbl2'); INSERT INTO async_p1 SELECT 1000 + i, i, to_char(i, 'FM0000') FROM generate_series(0, 999, 5) i; INSERT INTO async_p2 SELECT 2000 + i, i, to_char(i, 'FM0000') FROM generate_series(0, 999, 5) i; +-- Will failover to sampling on async_p2 because fetch_stats = true (the default) on +-- loopback2, and is set to false on loopback +ANALYZE async_pt; +NOTICE: remote table public.base_tbl2 has no statistics to import +HINT: Falling back to ANALYZE with regular row sampling. +-- Turning off fetch_stats at the table level for async_p2 removes the warning. +ALTER FOREIGN TABLE async_p2 OPTIONS (ADD fetch_stats 'false'); ANALYZE async_pt; -- simple queries CREATE TABLE result_tbl (a int, b int, c text); @@ -11587,6 +11596,12 @@ CREATE TABLE base_tbl3 (a int, b int, c text); CREATE FOREIGN TABLE async_p3 PARTITION OF async_pt FOR VALUES FROM (3000) TO (4000) SERVER loopback2 OPTIONS (table_name 'base_tbl3'); INSERT INTO async_p3 SELECT 3000 + i, i, to_char(i, 'FM0000') FROM generate_series(0, 999, 5) i; +-- Will fail because fetch_stats = true (the default) on async_p3/loopback2 +ANALYZE async_pt; +NOTICE: remote table public.base_tbl3 has no statistics to import +HINT: Falling back to ANALYZE with regular row sampling. +-- Turn off fetch_stats at the server level. 
+ALTER SERVER loopback2 OPTIONS (ADD fetch_stats 'false'); ANALYZE async_pt; EXPLAIN (VERBOSE, COSTS OFF) INSERT INTO result_tbl SELECT * FROM async_pt WHERE b === 505; diff --git a/contrib/postgres_fdw/option.c b/contrib/postgres_fdw/option.c index b0bd72d1e58..5b7726800d0 100644 --- a/contrib/postgres_fdw/option.c +++ b/contrib/postgres_fdw/option.c @@ -120,6 +120,7 @@ postgres_fdw_validator(PG_FUNCTION_ARGS) strcmp(def->defname, "async_capable") == 0 || strcmp(def->defname, "parallel_commit") == 0 || strcmp(def->defname, "parallel_abort") == 0 || + strcmp(def->defname, "fetch_stats") == 0 || strcmp(def->defname, "keep_connections") == 0) { /* these accept only boolean values */ @@ -278,6 +279,10 @@ InitPgFdwOptions(void) {"use_scram_passthrough", ForeignServerRelationId, false}, {"use_scram_passthrough", UserMappingRelationId, false}, + /* fetch_stats is available on both server and table */ + {"fetch_stats", ForeignServerRelationId, false}, + {"fetch_stats", ForeignTableRelationId, false}, + /* * sslcert and sslkey are in fact libpq options, but we repeat them * here to allow them to appear in both foreign server context (when diff --git a/contrib/postgres_fdw/postgres_fdw.c b/contrib/postgres_fdw/postgres_fdw.c index 60d90329a65..c76ca2fc69b 100644 --- a/contrib/postgres_fdw/postgres_fdw.c +++ b/contrib/postgres_fdw/postgres_fdw.c @@ -18,10 +18,13 @@ #include "access/sysattr.h" #include "access/table.h" #include "catalog/pg_opfamily.h" +#include "catalog/pg_statistic_ext.h" #include "commands/defrem.h" #include "commands/explain_format.h" #include "commands/explain_state.h" #include "executor/execAsync.h" +#include "executor/spi.h" +#include "fmgr.h" #include "foreign/fdwapi.h" #include "funcapi.h" #include "miscadmin.h" @@ -42,12 +45,14 @@ #include "storage/latch.h" #include "utils/builtins.h" #include "utils/float.h" +#include "utils/fmgroids.h" #include "utils/guc.h" #include "utils/lsyscache.h" #include "utils/memutils.h" #include "utils/rel.h" #include 
"utils/sampling.h" #include "utils/selfuncs.h" +#include "utils/syscache.h" PG_MODULE_MAGIC_EXT( .name = "postgres_fdw", @@ -317,6 +322,21 @@ typedef struct List *already_used; /* expressions already dealt with */ } ec_member_foreign_arg; +/* Result sets that are returned from a foreign statistics scan */ +typedef struct +{ + PGresult *rel; + PGresult *att; +} RemoteStatsResults; + +/* Pairs of remote columns with local attnums */ +typedef struct +{ + char remote_attname[NAMEDATALEN]; + AttrNumber local_attnum; + int res_index; +} RemoteAttributeMapping; + /* * SQL functions */ @@ -402,6 +422,10 @@ static void postgresExecForeignTruncate(List *rels, static bool postgresAnalyzeForeignTable(Relation relation, AcquireSampleRowsFunc *func, BlockNumber *totalpages); +static bool postgresStatisticsAreImportable(Relation relation); +static bool postgresImportStatistics(Relation relation, + List *va_cols, + int elevel); static List *postgresImportForeignSchema(ImportForeignSchemaStmt *stmt, Oid serverOid); static void postgresGetForeignJoinPaths(PlannerInfo *root, @@ -546,6 +570,188 @@ static void merge_fdw_options(PgFdwRelationInfo *fpinfo, const PgFdwRelationInfo *fpinfo_i); static int get_batch_size_option(Relation rel); +/* + * Static queries for querying remote statistics. 
+ */ + +/* All static relstats queries have the same column order */ +enum RelStatsColumns +{ + RELSTATS_RELKIND = 0, + RELSTATS_RELPAGES, + RELSTATS_RELTUPLES, + RELSTATS_NUM_FIELDS, +}; + +/* range stats introduced in v17 */ +static const char *attstats_query_17 = +"SELECT DISTINCT ON (s.attname COLLATE \"C\") attname, s.null_frac, s.avg_width, " +"s.n_distinct, s.most_common_vals, s.most_common_freqs, " +"s.histogram_bounds, s.correlation, s.most_common_elems, " +"s.most_common_elem_freqs, s.elem_count_histogram, " +"s.range_length_histogram, s.range_empty_frac, s.range_bounds_histogram " +"FROM pg_catalog.pg_stats AS s " +"WHERE s.schemaname = $1 AND s.tablename = $2 " +"AND s.attname = ANY($3::text[]) " +"ORDER BY s.attname COLLATE \"C\", s.inherited DESC"; + +/* elements stats introduced in 9.2 */ +static const char *attstats_query_9_2 = +"SELECT DISTINCT ON (s.attname COLLATE \"C\") attname, s.null_frac, s.avg_width, " +"s.n_distinct, s.most_common_vals, s.most_common_freqs, " +"s.histogram_bounds, s.correlation, s.most_common_elems, " +"s.most_common_elem_freqs, s.elem_count_histogram, " +"NULL AS range_length_histogram, NULL AS range_empty_frac, " +"NULL AS range_bounds_histogram " +"FROM pg_catalog.pg_stats AS s " +"WHERE s.schemaname = $1 AND s.tablename = $2 " +"AND s.attname = ANY($3::text[]) " +"ORDER BY s.attname COLLATE \"C\", s.inherited DESC"; + +/* inherited introduced in 9.0 */ +static const char *attstats_query_9_0 = +"SELECT DISTINCT ON (s.attname COLLATE \"C\") attname, s.null_frac, s.avg_width, " +"s.n_distinct, s.most_common_vals, s.most_common_freqs, " +"s.histogram_bounds, s.correlation, NULL AS most_common_elems, " +"NULL AS most_common_elem_freqs, NULL AS elem_count_histogram, " +"NULL AS range_length_histogram, NULL AS range_empty_frac, " +"NULL AS range_bounds_histogram " +"FROM pg_catalog.pg_stats AS s " +"WHERE s.schemaname = $1 AND s.tablename = $2 " +"AND s.attname = ANY($3::text[]) " +"ORDER BY s.attname COLLATE \"C\", 
s.inherited DESC"; + +static const char *attstats_query_default = +"SELECT s.attname, s.null_frac, s.avg_width, " +"s.n_distinct, s.most_common_vals, s.most_common_freqs, " +"s.histogram_bounds, s.correlation, NULL AS most_common_elems, " +"NULL AS most_common_elem_freqs, NULL AS elem_count_histogram, " +"NULL AS range_length_histogram, NULL AS range_empty_frac, " +"NULL AS range_bounds_histogram " +"FROM pg_catalog.pg_stats AS s " +"WHERE s.schemaname = $1 AND s.tablename = $2 " +"AND s.attname = ANY($3::text[]) " +"ORDER BY s.attname COLLATE \"C\""; + +/* All static attstats queries have the same column order */ +enum AttStatsColumns +{ + ATTSTATS_ATTNAME = 0, + ATTSTATS_NULL_FRAC, + ATTSTATS_AVG_WIDTH, + ATTSTATS_N_DISTINCT, + ATTSTATS_MOST_COMMON_VALS, + ATTSTATS_MOST_COMMON_FREQS, + ATTSTATS_HISTOGRAM_BOUNDS, + ATTSTATS_CORRELATION, + ATTSTATS_MOST_COMMON_ELEMS, + ATTSTATS_MOST_COMMON_ELEM_FREQS, + ATTSTATS_ELEM_COUNT_HISTOGRAM, + ATTSTATS_RANGE_LENGTH_HISTOGRAM, + ATTSTATS_RANGE_EMPTY_FRAC, + ATTSTATS_RANGE_BOUNDS_HISTOGRAM, + ATTSTATS_NUM_FIELDS, +}; + +static const char *relimport_sql = +"SELECT pg_catalog.pg_restore_relation_stats(\n" +"\t'version', $1,\n" +"\t'schemaname', $2,\n" +"\t'relname', $3,\n" +"\t'relpages', $4::integer,\n" +"\t'reltuples', $5::real)"; + +enum RelImportSqlArgs +{ + RELIMPORT_SQL_VERSION = 0, + RELIMPORT_SQL_SCHEMANAME, + RELIMPORT_SQL_RELNAME, + RELIMPORT_SQL_RELPAGES, + RELIMPORT_SQL_RELTUPLES, + RELIMPORT_SQL_NUM_FIELDS +}; + +static const Oid relimport_argtypes[RELIMPORT_SQL_NUM_FIELDS] = { +INT4OID, TEXTOID, TEXTOID, TEXTOID, TEXTOID,}; + +static const char *attimport_sql = +"SELECT pg_catalog.pg_restore_attribute_stats(\n" +"\t'version', $1,\n" +"\t'schemaname', $2,\n" +"\t'relname', $3,\n" +"\t'attnum', $4,\n" +"\t'inherited', false::boolean,\n" +"\t'null_frac', $5::real,\n" +"\t'avg_width', $6::integer,\n" +"\t'n_distinct', $7::real,\n" +"\t'most_common_vals', $8,\n" +"\t'most_common_freqs', $9::real[],\n" 
+"\t'histogram_bounds', $10,\n" +"\t'correlation', $11::real,\n" +"\t'most_common_elems', $12,\n" +"\t'most_common_elem_freqs', $13::real[],\n" +"\t'elem_count_histogram', $14::real[],\n" +"\t'range_length_histogram', $15,\n" +"\t'range_empty_frac', $16::real,\n" +"\t'range_bounds_histogram', $17)"; + +enum AttImportSqlArgs +{ + ATTIMPORT_SQL_VERSION = 0, + ATTIMPORT_SQL_SCHEMANAME, + ATTIMPORT_SQL_RELNAME, + ATTIMPORT_SQL_ATTNUM, + ATTIMPORT_SQL_NULL_FRAC, + ATTIMPORT_SQL_AVG_WIDTH, + ATTIMPORT_SQL_N_DISTINCT, + ATTIMPORT_SQL_MOST_COMMON_VALS, + ATTIMPORT_SQL_MOST_COMMON_FREQS, + ATTIMPORT_SQL_HISTOGRAM_BOUNDS, + ATTIMPORT_SQL_CORRELATION, + ATTIMPORT_SQL_MOST_COMMON_ELEMS, + ATTIMPORT_SQL_MOST_COMMON_ELEM_FREQS, + ATTIMPORT_SQL_ELEM_COUNT_HISTOGRAM, + ATTIMPORT_SQL_RANGE_LENGTH_HISTOGRAM, + ATTIMPORT_SQL_RANGE_EMPTY_FRAC, + ATTIMPORT_SQL_RANGE_BOUNDS_HISTOGRAM, + ATTIMPORT_SQL_NUM_FIELDS +}; + +static const Oid attimport_argtypes[ATTIMPORT_SQL_NUM_FIELDS] = { + INT4OID, TEXTOID, TEXTOID, INT2OID, + TEXTOID, TEXTOID, TEXTOID, TEXTOID, + TEXTOID, TEXTOID, TEXTOID, TEXTOID, + TEXTOID, TEXTOID, TEXTOID, TEXTOID, +TEXTOID,}; + +/* Pairs AttStatsColumns:AttImportSqlArgs Map */ +typedef struct +{ + enum AttStatsColumns res_field; + enum AttImportSqlArgs arg_num; +} AttrResultArgMap; + +/* + * The mapping of attribute stats query columns to the positional arguments in + * the prepared pg_restore_attribute_stats() statement. 
+ */ +#define NUM_MAPPED_ATTIMPORT_ARGS 13 +static const AttrResultArgMap attr_result_arg_map[NUM_MAPPED_ATTIMPORT_ARGS] = { + {ATTSTATS_NULL_FRAC, ATTIMPORT_SQL_NULL_FRAC}, + {ATTSTATS_AVG_WIDTH, ATTIMPORT_SQL_AVG_WIDTH}, + {ATTSTATS_N_DISTINCT, ATTIMPORT_SQL_N_DISTINCT}, + {ATTSTATS_MOST_COMMON_VALS, ATTIMPORT_SQL_MOST_COMMON_VALS}, + {ATTSTATS_MOST_COMMON_FREQS, ATTIMPORT_SQL_MOST_COMMON_FREQS}, + {ATTSTATS_HISTOGRAM_BOUNDS, ATTIMPORT_SQL_HISTOGRAM_BOUNDS}, + {ATTSTATS_CORRELATION, ATTIMPORT_SQL_CORRELATION}, + {ATTSTATS_MOST_COMMON_ELEMS, ATTIMPORT_SQL_MOST_COMMON_ELEMS}, + {ATTSTATS_MOST_COMMON_ELEM_FREQS, ATTIMPORT_SQL_MOST_COMMON_ELEM_FREQS}, + {ATTSTATS_ELEM_COUNT_HISTOGRAM, ATTIMPORT_SQL_ELEM_COUNT_HISTOGRAM}, + {ATTSTATS_RANGE_LENGTH_HISTOGRAM, ATTIMPORT_SQL_RANGE_LENGTH_HISTOGRAM}, + {ATTSTATS_RANGE_EMPTY_FRAC, ATTIMPORT_SQL_RANGE_EMPTY_FRAC}, + {ATTSTATS_RANGE_BOUNDS_HISTOGRAM, ATTIMPORT_SQL_RANGE_BOUNDS_HISTOGRAM}, +}; + /* * Foreign-data wrapper handler function: return a struct with pointers @@ -595,6 +801,8 @@ postgres_fdw_handler(PG_FUNCTION_ARGS) /* Support functions for ANALYZE */ routine->AnalyzeForeignTable = postgresAnalyzeForeignTable; + routine->StatisticsAreImportable = postgresStatisticsAreImportable; + routine->ImportStatistics = postgresImportStatistics; /* Support functions for IMPORT FOREIGN SCHEMA */ routine->ImportForeignSchema = postgresImportForeignSchema; @@ -4935,6 +5143,708 @@ postgresAnalyzeForeignTable(Relation relation, return true; } +/* + * Test if an attribute name is in the list. + * + * An empty list means that all attribute names are in the list. + */ +static bool +attname_in_list(const char *attname, List *va_cols) +{ + ListCell *le; + + if (va_cols == NIL) + return true; + + foreach(le, va_cols) + { + char *col = strVal(lfirst(le)); + + if (strcmp(attname, col) == 0) + return true; + } + return false; +} + +/* + * Move a string value from a result set to a Text value of a Datum array. 
+ */ +static void +map_field_to_arg(PGresult *res, int row, int field, + int arg, Datum *values, char *nulls) +{ + if (PQgetisnull(res, row, field)) + { + values[arg] = (Datum) 0; + nulls[arg] = 'n'; + } + else + { + const char *s = PQgetvalue(res, row, field); + + values[arg] = CStringGetTextDatum(s); + nulls[arg] = ' '; + } +} + +/* + * Check the 1x1 result set of a pg_restore_*_stats() command for success. + */ +static bool +import_spi_query_ok(int spirc) +{ + char *s; + bool ok; + + if (spirc != SPI_OK_SELECT + || SPI_tuptable == NULL + || SPI_processed != 1) + return false; + + s = SPI_getvalue(SPI_tuptable->vals[0], SPI_tuptable->tupdesc, 1); + + ok = (s != NULL && s[0] == 't' && s[1] == '\0'); + pfree(s); + + return ok; +} + +/* + * Import fetched statistics into the local statistics tables. + */ +static bool +import_fetched_statistics(const char *schemaname, const char *relname, + int server_version_num, int natts, + const RemoteAttributeMapping * remattrmap, + RemoteStatsResults * remstats) +{ + SPIPlanPtr plan = NULL; + PGresult *res = remstats->att; + int spirc; + Datum values[ATTIMPORT_SQL_NUM_FIELDS]; + char nulls[ATTIMPORT_SQL_NUM_FIELDS]; + bool ok = false; + + /* Assign all the invariant parameters */ + values[ATTIMPORT_SQL_VERSION] = Int32GetDatum(server_version_num); + nulls[ATTIMPORT_SQL_VERSION] = ' '; + + values[ATTIMPORT_SQL_SCHEMANAME] = CStringGetTextDatum(schemaname); + nulls[ATTIMPORT_SQL_SCHEMANAME] = ' '; + + values[ATTIMPORT_SQL_RELNAME] = CStringGetTextDatum(relname); + nulls[ATTIMPORT_SQL_RELNAME] = ' '; + + nulls[ATTIMPORT_SQL_ATTNUM] = ' '; + + SPI_connect(); + + plan = SPI_prepare(attimport_sql, ATTIMPORT_SQL_NUM_FIELDS, + (Oid *) attimport_argtypes); + + if (plan == NULL || SPI_result < 0) + { + ereport(WARNING, + errmsg("import attribute statistics prepare failed %s " + "with error code %d", + attimport_sql, SPI_result)); + goto import_cleanup; + } + + /* + * We import the attribute statistics first, because those are more prone 
+ * to errors. This avoids making a modification of pg_class that will just + * get rolled back by a failed attribute import. + */ + for (int mapidx = 0; mapidx < natts; mapidx++) + { + int row = remattrmap[mapidx].res_index; + + /* All mappings should have been assigned a result set row. */ + Assert(row >= 0); + + values[ATTIMPORT_SQL_ATTNUM] = + Int16GetDatum(remattrmap[mapidx].local_attnum); + + /* Loop through all mappable columns->arguments */ + for (int i = 0; i < NUM_MAPPED_ATTIMPORT_ARGS; i++) + map_field_to_arg(res, row, + attr_result_arg_map[i].res_field, + attr_result_arg_map[i].arg_num, + values, nulls); + + spirc = SPI_execute_plan(plan, values, nulls, false, 0); + + if (!import_spi_query_ok(spirc)) + { + ereport(WARNING, + errmsg("attribute statistics import failed %s", + attimport_sql)); + goto import_cleanup; + } + } + + /* + * Import relstats. + * + * Import relation stats. We only perform this once, so there is no point + * in preparing the statement. + * + * We can re-use the values/nulls because the number of parameters is less + * and the first three params are the same as attstats_import. + */ + map_field_to_arg(remstats->rel, 0, RELSTATS_RELPAGES, + RELIMPORT_SQL_RELPAGES, values, nulls); + map_field_to_arg(remstats->rel, 0, RELSTATS_RELTUPLES, + RELIMPORT_SQL_RELTUPLES, values, nulls); + + spirc = SPI_execute_with_args(relimport_sql, RELIMPORT_SQL_NUM_FIELDS, + (Oid *) relimport_argtypes, + values, nulls, false, 1); + + if (!import_spi_query_ok(spirc)) + { + ereport(WARNING, + errmsg("relation statistics import failed %s", + relimport_sql)); + goto import_cleanup; + } + + ok = true; + +import_cleanup: + if (plan) + SPI_freeplan(plan); + SPI_finish(); + return ok; +} + +/* + * Attempt to fetch remote relations stats. 
+ */ +static PGresult * +fetch_relstats(PGconn *conn, + const char *remote_schemaname, const char *remote_relname) +{ + const char *params[2] = {remote_schemaname, remote_relname}; + + const char *sql = "SELECT c.relkind, c.relpages, c.reltuples " + "FROM pg_catalog.pg_class AS c " + "JOIN pg_catalog.pg_namespace AS n " + "ON n.oid = c.relnamespace " + "WHERE n.nspname = $1 AND c.relname = $2"; + + PGresult *res = NULL; + + if (!PQsendQueryParams(conn, sql, 2, NULL, params, NULL, NULL, 0)) + pgfdw_report_error(NULL, conn, sql); + + res = pgfdw_get_result(conn); + + if (PQresultStatus(res) != PGRES_TUPLES_OK) + pgfdw_report_error(NULL, conn, sql); + + if (PQntuples(res) != 1 || + PQnfields(res) != RELSTATS_NUM_FIELDS || + PQgetisnull(res, 0, RELSTATS_RELKIND)) + elog(ERROR, "unexpected result from fetch_relstats query"); + + return res; +} + +/* + * Attempt to fetch remote attribute stats. + */ +static PGresult * +fetch_attstats(PGconn *conn, int server_version_num, + const char *remote_schemaname, const char *remote_relname, + const char *column_list) +{ + const char *params[3] = {remote_schemaname, remote_relname, column_list}; + const char *sql; + PGresult *res; + + if (server_version_num >= 170000) + sql = attstats_query_17; + else if (server_version_num >= 90200) + sql = attstats_query_9_2; + else if (server_version_num >= 90000) + sql = attstats_query_9_0; + else + sql = attstats_query_default; + + if (!PQsendQueryParams(conn, sql, 3, NULL, params, NULL, NULL, 0)) + pgfdw_report_error(NULL, conn, sql); + + res = pgfdw_get_result(conn); + + if (PQresultStatus(res) != PGRES_TUPLES_OK) + pgfdw_report_error(NULL, conn, sql); + + if (PQnfields(res) != ATTSTATS_NUM_FIELDS) + elog(ERROR, "unexpected result from fetch_attstats query"); + + return res; +} + +/* + * Match result set rows to local attnums. 
+ * + * Every row of the result should be an attribute that we specifically + * filtered for, so every row should have at least one match in the + * RemoteAttributeMapping, which is also ordered by attname, so we only + * need to walk that array once. + */ +static +bool +match_attrmap(PGresult *res, const char *remote_schemaname, + const char *remote_relname, int natts, + RemoteAttributeMapping * remattrmap) +{ + int mapidx = 0; + + for (int row = 0; row < PQntuples(res); row++) + { + char *remote_attname; + bool match_found = false; + int cmp = 0; + + if (PQgetisnull(res, row, ATTSTATS_ATTNAME)) + elog(ERROR, + "remote statistics returned a row with NULL attribute name"); + + remote_attname = PQgetvalue(res, row, ATTSTATS_ATTNAME); + + /* + * Scan the remote attribute map for matching names. The first one + * should be a match but there may also be others. For each match, + * assign the result set row number to that entry. + * + * Remember the type of comparison (< or >) that caused us to break + * out of the loop. + */ + for (; mapidx < natts; mapidx++) + { + cmp = strcmp(remote_attname, remattrmap[mapidx].remote_attname); + + /* Stop scanning on the first non-match */ + if (cmp != 0) + break; + + match_found = true; + remattrmap[mapidx].res_index = row; + } + + /* + * If we found at least one match, move onto the next result. + */ + if (match_found) + continue; + + /* + * If we didn't find a match, then either the list is out of order, + * either the map is out of order or we got a row that we didn't ask + * for. + */ + if (cmp > 0) + { + /* We expected a stats for a column, but found none. */ + ereport(LOG, + errmsg("no remote statistics found for %s of %s.%s", + remattrmap[mapidx].remote_attname, + remote_schemaname, remote_relname)); + return false; + } + else + { + /* + * We got a stat row that we didn't expect. This is a sign that + * the query was flawed, which is concerning but not a reason to + * block fallback to regular sampling. 
+ */ + ereport(WARNING, + errmsg("extra remote statistics found for %s of %s.%s", + remote_attname, remote_schemaname, remote_relname)); + return false; + } + } + + if (mapidx < natts) + { + /* We exhausted the result set before mapping all attributes. */ + ereport(LOG, + errmsg("no remote statistics found for %s of %s.%s", + remattrmap[mapidx].remote_attname, + remote_schemaname, remote_relname)); + return false; + } + + return true; +} + +/* + * Attempt to fetch statistics from a remote server. + */ +static bool +fetch_remote_statistics(PGconn *conn, + const char *remote_schemaname, + const char *remote_relname, + int server_version_num, int natts, + RemoteAttributeMapping * remattrmap, + const char *column_list, + RemoteStatsResults * remstats) +{ + PGresult *attstats = NULL; + PGresult *relstats = NULL; + + char relkind; + char *reltuples; + + relstats = fetch_relstats(conn, remote_schemaname, remote_relname); + + /* + * Verify that the remote table is the sort that can have meaningful stats + * in pg_stats. + * + * Note that while relations of kinds RELKIND_INDEX and + * RELKIND_PARTITIONED_INDEX can have rows in pg_stats, they obviously + * can't support a foreign table. + */ + relkind = *PQgetvalue(relstats, 0, RELSTATS_RELKIND); + + switch (relkind) + { + case RELKIND_RELATION: + case RELKIND_PARTITIONED_TABLE: + case RELKIND_FOREIGN_TABLE: + case RELKIND_MATVIEW: + break; + default: + ereport(WARNING, + errmsg("remote table %s.%s is of relkind %c which cannot have statistics", + remote_schemaname, remote_relname, relkind), + errhint("Falling back to ANALYZE with regular row sampling.")); + goto cleanup; + } + + /* + * If the reltuples value > 0, then then we can expect to find attribute + * stats for the table. + * + * A reltuples value of -1 means the table has never been analyzed (v14+). 
+ * + * In versions prior to v14, a value of 0 was ambiguous, it could mean + * that the table had never been analyzed, or that it was empty at the + * time that it was analyzed. Either way, we wouldn't expect to find + * attstats for the relation. + */ + reltuples = PQgetvalue(relstats, 0, RELSTATS_RELTUPLES); + if ((strcmp(reltuples, "0") == 0) || + (strcmp(reltuples, "-1") == 0)) + goto notfound; + + /* See if it actually has any attribute stats. */ + attstats = fetch_attstats(conn, server_version_num, remote_schemaname, + remote_relname, column_list); + + if (PQntuples(attstats) == 0) + goto notfound; + + /* Reject the stats if any are missing or in excess. */ + if (!match_attrmap(attstats, remote_schemaname, remote_relname, natts, + remattrmap)) + goto cleanup; + + remstats->rel = relstats; + remstats->att = attstats; + return true; + +notfound: + ereport(NOTICE, + errmsg("remote table %s.%s has no statistics to import", + remote_schemaname, remote_relname), + errhint("Falling back to ANALYZE with regular row sampling.")); +cleanup: + PQclear(attstats); + PQclear(relstats); + return false; +} + +/* + * Test if the relation has extended statistics objects. + */ +static bool +table_has_extended_stats(Relation relation) +{ + Relation pg_statext; + SysScanDesc scan; + ScanKeyData skey; + bool found; + + pg_statext = table_open(StatisticExtRelationId, RowExclusiveLock); + + /* + * Prepare to scan pg_statistic_ext for entries having stxrelid = this + * rel. 
+ */ + ScanKeyInit(&skey, + Anum_pg_statistic_ext_stxrelid, + BTEqualStrategyNumber, F_OIDEQ, + ObjectIdGetDatum(RelationGetRelid(relation))); + + scan = systable_beginscan(pg_statext, StatisticExtRelidIndexId, true, + NULL, 1, &skey); + + found = (systable_getnext(scan) != NULL); + + systable_endscan(scan); + + table_close(pg_statext, RowExclusiveLock); + + return found; +} + +static bool +postgresStatisticsAreImportable(Relation relation) +{ + ForeignTable *table; + ForeignServer *server; + ListCell *lc; + bool fetch_stats = true; + + table = GetForeignTable(RelationGetRelid(relation)); + server = GetForeignServer(table->serverid); + + /* + * Server-level options can be overridden by table-level options, so check + * server-level first. + */ + foreach(lc, server->options) + { + DefElem *def = (DefElem *) lfirst(lc); + + if (strcmp(def->defname, "fetch_stats") == 0) + { + fetch_stats = defGetBoolean(def); + break; + } + } + + foreach(lc, table->options) + { + DefElem *def = (DefElem *) lfirst(lc); + + if (strcmp(def->defname, "fetch_stats") == 0) + { + fetch_stats = defGetBoolean(def); + break; + } + } + + /* + * Additional checks that can disqualify a table from importing stats. + */ + if (fetch_stats) + { + /* + * We can't compute extended stats from the column stats, for that we + * need a table sample. + */ + if (table_has_extended_stats(relation)) + { + ereport(WARNING, + errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("cannot import statistics for foreign table \"%s\" " + "because it has extended statistics objects.", + RelationGetRelationName(relation))); + fetch_stats = false; + } + } + + return fetch_stats; +} + +/* + * Compare two RemoteAttributeMappings for sorting. + */ +static int +remattrmap_cmp(const void *v1, const void *v2) +{ + const RemoteAttributeMapping *r1 = v1; + const RemoteAttributeMapping *r2 = v2; + + return strncmp(r1->remote_attname, r2->remote_attname, NAMEDATALEN); +} + +/* + * Fetch attstattarget from a pg_attribute tuple. 
+ */ +static int16 +get_attstattarget(Relation relation, AttrNumber attnum) +{ + HeapTuple atttuple; + bool isnull; + Datum dat; + int16 attstattarget; + Oid relid = RelationGetRelid(relation); + + atttuple = SearchSysCache2(ATTNUM, + ObjectIdGetDatum(relid), + Int16GetDatum(attnum)); + if (!HeapTupleIsValid(atttuple)) + elog(ERROR, "cache lookup failed for attribute %d of relation %u", + attnum, relid); + dat = SysCacheGetAttr(ATTNUM, atttuple, + Anum_pg_attribute_attstattarget, + &isnull); + attstattarget = isnull ? -1 : DatumGetInt16(dat); + ReleaseSysCache(atttuple); + + return attstattarget; +} + +/* + * postgresImportStatistics + * Attempt to fetch remote statistics and apply those instead of analyzing. + */ +static bool +postgresImportStatistics(Relation relation, List *va_cols, int elevel) +{ + + ForeignTable *table; + UserMapping *user; + PGconn *conn; + ListCell *lc; + int server_version_num = 0; + const char *schemaname = NULL; + const char *relname = NULL; + const char *remote_schemaname = NULL; + const char *remote_relname = NULL; + TupleDesc tupdesc = RelationGetDescr(relation); + int natts = 0; + bool ok = false; + + RemoteAttributeMapping *remattrmap; + StringInfoData column_list; + + RemoteStatsResults remstats = {.rel = NULL,.att = NULL}; + + table = GetForeignTable(RelationGetRelid(relation)); + user = GetUserMapping(GetUserId(), table->serverid); + conn = GetConnection(user, false, NULL); + server_version_num = PQserverVersion(conn); + schemaname = get_namespace_name(RelationGetNamespace(relation)); + relname = RelationGetRelationName(relation); + + foreach(lc, table->options) + { + DefElem *def = (DefElem *) lfirst(lc); + + if (strcmp(def->defname, "schema_name") == 0) + remote_schemaname = defGetString(def); + else if (strcmp(def->defname, "table_name") == 0) + remote_relname = defGetString(def); + } + + /* + * Assume the relation/schema names are the same as the local name unless + * the options tell us otherwise. 
+ */ + if (remote_schemaname == NULL) + remote_schemaname = schemaname; + if (remote_relname == NULL) + remote_relname = relname; + + /* + * Build attnum/remote-attname map and column list. + */ + remattrmap = palloc_array(RemoteAttributeMapping, tupdesc->natts); + initStringInfo(&column_list); + appendStringInfoChar(&column_list, '{'); + for (int i = 0; i < tupdesc->natts; i++) + { + char *attname; + char *remote_colname; + List *fc_options; + ListCell *fc_lc; + AttrNumber attnum; + + Form_pg_attribute attr = TupleDescAttr(tupdesc, i); + + /* Ignore dropped columns. */ + if (attr->attisdropped) + continue; + + /* Ignore generated columns. */ + if (attr->attgenerated) + continue; + + attname = NameStr(attr->attname); + + /* If a list is specified, exclude any attnames not in it. */ + if (!attname_in_list(attname, va_cols)) + continue; + + attnum = attr->attnum; + + /* Ignore if attstatarget is 0 */ + if (get_attstattarget(relation, attnum) == 0) + continue; + + /* If column_name is not specified, go with attname. 
*/ + remote_colname = attname; + fc_options = GetForeignColumnOptions(RelationGetRelid(relation), attnum); + + foreach(fc_lc, fc_options) + { + DefElem *def = (DefElem *) lfirst(fc_lc); + + if (strcmp(def->defname, "column_name") == 0) + { + remote_colname = defGetString(def); + break; + } + } + + if (i > 0) + appendStringInfoChar(&column_list, ','); + appendStringInfoString(&column_list, quote_identifier(remote_colname)); + + remattrmap[natts].local_attnum = attnum; + strncpy(remattrmap[natts].remote_attname, remote_colname, NAMEDATALEN); + remattrmap[natts].res_index = -1; + natts++; + } + appendStringInfoChar(&column_list, '}'); + + /* Sort mapping by remote attribute name */ + qsort(remattrmap, natts, sizeof(RemoteAttributeMapping), remattrmap_cmp); + + ok = fetch_remote_statistics(conn, remote_schemaname, remote_relname, + server_version_num, natts, remattrmap, + column_list.data, &remstats); + + ReleaseConnection(conn); + pfree(column_list.data); + + if (ok) + { + Assert(remstats.rel != NULL); + Assert(PQnfields(remstats.rel) == RELSTATS_NUM_FIELDS); + Assert(PQntuples(remstats.rel) == 1); + Assert(remstats.att != NULL); + Assert(PQnfields(remstats.att) == ATTSTATS_NUM_FIELDS); + Assert(PQntuples(remstats.att) >= 1); + ok = import_fetched_statistics(schemaname, relname, server_version_num, + natts, remattrmap, &remstats); + } + + pfree(remattrmap); + PQclear(remstats.att); + PQclear(remstats.rel); + return ok; +} + /* * postgresGetAnalyzeInfoForForeignTable * Count tuples in foreign table (just get pg_class.reltuples). diff --git a/contrib/postgres_fdw/sql/postgres_fdw.sql b/contrib/postgres_fdw/sql/postgres_fdw.sql index 49ed797e8ef..a0f52e57dbc 100644 --- a/contrib/postgres_fdw/sql/postgres_fdw.sql +++ b/contrib/postgres_fdw/sql/postgres_fdw.sql @@ -241,6 +241,7 @@ SELECT c3, c4 FROM ft1 ORDER BY c3, c1 LIMIT 1; -- should work again -- Now we should be able to run ANALYZE. 
-- To exercise multiple code paths, we use local stats on ft1 -- and remote-estimate mode on ft2. +ALTER SERVER loopback OPTIONS (ADD fetch_stats 'false'); ANALYZE ft1; ALTER FOREIGN TABLE ft2 OPTIONS (use_remote_estimate 'true'); @@ -1293,7 +1294,8 @@ REINDEX TABLE CONCURRENTLY reind_fdw_parent; -- ok DROP TABLE reind_fdw_parent; -- =================================================================== --- conversion error +-- conversion error, will generate a WARNING for imported stats and an +-- error on locally computed stats. -- =================================================================== ALTER FOREIGN TABLE ft1 ALTER COLUMN c8 TYPE int; SELECT * FROM ft1 ftx(x1,x2,x3,x4,x5,x6,x7,x8) WHERE x1 = 1; -- ERROR @@ -3917,6 +3919,11 @@ CREATE FOREIGN TABLE async_p2 PARTITION OF async_pt FOR VALUES FROM (2000) TO (3 SERVER loopback2 OPTIONS (table_name 'base_tbl2'); INSERT INTO async_p1 SELECT 1000 + i, i, to_char(i, 'FM0000') FROM generate_series(0, 999, 5) i; INSERT INTO async_p2 SELECT 2000 + i, i, to_char(i, 'FM0000') FROM generate_series(0, 999, 5) i; +-- Will failover to sampling on async_p2 because fetch_stats = true (the default) on +-- loopback2, and is set to false on loopback +ANALYZE async_pt; +-- Turning off fetch_stats at the table level for async_p2 removes the warning. +ALTER FOREIGN TABLE async_p2 OPTIONS (ADD fetch_stats 'false'); ANALYZE async_pt; -- simple queries @@ -3954,6 +3961,10 @@ CREATE TABLE base_tbl3 (a int, b int, c text); CREATE FOREIGN TABLE async_p3 PARTITION OF async_pt FOR VALUES FROM (3000) TO (4000) SERVER loopback2 OPTIONS (table_name 'base_tbl3'); INSERT INTO async_p3 SELECT 3000 + i, i, to_char(i, 'FM0000') FROM generate_series(0, 999, 5) i; +-- Will fail because fetch_stats = true (the default) on async_p3/loopback2 +ANALYZE async_pt; +-- Turn off fetch_stats at the server level. +ALTER SERVER loopback2 OPTIONS (ADD fetch_stats 'false'); ANALYZE async_pt; EXPLAIN (VERBOSE, COSTS OFF) -- 2.53.0
From f05cfecc37b3a8ac687fe862b3f096c088e4e31a Mon Sep 17 00:00:00 2001 From: Corey Huinker <[email protected]> Date: Sat, 17 Jan 2026 19:33:53 -0500 Subject: [PATCH v14 3/3] Add remote_analyze to postgres_fdw remote statistics fetching. This is accomplished through a new option, remote_analyze, which is available at the server level and table level. The default value is false. If remote_analyze is enabled, and if the first attempt to fetch remote statistics did not fetch attribute statistics for every local table column, then an attempt will be made to ANALYZE the remote table. If that remote ANALYZE succeeds, then a second and final attempt will be made to fetch remote statistics. If the statistics found are still insufficient, then the local ANALYZE command will fall back to regular row sampling and computing the statistics locally. --- doc/src/sgml/postgres-fdw.sgml | 16 ++ .../postgres_fdw/expected/postgres_fdw.out | 38 +++++ contrib/postgres_fdw/option.c | 5 + contrib/postgres_fdw/postgres_fdw.c | 150 ++++++++++++++---- contrib/postgres_fdw/sql/postgres_fdw.sql | 34 ++++ 5 files changed, 214 insertions(+), 29 deletions(-) diff --git a/doc/src/sgml/postgres-fdw.sgml b/doc/src/sgml/postgres-fdw.sgml index d7c0dc8ed14..4c7559dad6f 100644 --- a/doc/src/sgml/postgres-fdw.sgml +++ b/doc/src/sgml/postgres-fdw.sgml @@ -387,6 +387,22 @@ OPTIONS (ADD password_required 'false'); </listitem> </varlistentry> + <varlistentry> + <term><literal>remote_analyze</literal> (<type>boolean</type>)</term> + <listitem> + <para> + This option, which can be specified for a foreign table or a foreign + server, determines whether an <command>ANALYZE</command> on a foreign + table will attempt to <command>ANALYZE</command> the remote table if + the first attempt to fetch remote statistics fails, and will then + make a second and final attempt to fetch remote statistics. This option + has no meaning if the foreign table has <literal>fetch_stats</literal> + disabled. 
+ The default is <literal>false</literal>. + </para> + </listitem> + </varlistentry> + </variablelist> </sect3> diff --git a/contrib/postgres_fdw/expected/postgres_fdw.out b/contrib/postgres_fdw/expected/postgres_fdw.out index a76f1278538..f7a578d267d 100644 --- a/contrib/postgres_fdw/expected/postgres_fdw.out +++ b/contrib/postgres_fdw/expected/postgres_fdw.out @@ -12699,6 +12699,44 @@ ANALYZE analyze_ftable; DROP FOREIGN TABLE analyze_ftable; DROP TABLE analyze_table; -- =================================================================== +-- test remote analyze +-- =================================================================== +CREATE TABLE remote_analyze_table (id int, a text, b bigint); +INSERT INTO remote_analyze_table (SELECT x FROM generate_series(1,1000) x); +CREATE FOREIGN TABLE remote_analyze_ftable (id int, a text, b bigint) + SERVER loopback + OPTIONS (table_name 'remote_analyze_table', + fetch_stats 'true', + remote_analyze 'true'); +-- no stats before +SELECT s.tablename, COUNT(*) AS num_stats +FROM pg_stats AS s +WHERE s.schemaname = 'public' +AND s.tablename IN ('remote_analyze_table', 'remote_analyze_ftable') +GROUP BY s.tablename +ORDER BY s.tablename; + tablename | num_stats +-----------+----------- +(0 rows) + +ANALYZE remote_analyze_ftable; +-- both stats after +SELECT s.tablename, COUNT(*) AS num_stats +FROM pg_stats AS s +WHERE s.schemaname = 'public' +AND s.tablename IN ('remote_analyze_table', 'remote_analyze_ftable') +GROUP BY s.tablename +ORDER BY s.tablename; + tablename | num_stats +-----------------------+----------- + remote_analyze_ftable | 3 + remote_analyze_table | 3 +(2 rows) + +-- cleanup +DROP FOREIGN TABLE remote_analyze_ftable; +DROP TABLE remote_analyze_table; +-- =================================================================== -- test for postgres_fdw_get_connections function with check_conn = true -- =================================================================== -- Disable debug_discard_caches in order to 
manage remote connections diff --git a/contrib/postgres_fdw/option.c b/contrib/postgres_fdw/option.c index 5b7726800d0..2941ecbfb87 100644 --- a/contrib/postgres_fdw/option.c +++ b/contrib/postgres_fdw/option.c @@ -121,6 +121,7 @@ postgres_fdw_validator(PG_FUNCTION_ARGS) strcmp(def->defname, "parallel_commit") == 0 || strcmp(def->defname, "parallel_abort") == 0 || strcmp(def->defname, "fetch_stats") == 0 || + strcmp(def->defname, "remote_analyze") == 0 || strcmp(def->defname, "keep_connections") == 0) { /* these accept only boolean values */ @@ -283,6 +284,10 @@ InitPgFdwOptions(void) {"fetch_stats", ForeignServerRelationId, false}, {"fetch_stats", ForeignTableRelationId, false}, + /* remote_analyze is available on both server and table */ + {"remote_analyze", ForeignServerRelationId, false}, + {"remote_analyze", ForeignTableRelationId, false}, + /* * sslcert and sslkey are in fact libpq options, but we repeat them * here to allow them to appear in both foreign server context (when diff --git a/contrib/postgres_fdw/postgres_fdw.c b/contrib/postgres_fdw/postgres_fdw.c index c76ca2fc69b..5f79ea57e73 100644 --- a/contrib/postgres_fdw/postgres_fdw.c +++ b/contrib/postgres_fdw/postgres_fdw.c @@ -5319,6 +5319,36 @@ import_cleanup: return ok; } +/* + * Analyze a remote table. + */ +static bool +analyze_remote_table(PGconn *conn, const char *remote_schemaname, + const char *remote_relname) +{ + StringInfoData buf; + PGresult *res; + bool ok = true; + + initStringInfo(&buf); + + appendStringInfo(&buf, "ANALYZE %s", + quote_qualified_identifier(remote_schemaname, remote_relname)); + + res = pgfdw_exec_query(conn, buf.data, NULL); + + if (res == NULL || + PQresultStatus(res) != PGRES_COMMAND_OK) + { + pgfdw_report(WARNING, res, conn, buf.data); + ok = false; + } + + PQclear(res); + pfree(buf.data); + return ok; +} + /* * Attempt to fetch remote relations stats. 
*/ @@ -5482,6 +5512,33 @@ match_attrmap(PGresult *res, const char *remote_schemaname, return true; } +/* + * Convenience routine to test if the relstats result set shows that the + * relation has tuples. + */ +static +bool +has_tuples(PGresult *relstats) +{ + char *reltuples; + + reltuples = PQgetvalue(relstats, 0, RELSTATS_RELTUPLES); + + /* + * If the reltuples value > 0, then then we can expect to find attribute + * stats for the table. + * + * A reltuples value of -1 means the table has never been analyzed (v14+). + * + * In versions prior to v14, a value of 0 was ambiguous, it could mean + * that the table had never been analyzed, or that it was empty at the + * time that it was analyzed. Either way, we wouldn't expect to find + * attstats for the relation. + */ + return ((strcmp(reltuples, "0") != 0) && + (strcmp(reltuples, "-1") != 0)); +} + /* * Attempt to fetch statistics from a remote server. */ @@ -5492,13 +5549,13 @@ fetch_remote_statistics(PGconn *conn, int server_version_num, int natts, RemoteAttributeMapping * remattrmap, const char *column_list, - RemoteStatsResults * remstats) + bool remote_analyze, RemoteStatsResults * remstats) { PGresult *attstats = NULL; PGresult *relstats = NULL; char relkind; - char *reltuples; + bool has_stats = false; relstats = fetch_relstats(conn, remote_schemaname, remote_relname); @@ -5527,34 +5584,51 @@ fetch_remote_statistics(PGconn *conn, goto cleanup; } + if (has_tuples(relstats)) + { + /* See if the table actually has attribute stats. 
*/ + attstats = fetch_attstats(conn, server_version_num, remote_schemaname, + remote_relname, column_list); + + if ((PQntuples(attstats) > 0) && + match_attrmap(attstats, remote_schemaname, remote_relname, + natts, remattrmap)) + has_stats = true; + } + + if (!has_stats) + { + /* If no second chance, skip to reporting no stats found */ + if (!remote_analyze) + goto notfound; + + ereport(LOG, + errmsg("attempting remote analyze of table %s.%s", + remote_schemaname, remote_relname)); + PQclear(attstats); + PQclear(relstats); + + if (!analyze_remote_table(conn, remote_schemaname, remote_relname)) + goto cleanup; + + relstats = fetch_relstats(conn, remote_schemaname, remote_relname); + if (!has_tuples(relstats)) + goto notfound; + + attstats = fetch_attstats(conn, server_version_num, remote_schemaname, + remote_relname, column_list); + + if (PQntuples(attstats) == 0) + goto notfound; + + if (!match_attrmap(attstats, remote_schemaname, remote_relname, + natts, remattrmap)) + goto cleanup; + } + /* - * If the reltuples value > 0, then then we can expect to find attribute - * stats for the table. - * - * A reltuples value of -1 means the table has never been analyzed (v14+). - * - * In versions prior to v14, a value of 0 was ambiguous, it could mean - * that the table had never been analyzed, or that it was empty at the - * time that it was analyzed. Either way, we wouldn't expect to find - * attstats for the relation. + * We found stats, on either the first try or the second. */ - reltuples = PQgetvalue(relstats, 0, RELSTATS_RELTUPLES); - if ((strcmp(reltuples, "0") == 0) || - (strcmp(reltuples, "-1") == 0)) - goto notfound; - - /* See if it actually has any attribute stats. */ - attstats = fetch_attstats(conn, server_version_num, remote_schemaname, - remote_relname, column_list); - - if (PQntuples(attstats) == 0) - goto notfound; - - /* Reject the stats if any are missing or in excess. 
*/ - if (!match_attrmap(attstats, remote_schemaname, remote_relname, natts, - remattrmap)) - goto cleanup; - remstats->rel = relstats; remstats->att = attstats; return true; @@ -5712,9 +5786,11 @@ postgresImportStatistics(Relation relation, List *va_cols, int elevel) { ForeignTable *table; + ForeignServer *server; UserMapping *user; PGconn *conn; ListCell *lc; + bool remote_analyze = false; int server_version_num = 0; const char *schemaname = NULL; const char *relname = NULL; @@ -5730,12 +5806,25 @@ postgresImportStatistics(Relation relation, List *va_cols, int elevel) RemoteStatsResults remstats = {.rel = NULL,.att = NULL}; table = GetForeignTable(RelationGetRelid(relation)); + server = GetForeignServer(table->serverid); user = GetUserMapping(GetUserId(), table->serverid); conn = GetConnection(user, false, NULL); server_version_num = PQserverVersion(conn); schemaname = get_namespace_name(RelationGetNamespace(relation)); relname = RelationGetRelationName(relation); + /* + * Server-level options can be overridden by table-level options, so check + * server-level first. 
+ */ + foreach(lc, server->options) + { + DefElem *def = (DefElem *) lfirst(lc); + + if (strcmp(def->defname, "remote_analyze") == 0) + remote_analyze = defGetBoolean(def); + } + foreach(lc, table->options) { DefElem *def = (DefElem *) lfirst(lc); @@ -5744,6 +5833,8 @@ postgresImportStatistics(Relation relation, List *va_cols, int elevel) remote_schemaname = defGetString(def); else if (strcmp(def->defname, "table_name") == 0) remote_relname = defGetString(def); + else if (strcmp(def->defname, "remote_analyze") == 0) + remote_analyze = defGetBoolean(def); } /* @@ -5822,7 +5913,8 @@ postgresImportStatistics(Relation relation, List *va_cols, int elevel) ok = fetch_remote_statistics(conn, remote_schemaname, remote_relname, server_version_num, natts, remattrmap, - column_list.data, &remstats); + column_list.data, remote_analyze, + &remstats); ReleaseConnection(conn); pfree(column_list.data); diff --git a/contrib/postgres_fdw/sql/postgres_fdw.sql b/contrib/postgres_fdw/sql/postgres_fdw.sql index a0f52e57dbc..3390f6e5ec1 100644 --- a/contrib/postgres_fdw/sql/postgres_fdw.sql +++ b/contrib/postgres_fdw/sql/postgres_fdw.sql @@ -4412,6 +4412,40 @@ ANALYZE analyze_ftable; DROP FOREIGN TABLE analyze_ftable; DROP TABLE analyze_table; +-- =================================================================== +-- test remote analyze +-- =================================================================== +CREATE TABLE remote_analyze_table (id int, a text, b bigint); +INSERT INTO remote_analyze_table (SELECT x FROM generate_series(1,1000) x); + +CREATE FOREIGN TABLE remote_analyze_ftable (id int, a text, b bigint) + SERVER loopback + OPTIONS (table_name 'remote_analyze_table', + fetch_stats 'true', + remote_analyze 'true'); + +-- no stats before +SELECT s.tablename, COUNT(*) AS num_stats +FROM pg_stats AS s +WHERE s.schemaname = 'public' +AND s.tablename IN ('remote_analyze_table', 'remote_analyze_ftable') +GROUP BY s.tablename +ORDER BY s.tablename; + +ANALYZE remote_analyze_ftable; 
+ +-- both stats after +SELECT s.tablename, COUNT(*) AS num_stats +FROM pg_stats AS s +WHERE s.schemaname = 'public' +AND s.tablename IN ('remote_analyze_table', 'remote_analyze_ftable') +GROUP BY s.tablename +ORDER BY s.tablename; + +-- cleanup +DROP FOREIGN TABLE remote_analyze_ftable; +DROP TABLE remote_analyze_table; + -- =================================================================== -- test for postgres_fdw_get_connections function with check_conn = true -- =================================================================== -- 2.53.0
