This is an automated email from the ASF dual-hosted git repository.

chenjinbao1989 pushed a commit to branch cbdb-postgres-merge
in repository https://gitbox.apache.org/repos/asf/cloudberry.git

commit 9c52e158039234045302de9725141e2e6c639c60
Author: liushengsong <[email protected]>
AuthorDate: Sat Mar 7 05:23:40 2026 +0800

    feat: collect QE relation stats on QD to enable auto-ANALYZE for 
distributed tables
    
    In GPDB, DML (INSERT/UPDATE/DELETE) executes on QE segments, but
    autovacuum lives on the QD coordinator.  Before this change the QD
    never received modification counts from QEs, so n_mod_since_analyze
    remained 0 and auto-ANALYZE never triggered for distributed tables.
    
    Add two functions:
    - pgstat_send_qd_tabstats() (QE side, pgstat.c): reads per-table
      counts from pgStatXactStack before finish_xact_command() NULLs it,
      serialises them as PgStatTabRecordFromQE[] and attaches the array to
      the libpq PGresult via extras/PGExtraTypeTableStats.
    - pgstat_combine_from_qe() (QD side, pgstat_relation.c): called from
      mppExecutorFinishup/mppExecutorWait after dispatch; iterates QE
      results, deserialises PgStatTabRecordFromQE arrays and merges each
      record into the QD's pending stats via pgstat_prep_relation_pending(),
      then calls pgstat_force_next_flush() to bypass rate-limiting.
    
    Using the pending path (rather than writing directly to shared memory)
    is critical when auto_stats triggers ANALYZE in the same command:
    pgstat_report_analyze() resets mod_since_analyze to 0, but pending
    counts are added back when pgstat_report_stat() flushes after ANALYZE.
    
    Also add pgstat_get_current_xact_stack() to read pgStatXactStack
    without side effects, and update the Makefile to include libpq_srcdir
    for libpq-int.h (PGExtraType, pg_result internals).
    
    The autovacuum-analyze isolation2 test verifies all four auto-ANALYZE
    scenarios: plain heap table, partition leaf tables, lock-conflict abort,
    and coexistence with auto_stats (on_no_stats / on_change / below
    threshold).
    
    Co-Authored-By: Claude Opus 4.6 <[email protected]>
---
 src/backend/cdb/dispatcher/cdbdisp_query.c    |   6 +
 src/backend/executor/execUtils.c              |  12 ++
 src/backend/tcop/postgres.c                   |   8 +
 src/backend/utils/activity/Makefile           |   3 +
 src/backend/utils/activity/pgstat.c           | 137 +++++++++++++
 src/backend/utils/activity/pgstat_relation.c  | 108 ++++++++++
 src/backend/utils/activity/pgstat_xact.c      |  10 +
 src/include/pgstat.h                          |  21 ++
 src/include/utils/pgstat_internal.h           |   1 +
 src/test/isolation2/expected/qe_qd_pgstat.out | 278 ++++++++++++++++++++++++++
 src/test/isolation2/sql/qe_qd_pgstat.sql      | 136 +++++++++++++
 11 files changed, 720 insertions(+)

diff --git a/src/backend/cdb/dispatcher/cdbdisp_query.c 
b/src/backend/cdb/dispatcher/cdbdisp_query.c
index 004050f018b..c69bf54460d 100644
--- a/src/backend/cdb/dispatcher/cdbdisp_query.c
+++ b/src/backend/cdb/dispatcher/cdbdisp_query.c
@@ -526,6 +526,12 @@ cdbdisp_dispatchCommandInternal(DispatchCommandQueryParms 
*pQueryParms,
                ThrowErrorData(qeError);
        }
 
+       /*
+        * GPDB: Merge relation stats sent by QEs so QD's mod_since_analyze
+        * stays up to date for autovacuum triggering.
+        */
+       pgstat_combine_from_qe(pr);
+
        cdbdisp_returnResults(pr, cdb_pgresults);
 
        cdbdisp_destroyDispatcherState(ds);
diff --git a/src/backend/executor/execUtils.c b/src/backend/executor/execUtils.c
index f9d5719fbe3..5f9473c379e 100644
--- a/src/backend/executor/execUtils.c
+++ b/src/backend/executor/execUtils.c
@@ -2144,6 +2144,12 @@ void mppExecutorFinishup(QueryDesc *queryDesc)
                if (ProcessDispatchResult_hook)
                        ProcessDispatchResult_hook(ds);
 
+               /*
+                * GPDB: Merge relation stats sent by QEs so QD's 
mod_since_analyze
+                * stays up to date for autovacuum triggering.
+                */
+               pgstat_combine_from_qe(pr);
+
                /* get num of rows processed from writer QEs. */
                estate->es_processed +=
                        cdbdisp_sumCmdTuples(pr, primaryWriterSliceIndex);
@@ -2225,6 +2231,12 @@ uint64 mppExecutorWait(QueryDesc *queryDesc)
                                                                                
LocallyExecutingSliceIndex(queryDesc->estate),
                                                                                
estate->showstatctx);
                }
+               /*
+                * GPDB: Merge relation stats sent by QEs so QD's 
mod_since_analyze
+                * stays up to date for autovacuum triggering.
+                */
+               pgstat_combine_from_qe(pr);
+
                /* get num of rows processed from writer QEs. */
                es_processed +=
                        cdbdisp_sumCmdTuples(pr, primaryWriterSliceIndex);
diff --git a/src/backend/tcop/postgres.c b/src/backend/tcop/postgres.c
index 20be306646e..92f744e673f 100644
--- a/src/backend/tcop/postgres.c
+++ b/src/backend/tcop/postgres.c
@@ -1498,6 +1498,14 @@ exec_mpp_query(const char *query_string,
 
                PortalDrop(portal, false);
 
+               /*
+                * GPDB: Send pending relation stats to QD before closing the
+                * transaction.  The stats are in pgStatXactStack 
(transaction-level
+                * counts); finish_xact_command() will call AtEOXact_PgStat() 
which
+                * NULLs pgStatXactStack, so we must capture the stats first.
+                */
+               pgstat_send_qd_tabstats();
+
                /*
                 * Close down transaction statement before reporting 
command-complete.
                 * This is so that any end-of-transaction errors are reported 
before
diff --git a/src/backend/utils/activity/Makefile 
b/src/backend/utils/activity/Makefile
index d7d7d6c6b0d..3ff4f0e5235 100644
--- a/src/backend/utils/activity/Makefile
+++ b/src/backend/utils/activity/Makefile
@@ -13,6 +13,9 @@ subdir = src/backend/utils/activity
 top_builddir = ../../../..
 include $(top_builddir)/src/Makefile.global
 
+# GPDB: needed for libpq-int.h (PGExtraType, pg_result struct)
+override CPPFLAGS := -I$(libpq_srcdir) $(CPPFLAGS)
+
 OBJS = \
        backend_progress.o \
        backend_status.o \
diff --git a/src/backend/utils/activity/pgstat.c 
b/src/backend/utils/activity/pgstat.c
index ab7768154c2..926800c9c09 100644
--- a/src/backend/utils/activity/pgstat.c
+++ b/src/backend/utils/activity/pgstat.c
@@ -96,6 +96,8 @@
 #include "access/transam.h"
 #include "access/xact.h"
 #include "lib/dshash.h"
+#include "libpq/pqformat.h"
+#include "libpq-int.h"
 #include "pgstat.h"
 #include "port/atomics.h"
 #include "storage/fd.h"
@@ -107,6 +109,8 @@
 #include "utils/memutils.h"
 #include "utils/pgstat_internal.h"
 #include "utils/timestamp.h"
+#include "catalog/gp_distribution_policy.h"
+#include "cdb/cdbvars.h"
 
 
 /* ----------
@@ -1744,3 +1748,136 @@ assign_stats_fetch_consistency(int newval, void *extra)
        if (pgstat_fetch_consistency != newval)
                force_stats_snapshot_clear = true;
 }
+
+
+/* -----------------------------------------------------------------------
+ * GPDB: QE→QD pgstat collection.
+ *
+ * After a DML statement completes on QE, send the accumulated pending
+ * relation stats (from pgStatPending) to the QD via a 'y' protocol message.
+ * The QD collects these in pgstat_combine_from_qe() and merges them into
+ * its own pending stats, so autovacuum can see modification counts.
+ * -----------------------------------------------------------------------
+ */
+
+/*
+ * pgstat_send_qd_tabstats -- QE side: send relation stats to QD.
+ *
+ * Must be called only on QE (Gp_role == GP_ROLE_EXECUTE), BEFORE
+ * finish_xact_command().  At call time the transaction-level per-table
+ * counts are still in pgStatXactStack.  finish_xact_command() calls
+ * AtEOXact_PgStat() which NULLs pgStatXactStack, so we must read the
+ * stats before that happens.
+ */
+void
+pgstat_send_qd_tabstats(void)
+{
+       PgStat_SubXactStatus *xact_state;
+       StringInfoData          buf;
+       PgStatTabRecordFromQE *records;
+       int                                     nrecords = 0;
+       int                                     capacity = 64;
+
+       if (Gp_role != GP_ROLE_EXECUTE || !Gp_is_writer)
+               return;
+
+       /*
+        * On QE inside a distributed transaction, stats for the current
+        * statement are in pgStatXactStack (not yet merged to pgStatPending,
+        * because the top-level transaction hasn't committed yet).  Read the
+        * current nesting level's per-table insert/update/delete counts.
+        */
+       xact_state = pgstat_get_current_xact_stack();
+
+       if (xact_state == NULL)
+               return;
+
+       records = (PgStatTabRecordFromQE *)
+               palloc(capacity * sizeof(PgStatTabRecordFromQE));
+
+       /*
+        * Send only the current nesting level's per-table insert/update/delete
+        * counts.  QD will place these into its own transactional state 
(trans),
+        * letting PG's normal AtEOXact_PgStat_Relations / 
AtEOSubXact_PgStat_Relations
+        * machinery handle delta_live_tuples, delta_dead_tuples, 
changed_tuples,
+        * and subtransaction commit/abort correctly.
+        */
+       {
+               PgStat_TableXactStatus *trans;
+
+               for (trans = xact_state->first; trans != NULL; trans = 
trans->next)
+               {
+                       PgStat_TableStatus *tabstat = trans->parent;
+                       PgStat_Counter     ins, upd, del;
+
+                       ins = trans->tuples_inserted;
+                       upd = trans->tuples_updated;
+                       del = trans->tuples_deleted;
+
+                       if (ins == 0 && upd == 0 && del == 0 && 
!trans->truncdropped)
+                               continue;
+
+                       /*
+                        * Filter by distribution policy: skip catalog tables 
(QD has
+                        * the same updates) and deduplicate replicated tables 
(only
+                        * one segment reports, to avoid overcounting).
+                        */
+                       {
+                               GpPolicy *gppolicy = GpPolicyFetch(tabstat->id);
+
+                               switch (gppolicy->ptype)
+                               {
+                                       case POLICYTYPE_ENTRY:
+                                               pfree(gppolicy);
+                                               continue;
+                                       case POLICYTYPE_REPLICATED:
+                                               if (GpIdentity.segindex != 
tabstat->id % gppolicy->numsegments)
+                                               {
+                                                       pfree(gppolicy);
+                                                       continue;
+                                               }
+                                               break;
+                                       case POLICYTYPE_PARTITIONED:
+                                               break;
+                                       default:
+                                               elog(ERROR, "unrecognized 
policy type %d",
+                                                        gppolicy->ptype);
+                               }
+                               pfree(gppolicy);
+                       }
+
+                       /* New entry — each table appears at most once per 
nesting level */
+                       if (nrecords >= capacity)
+                       {
+                               capacity *= 2;
+                               records = (PgStatTabRecordFromQE *)
+                                       repalloc(records, capacity * 
sizeof(PgStatTabRecordFromQE));
+                       }
+
+                       records[nrecords].t_id                    = tabstat->id;
+                       records[nrecords].t_shared                = 
tabstat->shared;
+                       records[nrecords].truncdropped    = trans->truncdropped;
+                       records[nrecords].tuples_inserted = ins;
+                       records[nrecords].tuples_updated  = upd;
+                       records[nrecords].tuples_deleted  = del;
+                       nrecords++;
+               }
+       }
+
+       if (nrecords == 0)
+       {
+               pfree(records);
+               return;
+       }
+
+       pq_beginmessage(&buf, 'y');
+       pq_sendstring(&buf, "PGSTAT");
+       pq_sendbyte(&buf, false);       /* result not complete yet */
+       pq_sendint(&buf, PGExtraTypeTableStats, sizeof(PGExtraType));
+       pq_sendint(&buf, nrecords * sizeof(PgStatTabRecordFromQE), sizeof(int));
+       pq_sendbytes(&buf, (char *) records, nrecords * 
sizeof(PgStatTabRecordFromQE));
+       pq_endmessage(&buf);
+
+       pfree(records);
+
+}
diff --git a/src/backend/utils/activity/pgstat_relation.c 
b/src/backend/utils/activity/pgstat_relation.c
index f24091c5078..33532b0030e 100644
--- a/src/backend/utils/activity/pgstat_relation.c
+++ b/src/backend/utils/activity/pgstat_relation.c
@@ -20,12 +20,16 @@
 #include "access/twophase_rmgr.h"
 #include "access/xact.h"
 #include "catalog/partition.h"
+#include "libpq-int.h"
 #include "postmaster/autovacuum.h"
 #include "utils/memutils.h"
 #include "utils/pgstat_internal.h"
 #include "utils/rel.h"
 #include "utils/timestamp.h"
 #include "catalog/catalog.h"
+#include "cdb/cdbdispatchresult.h"
+#include "utils/faultinjector.h"
+#include "utils/lsyscache.h"
 
 
 /* Record that's written to 2PC state file when pgstat state is persisted */
@@ -953,3 +957,107 @@ restore_truncdrop_counters(PgStat_TableXactStatus *trans)
                trans->tuples_deleted = trans->deleted_pre_truncdrop;
        }
 }
+
+
+/* -----------------------------------------------------------------------
+ * GPDB: QD side — merge relation stats received from QEs.
+ *
+ * Called from mppExecutorFinishup() / mppExecutorWait() after QEs have
+ * completed their work.  Each QE sends a 'y' protocol message containing
+ * an array of PgStatTabRecordFromQE — the current nesting level's
+ * per-table insert/update/delete counts.
+ *
+ * We place these into the QD's own transactional state 
(PgStat_TableXactStatus)
+ * at the current nesting level.  This lets PG's normal end-of-transaction
+ * machinery (AtEOXact_PgStat_Relations / AtEOSubXact_PgStat_Relations) handle
+ * delta_live_tuples, delta_dead_tuples, changed_tuples, and subtransaction
+ * commit/abort correctly — just as if the DML had happened locally.
+ *
+ * Because QE sends cumulative values for the current nesting level (trans
+ * accumulates within a level across statements), we must zero QD's trans
+ * before re-accumulating from all segments.  On first encounter of each
+ * relation we zero its trans, then sum across all segments in one pass.
+ * -----------------------------------------------------------------------
+ */
+void
+pgstat_combine_from_qe(CdbDispatchResults *primaryResults)
+{
+       int             i,
+                       j;
+       int             nest_level = GetCurrentTransactionNestLevel();
+       List   *zeroed_rels = NIL;
+
+       if (primaryResults == NULL)
+               return;
+
+       for (i = 0; i < primaryResults->resultCount; i++)
+       {
+               CdbDispatchResult  *dispResult = 
&primaryResults->resultArray[i];
+               int                                     nres = 
cdbdisp_numPGresult(dispResult);
+
+               for (j = 0; j < nres; j++)
+               {
+                       PGresult                   *pgresult = 
cdbdisp_getPGresult(dispResult, j);
+                       PgStatTabRecordFromQE *records;
+                       int                                     nrecords,
+                                                               k;
+
+                       if (pgresult == NULL ||
+                               pgresult->extras == NULL ||
+                               pgresult->extraType != PGExtraTypeTableStats)
+                               continue;
+
+                       records = (PgStatTabRecordFromQE *) pgresult->extras;
+                       nrecords = pgresult->extraslen / 
sizeof(PgStatTabRecordFromQE);
+
+                       for (k = 0; k < nrecords; k++)
+                       {
+                               PgStatTabRecordFromQE *rec = &records[k];
+                               PgStat_TableStatus        *tabstat;
+                               PgStat_TableXactStatus *trans;
+
+#ifdef FAULT_INJECTOR
+                               if (*numActiveFaults_ptr > 0)
+                               {
+                                       char *relname = get_rel_name(rec->t_id);
+                                       if (relname)
+                                       {
+                                               FaultInjector_InjectFaultIfSet(
+                                                       
"gp_pgstat_report_on_master", DDLNotSpecified,
+                                                       "", relname);
+                                               pfree(relname);
+                                       }
+                               }
+#endif
+
+                               tabstat = 
pgstat_prep_relation_pending(rec->t_id, rec->t_shared);
+
+                               /* Ensure a trans exists at current nesting 
level */
+                               if (tabstat->trans == NULL ||
+                                       tabstat->trans->nest_level != 
nest_level)
+                                       add_tabstat_xact_level(tabstat, 
nest_level);
+
+                               trans = tabstat->trans;
+
+                               /* Zero on first encounter to undo previous 
merge */
+                               if (!list_member_oid(zeroed_rels, rec->t_id))
+                               {
+                                       trans->tuples_inserted = 0;
+                                       trans->tuples_updated = 0;
+                                       trans->tuples_deleted = 0;
+                                       trans->truncdropped = false;
+                                       zeroed_rels = lappend_oid(zeroed_rels, 
rec->t_id);
+                               }
+
+                               /* Accumulate QE counts from this segment */
+                               trans->tuples_inserted += rec->tuples_inserted;
+                               trans->tuples_updated  += rec->tuples_updated;
+                               trans->tuples_deleted  += rec->tuples_deleted;
+                               if (rec->truncdropped)
+                                       trans->truncdropped = true;
+                       }
+               }
+       }
+
+       list_free(zeroed_rels);
+}
diff --git a/src/backend/utils/activity/pgstat_xact.c 
b/src/backend/utils/activity/pgstat_xact.c
index 369239d5014..110ddca3c91 100644
--- a/src/backend/utils/activity/pgstat_xact.c
+++ b/src/backend/utils/activity/pgstat_xact.c
@@ -253,6 +253,16 @@ pgstat_get_xact_stack_level(int nest_level)
        return xact_state;
 }
 
+/*
+ * GPDB: Return the current xact stats stack without allocating new levels.
+ * Used by pgstat_send_qd_tabstats() to read in-progress transaction stats.
+ */
+PgStat_SubXactStatus *
+pgstat_get_current_xact_stack(void)
+{
+       return pgStatXactStack;
+}
+
 /*
  * Get stat items that need to be dropped at commit / abort.
  *
diff --git a/src/include/pgstat.h b/src/include/pgstat.h
index c4f3b88444a..2a85773c30c 100644
--- a/src/include/pgstat.h
+++ b/src/include/pgstat.h
@@ -719,6 +719,27 @@ extern PgStat_StatTabEntry 
*pgstat_fetch_stat_tabentry_ext(bool shared,
                                                                                
                                   Oid reloid);
 extern PgStat_TableStatus *find_tabstat_entry(Oid rel_id);
 
+/*
+ * GPDB: Data structure for transmitting per-table stats from QE to QD.
+ * Used by pgstat_send_qd_tabstats() and pgstat_combine_from_qe().
+ */
+typedef struct PgStatTabRecordFromQE
+{
+       Oid                             t_id;                           /* 
table OID */
+       bool                    t_shared;                       /* is it a 
shared catalog? */
+       bool                    truncdropped;           /* was it 
truncated/dropped? */
+       PgStat_Counter  tuples_inserted;
+       PgStat_Counter  tuples_updated;
+       PgStat_Counter  tuples_deleted;
+} PgStatTabRecordFromQE;
+
+/* GPDB: QE sends pending relation stats to QD via 'y' protocol message */
+extern void pgstat_send_qd_tabstats(void);
+
+/* GPDB: QD combines relation stats received from QEs after dispatch */
+struct CdbDispatchResults;
+extern void pgstat_combine_from_qe(struct CdbDispatchResults *primaryResults);
+
 
 /*
  * Functions in pgstat_replslot.c
diff --git a/src/include/utils/pgstat_internal.h 
b/src/include/utils/pgstat_internal.h
index 2b3da610a31..861dd7d7184 100644
--- a/src/include/utils/pgstat_internal.h
+++ b/src/include/utils/pgstat_internal.h
@@ -705,6 +705,7 @@ extern void 
pgstat_subscription_reset_timestamp_cb(PgStatShared_Common *header,
  */
 
 extern PgStat_SubXactStatus *pgstat_get_xact_stack_level(int nest_level);
+extern PgStat_SubXactStatus *pgstat_get_current_xact_stack(void);
 extern void pgstat_drop_transactional(PgStat_Kind kind, Oid dboid, Oid objoid);
 extern void pgstat_create_transactional(PgStat_Kind kind, Oid dboid, Oid 
objoid);
 
diff --git a/src/test/isolation2/expected/qe_qd_pgstat.out 
b/src/test/isolation2/expected/qe_qd_pgstat.out
new file mode 100644
index 00000000000..762295c0d75
--- /dev/null
+++ b/src/test/isolation2/expected/qe_qd_pgstat.out
@@ -0,0 +1,278 @@
+-- Test: QE→QD pgstat collection
+-- Verifies that DML stats from QE segments reach the QD coordinator's
+-- pg_stat_user_tables, enabling autovacuum to see modification counts.
+-- Also verifies that gp_stat_user_tables_summary remains accurate on QEs.
+
+--
+-- Setup: disable autovacuum and auto_stats to prevent interference.
+--
+ALTER SYSTEM SET autovacuum = off;
+ALTER
+SELECT pg_reload_conf();
+ pg_reload_conf 
+----------------
+ t              
+(1 row)
+SELECT pg_sleep(0.5);
+ pg_sleep 
+----------
+          
+(1 row)
+SET gp_autostats_mode = none;
+SET
+
+--
+-- Test 1: Distributed (hash) table — INSERT/UPDATE/DELETE stats reach QD
+--
+CREATE TABLE test_pgstat_dist(id int, val int) DISTRIBUTED BY (id);
+CREATE
+
+INSERT INTO test_pgstat_dist SELECT i, 0 FROM generate_series(1, 1000) i;
+INSERT 1000
+SELECT gp_stat_force_next_flush();
+ gp_stat_force_next_flush 
+--------------------------
+                          
+(1 row)
+
+-- QD should see the stats sent from QEs
+SELECT n_tup_ins, n_mod_since_analyze FROM pg_stat_user_tables WHERE relname = 
'test_pgstat_dist';
+ n_tup_ins | n_mod_since_analyze 
+-----------+---------------------
+ 1000      | 1000                
+(1 row)
+
+-- QE summary should also show the same counts
+SELECT n_tup_ins, n_mod_since_analyze FROM gp_stat_user_tables_summary WHERE 
relname = 'test_pgstat_dist';
+ n_tup_ins | n_mod_since_analyze 
+-----------+---------------------
+ 1000      | 1000                
+(1 row)
+
+-- UPDATE non-distribution-key column so it's a real update, not split update
+UPDATE test_pgstat_dist SET val = 1 WHERE id <= 100;
+UPDATE 100
+SELECT gp_stat_force_next_flush();
+ gp_stat_force_next_flush 
+--------------------------
+                          
+(1 row)
+
+SELECT n_tup_upd FROM pg_stat_user_tables WHERE relname = 'test_pgstat_dist';
+ n_tup_upd 
+-----------
+ 100       
+(1 row)
+SELECT n_tup_upd FROM gp_stat_user_tables_summary WHERE relname = 
'test_pgstat_dist';
+ n_tup_upd 
+-----------
+ 100       
+(1 row)
+
+DELETE FROM test_pgstat_dist WHERE id <= 50;
+DELETE 50
+SELECT gp_stat_force_next_flush();
+ gp_stat_force_next_flush 
+--------------------------
+                          
+(1 row)
+
+SELECT n_tup_del FROM pg_stat_user_tables WHERE relname = 'test_pgstat_dist';
+ n_tup_del 
+-----------
+ 50        
+(1 row)
+SELECT n_tup_del FROM gp_stat_user_tables_summary WHERE relname = 
'test_pgstat_dist';
+ n_tup_del 
+-----------
+ 50        
+(1 row)
+
+--
+-- Test 2: Replicated table — stats not double-counted
+-- With 3 segments, each segment has all 500 rows, but only one segment
+-- should report stats to QD.
+--
+CREATE TABLE test_pgstat_repl(id int) DISTRIBUTED REPLICATED;
+CREATE
+
+INSERT INTO test_pgstat_repl SELECT i FROM generate_series(1, 500) i;
+INSERT 500
+SELECT gp_stat_force_next_flush();
+ gp_stat_force_next_flush 
+--------------------------
+                          
+(1 row)
+
+-- QD should show exactly 500, not 1500 (3 segments * 500)
+SELECT n_tup_ins, n_mod_since_analyze FROM pg_stat_user_tables WHERE relname = 
'test_pgstat_repl';
+ n_tup_ins | n_mod_since_analyze 
+-----------+---------------------
+ 500       | 500                 
+(1 row)
+
+-- QE summary divides replicated table stats by numsegments, so also 500
+SELECT n_tup_ins, n_mod_since_analyze FROM gp_stat_user_tables_summary WHERE 
relname = 'test_pgstat_repl';
+ n_tup_ins | n_mod_since_analyze 
+-----------+---------------------
+ 500       | 500                 
+(1 row)
+
+--
+-- Test 3: Transaction — committed DML stats are counted
+--
+CREATE TABLE test_pgstat_xact(id int) DISTRIBUTED BY (id);
+CREATE
+
+BEGIN;
+BEGIN
+INSERT INTO test_pgstat_xact SELECT i FROM generate_series(1, 300) i;
+INSERT 300
+DELETE FROM test_pgstat_xact WHERE id <= 100;
+DELETE 100
+COMMIT;
+COMMIT
+SELECT gp_stat_force_next_flush();
+ gp_stat_force_next_flush 
+--------------------------
+                          
+(1 row)
+
+SELECT n_tup_ins, n_tup_del FROM pg_stat_user_tables WHERE relname = 
'test_pgstat_xact';
+ n_tup_ins | n_tup_del 
+-----------+-----------
+ 300       | 100       
+(1 row)
+SELECT n_tup_ins, n_tup_del FROM gp_stat_user_tables_summary WHERE relname = 
'test_pgstat_xact';
+ n_tup_ins | n_tup_del 
+-----------+-----------
+ 300       | 100       
+(1 row)
+SELECT count(*) FROM test_pgstat_xact;
+ count 
+-------
+ 200   
+(1 row)
+
+--
+-- Test 4: Subtransaction rollback — n_tup_ins counts all attempted inserts
+-- (PG counts attempted actions regardless of commit/abort)
+--
+CREATE TABLE test_pgstat_subxact(id int) DISTRIBUTED BY (id);
+CREATE
+
+BEGIN;
+BEGIN
+INSERT INTO test_pgstat_subxact SELECT i FROM generate_series(1, 200) i;
+INSERT 200
+SAVEPOINT sp1;
+SAVEPOINT
+INSERT INTO test_pgstat_subxact SELECT i FROM generate_series(201, 700) i;
+INSERT 500
+ROLLBACK TO sp1;
+ROLLBACK
+COMMIT;
+COMMIT
+SELECT gp_stat_force_next_flush();
+ gp_stat_force_next_flush 
+--------------------------
+                          
+(1 row)
+
+-- n_tup_ins counts all attempted inserts (200 + 500 = 700)
+-- but only 200 rows are actually in the table
+SELECT n_tup_ins FROM pg_stat_user_tables WHERE relname = 
'test_pgstat_subxact';
+ n_tup_ins 
+-----------
+ 700       
+(1 row)
+SELECT n_tup_ins FROM gp_stat_user_tables_summary WHERE relname = 
'test_pgstat_subxact';
+ n_tup_ins 
+-----------
+ 700       
+(1 row)
+SELECT count(*) FROM test_pgstat_subxact;
+ count 
+-------
+ 200   
+(1 row)
+
+--
+-- Test 5: Nested subtransactions — RELEASE merges into parent, ROLLBACK TO 
discards
+--
+CREATE TABLE test_pgstat_nested(id int) DISTRIBUTED BY (id);
+CREATE
+
+BEGIN;
+BEGIN
+INSERT INTO test_pgstat_nested SELECT i FROM generate_series(1, 100) i;
+INSERT 100
+SAVEPOINT sp1;
+SAVEPOINT
+INSERT INTO test_pgstat_nested SELECT i FROM generate_series(101, 200) i;
+INSERT 100
+SAVEPOINT sp2;
+SAVEPOINT
+INSERT INTO test_pgstat_nested SELECT i FROM generate_series(201, 300) i;
+INSERT 100
+RELEASE SAVEPOINT sp2;
+RELEASE
+ROLLBACK TO sp1;
+ROLLBACK
+COMMIT;
+COMMIT
+SELECT gp_stat_force_next_flush();
+ gp_stat_force_next_flush 
+--------------------------
+                          
+(1 row)
+
+-- All 300 attempted inserts counted (100 outer + 100 sp1 + 100 sp2)
+-- but only 100 rows remain (sp1 rollback discards sp1 and released sp2)
+SELECT n_tup_ins FROM pg_stat_user_tables WHERE relname = 'test_pgstat_nested';
+ n_tup_ins 
+-----------
+ 300       
+(1 row)
+SELECT n_tup_ins FROM gp_stat_user_tables_summary WHERE relname = 
'test_pgstat_nested';
+ n_tup_ins 
+-----------
+ 300       
+(1 row)
+SELECT count(*) FROM test_pgstat_nested;
+ count 
+-------
+ 100   
+(1 row)
+
+--
+-- Test 6: Catalog (entry) table — QE doesn't crash on catalog DML
+-- Catalog tables are filtered out (POLICYTYPE_ENTRY), so the stats
+-- infrastructure should simply skip them without error.
+--
+CREATE FUNCTION test_pgstat_func() RETURNS void AS $$ BEGIN END; $$ LANGUAGE 
plpgsql;
+CREATE
+DROP FUNCTION test_pgstat_func();
+DROP
+
+--
+-- Cleanup
+--
+DROP TABLE test_pgstat_dist;
+DROP
+DROP TABLE test_pgstat_repl;
+DROP
+DROP TABLE test_pgstat_xact;
+DROP
+DROP TABLE test_pgstat_subxact;
+DROP
+DROP TABLE test_pgstat_nested;
+DROP
+
+ALTER SYSTEM RESET autovacuum;
+ALTER
+SELECT pg_reload_conf();
+ pg_reload_conf 
+----------------
+ t              
+(1 row)
diff --git a/src/test/isolation2/sql/qe_qd_pgstat.sql 
b/src/test/isolation2/sql/qe_qd_pgstat.sql
new file mode 100644
index 00000000000..1d25ed5a623
--- /dev/null
+++ b/src/test/isolation2/sql/qe_qd_pgstat.sql
@@ -0,0 +1,136 @@
+-- Test: QE→QD pgstat collection
+-- Verifies that DML stats from QE segments reach the QD coordinator's
+-- pg_stat_user_tables, enabling autovacuum to see modification counts.
+-- Also verifies that gp_stat_user_tables_summary remains accurate on QEs.
+
+--
+-- Setup: disable autovacuum and auto_stats to prevent interference.
+--
+ALTER SYSTEM SET autovacuum = off;
+SELECT pg_reload_conf();
+SELECT pg_sleep(0.5);
+SET gp_autostats_mode = none;
+
+--
+-- Test 1: Distributed (hash) table — INSERT/UPDATE/DELETE stats reach QD
+--
+CREATE TABLE test_pgstat_dist(id int, val int) DISTRIBUTED BY (id);
+
+INSERT INTO test_pgstat_dist SELECT i, 0 FROM generate_series(1, 1000) i;
+SELECT gp_stat_force_next_flush();
+
+-- QD should see the stats sent from QEs
+SELECT n_tup_ins, n_mod_since_analyze
+  FROM pg_stat_user_tables WHERE relname = 'test_pgstat_dist';
+
+-- QE summary should also show the same counts
+SELECT n_tup_ins, n_mod_since_analyze
+  FROM gp_stat_user_tables_summary WHERE relname = 'test_pgstat_dist';
+
+-- UPDATE non-distribution-key column so it's a real update, not split update
+UPDATE test_pgstat_dist SET val = 1 WHERE id <= 100;
+SELECT gp_stat_force_next_flush();
+
+SELECT n_tup_upd FROM pg_stat_user_tables WHERE relname = 'test_pgstat_dist';
+SELECT n_tup_upd FROM gp_stat_user_tables_summary WHERE relname = 
'test_pgstat_dist';
+
+DELETE FROM test_pgstat_dist WHERE id <= 50;
+SELECT gp_stat_force_next_flush();
+
+SELECT n_tup_del FROM pg_stat_user_tables WHERE relname = 'test_pgstat_dist';
+SELECT n_tup_del FROM gp_stat_user_tables_summary WHERE relname = 
'test_pgstat_dist';
+
+--
+-- Test 2: Replicated table — stats not double-counted
+-- With 3 segments, each segment has all 500 rows, but only one segment
+-- should report stats to QD.
+--
+CREATE TABLE test_pgstat_repl(id int) DISTRIBUTED REPLICATED;
+
+INSERT INTO test_pgstat_repl SELECT i FROM generate_series(1, 500) i;
+SELECT gp_stat_force_next_flush();
+
+-- QD should show exactly 500, not 1500 (3 segments * 500)
+SELECT n_tup_ins, n_mod_since_analyze
+  FROM pg_stat_user_tables WHERE relname = 'test_pgstat_repl';
+
+-- QE summary divides replicated table stats by numsegments, so also 500
+SELECT n_tup_ins, n_mod_since_analyze
+  FROM gp_stat_user_tables_summary WHERE relname = 'test_pgstat_repl';
+
+--
+-- Test 3: Transaction — committed DML stats are counted
+--
+CREATE TABLE test_pgstat_xact(id int) DISTRIBUTED BY (id);
+
+BEGIN;
+INSERT INTO test_pgstat_xact SELECT i FROM generate_series(1, 300) i;
+DELETE FROM test_pgstat_xact WHERE id <= 100;
+COMMIT;
+SELECT gp_stat_force_next_flush();
+
+SELECT n_tup_ins, n_tup_del FROM pg_stat_user_tables WHERE relname = 
'test_pgstat_xact';
+SELECT n_tup_ins, n_tup_del FROM gp_stat_user_tables_summary WHERE relname = 
'test_pgstat_xact';
+SELECT count(*) FROM test_pgstat_xact;
+
+--
+-- Test 4: Subtransaction rollback — n_tup_ins counts all attempted inserts
+-- (PG counts attempted actions regardless of commit/abort)
+--
+CREATE TABLE test_pgstat_subxact(id int) DISTRIBUTED BY (id);
+
+BEGIN;
+INSERT INTO test_pgstat_subxact SELECT i FROM generate_series(1, 200) i;
+SAVEPOINT sp1;
+INSERT INTO test_pgstat_subxact SELECT i FROM generate_series(201, 700) i;
+ROLLBACK TO sp1;
+COMMIT;
+SELECT gp_stat_force_next_flush();
+
+-- n_tup_ins counts all attempted inserts (200 + 500 = 700)
+-- but only 200 rows are actually in the table
+SELECT n_tup_ins FROM pg_stat_user_tables WHERE relname = 
'test_pgstat_subxact';
+SELECT n_tup_ins FROM gp_stat_user_tables_summary WHERE relname = 
'test_pgstat_subxact';
+SELECT count(*) FROM test_pgstat_subxact;
+
+--
+-- Test 5: Nested subtransactions — RELEASE merges into parent, ROLLBACK TO 
discards
+--
+CREATE TABLE test_pgstat_nested(id int) DISTRIBUTED BY (id);
+
+BEGIN;
+INSERT INTO test_pgstat_nested SELECT i FROM generate_series(1, 100) i;
+SAVEPOINT sp1;
+INSERT INTO test_pgstat_nested SELECT i FROM generate_series(101, 200) i;
+SAVEPOINT sp2;
+INSERT INTO test_pgstat_nested SELECT i FROM generate_series(201, 300) i;
+RELEASE SAVEPOINT sp2;
+ROLLBACK TO sp1;
+COMMIT;
+SELECT gp_stat_force_next_flush();
+
+-- All 300 attempted inserts counted (100 outer + 100 sp1 + 100 sp2)
+-- but only 100 rows remain (sp1 rollback discards sp1 and released sp2)
+SELECT n_tup_ins FROM pg_stat_user_tables WHERE relname = 'test_pgstat_nested';
+SELECT n_tup_ins FROM gp_stat_user_tables_summary WHERE relname = 
'test_pgstat_nested';
+SELECT count(*) FROM test_pgstat_nested;
+
+--
+-- Test 6: Catalog (entry) table — QE doesn't crash on catalog DML
+-- Catalog tables are filtered out (POLICYTYPE_ENTRY), so the stats
+-- infrastructure should simply skip them without error.
+--
+CREATE FUNCTION test_pgstat_func() RETURNS void AS $$ BEGIN END; $$ LANGUAGE 
plpgsql;
+DROP FUNCTION test_pgstat_func();
+
+--
+-- Cleanup
+--
+DROP TABLE test_pgstat_dist;
+DROP TABLE test_pgstat_repl;
+DROP TABLE test_pgstat_xact;
+DROP TABLE test_pgstat_subxact;
+DROP TABLE test_pgstat_nested;
+
+ALTER SYSTEM RESET autovacuum;
+SELECT pg_reload_conf();


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to