From f282f5dde3b4bc58b2cd7b66e55803df26e357aa Mon Sep 17 00:00:00 2001
From: Amit Langote <amitlan@postgresql.org>
Date: Sat, 20 Dec 2025 23:09:37 +0900
Subject: [PATCH v5 3/5] Add EXPLAIN (BATCHES) option for tuple batching
 statistics

Add a BATCHES option to EXPLAIN that reports per-node batch statistics
when a node uses batch mode execution.

For nodes that support batching (currently SeqScan), this shows the
number of batches fetched along with average, minimum, and maximum
rows per batch. Output is supported in both text and non-text formats.

Add regression tests covering text output, JSON format, filtered scans,
LIMIT, and disabled batching.

Discussion: https://postgr.es/m/CA+HiwqFfAY_ZFqN8wcAEMw71T9hM_kA8UtyHaZZEZtuT3UyogA@mail.gmail.com
---
 src/backend/commands/explain.c        | 30 ++++++++++++++
 src/backend/commands/explain_state.c  |  2 +
 src/backend/executor/execBatch.c      | 31 +++++++++++++-
 src/backend/executor/nodeSeqscan.c    | 24 ++++++-----
 src/include/commands/explain_state.h  |  1 +
 src/include/executor/execBatch.h      | 16 +++++++-
 src/include/executor/instrument.h     |  1 +
 src/test/regress/expected/explain.out | 58 +++++++++++++++++++++++++++
 src/test/regress/sql/explain.sql      | 27 +++++++++++++
 9 files changed, 177 insertions(+), 13 deletions(-)

diff --git a/src/backend/commands/explain.c b/src/backend/commands/explain.c
index b7bb111688c..f3d521e1f93 100644
--- a/src/backend/commands/explain.c
+++ b/src/backend/commands/explain.c
@@ -22,6 +22,7 @@
 #include "commands/explain_format.h"
 #include "commands/explain_state.h"
 #include "commands/prepare.h"
+#include "executor/execBatch.h"
 #include "foreign/fdwapi.h"
 #include "jit/jit.h"
 #include "libpq/pqformat.h"
@@ -517,6 +518,8 @@ ExplainOnePlan(PlannedStmt *plannedstmt, IntoClause *into, ExplainState *es,
 		instrument_option |= INSTRUMENT_BUFFERS;
 	if (es->wal)
 		instrument_option |= INSTRUMENT_WAL;
+	if (es->batches)
+		instrument_option |= INSTRUMENT_BATCHES;
 
 	/*
 	 * We always collect timing for the entire statement, even when node-level
@@ -2294,6 +2297,33 @@ ExplainNode(PlanState *planstate, List *ancestors,
 		show_buffer_usage(es, &planstate->instrument->bufusage);
 	if (es->wal && planstate->instrument)
 		show_wal_usage(es, &planstate->instrument->walusage);
+	if (es->batches && planstate->ps_Batch)
+	{
+		TupleBatch *b = planstate->ps_Batch;
+
+		if (b->stat_batches > 0)
+		{
+			if (es->format == EXPLAIN_FORMAT_TEXT)
+			{
+				ExplainIndentText(es);
+				appendStringInfo(es->str,
+								 "Batches: %lld  Avg Rows: %.1f  Max: %d  Min: %d\n",
+								 (long long) b->stat_batches,
+								 TupleBatchAvgRows(b),
+								 b->stat_max_rows,
+								 b->stat_min_rows == INT_MAX ? 0 : b->stat_min_rows);
+			}
+			else
+			{
+				ExplainPropertyInteger("Batches", NULL, b->stat_batches, es);
+				ExplainPropertyFloat("Average Batch Rows", NULL,
+									 TupleBatchAvgRows(b), 1, es);
+				ExplainPropertyInteger("Max Batch Rows", NULL, b->stat_max_rows, es);
+				ExplainPropertyInteger("Min Batch Rows", NULL,
+									   b->stat_min_rows == INT_MAX ? 0 : b->stat_min_rows, es);
+			}
+		}
+	}
 
 	/* Prepare per-worker buffer/WAL usage */
 	if (es->workers_state && (es->buffers || es->wal) && es->verbose)
diff --git a/src/backend/commands/explain_state.c b/src/backend/commands/explain_state.c
index 803c74dd178..ad5b223ede7 100644
--- a/src/backend/commands/explain_state.c
+++ b/src/backend/commands/explain_state.c
@@ -159,6 +159,8 @@ ParseExplainOptionList(ExplainState *es, List *options, ParseState *pstate)
 								"EXPLAIN", opt->defname, p),
 						 parser_errposition(pstate, opt->location)));
 		}
+		else if (strcmp(opt->defname, "batches") == 0)
+			es->batches = defGetBoolean(opt);
 		else if (!ApplyExtensionExplainOption(es, opt, pstate))
 			ereport(ERROR,
 					(errcode(ERRCODE_SYNTAX_ERROR),
diff --git a/src/backend/executor/execBatch.c b/src/backend/executor/execBatch.c
index 1ef4117b87c..ed54e3165c8 100644
--- a/src/backend/executor/execBatch.c
+++ b/src/backend/executor/execBatch.c
@@ -19,7 +19,7 @@
  *		Allocate and initialize a new TupleBatch envelope.
  */
 TupleBatch *
-TupleBatchCreate(TupleDesc scandesc, int capacity)
+TupleBatchCreate(TupleDesc scandesc, int capacity, bool track_stats)
 {
 	TupleBatch  *b;
 	TupleTableSlot **inslots,
@@ -47,6 +47,12 @@ TupleBatchCreate(TupleDesc scandesc, int capacity)
 	b->nvalid = 0;
 	b->next = 0;
 
+	b->track_stats = track_stats;
+	b->stat_batches = 0;
+	b->stat_rows = 0;
+	b->stat_max_rows = 0;
+	b->stat_min_rows = INT_MAX;
+
 	return b;
 }
 
@@ -110,3 +116,40 @@ TupleBatchGetNumValid(TupleBatch *b)
 {
 	return b->nvalid;
 }
+
+/*
+ * TupleBatchRecordStats
+ *		Fold one fetched batch of 'rows' tuples into the running statistics.
+ *
+ * Does nothing unless the batch was created with track_stats set.  A fetch
+ * that returned no tuples is not counted as a batch: the scan node reports
+ * every fetch here, including the empty one that ends the scan, and counting
+ * it would inflate stat_batches and could push the average below the minimum.
+ */
+void
+TupleBatchRecordStats(TupleBatch *b, int rows)
+{
+	if (!b->track_stats || rows <= 0)
+		return;
+
+	b->stat_batches++;
+	b->stat_rows += rows;
+	if (rows > b->stat_max_rows)
+		b->stat_max_rows = rows;
+	if (rows < b->stat_min_rows)
+		b->stat_min_rows = rows;
+}
+
+/*
+ * TupleBatchAvgRows
+ *		Return the mean number of rows per recorded batch, or 0.0 if no
+ *		batches have been recorded.
+ */
+double
+TupleBatchAvgRows(TupleBatch *b)
+{
+	if (b->stat_batches == 0)
+		return 0.0;
+
+	return (double) b->stat_rows / b->stat_batches;
+}
diff --git a/src/backend/executor/nodeSeqscan.c b/src/backend/executor/nodeSeqscan.c
index 08d93e6f0be..f36b31d4fbb 100644
--- a/src/backend/executor/nodeSeqscan.c
+++ b/src/backend/executor/nodeSeqscan.c
@@ -213,8 +213,9 @@ SeqNextBatch(SeqScanState *node)
 	TableScanDesc scandesc;
 	EState	   *estate;
 	ScanDirection direction;
+	TupleBatch *b = node->ss.ps.ps_Batch;
 
-	Assert(node->ss.ps.ps_Batch != NULL);
+	Assert(b != NULL);
 
 	/*
 	 * get information from the estate and scan state
@@ -237,22 +238,21 @@ SeqNextBatch(SeqScanState *node)
 	}
 
 	/* Lazily create the AM batch payload. */
-	if (node->ss.ps.ps_Batch->am_payload == NULL)
+	if (b->am_payload == NULL)
 	{
 		const TableAmRoutine *tam PG_USED_FOR_ASSERTS_ONLY = scandesc->rs_rd->rd_tableam;
 
 		Assert(tam && tam->scan_begin_batch);
-		node->ss.ps.ps_Batch->am_payload =
-			table_scan_begin_batch(scandesc, node->ss.ps.ps_Batch->maxslots);
-		node->ss.ps.ps_Batch->ops = table_batch_callbacks(node->ss.ss_currentRelation);
+		b->am_payload = table_scan_begin_batch(scandesc, b->maxslots);
+		b->ops = table_batch_callbacks(node->ss.ss_currentRelation);
 	}
 
-	node->ss.ps.ps_Batch->ntuples =
-		table_scan_getnextbatch(scandesc, node->ss.ps.ps_Batch->am_payload, direction);
-	node->ss.ps.ps_Batch->nvalid = node->ss.ps.ps_Batch->ntuples;
-	node->ss.ps.ps_Batch->materialized = false;
+	b->ntuples = table_scan_getnextbatch(scandesc, b->am_payload, direction);
+	b->nvalid = b->ntuples;
+	b->materialized = false;
+	TupleBatchRecordStats(b, b->ntuples);
 
-	return node->ss.ps.ps_Batch->ntuples > 0;
+	return b->ntuples > 0;
 }
 
 static bool
@@ -340,8 +340,10 @@ SeqScanInitBatching(SeqScanState *scanstate, int eflags)
 {
 	const int cap = executor_batch_rows;
 	TupleDesc	scandesc = RelationGetDescr(scanstate->ss.ss_currentRelation);
+	EState *estate = scanstate->ss.ps.state;
+	bool track_stats = estate->es_instrument && (estate->es_instrument & INSTRUMENT_BATCHES);
 
-	scanstate->ss.ps.ps_Batch = TupleBatchCreate(scandesc, cap);
+	scanstate->ss.ps.ps_Batch = TupleBatchCreate(scandesc, cap, track_stats);
 
 	/* Choose batch variant to preserve your specialization matrix */
 	if (scanstate->ss.ps.qual == NULL)
diff --git a/src/include/commands/explain_state.h b/src/include/commands/explain_state.h
index 0b695f7d812..0a99f0f2341 100644
--- a/src/include/commands/explain_state.h
+++ b/src/include/commands/explain_state.h
@@ -55,6 +55,7 @@ typedef struct ExplainState
 	bool		memory;			/* print planner's memory usage information */
 	bool		settings;		/* print modified settings */
 	bool		generic;		/* generate a generic plan */
+	bool		batches;		/* print batch statistics */
 	ExplainSerializeOption serialize;	/* serialize the query's output? */
 	ExplainFormat format;		/* output format */
 	/* state for output formatting --- not reset for each new plan tree */
diff --git a/src/include/executor/execBatch.h b/src/include/executor/execBatch.h
index 2d0066103ce..1efc194d8ff 100644
--- a/src/include/executor/execBatch.h
+++ b/src/include/executor/execBatch.h
@@ -13,6 +13,8 @@
 #ifndef EXECBATCH_H
 #define EXECBATCH_H
 
+#include <limits.h>
+
 #include "executor/tuptable.h"
 
 /*
@@ -45,11 +47,18 @@ typedef struct TupleBatch
 
 	int		nvalid;		/* number of returnable tuples in outslots */
 	int		next;		/* 0-based index of next tuple to be returned */
+
+	/* Statistics (populated under EXPLAIN (ANALYZE, BATCHES)) */
+	bool	track_stats;	/* whether to collect stats */
+	int64	stat_batches;	/* total number of batches fetched */
+	int64	stat_rows;		/* total tuples across all batches */
+	int		stat_max_rows;	/* max rows in any single batch */
+	int		stat_min_rows;	/* min rows in any single batch (non-zero) */
 } TupleBatch;
 
 
 /* Helpers */
-extern TupleBatch *TupleBatchCreate(TupleDesc scandesc, int capacity);
+extern TupleBatch *TupleBatchCreate(TupleDesc scandesc, int capacity, bool track_stats);
 extern void TupleBatchReset(TupleBatch *b, bool drop_slots);
 extern void TupleBatchUseInput(TupleBatch *b, int nvalid);
 extern void TupleBatchUseOutput(TupleBatch *b, int nvalid);
@@ -96,4 +105,9 @@ TupleBatchMaterializeAll(TupleBatch *b)
 	TupleBatchUseInput(b, b->ntuples);
 }
 
+/* === Batching stats. === */
+
+extern void TupleBatchRecordStats(TupleBatch *b, int rows);
+extern double TupleBatchAvgRows(TupleBatch *b);
+
 #endif	/* EXECBATCH_H */
diff --git a/src/include/executor/instrument.h b/src/include/executor/instrument.h
index 9759f3ea5d8..bee69b4ac8f 100644
--- a/src/include/executor/instrument.h
+++ b/src/include/executor/instrument.h
@@ -64,6 +64,7 @@ typedef enum InstrumentOption
 	INSTRUMENT_BUFFERS = 1 << 1,	/* needs buffer usage */
 	INSTRUMENT_ROWS = 1 << 2,	/* needs row count */
 	INSTRUMENT_WAL = 1 << 3,	/* needs WAL usage */
+	INSTRUMENT_BATCHES = 1 << 4, /* needs batches */
 	INSTRUMENT_ALL = PG_INT32_MAX
 } InstrumentOption;
 
diff --git a/src/test/regress/expected/explain.out b/src/test/regress/expected/explain.out
index 7c1f26b182c..1bec59eea9e 100644
--- a/src/test/regress/expected/explain.out
+++ b/src/test/regress/expected/explain.out
@@ -822,3 +822,61 @@ select explain_filter('explain (analyze,buffers off,costs off) select sum(n) ove
 (9 rows)
 
 reset work_mem;
+-- Test BATCHES option
+set executor_batch_rows = 64;
+create table batch_test (a int, b text);
+insert into batch_test select i, repeat('x', 100) from generate_series(1, 10000) i;
+analyze batch_test;
+-- Basic batch stats output
+select explain_filter('explain (analyze, batches, buffers off, costs off) select * from batch_test');
+                         explain_filter                         
+----------------------------------------------------------------
+ Seq Scan on batch_test (actual time=N.N..N.N rows=N.N loops=N)
+   Batches: N  Avg Rows: N.N  Max: N  Min: N
+ Planning Time: N.N ms
+ Execution Time: N.N ms
+(4 rows)
+
+-- With filter
+select explain_filter('explain (analyze, batches, buffers off, costs off) select * from batch_test where a > 5000');
+                         explain_filter                         
+----------------------------------------------------------------
+ Seq Scan on batch_test (actual time=N.N..N.N rows=N.N loops=N)
+   Filter: (a > N)
+   Rows Removed by Filter: N
+   Batches: N  Avg Rows: N.N  Max: N  Min: N
+ Planning Time: N.N ms
+ Execution Time: N.N ms
+(6 rows)
+
+-- With LIMIT - partial scan shows fewer batches
+select explain_filter('explain (analyze, batches, buffers off, costs off) select * from batch_test limit 100');
+                            explain_filter                            
+----------------------------------------------------------------------
+ Limit (actual time=N.N..N.N rows=N.N loops=N)
+   ->  Seq Scan on batch_test (actual time=N.N..N.N rows=N.N loops=N)
+         Batches: N  Avg Rows: N.N  Max: N  Min: N
+ Planning Time: N.N ms
+ Execution Time: N.N ms
+(5 rows)
+
+-- Batching disabled - no batch line
+set executor_batch_rows = 0;
+select explain_filter('explain (analyze, batches, buffers off, costs off) select * from batch_test');
+                         explain_filter                         
+----------------------------------------------------------------
+ Seq Scan on batch_test (actual time=N.N..N.N rows=N.N loops=N)
+ Planning Time: N.N ms
+ Execution Time: N.N ms
+(3 rows)
+
+reset executor_batch_rows;
+-- JSON format
+select explain_filter_to_json('explain (analyze, batches, buffers off, format json) select * from batch_test where a < 1000') #> '{0,Plan,Batches}';
+ ?column? 
+----------
+ 0
+(1 row)
+
+drop table batch_test;
+reset executor_batch_rows;
diff --git a/src/test/regress/sql/explain.sql b/src/test/regress/sql/explain.sql
index ebdab42604b..7881c674495 100644
--- a/src/test/regress/sql/explain.sql
+++ b/src/test/regress/sql/explain.sql
@@ -188,3 +188,30 @@ select explain_filter('explain (analyze,buffers off,costs off) select sum(n) ove
 -- Test tuplestore storage usage in Window aggregate (memory and disk case, final result is disk)
 select explain_filter('explain (analyze,buffers off,costs off) select sum(n) over(partition by m) from (SELECT n < 3 as m, n from generate_series(1,2500) a(n))');
 reset work_mem;
+
+-- Test BATCHES option
+set executor_batch_rows = 64;
+
+create table batch_test (a int, b text);
+insert into batch_test select i, repeat('x', 100) from generate_series(1, 10000) i;
+analyze batch_test;
+
+-- Basic batch stats output
+select explain_filter('explain (analyze, batches, buffers off, costs off) select * from batch_test');
+
+-- With filter
+select explain_filter('explain (analyze, batches, buffers off, costs off) select * from batch_test where a > 5000');
+
+-- With LIMIT - partial scan shows fewer batches
+select explain_filter('explain (analyze, batches, buffers off, costs off) select * from batch_test limit 100');
+
+-- Batching disabled - no batch line
+set executor_batch_rows = 0;
+select explain_filter('explain (analyze, batches, buffers off, costs off) select * from batch_test');
+reset executor_batch_rows;
+
+-- JSON format
+select explain_filter_to_json('explain (analyze, batches, buffers off, format json) select * from batch_test where a < 1000') #> '{0,Plan,Batches}';
+
+drop table batch_test;
+reset executor_batch_rows;
-- 
2.47.3