From e455ad14d53a4c1737c303aa036c2138537a3152 Mon Sep 17 00:00:00 2001
From: alterego655 <824662526@qq.com>
Date: Tue, 27 Jan 2026 12:11:26 +0800
Subject: [PATCH v1 2/4] Add pg_stat_recovery system view

Introduce pg_stat_recovery to expose WAL recovery state of the startup
process. The view provides:

- promote_triggered: whether promotion has been requested
- last_replayed_read_lsn/last_replayed_end_lsn: boundaries of last replayed record
- last_replayed_tli: timeline of last replayed record
- replay_end_lsn/replay_end_tli: current replay position
- recovery_last_xact_time: timestamp of last replayed commit/abort
- current_chunk_start_time: start time of current WAL chunk
- pause_state: recovery pause status (not paused/pause requested/paused)

Sensitive columns (LSNs, timelines, timestamps) require pg_read_all_stats
membership; basic status columns are visible to all users.

Returns zero rows when not in recovery.
---
 doc/src/sgml/monitoring.sgml           | 156 +++++++++++++++++++++++++
 src/backend/access/transam/xlogfuncs.c | 143 +++++++++++++++++++++++
 src/backend/catalog/system_views.sql   |  13 +++
 src/include/catalog/catversion.h       |   2 +-
 src/include/catalog/pg_proc.dat        |   8 ++
 src/test/regress/expected/rules.out    |  10 ++
 src/test/regress/expected/sysviews.out |   7 ++
 src/test/regress/sql/sysviews.sql      |   3 +
 8 files changed, 341 insertions(+), 1 deletion(-)

diff --git a/doc/src/sgml/monitoring.sgml b/doc/src/sgml/monitoring.sgml
index b77d189a500..ac2cb309976 100644
--- a/doc/src/sgml/monitoring.sgml
+++ b/doc/src/sgml/monitoring.sgml
@@ -338,6 +338,15 @@ postgres   27093  0.0  0.0  30096  2752 ?        Ss   11:34   0:00 postgres: ser
       </entry>
      </row>
 
+     <row>
+      <entry><structname>pg_stat_recovery</structname><indexterm><primary>pg_stat_recovery</primary></indexterm></entry>
+      <entry>Only one row, showing statistics about the recovery state of the
+       startup process.  This view returns no row when not in recovery.
+       See <link linkend="monitoring-pg-stat-recovery-view">
+       <structname>pg_stat_recovery</structname></link> for details.
+      </entry>
+     </row>
+
      <row>
       <entry><structname>pg_stat_recovery_prefetch</structname><indexterm><primary>pg_stat_recovery_prefetch</primary></indexterm></entry>
       <entry>Only one row, showing statistics about blocks prefetched during recovery.
@@ -1912,6 +1921,153 @@ description | Waiting for a newly initialized WAL file to reach durable storage
 
  </sect2>
 
+ <sect2 id="monitoring-pg-stat-recovery-view">
+  <title><structname>pg_stat_recovery</structname></title>
+
+  <indexterm>
+   <primary>pg_stat_recovery</primary>
+  </indexterm>
+
+  <para>
+   The <structname>pg_stat_recovery</structname> view will contain only
+   one row, showing statistics about the recovery state of the startup
+   process. This view returns no row when the server is not in recovery.
+  </para>
+
+  <para>
+   Some columns are restricted to members of the
+   <literal>pg_read_all_stats</literal> role; other users will see
+   <literal>NULL</literal> values for these columns.  The restricted columns
+   include LSN values, timeline IDs, and timestamps.  Basic operational status
+   columns (<structfield>promote_triggered</structfield>,
+   <structfield>pause_state</structfield>) are visible to all users.
+  </para>
+
+  <table id="pg-stat-recovery-view" xreflabel="pg_stat_recovery">
+   <title><structname>pg_stat_recovery</structname> View</title>
+   <tgroup cols="1">
+    <thead>
+     <row>
+      <entry role="catalog_table_entry"><para role="column_definition">
+       Column Type
+      </para>
+      <para>
+       Description
+      </para></entry>
+     </row>
+    </thead>
+
+    <tbody>
+     <row>
+      <entry role="catalog_table_entry"><para role="column_definition">
+       <structfield>promote_triggered</structfield> <type>boolean</type>
+      </para>
+      <para>
+       True if a promotion has been triggered for this standby server.
+      </para></entry>
+     </row>
+
+     <row>
+      <entry role="catalog_table_entry"><para role="column_definition">
+       <structfield>last_replayed_read_lsn</structfield> <type>pg_lsn</type>
+      </para>
+      <para>
+       Start LSN of the last WAL record replayed during recovery.
+      </para></entry>
+     </row>
+
+     <row>
+      <entry role="catalog_table_entry"><para role="column_definition">
+       <structfield>last_replayed_end_lsn</structfield> <type>pg_lsn</type>
+      </para>
+      <para>
+       End LSN of the last WAL record replayed during recovery.
+      </para></entry>
+     </row>
+
+     <row>
+      <entry role="catalog_table_entry"><para role="column_definition">
+       <structfield>last_replayed_tli</structfield> <type>integer</type>
+      </para>
+      <para>
+       Timeline of the last replayed WAL record.
+      </para></entry>
+     </row>
+
+     <row>
+      <entry role="catalog_table_entry"><para role="column_definition">
+       <structfield>replay_end_lsn</structfield> <type>pg_lsn</type>
+      </para>
+      <para>
+       Current replay position. When replaying a record, this is the end
+       position of the record being replayed; otherwise it equals
+       <structfield>last_replayed_end_lsn</structfield>.
+      </para></entry>
+     </row>
+
+     <row>
+      <entry role="catalog_table_entry"><para role="column_definition">
+       <structfield>replay_end_tli</structfield> <type>integer</type>
+      </para>
+      <para>
+       Timeline of the current replay position.
+      </para></entry>
+     </row>
+
+     <row>
+      <entry role="catalog_table_entry"><para role="column_definition">
+       <structfield>recovery_last_xact_time</structfield> <type>timestamp with time zone</type>
+      </para>
+      <para>
+       Timestamp of the last transaction commit or abort replayed during
+       recovery. This is the commit/abort time recorded in the WAL record.
+      </para></entry>
+     </row>
+
+     <row>
+      <entry role="catalog_table_entry"><para role="column_definition">
+       <structfield>current_chunk_start_time</structfield> <type>timestamp with time zone</type>
+      </para>
+      <para>
+       Time when the server started replaying the current chunk of WAL data.
+       This is used to calculate recovery conflict timeouts.
+      </para></entry>
+     </row>
+
+     <row>
+      <entry role="catalog_table_entry"><para role="column_definition">
+       <structfield>pause_state</structfield> <type>text</type>
+      </para>
+      <para>
+       Recovery pause state. Possible values are:
+      </para>
+       <itemizedlist>
+        <listitem>
+         <para>
+          <literal>not paused</literal>: Recovery is proceeding normally.
+         </para>
+        </listitem>
+        <listitem>
+         <para>
+          <literal>pause requested</literal>: A pause has been requested
+          but recovery has not yet paused.
+         </para>
+        </listitem>
+        <listitem>
+         <para>
+          <literal>paused</literal>: Recovery is paused.
+         </para>
+        </listitem>
+       </itemizedlist>
+      </entry>
+     </row>
+
+    </tbody>
+   </tgroup>
+  </table>
+
+ </sect2>
+
  <sect2 id="monitoring-pg-stat-recovery-prefetch">
   <title><structname>pg_stat_recovery_prefetch</structname></title>
 
diff --git a/src/backend/access/transam/xlogfuncs.c b/src/backend/access/transam/xlogfuncs.c
index 2efe4105efb..330bf00f6d9 100644
--- a/src/backend/access/transam/xlogfuncs.c
+++ b/src/backend/access/transam/xlogfuncs.c
@@ -22,10 +22,13 @@
 #include "access/xlog_internal.h"
 #include "access/xlogbackup.h"
 #include "access/xlogrecovery.h"
+#include "catalog/pg_authid.h"
 #include "catalog/pg_type.h"
 #include "funcapi.h"
 #include "miscadmin.h"
 #include "pgstat.h"
+#include "storage/spin.h"
+#include "utils/acl.h"
 #include "replication/walreceiver.h"
 #include "storage/fd.h"
 #include "storage/latch.h"
@@ -748,3 +751,143 @@ pg_promote(PG_FUNCTION_ARGS)
 						   wait_seconds)));
 	PG_RETURN_BOOL(false);
 }
+
+/*
+ * pg_stat_get_recovery - returns information about WAL recovery state
+ *
+ * Returns zero rows when not in recovery, one row when in recovery.
+ *
+ * Privilege model: View is public, sensitive fields (LSNs, timelines,
+ * timestamps) are redacted (set to NULL) for users without pg_read_all_stats
+ * membership.  Basic operational state (promote_triggered, pause_state) is
+ * always visible.
+ */
+Datum
+pg_stat_get_recovery(PG_FUNCTION_ARGS)
+{
+#define PG_STAT_GET_RECOVERY_COLS 9
+	ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;
+	Datum		values[PG_STAT_GET_RECOVERY_COLS];
+	bool		nulls[PG_STAT_GET_RECOVERY_COLS];
+	bool		has_privs;
+
+	/* Local copies of shared state */
+	bool		promote_triggered;
+	XLogRecPtr	last_replayed_read_lsn;
+	XLogRecPtr	last_replayed_end_lsn;
+	TimeLineID	last_replayed_tli;
+	XLogRecPtr	replay_end_lsn;
+	TimeLineID	replay_end_tli;
+	TimestampTz recovery_last_xact_time;
+	TimestampTz current_chunk_start_time;
+	RecoveryPauseState pause_state;
+
+	InitMaterializedSRF(fcinfo, 0);
+
+	/* Return zero rows when not in recovery */
+	if (!RecoveryInProgress())
+		return (Datum) 0;
+
+	/*
+	 * Check privilege level.  We still return a row for unprivileged users,
+	 * but redact sensitive fields (LSNs, timelines, timestamps).
+	 */
+	has_privs = has_privs_of_role(GetUserId(), ROLE_PG_READ_ALL_STATS);
+
+	/* Take a lock to ensure value consistency */
+	SpinLockAcquire(&XLogRecoveryCtl->info_lck);
+	promote_triggered = XLogRecoveryCtl->SharedPromoteIsTriggered;
+	last_replayed_read_lsn = XLogRecoveryCtl->lastReplayedReadRecPtr;
+	last_replayed_end_lsn = XLogRecoveryCtl->lastReplayedEndRecPtr;
+	last_replayed_tli = XLogRecoveryCtl->lastReplayedTLI;
+	replay_end_lsn = XLogRecoveryCtl->replayEndRecPtr;
+	replay_end_tli = XLogRecoveryCtl->replayEndTLI;
+	recovery_last_xact_time = XLogRecoveryCtl->recoveryLastXTime;
+	current_chunk_start_time = XLogRecoveryCtl->currentChunkStartTime;
+	pause_state = XLogRecoveryCtl->recoveryPauseState;
+	SpinLockRelease(&XLogRecoveryCtl->info_lck);
+
+	/* Initialize nulls array */
+	memset(nulls, 0, sizeof(nulls));
+
+	/* promote_triggered - always visible */
+	values[0] = BoolGetDatum(promote_triggered);
+
+	/*
+	 * Sensitive fields: LSNs, timelines, timestamps.  Redact for users
+	 * without pg_read_all_stats privilege.
+	 */
+	if (has_privs)
+	{
+		/* last_replayed_read_lsn */
+		if (XLogRecPtrIsValid(last_replayed_read_lsn))
+			values[1] = LSNGetDatum(last_replayed_read_lsn);
+		else
+			nulls[1] = true;
+
+		/* last_replayed_end_lsn */
+		if (XLogRecPtrIsValid(last_replayed_end_lsn))
+			values[2] = LSNGetDatum(last_replayed_end_lsn);
+		else
+			nulls[2] = true;
+
+		/* last_replayed_tli - NULL if corresponding LSN is invalid */
+		if (XLogRecPtrIsValid(last_replayed_end_lsn))
+			values[3] = Int32GetDatum(last_replayed_tli);
+		else
+			nulls[3] = true;
+
+		/* replay_end_lsn */
+		if (XLogRecPtrIsValid(replay_end_lsn))
+			values[4] = LSNGetDatum(replay_end_lsn);
+		else
+			nulls[4] = true;
+
+		/* replay_end_tli - NULL if corresponding LSN is invalid */
+		if (XLogRecPtrIsValid(replay_end_lsn))
+			values[5] = Int32GetDatum(replay_end_tli);
+		else
+			nulls[5] = true;
+
+		/* recovery_last_xact_time */
+		if (recovery_last_xact_time != 0)
+			values[6] = TimestampTzGetDatum(recovery_last_xact_time);
+		else
+			nulls[6] = true;
+
+		/* current_chunk_start_time */
+		if (current_chunk_start_time != 0)
+			values[7] = TimestampTzGetDatum(current_chunk_start_time);
+		else
+			nulls[7] = true;
+	}
+	else
+	{
+		/* Redact sensitive fields for unprivileged users */
+		nulls[1] = true;		/* last_replayed_read_lsn */
+		nulls[2] = true;		/* last_replayed_end_lsn */
+		nulls[3] = true;		/* last_replayed_tli */
+		nulls[4] = true;		/* replay_end_lsn */
+		nulls[5] = true;		/* replay_end_tli */
+		nulls[6] = true;		/* recovery_last_xact_time */
+		nulls[7] = true;		/* current_chunk_start_time */
+	}
+
+	/* pause_state - always visible */
+	switch (pause_state)
+	{
+		case RECOVERY_NOT_PAUSED:
+			values[8] = CStringGetTextDatum("not paused");
+			break;
+		case RECOVERY_PAUSE_REQUESTED:
+			values[8] = CStringGetTextDatum("pause requested");
+			break;
+		case RECOVERY_PAUSED:
+			values[8] = CStringGetTextDatum("paused");
+			break;
+	}
+
+	tuplestore_putvalues(rsinfo->setResult, rsinfo->setDesc, values, nulls);
+
+	return (Datum) 0;
+}
diff --git a/src/backend/catalog/system_views.sql b/src/backend/catalog/system_views.sql
index 7553f31fef0..a88cac599c4 100644
--- a/src/backend/catalog/system_views.sql
+++ b/src/backend/catalog/system_views.sql
@@ -984,6 +984,19 @@ CREATE VIEW pg_stat_wal_receiver AS
     FROM pg_stat_get_wal_receiver() s
     WHERE s.pid IS NOT NULL;
 
+CREATE VIEW pg_stat_recovery AS
+    SELECT
+            s.promote_triggered,
+            s.last_replayed_read_lsn,
+            s.last_replayed_end_lsn,
+            s.last_replayed_tli,
+            s.replay_end_lsn,
+            s.replay_end_tli,
+            s.recovery_last_xact_time,
+            s.current_chunk_start_time,
+            s.pause_state
+    FROM pg_stat_get_recovery() s;
+
 CREATE VIEW pg_stat_recovery_prefetch AS
     SELECT
             s.stats_reset,
diff --git a/src/include/catalog/catversion.h b/src/include/catalog/catversion.h
index fb577026666..fe3acbfb911 100644
--- a/src/include/catalog/catversion.h
+++ b/src/include/catalog/catversion.h
@@ -57,6 +57,6 @@
  */
 
 /*							yyyymmddN */
-#define CATALOG_VERSION_NO	202601261
+#define CATALOG_VERSION_NO	202601271
 
 #endif
diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat
index 5e5e33f64fc..3b1569d8a3f 100644
--- a/src/include/catalog/pg_proc.dat
+++ b/src/include/catalog/pg_proc.dat
@@ -5697,6 +5697,14 @@
   proargmodes => '{o,o,o,o,o,o,o,o,o,o,o,o,o,o,o}',
   proargnames => '{pid,status,receive_start_lsn,receive_start_tli,written_lsn,flushed_lsn,received_tli,last_msg_send_time,last_msg_receipt_time,latest_end_lsn,latest_end_time,slot_name,sender_host,sender_port,conninfo}',
   prosrc => 'pg_stat_get_wal_receiver' },
+{ oid => '9949', descr => 'statistics: information about WAL recovery',
+  proname => 'pg_stat_get_recovery', prorows => '1', proretset => 't',
+  provolatile => 's', proparallel => 'r', prorettype => 'record',
+  proargtypes => '',
+  proallargtypes => '{bool,pg_lsn,pg_lsn,int4,pg_lsn,int4,timestamptz,timestamptz,text}',
+  proargmodes => '{o,o,o,o,o,o,o,o,o}',
+  proargnames => '{promote_triggered,last_replayed_read_lsn,last_replayed_end_lsn,last_replayed_tli,replay_end_lsn,replay_end_tli,recovery_last_xact_time,current_chunk_start_time,pause_state}',
+  prosrc => 'pg_stat_get_recovery' },
 { oid => '6169', descr => 'statistics: information about replication slot',
   proname => 'pg_stat_get_replication_slot', provolatile => 's',
   proparallel => 'r', prorettype => 'record', proargtypes => 'text',
diff --git a/src/test/regress/expected/rules.out b/src/test/regress/expected/rules.out
index f4ee2bd7459..7f3499291b5 100644
--- a/src/test/regress/expected/rules.out
+++ b/src/test/regress/expected/rules.out
@@ -2127,6 +2127,16 @@ pg_stat_progress_vacuum| SELECT s.pid,
         END AS started_by
    FROM (pg_stat_get_progress_info('VACUUM'::text) s(pid, datid, relid, param1, param2, param3, param4, param5, param6, param7, param8, param9, param10, param11, param12, param13, param14, param15, param16, param17, param18, param19, param20)
      LEFT JOIN pg_database d ON ((s.datid = d.oid)));
+pg_stat_recovery| SELECT promote_triggered,
+    last_replayed_read_lsn,
+    last_replayed_end_lsn,
+    last_replayed_tli,
+    replay_end_lsn,
+    replay_end_tli,
+    recovery_last_xact_time,
+    current_chunk_start_time,
+    pause_state
+   FROM pg_stat_get_recovery() s(promote_triggered, last_replayed_read_lsn, last_replayed_end_lsn, last_replayed_tli, replay_end_lsn, replay_end_tli, recovery_last_xact_time, current_chunk_start_time, pause_state);
 pg_stat_recovery_prefetch| SELECT stats_reset,
     prefetch,
     hit,
diff --git a/src/test/regress/expected/sysviews.out b/src/test/regress/expected/sysviews.out
index 3dd63fd88ed..132b56a5864 100644
--- a/src/test/regress/expected/sysviews.out
+++ b/src/test/regress/expected/sysviews.out
@@ -143,6 +143,13 @@ select count(*) = 0 as ok from pg_stat_wal_receiver;
  t
 (1 row)
 
+-- We expect no recovery state in this test (running on primary)
+select count(*) = 0 as ok from pg_stat_recovery;
+ ok 
+----
+ t
+(1 row)
+
 -- This is to record the prevailing planner enable_foo settings during
 -- a regression test run.
 select name, setting from pg_settings where name like 'enable%';
diff --git a/src/test/regress/sql/sysviews.sql b/src/test/regress/sql/sysviews.sql
index 004f9a70e00..507e400ad4a 100644
--- a/src/test/regress/sql/sysviews.sql
+++ b/src/test/regress/sql/sysviews.sql
@@ -76,6 +76,9 @@ select count(*) = 1 as ok from pg_stat_wal;
 -- We expect no walreceiver running in this test
 select count(*) = 0 as ok from pg_stat_wal_receiver;
 
+-- We expect no recovery state in this test (running on primary)
+select count(*) = 0 as ok from pg_stat_recovery;
+
 -- This is to record the prevailing planner enable_foo settings during
 -- a regression test run.
 select name, setting from pg_settings where name like 'enable%';
-- 
2.51.0

