Hi,

On Wed, Sep 25, 2024 at 04:04:43PM +0200, Peter Eisentraut wrote:
> Is there a reason for this elaborate error handling:

Thanks for looking at it!

> +     fd = OpenTransientFile(path, O_RDONLY | PG_BINARY);
> +
> +     if (fd < 0 && errno == ENOENT)
> +             ereport(ERROR,
> +                             errmsg("file \"%s\" does not exist", path));
> +     else if (fd < 0)
> +             ereport(ERROR,
> +                             (errcode_for_file_access(),
> +                              errmsg("could not open file \"%s\": %m", 
> path)));
> 
> Couldn't you just use the second branch for all errno's?

Yeah, I think it comes from copying/pasting from SnapBuildRestore() too 
"quickly".
v10 attached uses the second branch only.

Regards,

-- 
Bertrand Drouvot
PostgreSQL Contributors Team
RDS Open Source Databases
Amazon Web Services: https://aws.amazon.com
>From a093c768f1b3a70c80046742d7a264bee5d4d205 Mon Sep 17 00:00:00 2001
From: Bertrand Drouvot <bertranddrouvot...@gmail.com>
Date: Wed, 14 Aug 2024 08:46:05 +0000
Subject: [PATCH v10] Add contrib/pg_logicalinspect

Provides SQL functions that allow to inspect logical decoding components.

It currently allows to inspect the contents of serialized logical snapshots of
a running database cluster, which is useful for debugging or educational
purposes.
---
 contrib/Makefile                              |   1 +
 contrib/meson.build                           |   1 +
 contrib/pg_logicalinspect/.gitignore          |   4 +
 contrib/pg_logicalinspect/Makefile            |  31 +++
 .../expected/logical_inspect.out              |  52 ++++
 contrib/pg_logicalinspect/logicalinspect.conf |   1 +
 contrib/pg_logicalinspect/meson.build         |  39 +++
 .../pg_logicalinspect--1.0.sql                |  43 +++
 contrib/pg_logicalinspect/pg_logicalinspect.c | 262 ++++++++++++++++++
 .../pg_logicalinspect.control                 |   5 +
 .../specs/logical_inspect.spec                |  34 +++
 doc/src/sgml/contrib.sgml                     |   1 +
 doc/src/sgml/filelist.sgml                    |   1 +
 doc/src/sgml/pglogicalinspect.sgml            | 151 ++++++++++
 src/backend/replication/logical/snapbuild.c   | 190 +------------
 src/include/port/pg_crc32c.h                  |  16 +-
 src/include/replication/snapbuild.h           |   6 +-
 src/include/replication/snapbuild_internal.h  | 204 ++++++++++++++
 18 files changed, 849 insertions(+), 193 deletions(-)
   7.8% contrib/pg_logicalinspect/expected/
   5.7% contrib/pg_logicalinspect/specs/
  32.9% contrib/pg_logicalinspect/
  13.6% doc/src/sgml/
  16.9% src/backend/replication/logical/
   4.0% src/include/port/
  18.7% src/include/replication/

diff --git a/contrib/Makefile b/contrib/Makefile
index abd780f277..952855d9b6 100644
--- a/contrib/Makefile
+++ b/contrib/Makefile
@@ -32,6 +32,7 @@ SUBDIRS = \
 		passwordcheck	\
 		pg_buffercache	\
 		pg_freespacemap \
+		pg_logicalinspect \
 		pg_prewarm	\
 		pg_stat_statements \
 		pg_surgery	\
diff --git a/contrib/meson.build b/contrib/meson.build
index 14a8906865..159ff41555 100644
--- a/contrib/meson.build
+++ b/contrib/meson.build
@@ -46,6 +46,7 @@ subdir('passwordcheck')
 subdir('pg_buffercache')
 subdir('pgcrypto')
 subdir('pg_freespacemap')
+subdir('pg_logicalinspect')
 subdir('pg_prewarm')
 subdir('pgrowlocks')
 subdir('pg_stat_statements')
diff --git a/contrib/pg_logicalinspect/.gitignore b/contrib/pg_logicalinspect/.gitignore
new file mode 100644
index 0000000000..5dcb3ff972
--- /dev/null
+++ b/contrib/pg_logicalinspect/.gitignore
@@ -0,0 +1,4 @@
+# Generated subdirectories
+/log/
+/results/
+/tmp_check/
diff --git a/contrib/pg_logicalinspect/Makefile b/contrib/pg_logicalinspect/Makefile
new file mode 100644
index 0000000000..55124514d4
--- /dev/null
+++ b/contrib/pg_logicalinspect/Makefile
@@ -0,0 +1,31 @@
+# contrib/pg_logicalinspect/Makefile
+
+MODULE_big = pg_logicalinspect
+OBJS = \
+	$(WIN32RES) \
+	pg_logicalinspect.o
+PGFILEDESC = "pg_logicalinspect - functions to inspect logical decoding components"
+
+EXTENSION = pg_logicalinspect
+DATA = pg_logicalinspect--1.0.sql
+
+EXTRA_INSTALL = contrib/test_decoding
+
+ISOLATION = logical_inspect
+
+ISOLATION_OPTS = --temp-config $(top_srcdir)/contrib/pg_logicalinspect/logicalinspect.conf
+
+# Disabled because these tests require "wal_level=logical", which
+# some installcheck users do not have (e.g. buildfarm clients).
+NO_INSTALLCHECK = 1
+
+ifdef USE_PGXS
+PG_CONFIG = pg_config
+PGXS := $(shell $(PG_CONFIG) --pgxs)
+include $(PGXS)
+else
+subdir = contrib/pg_logicalinspect
+top_builddir = ../..
+include $(top_builddir)/src/Makefile.global
+include $(top_srcdir)/contrib/contrib-global.mk
+endif
diff --git a/contrib/pg_logicalinspect/expected/logical_inspect.out b/contrib/pg_logicalinspect/expected/logical_inspect.out
new file mode 100644
index 0000000000..08de273197
--- /dev/null
+++ b/contrib/pg_logicalinspect/expected/logical_inspect.out
@@ -0,0 +1,52 @@
+Parsed test spec with 2 sessions
+
+starting permutation: s0_init s0_begin s0_savepoint s0_truncate s1_checkpoint s1_get_changes s0_commit s0_begin s0_insert s1_checkpoint s1_get_changes s0_commit s1_get_changes s1_get_logical_snapshot_info s1_get_logical_snapshot_meta
+step s0_init: SELECT 'init' FROM pg_create_logical_replication_slot('isolation_slot', 'test_decoding');
+?column?
+--------
+init    
+(1 row)
+
+step s0_begin: BEGIN;
+step s0_savepoint: SAVEPOINT sp1;
+step s0_truncate: TRUNCATE tbl1;
+step s1_checkpoint: CHECKPOINT;
+step s1_get_changes: SELECT data FROM pg_logical_slot_get_changes('isolation_slot', NULL, NULL, 'skip-empty-xacts', '1', 'include-xids', '0');
+data
+----
+(0 rows)
+
+step s0_commit: COMMIT;
+step s0_begin: BEGIN;
+step s0_insert: INSERT INTO tbl1 VALUES (1);
+step s1_checkpoint: CHECKPOINT;
+step s1_get_changes: SELECT data FROM pg_logical_slot_get_changes('isolation_slot', NULL, NULL, 'skip-empty-xacts', '1', 'include-xids', '0');
+data                                   
+---------------------------------------
+BEGIN                                  
+table public.tbl1: TRUNCATE: (no-flags)
+COMMIT                                 
+(3 rows)
+
+step s0_commit: COMMIT;
+step s1_get_changes: SELECT data FROM pg_logical_slot_get_changes('isolation_slot', NULL, NULL, 'skip-empty-xacts', '1', 'include-xids', '0');
+data                                                         
+-------------------------------------------------------------
+BEGIN                                                        
+table public.tbl1: INSERT: val1[integer]:1 val2[integer]:null
+COMMIT                                                       
+(3 rows)
+
+step s1_get_logical_snapshot_info: SELECT (pg_get_logical_snapshot_info(f.name::pg_lsn)).state,(pg_get_logical_snapshot_info(f.name::pg_lsn)).catchange_count,array_length((pg_get_logical_snapshot_info(f.name::pg_lsn)).catchange_xip,1) AS catchange_array_length,(pg_get_logical_snapshot_info(f.name::pg_lsn)).committed_count,array_length((pg_get_logical_snapshot_info(f.name::pg_lsn)).committed_xip,1) AS committed_array_length FROM (SELECT replace(replace(name,'.snap',''),'-','/') AS name FROM pg_ls_logicalsnapdir()) AS f ORDER BY 2;
+state     |catchange_count|catchange_array_length|committed_count|committed_array_length
+----------+---------------+----------------------+---------------+----------------------
+consistent|              0|                      |              2|                     2
+consistent|              2|                     2|              0|                      
+(2 rows)
+
+step s1_get_logical_snapshot_meta: SELECT COUNT((pg_get_logical_snapshot_meta(f.name::pg_lsn))) FROM (SELECT replace(replace(name,'.snap',''),'-','/') AS name FROM pg_ls_logicalsnapdir()) AS f;
+count
+-----
+    2
+(1 row)
+
diff --git a/contrib/pg_logicalinspect/logicalinspect.conf b/contrib/pg_logicalinspect/logicalinspect.conf
new file mode 100644
index 0000000000..e3d257315f
--- /dev/null
+++ b/contrib/pg_logicalinspect/logicalinspect.conf
@@ -0,0 +1 @@
+wal_level = logical
diff --git a/contrib/pg_logicalinspect/meson.build b/contrib/pg_logicalinspect/meson.build
new file mode 100644
index 0000000000..3ec635509b
--- /dev/null
+++ b/contrib/pg_logicalinspect/meson.build
@@ -0,0 +1,39 @@
+# Copyright (c) 2024, PostgreSQL Global Development Group
+
+pg_logicalinspect_sources = files('pg_logicalinspect.c')
+
+if host_system == 'windows'
+  pg_logicalinspect_sources += rc_lib_gen.process(win32ver_rc, extra_args: [
+    '--NAME', 'pg_logicalinspect',
+    '--FILEDESC', 'pg_logicalinspect - functions to inspect logical decoding components',])
+endif
+
+pg_logicalinspect = shared_module('pg_logicalinspect',
+  pg_logicalinspect_sources,
+  kwargs: contrib_mod_args + {
+      'dependencies': contrib_mod_args['dependencies'],
+  },
+)
+contrib_targets += pg_logicalinspect
+
+install_data(
+  'pg_logicalinspect.control',
+  'pg_logicalinspect--1.0.sql',
+  kwargs: contrib_data_args,
+)
+
+tests += {
+  'name': 'pg_logicalinspect',
+  'sd': meson.current_source_dir(),
+  'bd': meson.current_build_dir(),
+  'isolation': {
+    'specs': [
+      'logical_inspect',
+    ],
+    'regress_args': [
+      '--temp-config', files('logicalinspect.conf'),
+    ],
+    # see above
+    'runningcheck': false,
+  },
+}
diff --git a/contrib/pg_logicalinspect/pg_logicalinspect--1.0.sql b/contrib/pg_logicalinspect/pg_logicalinspect--1.0.sql
new file mode 100644
index 0000000000..fc06e428ce
--- /dev/null
+++ b/contrib/pg_logicalinspect/pg_logicalinspect--1.0.sql
@@ -0,0 +1,43 @@
+/* contrib/pg_logicalinspect/pg_logicalinspect--1.0.sql */
+
+-- complain if script is sourced in psql, rather than via CREATE EXTENSION
+\echo Use "CREATE EXTENSION pg_logicalinspect" to load this file. \quit
+
+--
+-- pg_get_logical_snapshot_meta()
+--
+CREATE FUNCTION pg_get_logical_snapshot_meta(IN in_lsn pg_lsn,
+    OUT magic int4,
+    OUT checksum int8,
+    OUT version int4
+)
+AS 'MODULE_PATHNAME', 'pg_get_logical_snapshot_meta'
+LANGUAGE C STRICT PARALLEL SAFE;
+
+REVOKE EXECUTE ON FUNCTION pg_get_logical_snapshot_meta(pg_lsn) FROM PUBLIC;
+GRANT EXECUTE ON FUNCTION pg_get_logical_snapshot_meta(pg_lsn) TO pg_read_server_files;
+
+--
+-- pg_get_logical_snapshot_info()
+--
+CREATE FUNCTION pg_get_logical_snapshot_info(IN in_lsn pg_lsn,
+    OUT state text,
+    OUT xmin xid,
+    OUT xmax xid,
+    OUT start_decoding_at pg_lsn,
+    OUT two_phase_at pg_lsn,
+    OUT initial_xmin_horizon xid,
+    OUT building_full_snapshot boolean,
+    OUT in_slot_creation boolean,
+    OUT last_serialized_snapshot pg_lsn,
+    OUT next_phase_at xid,
+    OUT committed_count int8,
+    OUT committed_xip xid[],
+    OUT catchange_count int8,
+    OUT catchange_xip xid[]
+)
+AS 'MODULE_PATHNAME', 'pg_get_logical_snapshot_info'
+LANGUAGE C STRICT PARALLEL SAFE;
+
+REVOKE EXECUTE ON FUNCTION pg_get_logical_snapshot_info(pg_lsn) FROM PUBLIC;
+GRANT EXECUTE ON FUNCTION pg_get_logical_snapshot_info(pg_lsn) TO pg_read_server_files;
diff --git a/contrib/pg_logicalinspect/pg_logicalinspect.c b/contrib/pg_logicalinspect/pg_logicalinspect.c
new file mode 100644
index 0000000000..4460f07a36
--- /dev/null
+++ b/contrib/pg_logicalinspect/pg_logicalinspect.c
@@ -0,0 +1,262 @@
+/*-------------------------------------------------------------------------
+ *
+ * pg_logicalinspect.c
+ *		  Functions to inspect contents of PostgreSQL logical snapshots
+ *
+ * Copyright (c) 2024, PostgreSQL Global Development Group
+ *
+ * IDENTIFICATION
+ *		  contrib/pg_logicalinspect/pg_logicalinspect.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "funcapi.h"
+#include "replication/snapbuild_internal.h"
+#include "utils/array.h"
+#include "utils/builtins.h"
+#include "utils/pg_lsn.h"
+
+PG_MODULE_MAGIC;
+
+PG_FUNCTION_INFO_V1(pg_get_logical_snapshot_meta);
+PG_FUNCTION_INFO_V1(pg_get_logical_snapshot_info);
+
+/*
+ * Lookup table for SnapBuildState. The lookup index is offset by 1
+ * because the consecutive SnapBuildState enum values start at -1.
+ */
+#define SNAPBUILD_STATE_INCR 1
+
+static const char *const SnapBuildStateDesc[] = {
+	[SNAPBUILD_START + SNAPBUILD_STATE_INCR] = "start",
+	[SNAPBUILD_BUILDING_SNAPSHOT + SNAPBUILD_STATE_INCR] = "building",
+	[SNAPBUILD_FULL_SNAPSHOT + SNAPBUILD_STATE_INCR] = "full",
+	[SNAPBUILD_CONSISTENT + SNAPBUILD_STATE_INCR] = "consistent",
+};
+
+/*
+ * NOTE: For any code change or issue fix here, it is highly recommended to
+ * give a thought about doing the same in SnapBuildRestore() as well.
+ */
+
+/*
+ * Validate the logical snapshot file and read its contents to 'ondisk'.
+ */
+static void
+ValidateAndRestoreSnapshotFile(XLogRecPtr lsn, SnapBuildOnDisk *ondisk,
+							   const char *path)
+{
+	int			fd;
+	Size		sz;
+	pg_crc32c	checksum;
+	MemoryContext context;
+
+	context = AllocSetContextCreate(CurrentMemoryContext,
+									"logicalsnapshot inspect context",
+									ALLOCSET_DEFAULT_SIZES);
+
+	fd = OpenTransientFile(path, O_RDONLY | PG_BINARY);
+
+	if (fd < 0)
+		ereport(ERROR,
+				(errcode_for_file_access(),
+				 errmsg("could not open file \"%s\": %m", path)));
+
+	/* ----
+	 * Make sure the snapshot had been stored safely to disk, that's normally
+	 * cheap.
+	 * Note that we do not need PANIC here, nobody will be able to use the
+	 * slot without fsyncing, and saving it won't succeed without an fsync()
+	 * either...
+	 * ----
+	 */
+	fsync_fname(path, false);
+	fsync_fname(PG_LOGICAL_SNAPSHOTS_DIR, true);
+
+	/* read statically sized portion of snapshot */
+	SnapBuildRestoreContents(fd, (char *) ondisk, SnapBuildOnDiskConstantSize, path);
+
+	if (ondisk->magic != SNAPBUILD_MAGIC)
+		ereport(ERROR,
+				(errcode(ERRCODE_DATA_CORRUPTED),
+				 errmsg("snapbuild state file \"%s\" has wrong magic number: %u instead of %u",
+						path, ondisk->magic, SNAPBUILD_MAGIC)));
+
+	if (ondisk->version != SNAPBUILD_VERSION)
+		ereport(ERROR,
+				(errcode(ERRCODE_DATA_CORRUPTED),
+				 errmsg("snapbuild state file \"%s\" has unsupported version: %u instead of %u",
+						path, ondisk->version, SNAPBUILD_VERSION)));
+
+	INIT_CRC32C(checksum);
+	COMP_CRC32C(checksum,
+				((char *) ondisk) + SnapBuildOnDiskNotChecksummedSize,
+				SnapBuildOnDiskConstantSize - SnapBuildOnDiskNotChecksummedSize);
+
+	/* read SnapBuild */
+	SnapBuildRestoreContents(fd, (char *) &ondisk->builder, sizeof(SnapBuild), path);
+	COMP_CRC32C(checksum, &ondisk->builder, sizeof(SnapBuild));
+
+	ondisk->builder.context = context;
+
+	/* restore committed xacts information */
+	if (ondisk->builder.committed.xcnt > 0)
+	{
+		sz = sizeof(TransactionId) * ondisk->builder.committed.xcnt;
+		ondisk->builder.committed.xip = MemoryContextAllocZero(ondisk->builder.context, sz);
+		SnapBuildRestoreContents(fd, (char *) ondisk->builder.committed.xip, sz, path);
+		COMP_CRC32C(checksum, ondisk->builder.committed.xip, sz);
+	}
+
+	/* restore catalog modifying xacts information */
+	if (ondisk->builder.catchange.xcnt > 0)
+	{
+		sz = sizeof(TransactionId) * ondisk->builder.catchange.xcnt;
+		ondisk->builder.catchange.xip = MemoryContextAllocZero(ondisk->builder.context, sz);
+		SnapBuildRestoreContents(fd, (char *) ondisk->builder.catchange.xip, sz, path);
+		COMP_CRC32C(checksum, ondisk->builder.catchange.xip, sz);
+	}
+
+	if (CloseTransientFile(fd) != 0)
+		ereport(ERROR,
+				(errcode_for_file_access(),
+				 errmsg("could not close file \"%s\": %m", path)));
+
+	FIN_CRC32C(checksum);
+
+	/* verify checksum of what we've read */
+	if (!EQ_CRC32C(checksum, ondisk->checksum))
+		ereport(ERROR,
+				(errcode(ERRCODE_DATA_CORRUPTED),
+				 errmsg("checksum mismatch for snapbuild state file \"%s\": is %u, should be %u",
+						path, checksum, ondisk->checksum)));
+}
+
+/*
+ * Retrieve the logical snapshot file metadata.
+ */
+Datum
+pg_get_logical_snapshot_meta(PG_FUNCTION_ARGS)
+{
+#define PG_GET_LOGICAL_SNAPSHOT_META_COLS 3
+	SnapBuildOnDisk ondisk;
+	XLogRecPtr	lsn;
+	HeapTuple	tuple;
+	Datum		values[PG_GET_LOGICAL_SNAPSHOT_META_COLS];
+	bool		nulls[PG_GET_LOGICAL_SNAPSHOT_META_COLS];
+	TupleDesc	tupdesc;
+	char		path[MAXPGPATH];
+	int			i = 0;
+
+	lsn = PG_GETARG_LSN(0);
+
+	sprintf(path, "%s/%X-%X.snap",
+			PG_LOGICAL_SNAPSHOTS_DIR,
+			LSN_FORMAT_ARGS(lsn));
+
+	ValidateAndRestoreSnapshotFile(lsn, &ondisk, path);
+
+	/* Build a tuple descriptor for our result type. */
+	if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)
+		elog(ERROR, "return type must be a row type");
+
+	memset(nulls, 0, sizeof(nulls));
+
+	values[i++] = UInt32GetDatum(ondisk.magic);
+	values[i++] = Int64GetDatum((int64) ondisk.checksum);
+	values[i++] = UInt32GetDatum(ondisk.version);
+
+	Assert(i == PG_GET_LOGICAL_SNAPSHOT_META_COLS);
+
+	tuple = heap_form_tuple(tupdesc, values, nulls);
+
+	MemoryContextReset(ondisk.builder.context);
+
+	PG_RETURN_DATUM(HeapTupleGetDatum(tuple));
+
+#undef PG_GET_LOGICAL_SNAPSHOT_META_COLS
+}
+
+Datum
+pg_get_logical_snapshot_info(PG_FUNCTION_ARGS)
+{
+#define PG_GET_LOGICAL_SNAPSHOT_INFO_COLS 14
+	SnapBuildOnDisk ondisk;
+	XLogRecPtr	lsn;
+	HeapTuple	tuple;
+	Datum		values[PG_GET_LOGICAL_SNAPSHOT_INFO_COLS];
+	bool		nulls[PG_GET_LOGICAL_SNAPSHOT_INFO_COLS];
+	TupleDesc	tupdesc;
+	char		path[MAXPGPATH];
+	int			i = 0;
+
+	lsn = PG_GETARG_LSN(0);
+
+	sprintf(path, "%s/%X-%X.snap",
+			PG_LOGICAL_SNAPSHOTS_DIR,
+			LSN_FORMAT_ARGS(lsn));
+
+	ValidateAndRestoreSnapshotFile(lsn, &ondisk, path);
+
+	/* Build a tuple descriptor for our result type. */
+	if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)
+		elog(ERROR, "return type must be a row type");
+
+	memset(nulls, 0, sizeof(nulls));
+
+	values[i++] = CStringGetTextDatum(SnapBuildStateDesc[ondisk.builder.state +
+														 SNAPBUILD_STATE_INCR]);
+	values[i++] = TransactionIdGetDatum(ondisk.builder.xmin);
+	values[i++] = TransactionIdGetDatum(ondisk.builder.xmax);
+	values[i++] = LSNGetDatum(ondisk.builder.start_decoding_at);
+	values[i++] = LSNGetDatum(ondisk.builder.two_phase_at);
+	values[i++] = TransactionIdGetDatum(ondisk.builder.initial_xmin_horizon);
+	values[i++] = BoolGetDatum(ondisk.builder.building_full_snapshot);
+	values[i++] = BoolGetDatum(ondisk.builder.in_slot_creation);
+	values[i++] = LSNGetDatum(ondisk.builder.last_serialized_snapshot);
+	values[i++] = TransactionIdGetDatum(ondisk.builder.next_phase_at);
+
+	values[i++] = Int64GetDatum(ondisk.builder.committed.xcnt);
+	if (ondisk.builder.committed.xcnt > 0)
+	{
+		Datum	   *arrayelems;
+		int			narrayelems = 0;
+
+		arrayelems = (Datum *) palloc(ondisk.builder.committed.xcnt * sizeof(Datum));
+
+		for (; narrayelems < ondisk.builder.committed.xcnt; narrayelems++)
+			arrayelems[narrayelems] = Int64GetDatum((int64) ondisk.builder.committed.xip[narrayelems]);
+
+		values[i++] = PointerGetDatum(construct_array_builtin(arrayelems, narrayelems, INT8OID));
+	}
+	else
+		nulls[i++] = true;
+
+	values[i++] = Int64GetDatum(ondisk.builder.catchange.xcnt);
+	if (ondisk.builder.catchange.xcnt > 0)
+	{
+		Datum	   *arrayelems;
+		int			narrayelems = 0;
+
+		arrayelems = (Datum *) palloc(ondisk.builder.catchange.xcnt * sizeof(Datum));
+
+		for (; narrayelems < ondisk.builder.catchange.xcnt; narrayelems++)
+			arrayelems[narrayelems] = Int64GetDatum((int64) ondisk.builder.catchange.xip[narrayelems]);
+
+		values[i++] = PointerGetDatum(construct_array_builtin(arrayelems, narrayelems, INT8OID));
+	}
+	else
+		nulls[i++] = true;
+
+	Assert(i == PG_GET_LOGICAL_SNAPSHOT_INFO_COLS);
+
+	tuple = heap_form_tuple(tupdesc, values, nulls);
+
+	MemoryContextReset(ondisk.builder.context);
+
+	PG_RETURN_DATUM(HeapTupleGetDatum(tuple));
+
+#undef PG_GET_LOGICAL_SNAPSHOT_INFO_COLS
+}
diff --git a/contrib/pg_logicalinspect/pg_logicalinspect.control b/contrib/pg_logicalinspect/pg_logicalinspect.control
new file mode 100644
index 0000000000..b4a70e57ba
--- /dev/null
+++ b/contrib/pg_logicalinspect/pg_logicalinspect.control
@@ -0,0 +1,5 @@
+# pg_logicalinspect extension
+comment = 'functions to inspect logical decoding components'
+default_version = '1.0'
+module_pathname = '$libdir/pg_logicalinspect'
+relocatable = true
diff --git a/contrib/pg_logicalinspect/specs/logical_inspect.spec b/contrib/pg_logicalinspect/specs/logical_inspect.spec
new file mode 100644
index 0000000000..47641163fe
--- /dev/null
+++ b/contrib/pg_logicalinspect/specs/logical_inspect.spec
@@ -0,0 +1,34 @@
+# Test the pg_logicalinspect functions: that needs some permutation to
+# ensure that we are creating multiple logical snapshots and that one of them
+# contains ongoing catalogs changes.
+setup
+{
+    DROP TABLE IF EXISTS tbl1;
+    CREATE TABLE tbl1 (val1 integer, val2 integer);
+    CREATE EXTENSION pg_logicalinspect;
+}
+
+teardown
+{
+    DROP TABLE tbl1;
+    SELECT 'stop' FROM pg_drop_replication_slot('isolation_slot');
+    DROP EXTENSION pg_logicalinspect;
+}
+
+session "s0"
+setup { SET synchronous_commit=on; }
+step "s0_init" { SELECT 'init' FROM pg_create_logical_replication_slot('isolation_slot', 'test_decoding'); }
+step "s0_begin" { BEGIN; }
+step "s0_savepoint" { SAVEPOINT sp1; }
+step "s0_truncate" { TRUNCATE tbl1; }
+step "s0_insert" { INSERT INTO tbl1 VALUES (1); }
+step "s0_commit" { COMMIT; }
+
+session "s1"
+setup { SET synchronous_commit=on; }
+step "s1_checkpoint" { CHECKPOINT; }
+step "s1_get_changes" { SELECT data FROM pg_logical_slot_get_changes('isolation_slot', NULL, NULL, 'skip-empty-xacts', '1', 'include-xids', '0'); }
+step "s1_get_logical_snapshot_meta" { SELECT COUNT((pg_get_logical_snapshot_meta(f.name::pg_lsn))) FROM (SELECT replace(replace(name,'.snap',''),'-','/') AS name FROM pg_ls_logicalsnapdir()) AS f; }
+step "s1_get_logical_snapshot_info" { SELECT (pg_get_logical_snapshot_info(f.name::pg_lsn)).state,(pg_get_logical_snapshot_info(f.name::pg_lsn)).catchange_count,array_length((pg_get_logical_snapshot_info(f.name::pg_lsn)).catchange_xip,1) AS catchange_array_length,(pg_get_logical_snapshot_info(f.name::pg_lsn)).committed_count,array_length((pg_get_logical_snapshot_info(f.name::pg_lsn)).committed_xip,1) AS committed_array_length FROM (SELECT replace(replace(name,'.snap',''),'-','/') AS name FROM pg_ls_logicalsnapdir()) AS f ORDER BY 2; }
+
+permutation "s0_init" "s0_begin" "s0_savepoint" "s0_truncate" "s1_checkpoint" "s1_get_changes" "s0_commit" "s0_begin" "s0_insert" "s1_checkpoint" "s1_get_changes" "s0_commit" "s1_get_changes" "s1_get_logical_snapshot_info" "s1_get_logical_snapshot_meta"
diff --git a/doc/src/sgml/contrib.sgml b/doc/src/sgml/contrib.sgml
index 44639a8dca..7c381949a5 100644
--- a/doc/src/sgml/contrib.sgml
+++ b/doc/src/sgml/contrib.sgml
@@ -154,6 +154,7 @@ CREATE EXTENSION <replaceable>extension_name</replaceable>;
  &pgbuffercache;
  &pgcrypto;
  &pgfreespacemap;
+ &pglogicalinspect;
  &pgprewarm;
  &pgrowlocks;
  &pgstatstatements;
diff --git a/doc/src/sgml/filelist.sgml b/doc/src/sgml/filelist.sgml
index a7ff5f8264..66e6dccd4c 100644
--- a/doc/src/sgml/filelist.sgml
+++ b/doc/src/sgml/filelist.sgml
@@ -143,6 +143,7 @@
 <!ENTITY pgbuffercache   SYSTEM "pgbuffercache.sgml">
 <!ENTITY pgcrypto        SYSTEM "pgcrypto.sgml">
 <!ENTITY pgfreespacemap  SYSTEM "pgfreespacemap.sgml">
+<!ENTITY pglogicalinspect  SYSTEM "pglogicalinspect.sgml">
 <!ENTITY pgprewarm       SYSTEM "pgprewarm.sgml">
 <!ENTITY pgrowlocks      SYSTEM "pgrowlocks.sgml">
 <!ENTITY pgstatstatements SYSTEM "pgstatstatements.sgml">
diff --git a/doc/src/sgml/pglogicalinspect.sgml b/doc/src/sgml/pglogicalinspect.sgml
new file mode 100644
index 0000000000..f0b26637d0
--- /dev/null
+++ b/doc/src/sgml/pglogicalinspect.sgml
@@ -0,0 +1,151 @@
+<!-- doc/src/sgml/pglogicalinspect.sgml -->
+
+<sect1 id="pglogicalinspect" xreflabel="pg_logicalinspect">
+ <title>pg_logicalinspect &mdash; logical decoding components inspection</title>
+
+ <indexterm zone="pglogicalinspect">
+  <primary>pg_logicalinspect</primary>
+ </indexterm>
+
+ <para>
+  The <filename>pg_logicalinspect</filename> module provides SQL functions
+  that allow you to inspect the contents of logical decoding components. It
+  allows the inspection of serialized logical snapshots of a running
+  <productname>PostgreSQL</productname> database cluster, which is useful
+  for debugging or educational purposes.
+ </para>
+
+ <note>
+  <para>
+   The <filename>pg_logicalinspect</filename> functions are called
+   using an LSN argument that can be extracted from the output name of the
+   <function>pg_ls_logicalsnapdir</function>() function.
+  </para>
+ </note>
+
+ <para>
+  By default, use of these functions is restricted to superusers and members of
+  the <literal>pg_read_server_files</literal> role. Access may be granted by
+  superusers to others using <command>GRANT</command>.
+ </para>
+
+ <sect2 id="pglogicalinspect-funcs">
+  <title>General Functions</title>
+
+  <variablelist>
+   <varlistentry id="pglogicalinspect-funcs-pg-get-logical-snapshot-meta">
+    <term>
+     <function>pg_get_logical_snapshot_meta(in_lsn pg_lsn) returns record</function>
+    </term>
+
+    <listitem>
+     <para>
+      Gets logical snapshot metadata about a snapshot file that is located in
+      the server's <filename>pg_logical/snapshots</filename> directory.
+      The <replaceable>in_lsn</replaceable> argument can be extracted from the
+      snapshot file name.
+      For example:
+<screen>
+postgres=# SELECT * FROM pg_ls_logicalsnapdir();
+-[ RECORD 1 ]+-----------------------
+name         | 0-40796E18.snap
+size         | 152
+modification | 2024-08-14 16:36:32+00
+
+postgres=# SELECT * FROM pg_get_logical_snapshot_meta('0/40796E18');
+-[ RECORD 1 ]--------
+magic    | 1369563137
+checksum | 1028045905
+version  | 6
+
+postgres=# SELECT (pg_get_logical_snapshot_meta(f.name::pg_lsn)).*
+           FROM (SELECT replace(replace(name,'.snap',''),'-','/') AS name
+                 FROM pg_ls_logicalsnapdir()) AS f;
+-[ RECORD 1 ]--------
+magic    | 1369563137
+checksum | 1028045905
+version  | 6
+</screen>
+     </para>
+     <para>
+      If <replaceable>in_lsn</replaceable> does not match a snapshot file, the
+      function raises an error.
+     </para>
+    </listitem>
+   </varlistentry>
+
+   <varlistentry id="pglogicalinspect-funcs-pg-get-logical-snapshot-info">
+    <term>
+     <function>pg_get_logical_snapshot_info(in_lsn pg_lsn) returns record</function>
+    </term>
+
+    <listitem>
+     <para>
+      Gets logical snapshot information about a snapshot file that is located in
+      the server's <filename>pg_logical/snapshots</filename> directory.
+      The <replaceable>in_lsn</replaceable> argument can be extracted from the
+      snapshot file name.
+      For example:
+<screen>
+postgres=# SELECT * FROM pg_ls_logicalsnapdir();
+-[ RECORD 1 ]+-----------------------
+name         | 0-40796E18.snap
+size         | 152
+modification | 2024-08-14 16:36:32+00
+
+postgres=# SELECT * FROM pg_get_logical_snapshot_info('0/40796E18');
+-[ RECORD 1 ]------------+-----------
+state                    | consistent
+xmin                     | 751
+xmax                     | 751
+start_decoding_at        | 0/40796AF8
+two_phase_at             | 0/40796AF8
+initial_xmin_horizon     | 0
+building_full_snapshot   | f
+in_slot_creation         | f
+last_serialized_snapshot | 0/0
+next_phase_at            | 0
+committed_count          | 0
+committed_xip            |
+catchange_count          | 2
+catchange_xip            | {751,752}
+
+postgres=# SELECT (pg_get_logical_snapshot_info(f.name::pg_lsn)).*
+           FROM (SELECT replace(replace(name,'.snap',''),'-','/') AS name
+                 FROM pg_ls_logicalsnapdir()) AS f;
+-[ RECORD 1 ]------------+-----------
+state                    | consistent
+xmin                     | 751
+xmax                     | 751
+start_decoding_at        | 0/40796AF8
+two_phase_at             | 0/40796AF8
+initial_xmin_horizon     | 0
+building_full_snapshot   | f
+in_slot_creation         | f
+last_serialized_snapshot | 0/0
+next_phase_at            | 0
+committed_count          | 0
+committed_xip            |
+catchange_count          | 2
+catchange_xip            | {751,752}
+</screen>
+     </para>
+     <para>
+      If <replaceable>in_lsn</replaceable> does not match a snapshot file, the
+      function raises an error.
+     </para>
+    </listitem>
+   </varlistentry>
+
+  </variablelist>
+ </sect2>
+
+ <sect2 id="pglogicalinspect-author">
+  <title>Author</title>
+
+  <para>
+   Bertrand Drouvot <email>bertranddrouvot...@gmail.com</email>
+  </para>
+ </sect2>
+
+</sect1>
diff --git a/src/backend/replication/logical/snapbuild.c b/src/backend/replication/logical/snapbuild.c
index 0450f94ba8..bb7def9440 100644
--- a/src/backend/replication/logical/snapbuild.c
+++ b/src/backend/replication/logical/snapbuild.c
@@ -134,6 +134,7 @@
 #include "replication/logical.h"
 #include "replication/reorderbuffer.h"
 #include "replication/snapbuild.h"
+#include "replication/snapbuild_internal.h"
 #include "storage/fd.h"
 #include "storage/lmgr.h"
 #include "storage/proc.h"
@@ -143,146 +144,6 @@
 #include "utils/memutils.h"
 #include "utils/snapmgr.h"
 #include "utils/snapshot.h"
-
-/*
- * This struct contains the current state of the snapshot building
- * machinery. Besides a forward declaration in the header, it is not exposed
- * to the public, so we can easily change its contents.
- */
-struct SnapBuild
-{
-	/* how far are we along building our first full snapshot */
-	SnapBuildState state;
-
-	/* private memory context used to allocate memory for this module. */
-	MemoryContext context;
-
-	/* all transactions < than this have committed/aborted */
-	TransactionId xmin;
-
-	/* all transactions >= than this are uncommitted */
-	TransactionId xmax;
-
-	/*
-	 * Don't replay commits from an LSN < this LSN. This can be set externally
-	 * but it will also be advanced (never retreat) from within snapbuild.c.
-	 */
-	XLogRecPtr	start_decoding_at;
-
-	/*
-	 * LSN at which two-phase decoding was enabled or LSN at which we found a
-	 * consistent point at the time of slot creation.
-	 *
-	 * The prepared transactions, that were skipped because previously
-	 * two-phase was not enabled or are not covered by initial snapshot, need
-	 * to be sent later along with commit prepared and they must be before
-	 * this point.
-	 */
-	XLogRecPtr	two_phase_at;
-
-	/*
-	 * Don't start decoding WAL until the "xl_running_xacts" information
-	 * indicates there are no running xids with an xid smaller than this.
-	 */
-	TransactionId initial_xmin_horizon;
-
-	/* Indicates if we are building full snapshot or just catalog one. */
-	bool		building_full_snapshot;
-
-	/*
-	 * Indicates if we are using the snapshot builder for the creation of a
-	 * logical replication slot. If it's true, the start point for decoding
-	 * changes is not determined yet. So we skip snapshot restores to properly
-	 * find the start point. See SnapBuildFindSnapshot() for details.
-	 */
-	bool		in_slot_creation;
-
-	/*
-	 * Snapshot that's valid to see the catalog state seen at this moment.
-	 */
-	Snapshot	snapshot;
-
-	/*
-	 * LSN of the last location we are sure a snapshot has been serialized to.
-	 */
-	XLogRecPtr	last_serialized_snapshot;
-
-	/*
-	 * The reorderbuffer we need to update with usable snapshots et al.
-	 */
-	ReorderBuffer *reorder;
-
-	/*
-	 * TransactionId at which the next phase of initial snapshot building will
-	 * happen. InvalidTransactionId if not known (i.e. SNAPBUILD_START), or
-	 * when no next phase necessary (SNAPBUILD_CONSISTENT).
-	 */
-	TransactionId next_phase_at;
-
-	/*
-	 * Array of transactions which could have catalog changes that committed
-	 * between xmin and xmax.
-	 */
-	struct
-	{
-		/* number of committed transactions */
-		size_t		xcnt;
-
-		/* available space for committed transactions */
-		size_t		xcnt_space;
-
-		/*
-		 * Until we reach a CONSISTENT state, we record commits of all
-		 * transactions, not just the catalog changing ones. Record when that
-		 * changes so we know we cannot export a snapshot safely anymore.
-		 */
-		bool		includes_all_transactions;
-
-		/*
-		 * Array of committed transactions that have modified the catalog.
-		 *
-		 * As this array is frequently modified we do *not* keep it in
-		 * xidComparator order. Instead we sort the array when building &
-		 * distributing a snapshot.
-		 *
-		 * TODO: It's unclear whether that reasoning has much merit. Every
-		 * time we add something here after becoming consistent will also
-		 * require distributing a snapshot. Storing them sorted would
-		 * potentially also make it easier to purge (but more complicated wrt
-		 * wraparound?). Should be improved if sorting while building the
-		 * snapshot shows up in profiles.
-		 */
-		TransactionId *xip;
-	}			committed;
-
-	/*
-	 * Array of transactions and subtransactions that had modified catalogs
-	 * and were running when the snapshot was serialized.
-	 *
-	 * We normally rely on some WAL record types such as HEAP2_NEW_CID to know
-	 * if the transaction has changed the catalog. But it could happen that
-	 * the logical decoding decodes only the commit record of the transaction
-	 * after restoring the previously serialized snapshot in which case we
-	 * will miss adding the xid to the snapshot and end up looking at the
-	 * catalogs with the wrong snapshot.
-	 *
-	 * Now to avoid the above problem, we serialize the transactions that had
-	 * modified the catalogs and are still running at the time of snapshot
-	 * serialization. We fill this array while restoring the snapshot and then
-	 * refer it while decoding commit to ensure if the xact has modified the
-	 * catalog. We discard this array when all the xids in the list become old
-	 * enough to matter. See SnapBuildPurgeOlderTxn for details.
-	 */
-	struct
-	{
-		/* number of transactions */
-		size_t		xcnt;
-
-		/* This array must be sorted in xidComparator order */
-		TransactionId *xip;
-	}			catchange;
-};
-
 /*
  * Starting a transaction -- which we need to do while exporting a snapshot --
  * removes knowledge about the previously used resowner, so we save it here.
@@ -312,7 +173,6 @@ static void SnapBuildWaitSnapshot(xl_running_xacts *running, TransactionId cutof
 /* serialization functions */
 static void SnapBuildSerialize(SnapBuild *builder, XLogRecPtr lsn);
 static bool SnapBuildRestore(SnapBuild *builder, XLogRecPtr lsn);
-static void SnapBuildRestoreContents(int fd, char *dest, Size size, const char *path);
 
 /*
  * Allocate a new snapshot builder.
@@ -1557,48 +1417,6 @@ SnapBuildWaitSnapshot(xl_running_xacts *running, TransactionId cutoff)
 	}
 }
 
-/* -----------------------------------
- * Snapshot serialization support
- * -----------------------------------
- */
-
-/*
- * We store current state of struct SnapBuild on disk in the following manner:
- *
- * struct SnapBuildOnDisk;
- * TransactionId * committed.xcnt; (*not xcnt_space*)
- * TransactionId * catchange.xcnt;
- *
- */
-typedef struct SnapBuildOnDisk
-{
-	/* first part of this struct needs to be version independent */
-
-	/* data not covered by checksum */
-	uint32		magic;
-	pg_crc32c	checksum;
-
-	/* data covered by checksum */
-
-	/* version, in case we want to support pg_upgrade */
-	uint32		version;
-	/* how large is the on disk data, excluding the constant sized part */
-	uint32		length;
-
-	/* version dependent part */
-	SnapBuild	builder;
-
-	/* variable amount of TransactionIds follows */
-} SnapBuildOnDisk;
-
-#define SnapBuildOnDiskConstantSize \
-	offsetof(SnapBuildOnDisk, builder)
-#define SnapBuildOnDiskNotChecksummedSize \
-	offsetof(SnapBuildOnDisk, version)
-
-#define SNAPBUILD_MAGIC 0x51A1E001
-#define SNAPBUILD_VERSION 6
-
 /*
  * Store/Load a snapshot from disk, depending on the snapshot builder's state.
  *
@@ -1859,6 +1677,10 @@ out:
 /*
  * Restore a snapshot into 'builder' if previously one has been stored at the
  * location indicated by 'lsn'. Returns true if successful, false otherwise.
+ *
+ * NOTE: For any code change or issue fix here, it is highly recommended to
+ * give a thought about doing the same in pg_logicalinspect contrib module
+ * as well.
  */
 static bool
 SnapBuildRestore(SnapBuild *builder, XLogRecPtr lsn)
@@ -2033,7 +1855,7 @@ snapshot_not_interesting:
 /*
  * Read the contents of the serialized snapshot to 'dest'.
  */
-static void
+void
 SnapBuildRestoreContents(int fd, char *dest, Size size, const char *path)
 {
 	int			readBytes;
diff --git a/src/include/port/pg_crc32c.h b/src/include/port/pg_crc32c.h
index 63c8e3a00b..cfc8c07944 100644
--- a/src/include/port/pg_crc32c.h
+++ b/src/include/port/pg_crc32c.h
@@ -47,7 +47,7 @@ typedef uint32 pg_crc32c;
 	((crc) = pg_comp_crc32c_sse42((crc), (data), (len)))
 #define FIN_CRC32C(crc) ((crc) ^= 0xFFFFFFFF)
 
-extern pg_crc32c pg_comp_crc32c_sse42(pg_crc32c crc, const void *data, size_t len);
+extern PGDLLIMPORT pg_crc32c pg_comp_crc32c_sse42(pg_crc32c crc, const void *data, size_t len);
 
 #elif defined(USE_ARMV8_CRC32C)
 /* Use ARMv8 CRC Extension instructions. */
@@ -56,7 +56,7 @@ extern pg_crc32c pg_comp_crc32c_sse42(pg_crc32c crc, const void *data, size_t le
 	((crc) = pg_comp_crc32c_armv8((crc), (data), (len)))
 #define FIN_CRC32C(crc) ((crc) ^= 0xFFFFFFFF)
 
-extern pg_crc32c pg_comp_crc32c_armv8(pg_crc32c crc, const void *data, size_t len);
+extern PGDLLIMPORT pg_crc32c pg_comp_crc32c_armv8(pg_crc32c crc, const void *data, size_t len);
 
 #elif defined(USE_LOONGARCH_CRC32C)
 /* Use LoongArch CRCC instructions. */
@@ -65,7 +65,7 @@ extern pg_crc32c pg_comp_crc32c_armv8(pg_crc32c crc, const void *data, size_t le
 	((crc) = pg_comp_crc32c_loongarch((crc), (data), (len)))
 #define FIN_CRC32C(crc) ((crc) ^= 0xFFFFFFFF)
 
-extern pg_crc32c pg_comp_crc32c_loongarch(pg_crc32c crc, const void *data, size_t len);
+extern PGDLLIMPORT pg_crc32c pg_comp_crc32c_loongarch(pg_crc32c crc, const void *data, size_t len);
 
 #elif defined(USE_SSE42_CRC32C_WITH_RUNTIME_CHECK) || defined(USE_ARMV8_CRC32C_WITH_RUNTIME_CHECK)
 
@@ -77,14 +77,14 @@ extern pg_crc32c pg_comp_crc32c_loongarch(pg_crc32c crc, const void *data, size_
 	((crc) = pg_comp_crc32c((crc), (data), (len)))
 #define FIN_CRC32C(crc) ((crc) ^= 0xFFFFFFFF)
 
-extern pg_crc32c pg_comp_crc32c_sb8(pg_crc32c crc, const void *data, size_t len);
-extern pg_crc32c (*pg_comp_crc32c) (pg_crc32c crc, const void *data, size_t len);
+extern PGDLLIMPORT pg_crc32c pg_comp_crc32c_sb8(pg_crc32c crc, const void *data, size_t len);
+extern PGDLLIMPORT pg_crc32c (*pg_comp_crc32c) (pg_crc32c crc, const void *data, size_t len);
 
 #ifdef USE_SSE42_CRC32C_WITH_RUNTIME_CHECK
-extern pg_crc32c pg_comp_crc32c_sse42(pg_crc32c crc, const void *data, size_t len);
+extern PGDLLIMPORT pg_crc32c pg_comp_crc32c_sse42(pg_crc32c crc, const void *data, size_t len);
 #endif
 #ifdef USE_ARMV8_CRC32C_WITH_RUNTIME_CHECK
-extern pg_crc32c pg_comp_crc32c_armv8(pg_crc32c crc, const void *data, size_t len);
+extern PGDLLIMPORT pg_crc32c pg_comp_crc32c_armv8(pg_crc32c crc, const void *data, size_t len);
 #endif
 
 #else
@@ -103,7 +103,7 @@ extern pg_crc32c pg_comp_crc32c_armv8(pg_crc32c crc, const void *data, size_t le
 #define FIN_CRC32C(crc) ((crc) ^= 0xFFFFFFFF)
 #endif
 
-extern pg_crc32c pg_comp_crc32c_sb8(pg_crc32c crc, const void *data, size_t len);
+extern PGDLLIMPORT pg_crc32c pg_comp_crc32c_sb8(pg_crc32c crc, const void *data, size_t len);
 
 #endif
 
diff --git a/src/include/replication/snapbuild.h b/src/include/replication/snapbuild.h
index caa5113ff8..e844a89882 100644
--- a/src/include/replication/snapbuild.h
+++ b/src/include/replication/snapbuild.h
@@ -15,6 +15,10 @@
 #include "access/xlogdefs.h"
 #include "utils/snapmgr.h"
 
+/*
+ * Please keep SnapBuildStateDesc[] (located in the pg_logicalinspect module)
+ * updated if a change needs to be made to SnapBuildState.
+ */
 typedef enum
 {
 	/*
@@ -46,7 +50,7 @@ typedef enum
 	SNAPBUILD_CONSISTENT = 2,
 } SnapBuildState;
 
-/* forward declare so we don't have to expose the struct to the public */
+/* forward declare so we don't have to include snapbuild_internal.h */
 struct SnapBuild;
 typedef struct SnapBuild SnapBuild;
 
diff --git a/src/include/replication/snapbuild_internal.h b/src/include/replication/snapbuild_internal.h
new file mode 100644
index 0000000000..0e47ddfcb4
--- /dev/null
+++ b/src/include/replication/snapbuild_internal.h
@@ -0,0 +1,204 @@
+/*-------------------------------------------------------------------------
+ *
+ * snapbuild_internal.h
+ *    This file contains declarations for logical decoding utility
+ *    functions for internal use.
+ *
+ * Copyright (c) 2024, PostgreSQL Global Development Group
+ *
+ * src/include/replication/snapbuild_internal.h
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#ifndef SNAPBUILD_INTERNAL_H
+#define SNAPBUILD_INTERNAL_H
+
+#include "port/pg_crc32c.h"
+#include "replication/reorderbuffer.h"
+#include "replication/snapbuild.h"
+
+/*
+ * This struct contains the current state of the snapshot building
+ * machinery. It is exposed to the public, so pay attention when changing its
+ * contents.
+ */
+typedef struct SnapBuild
+{
+	/* how far are we along building our first full snapshot */
+	SnapBuildState state;
+
+	/* private memory context used to allocate memory for this module. */
+	MemoryContext context;
+
+	/* all transactions < than this have committed/aborted */
+	TransactionId xmin;
+
+	/* all transactions >= than this are uncommitted */
+	TransactionId xmax;
+
+	/*
+	 * Don't replay commits from an LSN < this LSN. This can be set externally
+	 * but it will also be advanced (never retreat) from within snapbuild.c.
+	 */
+	XLogRecPtr	start_decoding_at;
+
+	/*
+	 * LSN at which two-phase decoding was enabled or LSN at which we found a
+	 * consistent point at the time of slot creation.
+	 *
+	 * The prepared transactions, that were skipped because previously
+	 * two-phase was not enabled or are not covered by initial snapshot, need
+	 * to be sent later along with commit prepared and they must be before
+	 * this point.
+	 */
+	XLogRecPtr	two_phase_at;
+
+	/*
+	 * Don't start decoding WAL until the "xl_running_xacts" information
+	 * indicates there are no running xids with an xid smaller than this.
+	 */
+	TransactionId initial_xmin_horizon;
+
+	/* Indicates if we are building full snapshot or just catalog one. */
+	bool		building_full_snapshot;
+
+	/*
+	 * Indicates if we are using the snapshot builder for the creation of a
+	 * logical replication slot. If it's true, the start point for decoding
+	 * changes is not determined yet. So we skip snapshot restores to properly
+	 * find the start point. See SnapBuildFindSnapshot() for details.
+	 */
+	bool		in_slot_creation;
+
+	/*
+	 * Snapshot that's valid to see the catalog state seen at this moment.
+	 */
+	Snapshot	snapshot;
+
+	/*
+	 * LSN of the last location we are sure a snapshot has been serialized to.
+	 */
+	XLogRecPtr	last_serialized_snapshot;
+
+	/*
+	 * The reorderbuffer we need to update with usable snapshots et al.
+	 */
+	ReorderBuffer *reorder;
+
+	/*
+	 * TransactionId at which the next phase of initial snapshot building will
+	 * happen. InvalidTransactionId if not known (i.e. SNAPBUILD_START), or
+	 * when no next phase necessary (SNAPBUILD_CONSISTENT).
+	 */
+	TransactionId next_phase_at;
+
+	/*
+	 * Array of transactions which could have catalog changes that committed
+	 * between xmin and xmax.
+	 */
+	struct
+	{
+		/* number of committed transactions */
+		size_t		xcnt;
+
+		/* available space for committed transactions */
+		size_t		xcnt_space;
+
+		/*
+		 * Until we reach a CONSISTENT state, we record commits of all
+		 * transactions, not just the catalog changing ones. Record when that
+		 * changes so we know we cannot export a snapshot safely anymore.
+		 */
+		bool		includes_all_transactions;
+
+		/*
+		 * Array of committed transactions that have modified the catalog.
+		 *
+		 * As this array is frequently modified we do *not* keep it in
+		 * xidComparator order. Instead we sort the array when building &
+		 * distributing a snapshot.
+		 *
+		 * TODO: It's unclear whether that reasoning has much merit. Every
+		 * time we add something here after becoming consistent will also
+		 * require distributing a snapshot. Storing them sorted would
+		 * potentially also make it easier to purge (but more complicated wrt
+		 * wraparound?). Should be improved if sorting while building the
+		 * snapshot shows up in profiles.
+		 */
+		TransactionId *xip;
+	}			committed;
+
+	/*
+	 * Array of transactions and subtransactions that had modified catalogs
+	 * and were running when the snapshot was serialized.
+	 *
+	 * We normally rely on some WAL record types such as HEAP2_NEW_CID to know
+	 * if the transaction has changed the catalog. But it could happen that
+	 * the logical decoding decodes only the commit record of the transaction
+	 * after restoring the previously serialized snapshot in which case we
+	 * will miss adding the xid to the snapshot and end up looking at the
+	 * catalogs with the wrong snapshot.
+	 *
+	 * Now to avoid the above problem, we serialize the transactions that had
+	 * modified the catalogs and are still running at the time of snapshot
+	 * serialization. We fill this array while restoring the snapshot and then
+	 * refer it while decoding commit to ensure if the xact has modified the
+	 * catalog. We discard this array when all the xids in the list become old
+	 * enough to matter. See SnapBuildPurgeOlderTxn for details.
+	 */
+	struct
+	{
+		/* number of transactions */
+		size_t		xcnt;
+
+		/* This array must be sorted in xidComparator order */
+		TransactionId *xip;
+	}			catchange;
+} SnapBuild;
+
+/* -----------------------------------
+ * Snapshot serialization support
+ * -----------------------------------
+ */
+
+/*
+ * We store current state of struct SnapBuild on disk in the following manner:
+ *
+ * struct SnapBuildOnDisk;
+ * TransactionId * committed.xcnt; (*not xcnt_space*)
+ * TransactionId * catchange.xcnt;
+ *
+ */
+typedef struct SnapBuildOnDisk
+{
+	/* first part of this struct needs to be version independent */
+
+	/* data not covered by checksum */
+	uint32		magic;
+	pg_crc32c	checksum;
+
+	/* data covered by checksum */
+
+	/* version, in case we want to support pg_upgrade */
+	uint32		version;
+	/* how large is the on disk data, excluding the constant sized part */
+	uint32		length;
+
+	/* version dependent part */
+	SnapBuild	builder;
+
+	/* variable amount of TransactionIds follows */
+} SnapBuildOnDisk;
+
+#define SnapBuildOnDiskConstantSize \
+	offsetof(SnapBuildOnDisk, builder)
+#define SnapBuildOnDiskNotChecksummedSize \
+	offsetof(SnapBuildOnDisk, version)
+
+#define SNAPBUILD_MAGIC 0x51A1E001
+#define SNAPBUILD_VERSION 6
+
+extern void SnapBuildRestoreContents(int fd, char *dest, Size size, const char *path);
+
+#endif							/* SNAPBUILD_INTERNAL_H */
-- 
2.34.1

Reply via email to