On Sun, Apr 5, 2026 at 2:42 PM Thomas Munro <[email protected]> wrote:
> Perhaps that could be fixed if astreamer_file.c provided
> "astreamer_file_reader" with the same semantics, so that it could
> unconditionally call astreamer_pull(privateInfo->archive_streamer),
> instead of doing the read, push-into-stream itself?  Just a thought.

That seems to work.
From c13c056bb33268bffb85922d25e8b01b14083e5c Mon Sep 17 00:00:00 2001
From: Thomas Munro <[email protected]>
Date: Thu, 9 Apr 2026 20:02:02 +1200
Subject: [PATCH v2 1/6] Provide astreamer_plain_reader for reading files.

A new public API astreamer_pull_content() can be used to tell a source
astreamer to read more data and send it to the next astreamer.
astreamer_plain_reader is the opposite of astreamer_plain_writer.
---
 src/fe_utils/astreamer_file.c    | 87 ++++++++++++++++++++++++++++++++
 src/include/fe_utils/astreamer.h | 19 ++++++-
 src/tools/pgindent/typedefs.list |  1 +
 3 files changed, 105 insertions(+), 2 deletions(-)

diff --git a/src/fe_utils/astreamer_file.c b/src/fe_utils/astreamer_file.c
index 0fca70a4f86..3fb9ed72eea 100644
--- a/src/fe_utils/astreamer_file.c
+++ b/src/fe_utils/astreamer_file.c
@@ -21,6 +21,18 @@
 #include "common/logging.h"
 #include "fe_utils/astreamer.h"
 
+/* Size of internal buffer used by astreamer_plain_reader. */
+#define ASTREAMER_PLAIN_READER_BUFFER (128 * 1024)
+
+typedef struct astreamer_plain_reader
+{
+	astreamer	base;
+	FILE	   *file;
+	size_t		size;
+	char		pathname[MAXPGPATH];
+	char		data[ASTREAMER_PLAIN_READER_BUFFER];
+} astreamer_plain_reader;
+
 typedef struct astreamer_plain_writer
 {
 	astreamer	base;
@@ -39,6 +51,16 @@ typedef struct astreamer_extractor
 	FILE	   *file;
 } astreamer_extractor;
 
+static bool astreamer_plain_reader_pull_content(astreamer *streamer);
+static void astreamer_plain_reader_finalize(astreamer *streamer);
+static void astreamer_plain_reader_free(astreamer *streamer);
+
+static const astreamer_ops astreamer_plain_reader_ops = {
+	.pull_content = astreamer_plain_reader_pull_content,
+	.finalize = astreamer_plain_reader_finalize,
+	.free = astreamer_plain_reader_free
+};
+
 static void astreamer_plain_writer_content(astreamer *streamer,
 										   astreamer_member *member,
 										   const char *data, int len,
@@ -68,6 +90,71 @@ static const astreamer_ops astreamer_extractor_ops = {
 	.free = astreamer_extractor_free
 };
 
+/*
+ * Create a 'source' astreamer that just reads data from a file.
+ *
+ * It must be first in a chain of astreamers, and it should be asked to read
+ * more of the file by calling astreamer_pull_content().  Each call that finds
+ * data pushes raw bytes with context ASTREAMER_UNKNOWN into the astreamer
+ * provided as 'next'.  The file is opened in binary mode; failure is fatal.
+ */
+astreamer *
+astreamer_plain_reader_new(astreamer *next, const char *pathname)
+{
+	astreamer_plain_reader *streamer;
+
+	streamer = palloc_object(astreamer_plain_reader);
+	*((const astreamer_ops **) &streamer->base.bbs_ops) =
+		&astreamer_plain_reader_ops;
+	streamer->base.bbs_next = next;
+	strlcpy(streamer->pathname, pathname, sizeof(streamer->pathname));
+	streamer->file = fopen(pathname, "rb");
+	if (streamer->file == NULL)
+		pg_fatal("astreamer_plain_reader: could not open file \"%s\": %m",
+				 pathname);
+
+	return &streamer->base;
+}
+
+static bool
+astreamer_plain_reader_pull_content(astreamer *streamer)
+{
+	astreamer_plain_reader *mystreamer = (astreamer_plain_reader *) streamer;
+
+	mystreamer->size = fread(mystreamer->data,
+							 1,
+							 sizeof(mystreamer->data),
+							 mystreamer->file);
+
+	if (mystreamer->size == 0)
+	{
+		if (ferror(mystreamer->file))
+			pg_fatal("could not read file \"%s\"", mystreamer->pathname);
+		return false;
+	}
+	astreamer_content(mystreamer->base.bbs_next,
+					  NULL,
+					  mystreamer->data,
+					  mystreamer->size,
+					  ASTREAMER_UNKNOWN);
+	return true;
+}
+
+static void
+astreamer_plain_reader_finalize(astreamer *streamer)
+{
+	astreamer_finalize(streamer->bbs_next);
+}
+
+static void
+astreamer_plain_reader_free(astreamer *streamer)
+{
+	astreamer_plain_reader *mystreamer = (astreamer_plain_reader *) streamer;
+
+	fclose(mystreamer->file);	/* opened with fopen(), so not pclose() */
+	pfree(mystreamer);
+}
+
 /*
  * Create a astreamer that just writes data to a file.
  *
diff --git a/src/include/fe_utils/astreamer.h b/src/include/fe_utils/astreamer.h
index 8329e4efbc5..2509d157bc5 100644
--- a/src/include/fe_utils/astreamer.h
+++ b/src/include/fe_utils/astreamer.h
@@ -114,8 +114,10 @@ struct astreamer
 };
 
 /*
- * There are three callbacks for a astreamer. The 'content' callback is
- * called repeatedly, as described in the astreamer_archive_context comments.
+ * There are four callbacks for an astreamer. The 'content' callback is called
+ * repeatedly, as described in the astreamer_archive_context comments, to push
+ * data through an astreamer chain. The 'pull_content' variant is an
+ * alternative, for certain astreamers that act as a source of data themselves.
  * Then, the 'finalize' callback is called once at the end, to give the
  * astreamer a chance to perform cleanup such as closing files. Finally,
  * because this code is running in a frontend environment where, as of this
@@ -125,6 +127,7 @@ struct astreamer
  */
 struct astreamer_ops
 {
+	bool		(*pull_content) (astreamer *streamer);
 	void		(*content) (astreamer *streamer, astreamer_member *member,
 							const char *data, int len,
 							astreamer_archive_context context);
@@ -132,6 +135,16 @@ struct astreamer_ops
 	void		(*free) (astreamer *streamer);
 };
 
+/*
+ * Tell a 'source' astreamer to consume content from the source it represents,
+ * and report whether there is any more data.
+ */
+static inline bool
+astreamer_pull_content(astreamer *streamer)
+{
+	return streamer->bbs_ops->pull_content(streamer);
+}
+
 /* Send some content to a astreamer. */
 static inline void
 astreamer_content(astreamer *streamer, astreamer_member *member,
@@ -210,6 +223,8 @@ astreamer_buffer_until(astreamer *streamer, const char **data, int *len,
  * Functions for creating astreamer objects of various types. See the header
  * comments for each of these functions for details.
  */
+extern astreamer *astreamer_plain_reader_new(astreamer *next,
+											 const char *pathname);
 extern astreamer *astreamer_plain_writer_new(char *pathname, FILE *file);
 extern astreamer *astreamer_gzip_writer_new(char *pathname, FILE *file,
 											pg_compress_specification *compress);
diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list
index ea95e7984bc..a936c2c9c49 100644
--- a/src/tools/pgindent/typedefs.list
+++ b/src/tools/pgindent/typedefs.list
@@ -3592,6 +3592,7 @@ astreamer_gzip_writer
 astreamer_lz4_frame
 astreamer_member
 astreamer_ops
+astreamer_plain_reader
 astreamer_plain_writer
 astreamer_recovery_injector
 astreamer_tar_archiver
-- 
2.53.0

From 1a30aa3f3f5cd0ec54082961ba9e52fbc7dfe28c Mon Sep 17 00:00:00 2001
From: Thomas Munro <[email protected]>
Date: Fri, 10 Apr 2026 16:22:30 +1200
Subject: [PATCH v2 2/6] pg_waldump: Read data with astreamer_plain_reader.

Previously, archive_waldump.c would read a file and "push" data into
astreamer_content().  Now, it attaches astreamer_plain_reader to the
source end of the astreamer chain, and tells it to "pull" data in with
astreamer_pull_content().  The end result is the same.
---
 src/bin/pg_waldump/archive_waldump.c | 69 ++++++----------------------
 src/bin/pg_waldump/pg_waldump.h      |  5 +-
 2 files changed, 14 insertions(+), 60 deletions(-)

diff --git a/src/bin/pg_waldump/archive_waldump.c b/src/bin/pg_waldump/archive_waldump.c
index e4a4bf44a7e..b27888a5056 100644
--- a/src/bin/pg_waldump/archive_waldump.c
+++ b/src/bin/pg_waldump/archive_waldump.c
@@ -23,11 +23,6 @@
 #include "fe_utils/simple_list.h"
 #include "pg_waldump.h"
 
-/*
- * How many bytes should we try to read from a file at once?
- */
-#define READ_CHUNK_SIZE				(128 * 1024)
-
 /* Temporary directory for spilled WAL segment files */
 char	   *TmpWalSegDir = NULL;
 
@@ -129,21 +124,18 @@ void
 init_archive_reader(XLogDumpPrivate *privateInfo,
 					pg_compress_algorithm compression)
 {
-	int			fd;
 	astreamer  *streamer;
 	ArchivedWALFile *entry = NULL;
 	XLogLongPageHeader longhdr;
 	ArchivedWAL_iterator iter;
+	char		pathname[MAXPGPATH];
 
-	/* Open tar archive and store its file descriptor */
-	fd = open_file_in_directory(privateInfo->archive_dir,
-								privateInfo->archive_name);
-
-	if (fd < 0)
-		pg_fatal("could not open file \"%s\"", privateInfo->archive_name);
-
-	privateInfo->archive_fd = fd;
-	privateInfo->archive_fd_eof = false;
+	/* Construct tar archive pathname. */
+	snprintf(pathname,
+			 sizeof(pathname),
+			 "%s/%s",
+			 privateInfo->archive_dir,
+			 privateInfo->archive_name);
 
 	streamer = astreamer_waldump_new(privateInfo);
 
@@ -158,14 +150,10 @@ init_archive_reader(XLogDumpPrivate *privateInfo,
 	else if (compression == PG_COMPRESSION_ZSTD)
 		streamer = astreamer_zstd_decompressor_new(streamer);
 
-	privateInfo->archive_streamer = streamer;
+	/* And before that, we have to read the file. */
+	streamer = astreamer_plain_reader_new(streamer, pathname);
 
-	/*
-	 * Allocate a buffer for reading the archive file to begin content
-	 * decoding.
-	 */
-	privateInfo->archive_read_buf = pg_malloc(READ_CHUNK_SIZE);
-	privateInfo->archive_read_buf_size = READ_CHUNK_SIZE;
+	privateInfo->archive_streamer = streamer;
 
 	/*
 	 * Hash table storing WAL entries read from the archive with an arbitrary
@@ -278,18 +266,6 @@ free_archive_reader(XLogDumpPrivate *privateInfo)
 		ArchivedWAL_destroy(privateInfo->archive_wal_htab);
 		privateInfo->archive_wal_htab = NULL;
 	}
-
-	/* Free the reusable read buffer. */
-	if (privateInfo->archive_read_buf != NULL)
-	{
-		pg_free(privateInfo->archive_read_buf);
-		privateInfo->archive_read_buf = NULL;
-	}
-
-	/* Close the file. */
-	if (close(privateInfo->archive_fd) != 0)
-		pg_log_error("could not close file \"%s\": %m",
-					 privateInfo->archive_name);
 }
 
 /*
@@ -537,28 +513,11 @@ get_archive_wal_entry(const char *fname, XLogDumpPrivate *privateInfo)
 static bool
 read_archive_file(XLogDumpPrivate *privateInfo)
 {
-	int			rc;
-
-	/* Fail if we already reached EOF in a prior call. */
-	if (privateInfo->archive_fd_eof)
+	if (privateInfo->archive_streamer_finalized)
 		return false;
 
 	/* Try to read some more data. */
-	rc = read(privateInfo->archive_fd, privateInfo->archive_read_buf,
-			  privateInfo->archive_read_buf_size);
-	if (rc < 0)
-		pg_fatal("could not read file \"%s\": %m",
-				 privateInfo->archive_name);
-
-	/*
-	 * Decompress (if required), and then parse the previously read contents
-	 * of the tar file.
-	 */
-	if (rc > 0)
-		astreamer_content(privateInfo->archive_streamer, NULL,
-						  privateInfo->archive_read_buf, rc,
-						  ASTREAMER_UNKNOWN);
-	else
+	if (!astreamer_pull_content(privateInfo->archive_streamer))
 	{
 		/*
 		 * We reached EOF, but there is probably still data queued in the
@@ -566,10 +525,8 @@ read_archive_file(XLogDumpPrivate *privateInfo)
 		 * process everything.
 		 */
 		astreamer_finalize(privateInfo->archive_streamer);
-		/* Set flag to ensure we don't finalize more than once. */
-		privateInfo->archive_fd_eof = true;
+		privateInfo->archive_streamer_finalized = true;
 	}
-
 	return true;
 }
 
diff --git a/src/bin/pg_waldump/pg_waldump.h b/src/bin/pg_waldump/pg_waldump.h
index bd46d14f3a8..d9f1b5d4726 100644
--- a/src/bin/pg_waldump/pg_waldump.h
+++ b/src/bin/pg_waldump/pg_waldump.h
@@ -34,12 +34,9 @@ typedef struct XLogDumpPrivate
 	/* Fields required to read WAL from archive */
 	char	   *archive_dir;
 	char	   *archive_name;	/* Tar archive filename */
-	int			archive_fd;		/* File descriptor for the open tar file */
-	bool		archive_fd_eof; /* Have we reached EOF on archive_fd? */
 
 	astreamer  *archive_streamer;
-	char	   *archive_read_buf;	/* Reusable read buffer for archive I/O */
-	Size		archive_read_buf_size;
+	bool		archive_streamer_finalized;
 
 	/*
 	 * The buffer for the WAL file the archive streamer is currently reading,
-- 
2.53.0

From 287662de16a85493421ff8396a72a31e2e8e50c2 Mon Sep 17 00:00:00 2001
From: Thomas Munro <[email protected]>
Date: Sun, 5 Apr 2026 02:25:55 +1200
Subject: [PATCH v2 3/6] libarchive: Add configure and meson options.

A follow-up patch will make use of it.

(Proof-of-concept)
---
 configure                  | 140 +++++++++++++++++++++++++++++++++++++
 configure.ac               |  13 ++++
 meson.build                |  16 +++++
 meson_options.txt          |   3 +
 src/Makefile.global.in     |   4 ++
 src/include/pg_config.h.in |   3 +
 6 files changed, 179 insertions(+)

diff --git a/configure b/configure
index f66c1054a7a..28acbeb9592 100755
--- a/configure
+++ b/configure
@@ -718,6 +718,9 @@ LIBCURL_CPPFLAGS
 LIBCURL_LIBS
 LIBCURL_CFLAGS
 with_libcurl
+LIBARCHIVE_LIBS
+LIBARCHIVE_CFLAGS
+with_libarchive
 with_uuid
 LIBURING_LIBS
 LIBURING_CFLAGS
@@ -877,6 +880,7 @@ with_libedit_preferred
 with_liburing
 with_uuid
 with_ossp_uuid
+with_libarchive
 with_libcurl
 with_libnuma
 with_libxml
@@ -911,6 +915,8 @@ ICU_CFLAGS
 ICU_LIBS
 LIBURING_CFLAGS
 LIBURING_LIBS
+LIBARCHIVE_CFLAGS
+LIBARCHIVE_LIBS
 LIBCURL_CFLAGS
 LIBCURL_LIBS
 LIBNUMA_CFLAGS
@@ -1596,6 +1602,7 @@ Optional Packages:
   --with-liburing         build with io_uring support, for asynchronous I/O
   --with-uuid=LIB         build contrib/uuid-ossp using LIB (bsd,e2fs,ossp)
   --with-ossp-uuid        obsolete spelling of --with-uuid=ossp
+  --with-libarchive       build with libarchive support
   --with-libcurl          build with libcurl support
   --with-libnuma          build with libnuma support
   --with-libxml           build with XML support
@@ -1635,6 +1642,10 @@ Some influential environment variables:
               C compiler flags for LIBURING, overriding pkg-config
   LIBURING_LIBS
               linker flags for LIBURING, overriding pkg-config
+  LIBARCHIVE_CFLAGS
+              C compiler flags for LIBARCHIVE, overriding pkg-config
+  LIBARCHIVE_LIBS
+              linker flags for LIBARCHIVE, overriding pkg-config
   LIBCURL_CFLAGS
               C compiler flags for LIBCURL, overriding pkg-config
   LIBCURL_LIBS
@@ -8912,6 +8923,135 @@ fi
 
 
 
+#
+# libarchive
+#
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether to build with libarchive support" >&5
+$as_echo_n "checking whether to build with libarchive support... " >&6; }
+
+
+
+# Check whether --with-libarchive was given.
+if test "${with_libarchive+set}" = set; then :
+  withval=$with_libarchive;
+  case $withval in
+    yes)
+
+$as_echo "#define USE_LIBARCHIVE 1" >>confdefs.h
+
+      ;;
+    no)
+      :
+      ;;
+    *)
+      as_fn_error $? "no argument expected for --with-libarchive option" "$LINENO" 5
+      ;;
+  esac
+
+else
+  with_libarchive=no
+
+fi
+
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $with_libarchive" >&5
+$as_echo "$with_libarchive" >&6; }
+
+if test "$with_libarchive" = yes ; then
+
+pkg_failed=no
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for libarchive" >&5
+$as_echo_n "checking for libarchive... " >&6; }
+
+if test -n "$LIBARCHIVE_CFLAGS"; then
+    pkg_cv_LIBARCHIVE_CFLAGS="$LIBARCHIVE_CFLAGS"
+ elif test -n "$PKG_CONFIG"; then
+    if test -n "$PKG_CONFIG" && \
+    { { $as_echo "$as_me:${as_lineno-$LINENO}: \$PKG_CONFIG --exists --print-errors \"libarchive\""; } >&5
+  ($PKG_CONFIG --exists --print-errors "libarchive") 2>&5
+  ac_status=$?
+  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; }; then
+  pkg_cv_LIBARCHIVE_CFLAGS=`$PKG_CONFIG --cflags "libarchive" 2>/dev/null`
+		      test "x$?" != "x0" && pkg_failed=yes
+else
+  pkg_failed=yes
+fi
+ else
+    pkg_failed=untried
+fi
+if test -n "$LIBARCHIVE_LIBS"; then
+    pkg_cv_LIBARCHIVE_LIBS="$LIBARCHIVE_LIBS"
+ elif test -n "$PKG_CONFIG"; then
+    if test -n "$PKG_CONFIG" && \
+    { { $as_echo "$as_me:${as_lineno-$LINENO}: \$PKG_CONFIG --exists --print-errors \"libarchive\""; } >&5
+  ($PKG_CONFIG --exists --print-errors "libarchive") 2>&5
+  ac_status=$?
+  $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5
+  test $ac_status = 0; }; then
+  pkg_cv_LIBARCHIVE_LIBS=`$PKG_CONFIG --libs "libarchive" 2>/dev/null`
+		      test "x$?" != "x0" && pkg_failed=yes
+else
+  pkg_failed=yes
+fi
+ else
+    pkg_failed=untried
+fi
+
+
+
+if test $pkg_failed = yes; then
+        { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+
+if $PKG_CONFIG --atleast-pkgconfig-version 0.20; then
+        _pkg_short_errors_supported=yes
+else
+        _pkg_short_errors_supported=no
+fi
+        if test $_pkg_short_errors_supported = yes; then
+	        LIBARCHIVE_PKG_ERRORS=`$PKG_CONFIG --short-errors --print-errors --cflags --libs "libarchive" 2>&1`
+        else
+	        LIBARCHIVE_PKG_ERRORS=`$PKG_CONFIG --print-errors --cflags --libs "libarchive" 2>&1`
+        fi
+	# Put the nasty error message in config.log where it belongs
+	echo "$LIBARCHIVE_PKG_ERRORS" >&5
+
+	as_fn_error $? "Package requirements (libarchive) were not met:
+
+$LIBARCHIVE_PKG_ERRORS
+
+Consider adjusting the PKG_CONFIG_PATH environment variable if you
+installed software in a non-standard prefix.
+
+Alternatively, you may set the environment variables LIBARCHIVE_CFLAGS
+and LIBARCHIVE_LIBS to avoid the need to call pkg-config.
+See the pkg-config man page for more details." "$LINENO" 5
+elif test $pkg_failed = untried; then
+        { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+	{ { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5
+$as_echo "$as_me: error: in \`$ac_pwd':" >&2;}
+as_fn_error $? "The pkg-config script could not be found or is too old.  Make sure it
+is in your PATH or set the PKG_CONFIG environment variable to the full
+path to pkg-config.
+
+Alternatively, you may set the environment variables LIBARCHIVE_CFLAGS
+and LIBARCHIVE_LIBS to avoid the need to call pkg-config.
+See the pkg-config man page for more details.
+
+To get pkg-config, see <http://pkg-config.freedesktop.org/>.
+See \`config.log' for more details" "$LINENO" 5; }
+else
+	LIBARCHIVE_CFLAGS=$pkg_cv_LIBARCHIVE_CFLAGS
+	LIBARCHIVE_LIBS=$pkg_cv_LIBARCHIVE_LIBS
+        { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5
+$as_echo "yes" >&6; }
+
+fi
+fi
+
+
 #
 # libcurl
 #
diff --git a/configure.ac b/configure.ac
index 8d176bd3468..668eb0fc019 100644
--- a/configure.ac
+++ b/configure.ac
@@ -1059,6 +1059,19 @@ fi
 AC_SUBST(with_uuid)
 
 
+#
+# libarchive
+#
+AC_MSG_CHECKING([whether to build with libarchive support])
+PGAC_ARG_BOOL(with, libarchive, no, [build with libarchive support],
+              [AC_DEFINE([USE_LIBARCHIVE], 1, [Define to 1 to build with libarchive support. (--with-libarchive)])])
+AC_MSG_RESULT([$with_libarchive])
+AC_SUBST(with_libarchive)
+if test "$with_libarchive" = yes ; then
+  PKG_CHECK_MODULES(LIBARCHIVE, libarchive)
+fi
+
+
 #
 # libcurl
 #
diff --git a/meson.build b/meson.build
index be97e986e5d..d050642d7a5 100644
--- a/meson.build
+++ b/meson.build
@@ -923,6 +923,22 @@ endif
 
 
 ###############################################################
+# Library: libarchive
+###############################################################
+
+libarchive_opt = get_option('libarchive')
+if not libarchive_opt.disabled()
+  libarchive = dependency('libarchive', required: libarchive_opt)
+else
+  libarchive = not_found_dep
+endif
+if libarchive.found()
+  cdata.set('USE_LIBARCHIVE', 1)
+endif
+
+
+
+###############################################################
 # Library: LLVM
 ###############################################################
 
diff --git a/meson_options.txt b/meson_options.txt
index 6a793f3e479..671ffe127ff 100644
--- a/meson_options.txt
+++ b/meson_options.txt
@@ -100,6 +100,9 @@ option('icu', type: 'feature', value: 'auto',
 option('ldap', type: 'feature', value: 'auto',
   description: 'LDAP support')
 
+option('libarchive', type : 'feature', value: 'auto',
+  description: 'libarchive support')
+
 option('libcurl', type : 'feature', value: 'auto',
   description: 'libcurl support')
 
diff --git a/src/Makefile.global.in b/src/Makefile.global.in
index a7699b026bb..060165bd27d 100644
--- a/src/Makefile.global.in
+++ b/src/Makefile.global.in
@@ -195,6 +195,7 @@ with_systemd	= @with_systemd@
 with_gssapi	= @with_gssapi@
 with_krb_srvnam	= @with_krb_srvnam@
 with_ldap	= @with_ldap@
+with_libarchive	= @with_libarchive@
 with_libcurl	= @with_libcurl@
 with_libnuma	= @with_libnuma@
 with_liburing	= @with_liburing@
@@ -224,6 +225,9 @@ krb_srvtab = @krb_srvtab@
 ICU_CFLAGS		= @ICU_CFLAGS@
 ICU_LIBS		= @ICU_LIBS@
 
+LIBARCHIVE_CFLAGS	= @LIBARCHIVE_CFLAGS@
+LIBARCHIVE_LIBS		= @LIBARCHIVE_LIBS@
+
 LIBNUMA_CFLAGS		= @LIBNUMA_CFLAGS@
 LIBNUMA_LIBS		= @LIBNUMA_LIBS@
 
diff --git a/src/include/pg_config.h.in b/src/include/pg_config.h.in
index 4f8113c144b..3974fd82a02 100644
--- a/src/include/pg_config.h.in
+++ b/src/include/pg_config.h.in
@@ -701,6 +701,9 @@
 /* Define to 1 to build with LDAP support. (--with-ldap) */
 #undef USE_LDAP
 
+/* Define to 1 to build with libarchive support. (--with-libarchive) */
+#undef USE_LIBARCHIVE
+
 /* Define to 1 to build with libcurl support. (--with-libcurl) */
 #undef USE_LIBCURL
 
-- 
2.53.0

From 798c26be383a07a9ddee1774ee6f90be0f0b6f64 Mon Sep 17 00:00:00 2001
From: Thomas Munro <[email protected]>
Date: Sat, 4 Apr 2026 16:48:14 +1300
Subject: [PATCH v2 4/6] libarchive: Provide astreamer_libarchive_reader.

This allows modern tar files (and potentially other unrelated formats) to
be consumed from a file, with support for various compression
algorithms.
---
 src/fe_utils/Makefile               |   4 +
 src/fe_utils/astreamer_libarchive.c | 232 ++++++++++++++++++++++++++++
 src/fe_utils/meson.build            |   4 +
 src/include/fe_utils/astreamer.h    |  12 ++
 src/tools/pgindent/typedefs.list    |   1 +
 5 files changed, 253 insertions(+)
 create mode 100644 src/fe_utils/astreamer_libarchive.c

diff --git a/src/fe_utils/Makefile b/src/fe_utils/Makefile
index cbfbf93ac69..f6c88a73ee7 100644
--- a/src/fe_utils/Makefile
+++ b/src/fe_utils/Makefile
@@ -40,6 +40,10 @@ OBJS = \
 	string_utils.o \
 	version.o
 
+ifeq ($(with_libarchive), yes)
+OBJS += astreamer_libarchive.o
+endif
+
 ifeq ($(PORTNAME), win32)
 override CPPFLAGS += -DFD_SETSIZE=1024
 endif
diff --git a/src/fe_utils/astreamer_libarchive.c b/src/fe_utils/astreamer_libarchive.c
new file mode 100644
index 00000000000..967eca84abe
--- /dev/null
+++ b/src/fe_utils/astreamer_libarchive.c
@@ -0,0 +1,232 @@
+/*-------------------------------------------------------------------------
+ *
+ * astreamer_libarchive.c
+ *
+ * This module reads from archives using https://www.libarchive.org/.
+ *
+ * Portions Copyright (c) 1996-2026, PostgreSQL Global Development Group
+ *
+ * IDENTIFICATION
+ *		  src/fe_utils/astreamer_libarchive.c
+ *-------------------------------------------------------------------------
+ */
+
+#include "postgres_fe.h"
+
+#include <archive.h>
+#include <archive_entry.h>
+
+#include "common/logging.h"
+#include "fe_utils/astreamer.h"
+
+/* This is the data size we'll try to stream at once. */
+#define ASTREAMER_LIBARCHIVE_READER_BUFFER_SIZE (128 * 1024)
+
+typedef struct astreamer_libarchive_reader
+{
+	astreamer	base;
+	astreamer_member member;
+	struct archive *archive;
+	bool		end_of_file;
+	bool		end_of_archive;
+	char		data[ASTREAMER_LIBARCHIVE_READER_BUFFER_SIZE];
+} astreamer_libarchive_reader;
+
+static bool astreamer_libarchive_reader_pull_content(astreamer *streamer);
+static void astreamer_libarchive_reader_finalize(astreamer *streamer);
+static void astreamer_libarchive_reader_free(astreamer *streamer);
+
+static const astreamer_ops astreamer_libarchive_reader_ops = {
+	.pull_content = astreamer_libarchive_reader_pull_content,
+	.finalize = astreamer_libarchive_reader_finalize,
+	.free = astreamer_libarchive_reader_free
+};
+
+/*
+ * Create an astreamer that decodes 'pathname' with libarchive and feeds its
+ * contents to 'next'.  This streamer is a source that must be the first in
+ * the chain, and content should be produced by calling
+ * astreamer_pull_content(streamer).
+ */
+astreamer *
+astreamer_libarchive_reader_new(astreamer *next, const char *pathname)
+{
+	astreamer_libarchive_reader *streamer;
+	int			r;
+
+	streamer = palloc0_object(astreamer_libarchive_reader);
+	*((const astreamer_ops **) &streamer->base.bbs_ops) =
+		&astreamer_libarchive_reader_ops;
+	streamer->base.bbs_next = next;
+
+	/* Prepare to read tar archives with any known compression filter. */
+	streamer->archive = archive_read_new();
+	if (streamer->archive == NULL)
+		pg_fatal("out of memory");
+	if (archive_read_support_format_tar(streamer->archive) != ARCHIVE_OK)
+		pg_fatal("libarchive: could not initialize tar format: %s",
+				 archive_error_string(streamer->archive));
+	if (archive_read_support_filter_all(streamer->archive) != ARCHIVE_OK)
+		pg_fatal("libarchive: could not initialize tar filter: %s",
+				 archive_error_string(streamer->archive));
+
+	/* Open file. */
+	r = archive_read_open_filename(streamer->archive,
+								   pathname,
+								   ASTREAMER_LIBARCHIVE_READER_BUFFER_SIZE);
+	if (r != ARCHIVE_OK)
+		pg_fatal("libarchive: could not open \"%s\": %s",
+				 pathname,
+				 archive_error_string(streamer->archive));
+
+	/* Start by wanting a new file. */
+	streamer->end_of_file = true;
+	streamer->end_of_archive = false;
+
+	return &streamer->base;
+}
+
+/* Fill in an astreamer member given a libarchive entry. */
+static void
+astreamer_libarchive_reader_fill_member(astreamer_member *member,
+										struct archive_entry *entry)
+{
+	/* The member is reused for every entry, so clear stale state first. */
+	memset(member, 0, sizeof(*member));
+	strlcpy(member->pathname,
+			archive_entry_pathname(entry),
+			sizeof(member->pathname));
+	member->size = archive_entry_size(entry);
+	member->mode = archive_entry_mode(entry);
+	member->uid = archive_entry_uid(entry);
+	member->gid = archive_entry_gid(entry);
+	switch (archive_entry_filetype(entry))
+	{
+		case AE_IFREG:
+			member->is_regular = true;
+			break;
+		case AE_IFDIR:
+			member->is_directory = true;
+			break;
+		case AE_IFLNK:
+			member->is_symlink = true;
+			strlcpy(member->linktarget,
+					archive_entry_symlink(entry),
+					sizeof(member->linktarget));
+			break;
+	}
+}
+
+static bool
+astreamer_libarchive_reader_pull_content(astreamer *streamer)
+{
+	astreamer_libarchive_reader *mystreamer;
+	ssize_t		size;
+
+	mystreamer = (astreamer_libarchive_reader *) streamer;
+
+	while (!mystreamer->end_of_archive)
+	{
+		/* Do we need a new file? */
+		if (mystreamer->end_of_file)
+		{
+			struct archive_entry *entry;
+
+			/* Start next file, or discover end of archive. */
+			switch (archive_read_next_header(mystreamer->archive, &entry))
+			{
+				case ARCHIVE_RETRY:
+					continue;
+				case ARCHIVE_FATAL:
+					pg_fatal("libarchive: %s",
+							 archive_error_string(mystreamer->archive));
+					break;
+				case ARCHIVE_WARN:
+					pg_log_warning("libarchive: %s",
+								   archive_error_string(mystreamer->archive));
+					pg_fallthrough;
+				case ARCHIVE_OK:
+					/* Send file header, then fall through to send one chunk. */
+					mystreamer->end_of_file = false;
+					astreamer_libarchive_reader_fill_member(&mystreamer->member,
+															entry);
+					astreamer_content(mystreamer->base.bbs_next,
+									  &mystreamer->member,
+									  NULL,
+									  0,
+									  ASTREAMER_MEMBER_HEADER);
+					break;
+				case ARCHIVE_EOF:
+					/* End of archive: send trailer, then leave the loop. */
+					mystreamer->end_of_archive = true;
+					astreamer_content(mystreamer->base.bbs_next,
+									  NULL,
+									  NULL,
+									  0,
+									  ASTREAMER_ARCHIVE_TRAILER);
+					continue;
+				default:
+					pg_fatal("unexpected result from archive_read_next_header()");
+					break;
+			}
+		}
+
+		/* Stream a chunk of data, or discover end of file. */
+		Assert(!mystreamer->end_of_file);
+		size = archive_read_data(mystreamer->archive,
+								 mystreamer->data,
+								 sizeof(mystreamer->data));
+		switch (size)
+		{
+			case ARCHIVE_RETRY:
+				continue;
+			case ARCHIVE_FATAL:
+				pg_fatal("libarchive: %s",
+						 archive_error_string(mystreamer->archive));
+				pg_unreachable();
+			case ARCHIVE_WARN:
+				pg_log_warning("libarchive: %s",
+							   archive_error_string(mystreamer->archive));
+				continue;
+			default:
+				break;
+		}
+
+		if (size == 0)
+		{
+			/* Send trailer, and go around to start another file. */
+			mystreamer->end_of_file = true;
+			astreamer_content(mystreamer->base.bbs_next,
+							  &mystreamer->member,
+							  NULL,
+							  0,
+							  ASTREAMER_MEMBER_TRAILER);
+			continue;
+		}
+
+		/* Stream large chunk and return. */
+		astreamer_content(mystreamer->base.bbs_next,
+						  &mystreamer->member,
+						  mystreamer->data,
+						  size,
+						  ASTREAMER_MEMBER_CONTENTS);
+		return true;
+	}
+	return false;
+}
+
+static void
+astreamer_libarchive_reader_finalize(astreamer *streamer)
+{
+	astreamer_finalize(streamer->bbs_next);
+}
+
+static void
+astreamer_libarchive_reader_free(astreamer *streamer)
+{
+	astreamer_libarchive_reader *mystreamer;
+
+	mystreamer = (astreamer_libarchive_reader *) streamer;
+	archive_free(mystreamer->archive);
+	pfree(streamer);
+}
diff --git a/src/fe_utils/meson.build b/src/fe_utils/meson.build
index 86befca192e..6b95c36e9a5 100644
--- a/src/fe_utils/meson.build
+++ b/src/fe_utils/meson.build
@@ -21,6 +21,10 @@ fe_utils_sources = files(
   'version.c',
 )
 
+if libarchive.found()
+  fe_utils_sources += 'astreamer_libarchive.c'
+endif
+
 psqlscan = custom_target('psqlscan',
   input: 'psqlscan.l',
   output: 'psqlscan.c',
diff --git a/src/include/fe_utils/astreamer.h b/src/include/fe_utils/astreamer.h
index 2509d157bc5..95eb146c734 100644
--- a/src/include/fe_utils/astreamer.h
+++ b/src/include/fe_utils/astreamer.h
@@ -155,6 +155,13 @@ astreamer_content(astreamer *streamer, astreamer_member *member,
 	streamer->bbs_ops->content(streamer, member, data, len, context);
 }
 
+/* Pull from a source astreamer, ignoring whether more data remains. */
+static inline void
+astreamer_pull(astreamer *streamer)
+{
+	(void) astreamer_pull_content(streamer);
+}
+
 /* Finalize a astreamer. */
 static inline void
 astreamer_finalize(astreamer *streamer)
@@ -243,4 +250,9 @@ extern astreamer *astreamer_tar_parser_new(astreamer *next);
 extern astreamer *astreamer_tar_terminator_new(astreamer *next);
 extern astreamer *astreamer_tar_archiver_new(astreamer *next);
 
+#ifdef USE_LIBARCHIVE
+extern astreamer *astreamer_libarchive_reader_new(astreamer *next,
+												  const char *pathname);
+#endif
+
 #endif
diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list
index a936c2c9c49..0f5caaff4b4 100644
--- a/src/tools/pgindent/typedefs.list
+++ b/src/tools/pgindent/typedefs.list
@@ -3589,6 +3589,7 @@ astreamer_archive_context
 astreamer_extractor
 astreamer_gzip_decompressor
 astreamer_gzip_writer
+astreamer_libarchive_reader
 astreamer_lz4_frame
 astreamer_member
 astreamer_ops
-- 
2.53.0

From 71d9875fca9d011a899cd1cfb477a979a3cacd44 Mon Sep 17 00:00:00 2001
From: Thomas Munro <[email protected]>
Date: Sun, 5 Apr 2026 03:07:19 +1200
Subject: [PATCH v2 5/6] fixup: Use more efficient zero-copy API?

We can pass a pointer to data in libarchive's internal buffer directly
to the next streamer, avoiding one copy.  To do this we also have to
expand any sparse regions ourselves.

XXX not sure it's worth the complexity for non-performance critical
code?
---
 src/fe_utils/astreamer_libarchive.c | 62 ++++++++++++++++++++++++-----
 1 file changed, 52 insertions(+), 10 deletions(-)

diff --git a/src/fe_utils/astreamer_libarchive.c b/src/fe_utils/astreamer_libarchive.c
index 967eca84abe..2ac0c565cc4 100644
--- a/src/fe_utils/astreamer_libarchive.c
+++ b/src/fe_utils/astreamer_libarchive.c
@@ -29,7 +29,8 @@ typedef struct astreamer_libarchive_reader
 	struct archive *archive;
 	bool		end_of_file;
 	bool		end_of_archive;
-	char		data[ASTREAMER_LIBARCHIVE_READER_BUFFER_SIZE];
+	pgoff_t		offset;
+	char		zeroes[8192];
 } astreamer_libarchive_reader;
 
 static bool astreamer_libarchive_reader_pull_content(astreamer *streamer);
@@ -117,11 +118,33 @@ astreamer_libarchive_reader_fill_member(astreamer_member *member,
 	}
 }
 
+/* Emit zeroes up to offset. */
+static void
+astreamer_libarchive_reader_expand_sparse(astreamer_libarchive_reader *mystreamer,
+										  pgoff_t offset)
+{
+	size_t		size;
+
+	while (mystreamer->offset < offset)
+	{
+		size = offset - mystreamer->offset;
+		size = Min(size, sizeof(mystreamer->zeroes));
+		astreamer_content(mystreamer->base.bbs_next,
+						  &mystreamer->member,
+						  mystreamer->zeroes,
+						  size,
+						  ASTREAMER_MEMBER_CONTENTS);
+		mystreamer->offset += size;
+	}
+}
+
 static bool
 astreamer_libarchive_reader_pull_content(astreamer *streamer)
 {
 	astreamer_libarchive_reader *mystreamer;
-	ssize_t		size;
+	const void *data;
+	size_t		size;
+	pgoff_t		offset;
 
 	mystreamer = (astreamer_libarchive_reader *) streamer;
 
@@ -148,6 +171,7 @@ astreamer_libarchive_reader_pull_content(astreamer *streamer)
 				case ARCHIVE_OK:
 					/* Send file header, then fall through to send one chunk. */
 					mystreamer->end_of_file = false;
+					mystreamer->offset = 0;
 					astreamer_libarchive_reader_fill_member(&mystreamer->member,
 															entry);
 					astreamer_content(mystreamer->base.bbs_next,
@@ -171,12 +195,19 @@ astreamer_libarchive_reader_pull_content(astreamer *streamer)
 			}
 		}
 
-		/* Stream a chunk of data, or discover end of file. */
+		/*
+		 * Stream a chunk of data, or discover end of file.
+		 *
+		 * It would be a bit simpler to use archive_read_data(), but this
+		 * interface removes the need for copying to an output buffer.  In
+		 * exchange for that, we have to deal with expanding (rare) sparse
+		 * file zeroes.
+		 */
 		Assert(!mystreamer->end_of_file);
-		size = archive_read_data(mystreamer->archive,
-								 mystreamer->data,
-								 sizeof(mystreamer->data));
-		switch (size)
+		switch (archive_read_data_block(mystreamer->archive,
+										&data,
+										&size,
+										&offset))
 		{
 			case ARCHIVE_RETRY:
 				continue;
@@ -187,11 +218,20 @@ astreamer_libarchive_reader_pull_content(astreamer *streamer)
 			case ARCHIVE_WARN:
 				pg_log_warning("libarchive: %s",
 							   archive_error_string(mystreamer->archive));
-				continue;
+				break;
+			case ARCHIVE_EOF:
+				size = 0;
+				break;
+			case ARCHIVE_OK:
+				break;
 			default:
+				pg_fatal("unexpected result from archive_read_data_block()");
 				break;
 		}
 
+		/* Expand any intervening sparse region. */
+		astreamer_libarchive_reader_expand_sparse(mystreamer, offset);
+
 		if (size == 0)
 		{
 			/* Send trailer, and go around to start another file. */
@@ -204,12 +244,14 @@ astreamer_libarchive_reader_pull_content(astreamer *streamer)
 			continue;
 		}
 
-		/* Stream large chunk and return. */
+		/* Stream large chunk directly from libarchive's buffer and return. */
+		Assert(mystreamer->offset == offset);
 		astreamer_content(mystreamer->base.bbs_next,
 						  &mystreamer->member,
-						  mystreamer->data,
+						  data,
 						  size,
 						  ASTREAMER_MEMBER_CONTENTS);
+		mystreamer->offset += size;
 		return true;
 	}
 	return false;
-- 
2.53.0

From ceea0553c3c84629a79c4626c30df6aba5f1fbb4 Mon Sep 17 00:00:00 2001
From: Thomas Munro <[email protected]>
Date: Sun, 5 Apr 2026 02:40:56 +1200
Subject: [PATCH v2 6/6] pg_waldump: Use astreamer_libarchive_reader.

If this build supports libarchive, use astreamer_libarchive_reader
instead of astreamer_tar_parser to read WAL archives.  It supports
modern tar formats.

Reviewed-by:
Discussion: https://postgr.es/m/CA%2BhUKGJYThZZp0AfvEbzNX_ZQ22pTpcjtgT0J_Pb%2BHAGH%3DQChw%40mail.gmail.com
---
 src/bin/pg_waldump/Makefile          | 5 +++++
 src/bin/pg_waldump/archive_waldump.c | 5 +++++
 src/bin/pg_waldump/meson.build       | 2 +-
 src/bin/pg_waldump/t/001_basic.pl    | 9 ++++++++-
 4 files changed, 19 insertions(+), 2 deletions(-)

diff --git a/src/bin/pg_waldump/Makefile b/src/bin/pg_waldump/Makefile
index aabb87566a2..09005bf4ba4 100644
--- a/src/bin/pg_waldump/Makefile
+++ b/src/bin/pg_waldump/Makefile
@@ -23,6 +23,11 @@ OBJS = \
 override CPPFLAGS := -DFRONTEND -I$(libpq_srcdir) $(CPPFLAGS)
 LDFLAGS_INTERNAL += -L$(top_builddir)/src/fe_utils -lpgfeutils
 
+ifeq ($(with_libarchive), yes)
+# XXX figure out where this should go
+LDFLAGS_INTERNAL += $(LIBARCHIVE_LIBS)
+endif
+
 RMGRDESCSOURCES = $(sort $(notdir $(wildcard $(top_srcdir)/src/backend/access/rmgrdesc/*desc*.c)))
 RMGRDESCOBJS = $(patsubst %.c,%.o,$(RMGRDESCSOURCES))
 
diff --git a/src/bin/pg_waldump/archive_waldump.c b/src/bin/pg_waldump/archive_waldump.c
index b27888a5056..07b81ec4871 100644
--- a/src/bin/pg_waldump/archive_waldump.c
+++ b/src/bin/pg_waldump/archive_waldump.c
@@ -139,6 +139,10 @@ init_archive_reader(XLogDumpPrivate *privateInfo,
 
 	streamer = astreamer_waldump_new(privateInfo);
 
+#ifdef USE_LIBARCHIVE
+	/* libarchive will decode the tar archive. */
+	streamer = astreamer_libarchive_reader_new(streamer, pathname);
+#else
 	/* We must first parse the tar archive. */
 	streamer = astreamer_tar_parser_new(streamer);
 
@@ -152,6 +156,7 @@ init_archive_reader(XLogDumpPrivate *privateInfo,
 
 	/* And before that, we have to read the file. */
 	streamer = astreamer_plain_reader_new(streamer, pathname);
+#endif
 
 	privateInfo->archive_streamer = streamer;
 
diff --git a/src/bin/pg_waldump/meson.build b/src/bin/pg_waldump/meson.build
index 5296f21b82c..0b2c4021107 100644
--- a/src/bin/pg_waldump/meson.build
+++ b/src/bin/pg_waldump/meson.build
@@ -19,7 +19,7 @@ endif
 
 pg_waldump = executable('pg_waldump',
   pg_waldump_sources,
-  dependencies: [frontend_code, libpq, lz4, zstd],
+  dependencies: [frontend_code, libarchive, libpq, lz4, zstd],
   c_args: ['-DFRONTEND'], # needed for xlogreader et al
   kwargs: default_bin_args,
 )
diff --git a/src/bin/pg_waldump/t/001_basic.pl b/src/bin/pg_waldump/t/001_basic.pl
index 7dd1c3dd63e..62a15228b38 100644
--- a/src/bin/pg_waldump/t/001_basic.pl
+++ b/src/bin/pg_waldump/t/001_basic.pl
@@ -11,7 +11,13 @@ use Test::More;
 use List::Util qw(shuffle);
 
 my $tar = $ENV{TAR};
-my @tar_p_flags = tar_portability_options($tar);
+my @tar_p_flags;
+
+# If we don't have libarchive, then we tell tar to stick to ustar format that
+# astreamer_tar.c can decode.  Otherwise we should be able to accept anything
+# that any current tar produces.
+@tar_p_flags = tar_portability_options($tar)
+  if !check_pg_config("#define USE_LIBARCHIVE");
 
 program_help_ok('pg_waldump');
 program_version_ok('pg_waldump');
@@ -373,6 +379,7 @@ my @scenarios = (
 		'compression_flags' => '-czf',
 		'is_archive' => 1,
 		'enabled' => check_pg_config("#define HAVE_LIBZ 1")
+		  || check_pg_config("#define USE_LIBARCHIVE")
 	});
 
 for my $scenario (@scenarios)
-- 
2.53.0

Reply via email to