On Wed, Mar 1, 2023 at 2:39 PM Bharath Rupireddy
<bharath.rupireddyforpostg...@gmail.com> wrote:
>
> Please find the attached v5 patch set for further review.

I simplified the code largely by moving the logic of reading the WAL
buffer page from a separate function to the main while loop. This
enabled me to get rid of XLogReadFromBuffersGuts() that v5 and other
previous patches have.

Please find the attached v6 patch set for further review. Meanwhile,
I'll continue to work on the review comment raised upthread -
https://www.postgresql.org/message-id/20230301041523.GA1453450%40nathanxps13.

--
Bharath Rupireddy
PostgreSQL Contributors Team
RDS Open Source Databases
Amazon Web Services: https://aws.amazon.com
From afb57cd61955f7fc0dba315f3f7c81604a0b47c9 Mon Sep 17 00:00:00 2001
From: Bharath Rupireddy <bharath.rupireddyforpostgres@gmail.com>
Date: Thu, 2 Mar 2023 07:54:06 +0000
Subject: [PATCH v6] Improve WALRead() to suck data directly from WAL buffers

---
 src/backend/access/transam/xlog.c       | 144 ++++++++++++++++++++++++
 src/backend/access/transam/xlogreader.c |  45 +++++++-
 src/include/access/xlog.h               |   6 +
 3 files changed, 193 insertions(+), 2 deletions(-)

diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index f9f0f6db8d..b29dc67c38 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -1639,6 +1639,150 @@ GetXLogBuffer(XLogRecPtr ptr, TimeLineID tli)
 	return cachedPos + ptr % XLOG_BLCKSZ;
 }
 
+/*
+ * Read WAL from WAL buffers.
+ *
+ * Read 'count' bytes of WAL from WAL buffers into 'buf', starting at location
+ * 'startptr', on timeline 'tli' and set the read bytes to 'read_bytes'.
+ *
+ * Note that this function reads as much as it can from WAL buffers, meaning,
+ * it may not read all the requested 'count' bytes. The caller must be aware of
+ * this and deal with it.
+ */
+void
+XLogReadFromBuffers(XLogRecPtr startptr,
+					TimeLineID tli,
+					Size count,
+					char *buf,
+					Size *read_bytes)
+{
+	XLogRecPtr	ptr;
+	char    *dst;
+	Size    nbytes;
+
+	Assert(!XLogRecPtrIsInvalid(startptr));
+	Assert(count > 0);
+	Assert(startptr <= GetFlushRecPtr(NULL));
+	Assert(!RecoveryInProgress());
+	Assert(tli == GetWALInsertionTimeLine());
+
+	ptr = startptr;
+	nbytes = count;
+	dst = buf;
+	*read_bytes = 0;
+
+	while (nbytes > 0)
+	{
+		XLogRecPtr origptr;
+		XLogRecPtr	expectedEndPtr;
+		XLogRecPtr	endptr;
+		int 	idx;
+
+		origptr = ptr;
+		idx = XLogRecPtrToBufIdx(ptr);
+		expectedEndPtr = ptr;
+		expectedEndPtr += XLOG_BLCKSZ - ptr % XLOG_BLCKSZ;
+
+		/*
+		 * Holding WALBufMappingLock ensures inserters don't overwrite this
+		 * value while we are reading it. We try to acquire it in shared mode
+		 * so that the concurrent WAL readers are also allowed. We try to do as
+		 * less work as possible while holding the lock to not impact
+		 * concurrent WAL writers much. We quickly exit to not cause any
+		 * contention, if the lock isn't immediately available.
+		 */
+		if (!LWLockConditionalAcquire(WALBufMappingLock, LW_SHARED))
+			return;
+
+		endptr = XLogCtl->xlblocks[idx];
+
+		if (expectedEndPtr == endptr)
+		{
+			char	*page;
+			char    *data;
+			XLogPageHeader	phdr;
+
+			/*
+			 * We found WAL buffer page containing given XLogRecPtr. Get
+			 * starting address of the page and a pointer to the right location
+			 * of given XLogRecPtr in that page.
+			 */
+			page = XLogCtl->pages + idx * (Size) XLOG_BLCKSZ;
+			data = page + ptr % XLOG_BLCKSZ;
+
+			/* Read what is wanted, not the whole page. */
+			if ((data + nbytes) <= (page + XLOG_BLCKSZ))
+			{
+				/* All the bytes are in one page. */
+				memcpy(dst, data, nbytes);
+				*read_bytes += nbytes;
+				nbytes = 0;
+			}
+			else
+			{
+				Size	nread;
+
+				/*
+				 * All the bytes are not in one page. Read available bytes on
+				 * the current page, copy them over to output buffer and
+				 * continue to read remaining bytes.
+				 */
+				nread = XLOG_BLCKSZ - (data - page);
+				Assert(nread > 0 && nread <= nbytes);
+				memcpy(dst, data, nread);
+				ptr += nread;
+				nbytes -= nread;
+				dst += nread;
+				*read_bytes += nread;
+			}
+
+			/*
+			 * Release the lock as early as possible to avoid creating any
+			 * possible contention.
+			 */
+			LWLockRelease(WALBufMappingLock);
+
+			/*
+			 * The fact that we acquire WALBufMappingLock while reading the WAL
+			 * buffer page itself guarantees that no one else initializes it or
+			 * makes it ready for next use in AdvanceXLInsertBuffer().
+			 *
+			 * However, we perform basic page header checks for ensuring that
+			 * we are not reading a page that just got initialized. Callers
+			 * will anyway perform extensive page-level and record-level
+			 * checks.
+			 */
+			phdr = (XLogPageHeader) page;
+
+			if (!(phdr->xlp_magic == XLOG_PAGE_MAGIC &&
+				  phdr->xlp_pageaddr == (origptr - (origptr % XLOG_BLCKSZ)) &&
+				  phdr->xlp_tli == tli))
+			{
+				/*
+				 * WAL buffer page doesn't look valid, so return with what we
+				 * have read so far.
+				 */
+				break;
+			}
+		}
+		else
+		{
+			/*
+			 * Requested WAL isn't available in WAL buffers, so return with
+			 * what we have read so far.
+			 */
+			LWLockRelease(WALBufMappingLock);
+			break;
+		}
+	}
+
+	/* We never read more than what the caller has asked for. */
+	Assert(*read_bytes <= count);
+
+	elog(DEBUG1, "read %zu bytes out of %zu bytes from WAL buffers for given LSN %X/%X, Timeline ID %u",
+		 *read_bytes, count, LSN_FORMAT_ARGS(startptr), tli);
+}
+
 /*
  * Converts a "usable byte position" to XLogRecPtr. A usable byte position
  * is the position starting from the beginning of WAL, excluding all WAL
diff --git a/src/backend/access/transam/xlogreader.c b/src/backend/access/transam/xlogreader.c
index aa6c929477..723379b7d9 100644
--- a/src/backend/access/transam/xlogreader.c
+++ b/src/backend/access/transam/xlogreader.c
@@ -1485,8 +1485,7 @@ err:
  * Returns true if succeeded, false if an error occurs, in which case
  * 'errinfo' receives error details.
  *
- * XXX probably this should be improved to suck data directly from the
- * WAL buffers when possible.
+ * When possible, this function reads data directly from WAL buffers.
  */
 bool
 WALRead(XLogReaderState *state,
@@ -1497,6 +1496,48 @@ WALRead(XLogReaderState *state,
 	XLogRecPtr	recptr;
 	Size		nbytes;
 
+#ifndef FRONTEND
+	/* Frontend tools have no idea of WAL buffers. */
+	Size        read_bytes;
+
+	/*
+	 * When possible, read WAL from WAL buffers. We skip this step and continue
+	 * the usual way, that is to read from WAL file, either when server is in
+	 * recovery (standby mode, archive or crash recovery), in which case the
+	 * WAL buffers are not used or when the server is inserting in a different
+	 * timeline from that of the timeline that we're trying to read WAL from.
+	 */
+	if (!RecoveryInProgress() &&
+		tli == GetWALInsertionTimeLine())
+	{
+		XLogReadFromBuffers(startptr, tli, count, buf, &read_bytes);
+
+		/*
+		 * Check if we have read fully (hit), partially (partial hit) or
+		 * nothing (miss) from WAL buffers. If we have read either partially or
+		 * nothing, then continue to read the remaining bytes the usual way,
+		 * that is, read from WAL file.
+		 */
+		if (count == read_bytes)
+		{
+			/* Buffer hit, so return. */
+			return true;
+		}
+		else if (read_bytes > 0 && count > read_bytes)
+		{
+			/*
+			 * Buffer partial hit, so reset the state to count the read bytes
+			 * and continue.
+			 */
+			buf += read_bytes;
+			startptr += read_bytes;
+			count -= read_bytes;
+		}
+
+		/* Buffer miss i.e., read_bytes = 0, so continue */
+	}
+#endif	/* FRONTEND */
+
 	p = buf;
 	recptr = startptr;
 	nbytes = count;
diff --git a/src/include/access/xlog.h b/src/include/access/xlog.h
index cfe5409738..c9941aa001 100644
--- a/src/include/access/xlog.h
+++ b/src/include/access/xlog.h
@@ -247,6 +247,12 @@ extern XLogRecPtr GetLastImportantRecPtr(void);
 
 extern void SetWalWriterSleeping(bool sleeping);
 
+extern void XLogReadFromBuffers(XLogRecPtr startptr,
+								TimeLineID tli,
+								Size count,
+								char *buf,
+								Size *read_bytes);
+
 /*
  * Routines used by xlogrecovery.c to call back into xlog.c during recovery.
  */
-- 
2.34.1

From 934af3ea6475c3293156e20b62901ab8dda5e65a Mon Sep 17 00:00:00 2001
From: Bharath Rupireddy <bharath.rupireddyforpostgres@gmail.com>
Date: Thu, 2 Mar 2023 07:54:36 +0000
Subject: [PATCH v6] Add test module for verifying WAL read from WAL buffers

---
 src/test/modules/Makefile                     |  1 +
 src/test/modules/meson.build                  |  1 +
 .../test_wal_read_from_buffers/.gitignore     |  4 ++
 .../test_wal_read_from_buffers/Makefile       | 23 +++++++++
 .../test_wal_read_from_buffers/meson.build    | 36 +++++++++++++
 .../test_wal_read_from_buffers/t/001_basic.pl | 44 ++++++++++++++++
 .../test_wal_read_from_buffers--1.0.sql       | 16 ++++++
 .../test_wal_read_from_buffers.c              | 51 +++++++++++++++++++
 .../test_wal_read_from_buffers.control        |  4 ++
 9 files changed, 180 insertions(+)
 create mode 100644 src/test/modules/test_wal_read_from_buffers/.gitignore
 create mode 100644 src/test/modules/test_wal_read_from_buffers/Makefile
 create mode 100644 src/test/modules/test_wal_read_from_buffers/meson.build
 create mode 100644 src/test/modules/test_wal_read_from_buffers/t/001_basic.pl
 create mode 100644 src/test/modules/test_wal_read_from_buffers/test_wal_read_from_buffers--1.0.sql
 create mode 100644 src/test/modules/test_wal_read_from_buffers/test_wal_read_from_buffers.c
 create mode 100644 src/test/modules/test_wal_read_from_buffers/test_wal_read_from_buffers.control

diff --git a/src/test/modules/Makefile b/src/test/modules/Makefile
index c629cbe383..ea33361f69 100644
--- a/src/test/modules/Makefile
+++ b/src/test/modules/Makefile
@@ -33,6 +33,7 @@ SUBDIRS = \
 		  test_rls_hooks \
 		  test_shm_mq \
 		  test_slru \
+		  test_wal_read_from_buffers \
 		  unsafe_tests \
 		  worker_spi
 
diff --git a/src/test/modules/meson.build b/src/test/modules/meson.build
index 1baa6b558d..e3ffd3538d 100644
--- a/src/test/modules/meson.build
+++ b/src/test/modules/meson.build
@@ -29,5 +29,6 @@ subdir('test_regex')
 subdir('test_rls_hooks')
 subdir('test_shm_mq')
 subdir('test_slru')
+subdir('test_wal_read_from_buffers')
 subdir('unsafe_tests')
 subdir('worker_spi')
diff --git a/src/test/modules/test_wal_read_from_buffers/.gitignore b/src/test/modules/test_wal_read_from_buffers/.gitignore
new file mode 100644
index 0000000000..5dcb3ff972
--- /dev/null
+++ b/src/test/modules/test_wal_read_from_buffers/.gitignore
@@ -0,0 +1,4 @@
+# Generated subdirectories
+/log/
+/results/
+/tmp_check/
diff --git a/src/test/modules/test_wal_read_from_buffers/Makefile b/src/test/modules/test_wal_read_from_buffers/Makefile
new file mode 100644
index 0000000000..7a09533ec7
--- /dev/null
+++ b/src/test/modules/test_wal_read_from_buffers/Makefile
@@ -0,0 +1,23 @@
+# src/test/modules/test_wal_read_from_buffers/Makefile
+
+MODULE_big = test_wal_read_from_buffers
+OBJS = \
+	$(WIN32RES) \
+	test_wal_read_from_buffers.o
+PGFILEDESC = "test_wal_read_from_buffers - test module to verify that WAL can be read from WAL buffers"
+
+EXTENSION = test_wal_read_from_buffers
+DATA = test_wal_read_from_buffers--1.0.sql
+
+TAP_TESTS = 1
+
+ifdef USE_PGXS
+PG_CONFIG = pg_config
+PGXS := $(shell $(PG_CONFIG) --pgxs)
+include $(PGXS)
+else
+subdir = src/test/modules/test_wal_read_from_buffers
+top_builddir = ../../../..
+include $(top_builddir)/src/Makefile.global
+include $(top_srcdir)/contrib/contrib-global.mk
+endif
diff --git a/src/test/modules/test_wal_read_from_buffers/meson.build b/src/test/modules/test_wal_read_from_buffers/meson.build
new file mode 100644
index 0000000000..40a36edc07
--- /dev/null
+++ b/src/test/modules/test_wal_read_from_buffers/meson.build
@@ -0,0 +1,36 @@
+# Copyright (c) 2022-2023, PostgreSQL Global Development Group
+
+# FIXME: prevent install during main install, but not during test :/
+
+test_wal_read_from_buffers_sources = files(
+  'test_wal_read_from_buffers.c',
+)
+
+if host_system == 'windows'
+  test_wal_read_from_buffers_sources += rc_lib_gen.process(win32ver_rc, extra_args: [
+    '--NAME', 'test_wal_read_from_buffers',
+    '--FILEDESC', 'test_wal_read_from_buffers - test module to verify that WAL can be read from WAL buffers',])
+endif
+
+test_wal_read_from_buffers = shared_module('test_wal_read_from_buffers',
+  test_wal_read_from_buffers_sources,
+  kwargs: pg_mod_args,
+)
+testprep_targets += test_wal_read_from_buffers
+
+install_data(
+  'test_wal_read_from_buffers.control',
+  'test_wal_read_from_buffers--1.0.sql',
+  kwargs: contrib_data_args,
+)
+
+tests += {
+  'name': 'test_wal_read_from_buffers',
+  'sd': meson.current_source_dir(),
+  'bd': meson.current_build_dir(),
+  'tap': {
+    'tests': [
+      't/001_basic.pl',
+    ],
+  },
+}
diff --git a/src/test/modules/test_wal_read_from_buffers/t/001_basic.pl b/src/test/modules/test_wal_read_from_buffers/t/001_basic.pl
new file mode 100644
index 0000000000..3448e0bed6
--- /dev/null
+++ b/src/test/modules/test_wal_read_from_buffers/t/001_basic.pl
@@ -0,0 +1,44 @@
+# Copyright (c) 2021-2023, PostgreSQL Global Development Group
+
+use strict;
+use warnings;
+
+use PostgreSQL::Test::Cluster;
+use PostgreSQL::Test::Utils;
+use Test::More;
+
+my $node = PostgreSQL::Test::Cluster->new('main');
+
+$node->init;
+
+# Ensure nobody interferes with us so that the WAL in WAL buffers don't get
+# overwritten while running tests.
+$node->append_conf(
+	'postgresql.conf', qq(
+autovacuum = off
+checkpoint_timeout = 1h
+wal_writer_delay = 10000ms
+wal_writer_flush_after = 1GB
+));
+$node->start;
+
+# Setup.
+$node->safe_psql('postgres', 'CREATE EXTENSION test_wal_read_from_buffers');
+
+# Get current insert LSN. After this, we generate some WAL which is guranteed
+# to be in WAL buffers as there is no other WAL generating activity is
+# happening on the server. We then verify if we can read the WAL from WAL
+# buffers using this LSN.
+my $lsn =
+  $node->safe_psql('postgres', 'SELECT pg_current_wal_insert_lsn();');
+
+# Generate minimal WAL so that WAL buffers don't get overwritten.
+$node->safe_psql('postgres',
+	"CREATE TABLE t (c int); INSERT INTO t VALUES (1);");
+
+# Check if WAL is successfully read from WAL buffers.
+my $result = $node->safe_psql('postgres',
+	qq{SELECT test_wal_read_from_buffers('$lsn')});
+is($result, 't', "WAL is successfully read from WAL buffers");
+
+done_testing();
diff --git a/src/test/modules/test_wal_read_from_buffers/test_wal_read_from_buffers--1.0.sql b/src/test/modules/test_wal_read_from_buffers/test_wal_read_from_buffers--1.0.sql
new file mode 100644
index 0000000000..8e89910133
--- /dev/null
+++ b/src/test/modules/test_wal_read_from_buffers/test_wal_read_from_buffers--1.0.sql
@@ -0,0 +1,16 @@
+/* src/test/modules/test_wal_read_from_buffers/test_wal_read_from_buffers--1.0.sql */
+
+-- complain if script is sourced in psql, rather than via CREATE EXTENSION
+\echo Use "CREATE EXTENSION test_wal_read_from_buffers" to load this file. \quit
+
+--
+-- test_wal_read_from_buffers()
+--
+-- Returns true if WAL data at a given LSN can be read from WAL buffers.
+-- Otherwise returns false.
+--
+CREATE FUNCTION test_wal_read_from_buffers(IN lsn pg_lsn,
+    OUT read_from_buffers bool
+)
+AS 'MODULE_PATHNAME', 'test_wal_read_from_buffers'
+LANGUAGE C STRICT PARALLEL UNSAFE;
diff --git a/src/test/modules/test_wal_read_from_buffers/test_wal_read_from_buffers.c b/src/test/modules/test_wal_read_from_buffers/test_wal_read_from_buffers.c
new file mode 100644
index 0000000000..ca8645101a
--- /dev/null
+++ b/src/test/modules/test_wal_read_from_buffers/test_wal_read_from_buffers.c
@@ -0,0 +1,51 @@
+/*--------------------------------------------------------------------------
+ *
+ * test_wal_read_from_buffers.c
+ *		Test code for veryfing WAL read from WAL buffers.
+ *
+ * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * IDENTIFICATION
+ *	src/test/modules/test_wal_read_from_buffers/test_wal_read_from_buffers.c
+ * -------------------------------------------------------------------------
+ */
+
+#include "postgres.h"
+
+#include "access/xlog.h"
+#include "fmgr.h"
+#include "utils/pg_lsn.h"
+
+PG_MODULE_MAGIC;
+
+/*
+ * SQL function for verifying that WAL data at a given LSN can be read from WAL
+ * buffers. Returns true if read from WAL buffers, otherwise false.
+ */
+PG_FUNCTION_INFO_V1(test_wal_read_from_buffers);
+Datum
+test_wal_read_from_buffers(PG_FUNCTION_ARGS)
+{
+	XLogRecPtr	lsn;
+	Size	read_bytes;
+	TimeLineID	tli;
+	char	data[XLOG_BLCKSZ] = {0};
+
+	lsn = PG_GETARG_LSN(0);
+
+	if (XLogRecPtrIsInvalid(lsn))
+		PG_RETURN_BOOL(false);
+
+	if (RecoveryInProgress())
+		ereport(ERROR,
+				(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+				 errmsg("recovery is in progress"),
+				 errhint("WAL control functions cannot be executed during recovery.")));
+
+	tli = GetWALInsertionTimeLine();
+
+	XLogReadFromBuffers(lsn, tli, XLOG_BLCKSZ, data, &read_bytes);
+
+	PG_RETURN_LSN(read_bytes > 0);
+}
diff --git a/src/test/modules/test_wal_read_from_buffers/test_wal_read_from_buffers.control b/src/test/modules/test_wal_read_from_buffers/test_wal_read_from_buffers.control
new file mode 100644
index 0000000000..7852b3e331
--- /dev/null
+++ b/src/test/modules/test_wal_read_from_buffers/test_wal_read_from_buffers.control
@@ -0,0 +1,4 @@
+comment = 'Test code for veryfing WAL read from WAL buffers'
+default_version = '1.0'
+module_pathname = '$libdir/test_wal_read_from_buffers'
+relocatable = true
-- 
2.34.1

Reply via email to