On Wed, Mar 1, 2023 at 12:06 AM Kuntal Ghosh <kuntalghosh.2...@gmail.com> wrote: > > On Tue, Feb 28, 2023 at 10:38 AM Bharath Rupireddy > <bharath.rupireddyforpostg...@gmail.com> wrote: > > > +/* > + * Guts of XLogReadFromBuffers(). > + * > + * Read 'count' bytes into 'buf', starting at location 'ptr', from WAL > + * fetched WAL buffers on timeline 'tli' and return the read bytes. > + */ > s/fetched WAL buffers/fetched from WAL buffers
Modified that comment a bit and moved it to the XLogReadFromBuffers. > + else if (nread < nbytes) > + { > + /* > + * We read some of the requested bytes. Continue to read remaining > + * bytes. > + */ > + ptr += nread; > + nbytes -= nread; > + dst += nread; > + *read_bytes += nread; > + } > > The 'if' condition should always be true. You can replace the same > with an assertion instead. Yeah, added an assert and changed that else if (nread < nbytes) to else only condition. > s/Continue to read remaining/Continue to read the remaining Done. > The good thing about this patch is that it reduces read IO calls > without impacting the write performance (at least not that > noticeable). It also takes us one step forward towards the > enhancements mentioned in the thread. Right. > If performance is a concern, we > can introduce a GUC to enable/disable this feature. I didn't see any performance issues from my testing so far with 3 different pgbench cases https://www.postgresql.org/message-id/CALj2ACWXHP6Ha1BfDB14txm%3DXP272wCbOV00mcPg9c6EXbnp5A%40mail.gmail.com. While adding a GUC to enable/disable a feature sounds useful, IMHO it isn't good for the user. Because we already have too many GUCs for the user and we may not want all features to be defensive and add their own GUCs. If at all, any bugs arise due to some corner-case we missed to count in, we can surely help fix them. Having said this, I'm open to suggestions here. Please find the attached v5 patch set for further review. -- Bharath Rupireddy PostgreSQL Contributors Team RDS Open Source Databases Amazon Web Services: https://aws.amazon.com
From 121f98633541c340a89dc628cc3f02711abdee02 Mon Sep 17 00:00:00 2001 From: Bharath Rupireddy <bharath.rupireddyforpostgres@gmail.com> Date: Wed, 1 Mar 2023 08:40:30 +0000 Subject: [PATCH v5] Improve WALRead() to suck data directly from WAL buffers --- src/backend/access/transam/xlog.c | 175 ++++++++++++++++++++++++ src/backend/access/transam/xlogreader.c | 45 +++++- src/include/access/xlog.h | 6 + 3 files changed, 224 insertions(+), 2 deletions(-) diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c index f9f0f6db8d..98bd588ea0 100644 --- a/src/backend/access/transam/xlog.c +++ b/src/backend/access/transam/xlog.c @@ -689,6 +689,10 @@ static bool ReserveXLogSwitch(XLogRecPtr *StartPos, XLogRecPtr *EndPos, XLogRecPtr *PrevPtr); static XLogRecPtr WaitXLogInsertionsToFinish(XLogRecPtr upto); static char *GetXLogBuffer(XLogRecPtr ptr, TimeLineID tli); +static Size XLogReadFromBuffersGuts(XLogRecPtr ptr, + TimeLineID tli, + Size count, + char *page); static XLogRecPtr XLogBytePosToRecPtr(uint64 bytepos); static XLogRecPtr XLogBytePosToEndRecPtr(uint64 bytepos); static uint64 XLogRecPtrToBytePos(XLogRecPtr ptr); @@ -1639,6 +1643,177 @@ GetXLogBuffer(XLogRecPtr ptr, TimeLineID tli) return cachedPos + ptr % XLOG_BLCKSZ; } +/* + * Guts of XLogReadFromBuffers(). + */ +static Size +XLogReadFromBuffersGuts(XLogRecPtr ptr, + TimeLineID tli, + Size count, + char *buf) +{ + XLogRecPtr expectedEndPtr; + XLogRecPtr endptr; + int idx; + Size nread = 0; + + idx = XLogRecPtrToBufIdx(ptr); + expectedEndPtr = ptr; + expectedEndPtr += XLOG_BLCKSZ - ptr % XLOG_BLCKSZ; + + /* + * Holding WALBufMappingLock ensures inserters don't overwrite this value + * while we are reading it. We try to acquire it in shared mode so that the + * concurrent WAL readers are also allowed. We try to do as less work as + * possible while holding the lock to not impact concurrent WAL writers + * much. We quickly exit to not cause any contention, if the lock isn't + * immediately available. + */ + if (!LWLockConditionalAcquire(WALBufMappingLock, LW_SHARED)) + return nread; + + endptr = XLogCtl->xlblocks[idx]; + + if (expectedEndPtr == endptr) + { + char *page; + char *data; + XLogPageHeader phdr; + + /* + * We found WAL buffer page containing given XLogRecPtr. Get starting + * address of the page and a pointer to the right location of given + * XLogRecPtr in that page. + */ + page = XLogCtl->pages + idx * (Size) XLOG_BLCKSZ; + data = page + ptr % XLOG_BLCKSZ; + + /* Read what is wanted, not the whole page. */ + if ((data + count) <= (page + XLOG_BLCKSZ)) + { + /* All the bytes are in one page. */ + memcpy(buf, data, count); + nread = count; + } + else + { + Size nremaining; + + /* + * All the bytes are not in one page. Compute remaining bytes on + * the current page, copy them over to output buffer. + */ + nremaining = XLOG_BLCKSZ - (data - page); + memcpy(buf, data, nremaining); + nread = nremaining; + } + + /* + * Release the lock as early as possible to avoid creating any possible + * contention. + */ + LWLockRelease(WALBufMappingLock); + + /* + * The fact that we acquire WALBufMappingLock while reading the WAL + * buffer page itself guarantees that no one else initializes it or + * makes it ready for next use in AdvanceXLInsertBuffer(). + * + * However, we perform basic page header checks for ensuring that we + * are not reading a page that just got initialized. Callers will + * anyway perform extensive page-level and record-level checks. + */ + phdr = (XLogPageHeader) page; + + if (!(phdr->xlp_magic == XLOG_PAGE_MAGIC && + phdr->xlp_pageaddr == (ptr - (ptr % XLOG_BLCKSZ)) && + phdr->xlp_tli == tli)) + { + /* + * WAL buffer page doesn't look valid, so return as if nothing was + * read. + */ + nread = 0; + } + } + else + { + /* Requested WAL isn't available in WAL buffers. */ + LWLockRelease(WALBufMappingLock); + } + + /* We never read more than what the caller has asked for. */ + Assert(nread <= count); + + return nread; +} + +/* + * Read WAL from WAL buffers. + * + * Read 'count' bytes of WAL from WAL buffers into 'buf', starting at location + * 'startptr', on timeline 'tli' and set the read bytes to 'read_bytes'. + * + * Note that this function reads as much as it can from WAL buffers, meaning, + * it may not read all the requested 'count' bytes. The caller must be aware of + * this and deal with it. + */ +void +XLogReadFromBuffers(XLogRecPtr startptr, + TimeLineID tli, + Size count, + char *buf, + Size *read_bytes) +{ + XLogRecPtr ptr; + char *dst; + Size nbytes; + + Assert(!XLogRecPtrIsInvalid(startptr)); + Assert(count > 0); + Assert(startptr <= GetFlushRecPtr(NULL)); + Assert(!RecoveryInProgress()); + + ptr = startptr; + nbytes = count; + dst = buf; + *read_bytes = 0; + + while (nbytes > 0) + { + Size nread; + + nread = XLogReadFromBuffersGuts(ptr, tli, nbytes, dst); + + if (nread <= 0) + { + /* We read nothing. */ + break; + } + else if (nread == nbytes) + { + /* We read all the requested bytes. */ + *read_bytes += nread; + break; + } + else + { + /* + * We read some of the requested bytes. Continue to read the + * remaining bytes. + */ + Assert(nread < nbytes); + ptr += nread; + nbytes -= nread; + dst += nread; + *read_bytes += nread; + } + } + + elog(DEBUG1, "read %zu bytes out of %zu bytes from WAL buffers for given LSN %X/%X, Timeline ID %u", + *read_bytes, count, LSN_FORMAT_ARGS(startptr), tli); +} + /* * Converts a "usable byte position" to XLogRecPtr. A usable byte position * is the position starting from the beginning of WAL, excluding all WAL diff --git a/src/backend/access/transam/xlogreader.c b/src/backend/access/transam/xlogreader.c index aa6c929477..723379b7d9 100644 --- a/src/backend/access/transam/xlogreader.c +++ b/src/backend/access/transam/xlogreader.c @@ -1485,8 +1485,7 @@ err: * Returns true if succeeded, false if an error occurs, in which case * 'errinfo' receives error details. * - * XXX probably this should be improved to suck data directly from the - * WAL buffers when possible. + * When possible, this function reads data directly from WAL buffers. */ bool WALRead(XLogReaderState *state, @@ -1497,6 +1496,48 @@ WALRead(XLogReaderState *state, XLogRecPtr recptr; Size nbytes; +#ifndef FRONTEND + /* Frontend tools have no idea of WAL buffers. */ + Size read_bytes; + + /* + * When possible, read WAL from WAL buffers. We skip this step and continue + * the usual way, that is to read from WAL file, either when server is in + * recovery (standby mode, archive or crash recovery), in which case the + * WAL buffers are not used or when the server is inserting in a different + * timeline from that of the timeline that we're trying to read WAL from. + */ + if (!RecoveryInProgress() && + tli == GetWALInsertionTimeLine()) + { + XLogReadFromBuffers(startptr, tli, count, buf, &read_bytes); + + /* + * Check if we have read fully (hit), partially (partial hit) or + * nothing (miss) from WAL buffers. If we have read either partially or + * nothing, then continue to read the remaining bytes the usual way, + * that is, read from WAL file. + */ + if (count == read_bytes) + { + /* Buffer hit, so return. */ + return true; + } + else if (read_bytes > 0 && count > read_bytes) + { + /* + * Buffer partial hit, so reset the state to count the read bytes + * and continue. + */ + buf += read_bytes; + startptr += read_bytes; + count -= read_bytes; + } + + /* Buffer miss i.e., read_bytes = 0, so continue */ + } +#endif /* FRONTEND */ + p = buf; recptr = startptr; nbytes = count; diff --git a/src/include/access/xlog.h b/src/include/access/xlog.h index cfe5409738..c9941aa001 100644 --- a/src/include/access/xlog.h +++ b/src/include/access/xlog.h @@ -247,6 +247,12 @@ extern XLogRecPtr GetLastImportantRecPtr(void); extern void SetWalWriterSleeping(bool sleeping); +extern void XLogReadFromBuffers(XLogRecPtr startptr, + TimeLineID tli, + Size count, + char *buf, + Size *read_bytes); + /* * Routines used by xlogrecovery.c to call back into xlog.c during recovery. */ -- 2.34.1
From a889a11b814a902c0b4292f00a360acdab2c7c1f Mon Sep 17 00:00:00 2001 From: Bharath Rupireddy <bharath.rupireddyforpostgres@gmail.com> Date: Wed, 1 Mar 2023 08:40:57 +0000 Subject: [PATCH v5] Add test module for verifying WAL read from WAL buffers --- src/test/modules/Makefile | 1 + src/test/modules/meson.build | 1 + .../test_wal_read_from_buffers/.gitignore | 4 ++ .../test_wal_read_from_buffers/Makefile | 23 +++++++++ .../test_wal_read_from_buffers/meson.build | 36 +++++++++++++ .../test_wal_read_from_buffers/t/001_basic.pl | 44 ++++++++++++++++ .../test_wal_read_from_buffers--1.0.sql | 16 ++++++ .../test_wal_read_from_buffers.c | 51 +++++++++++++++++++ .../test_wal_read_from_buffers.control | 4 ++ 9 files changed, 180 insertions(+) create mode 100644 src/test/modules/test_wal_read_from_buffers/.gitignore create mode 100644 src/test/modules/test_wal_read_from_buffers/Makefile create mode 100644 src/test/modules/test_wal_read_from_buffers/meson.build create mode 100644 src/test/modules/test_wal_read_from_buffers/t/001_basic.pl create mode 100644 src/test/modules/test_wal_read_from_buffers/test_wal_read_from_buffers--1.0.sql create mode 100644 src/test/modules/test_wal_read_from_buffers/test_wal_read_from_buffers.c create mode 100644 src/test/modules/test_wal_read_from_buffers/test_wal_read_from_buffers.control diff --git a/src/test/modules/Makefile b/src/test/modules/Makefile index c629cbe383..ea33361f69 100644 --- a/src/test/modules/Makefile +++ b/src/test/modules/Makefile @@ -33,6 +33,7 @@ SUBDIRS = \ test_rls_hooks \ test_shm_mq \ test_slru \ + test_wal_read_from_buffers \ unsafe_tests \ worker_spi diff --git a/src/test/modules/meson.build b/src/test/modules/meson.build index 1baa6b558d..e3ffd3538d 100644 --- a/src/test/modules/meson.build +++ b/src/test/modules/meson.build @@ -29,5 +29,6 @@ subdir('test_regex') subdir('test_rls_hooks') subdir('test_shm_mq') subdir('test_slru') +subdir('test_wal_read_from_buffers') subdir('unsafe_tests') subdir('worker_spi') diff --git a/src/test/modules/test_wal_read_from_buffers/.gitignore b/src/test/modules/test_wal_read_from_buffers/.gitignore new file mode 100644 index 0000000000..5dcb3ff972 --- /dev/null +++ b/src/test/modules/test_wal_read_from_buffers/.gitignore @@ -0,0 +1,4 @@ +# Generated subdirectories +/log/ +/results/ +/tmp_check/ diff --git a/src/test/modules/test_wal_read_from_buffers/Makefile b/src/test/modules/test_wal_read_from_buffers/Makefile new file mode 100644 index 0000000000..7a09533ec7 --- /dev/null +++ b/src/test/modules/test_wal_read_from_buffers/Makefile @@ -0,0 +1,23 @@ +# src/test/modules/test_wal_read_from_buffers/Makefile + +MODULE_big = test_wal_read_from_buffers +OBJS = \ + $(WIN32RES) \ + test_wal_read_from_buffers.o +PGFILEDESC = "test_wal_read_from_buffers - test module to verify that WAL can be read from WAL buffers" + +EXTENSION = test_wal_read_from_buffers +DATA = test_wal_read_from_buffers--1.0.sql + +TAP_TESTS = 1 + +ifdef USE_PGXS +PG_CONFIG = pg_config +PGXS := $(shell $(PG_CONFIG) --pgxs) +include $(PGXS) +else +subdir = src/test/modules/test_wal_read_from_buffers +top_builddir = ../../../.. +include $(top_builddir)/src/Makefile.global +include $(top_srcdir)/contrib/contrib-global.mk +endif diff --git a/src/test/modules/test_wal_read_from_buffers/meson.build b/src/test/modules/test_wal_read_from_buffers/meson.build new file mode 100644 index 0000000000..40a36edc07 --- /dev/null +++ b/src/test/modules/test_wal_read_from_buffers/meson.build @@ -0,0 +1,36 @@ +# Copyright (c) 2022-2023, PostgreSQL Global Development Group + +# FIXME: prevent install during main install, but not during test :/ + +test_wal_read_from_buffers_sources = files( + 'test_wal_read_from_buffers.c', +) + +if host_system == 'windows' + test_wal_read_from_buffers_sources += rc_lib_gen.process(win32ver_rc, extra_args: [ + '--NAME', 'test_wal_read_from_buffers', + '--FILEDESC', 'test_wal_read_from_buffers - test module to verify that WAL can be read from WAL buffers',]) +endif + +test_wal_read_from_buffers = shared_module('test_wal_read_from_buffers', + test_wal_read_from_buffers_sources, + kwargs: pg_mod_args, +) +testprep_targets += test_wal_read_from_buffers + +install_data( + 'test_wal_read_from_buffers.control', + 'test_wal_read_from_buffers--1.0.sql', + kwargs: contrib_data_args, +) + +tests += { + 'name': 'test_wal_read_from_buffers', + 'sd': meson.current_source_dir(), + 'bd': meson.current_build_dir(), + 'tap': { + 'tests': [ + 't/001_basic.pl', + ], + }, +} diff --git a/src/test/modules/test_wal_read_from_buffers/t/001_basic.pl b/src/test/modules/test_wal_read_from_buffers/t/001_basic.pl new file mode 100644 index 0000000000..3448e0bed6 --- /dev/null +++ b/src/test/modules/test_wal_read_from_buffers/t/001_basic.pl @@ -0,0 +1,44 @@ +# Copyright (c) 2021-2023, PostgreSQL Global Development Group + +use strict; +use warnings; + +use PostgreSQL::Test::Cluster; +use PostgreSQL::Test::Utils; +use Test::More; + +my $node = PostgreSQL::Test::Cluster->new('main'); + +$node->init; + +# Ensure nobody interferes with us so that the WAL in WAL buffers don't get +# overwritten while running tests. +$node->append_conf( + 'postgresql.conf', qq( +autovacuum = off +checkpoint_timeout = 1h +wal_writer_delay = 10000ms +wal_writer_flush_after = 1GB +)); +$node->start; + +# Setup. +$node->safe_psql('postgres', 'CREATE EXTENSION test_wal_read_from_buffers'); + +# Get current insert LSN. After this, we generate some WAL which is guranteed +# to be in WAL buffers as there is no other WAL generating activity is +# happening on the server. We then verify if we can read the WAL from WAL +# buffers using this LSN. +my $lsn = + $node->safe_psql('postgres', 'SELECT pg_current_wal_insert_lsn();'); + +# Generate minimal WAL so that WAL buffers don't get overwritten. +$node->safe_psql('postgres', + "CREATE TABLE t (c int); INSERT INTO t VALUES (1);"); + +# Check if WAL is successfully read from WAL buffers. +my $result = $node->safe_psql('postgres', + qq{SELECT test_wal_read_from_buffers('$lsn')}); +is($result, 't', "WAL is successfully read from WAL buffers"); + +done_testing(); diff --git a/src/test/modules/test_wal_read_from_buffers/test_wal_read_from_buffers--1.0.sql b/src/test/modules/test_wal_read_from_buffers/test_wal_read_from_buffers--1.0.sql new file mode 100644 index 0000000000..8e89910133 --- /dev/null +++ b/src/test/modules/test_wal_read_from_buffers/test_wal_read_from_buffers--1.0.sql @@ -0,0 +1,16 @@ +/* src/test/modules/test_wal_read_from_buffers/test_wal_read_from_buffers--1.0.sql */ + +-- complain if script is sourced in psql, rather than via CREATE EXTENSION +\echo Use "CREATE EXTENSION test_wal_read_from_buffers" to load this file. \quit + +-- +-- test_wal_read_from_buffers() +-- +-- Returns true if WAL data at a given LSN can be read from WAL buffers. +-- Otherwise returns false. +-- +CREATE FUNCTION test_wal_read_from_buffers(IN lsn pg_lsn, + OUT read_from_buffers bool +) +AS 'MODULE_PATHNAME', 'test_wal_read_from_buffers' +LANGUAGE C STRICT PARALLEL UNSAFE; diff --git a/src/test/modules/test_wal_read_from_buffers/test_wal_read_from_buffers.c b/src/test/modules/test_wal_read_from_buffers/test_wal_read_from_buffers.c new file mode 100644 index 0000000000..ca8645101a --- /dev/null +++ b/src/test/modules/test_wal_read_from_buffers/test_wal_read_from_buffers.c @@ -0,0 +1,51 @@ +/*-------------------------------------------------------------------------- + * + * test_wal_read_from_buffers.c + * Test code for veryfing WAL read from WAL buffers. + * + * Portions Copyright (c) 1996-2023, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * IDENTIFICATION + * src/test/modules/test_wal_read_from_buffers/test_wal_read_from_buffers.c + * ------------------------------------------------------------------------- + */ + +#include "postgres.h" + +#include "access/xlog.h" +#include "fmgr.h" +#include "utils/pg_lsn.h" + +PG_MODULE_MAGIC; + +/* + * SQL function for verifying that WAL data at a given LSN can be read from WAL + * buffers. Returns true if read from WAL buffers, otherwise false. + */ +PG_FUNCTION_INFO_V1(test_wal_read_from_buffers); +Datum +test_wal_read_from_buffers(PG_FUNCTION_ARGS) +{ + XLogRecPtr lsn; + Size read_bytes; + TimeLineID tli; + char data[XLOG_BLCKSZ] = {0}; + + lsn = PG_GETARG_LSN(0); + + if (XLogRecPtrIsInvalid(lsn)) + PG_RETURN_BOOL(false); + + if (RecoveryInProgress()) + ereport(ERROR, + (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), + errmsg("recovery is in progress"), + errhint("WAL control functions cannot be executed during recovery."))); + + tli = GetWALInsertionTimeLine(); + + XLogReadFromBuffers(lsn, tli, XLOG_BLCKSZ, data, &read_bytes); + + PG_RETURN_LSN(read_bytes > 0); +} diff --git a/src/test/modules/test_wal_read_from_buffers/test_wal_read_from_buffers.control b/src/test/modules/test_wal_read_from_buffers/test_wal_read_from_buffers.control new file mode 100644 index 0000000000..7852b3e331 --- /dev/null +++ b/src/test/modules/test_wal_read_from_buffers/test_wal_read_from_buffers.control @@ -0,0 +1,4 @@ +comment = 'Test code for veryfing WAL read from WAL buffers' +default_version = '1.0' +module_pathname = '$libdir/test_wal_read_from_buffers' +relocatable = true -- 2.34.1