On Fri, Apr 5, 2024 at 12:15 AM Thomas Munro <thomas.mu...@gmail.com> wrote:
>
> Yeah, I plead benchmarking myopia, sorry.  The fastpath as committed
> is only reached when distance goes 2->1, as pg_prewarm does.  Oops.
> With the attached minor rearrangement, it works fine.  I also poked
> some more at that memory prefetcher.  Here are the numbers I got on a
> desktop system (Intel i9-9900 @ 3.1GHz, Linux 6.1, turbo disabled,
> cpufreq governor=performance, 2MB huge pages, SB=8GB, consumer NVMe,
> GCC -O3).
>
> create table t (i int, filler text) with (fillfactor=10);
> insert into t
> select g, repeat('x', 900) from generate_series(1, 560000) g;
> vacuum freeze t;
> set max_parallel_workers_per_gather = 0;
>
> select count(*) from t;
>
> cold = must be read from actual disk (Linux drop_caches)
> warm = read from linux page cache
> hot = already in pg cache via pg_prewarm
>
>                                     cold   warm    hot
> master                            2479ms  886ms  200ms
> seqscan                           2498ms  716ms  211ms <-- regression
> seqscan + fastpath                2493ms  711ms  200ms <-- fixed, I think?
> seqscan + memprefetch             2499ms  716ms  182ms
> seqscan + fastpath + memprefetch  2505ms  710ms  170ms <-- \O/
>
> Cold has no difference.  That's just my disk demonstrating Linux RA at
> 128kB (default); random I/O is obviously a more interesting story.
> It's consistently a smidgen faster with Linux RA set to 2MB (as in
> blockdev --setra 4096 /dev/nvmeXXX), and I believe this effect
> probably also increases on fancier faster storage than what I have on
> hand:
>
>                                     cold
> master                            1775ms
> seqscan + fastpath + memprefetch  1700ms
>
> Warm is faster as expected (fewer system calls schlepping data
> kernel->userspace).
>
> The interesting column is hot.  The 200ms->211ms regression is due to
> the extra bookkeeping in the slow path.  The rejiggered fastpath code
> fixes it for me, or maybe sometimes shows an extra 1ms.  Phew.  Can
> you reproduce that?

I am able to reproduce the fast path solving the issue using Heikki's
example here [1] but in shared buffers (hot).

master:                   25 ms
stream read:              29 ms
stream read + fast path:  25 ms

I haven't looked into or reviewed the memory prefetching part.

While reviewing 0002, I realized that I don't quite see how
read_stream_get_block() will be used in the fastpath -- which it
claims in its comments.
read_stream_next_buffer() is the only caller of
read_stream_look_ahead()->read_stream_get_block(), and if fast_path is
true, read_stream_next_buffer() always returns before calling
read_stream_look_ahead(). Maybe I am missing something. I see
fast_path uses read_stream_fill_blocknums() to invoke the callback.

Oh and why does READ_STREAM_DISABLE_FAST_PATH macro exist?

Otherwise 0002 looks good to me.

I haven't reviewed 0003 or 0004. I attached a new version (v11)
because I noticed an outdated comment in my seq scan streaming read
user patch (0001). The other patches in the set are untouched from
your versions besides adding author/reviewer info in commit message
for 0002.

- Melanie

[1] 
https://www.postgresql.org/message-id/3b0f3701-addd-4629-9257-cf28e1a6e6a1%40iki.fi
From acbd7172f10857d49d4b5d4afc4efab704b33486 Mon Sep 17 00:00:00 2001
From: David Rowley <dgrow...@gmail.com>
Date: Mon, 10 Jul 2023 11:22:34 +0200
Subject: [PATCH v11 3/4] Add pg_prefetch_mem() macro to load cache lines.

Initially mapping to GCC, Clang and MSVC builtins.

Discussion: https://postgr.es/m/CAEepm%3D2y9HM9QP%2BHhRZdQ3pU6FShSMyu%3DV1uHXhQ5gG-dketHg%40mail.gmail.com
---
 config/c-compiler.m4       | 17 ++++++++++++++++
 configure                  | 40 ++++++++++++++++++++++++++++++++++++++
 configure.ac               |  3 +++
 meson.build                |  1 +
 src/include/c.h            |  8 ++++++++
 src/include/pg_config.h.in |  3 +++
 6 files changed, 72 insertions(+)

diff --git a/config/c-compiler.m4 b/config/c-compiler.m4
index 3268a780bb0..4cc02f97601 100644
--- a/config/c-compiler.m4
+++ b/config/c-compiler.m4
@@ -355,6 +355,23 @@ AC_DEFINE_UNQUOTED(AS_TR_CPP([HAVE$1]), 1,
                    [Define to 1 if your compiler understands $1.])
 fi])# PGAC_CHECK_BUILTIN_FUNC
 
+# PGAC_CHECK_BUILTIN_VOID_FUNC
+# -----------------------
+# Variant for void functions.
+AC_DEFUN([PGAC_CHECK_BUILTIN_VOID_FUNC],
+[AC_CACHE_CHECK(for $1, pgac_cv$1,
+[AC_LINK_IFELSE([AC_LANG_PROGRAM([
+void
+call$1($2)
+{
+    $1(x);
+}], [])],
+[pgac_cv$1=yes],
+[pgac_cv$1=no])])
+if test x"${pgac_cv$1}" = xyes ; then
+AC_DEFINE_UNQUOTED(AS_TR_CPP([HAVE$1]), 1,
+                   [Define to 1 if your compiler understands $1.])
+fi])# PGAC_CHECK_BUILTIN_VOID_FUNC
 
 
 # PGAC_CHECK_BUILTIN_FUNC_PTR
diff --git a/configure b/configure
index 36feeafbb23..79b78c33ddc 100755
--- a/configure
+++ b/configure
@@ -15543,6 +15543,46 @@ _ACEOF
 
 fi
 
+# Can we use a built-in to prefetch memory?
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for __builtin_prefetch" >&5
+$as_echo_n "checking for __builtin_prefetch... " >&6; }
+if ${pgac_cv__builtin_prefetch+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+void
+call__builtin_prefetch(void *x)
+{
+    __builtin_prefetch(x);
+}
+int
+main ()
+{
+
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_link "$LINENO"; then :
+  pgac_cv__builtin_prefetch=yes
+else
+  pgac_cv__builtin_prefetch=no
+fi
+rm -f core conftest.err conftest.$ac_objext \
+    conftest$ac_exeext conftest.$ac_ext
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $pgac_cv__builtin_prefetch" >&5
+$as_echo "$pgac_cv__builtin_prefetch" >&6; }
+if test x"${pgac_cv__builtin_prefetch}" = xyes ; then
+
+cat >>confdefs.h <<_ACEOF
+#define HAVE__BUILTIN_PREFETCH 1
+_ACEOF
+
+fi
+
 # We require 64-bit fseeko() to be available, but run this check anyway
 # in case it finds that _LARGEFILE_SOURCE has to be #define'd for that.
 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for _LARGEFILE_SOURCE value needed for large files" >&5
diff --git a/configure.ac b/configure.ac
index 57f734879e1..6cd5441ed32 100644
--- a/configure.ac
+++ b/configure.ac
@@ -1781,6 +1781,9 @@ PGAC_CHECK_BUILTIN_FUNC([__builtin_popcount], [unsigned int x])
 # so it needs a different test function.
 PGAC_CHECK_BUILTIN_FUNC_PTR([__builtin_frame_address], [0])
 
+# Can we use a built-in to prefetch memory?
+PGAC_CHECK_BUILTIN_VOID_FUNC([__builtin_prefetch], [void *x])
+
 # We require 64-bit fseeko() to be available, but run this check anyway
 # in case it finds that _LARGEFILE_SOURCE has to be #define'd for that.
 AC_FUNC_FSEEKO
diff --git a/meson.build b/meson.build
index 87437960bc3..210fd4c1cd7 100644
--- a/meson.build
+++ b/meson.build
@@ -1707,6 +1707,7 @@ builtins = [
   'constant_p',
   'frame_address',
   'popcount',
+  'prefetch',
   'unreachable',
 ]
 
diff --git a/src/include/c.h b/src/include/c.h
index cf37e02fe1f..0b7aa3e2924 100644
--- a/src/include/c.h
+++ b/src/include/c.h
@@ -412,6 +412,14 @@ typedef void (*pg_funcptr_t) (void);
 #define HAVE_PRAGMA_GCC_SYSTEM_HEADER	1
 #endif
 
+/* Do we have support for prefetching memory? */
+#if defined(HAVE__BUILTIN_PREFETCH)
+#define pg_prefetch_mem(a) __builtin_prefetch(a)
+#elif defined(_MSC_VER)
+#define pg_prefetch_mem(a) _m_prefetch(a)
+#else
+#define pg_prefetch_mem(a)
+#endif
 
 /* ----------------------------------------------------------------
  *				Section 2:	bool, true, false
diff --git a/src/include/pg_config.h.in b/src/include/pg_config.h.in
index 591e1ca3df6..085abf4c84e 100644
--- a/src/include/pg_config.h.in
+++ b/src/include/pg_config.h.in
@@ -543,6 +543,9 @@
 /* Define to 1 if your compiler understands __builtin_popcount. */
 #undef HAVE__BUILTIN_POPCOUNT
 
+/* Define to 1 if your compiler understands __builtin_prefetch. */
+#undef HAVE__BUILTIN_PREFETCH
+
 /* Define to 1 if your compiler understands __builtin_types_compatible_p. */
 #undef HAVE__BUILTIN_TYPES_COMPATIBLE_P
 
-- 
2.40.1

From 8c1cd34f7be9651b1fbe69e3d351c79cbf5b202a Mon Sep 17 00:00:00 2001
From: Thomas Munro <thomas.mu...@gmail.com>
Date: Fri, 5 Apr 2024 12:08:24 +1300
Subject: [PATCH v11 2/4] Improve read_stream.c's fast path.

Unfortunately the "fast path" for cached scans that don't do any I/O was
coded in a way that could be used by pg_prewarm, but not the proposed
sequential scan patch which starts with a low initial distance due to
flags used to initialize the stream.  Refactor to fix that.

Author: Thomas Munro
Reviewed-by: Melanie Plageman
Discussion: https://postgr.es/m/flat/CAAKRu_YtXJiYKQvb5JsA2SkwrsizYLugs4sSOZh3EAjKUg%3DgEQ%40mail.gmail.com
---
 src/backend/storage/aio/read_stream.c | 70 +++++++++++----------------
 1 file changed, 27 insertions(+), 43 deletions(-)

diff --git a/src/backend/storage/aio/read_stream.c b/src/backend/storage/aio/read_stream.c
index 4f21262ff5e..80b8be8cc7e 100644
--- a/src/backend/storage/aio/read_stream.c
+++ b/src/backend/storage/aio/read_stream.c
@@ -578,13 +578,12 @@ read_stream_next_buffer(ReadStream *stream, void **per_buffer_data)
 	if (likely(stream->fast_path))
 	{
 		BlockNumber next_blocknum;
-		bool		need_wait;
 
 		/* Fast path assumptions. */
 		Assert(stream->ios_in_progress == 0);
 		Assert(stream->pinned_buffers == 1);
 		Assert(stream->distance == 1);
-		Assert(stream->pending_read_nblocks == 1);
+		Assert(stream->pending_read_nblocks == 0);
 		Assert(stream->per_buffer_data_size == 0);
 
 		/* We're going to return the buffer we pinned last time. */
@@ -594,40 +593,29 @@ read_stream_next_buffer(ReadStream *stream, void **per_buffer_data)
 		buffer = stream->buffers[oldest_buffer_index];
 		Assert(buffer != InvalidBuffer);
 
-		/*
-		 * Pin a buffer for the next call.  Same buffer entry, and arbitrary
-		 * I/O entry (they're all free).
-		 */
-		need_wait = StartReadBuffer(&stream->ios[0].op,
-									&stream->buffers[oldest_buffer_index],
-									stream->pending_read_blocknum,
-									stream->advice_enabled ?
-									READ_BUFFERS_ISSUE_ADVICE : 0);
-
-		/* Choose the block the next call will pin. */
+		/* Choose the next block to pin. */
 		if (unlikely(stream->blocknums_next == stream->blocknums_count))
 			read_stream_fill_blocknums(stream);
 		next_blocknum = stream->blocknums[stream->blocknums_next++];
 
-		/*
-		 * Fast return if the next call doesn't require I/O for the buffer we
-		 * just pinned, and we have a block number to give it as a pending
-		 * read.
-		 */
-		if (likely(!need_wait && next_blocknum != InvalidBlockNumber))
+		if (likely(next_blocknum != InvalidBlockNumber))
 		{
-			stream->pending_read_blocknum = next_blocknum;
-			return buffer;
-		}
-
-		/*
-		 * For anything more complex, set up some more state and take the slow
-		 * path next time.
-		 */
-		stream->fast_path = false;
+			/*
+			 * Pin a buffer for the next call.  Same buffer entry, and
+			 * arbitrary I/O entry (they're all free).  We don't have to
+			 * adjust pinned_buffers because we're transferring one to caller
+			 * but pinning one more.
+			 */
+			if (likely(!StartReadBuffer(&stream->ios[0].op,
+										&stream->buffers[oldest_buffer_index],
+										next_blocknum,
+										stream->advice_enabled ?
+										READ_BUFFERS_ISSUE_ADVICE : 0)))
+			{
+				/* Fast return. */
+				return buffer;
+			}
 
-		if (need_wait)
-		{
 			/* Next call must wait for I/O for the newly pinned buffer. */
 			stream->oldest_io_index = 0;
 			stream->next_io_index = stream->max_ios > 1 ? 1 : 0;
@@ -635,17 +623,15 @@ read_stream_next_buffer(ReadStream *stream, void **per_buffer_data)
 			stream->ios[0].buffer_index = oldest_buffer_index;
 			stream->seq_blocknum = next_blocknum + 1;
 		}
-		if (next_blocknum == InvalidBlockNumber)
-		{
-			/* Next call hits end of stream and can't pin anything more. */
-			stream->distance = 0;
-			stream->pending_read_nblocks = 0;
-		}
 		else
 		{
-			/* Set up the pending read. */
-			stream->pending_read_blocknum = next_blocknum;
+			/* No more blocks, end of stream. */
+			stream->distance = 0;
+			stream->oldest_buffer_index = stream->next_buffer_index;
+			stream->pinned_buffers = 0;
 		}
+
+		stream->fast_path = false;
 		return buffer;
 	}
 #endif
@@ -762,15 +748,11 @@ read_stream_next_buffer(ReadStream *stream, void **per_buffer_data)
 	if (stream->ios_in_progress == 0 &&
 		stream->pinned_buffers == 1 &&
 		stream->distance == 1 &&
-		stream->pending_read_nblocks == 1 &&
+		stream->pending_read_nblocks == 0 &&
 		stream->per_buffer_data_size == 0)
 	{
 		stream->fast_path = true;
 	}
-	else
-	{
-		stream->fast_path = false;
-	}
 #endif
 
 	return buffer;
@@ -789,6 +771,7 @@ read_stream_reset(ReadStream *stream)
 
 	/* Stop looking ahead. */
 	stream->distance = 0;
+	stream->fast_path = false;
 
 	/* Unpin anything that wasn't consumed. */
 	while ((buffer = read_stream_next_buffer(stream, NULL)) != InvalidBuffer)
@@ -799,6 +782,7 @@ read_stream_reset(ReadStream *stream)
 
 	/* Start off assuming data is cached. */
 	stream->distance = 1;
+	stream->fast_path = false;
 }
 
 /*
-- 
2.40.1

From 47eb8392b8cf75a4e21dc50414af4857a5eade56 Mon Sep 17 00:00:00 2001
From: Thomas Munro <thomas.mu...@gmail.com>
Date: Fri, 5 Apr 2024 13:32:14 +1300
Subject: [PATCH v11 1/4] Use streaming IO in heapam sequential and TID range
 scans

Instead of calling ReadBuffer() for each block heap sequential scans and
TID range scans now use the streaming read API introduced in b5a9b18cd0.

Author: Melanie Plageman
Reviewed-by: Thomas Munro
Discussion: https://postgr.es/m/flat/CAAKRu_YtXJiYKQvb5JsA2SkwrsizYLugs4sSOZh3EAjKUg%3DgEQ%40mail.gmail.com
---
 src/backend/access/heap/heapam.c | 98 ++++++++++++++++++++++++++------
 src/include/access/heapam.h      | 15 +++++
 2 files changed, 95 insertions(+), 18 deletions(-)

diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c
index dada2ecd1e3..bafe023bce8 100644
--- a/src/backend/access/heap/heapam.c
+++ b/src/backend/access/heap/heapam.c
@@ -223,6 +223,25 @@ static const int MultiXactStatusLock[MaxMultiXactStatus + 1] =
  * ----------------------------------------------------------------
  */
 
+static BlockNumber
+heap_scan_stream_read_next(ReadStream *pgsr, void *private_data,
+						   void *per_buffer_data)
+{
+	HeapScanDesc scan = (HeapScanDesc) private_data;
+
+	if (unlikely(!scan->rs_inited))
+	{
+		scan->rs_prefetch_block = heapgettup_initial_block(scan, scan->rs_dir);
+		scan->rs_inited = true;
+	}
+	else
+		scan->rs_prefetch_block = heapgettup_advance_block(scan,
+														   scan->rs_prefetch_block,
+														   scan->rs_dir);
+
+	return scan->rs_prefetch_block;
+}
+
 /* ----------------
  *		initscan - scan code common to heap_beginscan and heap_rescan
  * ----------------
@@ -325,6 +344,13 @@ initscan(HeapScanDesc scan, ScanKey key, bool keep_startblock)
 	scan->rs_cbuf = InvalidBuffer;
 	scan->rs_cblock = InvalidBlockNumber;
 
+	/*
+	 * Initialize to ForwardScanDirection because it is most common and heap
+	 * scans usually must go forwards before going backward.
+	 */
+	scan->rs_dir = ForwardScanDirection;
+	scan->rs_prefetch_block = InvalidBlockNumber;
+
 	/* page-at-a-time fields are always invalid when not rs_inited */
 
 	/*
@@ -462,12 +488,14 @@ heap_prepare_pagescan(TableScanDesc sscan)
 /*
  * heap_fetch_next_buffer - read and pin the next block from MAIN_FORKNUM.
  *
- * Read the next block of the scan relation into a buffer and pin that buffer
- * before saving it in the scan descriptor.
+ * Read the next block of the scan relation from the read stream and pin that
+ * buffer before saving it in the scan descriptor.
  */
 static inline void
 heap_fetch_next_buffer(HeapScanDesc scan, ScanDirection dir)
 {
+	Assert(scan->rs_read_stream);
+
 	/* release previous scan buffer, if any */
 	if (BufferIsValid(scan->rs_cbuf))
 	{
@@ -482,25 +510,23 @@ heap_fetch_next_buffer(HeapScanDesc scan, ScanDirection dir)
 	 */
 	CHECK_FOR_INTERRUPTS();
 
-	if (unlikely(!scan->rs_inited))
+	/*
+	 * If the scan direction is changing, reset the prefetch block to the
+	 * current block. Otherwise, we will incorrectly prefetch the blocks
+	 * between the prefetch block and the current block again before
+	 * prefetching blocks in the new, correct scan direction.
+	 */
+	if (unlikely(scan->rs_dir != dir))
 	{
-		scan->rs_cblock = heapgettup_initial_block(scan, dir);
+		scan->rs_prefetch_block = scan->rs_cblock;
+		read_stream_reset(scan->rs_read_stream);
+	}
 
-		/* ensure rs_cbuf is invalid when we get InvalidBlockNumber */
-		Assert(scan->rs_cblock != InvalidBlockNumber ||
-			   !BufferIsValid(scan->rs_cbuf));
+	scan->rs_dir = dir;
 
-		scan->rs_inited = true;
-	}
-	else
-		scan->rs_cblock = heapgettup_advance_block(scan, scan->rs_cblock,
-												   dir);
-
-	/* read block if valid */
-	if (BlockNumberIsValid(scan->rs_cblock))
-		scan->rs_cbuf = ReadBufferExtended(scan->rs_base.rs_rd, MAIN_FORKNUM,
-										   scan->rs_cblock, RBM_NORMAL,
-										   scan->rs_strategy);
+	scan->rs_cbuf = read_stream_next_buffer(scan->rs_read_stream, NULL);
+	if (BufferIsValid(scan->rs_cbuf))
+		scan->rs_cblock = BufferGetBlockNumber(scan->rs_cbuf);
 }
 
 /*
@@ -833,6 +859,7 @@ continue_page:
 
 	scan->rs_cbuf = InvalidBuffer;
 	scan->rs_cblock = InvalidBlockNumber;
+	scan->rs_prefetch_block = InvalidBlockNumber;
 	tuple->t_data = NULL;
 	scan->rs_inited = false;
 }
@@ -928,6 +955,7 @@ continue_page:
 		ReleaseBuffer(scan->rs_cbuf);
 	scan->rs_cbuf = InvalidBuffer;
 	scan->rs_cblock = InvalidBlockNumber;
+	scan->rs_prefetch_block = InvalidBlockNumber;
 	tuple->t_data = NULL;
 	scan->rs_inited = false;
 }
@@ -1021,6 +1049,26 @@ heap_beginscan(Relation relation, Snapshot snapshot,
 
 	initscan(scan, key, false);
 
+	scan->rs_read_stream = NULL;
+
+	/*
+	 * Set up a read stream for sequential scans and TID range scans. This
+	 * should be done after initscan() because initscan() allocates the
+	 * BufferAccessStrategy object passed to the streaming read API.
+	 */
+	if (scan->rs_base.rs_flags & SO_TYPE_SEQSCAN ||
+		scan->rs_base.rs_flags & SO_TYPE_TIDRANGESCAN)
+	{
+		scan->rs_read_stream = read_stream_begin_relation(READ_STREAM_SEQUENTIAL,
+														  scan->rs_strategy,
+														  scan->rs_base.rs_rd,
+														  MAIN_FORKNUM,
+														  heap_scan_stream_read_next,
+														  scan,
+														  0);
+	}
+
+
 	return (TableScanDesc) scan;
 }
 
@@ -1055,6 +1103,14 @@ heap_rescan(TableScanDesc sscan, ScanKey key, bool set_params,
 	if (BufferIsValid(scan->rs_cbuf))
 		ReleaseBuffer(scan->rs_cbuf);
 
+	/*
+	 * The read stream is reset on rescan. This must be done before
+	 * initscan(), as some state referred to by read_stream_reset() is reset
+	 * in initscan().
+	 */
+	if (scan->rs_read_stream)
+		read_stream_reset(scan->rs_read_stream);
+
 	/*
 	 * reinitialize scan descriptor
 	 */
@@ -1074,6 +1130,12 @@ heap_endscan(TableScanDesc sscan)
 	if (BufferIsValid(scan->rs_cbuf))
 		ReleaseBuffer(scan->rs_cbuf);
 
+	/*
+	 * Must free the read stream before freeing the BufferAccessStrategy.
+	 */
+	if (scan->rs_read_stream)
+		read_stream_end(scan->rs_read_stream);
+
 	/*
 	 * decrement relation reference count and free scan descriptor storage
 	 */
diff --git a/src/include/access/heapam.h b/src/include/access/heapam.h
index 2765efc4e5e..332a7faa8d1 100644
--- a/src/include/access/heapam.h
+++ b/src/include/access/heapam.h
@@ -25,6 +25,7 @@
 #include "storage/bufpage.h"
 #include "storage/dsm.h"
 #include "storage/lockdefs.h"
+#include "storage/read_stream.h"
 #include "storage/shm_toc.h"
 #include "utils/relcache.h"
 #include "utils/snapshot.h"
@@ -70,6 +71,20 @@ typedef struct HeapScanDescData
 
 	HeapTupleData rs_ctup;		/* current tuple in scan, if any */
 
+	/* For scans that stream reads */
+	ReadStream *rs_read_stream;
+
+	/*
+	 * For sequential scans and TID range scans to stream reads. The read
+	 * stream is allocated at the beginning of the scan and reset on rescan or
+	 * when the scan direction changes. The scan direction is saved each time
+	 * a new page is requested. If the scan direction changes from one page to
+	 * the next, the read stream releases all previously pinned buffers and
+	 * resets the prefetch block.
+	 */
+	ScanDirection rs_dir;
+	BlockNumber rs_prefetch_block;
+
 	/*
 	 * For parallel scans to store page allocation data.  NULL when not
 	 * performing a parallel scan.
-- 
2.40.1

From 3840f869b7720c1458cadb99cec0d54e72aef9a9 Mon Sep 17 00:00:00 2001
From: Thomas Munro <thomas.mu...@gmail.com>
Date: Fri, 5 Apr 2024 15:06:32 +1300
Subject: [PATCH v11 4/4] Prefetch page header memory when streaming relations.

read_stream.c can always see at least one page ahead of the one the
caller is accessing.  Take the opportunity to prefetch the cache line
that holds the next page's header.  For some scans, that can generate a
decent speedup, though real world results will depend on how much work
the CPU does before it gets around to accessing the next page.
---
 src/backend/storage/aio/read_stream.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/src/backend/storage/aio/read_stream.c b/src/backend/storage/aio/read_stream.c
index 80b8be8cc7e..35ee1fc1043 100644
--- a/src/backend/storage/aio/read_stream.c
+++ b/src/backend/storage/aio/read_stream.c
@@ -612,7 +612,8 @@ read_stream_next_buffer(ReadStream *stream, void **per_buffer_data)
 										stream->advice_enabled ?
 										READ_BUFFERS_ISSUE_ADVICE : 0)))
 			{
-				/* Fast return. */
+				/* Predict caller will soon access next page's header. */
+				pg_prefetch_mem(BufferGetPage(stream->buffers[oldest_buffer_index]));
 				return buffer;
 			}
 
@@ -743,6 +744,10 @@ read_stream_next_buffer(ReadStream *stream, void **per_buffer_data)
 	/* Prepare for the next call. */
 	read_stream_look_ahead(stream, false);
 
+	/* Predict caller will soon access next page's header. */
+	if (stream->pinned_buffers > 0)
+		pg_prefetch_mem(BufferGetPage(stream->buffers[stream->oldest_buffer_index]));
+
 #ifndef READ_STREAM_DISABLE_FAST_PATH
 	/* See if we can take the fast path for all-cached scans next time. */
 	if (stream->ios_in_progress == 0 &&
-- 
2.40.1

Reply via email to