On Thu, 3 Nov 2022 at 06:25, Andres Freund <and...@anarazel.de> wrote: > Attached is an experimental patch/hack for that. It ended up being more > beneficial to make the access ordering more optimal than prefetching the tuple > contents, but I'm not at all sure that's the be-all-end-all.
Thanks for writing that patch. I've been experimenting with it. I tried unrolling the loop (patch 0003) as you mentioned in: + * FIXME: Worth unrolling so that we don't fetch the same cacheline + * over and over, due to line items being smaller than a cacheline? but didn't see any gains from doing that. I also adjusted your patch a little so that instead of doing: - OffsetNumber rs_vistuples[MaxHeapTuplesPerPage]; /* their offsets */ + OffsetNumber *rs_vistuples; + OffsetNumber rs_vistuples_d[MaxHeapTuplesPerPage]; /* their offsets */ to work around the issue of having to populate rs_vistuples_d in reverse, I added a new field called rs_startindex to mark where the first element in the rs_vistuples array is. The way you wrote it seems to require fewer code changes, but per the FIXME comment you left, I get the idea you just did it the way you did to make it work enough for testing. I'm quite keen to move forward in committing the 0001 patch to add the pg_prefetch_mem macro. What I'm a little undecided about is what the best patch is to commit first to make use of the new macro. 
I did some tests on the attached set of patches: alter system set max_parallel_workers_per_gather = 0; select pg_reload_conf(); create table t as select a from generate_series(1,10000000)a; alter table t set (autovacuum_enabled=false); $ cat bench.sql select * from t where a = 0; psql -c "select pg_prewarm('t');" postgres -- Test 1 no frozen tuples in "t" Master (@9c6ad5eaa): $ pgbench -n -f bench.sql -M prepared -T 10 postgres | grep -E "^latency" latency average = 383.332 ms latency average = 375.747 ms latency average = 376.090 ms Master + 0001 + 0002: $ pgbench -n -f bench.sql -M prepared -T 10 postgres | grep -E "^latency" latency average = 370.133 ms latency average = 370.149 ms latency average = 370.157 ms Master + 0001 + 0005: $ pgbench -n -f bench.sql -M prepared -T 10 postgres | grep -E "^latency" latency average = 372.662 ms latency average = 371.034 ms latency average = 372.709 ms -- Test 2 "select count(*) from t" with all tuples frozen $ cat bench1.sql select count(*) from t; psql -c "vacuum freeze t;" postgres psql -c "select pg_prewarm('t');" postgres Master (@9c6ad5eaa): $ pgbench -n -f bench1.sql -M prepared -T 10 postgres | grep -E "^latency" latency average = 406.238 ms latency average = 407.029 ms latency average = 406.962 ms Master + 0001 + 0005: $ pgbench -n -f bench1.sql -M prepared -T 10 postgres | grep -E "^latency" latency average = 345.470 ms latency average = 345.775 ms latency average = 345.354 ms My current thoughts are that it might be best to go with 0005 to start with. I know Melanie is working on making some changes in this area, so perhaps it's best to leave 0002 until that work is complete. David
From 491df9d6ab87a54bbc76b876484733d02d6c94ea Mon Sep 17 00:00:00 2001 From: David Rowley <dgrow...@gmail.com> Date: Wed, 19 Oct 2022 08:54:01 +1300 Subject: [PATCH v2 1/5] Add pg_prefetch_mem() macro to load cache lines. Initially mapping to GCC, Clang and MSVC builtins. Discussion: https://postgr.es/m/CAEepm%3D2y9HM9QP%2BHhRZdQ3pU6FShSMyu%3DV1uHXhQ5gG-dketHg%40mail.gmail.com --- config/c-compiler.m4 | 17 ++++++++++++++++ configure | 40 ++++++++++++++++++++++++++++++++++++++ configure.ac | 3 +++ meson.build | 3 ++- src/include/c.h | 8 ++++++++ src/include/pg_config.h.in | 3 +++ src/tools/msvc/Solution.pm | 1 + 7 files changed, 74 insertions(+), 1 deletion(-) diff --git a/config/c-compiler.m4 b/config/c-compiler.m4 index 000b075312..582a47501c 100644 --- a/config/c-compiler.m4 +++ b/config/c-compiler.m4 @@ -355,6 +355,23 @@ AC_DEFINE_UNQUOTED(AS_TR_CPP([HAVE$1]), 1, [Define to 1 if your compiler understands $1.]) fi])# PGAC_CHECK_BUILTIN_FUNC +# PGAC_CHECK_BUILTIN_VOID_FUNC +# ----------------------- +# Variant for void functions. +AC_DEFUN([PGAC_CHECK_BUILTIN_VOID_FUNC], +[AC_CACHE_CHECK(for $1, pgac_cv$1, +[AC_LINK_IFELSE([AC_LANG_PROGRAM([ +void +call$1($2) +{ + $1(x); +}], [])], +[pgac_cv$1=yes], +[pgac_cv$1=no])]) +if test x"${pgac_cv$1}" = xyes ; then +AC_DEFINE_UNQUOTED(AS_TR_CPP([HAVE$1]), 1, + [Define to 1 if your compiler understands $1.]) +fi])# PGAC_CHECK_BUILTIN_VOID_FUNC # PGAC_CHECK_BUILTIN_FUNC_PTR diff --git a/configure b/configure index 3966368b8d..c4685b8a1e 100755 --- a/configure +++ b/configure @@ -15988,6 +15988,46 @@ _ACEOF fi +# Can we use a built-in to prefetch memory? +{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for __builtin_prefetch" >&5 +$as_echo_n "checking for __builtin_prefetch... " >&6; } +if ${pgac_cv__builtin_prefetch+:} false; then : + $as_echo_n "(cached) " >&6 +else + cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. 
*/ + +void +call__builtin_prefetch(void *x) +{ + __builtin_prefetch(x); +} +int +main () +{ + + ; + return 0; +} +_ACEOF +if ac_fn_c_try_link "$LINENO"; then : + pgac_cv__builtin_prefetch=yes +else + pgac_cv__builtin_prefetch=no +fi +rm -f core conftest.err conftest.$ac_objext \ + conftest$ac_exeext conftest.$ac_ext +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $pgac_cv__builtin_prefetch" >&5 +$as_echo "$pgac_cv__builtin_prefetch" >&6; } +if test x"${pgac_cv__builtin_prefetch}" = xyes ; then + +cat >>confdefs.h <<_ACEOF +#define HAVE__BUILTIN_PREFETCH 1 +_ACEOF + +fi + # We require 64-bit fseeko() to be available, but run this check anyway # in case it finds that _LARGEFILE_SOURCE has to be #define'd for that. { $as_echo "$as_me:${as_lineno-$LINENO}: checking for _LARGEFILE_SOURCE value needed for large files" >&5 diff --git a/configure.ac b/configure.ac index f76b7ee31f..2d4938d43d 100644 --- a/configure.ac +++ b/configure.ac @@ -1802,6 +1802,9 @@ PGAC_CHECK_BUILTIN_FUNC([__builtin_popcount], [unsigned int x]) # so it needs a different test function. PGAC_CHECK_BUILTIN_FUNC_PTR([__builtin_frame_address], [0]) +# Can we use a built-in to prefetch memory? +PGAC_CHECK_BUILTIN_VOID_FUNC([__builtin_prefetch], [void *x]) + # We require 64-bit fseeko() to be available, but run this check anyway # in case it finds that _LARGEFILE_SOURCE has to be #define'd for that. AC_FUNC_FSEEKO diff --git a/meson.build b/meson.build index 058382046e..096643703c 100644 --- a/meson.build +++ b/meson.build @@ -1587,10 +1587,11 @@ builtins = [ 'bswap32', 'bswap64', 'clz', - 'ctz', 'constant_p', + 'ctz', 'frame_address', 'popcount', + 'prefetch', 'unreachable', ] diff --git a/src/include/c.h b/src/include/c.h index 98cdd285dd..a7f7531450 100644 --- a/src/include/c.h +++ b/src/include/c.h @@ -361,6 +361,14 @@ typedef void (*pg_funcptr_t) (void); */ #define FLEXIBLE_ARRAY_MEMBER /* empty */ +/* Do we have support for prefetching memory? 
*/ +#if defined(HAVE__BUILTIN_PREFETCH) +#define pg_prefetch_mem(a) __builtin_prefetch(a) +#elif defined(_MSC_VER) +#define pg_prefetch_mem(a) _m_prefetch(a) +#else +#define pg_prefetch_mem(a) +#endif /* ---------------------------------------------------------------- * Section 2: bool, true, false diff --git a/src/include/pg_config.h.in b/src/include/pg_config.h.in index c5a80b829e..07a661e288 100644 --- a/src/include/pg_config.h.in +++ b/src/include/pg_config.h.in @@ -559,6 +559,9 @@ /* Define to 1 if your compiler understands __builtin_popcount. */ #undef HAVE__BUILTIN_POPCOUNT +/* Define to 1 if your compiler understands __builtin_prefetch. */ +#undef HAVE__BUILTIN_PREFETCH + /* Define to 1 if your compiler understands __builtin_types_compatible_p. */ #undef HAVE__BUILTIN_TYPES_COMPATIBLE_P diff --git a/src/tools/msvc/Solution.pm b/src/tools/msvc/Solution.pm index c2acb58df0..95de91890e 100644 --- a/src/tools/msvc/Solution.pm +++ b/src/tools/msvc/Solution.pm @@ -227,6 +227,7 @@ sub GenerateFiles HAVE_BACKTRACE_SYMBOLS => undef, HAVE_BIO_GET_DATA => undef, HAVE_BIO_METH_NEW => undef, + HAVE__BUILTIN_PREFETCH => undef, HAVE_COMPUTED_GOTO => undef, HAVE_COPYFILE => undef, HAVE_COPYFILE_H => undef, -- 2.35.1.windows.2
From c5ec896a2df02041f08d1e41a982223781137d5f Mon Sep 17 00:00:00 2001 From: David Rowley <dgrow...@gmail.com> Date: Thu, 17 Nov 2022 13:04:49 +1300 Subject: [PATCH v2 2/5] Perform memory prefetching in heapgetpage --- src/backend/access/heap/heapam.c | 87 +++++++++++++++++++----- src/backend/access/heap/heapam_handler.c | 7 +- src/include/access/heapam.h | 1 + 3 files changed, 74 insertions(+), 21 deletions(-) diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c index d18c5ca6f5..81c7f69644 100644 --- a/src/backend/access/heap/heapam.c +++ b/src/backend/access/heap/heapam.c @@ -451,31 +451,82 @@ heapgetpage(TableScanDesc sscan, BlockNumber block) */ all_visible = PageIsAllVisible(page) && !snapshot->takenDuringRecovery; - for (lineoff = FirstOffsetNumber, lpp = PageGetItemId(page, lineoff); - lineoff <= lines; - lineoff++, lpp++) + if (all_visible) { - if (ItemIdIsNormal(lpp)) + HeapTupleData loctup; + + loctup.t_tableOid = RelationGetRelid(scan->rs_base.rs_rd); + + for (lineoff = FirstOffsetNumber, lpp = PageGetItemId(page, lineoff); + lineoff <= lines; + lineoff++, lpp++) { - HeapTupleData loctup; - bool valid; + if (!ItemIdIsNormal(lpp)) + continue; - loctup.t_tableOid = RelationGetRelid(scan->rs_base.rs_rd); loctup.t_data = (HeapTupleHeader) PageGetItem(page, lpp); loctup.t_len = ItemIdGetLength(lpp); ItemPointerSet(&(loctup.t_self), block, lineoff); - if (all_visible) - valid = true; - else - valid = HeapTupleSatisfiesVisibility(&loctup, snapshot, buffer); + HeapCheckForSerializableConflictOut(true, scan->rs_base.rs_rd, + &loctup, buffer, snapshot); + scan->rs_vistuples[ntup++] = lineoff; + } + scan->rs_startindex = 0; + } + else + { + HeapTupleData loctup; + int normcount = 0; + int startindex = MaxHeapTuplesPerPage; + OffsetNumber normoffsets[MaxHeapTuplesPerPage]; + + loctup.t_tableOid = RelationGetRelid(scan->rs_base.rs_rd); + + /* + * Iterate forward over line items, they're laid out in increasing + * order in memory. 
Doing this separately allows to benefit from + * out-of-order capabilities of the CPU and simplifies the next loop. + */ + for (lineoff = FirstOffsetNumber, lpp = PageGetItemId(page, lineoff); + lineoff <= lines; + lineoff++, lpp++) + { + pg_prefetch_mem(PageGetItemId(page, lineoff+5)); + if (ItemIdIsNormal(lpp)) + normoffsets[normcount++] = lineoff; + } + + /* + * Process tuples in reverse order. That'll most often lead to memory + * accesses in increasing order, which typically is more efficient for + * the CPUs prefetcher. To avoid affecting sort order, we populate the + * rs_vistuples[] array backwards and use rs_startindex to mark the + * first used element in the array. + */ + for (int i = normcount - 1; i >= 0; i--) + { + bool valid; + + lineoff = normoffsets[i]; + lpp = PageGetItemId(page, lineoff); + + loctup.t_data = (HeapTupleHeader) PageGetItem(page, lpp); + loctup.t_len = ItemIdGetLength(lpp); + ItemPointerSet(&(loctup.t_self), block, lineoff); + valid = HeapTupleSatisfiesVisibility(&loctup, snapshot, buffer); HeapCheckForSerializableConflictOut(valid, scan->rs_base.rs_rd, &loctup, buffer, snapshot); if (valid) - scan->rs_vistuples[ntup++] = lineoff; + { + scan->rs_vistuples[--startindex] = lineoff; + ntup++; + } } + /* record the first used element in rs_vistuples[] */ + scan->rs_startindex = startindex; } LockBuffer(buffer, BUFFER_LOCK_UNLOCK); @@ -902,7 +953,7 @@ heapgettup_pagemode(HeapScanDesc scan, else block = scan->rs_startblock; /* first page */ heapgetpage((TableScanDesc) scan, block); - lineindex = 0; + lineindex = scan->rs_startindex; scan->rs_inited = true; } else @@ -917,7 +968,7 @@ heapgettup_pagemode(HeapScanDesc scan, lines = scan->rs_ntuples; /* block and lineindex now reference the next visible tid */ - linesleft = lines - lineindex; + linesleft = lines - lineindex + scan->rs_startindex; } else if (backward) { @@ -968,7 +1019,7 @@ heapgettup_pagemode(HeapScanDesc scan, if (!scan->rs_inited) { - lineindex = lines - 1; + lineindex = 
scan->rs_startindex + lines - 1; scan->rs_inited = true; } else @@ -977,7 +1028,7 @@ heapgettup_pagemode(HeapScanDesc scan, } /* block and lineindex now reference the previous visible tid */ - linesleft = lineindex + 1; + linesleft = lineindex + 1 - scan->rs_startindex; } else { @@ -1127,9 +1178,9 @@ heapgettup_pagemode(HeapScanDesc scan, lines = scan->rs_ntuples; linesleft = lines; if (backward) - lineindex = lines - 1; + lineindex = MaxHeapTuplesPerPage - 1; else - lineindex = 0; + lineindex = scan->rs_startindex; } } diff --git a/src/backend/access/heap/heapam_handler.c b/src/backend/access/heap/heapam_handler.c index ab1bcf3522..d39284465b 100644 --- a/src/backend/access/heap/heapam_handler.c +++ b/src/backend/access/heap/heapam_handler.c @@ -2490,15 +2490,16 @@ SampleHeapTupleVisible(TableScanDesc scan, Buffer buffer, { /* * In pageatatime mode, heapgetpage() already did visibility checks, - * so just look at the info it left in rs_vistuples[]. + * so just look at the info it left in rs_vistuples[] starting at + * rs_startindex. * * We use a binary search over the known-sorted array. Note: we could * save some effort if we insisted that NextSampleTuple select tuples * in increasing order, but it's not clear that there would be enough * gain to justify the restriction. 
*/ - int start = 0, - end = hscan->rs_ntuples - 1; + int start = hscan->rs_startindex, + end = hscan->rs_startindex + hscan->rs_ntuples - 1; while (start <= end) { diff --git a/src/include/access/heapam.h b/src/include/access/heapam.h index 810baaf9d0..aba0795fc6 100644 --- a/src/include/access/heapam.h +++ b/src/include/access/heapam.h @@ -74,6 +74,7 @@ typedef struct HeapScanDescData /* these fields only used in page-at-a-time mode and for bitmap scans */ int rs_cindex; /* current tuple's index in vistuples */ int rs_ntuples; /* number of visible tuples on page */ + int rs_startindex; /* first used element in rs_vistuples[] */ OffsetNumber rs_vistuples[MaxHeapTuplesPerPage]; /* their offsets */ } HeapScanDescData; typedef struct HeapScanDescData *HeapScanDesc; -- 2.35.1.windows.2
From f0b8920b23b200132a4e92942c4bb281426e4f9c Mon Sep 17 00:00:00 2001 From: David Rowley <dgrow...@gmail.com> Date: Mon, 31 Oct 2022 10:05:12 +1300 Subject: [PATCH v2 5/5] Prefetch tuple memory during forward seqscans --- src/backend/access/heap/heapam.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c index 8fe4f4c837..6dc53effb4 100644 --- a/src/backend/access/heap/heapam.c +++ b/src/backend/access/heap/heapam.c @@ -1115,6 +1115,17 @@ heapgettup_pagemode(HeapScanDesc scan, tuple->t_len = ItemIdGetLength(lpp); ItemPointerSet(&(tuple->t_self), block, lineoff); + /* + * Prefetching the memory for the next tuple has shown to improve + * performance on certain hardware. + */ + if (!backward && linesleft > 1) + { + lineoff = scan->rs_vistuples[lineindex + 1]; + lpp = PageGetItemId(page, lineoff); + pg_prefetch_mem(PageGetItem(page, lpp)); + } + /* * if current tuple qualifies, return it. */ -- 2.35.1.windows.2
From 8f47b1e0163028ec8928aaf02ce49d34bb89c647 Mon Sep 17 00:00:00 2001 From: David Rowley <dgrow...@gmail.com> Date: Wed, 19 Oct 2022 08:55:45 +1300 Subject: [PATCH v2 4/5] heapam: WIP: cacheline prefetching for hot pruning. Author: Andres Freund, with contributions from Dmitry Dolgov --- src/backend/access/heap/pruneheap.c | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/src/backend/access/heap/pruneheap.c b/src/backend/access/heap/pruneheap.c index 91c5f5e9ef..a094ac18f5 100644 --- a/src/backend/access/heap/pruneheap.c +++ b/src/backend/access/heap/pruneheap.c @@ -302,6 +302,30 @@ heap_page_prune(Relation relation, Buffer buffer, maxoff = PageGetMaxOffsetNumber(page); tup.t_tableOid = RelationGetRelid(prstate.rel); +#if 1 + for (char *p = (char *) PageGetItemId(page, FirstOffsetNumber); + p < (char *) PageGetItemId(page, maxoff); + p += 64) + { + pg_prefetch_mem((ItemId)p); + } + + + for (offnum = FirstOffsetNumber; + offnum <= maxoff; + offnum = OffsetNumberNext(offnum)) + { + ItemId itemid; + + itemid = PageGetItemId(page, offnum); + if (!ItemIdIsUsed(itemid) || ItemIdIsDead(itemid) || !ItemIdHasStorage(itemid)) + continue; + + pg_prefetch_mem((HeapTupleHeader) PageGetItem(page, itemid)); + pg_prefetch_mem(PageGetItem(page, itemid) + sizeof(HeapTupleHeaderData) - 1); + } +#endif + /* * Determine HTSV for all tuples. * -- 2.35.1.windows.2
From f1b183123befc1a3f096ba36b1c3834b4c67d3ec Mon Sep 17 00:00:00 2001 From: David Rowley <dgrow...@gmail.com> Date: Thu, 17 Nov 2022 13:24:40 +1300 Subject: [PATCH v2 3/5] Unroll loop in heapgetpage --- src/backend/access/heap/heapam.c | 48 ++++++++++++++++++++++++++++---- 1 file changed, 42 insertions(+), 6 deletions(-) diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c index 81c7f69644..8fe4f4c837 100644 --- a/src/backend/access/heap/heapam.c +++ b/src/backend/access/heap/heapam.c @@ -484,15 +484,51 @@ heapgetpage(TableScanDesc sscan, BlockNumber block) loctup.t_tableOid = RelationGetRelid(scan->rs_base.rs_rd); /* - * Iterate forward over line items, they're laid out in increasing - * order in memory. Doing this separately allows to benefit from - * out-of-order capabilities of the CPU and simplifies the next loop. + * Iterate forward over line items, processing 16 at a time (this + * assumes there will be 16 ItemIds per CPU cacheline). */ for (lineoff = FirstOffsetNumber, lpp = PageGetItemId(page, lineoff); - lineoff <= lines; - lineoff++, lpp++) + lineoff <= lines - 15; + lineoff += 16, lpp += 16) + { + pg_prefetch_mem(PageGetItemId(page, lineoff + 16)); + if (ItemIdIsNormal(lpp)) + normoffsets[normcount++] = lineoff; + if (ItemIdIsNormal(lpp + 1)) + normoffsets[normcount++] = lineoff + 1; + if (ItemIdIsNormal(lpp + 2)) + normoffsets[normcount++] = lineoff + 2; + if (ItemIdIsNormal(lpp + 3)) + normoffsets[normcount++] = lineoff + 3; + if (ItemIdIsNormal(lpp + 4)) + normoffsets[normcount++] = lineoff + 4; + if (ItemIdIsNormal(lpp + 5)) + normoffsets[normcount++] = lineoff + 5; + if (ItemIdIsNormal(lpp + 6)) + normoffsets[normcount++] = lineoff + 6; + if (ItemIdIsNormal(lpp + 7)) + normoffsets[normcount++] = lineoff + 7; + if (ItemIdIsNormal(lpp + 8)) + normoffsets[normcount++] = lineoff + 8; + if (ItemIdIsNormal(lpp + 9)) + normoffsets[normcount++] = lineoff + 9; + if (ItemIdIsNormal(lpp + 10)) + normoffsets[normcount++] = lineoff + 
10; + if (ItemIdIsNormal(lpp + 11)) + normoffsets[normcount++] = lineoff + 11; + if (ItemIdIsNormal(lpp + 12)) + normoffsets[normcount++] = lineoff + 12; + if (ItemIdIsNormal(lpp + 13)) + normoffsets[normcount++] = lineoff + 13; + if (ItemIdIsNormal(lpp + 14)) + normoffsets[normcount++] = lineoff + 14; + if (ItemIdIsNormal(lpp + 15)) + normoffsets[normcount++] = lineoff + 15; + } + + /* get remainder */ + for (; lineoff <= lines; lineoff++, lpp++) { - pg_prefetch_mem(PageGetItemId(page, lineoff+5)); if (ItemIdIsNormal(lpp)) normoffsets[normcount++] = lineoff; } -- 2.35.1.windows.2