From fe7d2e4061042c4fd30c9c7095889fcf777be105 Mon Sep 17 00:00:00 2001
From: Andrew kim <andrew.kim@intel.com>
Date: Sat, 18 Oct 2025 12:13:50 -0700
Subject: [PATCH 1/2] Enable autovectorizing pg_checksum_block with AVX2
 runtime-detection

        1. Compiler flags: Moved CFLAGS_UNROLL_LOOPS and CFLAGS_VECTORIZE to
           proper port module build files

        2. Header organization: Relocated headers from src/include/storage/
           to src/include/port/ for correct module classification

        3. External compatibility: Made checksum_impl.h fully self-contained with
           inline implementations, required constants, and external interface

        4. Simplified AVX2 detection: Replaced complex CPUID logic with
           __builtin_cpu_supports('avx2')

        5. File consolidation: Removed duplicate backend/storage/page/checksum.c,
           moved pg_checksum_page to unified port implementation

        6. Documentation: Restored comprehensive algorithm comments

        7. Comment clarity: Replaced confusing ISA-specific references
---
 config/c-compiler.m4                          |  31 +++++
 configure                                     |  52 ++++++++
 configure.ac                                  |   9 ++
 meson.build                                   |  28 +++++
 src/backend/backup/basebackup.c               |   2 +-
 src/backend/storage/page/Makefile             |   4 -
 src/backend/storage/page/bufpage.c            |   2 +-
 src/backend/storage/page/checksum.c           |  22 ----
 src/backend/storage/page/meson.build          |   9 --
 src/bin/pg_checksums/pg_checksums.c           |   4 +-
 src/bin/pg_upgrade/file.c                     |   4 +-
 src/include/pg_config.h.in                    |   3 +
 src/include/{storage => port}/checksum.h      |   2 +-
 src/include/{storage => port}/checksum_impl.h | 104 ++++++++--------
 src/port/Makefile                             |   6 +
 src/port/checksum.c                           | 116 ++++++++++++++++++
 src/port/meson.build                          |   5 +-
 src/test/modules/test_aio/test_aio.c          |   2 +-
 src/tools/pginclude/headerscheck              |   2 +-
 19 files changed, 310 insertions(+), 97 deletions(-)
 delete mode 100644 src/backend/storage/page/checksum.c
 rename src/include/{storage => port}/checksum.h (94%)
 rename src/include/{storage => port}/checksum_impl.h (77%)
 create mode 100644 src/port/checksum.c

diff --git a/config/c-compiler.m4 b/config/c-compiler.m4
index 236a59e8536..bcc1398d51a 100644
--- a/config/c-compiler.m4
+++ b/config/c-compiler.m4
@@ -711,6 +711,37 @@ fi
 undefine([Ac_cachevar])dnl
 ])# PGAC_XSAVE_INTRINSICS
 
+# PGAC_AVX2_SUPPORT
+# -----------------------------
+# Check if the compiler supports AVX2 in attribute((target))
+# and using AVX2 intrinsics in those functions
+#
+# If the intrinsics are supported, sets pgac_avx2_support.
+AC_DEFUN([PGAC_AVX2_SUPPORT],
+[define([Ac_cachevar], [AS_TR_SH([pgac_cv_avx2_support])])dnl
+AC_CACHE_CHECK([for AVX2 support], [Ac_cachevar],
+[AC_LINK_IFELSE([AC_LANG_PROGRAM([#include <immintrin.h>
+    #include <stdint.h>
+    #if defined(__has_attribute) && __has_attribute (target)
+    __attribute__((target("avx2")))
+    #endif
+    static int avx2_test(void)
+    {
+      const char buf@<:@sizeof(__m256i)@:>@;
+      __m256i accum = _mm256_loadu_si256((const __m256i *) buf);
+	  accum = _mm256_add_epi32(accum, accum);
+      int result = _mm256_extract_epi32(accum, 0);
+      return (int) result;
+    }],
+  [return avx2_test();])],
+  [Ac_cachevar=yes],
+  [Ac_cachevar=no])])
+if test x"$Ac_cachevar" = x"yes"; then
+  pgac_avx2_support=yes
+fi
+undefine([Ac_cachevar])dnl
+])# PGAC_AVX2_SUPPORT
+
 # PGAC_AVX512_POPCNT_INTRINSICS
 # -----------------------------
 # Check if the compiler supports the AVX-512 popcount instructions using the
diff --git a/configure b/configure
index 22cd866147b..209849c773c 100755
--- a/configure
+++ b/configure
@@ -17562,6 +17562,58 @@ $as_echo "#define HAVE_XSAVE_INTRINSICS 1" >>confdefs.h
 
 fi
 
+# Check for AVX2 target and intrinsic support
+#
+if test x"$host_cpu" = x"x86_64"; then
+  { $as_echo "$as_me:${as_lineno-$LINENO}: checking for AVX2 support" >&5
+$as_echo_n "checking for AVX2 support... " >&6; }
+if ${pgac_cv_avx2_support+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+#include <immintrin.h>
+    #include <stdint.h>
+    #if defined(__has_attribute) && __has_attribute (target)
+    __attribute__((target("avx2")))
+    #endif
+    static int avx2_test(void)
+    {
+      const char buf[sizeof(__m256i)];
+      __m256i accum = _mm256_loadu_si256((const __m256i *) buf);
+	  accum = _mm256_add_epi32(accum, accum);
+      int result = _mm256_extract_epi32(accum, 0);
+      return (int) result;
+    }
+int
+main ()
+{
+return avx2_test();
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_link "$LINENO"; then :
+  pgac_cv_avx2_support=yes
+else
+  pgac_cv_avx2_support=no
+fi
+rm -f core conftest.err conftest.$ac_objext \
+    conftest$ac_exeext conftest.$ac_ext
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $pgac_cv_avx2_support" >&5
+$as_echo "$pgac_cv_avx2_support" >&6; }
+if test x"$pgac_cv_avx2_support" = x"yes"; then
+  pgac_avx2_support=yes
+fi
+
+  if test x"$pgac_avx2_support" = x"yes"; then
+
+$as_echo "#define USE_AVX2_WITH_RUNTIME_CHECK 1" >>confdefs.h
+
+  fi
+fi
+
 # Check for AVX-512 popcount intrinsics
 #
 if test x"$host_cpu" = x"x86_64"; then
diff --git a/configure.ac b/configure.ac
index e44943aa6fe..c061b1a854c 100644
--- a/configure.ac
+++ b/configure.ac
@@ -2091,6 +2091,15 @@ if test x"$pgac_xsave_intrinsics" = x"yes"; then
   AC_DEFINE(HAVE_XSAVE_INTRINSICS, 1, [Define to 1 if you have XSAVE intrinsics.])
 fi
 
+# Check for AVX2 target and intrinsic support
+#
+if test x"$host_cpu" = x"x86_64"; then
+  PGAC_AVX2_SUPPORT()
+  if test x"$pgac_avx2_support" = x"yes"; then
+    AC_DEFINE(USE_AVX2_WITH_RUNTIME_CHECK, 1, [Define to 1 to use AVX2 instructions with a runtime check.])
+  fi
+fi
+
 # Check for AVX-512 popcount intrinsics
 #
 if test x"$host_cpu" = x"x86_64"; then
diff --git a/meson.build b/meson.build
index 395416a6060..a37ef88bf16 100644
--- a/meson.build
+++ b/meson.build
@@ -2292,6 +2292,34 @@ int main(void)
 
 endif
 
+###############################################################
+# Check for the availability of AVX2 support
+###############################################################
+
+if host_cpu == 'x86_64'
+
+  prog = '''
+#include <immintrin.h>
+#include <stdint.h>
+#if defined(__has_attribute) && __has_attribute (target)
+__attribute__((target("avx2")))
+#endif
+int main(void)
+{
+  const char buf[sizeof(__m256i)];
+  __m256i accum = _mm256_loadu_si256((const __m256i *) buf);
+  accum = _mm256_add_epi32(accum, accum);
+  int result = _mm256_extract_epi32(accum, 0);
+  return (int) result;
+}
+'''
+
+  if cc.links(prog, name: 'AVX2 support', args: test_c_args)
+    cdata.set('USE_AVX2_WITH_RUNTIME_CHECK', 1)
+  endif
+
+endif
+
 
 ###############################################################
 # Check for the availability of AVX-512 popcount intrinsics.
diff --git a/src/backend/backup/basebackup.c b/src/backend/backup/basebackup.c
index bb7d90aa5d9..d84ced4b47c 100644
--- a/src/backend/backup/basebackup.c
+++ b/src/backend/backup/basebackup.c
@@ -39,7 +39,7 @@
 #include "replication/walsender.h"
 #include "replication/walsender_private.h"
 #include "storage/bufpage.h"
-#include "storage/checksum.h"
+#include "port/checksum.h"
 #include "storage/dsm_impl.h"
 #include "storage/ipc.h"
 #include "storage/reinit.h"
diff --git a/src/backend/storage/page/Makefile b/src/backend/storage/page/Makefile
index da539b113a6..5d8a3d2f5ac 100644
--- a/src/backend/storage/page/Makefile
+++ b/src/backend/storage/page/Makefile
@@ -14,10 +14,6 @@ include $(top_builddir)/src/Makefile.global
 
 OBJS =  \
 	bufpage.o \
-	checksum.o \
 	itemptr.o
 
 include $(top_srcdir)/src/backend/common.mk
-
-# Provide special optimization flags for checksum.c
-checksum.o: CFLAGS += ${CFLAGS_UNROLL_LOOPS} ${CFLAGS_VECTORIZE}
diff --git a/src/backend/storage/page/bufpage.c b/src/backend/storage/page/bufpage.c
index dbb49ed9197..b8f889efb88 100644
--- a/src/backend/storage/page/bufpage.c
+++ b/src/backend/storage/page/bufpage.c
@@ -18,7 +18,7 @@
 #include "access/itup.h"
 #include "access/xlog.h"
 #include "pgstat.h"
-#include "storage/checksum.h"
+#include "port/checksum.h"
 #include "utils/memdebug.h"
 #include "utils/memutils.h"
 
diff --git a/src/backend/storage/page/checksum.c b/src/backend/storage/page/checksum.c
deleted file mode 100644
index c913459b5a3..00000000000
--- a/src/backend/storage/page/checksum.c
+++ /dev/null
@@ -1,22 +0,0 @@
-/*-------------------------------------------------------------------------
- *
- * checksum.c
- *	  Checksum implementation for data pages.
- *
- * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
- * Portions Copyright (c) 1994, Regents of the University of California
- *
- * IDENTIFICATION
- *	  src/backend/storage/page/checksum.c
- *
- *-------------------------------------------------------------------------
- */
-#include "postgres.h"
-
-#include "storage/checksum.h"
-/*
- * The actual code is in storage/checksum_impl.h.  This is done so that
- * external programs can incorporate the checksum code by #include'ing
- * that file from the exported Postgres headers.  (Compare our CRC code.)
- */
-#include "storage/checksum_impl.h"	/* IWYU pragma: keep */
diff --git a/src/backend/storage/page/meson.build b/src/backend/storage/page/meson.build
index 112f00ff365..cf92a8f55f0 100644
--- a/src/backend/storage/page/meson.build
+++ b/src/backend/storage/page/meson.build
@@ -1,14 +1,5 @@
 # Copyright (c) 2022-2025, PostgreSQL Global Development Group
 
-checksum_backend_lib = static_library('checksum_backend_lib',
-  'checksum.c',
-  dependencies: backend_build_deps,
-  kwargs: internal_lib_args,
-  c_args: vectorize_cflags + unroll_loops_cflags,
-)
-
-backend_link_with += checksum_backend_lib
-
 backend_sources += files(
   'bufpage.c',
   'itemptr.c',
diff --git a/src/bin/pg_checksums/pg_checksums.c b/src/bin/pg_checksums/pg_checksums.c
index f20be82862a..5231eb33207 100644
--- a/src/bin/pg_checksums/pg_checksums.c
+++ b/src/bin/pg_checksums/pg_checksums.c
@@ -28,8 +28,8 @@
 #include "getopt_long.h"
 #include "pg_getopt.h"
 #include "storage/bufpage.h"
-#include "storage/checksum.h"
-#include "storage/checksum_impl.h"
+#include "port/checksum.h"
+#include "port/checksum_impl.h"
 
 
 static int64 files_scanned = 0;
diff --git a/src/bin/pg_upgrade/file.c b/src/bin/pg_upgrade/file.c
index 91ed16acb08..084392ae54d 100644
--- a/src/bin/pg_upgrade/file.c
+++ b/src/bin/pg_upgrade/file.c
@@ -24,8 +24,8 @@
 #include "common/file_perm.h"
 #include "pg_upgrade.h"
 #include "storage/bufpage.h"
-#include "storage/checksum.h"
-#include "storage/checksum_impl.h"
+#include "port/checksum.h"
+#include "port/checksum_impl.h"
 
 
 /*
diff --git a/src/include/pg_config.h.in b/src/include/pg_config.h.in
index c4dc5d72bdb..987f9b5c77c 100644
--- a/src/include/pg_config.h.in
+++ b/src/include/pg_config.h.in
@@ -675,6 +675,9 @@
 /* Define to 1 to use AVX-512 CRC algorithms with a runtime check. */
 #undef USE_AVX512_CRC32C_WITH_RUNTIME_CHECK
 
+/* Define to 1 to use AVX2 instructions with a runtime check. */
+#undef USE_AVX2_WITH_RUNTIME_CHECK
+
 /* Define to 1 to use AVX-512 popcount instructions with a runtime check. */
 #undef USE_AVX512_POPCNT_WITH_RUNTIME_CHECK
 
diff --git a/src/include/storage/checksum.h b/src/include/port/checksum.h
similarity index 94%
rename from src/include/storage/checksum.h
rename to src/include/port/checksum.h
index 25d13a798d1..c2faed83ede 100644
--- a/src/include/storage/checksum.h
+++ b/src/include/port/checksum.h
@@ -6,7 +6,7 @@
  * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * src/include/storage/checksum.h
+ * src/include/port/checksum.h
  *
  *-------------------------------------------------------------------------
  */
diff --git a/src/include/storage/checksum_impl.h b/src/include/port/checksum_impl.h
similarity index 77%
rename from src/include/storage/checksum_impl.h
rename to src/include/port/checksum_impl.h
index da87d61ba52..357b2089f01 100644
--- a/src/include/storage/checksum_impl.h
+++ b/src/include/port/checksum_impl.h
@@ -5,13 +5,13 @@
  *
  * This file exists for the benefit of external programs that may wish to
  * check Postgres page checksums.  They can #include this to get the code
- * referenced by storage/checksum.h.  (Note: you may need to redefine
+ * referenced by port/checksum.h.  (Note: you may need to redefine
  * Assert() as empty to compile this successfully externally.)
  *
  * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * src/include/storage/checksum_impl.h
+ * src/include/port/checksum_impl.h
  *
  *-------------------------------------------------------------------------
  */
@@ -101,12 +101,15 @@
  */
 
 #include "storage/bufpage.h"
+#include "pg_config.h"
+#include <string.h> /* for memcpy */
 
 /* number of checksums to calculate in parallel */
 #define N_SUMS 32
 /* prime multiplier of FNV-1a hash */
 #define FNV_PRIME 16777619
 
+
 /* Use a union so that this code is valid under strict aliasing */
 typedef union
 {
@@ -139,77 +142,76 @@ do { \
 } while (0)
 
 /*
- * Block checksum algorithm.  The page must be adequately aligned
- * (at least on 4-byte boundary).
+ * Default checksum implementation (always available)
  */
-static uint32
-pg_checksum_block(const PGChecksummablePage *page)
+static inline uint32
+pg_checksum_block_default_impl(const PGChecksummablePage *page)
 {
-	uint32		sums[N_SUMS];
-	uint32		result = 0;
-	uint32		i,
-				j;
-
-	/* ensure that the size is compatible with the algorithm */
-	Assert(sizeof(PGChecksummablePage) == BLCKSZ);
+	uint32 sums[N_SUMS], result = 0;
+	uint32 i, j;
 
-	/* initialize partial checksums to their corresponding offsets */
+	/* Initialize each parallel checksum with different base offsets */
 	memcpy(sums, checksumBaseOffsets, sizeof(checksumBaseOffsets));
 
-	/* main checksum calculation */
-	for (i = 0; i < (uint32) (BLCKSZ / (sizeof(uint32) * N_SUMS)); i++)
+	/* Main checksum calculation loop - process page data in parallel */
+	for (i = 0; i < (uint32)(BLCKSZ / (sizeof(uint32) * N_SUMS)); i++)
 		for (j = 0; j < N_SUMS; j++)
 			CHECKSUM_COMP(sums[j], page->data[i][j]);
 
-	/* finally add in two rounds of zeroes for additional mixing */
+	/* Two final rounds with zero to mix remaining bits */
 	for (i = 0; i < 2; i++)
 		for (j = 0; j < N_SUMS; j++)
 			CHECKSUM_COMP(sums[j], 0);
 
-	/* xor fold partial checksums together */
+	/* Combine all parallel checksums with XOR to get final result */
 	for (i = 0; i < N_SUMS; i++)
 		result ^= sums[i];
 
 	return result;
 }
 
+#ifdef USE_AVX2_WITH_RUNTIME_CHECK
 /*
- * Compute the checksum for a Postgres page.
- *
- * The page must be adequately aligned (at least on a 4-byte boundary).
- * Beware also that the checksum field of the page is transiently zeroed.
- *
- * The checksum includes the block number (to detect the case where a page is
- * somehow moved to a different location), the page header (excluding the
- * checksum itself), and the page data.
+ * AVX2 optimized implementation (may not be available on all systems)
  */
-uint16
-pg_checksum_page(char *page, BlockNumber blkno)
+pg_attribute_target("avx2")
+static inline uint32
+pg_checksum_block_avx2_impl(const PGChecksummablePage *page)
 {
-	PGChecksummablePage *cpage = (PGChecksummablePage *) page;
-	uint16		save_checksum;
-	uint32		checksum;
-
-	/* We only calculate the checksum for properly-initialized pages */
-	Assert(!PageIsNew((Page) page));
-
-	/*
-	 * Save pd_checksum and temporarily set it to zero, so that the checksum
-	 * calculation isn't affected by the old checksum stored on the page.
-	 * Restore it after, because actually updating the checksum is NOT part of
-	 * the API of this function.
+	/* For now, AVX2 implementation is identical to default
+	 * The compiler will auto-vectorize this with proper flags
+	 * Future versions could use explicit AVX2 intrinsics here
 	 */
-	save_checksum = cpage->phdr.pd_checksum;
-	cpage->phdr.pd_checksum = 0;
-	checksum = pg_checksum_block(cpage);
-	cpage->phdr.pd_checksum = save_checksum;
+	return pg_checksum_block_default_impl(page);
+}
+#endif
 
-	/* Mix in the block number to detect transposed pages */
-	checksum ^= blkno;
+/* Function declarations for checksum implementations */
+uint32 pg_checksum_block_default(const PGChecksummablePage *page);
+#ifdef USE_AVX2_WITH_RUNTIME_CHECK
+uint32 pg_checksum_block_avx2(const PGChecksummablePage *page);
+#endif
 
-	/*
-	 * Reduce to a uint16 (to fit in the pd_checksum field) with an offset of
-	 * one. That avoids checksums of zero, which seems like a good idea.
-	 */
-	return (uint16) ((checksum % 65535) + 1);
+uint32 pg_checksum_block_choose(const PGChecksummablePage *page);
+extern uint32 (*pg_checksum_block)(const PGChecksummablePage *page);
+
+/*
+ * Simple interface for external programs
+ * Define USE_AVX2_WITH_RUNTIME_CHECK before including to enable AVX2 if available
+ */
+#ifndef PG_CHECKSUM_EXTERNAL_INTERFACE
+#define PG_CHECKSUM_EXTERNAL_INTERFACE
+
+static inline uint32
+pg_checksum_block_simple(const PGChecksummablePage *page)
+{
+#ifdef USE_AVX2_WITH_RUNTIME_CHECK
+	/* External programs can use AVX2 if they define the macro and have CPU support */
+	if (__builtin_cpu_supports("avx2"))
+		return pg_checksum_block_avx2_impl(page);
+	else
+#endif
+		return pg_checksum_block_default_impl(page);
 }
+
+#endif /* PG_CHECKSUM_EXTERNAL_INTERFACE */
diff --git a/src/port/Makefile b/src/port/Makefile
index 4274949dfa4..430b7bbbcb6 100644
--- a/src/port/Makefile
+++ b/src/port/Makefile
@@ -48,6 +48,7 @@ OBJS = \
 	pg_numa.o \
 	pg_popcount_aarch64.o \
 	pg_popcount_avx512.o \
+	checksum.o \
 	pg_strong_random.o \
 	pgcheckdir.o \
 	pgmkdirp.o \
@@ -90,6 +91,11 @@ pg_crc32c_armv8.o: CFLAGS+=$(CFLAGS_CRC)
 pg_crc32c_armv8_shlib.o: CFLAGS+=$(CFLAGS_CRC)
 pg_crc32c_armv8_srv.o: CFLAGS+=$(CFLAGS_CRC)
 
+# all versions of checksum.o need vectorization and unroll-loops flags
+checksum.o: CFLAGS+=$(CFLAGS_VECTORIZE) $(CFLAGS_UNROLL_LOOPS)
+checksum_shlib.o: CFLAGS+=$(CFLAGS_VECTORIZE) $(CFLAGS_UNROLL_LOOPS)
+checksum_srv.o: CFLAGS+=$(CFLAGS_VECTORIZE) $(CFLAGS_UNROLL_LOOPS)
+
 #
 # Shared library versions of object files
 #
diff --git a/src/port/checksum.c b/src/port/checksum.c
new file mode 100644
index 00000000000..df1a1b4ce83
--- /dev/null
+++ b/src/port/checksum.c
@@ -0,0 +1,116 @@
+/*-------------------------------------------------------------------------
+ *
+ * checksum.c
+ *	  Checksum implementation for data pages with AVX2 optimization.
+ *
+ * This file consolidates all checksum-related functionality including:
+ * - Runtime CPU feature detection
+ * - Default and AVX2-optimized implementations
+ * - Function dispatch logic
+ * - Page checksum calculation
+ *
+ * Portions Copyright (c) 1996-2025, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ * IDENTIFICATION
+ *	  src/port/checksum.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "c.h"
+#include "port/checksum_impl.h"
+
+#ifndef FRONTEND
+#include "postgres.h"
+#include "storage/bufpage.h"
+#endif
+
+
+
+/*
+ * Check for AVX2 support using GCC builtin
+ */
+static inline bool
+avx2_available(void)
+{
+#ifdef USE_AVX2_WITH_RUNTIME_CHECK
+	return __builtin_cpu_supports("avx2");
+#else
+	return false;
+#endif
+}
+
+/* default checksum implementation */
+uint32
+pg_checksum_block_default(const PGChecksummablePage *page)
+{
+	return pg_checksum_block_default_impl(page);
+}
+
+#ifdef USE_AVX2_WITH_RUNTIME_CHECK
+pg_attribute_target("avx2")
+uint32
+pg_checksum_block_avx2(const PGChecksummablePage *page)
+{
+	return pg_checksum_block_avx2_impl(page);
+}
+#endif
+
+/* Function pointer - external linkage (declared extern in header) */
+uint32 (*pg_checksum_block)(const PGChecksummablePage *page) = pg_checksum_block_choose;
+
+/* Choose the best available checksum implementation */
+uint32 pg_checksum_block_choose(const PGChecksummablePage *page)
+{
+#ifdef USE_AVX2_WITH_RUNTIME_CHECK
+	if (avx2_available())
+	{
+		pg_checksum_block = pg_checksum_block_avx2;
+		return pg_checksum_block(page);
+	}
+#endif
+	/* fallback */
+	pg_checksum_block = pg_checksum_block_default;
+	return pg_checksum_block(page);
+}
+
+/*
+ * Compute the checksum for a Postgres page.
+ *
+ * The page must be adequately aligned (at least on a 4-byte boundary).
+ * Beware also that the checksum field of the page is transiently zeroed.
+ *
+ * The checksum includes the block number (to detect the case where a page is
+ * somehow moved to a different location), the page header (excluding the
+ * checksum itself), and the page data.
+ */
+uint16
+pg_checksum_page(char *page, BlockNumber blkno)
+{
+	PGChecksummablePage *cpage = (PGChecksummablePage *) page;
+	uint16 save_checksum;
+	uint32 checksum;
+
+	/* We only calculate the checksum for properly-initialized pages */
+	Assert(!PageIsNew((Page) page));
+
+	/*
+	 * Save pd_checksum and temporarily set it to zero, so that the checksum
+	 * calculation isn't affected by the old checksum stored on the page.
+	 * Restore it after, because actually updating the checksum is NOT part of
+	 * the API of this function.
+	 */
+	save_checksum = cpage->phdr.pd_checksum;
+	cpage->phdr.pd_checksum = 0;
+	checksum = pg_checksum_block(cpage);
+	cpage->phdr.pd_checksum = save_checksum;
+
+	/* Mix in the block number to detect transposed pages */
+	checksum ^= blkno;
+
+	/*
+	 * Reduce to a uint16 (to fit in the pd_checksum field) with an offset of
+	 * one. That avoids checksums of zero, which seems like a good idea.
+	 */
+	return (uint16)((checksum % 65535) + 1);
+}
diff --git a/src/port/meson.build b/src/port/meson.build
index fc7b059fee5..2074553f9a5 100644
--- a/src/port/meson.build
+++ b/src/port/meson.build
@@ -102,10 +102,11 @@ replace_funcs_pos = [
 
   # generic fallback
   ['pg_crc32c_sb8', 'USE_SLICING_BY_8_CRC32C'],
+
 ]
 
-pgport_cflags = {'crc': cflags_crc}
-pgport_sources_cflags = {'crc': []}
+pgport_cflags = {'crc': cflags_crc, 'checksum': unroll_loops_cflags + vectorize_cflags}
+pgport_sources_cflags = {'crc': [], 'checksum': [files('checksum.c')]}
 
 foreach f : replace_funcs_neg
   func = f.get(0)
diff --git a/src/test/modules/test_aio/test_aio.c b/src/test/modules/test_aio/test_aio.c
index c55cf6c0aac..175e491c0bc 100644
--- a/src/test/modules/test_aio/test_aio.c
+++ b/src/test/modules/test_aio/test_aio.c
@@ -24,7 +24,7 @@
 #include "storage/aio_internal.h"
 #include "storage/buf_internals.h"
 #include "storage/bufmgr.h"
-#include "storage/checksum.h"
+#include "port/checksum.h"
 #include "storage/ipc.h"
 #include "storage/lwlock.h"
 #include "utils/builtins.h"
diff --git a/src/tools/pginclude/headerscheck b/src/tools/pginclude/headerscheck
index a52a5580bdc..35ac0caaa43 100755
--- a/src/tools/pginclude/headerscheck
+++ b/src/tools/pginclude/headerscheck
@@ -167,7 +167,7 @@ do
 	test "$f" = src/test/isolation/specparse.h && continue
 
 	# This produces a "no previous prototype" warning.
-	! $cplusplus && test "$f" = src/include/storage/checksum_impl.h && continue
+	! $cplusplus && test "$f" = src/include/port/checksum_impl.h && continue
 
 	# SectionMemoryManager.h is C++
 	test "$f" = src/include/jit/SectionMemoryManager.h && continue
-- 
2.43.0

