diff --git a/config/c-compiler.m4 b/config/c-compiler.m4
index 5db02b2ab7..63af1331bf 100644
--- a/config/c-compiler.m4
+++ b/config/c-compiler.m4
@@ -694,3 +694,38 @@ if test x"$Ac_cachevar" = x"yes"; then
 fi
 undefine([Ac_cachevar])dnl
 ])# PGAC_LOONGARCH_CRC32C_INTRINSICS
+
+# PGAC_AVX512_POPCNT_INTRINSICS
+# ---------------------------
+# Check if the compiler supports the x86_64 AVX512 POPCNT instructions using
+# intrinsics used in CPUID features AVX512F and AVX512VPOPCNTDQ.
+#
+# Optional compiler flags can be passed as argument (e.g. -mavx512vpopcntdq).
+# If the intrinsics are supported then pgac_avx512_popcnt_intrinsics and
+# CFLAGS_AVX512_POPCNT are set.
+AC_DEFUN([PGAC_AVX512_POPCNT_INTRINSICS],
+[define([Ac_cachevar], [AS_TR_SH([pgac_cv_avx512_popcnt_intrinsics_$1])])dnl
+AC_CACHE_CHECK([for _mm512_popcnt_epi64 with CFLAGS=$1], [Ac_cachevar],
+[pgac_save_CFLAGS=$CFLAGS
+CFLAGS="$pgac_save_CFLAGS $1"
+AC_LINK_IFELSE([AC_LANG_PROGRAM([#include <immintrin.h>
+#include <stdint.h>],
+  [
+    __m512i tmp __attribute__((aligned(64)));
+    __m512i input = _mm512_setzero_si512();
+    __m512i output = _mm512_popcnt_epi64(input);
+    uint64_t cnt = 999;
+    _mm512_store_si512(&tmp, output);
+    cnt = _mm512_reduce_add_epi64(tmp);
+    /* return computed value, to prevent the above being optimized away */
+    return cnt == 0;
+   ])],
+  [Ac_cachevar=yes],
+  [Ac_cachevar=no])
+CFLAGS="$pgac_save_CFLAGS"])
+if test x"$Ac_cachevar" = x"yes"; then
+  CFLAGS_AVX512_POPCNT="$1"
+  pgac_avx512_popcnt_intrinsics=yes
+fi
+undefine([Ac_cachevar])dnl
+])# PGAC_AVX512_POPCNT_INTRINSICS
diff --git a/configure b/configure
index 2a1ee251f2..96c8f39254 100755
--- a/configure
+++ b/configure
@@ -647,6 +647,7 @@ MSGFMT_FLAGS
 MSGFMT
 PG_CRC32C_OBJS
 CFLAGS_CRC
+CFLAGS_AVX512_POPCNT
 LIBOBJS
 OPENSSL
 ZSTD
@@ -15209,7 +15210,7 @@ else
     We can't simply define LARGE_OFF_T to be 9223372036854775807,
     since some C++ compilers masquerading as C compilers
     incorrectly reject 9223372036854775807.  */
-#define LARGE_OFF_T (((off_t) 1 << 62) - 1 + ((off_t) 1 << 62))
+#define LARGE_OFF_T ((((off_t) 1 << 31) << 31) - 1 + (((off_t) 1 << 31) << 31))
   int off_t_is_large[(LARGE_OFF_T % 2147483629 == 721
 		       && LARGE_OFF_T % 2147483647 == 1)
 		      ? 1 : -1];
@@ -15255,7 +15256,7 @@ else
     We can't simply define LARGE_OFF_T to be 9223372036854775807,
     since some C++ compilers masquerading as C compilers
     incorrectly reject 9223372036854775807.  */
-#define LARGE_OFF_T (((off_t) 1 << 62) - 1 + ((off_t) 1 << 62))
+#define LARGE_OFF_T ((((off_t) 1 << 31) << 31) - 1 + (((off_t) 1 << 31) << 31))
   int off_t_is_large[(LARGE_OFF_T % 2147483629 == 721
 		       && LARGE_OFF_T % 2147483647 == 1)
 		      ? 1 : -1];
@@ -15279,7 +15280,7 @@ rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
     We can't simply define LARGE_OFF_T to be 9223372036854775807,
     since some C++ compilers masquerading as C compilers
     incorrectly reject 9223372036854775807.  */
-#define LARGE_OFF_T (((off_t) 1 << 62) - 1 + ((off_t) 1 << 62))
+#define LARGE_OFF_T ((((off_t) 1 << 31) << 31) - 1 + (((off_t) 1 << 31) << 31))
   int off_t_is_large[(LARGE_OFF_T % 2147483629 == 721
 		       && LARGE_OFF_T % 2147483647 == 1)
 		      ? 1 : -1];
@@ -15324,7 +15325,7 @@ else
     We can't simply define LARGE_OFF_T to be 9223372036854775807,
     since some C++ compilers masquerading as C compilers
     incorrectly reject 9223372036854775807.  */
-#define LARGE_OFF_T (((off_t) 1 << 62) - 1 + ((off_t) 1 << 62))
+#define LARGE_OFF_T ((((off_t) 1 << 31) << 31) - 1 + (((off_t) 1 << 31) << 31))
   int off_t_is_large[(LARGE_OFF_T % 2147483629 == 721
 		       && LARGE_OFF_T % 2147483647 == 1)
 		      ? 1 : -1];
@@ -15348,7 +15349,7 @@ rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
     We can't simply define LARGE_OFF_T to be 9223372036854775807,
     since some C++ compilers masquerading as C compilers
     incorrectly reject 9223372036854775807.  */
-#define LARGE_OFF_T (((off_t) 1 << 62) - 1 + ((off_t) 1 << 62))
+#define LARGE_OFF_T ((((off_t) 1 << 31) << 31) - 1 + (((off_t) 1 << 31) << 31))
   int off_t_is_large[(LARGE_OFF_T % 2147483629 == 721
 		       && LARGE_OFF_T % 2147483647 == 1)
 		      ? 1 : -1];
@@ -17736,6 +17737,100 @@ $as_echo "#define HAVE__CPUID 1" >>confdefs.h
 
 fi
 
+# Check for Intel AVX512 intrinsics to do POPCNT calculations.
+#
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for _mm512_popcnt_epi64 with CFLAGS=" >&5
+$as_echo_n "checking for _mm512_popcnt_epi64 with CFLAGS=... " >&6; }
+if ${pgac_cv_avx512_popcnt_intrinsics_+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  pgac_save_CFLAGS=$CFLAGS
+CFLAGS="$pgac_save_CFLAGS "
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+#include <immintrin.h>
+#include <stdint.h>
+int
+main ()
+{
+
+	  __m512i tmp __attribute__((aligned(64)));
+    __m512i input = _mm512_setzero_si512();
+    __m512i output = _mm512_popcnt_epi64(input);
+    uint64_t cnt = 999;
+    _mm512_store_si512(&tmp, output);
+    cnt = _mm512_reduce_add_epi64(tmp);
+    /* return computed value, to prevent the above being optimized away */
+    return cnt == 0;
+
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_link "$LINENO"; then :
+  pgac_cv_avx512_popcnt_intrinsics_=yes
+else
+  pgac_cv_avx512_popcnt_intrinsics_=no
+fi
+rm -f core conftest.err conftest.$ac_objext \
+    conftest$ac_exeext conftest.$ac_ext
+CFLAGS="$pgac_save_CFLAGS"
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $pgac_cv_avx512_popcnt_intrinsics_" >&5
+$as_echo "$pgac_cv_avx512_popcnt_intrinsics_" >&6; }
+if test x"$pgac_cv_avx512_popcnt_intrinsics_" = x"yes"; then
+  CFLAGS_AVX512_POPCNT=""
+  pgac_avx512_popcnt_intrinsics=yes
+fi
+
+if test x"$pgac_avx512_popcnt_intrinsics" != x"yes"; then
+  { $as_echo "$as_me:${as_lineno-$LINENO}: checking for _mm512_popcnt_epi64 with CFLAGS=-mavx512vpopcntdq -mavx512f" >&5
+$as_echo_n "checking for _mm512_popcnt_epi64 with CFLAGS=-mavx512vpopcntdq -mavx512f... " >&6; }
+if ${pgac_cv_avx512_popcnt_intrinsics__mavx512vpopcntdq__mavx512f+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  pgac_save_CFLAGS=$CFLAGS
+CFLAGS="$pgac_save_CFLAGS -mavx512vpopcntdq -mavx512f"
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+#include <immintrin.h>
+#include <stdint.h>
+int
+main ()
+{
+
+	  __m512i tmp __attribute__((aligned(64)));
+    __m512i input = _mm512_setzero_si512();
+    __m512i output = _mm512_popcnt_epi64(input);
+    uint64_t cnt = 999;
+    _mm512_store_si512(&tmp, output);
+    cnt = _mm512_reduce_add_epi64(tmp);
+    /* return computed value, to prevent the above being optimized away */
+    return cnt == 0;
+
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_link "$LINENO"; then :
+  pgac_cv_avx512_popcnt_intrinsics__mavx512vpopcntdq__mavx512f=yes
+else
+  pgac_cv_avx512_popcnt_intrinsics__mavx512vpopcntdq__mavx512f=no
+fi
+rm -f core conftest.err conftest.$ac_objext \
+    conftest$ac_exeext conftest.$ac_ext
+CFLAGS="$pgac_save_CFLAGS"
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $pgac_cv_avx512_popcnt_intrinsics__mavx512vpopcntdq__mavx512f" >&5
+$as_echo "$pgac_cv_avx512_popcnt_intrinsics__mavx512vpopcntdq__mavx512f" >&6; }
+if test x"$pgac_cv_avx512_popcnt_intrinsics__mavx512vpopcntdq__mavx512f" = x"yes"; then
+  CFLAGS_AVX512_POPCNT="-mavx512vpopcntdq -mavx512f"
+  pgac_avx512_popcnt_intrinsics=yes
+fi
+
+fi
+
+
 # Check for Intel SSE 4.2 intrinsics to do CRC calculations.
 #
 # First check if the _mm_crc32_u8 and _mm_crc32_u64 intrinsics can be used
diff --git a/configure.ac b/configure.ac
index 52fd7af446..d5fe701c9c 100644
--- a/configure.ac
+++ b/configure.ac
@@ -2078,6 +2078,14 @@ if test x"$pgac_cv__cpuid" = x"yes"; then
   AC_DEFINE(HAVE__CPUID, 1, [Define to 1 if you have __cpuid.])
 fi
 
+# Check for Intel AVX512 intrinsics to do POPCNT calculations.
+#
+PGAC_AVX512_POPCNT_INTRINSICS([])
+if test x"$pgac_avx512_popcnt_intrinsics" != x"yes"; then
+  PGAC_AVX512_POPCNT_INTRINSICS([-mavx512vpopcntdq -mavx512f])
+fi
+AC_SUBST(CFLAGS_AVX512_POPCNT)
+
 # Check for Intel SSE 4.2 intrinsics to do CRC calculations.
 #
 # First check if the _mm_crc32_u8 and _mm_crc32_u64 intrinsics can be used
diff --git a/src/Makefile.global.in b/src/Makefile.global.in
index 8b3f8c24e0..089f49b7f3 100644
--- a/src/Makefile.global.in
+++ b/src/Makefile.global.in
@@ -263,6 +263,7 @@ CXXFLAGS_SL_MODULE = @CXXFLAGS_SL_MODULE@
 CFLAGS_UNROLL_LOOPS = @CFLAGS_UNROLL_LOOPS@
 CFLAGS_VECTORIZE = @CFLAGS_VECTORIZE@
 CFLAGS_CRC = @CFLAGS_CRC@
+CFLAGS_AVX512_POPCNT = @CFLAGS_AVX512_POPCNT@
 PERMIT_DECLARATION_AFTER_STATEMENT = @PERMIT_DECLARATION_AFTER_STATEMENT@
 CXXFLAGS = @CXXFLAGS@
 
diff --git a/src/port/Makefile b/src/port/Makefile
index dcc8737e68..6a01a7d89a 100644
--- a/src/port/Makefile
+++ b/src/port/Makefile
@@ -87,6 +87,11 @@ pg_crc32c_sse42.o: CFLAGS+=$(CFLAGS_CRC)
 pg_crc32c_sse42_shlib.o: CFLAGS+=$(CFLAGS_CRC)
 pg_crc32c_sse42_srv.o: CFLAGS+=$(CFLAGS_CRC)
 
+# Newer Intel processors can use AVX-512 POPCNT Capabilities (01/30/2024)
+pg_bitutils.o: CFLAGS+=$(CFLAGS_AVX512_POPCNT)
+pg_bitutils_shlib.o: CFLAGS+=$(CFLAGS_AVX512_POPCNT)
+pg_bitutils_srv.o:CFLAGS+=$(CFLAGS_AVX512_POPCNT)
+
 # all versions of pg_crc32c_armv8.o need CFLAGS_CRC
 pg_crc32c_armv8.o: CFLAGS+=$(CFLAGS_CRC)
 pg_crc32c_armv8_shlib.o: CFLAGS+=$(CFLAGS_CRC)
diff --git a/src/port/pg_bitutils.c b/src/port/pg_bitutils.c
index 640a89561a..7db3cd44ce 100644
--- a/src/port/pg_bitutils.c
+++ b/src/port/pg_bitutils.c
@@ -19,6 +19,8 @@
 #include <intrin.h>
 #endif
 
+#include <immintrin.h>
+
 #include "port/pg_bitutils.h"
 
 
@@ -110,12 +112,16 @@ static int	pg_popcount64_slow(uint64 word);
 static bool pg_popcount_available(void);
 static int	pg_popcount32_choose(uint32 word);
 static int	pg_popcount64_choose(uint64 word);
+static int	pg_popcount512_choose(const char* buf, int bytes);
 static int	pg_popcount32_fast(uint32 word);
 static int	pg_popcount64_fast(uint64 word);
+static uint64  pg_popcount512_fast(const char* buf, int bytes);
+static uint64  pg_popcount512_slow(const char* buf, int bytes);
 
 int			(*pg_popcount32) (uint32 word) = pg_popcount32_choose;
 int			(*pg_popcount64) (uint64 word) = pg_popcount64_choose;
-#endif							/* TRY_POPCNT_FAST */
+uint64		(*pg_popcount512) (const char* buf, int bytes) = pg_popcount512_choose;
+#endif /* TRY_POPCNT_FAST */
 
 #ifdef TRY_POPCNT_FAST
 
@@ -138,6 +144,36 @@ pg_popcount_available(void)
 	return (exx[2] & (1 << 23)) != 0;	/* POPCNT */
 }
 
+/*
+ * Return true if CPUID indicates that the AVX512_POPCNT instruction is available. This is
+ * simular to the method above see this URL.
+ * 
+ * https://en.wikipedia.org/wiki/CPUID#EAX=7,_ECX=0:_Extended_Features
+ * 
+ * Finally we make sure the xgetbv result is conistent with the CPUID results.
+ */
+static bool
+pg_popcount512_available(void)
+{
+	unsigned int exx[4] = {0, 0, 0, 0};
+#if defined(HAVE__GET_CPUID)
+	__get_cpuid_count(7, 0, &exx[0], &exx[1], &exx[2], &exx[3]);
+#elif defined(HAVE__CPUID)
+	__cpuidex(exx, 7, 0);
+#else
+#error cpuid instruction not available
+#endif
+	if ((exx[2] & (0x00004000)) != 0 && (exx[1] & (0x00010000)) != 0) // Check for AVX512VPOPCNTDQ and AVX512F
+	{
+		uint64 xcr = 0;
+		uint32 high;
+		uint32 low;
+		__asm__ __volatile__("xgetbv\t\n" : "=a"(low), "=d"(high) : "c"(xcr));
+		return (low & 0xE0) != 0;
+	} /* POPCNT 512 */
+	return false;
+}
+
 /*
  * These functions get called on the first call to pg_popcount32 etc.
  * They detect whether we can use the asm implementations, and replace
@@ -178,6 +214,19 @@ pg_popcount64_choose(uint64 word)
 	return pg_popcount64(word);
 }
 
+static int
+pg_popcount512_choose(const char* buf, int bytes) {
+	if (pg_popcount512_available())
+	{
+		pg_popcount512 = pg_popcount512_fast;
+	}
+	else
+	{
+		pg_popcount512 = pg_popcount512_slow;
+	}
+	return pg_popcount512(buf, bytes);
+}
+
 /*
  * pg_popcount32_fast
  *		Return the number of 1 bits set in word
@@ -212,6 +261,30 @@ __asm__ __volatile__(" popcntq %1,%0\n":"=q"(res):"rm"(word):"cc");
 #endif
 }
 
+static uint64
+pg_popcount512_fast(const char *buf, int bytes)
+{
+	uint64 popcnt = 0;
+	__m512i accumulator = _mm512_setzero_si512();
+	while (bytes >= 64)
+	{
+		const __m512i v = _mm512_loadu_si512((const __m512i *)buf);
+		const __m512i p = _mm512_popcnt_epi64(v);
+		accumulator = _mm512_add_epi64(accumulator, p);
+		bytes -= 64;
+		buf += 64;
+	}
+
+	popcnt = _mm512_reduce_add_epi64(accumulator);
+	bytes = bytes % 64;
+
+	/* Process any remaining bytes */
+	while (bytes--)
+		popcnt += pg_number_of_ones[(unsigned char)*buf++];
+
+	return popcnt;
+}
+
 #endif							/* TRY_POPCNT_FAST */
 
 
@@ -265,6 +338,29 @@ pg_popcount64_slow(uint64 word)
 #endif							/* HAVE__BUILTIN_POPCOUNT */
 }
 
+static uint64
+pg_popcount512_slow(const char* buf, int bytes) {
+	uint64		popcnt = 0;
+	if (buf == (const char *)TYPEALIGN(8, buf))
+	{
+		const uint64 *words = (const uint64 *) buf;
+
+		while (bytes >= 8)
+		{
+			popcnt += pg_popcount64(*words++);
+			bytes -= 8;
+		}
+
+		buf = (const char *) words;
+	}
+
+	/* Process any remaining bytes */
+	while (bytes--)
+		popcnt += pg_number_of_ones[(unsigned char) *buf++];
+
+	return popcnt;
+}
+
 #ifndef TRY_POPCNT_FAST
 
 /*
@@ -286,6 +382,13 @@ pg_popcount64(uint64 word)
 	return pg_popcount64_slow(word);
 }
 
+static uint64 pg_popcount512_slow(const char *buf, int bytes);
+inline uint64
+pg_popcount512(const char *buf, int bytes)
+{
+	return pg_popcount512_slow(buf, bytes);
+}
+
 #endif							/* !TRY_POPCNT_FAST */
 
 /*
@@ -298,22 +401,10 @@ pg_popcount(const char *buf, int bytes)
 	uint64		popcnt = 0;
 
 #if SIZEOF_VOID_P >= 8
-	/* Process in 64-bit chunks if the buffer is aligned. */
-	if (buf == (const char *) TYPEALIGN(8, buf))
-	{
-		const uint64 *words = (const uint64 *) buf;
-
-		while (bytes >= 8)
-		{
-			popcnt += pg_popcount64(*words++);
-			bytes -= 8;
-		}
-
-		buf = (const char *) words;
-	}
+		return pg_popcount512(buf, bytes);
 #else
-	/* Process in 32-bit chunks if the buffer is aligned. */
-	if (buf == (const char *) TYPEALIGN(4, buf))
+		/* Process in 32-bit chunks if the buffer is aligned. */
+		if (buf == (const char *)TYPEALIGN(4, buf))
 	{
 		const uint32 *words = (const uint32 *) buf;
 
