From e5e4cac323b913e9fcdbd17d6b07316a21f7ff5c Mon Sep 17 00:00:00 2001
From: Paul Amonson <paul.d.amonson@intel.com>
Date: Tue, 19 Mar 2024 13:37:31 -0700
Subject: [PATCH 2/3] [Refactor] Separated slow, fast, and choose functionality
 into files.

Signed-off-by: Paul Amonson <paul.d.amonson@intel.com>
---
 contrib/intarray/_intbig_gist.c       |   2 +-
 contrib/ltree/_ltree_gist.c           |   2 +-
 contrib/pageinspect/heapfuncs.c       |   4 +-
 contrib/pg_trgm/trgm_gist.c           |   2 +-
 contrib/pg_walinspect/pg_walinspect.c |   2 +-
 src/backend/lib/bloomfilter.c         |   2 +-
 src/backend/postmaster/syslogger.c    |   2 +-
 src/backend/utils/adt/tsgistidx.c     |   2 +-
 src/backend/utils/adt/varbit.c        |   2 +-
 src/backend/utils/adt/varlena.c       |   2 +-
 src/port/Makefile                     |   2 +
 src/port/meson.build                  |   2 +
 src/port/pg_bitutils.c                | 171 +-------------------------
 src/port/pg_popcount_x86_64_accel.c   | 101 +++++++++++++++
 src/port/pg_popcount_x86_64_choose.c  |  98 +++++++++++++++
 15 files changed, 215 insertions(+), 181 deletions(-)
 create mode 100644 src/port/pg_popcount_x86_64_accel.c
 create mode 100644 src/port/pg_popcount_x86_64_choose.c

diff --git a/contrib/intarray/_intbig_gist.c b/contrib/intarray/_intbig_gist.c
index 9699fbf3b4..a12ea7ed9b 100644
--- a/contrib/intarray/_intbig_gist.c
+++ b/contrib/intarray/_intbig_gist.c
@@ -210,7 +210,7 @@ g_intbig_compress(PG_FUNCTION_ARGS)
 static int32
 sizebitvec(BITVECP sign, int siglen)
 {
-	return pg_popcount(sign, siglen);
+	return PG_POPCOUNT(sign, siglen);
 }
 
 static int
diff --git a/contrib/ltree/_ltree_gist.c b/contrib/ltree/_ltree_gist.c
index e89a39a5b5..bd66ec2e65 100644
--- a/contrib/ltree/_ltree_gist.c
+++ b/contrib/ltree/_ltree_gist.c
@@ -180,7 +180,7 @@ _ltree_union(PG_FUNCTION_ARGS)
 static int32
 sizebitvec(BITVECP sign, int siglen)
 {
-	return pg_popcount((const char *) sign, siglen);
+	return PG_POPCOUNT((const char *) sign, siglen);
 }
 
 static int
diff --git a/contrib/pageinspect/heapfuncs.c b/contrib/pageinspect/heapfuncs.c
index 3faeabc711..089842962f 100644
--- a/contrib/pageinspect/heapfuncs.c
+++ b/contrib/pageinspect/heapfuncs.c
@@ -527,8 +527,8 @@ heap_tuple_infomask_flags(PG_FUNCTION_ARGS)
 	if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)
 		elog(ERROR, "return type must be a row type");
 
-	bitcnt = pg_popcount((const char *) &t_infomask, sizeof(uint16)) +
-		pg_popcount((const char *) &t_infomask2, sizeof(uint16));
+	bitcnt = PG_POPCOUNT((const char *) &t_infomask, sizeof(uint16)) +
+		PG_POPCOUNT((const char *) &t_infomask2, sizeof(uint16));
 
 	/* If no flags, return a set of empty arrays */
 	if (bitcnt <= 0)
diff --git a/contrib/pg_trgm/trgm_gist.c b/contrib/pg_trgm/trgm_gist.c
index 9ef2e38560..850316196f 100644
--- a/contrib/pg_trgm/trgm_gist.c
+++ b/contrib/pg_trgm/trgm_gist.c
@@ -648,7 +648,7 @@ gtrgm_same(PG_FUNCTION_ARGS)
 static int32
 sizebitvec(BITVECP sign, int siglen)
 {
-	return pg_popcount(sign, siglen);
+	return PG_POPCOUNT(sign, siglen);
 }
 
 static int
diff --git a/contrib/pg_walinspect/pg_walinspect.c b/contrib/pg_walinspect/pg_walinspect.c
index ee2918726d..93a7b4842a 100644
--- a/contrib/pg_walinspect/pg_walinspect.c
+++ b/contrib/pg_walinspect/pg_walinspect.c
@@ -303,7 +303,7 @@ GetWALBlockInfo(FunctionCallInfo fcinfo, XLogReaderState *record,
 			block_fpi_len = blk->bimg_len;
 
 			/* Construct and save block_fpi_info */
-			bitcnt = pg_popcount((const char *) &blk->bimg_info,
+			bitcnt = PG_POPCOUNT((const char *) &blk->bimg_info,
 								 sizeof(uint8));
 			flags = (Datum *) palloc0(sizeof(Datum) * bitcnt);
 			if ((blk->bimg_info & BKPIMAGE_HAS_HOLE) != 0)
diff --git a/src/backend/lib/bloomfilter.c b/src/backend/lib/bloomfilter.c
index 360d21ca45..c01b069c01 100644
--- a/src/backend/lib/bloomfilter.c
+++ b/src/backend/lib/bloomfilter.c
@@ -187,7 +187,7 @@ double
 bloom_prop_bits_set(bloom_filter *filter)
 {
 	int			bitset_bytes = filter->m / BITS_PER_BYTE;
-	uint64		bits_set = pg_popcount((char *) filter->bitset, bitset_bytes);
+	uint64		bits_set = PG_POPCOUNT((char *) filter->bitset, bitset_bytes);
 
 	return bits_set / (double) filter->m;
 }
diff --git a/src/backend/postmaster/syslogger.c b/src/backend/postmaster/syslogger.c
index 08efe74cc9..85c57b3154 100644
--- a/src/backend/postmaster/syslogger.c
+++ b/src/backend/postmaster/syslogger.c
@@ -898,7 +898,7 @@ process_pipe_input(char *logbuffer, int *bytes_in_logbuffer)
 		if (p.nuls[0] == '\0' && p.nuls[1] == '\0' &&
 			p.len > 0 && p.len <= PIPE_MAX_PAYLOAD &&
 			p.pid != 0 &&
-			pg_popcount((char *) &dest_flags, 1) == 1)
+			PG_POPCOUNT((char *) &dest_flags, 1) == 1)
 		{
 			List	   *buffer_list;
 			ListCell   *cell;
diff --git a/src/backend/utils/adt/tsgistidx.c b/src/backend/utils/adt/tsgistidx.c
index 5698ee5502..d7a76faf31 100644
--- a/src/backend/utils/adt/tsgistidx.c
+++ b/src/backend/utils/adt/tsgistidx.c
@@ -489,7 +489,7 @@ gtsvector_same(PG_FUNCTION_ARGS)
 static int32
 sizebitvec(BITVECP sign, int siglen)
 {
-	return pg_popcount(sign, siglen);
+	return PG_POPCOUNT(sign, siglen);
 }
 
 static int
diff --git a/src/backend/utils/adt/varbit.c b/src/backend/utils/adt/varbit.c
index 8fcf3fb731..3f287cd54d 100644
--- a/src/backend/utils/adt/varbit.c
+++ b/src/backend/utils/adt/varbit.c
@@ -1212,7 +1212,7 @@ bit_bit_count(PG_FUNCTION_ARGS)
 {
 	VarBit	   *arg = PG_GETARG_VARBIT_P(0);
 
-	PG_RETURN_INT64(pg_popcount((char *) VARBITS(arg), VARBITBYTES(arg)));
+	PG_RETURN_INT64(PG_POPCOUNT((char *) VARBITS(arg), VARBITBYTES(arg)));
 }
 
 /*
diff --git a/src/backend/utils/adt/varlena.c b/src/backend/utils/adt/varlena.c
index 8d28dd42ce..809e6a59ab 100644
--- a/src/backend/utils/adt/varlena.c
+++ b/src/backend/utils/adt/varlena.c
@@ -3151,7 +3151,7 @@ bytea_bit_count(PG_FUNCTION_ARGS)
 {
 	bytea	   *t1 = PG_GETARG_BYTEA_PP(0);
 
-	PG_RETURN_INT64(pg_popcount(VARDATA_ANY(t1), VARSIZE_ANY_EXHDR(t1)));
+	PG_RETURN_INT64(PG_POPCOUNT(VARDATA_ANY(t1), VARSIZE_ANY_EXHDR(t1)));
 }
 
 /*
diff --git a/src/port/Makefile b/src/port/Makefile
index dcc8737e68..1499985dfc 100644
--- a/src/port/Makefile
+++ b/src/port/Makefile
@@ -44,6 +44,8 @@ OBJS = \
 	noblock.o \
 	path.o \
 	pg_bitutils.o \
+	pg_popcount_x86_64_choose.o \
+	pg_popcount_x86_64_accel.o \
 	pg_strong_random.o \
 	pgcheckdir.o \
 	pgmkdirp.o \
diff --git a/src/port/meson.build b/src/port/meson.build
index 92b593e6ef..cf6e9fa06c 100644
--- a/src/port/meson.build
+++ b/src/port/meson.build
@@ -7,6 +7,8 @@ pgport_sources = [
   'noblock.c',
   'path.c',
   'pg_bitutils.c',
+  'pg_popcount_x86_64_choose.c',
+  'pg_popcount_x86_64_accel.c',
   'pg_strong_random.c',
   'pgcheckdir.c',
   'pgmkdirp.c',
diff --git a/src/port/pg_bitutils.c b/src/port/pg_bitutils.c
index e629969035..f08820b35b 100644
--- a/src/port/pg_bitutils.c
+++ b/src/port/pg_bitutils.c
@@ -11,14 +11,6 @@
  *-------------------------------------------------------------------------
  */
 #include "c.h"
-
-#ifdef HAVE__GET_CPUID
-#include <cpuid.h>
-#endif
-#ifdef HAVE__CPUID
-#include <intrin.h>
-#endif
-
 #include "port/pg_bitutils.h"
 
 
@@ -103,167 +95,6 @@ const uint8 pg_number_of_ones[256] = {
 	4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8
 };
 
-// static inline int pg_popcount32_slow(uint32 word);
-// static inline int pg_popcount64_slow(uint64 word);
-// static uint64 pg_popcount_slow(const char *buf, int bytes);
-
-#ifdef TRY_POPCNT_FAST
-static bool pg_popcount_available(void);
-static int	pg_popcount32_choose(uint32 word);
-static int	pg_popcount64_choose(uint64 word);
-static uint64 pg_popcount_choose(const char *buf, int bytes);
-static inline int pg_popcount32_fast(uint32 word);
-static inline int pg_popcount64_fast(uint64 word);
-static uint64 pg_popcount_fast(const char *buf, int bytes);
-
-int			(*pg_popcount32) (uint32 word) = pg_popcount32_choose;
-int			(*pg_popcount64) (uint64 word) = pg_popcount64_choose;
-uint64		(*pg_popcount) (const char *buf, int bytes) = pg_popcount_choose;
-
-/*
- * Return true if CPUID indicates that the POPCNT instruction is available.
- */
-static bool
-pg_popcount_available(void)
-{
-	unsigned int exx[4] = {0, 0, 0, 0};
-
-#if defined(HAVE__GET_CPUID)
-	__get_cpuid(1, &exx[0], &exx[1], &exx[2], &exx[3]);
-#elif defined(HAVE__CPUID)
-	__cpuid(exx, 1);
-#else
-#error cpuid instruction not available
-#endif
-
-	return (exx[2] & (1 << 23)) != 0;	/* POPCNT */
-}
-
-/*
- * These functions get called on the first call to pg_popcount32 etc.
- * They detect whether we can use the asm implementations, and replace
- * the function pointers so that subsequent calls are routed directly to
- * the chosen implementation.
- */
-static inline void set_function_pointers()
-{
-	if (pg_popcount_available())
-	{
-		pg_popcount32 = pg_popcount32_fast;
-		pg_popcount64 = pg_popcount64_fast;
-		pg_popcount = pg_popcount_fast;
-	}
-	else
-	{
-		pg_popcount32 = pg_popcount32_slow;
-		pg_popcount64 = pg_popcount64_slow;
-		pg_popcount = pg_popcount_slow;
-	}
-}
-
-static int
-pg_popcount32_choose(uint32 word)
-{
-	set_function_pointers();
-	return pg_popcount32(word);
-}
-
-static int
-pg_popcount64_choose(uint64 word)
-{
-	set_function_pointers();
-	return pg_popcount64(word);
-}
-
-static uint64
-pg_popcount_choose(const char *buf, int bytes)
-{
-	set_function_pointers();
-	return pg_popcount(buf, bytes);
-}
-
-/*
- * pg_popcount32_fast
- *		Return the number of 1 bits set in word
- */
-static inline int
-pg_popcount32_fast(uint32 word)
-{
-#ifdef _MSC_VER
-	return __popcnt(word);
-#else
-	uint32		res;
-
-__asm__ __volatile__(" popcntl %1,%0\n":"=q"(res):"rm"(word):"cc");
-	return (int) res;
-#endif
-}
-
-/*
- * pg_popcount64_fast
- *		Return the number of 1 bits set in word
- */
-static inline int
-pg_popcount64_fast(uint64 word)
-{
-#ifdef _MSC_VER
-	return __popcnt64(word);
-#else
-	uint64		res;
-
-__asm__ __volatile__(" popcntq %1,%0\n":"=q"(res):"rm"(word):"cc");
-	return (int) res;
-#endif
-}
-
-/*
- * pg_popcount_fast
- *		Returns the number of 1-bits in buf
- */
-static inline uint64
-pg_popcount_fast(const char *buf, int bytes)
-{
-	uint64		popcnt = 0;
-
-#if SIZEOF_VOID_P >= 8
-	/* Process in 64-bit chunks if the buffer is aligned. */
-	if (buf == (const char *) TYPEALIGN(8, buf))
-	{
-		const uint64 *words = (const uint64 *) buf;
-
-		while (bytes >= 8)
-		{
-			popcnt += PG_POPCOUNT64(*words++);
-			bytes -= 8;
-		}
-
-		buf = (const char *) words;
-	}
-#else
-	/* Process in 32-bit chunks if the buffer is aligned. */
-	if (buf == (const char *) TYPEALIGN(4, buf))
-	{
-		const uint32 *words = (const uint32 *) buf;
-
-		while (bytes >= 4)
-		{
-			popcnt += PG_POPCOUNT32(*words++);
-			bytes -= 4;
-		}
-
-		buf = (const char *) words;
-	}
-#endif
-
-	/* Process any remaining bytes */
-	while (bytes--)
-		popcnt += pg_number_of_ones[(unsigned char) *buf++];
-
-	return popcnt;
-}
-
-#endif							/* TRY_POPCNT_FAST */
-
 
 /*
  * pg_popcount32_slow
@@ -319,7 +150,7 @@ pg_popcount64_slow(uint64 word)
  * pg_popcount_slow
  *		Returns the number of 1-bits in buf
  */
-uint64
+inline uint64
 pg_popcount_slow(const char *buf, int bytes)
 {
 	uint64		popcnt = 0;
diff --git a/src/port/pg_popcount_x86_64_accel.c b/src/port/pg_popcount_x86_64_accel.c
new file mode 100644
index 0000000000..d63e8aa30f
--- /dev/null
+++ b/src/port/pg_popcount_x86_64_accel.c
@@ -0,0 +1,101 @@
+/*-------------------------------------------------------------------------
+ *
+ * pg_popcount_x86_64_accel.c
+ *	  x86-64 popcount implementations that use the POPCNT instruction.
+ *
+ * Copyright (c) 2024, PostgreSQL Global Development Group
+ *
+ * IDENTIFICATION
+ *	  src/port/pg_popcount_x86_64_accel.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "c.h"
+#include "port/pg_bitutils.h"
+
+#ifdef TRY_POPCNT_FAST
+int pg_popcount32_fast(uint32 word);
+int pg_popcount64_fast(uint64 word);
+uint64 pg_popcount_fast(const char *buf, int bytes);
+
+/*
+ * pg_popcount32_fast
+ *		Return the number of 1 bits set in word
+ */
+int
+pg_popcount32_fast(uint32 word)
+{
+#ifdef _MSC_VER
+	return __popcnt(word);
+#else
+	uint32		res;
+
+__asm__ __volatile__(" popcntl %1,%0\n":"=q"(res):"rm"(word):"cc");
+	return (int) res;
+#endif
+}
+
+/*
+ * pg_popcount64_fast
+ *		Return the number of 1 bits set in word
+ */
+int
+pg_popcount64_fast(uint64 word)
+{
+#ifdef _MSC_VER
+	return __popcnt64(word);
+#else
+	uint64		res;
+
+__asm__ __volatile__(" popcntq %1,%0\n":"=q"(res):"rm"(word):"cc");
+	return (int) res;
+#endif
+}
+
+/*
+ * pg_popcount_fast
+ *		Returns the number of 1-bits in the first "bytes" bytes of buf
+ */
+uint64
+pg_popcount_fast(const char *buf, int bytes)
+{
+	uint64		popcnt = 0;
+
+#if SIZEOF_VOID_P >= 8
+	/* Aligned: count 64-bit words, calling the POPCNT code directly. */
+	if (buf == (const char *) TYPEALIGN(8, buf))
+	{
+		const uint64 *words = (const uint64 *) buf;
+
+		while (bytes >= 8)
+		{
+			popcnt += pg_popcount64_fast(*words++);
+			bytes -= 8;
+		}
+
+		buf = (const char *) words;
+	}
+#else
+	/* Aligned: count 32-bit words, calling the POPCNT code directly. */
+	if (buf == (const char *) TYPEALIGN(4, buf))
+	{
+		const uint32 *words = (const uint32 *) buf;
+
+		while (bytes >= 4)
+		{
+			popcnt += pg_popcount32_fast(*words++);
+			bytes -= 4;
+		}
+
+		buf = (const char *) words;
+	}
+#endif
+
+	/* Count leftover (or unaligned) bytes with the lookup table. */
+	while (bytes--)
+		popcnt += pg_number_of_ones[(unsigned char) *buf++];
+
+	return popcnt;
+}
+
+#endif							/* TRY_POPCNT_FAST */
diff --git a/src/port/pg_popcount_x86_64_choose.c b/src/port/pg_popcount_x86_64_choose.c
new file mode 100644
index 0000000000..1a0022a0b3
--- /dev/null
+++ b/src/port/pg_popcount_x86_64_choose.c
@@ -0,0 +1,98 @@
+/*-------------------------------------------------------------------------
+ *
+ * pg_popcount_x86_64_choose.c
+ *	  Runtime selection of the popcount implementation on x86-64.
+ *
+ * Copyright (c) 2024, PostgreSQL Global Development Group
+ *
+ * IDENTIFICATION
+ *	  src/port/pg_popcount_x86_64_choose.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "c.h"
+
+#ifdef HAVE__GET_CPUID
+#include <cpuid.h>
+#endif
+#ifdef HAVE__CPUID
+#include <intrin.h>
+#endif
+
+#include "port/pg_bitutils.h"
+
+#ifdef TRY_POPCNT_FAST
+int pg_popcount32_fast(uint32 word);
+int pg_popcount64_fast(uint64 word);
+uint64 pg_popcount_fast(const char *buf, int bytes);
+
+static int	pg_popcount32_choose(uint32 word);
+static int	pg_popcount64_choose(uint64 word);
+static uint64 pg_popcount_choose(const char *buf, int bytes);
+
+int			(*pg_popcount32) (uint32 word) = pg_popcount32_choose;
+int			(*pg_popcount64) (uint64 word) = pg_popcount64_choose;
+uint64		(*pg_popcount) (const char *buf, int bytes) = pg_popcount_choose;
+
+/*
+ * Return true if CPUID indicates that the POPCNT instruction is available.
+ */
+static bool
+pg_popcount_available(void)
+{
+	unsigned int exx[4] = {0, 0, 0, 0};
+
+#if defined(HAVE__GET_CPUID)
+	__get_cpuid(1, &exx[0], &exx[1], &exx[2], &exx[3]);
+#elif defined(HAVE__CPUID)
+	__cpuid(exx, 1);
+#else
+#error cpuid instruction not available
+#endif
+
+	return (exx[2] & (1 << 23)) != 0;	/* POPCNT */
+}
+
+/*
+ * Point pg_popcount32, pg_popcount64 and pg_popcount at the POPCNT-based
+ * "fast" implementations when pg_popcount_available() reports support, and
+ * at the portable "slow" implementations otherwise.  The *_choose functions
+ * call this on first use, so later calls go straight to the chosen function.
+ */
+static inline void set_function_pointers(void)
+{
+	if (pg_popcount_available())
+	{
+		pg_popcount32 = pg_popcount32_fast;
+		pg_popcount64 = pg_popcount64_fast;
+		pg_popcount = pg_popcount_fast;
+	}
+	else
+	{
+		pg_popcount32 = pg_popcount32_slow;
+		pg_popcount64 = pg_popcount64_slow;
+		pg_popcount = pg_popcount_slow;
+	}
+}
+
+static int
+pg_popcount32_choose(uint32 word)
+{
+	set_function_pointers();
+	return pg_popcount32(word);
+}
+
+static int
+pg_popcount64_choose(uint64 word)
+{
+	set_function_pointers();
+	return pg_popcount64(word);
+}
+
+static uint64
+pg_popcount_choose(const char *buf, int bytes)
+{
+	set_function_pointers();
+	return pg_popcount(buf, bytes);
+}
+#endif							/* TRY_POPCNT_FAST */
-- 
2.34.1

