From 7fbab106f422a9da7658c9a1580d2b4e7229731a Mon Sep 17 00:00:00 2001
From: John Naylor <john.naylor@postgresql.org>
Date: Mon, 2 Feb 2026 19:51:46 +0700
Subject: [PATCH v7 5/5] Bypass function call on x86

This allows a single bitmap word to be handled inline without a
lookup table.

Also, make 32-bit inline function similar to 64-bit, looks
cleaner this way and is not performance critical.
---
 src/backend/nodes/bitmapset.c  |  4 ++++
 src/include/port/pg_bitutils.h | 36 +++++++++++++---------------------
 2 files changed, 18 insertions(+), 22 deletions(-)

diff --git a/src/backend/nodes/bitmapset.c b/src/backend/nodes/bitmapset.c
index 23c91fdb6c9..c274da98798 100644
--- a/src/backend/nodes/bitmapset.c
+++ b/src/backend/nodes/bitmapset.c
@@ -748,6 +748,10 @@ bms_num_members(const Bitmapset *a)
 	if (a == NULL)
 		return 0;
 
+	/* fastpath for the common case */
+	if (a->nwords == 1)
+		return bmw_popcount(a->words[0]);
+
 	return pg_popcount((const char *) a->words,
 					   a->nwords * sizeof(bitmapword));
 }
diff --git a/src/include/port/pg_bitutils.h b/src/include/port/pg_bitutils.h
index a3e0f346ef6..98ede3e2686 100644
--- a/src/include/port/pg_bitutils.h
+++ b/src/include/port/pg_bitutils.h
@@ -297,23 +297,15 @@ extern uint64 pg_popcount_masked_optimized(const char *buf, int bytes, bits8 mas
 /*
  * pg_popcount32
  *		Return the number of 1 bits set in word
+ *
+ * from https://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
  */
 static inline int
 pg_popcount32(uint32 word)
 {
-#ifdef HAVE__BUILTIN_POPCOUNT
-	return __builtin_popcount(word);
-#else							/* !HAVE__BUILTIN_POPCOUNT */
-	int			result = 0;
-
-	while (word != 0)
-	{
-		result += pg_number_of_ones[word & 255];
-		word >>= 8;
-	}
-
-	return result;
-#endif							/* HAVE__BUILTIN_POPCOUNT */
+	word = word - ((word >> 1) & 0x55555555);
+	word = (word & 0x33333333) + ((word >> 2) & 0x33333333);
+	return ((word + (word >> 4) & 0xF0F0F0F) * 0x1010101) >> 24;
 }
 
 /*
@@ -323,7 +315,11 @@ pg_popcount32(uint32 word)
 static inline int
 pg_popcount64(uint64 word)
 {
-#ifdef HAVE__BUILTIN_POPCOUNT
+	/*
+	 * On x86, gcc generates a function call for this builtin unless told
+	 * otherwise, so we use the equivalent pure C here to allow inlining.
+	 */
+#if defined(HAVE__BUILTIN_POPCOUNT) && (defined(__POPCNT__) || !defined(__x86_64__))
 #if SIZEOF_LONG == 8
 	return __builtin_popcountl(word);
 #elif SIZEOF_LONG_LONG == 8
@@ -332,15 +328,11 @@ pg_popcount64(uint64 word)
 #error "cannot find integer of the same size as uint64_t"
 #endif
 #else							/* !HAVE__BUILTIN_POPCOUNT */
-	int			result = 0;
 
-	while (word != 0)
-	{
-		result += pg_number_of_ones[word & 255];
-		word >>= 8;
-	}
-
-	return result;
+	word = word - ((word >> 1) & 0x5555555555555555);
+	word = (word & 0x3333333333333333) + ((word >> 2) & 0x3333333333333333);
+	word = (word + (word >> 4)) & 0xF0F0F0F0F0F0F0F;
+	return (word * 0x101010101010101) >> 56;
 #endif							/* HAVE__BUILTIN_POPCOUNT */
 }
 
-- 
2.52.0

