From 056c803c20628e6e7bf0000f217bfcaadcbeae22 Mon Sep 17 00:00:00 2001
From: David Rowley <dgrowley@gmail.com>
Date: Fri, 3 Mar 2023 16:40:29 +1300
Subject: [PATCH v1 1/2] Remove trailing zero words from Bitmapsets

---
 src/backend/nodes/bitmapset.c | 207 +++++++++++++++-------------------
 1 file changed, 89 insertions(+), 118 deletions(-)

diff --git a/src/backend/nodes/bitmapset.c b/src/backend/nodes/bitmapset.c
index 7ba3cf635b..14fb100087 100644
--- a/src/backend/nodes/bitmapset.c
+++ b/src/backend/nodes/bitmapset.c
@@ -5,8 +5,10 @@
  *
  * A bitmap set can represent any set of nonnegative integers, although
  * it is mainly intended for sets where the maximum value is not large,
- * say at most a few hundred.  By convention, we always represent the
- * empty set by a NULL pointer.
+ * say at most a few hundred.  By convention, we always represent a set with
+ * the minimum possible number of words, i.e, there are never any trailing
+ * zero words.  Enforcing this requires that an empty set is represented as
+ * NULL.
  *
  *
  * Copyright (c) 2003-2023, PostgreSQL Global Development Group
@@ -85,18 +87,11 @@ bms_copy(const Bitmapset *a)
 }
 
 /*
- * bms_equal - are two bitmapsets equal?
- *
- * This is logical not physical equality; in particular, a NULL pointer will
- * be reported as equal to a palloc'd value containing no members.
+ * bms_equal - are two bitmapsets equal? or both NULL?
  */
 bool
 bms_equal(const Bitmapset *a, const Bitmapset *b)
 {
-	const Bitmapset *shorter;
-	const Bitmapset *longer;
-	int			shortlen;
-	int			longlen;
 	int			i;
 
 	/* Handle cases where either input is NULL */
@@ -108,30 +103,18 @@ bms_equal(const Bitmapset *a, const Bitmapset *b)
 	}
 	else if (b == NULL)
 		return false;
-	/* Identify shorter and longer input */
-	if (a->nwords <= b->nwords)
-	{
-		shorter = a;
-		longer = b;
-	}
-	else
-	{
-		shorter = b;
-		longer = a;
-	}
-	/* And process */
-	shortlen = shorter->nwords;
-	for (i = 0; i < shortlen; i++)
-	{
-		if (shorter->words[i] != longer->words[i])
-			return false;
-	}
-	longlen = longer->nwords;
-	for (; i < longlen; i++)
+
+	/* can't be equal if the word counts don't match */
+	if (a->nwords != b->nwords)
+		return false;
+
+	/* check each word matches */
+	for (i = 0; i < a->nwords; i++)
 	{
-		if (longer->words[i] != 0)
+		if (a->words[i] != b->words[i])
 			return false;
 	}
+
 	return true;
 }
 
@@ -146,29 +129,18 @@ bms_equal(const Bitmapset *a, const Bitmapset *b)
 int
 bms_compare(const Bitmapset *a, const Bitmapset *b)
 {
-	int			shortlen;
 	int			i;
-
 	/* Handle cases where either input is NULL */
 	if (a == NULL)
 		return (b == NULL) ? 0 : -1;
 	else if (b == NULL)
 		return +1;
-	/* Handle cases where one input is longer than the other */
-	shortlen = Min(a->nwords, b->nwords);
-	for (i = shortlen; i < a->nwords; i++)
-	{
-		if (a->words[i] != 0)
-			return +1;
-	}
-	for (i = shortlen; i < b->nwords; i++)
-	{
-		if (b->words[i] != 0)
-			return -1;
-	}
-	/* Process words in common */
-	i = shortlen;
-	while (--i >= 0)
+
+	/* the set with the most words must be greater */
+	if (a->nwords != b->nwords)
+		return (a->nwords > b->nwords) ? +1 : -1;
+
+	for (i = a->nwords - 1; i >= 0; i--)
 	{
 		bitmapword	aw = a->words[i];
 		bitmapword	bw = b->words[i];
@@ -261,6 +233,7 @@ bms_intersect(const Bitmapset *a, const Bitmapset *b)
 {
 	Bitmapset  *result;
 	const Bitmapset *other;
+	int			lastnonzero;
 	int			resultlen;
 	int			i;
 
@@ -280,14 +253,23 @@ bms_intersect(const Bitmapset *a, const Bitmapset *b)
 	}
 	/* And intersect the longer input with the result */
 	resultlen = result->nwords;
+	lastnonzero = -1;
 	for (i = 0; i < resultlen; i++)
+	{
 		result->words[i] &= other->words[i];
+
+		if (result->words[i] != 0)
+			lastnonzero = i;
+	}
 	/* If we computed an empty result, we must return NULL */
-	if (bms_is_empty_internal(result))
+	if (lastnonzero == -1)
 	{
 		pfree(result);
 		return NULL;
 	}
+
+	/* get rid of trailing zero words */
+	result->nwords = lastnonzero + 1;
 	return result;
 }
 
@@ -298,6 +280,7 @@ Bitmapset *
 bms_difference(const Bitmapset *a, const Bitmapset *b)
 {
 	Bitmapset  *result;
+	int			lastnonzero;
 	int			shortlen;
 	int			i;
 
@@ -319,8 +302,19 @@ bms_difference(const Bitmapset *a, const Bitmapset *b)
 	result = bms_copy(a);
 	/* And remove b's bits from result */
 	shortlen = Min(a->nwords, b->nwords);
+	lastnonzero = -1;
 	for (i = 0; i < shortlen; i++)
+	{
 		result->words[i] &= ~b->words[i];
+
+		if (result->words[i] != 0)
+			lastnonzero = i;
+	}
+
+	/* get rid of trailing zero words */
+	result->nwords = lastnonzero + 1;
+	Assert(result->nwords != 0);
+
 	/* Need not check for empty result, since we handled that case above */
 	return result;
 }
@@ -331,32 +325,23 @@ bms_difference(const Bitmapset *a, const Bitmapset *b)
 bool
 bms_is_subset(const Bitmapset *a, const Bitmapset *b)
 {
-	int			shortlen;
-	int			longlen;
 	int			i;
-
 	/* Handle cases where either input is NULL */
 	if (a == NULL)
 		return true;			/* empty set is a subset of anything */
 	if (b == NULL)
 		return false;
-	/* Check common words */
-	shortlen = Min(a->nwords, b->nwords);
-	for (i = 0; i < shortlen; i++)
+
+	/* 'a' can't be a subset of 'b' if it contains more words */
+	if (a->nwords > b->nwords)
+		return false;
+
+	/* Check all 'a' members are set in 'b' */
+	for (i = 0; i < a->nwords; i++)
 	{
 		if ((a->words[i] & ~b->words[i]) != 0)
 			return false;
 	}
-	/* Check extra words */
-	if (a->nwords > b->nwords)
-	{
-		longlen = a->nwords;
-		for (; i < longlen; i++)
-		{
-			if (a->words[i] != 0)
-				return false;
-		}
-	}
 	return true;
 }
 
@@ -370,7 +355,6 @@ bms_subset_compare(const Bitmapset *a, const Bitmapset *b)
 {
 	BMS_Comparison result;
 	int			shortlen;
-	int			longlen;
 	int			i;
 
 	/* Handle cases where either input is NULL */
@@ -408,31 +392,17 @@ bms_subset_compare(const Bitmapset *a, const Bitmapset *b)
 	/* Check extra words */
 	if (a->nwords > b->nwords)
 	{
-		longlen = a->nwords;
-		for (; i < longlen; i++)
-		{
-			if (a->words[i] != 0)
-			{
-				/* a is not a subset of b */
-				if (result == BMS_SUBSET1)
-					return BMS_DIFFERENT;
-				result = BMS_SUBSET2;
-			}
-		}
+		/* if a has more words then a is not a subset of b */
+		if (result == BMS_SUBSET1)
+			return BMS_DIFFERENT;
+		return BMS_SUBSET2;
 	}
 	else if (a->nwords < b->nwords)
 	{
-		longlen = b->nwords;
-		for (; i < longlen; i++)
-		{
-			if (b->words[i] != 0)
-			{
-				/* b is not a subset of a */
-				if (result == BMS_SUBSET2)
-					return BMS_DIFFERENT;
-				result = BMS_SUBSET1;
-			}
-		}
+		/* if b has more words then b is not a subset of a */
+		if (result == BMS_SUBSET2)
+			return BMS_DIFFERENT;
+		return BMS_SUBSET1;
 	}
 	return result;
 }
@@ -563,27 +533,21 @@ bms_overlap_list(const Bitmapset *a, const List *b)
 bool
 bms_nonempty_difference(const Bitmapset *a, const Bitmapset *b)
 {
-	int			shortlen;
 	int			i;
-
 	/* Handle cases where either input is NULL */
 	if (a == NULL)
 		return false;
 	if (b == NULL)
 		return true;
-	/* Check words in common */
-	shortlen = Min(a->nwords, b->nwords);
-	for (i = 0; i < shortlen; i++)
+	/* if a has more words then it must contain additional members */
+	if (a->nwords > b->nwords)
+		return true;
+	/* Check all 'a' members are set in 'b' */
+	for (i = 0; i < a->nwords; i++)
 	{
 		if ((a->words[i] & ~b->words[i]) != 0)
 			return true;
 	}
-	/* Check extra words in a */
-	for (; i < a->nwords; i++)
-	{
-		if (a->words[i] != 0)
-			return true;
-	}
 	return false;
 }
 
@@ -927,6 +891,7 @@ bms_add_range(Bitmapset *a, int lower, int upper)
 Bitmapset *
 bms_int_members(Bitmapset *a, const Bitmapset *b)
 {
+	int			lastnonzero;
 	int			shortlen;
 	int			i;
 
@@ -940,16 +905,24 @@ bms_int_members(Bitmapset *a, const Bitmapset *b)
 	}
 	/* Intersect b into a; we need never copy */
 	shortlen = Min(a->nwords, b->nwords);
+	lastnonzero = -1;
 	for (i = 0; i < shortlen; i++)
+	{
 		a->words[i] &= b->words[i];
-	for (; i < a->nwords; i++)
-		a->words[i] = 0;
+
+		if (a->words[i] != 0)
+			lastnonzero = i;
+	}
+
 	/* If we computed an empty result, we must return NULL */
-	if (bms_is_empty_internal(a))
+	if (lastnonzero == -1)
 	{
 		pfree(a);
 		return NULL;
 	}
+
+	/* get rid of trailing zero words */
+	a->nwords = lastnonzero + 1;
 	return a;
 }
 
@@ -959,6 +932,7 @@ bms_int_members(Bitmapset *a, const Bitmapset *b)
 Bitmapset *
 bms_del_members(Bitmapset *a, const Bitmapset *b)
 {
+	int			lastnonzero;
 	int			shortlen;
 	int			i;
 
@@ -969,14 +943,25 @@ bms_del_members(Bitmapset *a, const Bitmapset *b)
 		return a;
 	/* Remove b's bits from a; we need never copy */
 	shortlen = Min(a->nwords, b->nwords);
+	lastnonzero = -1;
 	for (i = 0; i < shortlen; i++)
+	{
 		a->words[i] &= ~b->words[i];
+
+		if (a->words[i] != 0)
+			lastnonzero = i;
+	}
+
 	/* If we computed an empty result, we must return NULL */
-	if (bms_is_empty_internal(a))
+	if (lastnonzero == -1)
 	{
 		pfree(a);
 		return NULL;
 	}
+
+	/* get rid of trailing zero words */
+	a->nwords = lastnonzero + 1;
+
 	return a;
 }
 
@@ -1140,28 +1125,14 @@ bms_prev_member(const Bitmapset *a, int prevbit)
 
 /*
  * bms_hash_value - compute a hash key for a Bitmapset
- *
- * Note: we must ensure that any two bitmapsets that are bms_equal() will
- * hash to the same value; in practice this means that trailing all-zero
- * words must not affect the result.  Hence we strip those before applying
- * hash_any().
  */
 uint32
 bms_hash_value(const Bitmapset *a)
 {
-	int			lastword;
-
 	if (a == NULL)
 		return 0;				/* All empty sets hash to 0 */
-	for (lastword = a->nwords; --lastword >= 0;)
-	{
-		if (a->words[lastword] != 0)
-			break;
-	}
-	if (lastword < 0)
-		return 0;				/* All empty sets hash to 0 */
 	return DatumGetUInt32(hash_any((const unsigned char *) a->words,
-								   (lastword + 1) * sizeof(bitmapword)));
+								   a->nwords * sizeof(bitmapword)));
 }
 
 /*
-- 
2.37.2

