From c5aedd5bcd917a8c4f987b5dbcdd039a4121e5b6 Mon Sep 17 00:00:00 2001
From: Thomas Munro <thomas.munro@gmail.com>
Date: Fri, 30 Aug 2019 13:41:04 +1200
Subject: [PATCH] Consolidate code that makes a sorted array unique.

Introduce qunique() and qunique_arg() which can be used after qsort()
and qsort_arg() respectively to remove duplicate values.

Author: Thomas Munro
Reviewed-by: Tom Lane
Discussion: https://postgr.es/m/CAEepm%3D2vmFTNpAmwbGGD2WaryM6T3hSDVKQPfUwjdD_5XY6vAA%40mail.gmail.com
---
 contrib/hstore/hstore_io.c           |  5 +++
 contrib/intarray/_int_tool.c         | 19 +++-----
 contrib/pg_trgm/trgm_op.c            | 25 ++---------
 src/backend/access/nbtree/nbtutils.c | 19 ++------
 src/backend/executor/nodeTidscan.c   | 13 ++----
 src/backend/utils/adt/acl.c          | 15 ++-----
 src/backend/utils/adt/tsgistidx.c    | 29 ++-----------
 src/backend/utils/adt/tsquery_op.c   | 29 ++-----------
 src/backend/utils/adt/tsvector.c     |  5 ++-
 src/backend/utils/adt/tsvector_op.c  | 59 ++++---------------------
 src/backend/utils/adt/txid.c         | 19 +-------
 src/backend/utils/cache/syscache.c   | 21 +++------
 src/include/lib/qunique.h            | 65 ++++++++++++++++++++++++++++
 13 files changed, 115 insertions(+), 208 deletions(-)
 create mode 100644 src/include/lib/qunique.h

diff --git a/contrib/hstore/hstore_io.c b/contrib/hstore/hstore_io.c
index be3cce4f74..10ec392775 100644
--- a/contrib/hstore/hstore_io.c
+++ b/contrib/hstore/hstore_io.c
@@ -322,6 +322,11 @@ hstoreUniquePairs(Pairs *a, int32 l, int32 *buflen)
 	}
 
 	qsort((void *) a, l, sizeof(Pairs), comparePairs);
+
+	/*
+	 * We can't use qunique here because we have some clean-up code to run on
+	 * removed elements.
+	 */
 	ptr = a + 1;
 	res = a;
 	while (ptr - a < l)
diff --git a/contrib/intarray/_int_tool.c b/contrib/intarray/_int_tool.c
index efff81d77d..e5f4bd4064 100644
--- a/contrib/intarray/_int_tool.c
+++ b/contrib/intarray/_int_tool.c
@@ -7,6 +7,7 @@
 
 #include "_int.h"
 #include "catalog/pg_type.h"
+#include "lib/qunique.h"
 
 /* arguments are assumed sorted & unique-ified */
 bool
@@ -308,23 +309,13 @@ internal_size(int *a, int len)
 ArrayType *
 _int_unique(ArrayType *r)
 {
-	int		   *tmp,
-			   *dr,
-			   *data;
 	int			num = ARRNELEMS(r);
+	bool		duplicates_found;	/* not used */
 
-	if (num < 2)
-		return r;
+	num = qunique_arg(ARRPTR(r), num, sizeof(int), isort_cmp,
+					  &duplicates_found);
 
-	data = tmp = dr = ARRPTR(r);
-	while (tmp - data < num)
-	{
-		if (*tmp != *dr)
-			*(++dr) = *tmp++;
-		else
-			tmp++;
-	}
-	return resize_intArrayType(r, dr + 1 - ARRPTR(r));
+	return resize_intArrayType(r, num);
 }
 
 void
diff --git a/contrib/pg_trgm/trgm_op.c b/contrib/pg_trgm/trgm_op.c
index 07a3180378..c9c8cbc734 100644
--- a/contrib/pg_trgm/trgm_op.c
+++ b/contrib/pg_trgm/trgm_op.c
@@ -6,6 +6,7 @@
 #include <ctype.h>
 
 #include "catalog/pg_type.h"
+#include "lib/qunique.h"
 #include "trgm.h"
 #include "tsearch/ts_locale.h"
 #include "utils/lsyscache.h"
@@ -162,26 +163,6 @@ comp_trgm(const void *a, const void *b)
 	return CMPTRGM(a, b);
 }
 
-static int
-unique_array(trgm *a, int len)
-{
-	trgm	   *curend,
-			   *tmp;
-
-	curend = tmp = a;
-	while (tmp - a < len)
-		if (CMPTRGM(tmp, curend))
-		{
-			curend++;
-			CPTRGM(curend, tmp);
-			tmp++;
-		}
-		else
-			tmp++;
-
-	return curend + 1 - a;
-}
-
 /*
  * Finds first word in string, returns pointer to the word,
  * endword points to the character after word
@@ -394,7 +375,7 @@ generate_trgm(char *str, int slen)
 	if (len > 1)
 	{
 		qsort((void *) GETARR(trg), len, sizeof(trgm), comp_trgm);
-		len = unique_array(GETARR(trg), len);
+		len = qunique(GETARR(trg), len, sizeof(trgm), comp_trgm);
 	}
 
 	SET_VARSIZE(trg, CALCGTSIZE(ARRKEY, len));
@@ -942,7 +923,7 @@ generate_wildcard_trgm(const char *str, int slen)
 	if (len > 1)
 	{
 		qsort((void *) GETARR(trg), len, sizeof(trgm), comp_trgm);
-		len = unique_array(GETARR(trg), len);
+		len = qunique(GETARR(trg), len, sizeof(trgm), comp_trgm);
 	}
 
 	SET_VARSIZE(trg, CALCGTSIZE(ARRKEY, len));
diff --git a/src/backend/access/nbtree/nbtutils.c b/src/backend/access/nbtree/nbtutils.c
index bc855dd25d..6a3008dd48 100644
--- a/src/backend/access/nbtree/nbtutils.c
+++ b/src/backend/access/nbtree/nbtutils.c
@@ -21,6 +21,7 @@
 #include "access/reloptions.h"
 #include "access/relscan.h"
 #include "commands/progress.h"
+#include "lib/qunique.h"
 #include "miscadmin.h"
 #include "utils/array.h"
 #include "utils/datum.h"
@@ -435,8 +436,6 @@ _bt_sort_array_elements(IndexScanDesc scan, ScanKey skey,
 	Oid			elemtype;
 	RegProcedure cmp_proc;
 	BTSortArrayContext cxt;
-	int			last_non_dup;
-	int			i;
 
 	if (nelems <= 1)
 		return nelems;			/* no work to do */
@@ -475,20 +474,8 @@ _bt_sort_array_elements(IndexScanDesc scan, ScanKey skey,
 			  _bt_compare_array_elements, (void *) &cxt);
 
 	/* Now scan the sorted elements and remove duplicates */
-	last_non_dup = 0;
-	for (i = 1; i < nelems; i++)
-	{
-		int32		compare;
-
-		compare = DatumGetInt32(FunctionCall2Coll(&cxt.flinfo,
-												  cxt.collation,
-												  elems[last_non_dup],
-												  elems[i]));
-		if (compare != 0)
-			elems[++last_non_dup] = elems[i];
-	}
-
-	return last_non_dup + 1;
+	return qunique_arg(elems, nelems, sizeof(Datum),
+					   _bt_compare_array_elements, &cxt);
 }
 
 /*
diff --git a/src/backend/executor/nodeTidscan.c b/src/backend/executor/nodeTidscan.c
index 8cf22d5bf0..a59480f906 100644
--- a/src/backend/executor/nodeTidscan.c
+++ b/src/backend/executor/nodeTidscan.c
@@ -27,6 +27,7 @@
 #include "catalog/pg_type.h"
 #include "executor/execdebug.h"
 #include "executor/nodeTidscan.h"
+#include "lib/qunique.h"
 #include "miscadmin.h"
 #include "nodes/nodeFuncs.h"
 #include "storage/bufmgr.h"
@@ -260,21 +261,13 @@ TidListEval(TidScanState *tidstate)
 	 */
 	if (numTids > 1)
 	{
-		int			lastTid;
-		int			i;
-
 		/* CurrentOfExpr could never appear OR'd with something else */
 		Assert(!tidstate->tss_isCurrentOf);
 
 		qsort((void *) tidList, numTids, sizeof(ItemPointerData),
 			  itemptr_comparator);
-		lastTid = 0;
-		for (i = 1; i < numTids; i++)
-		{
-			if (!ItemPointerEquals(&tidList[lastTid], &tidList[i]))
-				tidList[++lastTid] = tidList[i];
-		}
-		numTids = lastTid + 1;
+		numTids = qunique(tidList, numTids, sizeof(ItemPointerData),
+						  itemptr_comparator);
 	}
 
 	tidstate->tss_TidList = tidList;
diff --git a/src/backend/utils/adt/acl.c b/src/backend/utils/adt/acl.c
index d7e6100ccb..79bc750d77 100644
--- a/src/backend/utils/adt/acl.c
+++ b/src/backend/utils/adt/acl.c
@@ -28,6 +28,7 @@
 #include "commands/tablespace.h"
 #include "foreign/foreign.h"
 #include "funcapi.h"
+#include "lib/qunique.h"
 #include "miscadmin.h"
 #include "utils/acl.h"
 #include "utils/array.h"
@@ -1475,8 +1476,7 @@ aclmembers(const Acl *acl, Oid **roleids)
 	Oid		   *list;
 	const AclItem *acldat;
 	int			i,
-				j,
-				k;
+				j;
 
 	if (acl == NULL || ACL_NUM(acl) == 0)
 	{
@@ -1508,21 +1508,14 @@ aclmembers(const Acl *acl, Oid **roleids)
 	/* Sort the array */
 	qsort(list, j, sizeof(Oid), oid_cmp);
 
-	/* Remove duplicates from the array */
-	k = 0;
-	for (i = 1; i < j; i++)
-	{
-		if (list[k] != list[i])
-			list[++k] = list[i];
-	}
-
 	/*
 	 * We could repalloc the array down to minimum size, but it's hardly worth
 	 * it since it's only transient memory.
 	 */
 	*roleids = list;
 
-	return k + 1;
+	/* Remove duplicates from the array */
+	return qunique(list, j, sizeof(Oid), oid_cmp);
 }
 
 
diff --git a/src/backend/utils/adt/tsgistidx.c b/src/backend/utils/adt/tsgistidx.c
index 6ff71a49b8..af5b6809c5 100644
--- a/src/backend/utils/adt/tsgistidx.c
+++ b/src/backend/utils/adt/tsgistidx.c
@@ -16,6 +16,7 @@
 
 #include "access/gist.h"
 #include "access/heaptoast.h"
+#include "lib/qunique.h"
 #include "port/pg_bitutils.h"
 #include "tsearch/ts_utils.h"
 #include "utils/builtins.h"
@@ -122,31 +123,6 @@ compareint(const void *va, const void *vb)
 	return (a > b) ? 1 : -1;
 }
 
-/*
- * Removes duplicates from an array of int32. 'l' is
- * size of the input array. Returns the new size of the array.
- */
-static int
-uniqueint(int32 *a, int32 l)
-{
-	int32	   *ptr,
-			   *res;
-
-	if (l <= 1)
-		return l;
-
-	ptr = res = a;
-
-	qsort((void *) a, l, sizeof(int32), compareint);
-
-	while (ptr - a < l)
-		if (*ptr != *res)
-			*(++res) = *ptr++;
-		else
-			ptr++;
-	return res + 1 - a;
-}
-
 static void
 makesign(BITVECP sign, SignTSVector *a)
 {
@@ -193,7 +169,8 @@ gtsvector_compress(PG_FUNCTION_ARGS)
 			ptr++;
 		}
 
-		len = uniqueint(GETARR(res), val->size);
+		qsort(GETARR(res), val->size, sizeof(int), compareint);
+		len = qunique(GETARR(res), val->size, sizeof(int), compareint);
 		if (len != val->size)
 		{
 			/*
diff --git a/src/backend/utils/adt/tsquery_op.c b/src/backend/utils/adt/tsquery_op.c
index 1f63d9b6a9..e6816940ad 100644
--- a/src/backend/utils/adt/tsquery_op.c
+++ b/src/backend/utils/adt/tsquery_op.c
@@ -14,6 +14,7 @@
 
 #include "postgres.h"
 
+#include "lib/qunique.h"
 #include "tsearch/ts_utils.h"
 #include "utils/builtins.h"
 
@@ -302,29 +303,6 @@ cmp_string(const void *a, const void *b)
 	return strcmp(sa, sb);
 }
 
-static int
-remove_duplicates(char **strings, int n)
-{
-	if (n <= 1)
-		return n;
-	else
-	{
-		int			i;
-		char	   *prev = strings[0];
-		int			new_n = 1;
-
-		for (i = 1; i < n; i++)
-		{
-			if (strcmp(strings[i], prev) != 0)
-			{
-				strings[new_n++] = strings[i];
-				prev = strings[i];
-			}
-		}
-		return new_n;
-	}
-}
-
 Datum
 tsq_mcontains(PG_FUNCTION_ARGS)
 {
@@ -342,9 +320,10 @@ tsq_mcontains(PG_FUNCTION_ARGS)
 
 	/* Sort and remove duplicates from both arrays */
 	qsort(query_values, query_nvalues, sizeof(char *), cmp_string);
-	query_nvalues = remove_duplicates(query_values, query_nvalues);
+	query_nvalues = qunique(query_values, query_nvalues, sizeof(char *),
+							cmp_string);
 	qsort(ex_values, ex_nvalues, sizeof(char *), cmp_string);
-	ex_nvalues = remove_duplicates(ex_values, ex_nvalues);
+	ex_nvalues = qunique(ex_values, ex_nvalues, sizeof(char *), cmp_string);
 
 	if (ex_nvalues > query_nvalues)
 		result = false;
diff --git a/src/backend/utils/adt/tsvector.c b/src/backend/utils/adt/tsvector.c
index ccfc4147fa..098eaed3e5 100644
--- a/src/backend/utils/adt/tsvector.c
+++ b/src/backend/utils/adt/tsvector.c
@@ -41,8 +41,9 @@ compareWordEntryPos(const void *a, const void *b)
 }
 
 /*
- * Removes duplicate pos entries. If there's two entries with same pos
- * but different weight, the higher weight is retained.
+ * Removes duplicate pos entries. If there's two entries with same pos but
+ * different weight, the higher weight is retained, so we can't use
+ * qunique here.
  *
  * Returns new length.
  */
diff --git a/src/backend/utils/adt/tsvector_op.c b/src/backend/utils/adt/tsvector_op.c
index 28d042273e..f483c4b228 100644
--- a/src/backend/utils/adt/tsvector_op.c
+++ b/src/backend/utils/adt/tsvector_op.c
@@ -21,6 +21,7 @@
 #include "commands/trigger.h"
 #include "executor/spi.h"
 #include "funcapi.h"
+#include "lib/qunique.h"
 #include "mb/pg_wchar.h"
 #include "miscadmin.h"
 #include "parser/parse_coerce.h"
@@ -475,16 +476,9 @@ tsvector_delete_by_indices(TSVector tsv, int *indices_to_delete,
 	 */
 	if (indices_count > 1)
 	{
-		int			kp;
-
 		qsort(indices_to_delete, indices_count, sizeof(int), compare_int);
-		kp = 0;
-		for (k = 1; k < indices_count; k++)
-		{
-			if (indices_to_delete[k] != indices_to_delete[kp])
-				indices_to_delete[++kp] = indices_to_delete[k];
-		}
-		indices_count = ++kp;
+		indices_count = qunique(indices_to_delete, indices_count, sizeof(int),
+								compare_int);
 	}
 
 	/*
@@ -761,7 +755,6 @@ array_to_tsvector(PG_FUNCTION_ARGS)
 	bool	   *nulls;
 	int			nitems,
 				i,
-				j,
 				tslen,
 				datalen = 0;
 	char	   *cur;
@@ -781,13 +774,8 @@ array_to_tsvector(PG_FUNCTION_ARGS)
 	if (nitems > 1)
 	{
 		qsort(dlexemes, nitems, sizeof(Datum), compare_text_lexemes);
-		j = 0;
-		for (i = 1; i < nitems; i++)
-		{
-			if (compare_text_lexemes(&dlexemes[j], &dlexemes[i]) < 0)
-				dlexemes[++j] = dlexemes[i];
-		}
-		nitems = ++j;
+		nitems = qunique(dlexemes, nitems, sizeof(Datum),
+						 compare_text_lexemes);
 	}
 
 	/* Calculate space needed for surviving lexemes. */
@@ -1270,39 +1258,6 @@ checkclass_str(CHKVAL *chkval, WordEntry *entry, QueryOperand *val,
 	return result;
 }
 
-/*
- * Removes duplicate pos entries. We can't use uniquePos() from
- * tsvector.c because array might be longer than MAXENTRYPOS
- *
- * Returns new length.
- */
-static int
-uniqueLongPos(WordEntryPos *pos, int npos)
-{
-	WordEntryPos *pos_iter,
-			   *result;
-
-	if (npos <= 1)
-		return npos;
-
-	qsort((void *) pos, npos, sizeof(WordEntryPos), compareWordEntryPos);
-
-	result = pos;
-	pos_iter = pos + 1;
-	while (pos_iter < pos + npos)
-	{
-		if (WEP_GETPOS(*pos_iter) != WEP_GETPOS(*result))
-		{
-			result++;
-			*result = WEP_GETPOS(*pos_iter);
-		}
-
-		pos_iter++;
-	}
-
-	return result + 1 - pos;
-}
-
 /*
  * is there value 'val' in array or not ?
  */
@@ -1397,7 +1352,9 @@ checkcondition_str(void *checkval, QueryOperand *val, ExecPhraseData *data)
 		{
 			/* Sort and make unique array of found positions */
 			data->pos = allpos;
-			data->npos = uniqueLongPos(allpos, npos);
+			qsort(data->pos, npos, sizeof(WordEntryPos), compareWordEntryPos);
+			data->npos = qunique(data->pos, npos, sizeof(WordEntryPos),
+								 compareWordEntryPos);
 			data->allocated = true;
 		}
 	}
diff --git a/src/backend/utils/adt/txid.c b/src/backend/utils/adt/txid.c
index 90b2c9b694..e220c5f136 100644
--- a/src/backend/utils/adt/txid.c
+++ b/src/backend/utils/adt/txid.c
@@ -27,6 +27,7 @@
 #include "access/xlog.h"
 #include "funcapi.h"
 #include "miscadmin.h"
+#include "lib/qunique.h"
 #include "libpq/pqformat.h"
 #include "postmaster/postmaster.h"
 #include "storage/lwlock.h"
@@ -213,26 +214,10 @@ cmp_txid(const void *aa, const void *bb)
 static void
 sort_snapshot(TxidSnapshot *snap)
 {
-	txid		last = 0;
-	int			nxip,
-				idx1,
-				idx2;
-
 	if (snap->nxip > 1)
 	{
 		qsort(snap->xip, snap->nxip, sizeof(txid), cmp_txid);
-
-		/* remove duplicates */
-		nxip = snap->nxip;
-		idx1 = idx2 = 0;
-		while (idx1 < nxip)
-		{
-			if (snap->xip[idx1] != last)
-				last = snap->xip[idx2++] = snap->xip[idx1];
-			else
-				snap->nxip--;
-			idx1++;
-		}
+		snap->nxip = qunique(snap->xip, snap->nxip, sizeof(txid), cmp_txid);
 	}
 }
 
diff --git a/src/backend/utils/cache/syscache.c b/src/backend/utils/cache/syscache.c
index 16297a52a1..36aee74ab0 100644
--- a/src/backend/utils/cache/syscache.c
+++ b/src/backend/utils/cache/syscache.c
@@ -74,6 +74,7 @@
 #include "catalog/pg_ts_template.h"
 #include "catalog/pg_type.h"
 #include "catalog/pg_user_mapping.h"
+#include "lib/qunique.h"
 #include "utils/rel.h"
 #include "utils/catcache.h"
 #include "utils/syscache.h"
@@ -1010,8 +1011,6 @@ void
 InitCatalogCache(void)
 {
 	int			cacheId;
-	int			i,
-				j;
 
 	StaticAssertStmt(SysCacheSize == (int) lengthof(cacheinfo),
 					 "SysCacheSize does not match syscache.c's array");
@@ -1048,21 +1047,15 @@ InitCatalogCache(void)
 	/* Sort and de-dup OID arrays, so we can use binary search. */
 	pg_qsort(SysCacheRelationOid, SysCacheRelationOidSize,
 			 sizeof(Oid), oid_compare);
-	for (i = 1, j = 0; i < SysCacheRelationOidSize; i++)
-	{
-		if (SysCacheRelationOid[i] != SysCacheRelationOid[j])
-			SysCacheRelationOid[++j] = SysCacheRelationOid[i];
-	}
-	SysCacheRelationOidSize = j + 1;
+	SysCacheRelationOidSize =
+		qunique(SysCacheRelationOid, SysCacheRelationOidSize, sizeof(Oid),
+				oid_compare);
 
 	pg_qsort(SysCacheSupportingRelOid, SysCacheSupportingRelOidSize,
 			 sizeof(Oid), oid_compare);
-	for (i = 1, j = 0; i < SysCacheSupportingRelOidSize; i++)
-	{
-		if (SysCacheSupportingRelOid[i] != SysCacheSupportingRelOid[j])
-			SysCacheSupportingRelOid[++j] = SysCacheSupportingRelOid[i];
-	}
-	SysCacheSupportingRelOidSize = j + 1;
+	SysCacheSupportingRelOidSize =
+		qunique(SysCacheSupportingRelOid, SysCacheSupportingRelOidSize,
+				sizeof(Oid), oid_compare);
 
 	CacheInitialized = true;
 }
diff --git a/src/include/lib/qunique.h b/src/include/lib/qunique.h
new file mode 100644
index 0000000000..4d620f82ee
--- /dev/null
+++ b/src/include/lib/qunique.h
@@ -0,0 +1,65 @@
+/*-------------------------------------------------------------------------
+ *
+ * qunique.h
+ *		inline array unique functions
+ * Portions Copyright (c) 2019, PostgreSQL Global Development Group
+ *
+ * IDENTIFICATION
+ *		src/include/lib/qunique.h
+ *-------------------------------------------------------------------------
+ */
+
+#ifndef QUNIQUE_H
+#define QUNIQUE_H
+
+/*
+ * Remove duplicates from a pre-sorted array, according to a user-supplied
+ * comparator.  Usually the array should have been sorted with qsort() using
+ * the same arguments.  Return the new size.
+ */
+static inline size_t
+qunique(void *array, size_t elements, size_t width,
+		int (*compare) (const void *, const void *))
+{
+	char	   *bytes = (char *) array;
+	size_t		i,
+				j;
+
+	if (elements <= 1)
+		return elements;
+
+	for (i = 1, j = 0; i < elements; ++i)
+	{
+		if (compare(bytes + i * width, bytes + j * width) != 0)
+			memcpy(bytes + ++j * width, bytes + i * width, width);
+	}
+
+	return j + 1;
+}
+
+/*
+ * Like qunique(), but takes a comparator with an extra user data argument
+ * which is passed through, for compatibility with qsort_arg().
+ */
+static inline size_t
+qunique_arg(void *array, size_t elements, size_t width,
+			int (*compare) (const void *, const void *, void *),
+			void *arg)
+{
+	char	   *bytes = (char *) array;
+	size_t		i,
+				j;
+
+	if (elements <= 1)
+		return elements;
+
+	for (i = 1, j = 0; i < elements; ++i)
+	{
+		if (compare(bytes + i * width, bytes + j * width, arg) != 0)
+			memcpy(bytes + ++j * width, bytes + i * width, width);
+	}
+
+	return j + 1;
+}
+
+#endif							/* QUNIQUE_H */
-- 
2.23.0

