From 5d810740b0fbfc601a89d1132f064518f0cffc63 Mon Sep 17 00:00:00 2001
From: Junwang Zhao <zhjwpku@gmail.com>
Date: Fri, 27 Sep 2024 13:05:40 +0000
Subject: [PATCH v14 1/2] general purpose array_sort

Sorts anyarray on its first dimension.
The sort order is determined by the "<" operator.

Author: Junwang Zhao <zhjwpku@gmail.com>
Co-authored-by: Jian He <jian.universality@gmail.com>

Signed-off-by: Junwang Zhao <zhjwpku@gmail.com>
---
 doc/src/sgml/func.sgml                        |  18 +++
 src/backend/utils/adt/array_userfuncs.c       | 138 ++++++++++++++++++
 src/backend/utils/adt/arrayfuncs.c            |   3 +-
 src/include/catalog/pg_proc.dat               |   3 +
 src/include/utils/array.h                     |   1 +
 src/test/regress/expected/arrays.out          |  90 ++++++++++++
 .../regress/expected/collate.icu.utf8.out     |  13 ++
 src/test/regress/sql/arrays.sql               |  25 ++++
 src/test/regress/sql/collate.icu.utf8.sql     |   4 +
 src/tools/pgindent/typedefs.list              |   1 +
 10 files changed, 295 insertions(+), 1 deletion(-)

diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml
index 73979f20ff..748611dee5 100644
--- a/doc/src/sgml/func.sgml
+++ b/doc/src/sgml/func.sgml
@@ -20438,6 +20438,24 @@ SELECT NULLIF(value, '(none)') ...
        </para></entry>
       </row>
 
+      <row>
+       <entry role="func_table_entry"><para role="func_signature">
+        <indexterm>
+         <primary>array_sort</primary>
+        </indexterm>
+        <function>array_sort</function> ( <type>anyarray</type> )
+        <returnvalue>anyarray</returnvalue>
+       </para>
+       <para>
+        Sorts the first dimension of the array. The sort order is determined by the <literal>&lt;</literal> operator of the element type, nulls will appear after non-null values.
+        The collation to use can be forced by adding a <literal>COLLATE</literal> clause to any of the arguments.
+       </para>
+       <para>
+        <literal>array_sort(ARRAY[[2,4],[2,1],[6,5]])</literal>
+        <returnvalue>{{2,1},{2,4},{6,5}}</returnvalue>
+       </para></entry>
+      </row>
+
       <row>
        <entry role="func_table_entry"><para role="func_signature">
         <indexterm id="function-array-to-string">
diff --git a/src/backend/utils/adt/array_userfuncs.c b/src/backend/utils/adt/array_userfuncs.c
index d053808f6e..49b8d5f534 100644
--- a/src/backend/utils/adt/array_userfuncs.c
+++ b/src/backend/utils/adt/array_userfuncs.c
@@ -16,11 +16,13 @@
 #include "common/int.h"
 #include "common/pg_prng.h"
 #include "libpq/pqformat.h"
+#include "miscadmin.h"
 #include "port/pg_bitutils.h"
 #include "utils/array.h"
 #include "utils/builtins.h"
 #include "utils/datum.h"
 #include "utils/lsyscache.h"
+#include "utils/tuplesort.h"
 #include "utils/typcache.h"
 
 /*
@@ -42,6 +44,18 @@ typedef struct DeserialIOData
 	Oid			typioparam;
 } DeserialIOData;
 
+/*
+ * ArraySortCachedInfo
+ *		Used for caching data in array_sort
+ */
+typedef struct ArraySortCachedInfo
+{
+	TypeCacheEntry *typentry;	/* type cache entry for element type */
+	TypeCacheEntry *array_typentry; /* type cache entry for array type */
+	ArrayMetaState array_meta;	/* array metadata for better
+								 * array_create_iterator performance */
+} ArraySortCachedInfo;
+
 static Datum array_position_common(FunctionCallInfo fcinfo);
 
 
@@ -1797,3 +1811,127 @@ array_reverse(PG_FUNCTION_ARGS)
 
 	PG_RETURN_ARRAYTYPE_P(result);
 }
+
+/*
+ * array_sort
+ *
+ * Sorts the first dimension of the array.
+ * The sort order is determined by the "<" operator of the element type.
+ */
+Datum
+array_sort(PG_FUNCTION_ARGS)
+{
+	ArrayType  *array = PG_GETARG_ARRAYTYPE_P(0);
+	Oid			elmtyp;
+	Oid			array_type;
+	Oid			collation = PG_GET_COLLATION();
+	ArraySortCachedInfo *cache_info;
+	TypeCacheEntry *typentry;
+	Tuplesortstate *tuplesortstate;
+	ArrayIterator array_iterator;
+	Datum		value;
+	bool		isnull;
+	ArrayBuildStateAny *astate = NULL;
+	int			ndim,
+			   *dims,
+			   *lbs;
+
+	ndim = ARR_NDIM(array);
+	dims = ARR_DIMS(array);
+	lbs = ARR_LBOUND(array);
+
+	elmtyp = ARR_ELEMTYPE(array);
+	cache_info = (ArraySortCachedInfo *) fcinfo->flinfo->fn_extra;
+	if (cache_info == NULL)
+	{
+		cache_info = (ArraySortCachedInfo *) MemoryContextAlloc(fcinfo->flinfo->fn_mcxt,
+																sizeof(ArraySortCachedInfo));
+		cache_info->typentry = NULL;
+		cache_info->array_typentry = NULL;
+		fcinfo->flinfo->fn_extra = (void *) cache_info;
+	}
+
+	if (ndim == 1)
+	{
+		/* Finds the ordering operator for the type for 1-D arrays */
+		typentry = cache_info->typentry;
+		if (typentry == NULL || typentry->type_id != elmtyp)
+		{
+			typentry = lookup_type_cache(elmtyp, TYPECACHE_LT_OPR);
+			if (!OidIsValid(typentry->lt_opr))
+				ereport(ERROR,
+						(errcode(ERRCODE_UNDEFINED_FUNCTION),
+						 errmsg("could not identify ordering operator for type %s",
+								format_type_be(elmtyp))));
+			cache_info->typentry = typentry;
+			cache_info->array_meta.element_type = elmtyp;
+			cache_info->array_meta.typlen = typentry->typlen;
+			cache_info->array_meta.typbyval = typentry->typbyval;
+			cache_info->array_meta.typalign = typentry->typalign;
+		}
+	}
+	else
+	{
+		/* Finds the ordering operator for the array type for multi-D arrays */
+		typentry = cache_info->array_typentry;
+		if (typentry == NULL || typentry->typelem != elmtyp)
+		{
+			array_type = get_array_type(elmtyp);
+			if (!OidIsValid(array_type))
+				ereport(ERROR,
+						(errcode(ERRCODE_UNDEFINED_OBJECT),
+						 errmsg("could not find array type for data type %s",
+								format_type_be(elmtyp))));
+			typentry = lookup_type_cache(array_type, TYPECACHE_LT_OPR);
+			if (!OidIsValid(typentry->lt_opr))
+				ereport(ERROR,
+						(errcode(ERRCODE_UNDEFINED_FUNCTION),
+						 errmsg("could not identify ordering operator for type %s",
+								format_type_be(array_type))));
+			cache_info->array_typentry = typentry;
+		}
+		cache_info->array_meta.element_type = elmtyp;
+		get_typlenbyvalalign(elmtyp,
+							 &cache_info->array_meta.typlen,
+							 &cache_info->array_meta.typbyval,
+							 &cache_info->array_meta.typalign);
+	}
+
+	if (ndim < 1 || dims[0] < 2)
+		PG_RETURN_ARRAYTYPE_P(array);
+
+	tuplesortstate = tuplesort_begin_datum(typentry->type_id,
+										   typentry->lt_opr,
+										   collation,
+										   false, work_mem, NULL, false);
+
+	array_iterator = array_create_iterator(array, ndim - 1, &cache_info->array_meta);
+	while (array_iterate(array_iterator, &value, &isnull))
+	{
+		tuplesort_putdatum(tuplesortstate, value, isnull);
+	}
+	array_free_iterator(array_iterator);
+
+	/*
+	 * Do the sort.
+	 */
+	tuplesort_performsort(tuplesortstate);
+
+	while (tuplesort_getdatum(tuplesortstate, true, false, &value, &isnull, NULL))
+	{
+		astate = accumArrayResultAny(astate, value, isnull,
+									 typentry->type_id, CurrentMemoryContext);
+	}
+
+	tuplesort_end(tuplesortstate);
+
+	/* bounds preservation */
+	if (ndim == 1)
+		astate->scalarstate->lb = lbs[0];
+	else
+		astate->arraystate->lbs[0] = lbs[0];
+
+	/* Avoid leaking memory when handed toasted input */
+	PG_FREE_IF_COPY(array, 0);
+	PG_RETURN_DATUM(makeArrayResultAny(astate, CurrentMemoryContext, true));
+}
diff --git a/src/backend/utils/adt/arrayfuncs.c b/src/backend/utils/adt/arrayfuncs.c
index a715e7e0b8..5d5d9b5bbf 100644
--- a/src/backend/utils/adt/arrayfuncs.c
+++ b/src/backend/utils/adt/arrayfuncs.c
@@ -5330,6 +5330,7 @@ initArrayResultWithSize(Oid element_type, MemoryContext rcontext,
 		MemoryContextAlloc(arr_context, astate->alen * sizeof(bool));
 	astate->nelems = 0;
 	astate->element_type = element_type;
+	astate->lb = 1;				/* default lower bound */
 	get_typlenbyvalalign(element_type,
 						 &astate->typlen,
 						 &astate->typbyval,
@@ -5867,7 +5868,7 @@ makeArrayResultAny(ArrayBuildStateAny *astate,
 		/* If no elements were presented, we want to create an empty array */
 		ndims = (astate->scalarstate->nelems > 0) ? 1 : 0;
 		dims[0] = astate->scalarstate->nelems;
-		lbs[0] = 1;
+		lbs[0] = astate->scalarstate->lb;
 
 		result = makeMdArrayResult(astate->scalarstate, ndims, dims, lbs,
 								   rcontext, release);
diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat
index f23321a41f..54ebbbd135 100644
--- a/src/include/catalog/pg_proc.dat
+++ b/src/include/catalog/pg_proc.dat
@@ -1744,6 +1744,9 @@
 { oid => '8686', descr => 'reverse array',
   proname => 'array_reverse', prorettype => 'anyarray',
   proargtypes => 'anyarray', prosrc => 'array_reverse' },
+{ oid => '8810', descr => 'sort array',
+  proname => 'array_sort', prorettype => 'anyarray',
+  proargtypes => 'anyarray', prosrc => 'array_sort'},
 { oid => '3816', descr => 'array typanalyze',
   proname => 'array_typanalyze', provolatile => 's', prorettype => 'bool',
   proargtypes => 'internal', prosrc => 'array_typanalyze' },
diff --git a/src/include/utils/array.h b/src/include/utils/array.h
index 157cc0e4c6..f04cbe4eeb 100644
--- a/src/include/utils/array.h
+++ b/src/include/utils/array.h
@@ -192,6 +192,7 @@ typedef struct ArrayBuildState
 	int			alen;			/* allocated length of above arrays */
 	int			nelems;			/* number of valid entries in above arrays */
 	Oid			element_type;	/* data type of the Datums */
+	int			lb;				/* lower bound for one dimension array */
 	int16		typlen;			/* needed info about datatype */
 	bool		typbyval;
 	char		typalign;
diff --git a/src/test/regress/expected/arrays.out b/src/test/regress/expected/arrays.out
index 0b61fb5bb7..9844d5b81f 100644
--- a/src/test/regress/expected/arrays.out
+++ b/src/test/regress/expected/arrays.out
@@ -2734,3 +2734,93 @@ SELECT array_reverse('{{1,2},{3,4},{5,6},{7,8}}'::int[]);
  {{7,8},{5,6},{3,4},{1,2}}
 (1 row)
 
+-- array_sort
+SELECT array_sort('{}'::int[]);
+ array_sort 
+------------
+ {}
+(1 row)
+
+SELECT array_sort('{1}'::int[]);
+ array_sort 
+------------
+ {1}
+(1 row)
+
+SELECT array_sort('{1,3,5,2,4,6}'::int[]);
+  array_sort   
+---------------
+ {1,2,3,4,5,6}
+(1 row)
+
+SELECT array_sort('{1.1,3.3,5.5,2.2,4.4,6.6}'::numeric[]);
+        array_sort         
+---------------------------
+ {1.1,2.2,3.3,4.4,5.5,6.6}
+(1 row)
+
+SELECT array_sort('{foo,bar,CCC,Abc,bbc}'::text[] COLLATE "C");
+      array_sort       
+-----------------------
+ {Abc,CCC,bar,bbc,foo}
+(1 row)
+
+SELECT array_sort('{foo,bar,null,CCC,Abc,bbc}'::text[] COLLATE "C");
+         array_sort         
+----------------------------
+ {Abc,CCC,bar,bbc,foo,NULL}
+(1 row)
+
+-- multidimensional array tests
+SELECT array_sort('{{1}}'::int[]);
+ array_sort 
+------------
+ {{1}}
+(1 row)
+
+SELECT array_sort(ARRAY[[2,4],[2,1],[6,5]]);
+     array_sort      
+---------------------
+ {{2,1},{2,4},{6,5}}
+(1 row)
+
+-- no ordering operator tests
+SELECT array_sort('{1}'::xid[]); -- ndim < 1
+ERROR:  could not identify ordering operator for type xid
+SELECT array_sort('{1,2,3}'::xid[]);
+ERROR:  could not identify ordering operator for type xid
+SELECT array_sort('{{1,2,3}}'::xid[]); -- dims[0] < 2
+ERROR:  could not identify ordering operator for type xid[]
+SELECT array_sort('{{1,2,3},{2,3,4}}'::xid[]);
+ERROR:  could not identify ordering operator for type xid[]
+-- bounds preservation tests
+SELECT array_sort(a) FROM (VALUES ('[10:12][20:21]={{1,2},{10,20},{3,4}}'::int[])) v(a);
+              array_sort              
+--------------------------------------
+ [10:12][20:21]={{1,2},{3,4},{10,20}}
+(1 row)
+
+SELECT array_sort(a) FROM (VALUES ('[-1:0]={7,1}'::int[])) v(a);
+  array_sort  
+--------------
+ [-1:0]={1,7}
+(1 row)
+
+SELECT array_sort(a) FROM (VALUES ('[-2:0][20:21]={{1,2},{10,20},{1,-4}}'::int[])) v(a);
+              array_sort              
+--------------------------------------
+ [-2:0][20:21]={{1,-4},{1,2},{10,20}}
+(1 row)
+
+SELECT array_sort(a [-1:0]) FROM (VALUES ('[-2:0][20:21]={{1,2},{10,20},{1,-4}}'::int[])) v(a);
+    array_sort    
+------------------
+ {{1,-4},{10,20}}
+(1 row)
+
+SELECT array_sort(a [-1:0][20:20]) FROM (VALUES ('[-2:0][20:21]={{1,2},{10,20},{1,-4}}'::int[])) v(a);
+ array_sort 
+------------
+ {{1},{10}}
+(1 row)
+
diff --git a/src/test/regress/expected/collate.icu.utf8.out b/src/test/regress/expected/collate.icu.utf8.out
index 6fa32ae364..6d3bbcf1fb 100644
--- a/src/test/regress/expected/collate.icu.utf8.out
+++ b/src/test/regress/expected/collate.icu.utf8.out
@@ -1338,6 +1338,19 @@ SELECT 'abc' <= 'ABC' COLLATE case_insensitive, 'abc' >= 'ABC' COLLATE case_inse
  t        | t
 (1 row)
 
+-- tests with array_sort
+SELECT array_sort('{a,B}'::text[] COLLATE case_insensitive);
+ array_sort 
+------------
+ {a,B}
+(1 row)
+
+SELECT array_sort('{a,B}'::text[] COLLATE "C");
+ array_sort 
+------------
+ {B,a}
+(1 row)
+
 -- test language tags
 CREATE COLLATION lt_insensitive (provider = icu, locale = 'en-u-ks-level1', deterministic = false);
 SELECT 'aBcD' COLLATE lt_insensitive = 'AbCd' COLLATE lt_insensitive;
diff --git a/src/test/regress/sql/arrays.sql b/src/test/regress/sql/arrays.sql
index 03cc8cfcd9..5a5549c8e0 100644
--- a/src/test/regress/sql/arrays.sql
+++ b/src/test/regress/sql/arrays.sql
@@ -834,3 +834,28 @@ SELECT array_reverse('{1}'::int[]);
 SELECT array_reverse('{1,2}'::int[]);
 SELECT array_reverse('{1,2,3,NULL,4,5,6}'::int[]);
 SELECT array_reverse('{{1,2},{3,4},{5,6},{7,8}}'::int[]);
+
+-- array_sort
+SELECT array_sort('{}'::int[]);
+SELECT array_sort('{1}'::int[]);
+SELECT array_sort('{1,3,5,2,4,6}'::int[]);
+SELECT array_sort('{1.1,3.3,5.5,2.2,4.4,6.6}'::numeric[]);
+SELECT array_sort('{foo,bar,CCC,Abc,bbc}'::text[] COLLATE "C");
+SELECT array_sort('{foo,bar,null,CCC,Abc,bbc}'::text[] COLLATE "C");
+
+-- multidimensional array tests
+SELECT array_sort('{{1}}'::int[]);
+SELECT array_sort(ARRAY[[2,4],[2,1],[6,5]]);
+
+-- no ordering operator tests
+SELECT array_sort('{1}'::xid[]); -- ndim < 1
+SELECT array_sort('{1,2,3}'::xid[]);
+SELECT array_sort('{{1,2,3}}'::xid[]); -- dims[0] < 2
+SELECT array_sort('{{1,2,3},{2,3,4}}'::xid[]);
+
+-- bounds preservation tests
+SELECT array_sort(a) FROM (VALUES ('[10:12][20:21]={{1,2},{10,20},{3,4}}'::int[])) v(a);
+SELECT array_sort(a) FROM (VALUES ('[-1:0]={7,1}'::int[])) v(a);
+SELECT array_sort(a) FROM (VALUES ('[-2:0][20:21]={{1,2},{10,20},{1,-4}}'::int[])) v(a);
+SELECT array_sort(a [-1:0]) FROM (VALUES ('[-2:0][20:21]={{1,2},{10,20},{1,-4}}'::int[])) v(a);
+SELECT array_sort(a [-1:0][20:20]) FROM (VALUES ('[-2:0][20:21]={{1,2},{10,20},{1,-4}}'::int[])) v(a);
diff --git a/src/test/regress/sql/collate.icu.utf8.sql b/src/test/regress/sql/collate.icu.utf8.sql
index 49fa9758b4..c1e0f6ee1d 100644
--- a/src/test/regress/sql/collate.icu.utf8.sql
+++ b/src/test/regress/sql/collate.icu.utf8.sql
@@ -536,6 +536,10 @@ CREATE COLLATION case_insensitive (provider = icu, locale = '@colStrength=second
 SELECT 'abc' <= 'ABC' COLLATE case_sensitive, 'abc' >= 'ABC' COLLATE case_sensitive;
 SELECT 'abc' <= 'ABC' COLLATE case_insensitive, 'abc' >= 'ABC' COLLATE case_insensitive;
 
+-- tests with array_sort
+SELECT array_sort('{a,B}'::text[] COLLATE case_insensitive);
+SELECT array_sort('{a,B}'::text[] COLLATE "C");
+
 -- test language tags
 CREATE COLLATION lt_insensitive (provider = icu, locale = 'en-u-ks-level1', deterministic = false);
 SELECT 'aBcD' COLLATE lt_insensitive = 'AbCd' COLLATE lt_insensitive;
diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list
index 1847bbfa95..e740e449f5 100644
--- a/src/tools/pgindent/typedefs.list
+++ b/src/tools/pgindent/typedefs.list
@@ -150,6 +150,7 @@ ArrayIOData
 ArrayIterator
 ArrayMapState
 ArrayMetaState
+ArraySortCachedInfo
 ArraySubWorkspace
 ArrayToken
 ArrayType
-- 
2.39.5

