Here is a new function which could produce an array of numbers with a
controllable array length and duplicated elements in these arrays. I
used it when working with gin index, and I think it would be helpful for
others as well, so here it is.
select * from normal_rand_array(5, 10, 1.8::numeric, 3.5::numeric);
normal_rand_array
-----------------------------------------------
{3.3,2.3,2.7,3.2,2.0,2.7,3.4,2.7,2.3,2.9}
{3.3,1.8,2.9,3.4,2.0,1.8,2.0,3.5,2.8,2.5}
{2.1,1.9,2.3,1.9,2.5,2.7,2.4,2.9,1.8}
{2.3,2.5,2.4,2.7,2.7,2.3,2.9,3.3,3.3,1.9,3.5}
{2.8,3.4,2.7,1.8,3.3,2.3,2.2,3.5,2.6,2.5}
(5 rows)
select * from normal_rand_array(5, 10, 1.8::int4, 3.5::int4);
normal_rand_array
-------------------------------------
{3,2,2,3,4,2}
{2,4,2,3,3,3,3,2,2,3,3,2,3,2}
{2,4,3}
{4,2,3,4,2,4,2,2,3,4,3,3,2,4,4,2,3}
{4,3,3,4,3,3,4,2,4}
(5 rows)
the 5 means it needs to produce 5 rows in total and the 10 is the
average array length, and 1.8 is the minvalue for the random function
and 3.5 is the maxvalue.
--
Best Regards
Andy Fan
>From 397dcaf67f29057b80aebbb6116b49ac8344547c Mon Sep 17 00:00:00 2001
From: Andy Fan <[email protected]>
Date: Sat, 8 Jun 2024 13:21:08 +0800
Subject: [PATCH v20240608 1/1] Add function normal_rand_array function to
contrib/tablefunc.
It can produce an array of numbers with n controllable array length and
duplicated elements in these arrays.
---
contrib/tablefunc/Makefile | 2 +-
contrib/tablefunc/expected/tablefunc.out | 26 ++++
contrib/tablefunc/sql/tablefunc.sql | 10 ++
contrib/tablefunc/tablefunc--1.0--1.1.sql | 7 ++
contrib/tablefunc/tablefunc.c | 140 ++++++++++++++++++++++
contrib/tablefunc/tablefunc.control | 2 +-
doc/src/sgml/tablefunc.sgml | 10 ++
src/backend/utils/adt/arrayfuncs.c | 7 ++
8 files changed, 202 insertions(+), 2 deletions(-)
create mode 100644 contrib/tablefunc/tablefunc--1.0--1.1.sql
diff --git a/contrib/tablefunc/Makefile b/contrib/tablefunc/Makefile
index 191a3a1d38..f0c67308fd 100644
--- a/contrib/tablefunc/Makefile
+++ b/contrib/tablefunc/Makefile
@@ -3,7 +3,7 @@
MODULES = tablefunc
EXTENSION = tablefunc
-DATA = tablefunc--1.0.sql
+DATA = tablefunc--1.0.sql tablefunc--1.0--1.1.sql
PGFILEDESC = "tablefunc - various functions that return tables"
REGRESS = tablefunc
diff --git a/contrib/tablefunc/expected/tablefunc.out b/contrib/tablefunc/expected/tablefunc.out
index ddece79029..9f0cbbfbbe 100644
--- a/contrib/tablefunc/expected/tablefunc.out
+++ b/contrib/tablefunc/expected/tablefunc.out
@@ -12,6 +12,32 @@ SELECT avg(normal_rand)::int, count(*) FROM normal_rand(100, 250, 0.2);
-- negative number of tuples
SELECT avg(normal_rand)::int, count(*) FROM normal_rand(-1, 250, 0.2);
ERROR: number of rows cannot be negative
+SELECT count(*), avg(COALESCE(array_length(i, 1), 0)) FROM normal_rand_array(10, 3, 1.23::numeric, 8::numeric) as i;
+ count | avg
+-------+--------------------
+ 10 | 3.0000000000000000
+(1 row)
+
+SELECT count(*), avg(COALESCE(array_length(i, 1), 0)) FROM normal_rand_array(10, 3, 1.23::int4, 8::int4) as i;
+ count | avg
+-------+--------------------
+ 10 | 3.0000000000000000
+(1 row)
+
+SELECT count(*), avg(COALESCE(array_length(i, 1), 0)) FROM normal_rand_array(10, 3, 1.23::int8, 8::int8) as i;
+ count | avg
+-------+--------------------
+ 10 | 3.0000000000000000
+(1 row)
+
+SELECT count(*), avg(COALESCE(array_length(i, 1), 0)) FROM normal_rand_array(10, 3, 1.23::float8, 8::float8) as i;
+ count | avg
+-------+--------------------
+ 10 | 3.0000000000000000
+(1 row)
+
+SELECT count(*), avg(COALESCE(array_length(i, 1), 0)) FROM normal_rand_array(10, 3, 'abc'::text, 'def'::text) as i;
+ERROR: unsupported type 25 in normal_rand_array.
--
-- crosstab()
--
diff --git a/contrib/tablefunc/sql/tablefunc.sql b/contrib/tablefunc/sql/tablefunc.sql
index 0fb8e40de2..dec57cfc66 100644
--- a/contrib/tablefunc/sql/tablefunc.sql
+++ b/contrib/tablefunc/sql/tablefunc.sql
@@ -8,6 +8,16 @@ SELECT avg(normal_rand)::int, count(*) FROM normal_rand(100, 250, 0.2);
-- negative number of tuples
SELECT avg(normal_rand)::int, count(*) FROM normal_rand(-1, 250, 0.2);
+SELECT count(*), avg(COALESCE(array_length(i, 1), 0)) FROM normal_rand_array(10, 3, 1.23::numeric, 8::numeric) as i;
+
+SELECT count(*), avg(COALESCE(array_length(i, 1), 0)) FROM normal_rand_array(10, 3, 1.23::int4, 8::int4) as i;
+
+SELECT count(*), avg(COALESCE(array_length(i, 1), 0)) FROM normal_rand_array(10, 3, 1.23::int8, 8::int8) as i;
+
+SELECT count(*), avg(COALESCE(array_length(i, 1), 0)) FROM normal_rand_array(10, 3, 1.23::float8, 8::float8) as i;
+
+SELECT count(*), avg(COALESCE(array_length(i, 1), 0)) FROM normal_rand_array(10, 3, 'abc'::text, 'def'::text) as i;
+
--
-- crosstab()
--
diff --git a/contrib/tablefunc/tablefunc--1.0--1.1.sql b/contrib/tablefunc/tablefunc--1.0--1.1.sql
new file mode 100644
index 0000000000..9d13e80ff0
--- /dev/null
+++ b/contrib/tablefunc/tablefunc--1.0--1.1.sql
@@ -0,0 +1,7 @@
+-- complain if script is sourced in psql, rather than via ALTER EXTENSION
+\echo Use "ALTER EXTENSION tablefunc UPDATE TO '1.1'" to load this file. \quit
+
+CREATE FUNCTION normal_rand_array(int4, int4, anyelement, anyelement)
+RETURNS setof anyarray
+AS 'MODULE_PATHNAME','normal_rand_array'
+LANGUAGE C VOLATILE STRICT;
diff --git a/contrib/tablefunc/tablefunc.c b/contrib/tablefunc/tablefunc.c
index 7d1b5f5143..6d26aa843b 100644
--- a/contrib/tablefunc/tablefunc.c
+++ b/contrib/tablefunc/tablefunc.c
@@ -42,7 +42,9 @@
#include "lib/stringinfo.h"
#include "miscadmin.h"
#include "tablefunc.h"
+#include "utils/array.h"
#include "utils/builtins.h"
+#include "utils/fmgroids.h"
PG_MODULE_MAGIC;
@@ -91,6 +93,13 @@ typedef struct
bool use_carry; /* use second generated value */
} normal_rand_fctx;
+typedef struct
+{
+ int carry_len;
+ FunctionCallInfo fcinfo;
+ FunctionCallInfo random_len_fcinfo;
+} normal_rand_array_fctx;
+
#define xpfree(var_) \
do { \
if (var_ != NULL) \
@@ -269,6 +278,137 @@ normal_rand(PG_FUNCTION_ARGS)
SRF_RETURN_DONE(funcctx);
}
+/*
+ * normal_rand_array - return requested number of random arrays
+ * with a Gaussian (Normal) distribution.
+ *
+ * inputs are int numvals, int mean_len, anyelement minvalue,
+ * anyelement maxvalue returns setof anyelement[]
+ */
+PG_FUNCTION_INFO_V1(normal_rand_array);
+Datum
+normal_rand_array(PG_FUNCTION_ARGS)
+{
+ FuncCallContext *funcctx;
+ uint64 call_cntr;
+ uint64 max_calls;
+ normal_rand_array_fctx *fctx;
+ MemoryContext oldcontext;
+ Datum minvalue, maxvalue;
+ int array_mean_len;
+ Oid target_oid, random_fn_oid;
+
+ array_mean_len = PG_GETARG_INT32(1);
+ minvalue = PG_GETARG_DATUM(2);
+ maxvalue = PG_GETARG_DATUM(3);
+
+ target_oid = get_fn_expr_argtype(fcinfo->flinfo, 2);
+
+ if (target_oid == INT4OID)
+ random_fn_oid = F_RANDOM_INT4_INT4;
+ else if (target_oid == INT8OID)
+ random_fn_oid = F_RANDOM_INT8_INT8;
+ else if (target_oid == FLOAT8OID)
+ random_fn_oid = F_RANDOM_;
+ else if (target_oid == NUMERICOID)
+ random_fn_oid = F_RANDOM_NUMERIC_NUMERIC;
+ else
+ elog(ERROR, "unsupported type %d in normal_rand_array.",
+ target_oid);
+
+ /* stuff done only on the first call of the function */
+ if (SRF_IS_FIRSTCALL())
+ {
+ int32 num_tuples;
+ FmgrInfo *random_len_flinfo, *random_val_flinfo;
+ FunctionCallInfo random_len_fcinfo, random_val_fcinfo;
+
+ /* create a function context for cross-call persistence */
+ funcctx = SRF_FIRSTCALL_INIT();
+
+ /*
+ * switch to memory context appropriate for multiple function calls
+ */
+ oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
+
+ /* total number of tuples to be returned */
+ num_tuples = PG_GETARG_INT32(0);
+ if (num_tuples < 0)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("number of rows cannot be negative")));
+ funcctx->max_calls = num_tuples;
+
+ /* allocate memory for user context */
+ fctx = (normal_rand_array_fctx *) palloc(sizeof(normal_rand_array_fctx));
+
+ random_len_fcinfo = (FunctionCallInfo) palloc0(SizeForFunctionCallInfo(2));
+ random_len_flinfo = (FmgrInfo *) palloc0(sizeof(FmgrInfo));
+ fmgr_info(F_RANDOM_INT4_INT4, random_len_flinfo);
+ InitFunctionCallInfoData(*random_len_fcinfo, random_len_flinfo, 2, InvalidOid, NULL, NULL);
+
+ random_len_fcinfo->args[0].isnull = false;
+ random_len_fcinfo->args[1].isnull = false;
+ random_len_fcinfo->args[0].value = 0;
+ random_len_fcinfo->args[1].value = array_mean_len;
+
+ random_val_fcinfo = (FunctionCallInfo) palloc0(SizeForFunctionCallInfo(2));
+ random_val_flinfo = (FmgrInfo *) palloc0(sizeof(FmgrInfo));
+ fmgr_info(random_fn_oid, random_val_flinfo);
+ InitFunctionCallInfoData(*random_val_fcinfo, random_val_flinfo, 2, InvalidOid, NULL, NULL);
+
+ random_val_fcinfo->args[0].isnull = false;
+ random_val_fcinfo->args[1].isnull = false;
+ random_val_fcinfo->args[0].value = minvalue;
+ random_val_fcinfo->args[1].value = maxvalue;
+
+ fctx->carry_len = -1;
+ fctx->fcinfo = random_val_fcinfo;
+ fctx->random_len_fcinfo = random_len_fcinfo;
+
+ funcctx->user_fctx = fctx;
+
+ MemoryContextSwitchTo(oldcontext);
+ }
+
+ /* stuff done on every call of the function */
+ funcctx = SRF_PERCALL_SETUP();
+
+ call_cntr = funcctx->call_cntr;
+ max_calls = funcctx->max_calls;
+ fctx = funcctx->user_fctx;
+
+ if (call_cntr < max_calls) /* do when there is more left to send */
+ {
+ int array_len;
+ int i;
+ Datum *results;
+
+ if (fctx->carry_len != -1)
+ {
+ array_len = fctx->carry_len;
+ fctx->carry_len = -1;
+ }
+ else
+ {
+ array_len = Int32GetDatum(FunctionCallInvoke(fctx->random_len_fcinfo));
+ fctx->carry_len = 2 * array_mean_len - array_len;
+ }
+
+ results = palloc(array_len * sizeof(Datum));
+
+ for(i = 0; i < array_len; i++)
+ results[i] = FunctionCallInvoke(fctx->fcinfo);
+
+
+ SRF_RETURN_NEXT(funcctx, PointerGetDatum(
+ construct_array_builtin(results, array_len, target_oid)));
+ }
+ else
+ /* do when there is no more left */
+ SRF_RETURN_DONE(funcctx);
+}
+
/*
* get_normal_pair()
* Assigns normally distributed (Gaussian) values to a pair of provided
diff --git a/contrib/tablefunc/tablefunc.control b/contrib/tablefunc/tablefunc.control
index 7b25d16170..9cc6222a4f 100644
--- a/contrib/tablefunc/tablefunc.control
+++ b/contrib/tablefunc/tablefunc.control
@@ -1,6 +1,6 @@
# tablefunc extension
comment = 'functions that manipulate whole tables, including crosstab'
-default_version = '1.0'
+default_version = '1.1'
module_pathname = '$libdir/tablefunc'
relocatable = true
trusted = true
diff --git a/doc/src/sgml/tablefunc.sgml b/doc/src/sgml/tablefunc.sgml
index e10fe7009d..014c36b81c 100644
--- a/doc/src/sgml/tablefunc.sgml
+++ b/doc/src/sgml/tablefunc.sgml
@@ -53,6 +53,16 @@
</para></entry>
</row>
+ <row>
+ <entry role="func_table_entry"><para role="func_signature">
+ <function>normal_rand_array</function> ( <parameter>numvals</parameter> <type>integer</type>, <parameter>meanarraylen</parameter> <type>int4</type>, <parameter>minval</parameter> <type>anyelement</type>, <parameter>maxval</parameter> <type>anyelement</type> )
+ <returnvalue>setof anyarray</returnvalue>
+ </para>
+ <para>
+ Produces a set of normally distributed random array of numbers.
+ </para></entry>
+ </row>
+
<row>
<entry role="func_table_entry"><para role="func_signature">
<function>crosstab</function> ( <parameter>sql</parameter> <type>text</type> )
diff --git a/src/backend/utils/adt/arrayfuncs.c b/src/backend/utils/adt/arrayfuncs.c
index d6641b570d..7c95cc05bc 100644
--- a/src/backend/utils/adt/arrayfuncs.c
+++ b/src/backend/utils/adt/arrayfuncs.c
@@ -3397,6 +3397,12 @@ construct_array_builtin(Datum *elems, int nelems, Oid elmtype)
elmalign = TYPALIGN_INT;
break;
+ case FLOAT8OID:
+ elmlen = sizeof(float8);
+ elmbyval = FLOAT8PASSBYVAL;
+ elmalign = TYPALIGN_DOUBLE;
+ break;
+
case INT2OID:
elmlen = sizeof(int16);
elmbyval = true;
@@ -3429,6 +3435,7 @@ construct_array_builtin(Datum *elems, int nelems, Oid elmtype)
break;
case TEXTOID:
+ case NUMERICOID:
elmlen = -1;
elmbyval = false;
elmalign = TYPALIGN_INT;
--
2.45.2