My main question regarding this patch is whether the behavior with MD arrays is useful at all. Suppose I give it this:
alvherre=# select array_offset('{{{1,2},{3,4},{5,6}},{{2,3},{4,5},{6,7}}}', 3); array_offset -------------- 3 (1 fila) What can I do with the "3" value it returned? Certainly not use it as an offset to get a slice of the original array. The only thing that seems sensible to me here is to reject the whole thing with an error, that is, only accept 1-D arrays here. We can later extend the function by allowing higher dimensionality as long as the second argument is an array one dimension less than the first argument. But if we allow the case on its appearance, it's going to be difficult to change the behavior later. Has a case been made for the current behavior? -- Álvaro Herrera http://www.2ndQuadrant.com/ PostgreSQL Development, 24x7 Support, Remote DBA, Training & Services
diff --git a/doc/src/sgml/array.sgml b/doc/src/sgml/array.sgml index 9ea1068..092013b 100644 --- a/doc/src/sgml/array.sgml +++ b/doc/src/sgml/array.sgml @@ -600,6 +600,25 @@ SELECT * FROM sal_emp WHERE pay_by_quarter && ARRAY[10000]; index, as described in <xref linkend="indexes-types">. </para> + <para> + You can also search for specific values in an array using the <function>array_offset</> + and <function>array_offsets</> functions. The former returns the position of + the first occurrence of a value in an array; the latter returns an array with the + positions of all occurrences of the value in the array. For example: + +<programlisting> +SELECT array_offset(ARRAY['sun','mon','tue','wed','thu','fri','sat'], 'mon'); + array_offset +-------------- + 2 + +SELECT array_offsets(ARRAY[1, 4, 3, 1, 3, 4, 2, 1], 1); + array_offsets +--------------- + {1,4,8} +</programlisting> + </para> + <tip> <para> Arrays are not sets; searching for specific array elements diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml index c198bea..d8c542e 100644 --- a/doc/src/sgml/func.sgml +++ b/doc/src/sgml/func.sgml @@ -11480,6 +11480,12 @@ SELECT NULLIF(value, '(none)') ... <primary>array_lower</primary> </indexterm> <indexterm> + <primary>array_offset</primary> + </indexterm> + <indexterm> + <primary>array_offsets</primary> + </indexterm> + <indexterm> <primary>array_prepend</primary> </indexterm> <indexterm> @@ -11599,6 +11605,39 @@ SELECT NULLIF(value, '(none)') ... <row> <entry> <literal> + <function>array_offset</function>(<type>anyarray</type>, <type>anyelement</type> <optional>, <type>int</type></optional>) + </literal> + </entry> + <entry><type>int</type></entry> + <entry>returns the offset of the first occurrence of the second + argument in the array. Each array element is compared using the + <literal>IS NOT DISTINCT FROM</> operator. The optional third + argument specifies an initial offset to begin the search at. + Returns <literal>NULL</> if the value is not found. + Multi-dimensional arrays are squashed to one dimension before + searching.</entry> + <entry><literal>array_offset(ARRAY['sun','mon','tue','wed','thu','fri','sat'], 'mon')</literal></entry> + <entry><literal>2</literal></entry> + </row> + <row> + <entry> + <literal> + <function>array_offsets</function>(<type>anyarray</type>, <type>anyelement</type>) + </literal> + </entry> + <entry><type>int[]</type></entry> + <entry>returns an array of offsets of all occurrences of the second + argument in the array given as first argument. Each array element is + compared using the the <literal>IS NOT DISTINCT FROM</> operator. + Returns an empty array when there are no occurences of the value in + the array. Multi-dimensional arrays are squashed to one dimension before + searching.</entry> + <entry><literal>array_offsets(ARRAY['A','A','B','A'], 'A')</literal></entry> + <entry><literal>{1,2,4}</literal></entry> + </row> + <row> + <entry> + <literal> <function>array_prepend</function>(<type>anyelement</type>, <type>anyarray</type>) </literal> </entry> diff --git a/src/backend/utils/adt/array_userfuncs.c b/src/backend/utils/adt/array_userfuncs.c index 6679333..dcc7c62 100644 --- a/src/backend/utils/adt/array_userfuncs.c +++ b/src/backend/utils/adt/array_userfuncs.c @@ -12,9 +12,14 @@ */ #include "postgres.h" +#include "catalog/pg_type.h" #include "utils/array.h" #include "utils/builtins.h" #include "utils/lsyscache.h" +#include "utils/typcache.h" + + +static Datum array_offset_common(FunctionCallInfo fcinfo); /* @@ -652,3 +657,276 @@ array_agg_array_finalfn(PG_FUNCTION_ARGS) PG_RETURN_DATUM(result); } + +/*----------------------------------------------------------------------------- + * array_offset, array_offset_start : + * return the offset of a value in an array. + * + * IS NOT DISTINCT FROM semantics are used for comparisons. Return NULL when + * the value is not found. + *----------------------------------------------------------------------------- + */ +Datum +array_offset(PG_FUNCTION_ARGS) +{ + return array_offset_common(fcinfo); +} + +Datum +array_offset_start(PG_FUNCTION_ARGS) +{ + return array_offset_common(fcinfo); +} + +/* + * array_offset_common + * Common code for array_offset and array_offset_start + * + * These are separate wrappers for the sake of opr_sanity regression test. + * They are not strict so we have to test for null inputs explicitly. + */ +static Datum +array_offset_common(FunctionCallInfo fcinfo) +{ + ArrayType *array; + Oid collation = PG_GET_COLLATION(); + Oid element_type; + Datum searched_element, + value; + bool isnull; + int offset = 0, + offset_min; + bool found = false; + TypeCacheEntry *typentry; + ArrayMetaState *my_extra; + bool null_search; + ArrayIterator array_iterator; + + if (PG_ARGISNULL(0)) + PG_RETURN_NULL(); + + array = PG_GETARG_ARRAYTYPE_P(0); + element_type = ARR_ELEMTYPE(array); + + if (PG_ARGISNULL(1)) + { + /* fast return when the array doesn't have have nulls */ + if (!array_contains_nulls(array)) + PG_RETURN_NULL(); + searched_element = (Datum) 0; + null_search = true; + } + else + { + searched_element = PG_GETARG_DATUM(1); + null_search = false; + } + + /* figure out where to start */ + if (PG_NARGS() == 3) + { + if (PG_ARGISNULL(2)) + ereport(ERROR, + (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED), + errmsg("initial offset should not be NULL"))); + + offset_min = PG_GETARG_INT32(2); + } + else + offset_min = 1; + + /* + * We arrange to look up type info for array_create_iterator only once per + * series of calls, assuming the element type doesn't change underneath us. + */ + my_extra = (ArrayMetaState *) fcinfo->flinfo->fn_extra; + if (my_extra == NULL) + { + fcinfo->flinfo->fn_extra = MemoryContextAlloc(fcinfo->flinfo->fn_mcxt, + sizeof(ArrayMetaState)); + my_extra = (ArrayMetaState *) fcinfo->flinfo->fn_extra; + my_extra->element_type = ~element_type; + } + + if (my_extra->element_type != element_type) + { + get_typlenbyvalalign(element_type, + &my_extra->typlen, + &my_extra->typbyval, + &my_extra->typalign); + + typentry = lookup_type_cache(element_type, TYPECACHE_EQ_OPR_FINFO); + + if (!OidIsValid(typentry->eq_opr_finfo.fn_oid)) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_FUNCTION), + errmsg("could not identify an equality operator for type %s", + format_type_be(element_type)))); + + my_extra->element_type = element_type; + fmgr_info(typentry->eq_opr_finfo.fn_oid, &my_extra->proc); + } + + /* Examine each array element until we find a match. */ + array_iterator = array_create_iterator(array, 0, my_extra); + while (array_iterate(array_iterator, &value, &isnull)) + { + offset += 1; + + /* skip initial elements if caller requested so */ + if (offset < offset_min) + continue; + + /* + * Can't look at the array element's value if it's null; but if we + * search for null, we have a hit and are done. + */ + if (isnull || null_search) + { + if (isnull && null_search) + { + found = true; + break; + } + else + continue; + } + + /* not nulls, so run the operator */ + if (DatumGetBool(FunctionCall2Coll(&my_extra->proc, collation, + searched_element, value))) + { + found = true; + break; + } + } + + array_free_iterator(array_iterator); + + /* Avoid leaking memory when handed toasted input */ + PG_FREE_IF_COPY(array, 0); + + if (!found) + PG_RETURN_NULL(); + + PG_RETURN_INT32(offset); +} + +/*----------------------------------------------------------------------------- + * array_offsets : + * return an array of offsets of a value in an array. + * + * IS NOT DISTINCT FROM semantics are used for comparisons. Returns NULL when + * the input array is NULL. When the value is not found in the array, returns + * an empty array. + * + * This is not strict so we have to test for null inputs explicitly. + *----------------------------------------------------------------------------- + */ +Datum +array_offsets(PG_FUNCTION_ARGS) +{ + ArrayType *array; + Oid collation = PG_GET_COLLATION(); + Oid element_type; + Datum searched_element, + value; + bool isnull; + int offset = 0; + TypeCacheEntry *typentry; + ArrayMetaState *my_extra; + bool null_search; + ArrayIterator array_iterator; + ArrayBuildState *astate = NULL; + + if (PG_ARGISNULL(0)) + PG_RETURN_NULL(); + + array = PG_GETARG_ARRAYTYPE_P(0); + element_type = ARR_ELEMTYPE(array); + + astate = initArrayResult(INT4OID, CurrentMemoryContext, false); + + if (PG_ARGISNULL(1)) + { + /* fast return when the array doesn't have have nulls */ + if (!array_contains_nulls(array)) + PG_RETURN_DATUM(makeArrayResult(astate, CurrentMemoryContext)); + searched_element = (Datum) 0; + null_search = true; + } + else + { + searched_element = PG_GETARG_DATUM(1); + null_search = false; + } + + /* + * We arrange to look up type info for array_create_iterator only once per + * series of calls, assuming the element type doesn't change underneath us. + */ + my_extra = (ArrayMetaState *) fcinfo->flinfo->fn_extra; + if (my_extra == NULL) + { + fcinfo->flinfo->fn_extra = MemoryContextAlloc(fcinfo->flinfo->fn_mcxt, + sizeof(ArrayMetaState)); + my_extra = (ArrayMetaState *) fcinfo->flinfo->fn_extra; + my_extra->element_type = ~element_type; + } + + if (my_extra->element_type != element_type) + { + get_typlenbyvalalign(element_type, + &my_extra->typlen, + &my_extra->typbyval, + &my_extra->typalign); + + typentry = lookup_type_cache(element_type, TYPECACHE_EQ_OPR_FINFO); + + if (!OidIsValid(typentry->eq_opr_finfo.fn_oid)) + ereport(ERROR, + (errcode(ERRCODE_UNDEFINED_FUNCTION), + errmsg("could not identify an equality operator for type %s", + format_type_be(element_type)))); + + my_extra->element_type = element_type; + fmgr_info(typentry->eq_opr_finfo.fn_oid, &my_extra->proc); + } + + /* + * Accumulate each array offset iff the element matches the given element. + */ + array_iterator = array_create_iterator(array, 0, my_extra); + while (array_iterate(array_iterator, &value, &isnull)) + { + offset += 1; + + /* + * Can't look at the array element's value if it's null; but if we + * search for null, we have a hit. + */ + if (isnull || null_search) + { + if (isnull && null_search) + astate = + accumArrayResult(astate, Int32GetDatum(offset), false, + INT4OID, CurrentMemoryContext); + + continue; + } + + /* not nulls, so run the operator */ + if (DatumGetBool(FunctionCall2Coll(&my_extra->proc, collation, + searched_element, value))) + astate = + accumArrayResult(astate, Int32GetDatum(offset), false, + INT4OID, CurrentMemoryContext); + } + + array_free_iterator(array_iterator); + + /* Avoid leaking memory when handed toasted input */ + PG_FREE_IF_COPY(array, 0); + + PG_RETURN_DATUM(makeArrayResult(astate, CurrentMemoryContext)); +} diff --git a/src/backend/utils/adt/arrayfuncs.c b/src/backend/utils/adt/arrayfuncs.c index 54979fa..9117a55 100644 --- a/src/backend/utils/adt/arrayfuncs.c +++ b/src/backend/utils/adt/arrayfuncs.c @@ -3989,7 +3989,7 @@ arraycontained(PG_FUNCTION_ARGS) * The passed-in array must remain valid for the lifetime of the iterator. */ ArrayIterator -array_create_iterator(ArrayType *arr, int slice_ndim) +array_create_iterator(ArrayType *arr, int slice_ndim, ArrayMetaState *mstate) { ArrayIterator iterator = palloc0(sizeof(ArrayIteratorData)); @@ -4006,10 +4006,20 @@ array_create_iterator(ArrayType *arr, int slice_ndim) iterator->arr = arr; iterator->nullbitmap = ARR_NULLBITMAP(arr); iterator->nitems = ArrayGetNItems(ARR_NDIM(arr), ARR_DIMS(arr)); - get_typlenbyvalalign(ARR_ELEMTYPE(arr), - &iterator->typlen, - &iterator->typbyval, - &iterator->typalign); + + if (mstate != NULL) + { + Assert(mstate->element_type == ARR_ELEMTYPE(arr)); + + iterator->typlen = mstate->typlen; + iterator->typbyval = mstate->typbyval; + iterator->typalign = mstate->typalign; + } + else + get_typlenbyvalalign(ARR_ELEMTYPE(arr), + &iterator->typlen, + &iterator->typbyval, + &iterator->typalign); /* * Remember the slicing parameters. diff --git a/src/include/catalog/pg_proc.h b/src/include/catalog/pg_proc.h index b8a3660..6a757f3 100644 --- a/src/include/catalog/pg_proc.h +++ b/src/include/catalog/pg_proc.h @@ -895,6 +895,12 @@ DATA(insert OID = 515 ( array_larger PGNSP PGUID 12 1 0 0 0 f f f f t f i 2 DESCR("larger of two"); DATA(insert OID = 516 ( array_smaller PGNSP PGUID 12 1 0 0 0 f f f f t f i 2 0 2277 "2277 2277" _null_ _null_ _null_ _null_ array_smaller _null_ _null_ _null_ )); DESCR("smaller of two"); +DATA(insert OID = 3277 ( array_offset PGNSP PGUID 12 1 0 0 0 f f f f f f i 2 0 23 "2277 2283" _null_ _null_ _null_ _null_ array_offset _null_ _null_ _null_ )); +DESCR("returns a offset of value in array"); +DATA(insert OID = 3278 ( array_offset PGNSP PGUID 12 1 0 0 0 f f f f f f i 3 0 23 "2277 2283 23" _null_ _null_ _null_ _null_ array_offset_start _null_ _null_ _null_ )); +DESCR("returns a offset of value in array with start index"); +DATA(insert OID = 3279 ( array_offsets PGNSP PGUID 12 1 0 0 0 f f f f f f i 2 0 1007 "2277 2283" _null_ _null_ _null_ _null_ array_offsets _null_ _null_ _null_ )); +DESCR("returns a array of offsets of some value in array"); DATA(insert OID = 1191 ( generate_subscripts PGNSP PGUID 12 1 1000 0 0 f f f f t t i 3 0 23 "2277 23 16" _null_ _null_ _null_ _null_ generate_subscripts _null_ _null_ _null_ )); DESCR("array subscripts generator"); DATA(insert OID = 1192 ( generate_subscripts PGNSP PGUID 12 1 1000 0 0 f f f f t t i 2 0 23 "2277 23" _null_ _null_ _null_ _null_ generate_subscripts_nodir _null_ _null_ _null_ )); diff --git a/src/include/utils/array.h b/src/include/utils/array.h index 649688c..b78b42a 100644 --- a/src/include/utils/array.h +++ b/src/include/utils/array.h @@ -323,7 +323,7 @@ extern ArrayBuildStateAny *accumArrayResultAny(ArrayBuildStateAny *astate, extern Datum makeArrayResultAny(ArrayBuildStateAny *astate, MemoryContext rcontext, bool release); -extern ArrayIterator array_create_iterator(ArrayType *arr, int slice_ndim); +extern ArrayIterator array_create_iterator(ArrayType *arr, int slice_ndim, ArrayMetaState *mstate); extern bool array_iterate(ArrayIterator iterator, Datum *value, bool *isnull); extern void array_free_iterator(ArrayIterator iterator); @@ -358,6 +358,10 @@ extern Datum array_agg_finalfn(PG_FUNCTION_ARGS); extern Datum array_agg_array_transfn(PG_FUNCTION_ARGS); extern Datum array_agg_array_finalfn(PG_FUNCTION_ARGS); +extern Datum array_offset(PG_FUNCTION_ARGS); +extern Datum array_offset_start(PG_FUNCTION_ARGS); +extern Datum array_offsets(PG_FUNCTION_ARGS); + /* * prototypes for functions defined in array_typanalyze.c */ diff --git a/src/pl/plpgsql/src/pl_exec.c b/src/pl/plpgsql/src/pl_exec.c index e332fa0..6a93540 100644 --- a/src/pl/plpgsql/src/pl_exec.c +++ b/src/pl/plpgsql/src/pl_exec.c @@ -2315,7 +2315,7 @@ exec_stmt_foreach_a(PLpgSQL_execstate *estate, PLpgSQL_stmt_foreach_a *stmt) errmsg("FOREACH loop variable must not be of an array type"))); /* Create an iterator to step through the array */ - array_iterator = array_create_iterator(arr, stmt->slice); + array_iterator = array_create_iterator(arr, stmt->slice, NULL); /* Identify iterator result type */ if (stmt->slice > 0) diff --git a/src/test/regress/expected/arrays.out b/src/test/regress/expected/arrays.out index d33c9b9..2c13802 100644 --- a/src/test/regress/expected/arrays.out +++ b/src/test/regress/expected/arrays.out @@ -366,6 +366,113 @@ SELECT array_cat(ARRAY[[3,4],[5,6]], ARRAY[1,2]) AS "{{3,4},{5,6},{1,2}}"; {{3,4},{5,6},{1,2}} (1 row) +SELECT array_offset(ARRAY[1,2,3,4,5], 4); + array_offset +-------------- + 4 +(1 row) + +SELECT array_offset(ARRAY[5,3,4,2,1], 4); + array_offset +-------------- + 3 +(1 row) + +SELECT array_offset(ARRAY[[1,2],[3,4]], 3); + array_offset +-------------- + 3 +(1 row) + +SELECT array_offset(ARRAY[[1,2],[3,4]], 4); + array_offset +-------------- + 4 +(1 row) + +SELECT array_offset(ARRAY['sun','mon','tue','wed','thu','fri','sat'], 'mon'); + array_offset +-------------- + 2 +(1 row) + +SELECT array_offset(ARRAY['sun','mon','tue','wed','thu','fri','sat'], 'sat'); + array_offset +-------------- + 7 +(1 row) + +SELECT array_offset(ARRAY['sun','mon','tue','wed','thu','fri','sat'], NULL); + array_offset +-------------- + +(1 row) + +SELECT array_offset(ARRAY['sun','mon','tue','wed','thu',NULL,'fri','sat'], NULL); + array_offset +-------------- + 6 +(1 row) + +SELECT array_offset(ARRAY['sun','mon','tue','wed','thu',NULL,'fri','sat'], 'sat'); + array_offset +-------------- + 8 +(1 row) + +SELECT array_offsets(NULL, 10); + array_offsets +--------------- + +(1 row) + +SELECT array_offsets(NULL, NULL::int); + array_offsets +--------------- + +(1 row) + +SELECT array_offsets(ARRAY[1,2,3,4,5,6,1,2,3,4,5,6], 4); + array_offsets +--------------- + {4,10} +(1 row) + +SELECT array_offsets(ARRAY[1,2,3,4,5,6,1,2,3,4,5,6], NULL); + array_offsets +--------------- + {} +(1 row) + +SELECT array_offsets(ARRAY[1,2,3,NULL,5,6,1,2,3,NULL,5,6], NULL); + array_offsets +--------------- + {4,10} +(1 row) + +SELECT array_length(array_offsets(ARRAY(SELECT 'AAAAAAAAAAAAAAAAAAAAAAAAA'::text || i % 10 FROM generate_series(1,100) g(i)), + 'AAAAAAAAAAAAAAAAAAAAAAAAA5'),1); + array_length +-------------- + 10 +(1 row) + +do $$ +declare + o int; + a int[] := ARRAY[1,2,3,2,3,1,2]; +begin + o := array_offset(a, 2); + while o is not null + loop + raise notice '%', o; + o := array_offset(a, 2, o + 1); + end loop; +end +$$ language plpgsql; +NOTICE: 2 +NOTICE: 4 +NOTICE: 7 -- operators SELECT a FROM arrtest WHERE b = ARRAY[[[113,142],[1,147]]]; a diff --git a/src/test/regress/sql/arrays.sql b/src/test/regress/sql/arrays.sql index 733c19b..c0773a0 100644 --- a/src/test/regress/sql/arrays.sql +++ b/src/test/regress/sql/arrays.sql @@ -185,6 +185,38 @@ SELECT array_cat(ARRAY[1,2], ARRAY[3,4]) AS "{1,2,3,4}"; SELECT array_cat(ARRAY[1,2], ARRAY[[3,4],[5,6]]) AS "{{1,2},{3,4},{5,6}}"; SELECT array_cat(ARRAY[[3,4],[5,6]], ARRAY[1,2]) AS "{{3,4},{5,6},{1,2}}"; +SELECT array_offset(ARRAY[1,2,3,4,5], 4); +SELECT array_offset(ARRAY[5,3,4,2,1], 4); +SELECT array_offset(ARRAY[[1,2],[3,4]], 3); +SELECT array_offset(ARRAY[[1,2],[3,4]], 4); +SELECT array_offset(ARRAY['sun','mon','tue','wed','thu','fri','sat'], 'mon'); +SELECT array_offset(ARRAY['sun','mon','tue','wed','thu','fri','sat'], 'sat'); +SELECT array_offset(ARRAY['sun','mon','tue','wed','thu','fri','sat'], NULL); +SELECT array_offset(ARRAY['sun','mon','tue','wed','thu',NULL,'fri','sat'], NULL); +SELECT array_offset(ARRAY['sun','mon','tue','wed','thu',NULL,'fri','sat'], 'sat'); + +SELECT array_offsets(NULL, 10); +SELECT array_offsets(NULL, NULL::int); +SELECT array_offsets(ARRAY[1,2,3,4,5,6,1,2,3,4,5,6], 4); +SELECT array_offsets(ARRAY[1,2,3,4,5,6,1,2,3,4,5,6], NULL); +SELECT array_offsets(ARRAY[1,2,3,NULL,5,6,1,2,3,NULL,5,6], NULL); +SELECT array_length(array_offsets(ARRAY(SELECT 'AAAAAAAAAAAAAAAAAAAAAAAAA'::text || i % 10 FROM generate_series(1,100) g(i)), + 'AAAAAAAAAAAAAAAAAAAAAAAAA5'),1); + +do $$ +declare + o int; + a int[] := ARRAY[1,2,3,2,3,1,2]; +begin + o := array_offset(a, 2); + while o is not null + loop + raise notice '%', o; + o := array_offset(a, 2, o + 1); + end loop; +end +$$ language plpgsql; + -- operators SELECT a FROM arrtest WHERE b = ARRAY[[[113,142],[1,147]]]; SELECT NOT ARRAY[1.1,1.2,1.3] = ARRAY[1.1,1.2,1.3] AS "FALSE";
-- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers