On 6 May 2010 04:42, Pavel Stehule <pavel.steh...@gmail.com> wrote: > attached patch contains to_string and to_array functions. These > functions are equivalent of array_to_string and string_to_array > function with maybe more correct NULL handling.
Hi Pavel, I am reviewing your patch for the commitfest. Overall the patch looks good, although there were some bogus whitespace changes in the patch and some messy punctuation/grammar in some of the code comments. I also thought it was worth mentioning in the docs the default value for null_string is ''. I made an attempt to clean those items up and have attached a v2 of the patch. Regarding the behaviour of the third argument (null_string), I was a little surprised by the results when I passed in a NULL. postgres=# select to_string(array['a', 'b', 'c', 'd'], '/', NULL); to_string ----------- Now, if the array had some NULL elements in it, I could understand why the resulting string would be NULL (because str || NULL is NULL), but in this case there are no NULLs. Why is the result NULL? Surely it should be 'a/b/c/d' regardless of how the third parameter is set? In the reverse case: postgres=# select to_array('a/b/c/d', '/', NULL); to_array ---------- (1 row) Again I find this a bit weird. I have left the null_string NULL, which means it is unknown. It can't possibly match any value in the string, so effectively passing in a NULL null_string should mean that the user doesn't want any string items whatsoever to translate into NULLs in the resulting array. I would expect this call to return {a,b,c,d}. Cheers, BJ
diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml index 70dab53..2256d9c 100644 --- a/doc/src/sgml/func.sgml +++ b/doc/src/sgml/func.sgml @@ -9544,6 +9544,12 @@ SELECT NULLIF(value, '(none)') ... <primary>string_to_array</primary> </indexterm> <indexterm> + <primary>to_array</primary> + </indexterm> + <indexterm> + <primary>to_string</primary> + </indexterm> + <indexterm> <primary>unnest</primary> </indexterm> @@ -9686,6 +9692,28 @@ SELECT NULLIF(value, '(none)') ... <row> <entry> <literal> + <function>to_array</function>(<type>text</type>, <type>text</type> <optional>, <type>text</type></optional>) + </literal> + </entry> + <entry><type>text[]</type></entry> + <entry>splits string into array elements using supplied delimiter and null string (defaults to the empty string)</entry> + <entry><literal>to_array('1,2,3,,5', ',')</literal></entry> + <entry><literal>{1,2,3,4,NULL,5}</literal></entry> + </row> + <row> + <entry> + <literal> + <function>to_string</function>(<type>anyarray</type>, <type>text</type> <optional>, <type>text</type></optional>) + </literal> + </entry> + <entry><type>text</type></entry> + <entry>concatenates array elements using supplied delimiter and null string (defaults to the empty string)</entry> + <entry><literal>to_string(ARRAY[1, 2, 3, NULL, 5], ',', '*')</literal></entry> + <entry><literal>1,2,3,*,5</literal></entry> + </row> + <row> + <entry> + <literal> <function>unnest</function>(<type>anyarray</type>) </literal> </entry> diff --git a/src/backend/catalog/system_views.sql b/src/backend/catalog/system_views.sql index 8852326..24a79e8 100644 --- a/src/backend/catalog/system_views.sql +++ b/src/backend/catalog/system_views.sql @@ -487,3 +487,11 @@ COMMENT ON FUNCTION ts_debug(text) IS CREATE OR REPLACE FUNCTION pg_start_backup(label text, fast boolean DEFAULT false) RETURNS text STRICT VOLATILE LANGUAGE internal AS 'pg_start_backup'; + +CREATE OR REPLACE FUNCTION + to_string(v anyarray, fldsep text, null_string text DEFAULT '') + RETURNS text STRICT IMMUTABLE LANGUAGE internal AS 'to_string'; + +CREATE OR REPLACE FUNCTION + to_array(inputstr text, fldsep text, null_string text DEFAULT '') + RETURNS text[] STRICT IMMUTABLE LANGUAGE internal AS 'to_array'; diff --git a/src/backend/utils/adt/array_userfuncs.c b/src/backend/utils/adt/array_userfuncs.c index 00e9c54..7f88b82 100644 --- a/src/backend/utils/adt/array_userfuncs.c +++ b/src/backend/utils/adt/array_userfuncs.c @@ -407,9 +407,11 @@ ArrayType * create_singleton_array(FunctionCallInfo fcinfo, Oid element_type, Datum element, + bool isNull, int ndims) { Datum dvalues[1]; + bool nulls[1]; int16 typlen; bool typbyval; char typalign; @@ -429,6 +431,7 @@ create_singleton_array(FunctionCallInfo fcinfo, ndims, MAXDIM))); dvalues[0] = element; + nulls[0] = isNull; for (i = 0; i < ndims; i++) { @@ -462,7 +465,7 @@ create_singleton_array(FunctionCallInfo fcinfo, typbyval = my_extra->typbyval; typalign = my_extra->typalign; - return construct_md_array(dvalues, NULL, ndims, dims, lbs, element_type, + return construct_md_array(dvalues, nulls, ndims, dims, lbs, element_type, typlen, typbyval, typalign); } diff --git a/src/backend/utils/adt/varlena.c b/src/backend/utils/adt/varlena.c index af28c15..676fdc1 100644 --- a/src/backend/utils/adt/varlena.c +++ b/src/backend/utils/adt/varlena.c @@ -2965,16 +2965,22 @@ split_text(PG_FUNCTION_ARGS) } /* - * text_to_array - * parse input string - * return text array of elements - * based on provided field separator + * Return true when two text params are equal. */ -Datum -text_to_array(PG_FUNCTION_ARGS) +static +bool text_isequal(text *txt1, text *txt2) +{ + return DatumGetBool(DirectFunctionCall2(texteq, + PointerGetDatum(txt1), + PointerGetDatum(txt2))); +} + +/* + * Common code for text_to_array and to_array functions. + */ +static Datum +_text_to_array(FunctionCallInfo fcinfo, text *inputstring, text *fldsep, text *null_string, bool *isNull) { - text *inputstring = PG_GETARG_TEXT_PP(0); - text *fldsep = PG_GETARG_TEXT_PP(1); int inputstring_len; int fldsep_len; TextPositionState state; @@ -2985,9 +2991,10 @@ text_to_array(PG_FUNCTION_ARGS) char *start_ptr; text *result_text; ArrayBuildState *astate = NULL; + bool is_null_string; text_position_setup(inputstring, fldsep, &state); - + /* * Note: we check the converted string length, not the original, because * they could be different if the input contained invalid encoding. @@ -2999,7 +3006,8 @@ text_to_array(PG_FUNCTION_ARGS) if (inputstring_len < 1) { text_position_cleanup(&state); - PG_RETURN_NULL(); + *isNull = true; + return (Datum) 0; } /* @@ -3009,8 +3017,10 @@ text_to_array(PG_FUNCTION_ARGS) if (fldsep_len < 1) { text_position_cleanup(&state); - PG_RETURN_ARRAYTYPE_P(create_singleton_array(fcinfo, TEXTOID, - PointerGetDatum(inputstring), 1)); + is_null_string = null_string != NULL ? text_isequal(null_string, inputstring) : false; + *isNull = false; + return (Datum) create_singleton_array(fcinfo, TEXTOID, + PointerGetDatum(inputstring), is_null_string, 1); } start_posn = 1; @@ -3036,11 +3046,12 @@ text_to_array(PG_FUNCTION_ARGS) /* must build a temp text datum to pass to accumArrayResult */ result_text = cstring_to_text_with_len(start_ptr, chunk_len); - + is_null_string = null_string != NULL ? text_isequal(null_string, result_text) : false; + /* stash away this field */ astate = accumArrayResult(astate, PointerGetDatum(result_text), - false, + is_null_string, TEXTOID, CurrentMemoryContext); @@ -3057,20 +3068,65 @@ text_to_array(PG_FUNCTION_ARGS) text_position_cleanup(&state); - PG_RETURN_ARRAYTYPE_P(makeArrayResult(astate, - CurrentMemoryContext)); + *isNull = false; + return makeArrayResult(astate, CurrentMemoryContext); } /* - * array_to_text - * concatenate Cstring representation of input array elements - * using provided field separator + * text_to_array + * parse input string + * return text array of elements + * based on provided field separator */ Datum -array_to_text(PG_FUNCTION_ARGS) +text_to_array(PG_FUNCTION_ARGS) +{ + text *inputstring = PG_GETARG_TEXT_PP(0); + text *fldsep = PG_GETARG_TEXT_PP(1); + bool isNull; + Datum result; + + result = _text_to_array(fcinfo, inputstring, fldsep, NULL, &isNull); + + if (isNull) + PG_RETURN_NULL(); + else + PG_RETURN_ARRAYTYPE_P(result); +} + +/* + * to_array + * + * Parse a string delimited by fldsep, and return an array. Any instances of + * null_string in the inputstring are replaced by NULL elements in the result + * array. + * + * The default value for null_string is the empty string. + */ +Datum +to_array(PG_FUNCTION_ARGS) +{ + text *inputstring = PG_GETARG_TEXT_PP(0); + text *fldsep = PG_GETARG_TEXT_PP(1); + text *null_string = PG_GETARG_TEXT_PP(2); + bool isNull; + Datum result; + + result = _text_to_array(fcinfo, inputstring, fldsep, null_string, &isNull); + + if (isNull) + PG_RETURN_NULL(); + else + PG_RETURN_ARRAYTYPE_P(result); +} + +/* + * Common code for array_to_text and to_string functions. null_string can be + * NULL (only for array_to_text function). + */ +static text * +_array_to_text(FunctionCallInfo fcinfo, ArrayType *v, char *fldsep, char *null_string) { - ArrayType *v = PG_GETARG_ARRAYTYPE_P(0); - char *fldsep = text_to_cstring(PG_GETARG_TEXT_PP(1)); int nitems, *dims, ndims; @@ -3092,7 +3148,7 @@ array_to_text(PG_FUNCTION_ARGS) /* if there are no elements, return an empty string */ if (nitems == 0) - PG_RETURN_TEXT_P(cstring_to_text("")); + return cstring_to_text(""); element_type = ARR_ELEMTYPE(v); initStringInfo(&buf); @@ -3140,7 +3196,15 @@ array_to_text(PG_FUNCTION_ARGS) /* Get source element, checking for NULL */ if (bitmap && (*bitmap & bitmask) == 0) { - /* we ignore nulls */ + /* we ignore nulls, when null_string isn't defined (is NULL) */ + if (null_string != NULL) + { + if (printed) + appendStringInfo(&buf, "%s%s", fldsep, null_string); + else + appendStringInfoString(&buf, null_string); + printed = true; + } } else { @@ -3170,9 +3234,41 @@ array_to_text(PG_FUNCTION_ARGS) } } - PG_RETURN_TEXT_P(cstring_to_text_with_len(buf.data, buf.len)); + return cstring_to_text_with_len(buf.data, buf.len); } +/* + * array_to_text + * concatenate Cstring representation of input array elements + * using provided field separator + */ +Datum +array_to_text(PG_FUNCTION_ARGS) +{ + ArrayType *v = PG_GETARG_ARRAYTYPE_P(0); + char *fldsep = text_to_cstring(PG_GETARG_TEXT_PP(1)); + + PG_RETURN_TEXT_P(_array_to_text(fcinfo, v, fldsep, NULL)); +} + +/* + * to_string + * + * Concatenate cstring representation of input array elements using the + * provided field separator. NULL elements in the input array are rendered + * using the provided null_string, which defaults to the empty string. + */ +Datum +to_string(PG_FUNCTION_ARGS) +{ + ArrayType *v = PG_GETARG_ARRAYTYPE_P(0); + char *fldsep = text_to_cstring(PG_GETARG_TEXT_PP(1)); + char *null_string = text_to_cstring(PG_GETARG_TEXT_PP(2)); + + PG_RETURN_TEXT_P(_array_to_text(fcinfo, v, fldsep, null_string)); +} + + #define HEXBASE 16 /* * Convert a int32 to a string containing a base 16 (hex) representation of diff --git a/src/include/catalog/pg_proc.h b/src/include/catalog/pg_proc.h index 6036493..999b331 100644 --- a/src/include/catalog/pg_proc.h +++ b/src/include/catalog/pg_proc.h @@ -1022,6 +1022,10 @@ DATA(insert OID = 394 ( string_to_array PGNSP PGUID 12 1 0 0 f f f t f i 2 0 DESCR("split delimited text into text[]"); DATA(insert OID = 395 ( array_to_string PGNSP PGUID 12 1 0 0 f f f t f i 2 0 25 "2277 25" _null_ _null_ _null_ _null_ array_to_text _null_ _null_ _null_ )); DESCR("concatenate array elements, using delimiter, into text"); +DATA(insert OID = 950 ( to_array PGNSP PGUID 12 1 0 0 f f f t f i 3 0 1009 "25 25 25" _null_ _null_ _null_ _null_ to_array _null_ _null_ _null_ )); +DESCR("split delimited text into text[], possible to set null string"); +DATA(insert OID = 951 ( to_string PGNSP PGUID 12 1 0 0 f f f t f i 3 0 25 "2277 25 25" _null_ _null_ _null_ _null_ to_string _null_ _null_ _null_ )); +DESCR("concatenate array elements, using delimiter, into text, possible to set null string"); DATA(insert OID = 515 ( array_larger PGNSP PGUID 12 1 0 0 f f f t f i 2 0 2277 "2277 2277" _null_ _null_ _null_ _null_ array_larger _null_ _null_ _null_ )); DESCR("larger of two"); DATA(insert OID = 516 ( array_smaller PGNSP PGUID 12 1 0 0 f f f t f i 2 0 2277 "2277 2277" _null_ _null_ _null_ _null_ array_smaller _null_ _null_ _null_ )); diff --git a/src/include/utils/array.h b/src/include/utils/array.h index 1376f8c..db9a615 100644 --- a/src/include/utils/array.h +++ b/src/include/utils/array.h @@ -274,6 +274,7 @@ extern Datum array_cat(PG_FUNCTION_ARGS); extern ArrayType *create_singleton_array(FunctionCallInfo fcinfo, Oid element_type, Datum element, + bool isNull, int ndims); extern Datum array_agg_transfn(PG_FUNCTION_ARGS); diff --git a/src/include/utils/builtins.h b/src/include/utils/builtins.h index 45123fd..6c539e1 100644 --- a/src/include/utils/builtins.h +++ b/src/include/utils/builtins.h @@ -715,6 +715,8 @@ extern text *replace_text_regexp(text *src_text, void *regexp, extern Datum split_text(PG_FUNCTION_ARGS); extern Datum text_to_array(PG_FUNCTION_ARGS); extern Datum array_to_text(PG_FUNCTION_ARGS); +extern Datum to_array(PG_FUNCTION_ARGS); +extern Datum to_string(PG_FUNCTION_ARGS); extern Datum to_hex32(PG_FUNCTION_ARGS); extern Datum to_hex64(PG_FUNCTION_ARGS); extern Datum md5_text(PG_FUNCTION_ARGS); diff --git a/src/test/regress/expected/arrays.out b/src/test/regress/expected/arrays.out index 3ab18be..df9bec6 100644 --- a/src/test/regress/expected/arrays.out +++ b/src/test/regress/expected/arrays.out @@ -1208,3 +1208,58 @@ select * from t1; [5:5]={"(42,43)"} (1 row) +-- check to_string and to_array functions +select to_array('abc',''); + to_array +---------- + {abc} +(1 row) + +select to_array('abc','','abc'); + to_array +---------- + {NULL} +(1 row) + +select to_array('abc',','); + to_array +---------- + {abc} +(1 row) + +select to_array('abc',',','abc'); + to_array +---------- + {NULL} +(1 row) + +select to_array('1,2,3,4,,6',','); + to_array +------------------ + {1,2,3,4,NULL,6} +(1 row) + +select to_array('1,2,3,4,,6',',',''); + to_array +------------------ + {1,2,3,4,NULL,6} +(1 row) + +select to_array('1,2,3,4,*,6',',','*'); + to_array +------------------ + {1,2,3,4,NULL,6} +(1 row) + +select to_string(array[1,2,3,4,NULL,6],','); + to_string +------------ + 1,2,3,4,,6 +(1 row) + +select to_string(array[1,2,3,4,NULL,6],',','*'); + to_string +------------- + 1,2,3,4,*,6 +(1 row) + diff --git a/src/test/regress/sql/arrays.sql b/src/test/regress/sql/arrays.sql index f65bc45..dd149bb 100644 --- a/src/test/regress/sql/arrays.sql +++ b/src/test/regress/sql/arrays.sql @@ -412,3 +412,15 @@ insert into t1 (f1[5].q1) values(42); select * from t1; update t1 set f1[5].q2 = 43; select * from t1; + +-- check to_string and to_array functions + +select to_array('abc',''); +select to_array('abc','','abc'); +select to_array('abc',','); +select to_array('abc',',','abc'); +select to_array('1,2,3,4,,6',','); +select to_array('1,2,3,4,,6',',',''); +select to_array('1,2,3,4,*,6',',','*'); +select to_string(array[1,2,3,4,NULL,6],','); +select to_string(array[1,2,3,4,NULL,6],',','*');
-- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers