pá 17. 4. 2020 v 23:29 odesílatel Justin Pryzby <pry...@telsasoft.com> napsal:
> On Fri, Apr 17, 2020 at 07:47:15PM +0200, Pavel Stehule wrote: > > I propose new function string_to_table. This function is significantly > > +1 > > > +/* > > + * Add text to result set (table or array). Build a table when set is a > expected or build > > + * a array > > as expected (??) > *an* array > I tried to fix this comment > > > +select string_to_table('abc', '', 'abc'); > > + string_to_table > > +----------------- > > + > > +(1 row) > > Maybe you should \pset null '(null)' for this > changing NULL output can break lot of existing tests, but I add second column with info about null +select string_to_table('1,2,3,4,*,6', ',', '*'), string_to_table('1,2,3,4,*,6', ',', '*') IS NULL; + string_to_table | ?column? +-----------------+---------- + 1 | f + 2 | f + 3 | f + 4 | f + | t + 6 | f +(6 rows) Regards Pavel > -- > Justin >
diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml index 92c1835ae3..ba87e472e1 100644 --- a/doc/src/sgml/func.sgml +++ b/doc/src/sgml/func.sgml @@ -14004,6 +14004,9 @@ SELECT NULLIF(value, '(none)') ... <indexterm> <primary>string_to_array</primary> </indexterm> + <indexterm> + <primary>string_to_table</primary> + </indexterm> <indexterm> <primary>unnest</primary> </indexterm> @@ -14206,6 +14209,22 @@ SELECT NULLIF(value, '(none)') ... <entry><literal>string_to_array('xx~^~yy~^~zz', '~^~', 'yy')</literal></entry> <entry><literal>{xx,NULL,zz}</literal></entry> </row> + + + <row> + <entry> + <literal> + <function>string_to_table</function>(<type>text</type>, <type>text</type> <optional>, <type>text</type></optional>) + </literal> + </entry> + <entry><type>setof text</type></entry> + <entry>splits string into table using supplied delimiter and + optional null string</entry> + <entry><literal>string_to_table('xx~^~yy~^~zz', '~^~', 'yy')</literal></entry> + <entry><literallayout class="monospaced">xx +yy +zz</literallayout>(3 rows)</entry> + </row> <row> <entry> <literal> diff --git a/src/backend/utils/adt/varlena.c b/src/backend/utils/adt/varlena.c index 2eaabd6231..650813d8b8 100644 --- a/src/backend/utils/adt/varlena.c +++ b/src/backend/utils/adt/varlena.c @@ -26,6 +26,7 @@ #include "lib/hyperloglog.h" #include "libpq/pqformat.h" #include "miscadmin.h" +#include "nodes/execnodes.h" #include "parser/scansup.h" #include "port/pg_bswap.h" #include "regex/regex.h" @@ -35,6 +36,7 @@ #include "utils/memutils.h" #include "utils/pg_locale.h" #include "utils/sortsupport.h" +#include "utils/tuplestore.h" #include "utils/varlena.h" @@ -92,6 +94,16 @@ typedef struct pg_locale_t locale; } VarStringSortSupport; +/* + * Holds target metadata used for split string to array or to table. + */ +typedef struct +{ + ArrayBuildState *astate; + Tuplestorestate *tupstore; + TupleDesc tupdesc; +} SplitStringTargetData; + /* * This should be large enough that most strings will fit, but small enough * that we feel comfortable putting it on the stack @@ -139,7 +151,7 @@ static bytea *bytea_substring(Datum str, bool length_not_specified); static bytea *bytea_overlay(bytea *t1, bytea *t2, int sp, int sl); static void appendStringInfoText(StringInfo str, const text *t); -static Datum text_to_array_internal(PG_FUNCTION_ARGS); +static bool text_to_array_internal(FunctionCallInfo fcinfo, SplitStringTargetData *tstate); static text *array_to_text_internal(FunctionCallInfo fcinfo, ArrayType *v, const char *fldsep, const char *null_string); static StringInfo makeStringAggState(FunctionCallInfo fcinfo); @@ -4679,7 +4691,19 @@ text_isequal(text *txt1, text *txt2, Oid collid) Datum text_to_array(PG_FUNCTION_ARGS) { - return text_to_array_internal(fcinfo); + SplitStringTargetData tstate; + + /* reset tstate */ + memset(&tstate, 0, sizeof(tstate)); + + if (!text_to_array_internal(fcinfo, &tstate)) + PG_RETURN_NULL(); + + if (!tstate.astate) + PG_RETURN_ARRAYTYPE_P(construct_empty_array(TEXTOID)); + + PG_RETURN_ARRAYTYPE_P(makeArrayResult(tstate.astate, + CurrentMemoryContext)); } /* @@ -4693,16 +4717,98 @@ text_to_array(PG_FUNCTION_ARGS) Datum text_to_array_null(PG_FUNCTION_ARGS) { - return text_to_array_internal(fcinfo); + return text_to_array(fcinfo); +} + +/* + * text_to_table + * Parse input string and returns substrings as a table. + */ +Datum +text_to_table(PG_FUNCTION_ARGS) +{ + ReturnSetInfo *rsi = (ReturnSetInfo *) fcinfo->resultinfo; + SplitStringTargetData tstate; + MemoryContext old_cxt; + + /* check to see if caller supports us returning a tuplestore */ + if (rsi == NULL || !IsA(rsi, ReturnSetInfo)) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("set-valued function called in context that cannot accept a set"))); + + if (!(rsi->allowedModes & SFRM_Materialize)) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("materialize mode required, but it is not " + "allowed in this context"))); + + old_cxt = MemoryContextSwitchTo(rsi->econtext->ecxt_per_query_memory); + + tstate.astate = NULL; + tstate.tupdesc = CreateTupleDescCopy(rsi->expectedDesc); + tstate.tupstore = tuplestore_begin_heap(rsi->allowedModes & SFRM_Materialize_Random, + false, work_mem); + + MemoryContextSwitchTo(old_cxt); + + (void) text_to_array_internal(fcinfo, &tstate); + + tuplestore_donestoring(tstate.tupstore); + + rsi->returnMode = SFRM_Materialize; + rsi->setResult = tstate.tupstore; + rsi->setDesc = tstate.tupdesc; + + return (Datum) 0; +} + +Datum +text_to_table_null(PG_FUNCTION_ARGS) +{ + return text_to_table(fcinfo); +} + +/* + * Add text to result set (table or array). When a result set is expected, + * then we fill a tuplestore, else we prepare an array. + */ +static void +accum_result(SplitStringTargetData *tstate, + text *result_text, + bool is_null) +{ + if (tstate->tupdesc) + { + HeapTuple tuple; + Datum values[1]; + bool nulls[1]; + + values[0] = PointerGetDatum(result_text); + nulls[0] = is_null; + + tuple = heap_form_tuple(tstate->tupdesc, values, nulls); + tuplestore_puttuple(tstate->tupstore, tuple); + } + else + { + tstate->astate = accumArrayResult(tstate->astate, + PointerGetDatum(result_text), + is_null, + TEXTOID, + CurrentMemoryContext); + } } /* * common code for text_to_array and text_to_array_null functions * * These are not strict so we have to test for null inputs explicitly. + * Returns false, when result is null, else returns true. + * */ -static Datum -text_to_array_internal(PG_FUNCTION_ARGS) +static bool +text_to_array_internal(FunctionCallInfo fcinfo, SplitStringTargetData *tstate) { text *inputstring; text *fldsep; @@ -4712,11 +4818,10 @@ text_to_array_internal(PG_FUNCTION_ARGS) char *start_ptr; text *result_text; bool is_null; - ArrayBuildState *astate = NULL; /* when input string is NULL, then result is NULL too */ if (PG_ARGISNULL(0)) - PG_RETURN_NULL(); + return false; inputstring = PG_GETARG_TEXT_PP(0); @@ -4745,7 +4850,7 @@ text_to_array_internal(PG_FUNCTION_ARGS) /* return empty array for empty input string */ if (inputstring_len < 1) - PG_RETURN_ARRAYTYPE_P(construct_empty_array(TEXTOID)); + return true; /* * empty field separator: return the input string as a one-element @@ -4753,22 +4858,11 @@ text_to_array_internal(PG_FUNCTION_ARGS) */ if (fldsep_len < 1) { - Datum elems[1]; - bool nulls[1]; - int dims[1]; - int lbs[1]; - /* single element can be a NULL too */ is_null = null_string ? text_isequal(inputstring, null_string, PG_GET_COLLATION()) : false; - elems[0] = PointerGetDatum(inputstring); - nulls[0] = is_null; - dims[0] = 1; - lbs[0] = 1; - /* XXX: this hardcodes assumptions about the text type */ - PG_RETURN_ARRAYTYPE_P(construct_md_array(elems, nulls, - 1, dims, lbs, - TEXTOID, -1, false, TYPALIGN_INT)); + accum_result(tstate, inputstring, is_null); + return true; } text_position_setup(inputstring, fldsep, PG_GET_COLLATION(), &state); @@ -4802,12 +4896,7 @@ text_to_array_internal(PG_FUNCTION_ARGS) is_null = null_string ? text_isequal(result_text, null_string, PG_GET_COLLATION()) : false; /* stash away this field */ - astate = accumArrayResult(astate, - PointerGetDatum(result_text), - is_null, - TEXTOID, - CurrentMemoryContext); - + accum_result(tstate, result_text, is_null); pfree(result_text); if (!found) @@ -4844,12 +4933,7 @@ text_to_array_internal(PG_FUNCTION_ARGS) is_null = null_string ? text_isequal(result_text, null_string, PG_GET_COLLATION()) : false; /* stash away this field */ - astate = accumArrayResult(astate, - PointerGetDatum(result_text), - is_null, - TEXTOID, - CurrentMemoryContext); - + accum_result(tstate, result_text, is_null); pfree(result_text); start_ptr += chunk_len; @@ -4857,8 +4941,7 @@ text_to_array_internal(PG_FUNCTION_ARGS) } } - PG_RETURN_ARRAYTYPE_P(makeArrayResult(astate, - CurrentMemoryContext)); + return true; } /* diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat index 4bce3ad8de..2fc97f830f 100644 --- a/src/include/catalog/pg_proc.dat +++ b/src/include/catalog/pg_proc.dat @@ -3561,6 +3561,14 @@ { oid => '2768', descr => 'split string by pattern', proname => 'regexp_split_to_array', prorettype => '_text', proargtypes => 'text text text', prosrc => 'regexp_split_to_array' }, +{ oid => '2228', descr => 'split delimited text', + proname => 'string_to_table', prorows => '1000', proretset => 't', + prorettype => 'text', proargtypes => 'text text', + prosrc => 'text_to_table' }, +{ oid => '2282', descr => 'split delimited text with null string', + proname => 'string_to_table', prorows => '1000', proretset => 't', + prorettype => 'text', proargtypes => 'text text text', + prosrc => 'text_to_table_null' }, { oid => '2089', descr => 'convert int4 number to hex', proname => 'to_hex', prorettype => 'text', proargtypes => 'int4', prosrc => 'to_hex32' }, diff --git a/src/test/regress/expected/arrays.out b/src/test/regress/expected/arrays.out index c730563f03..7689f4698f 100644 --- a/src/test/regress/expected/arrays.out +++ b/src/test/regress/expected/arrays.out @@ -1755,6 +1755,109 @@ select string_to_array('1,2,3,4,*,6', ',', '*'); {1,2,3,4,NULL,6} (1 row) +select string_to_table('1|2|3', '|'); + string_to_table +----------------- + 1 + 2 + 3 +(3 rows) + +select string_to_table('1|2|3|', '|'); + string_to_table +----------------- + 1 + 2 + 3 + +(4 rows) + +select string_to_table('1||2|3||', '||'); + string_to_table +----------------- + 1 + 2|3 + +(3 rows) + +select string_to_table('1|2|3', ''); + string_to_table +----------------- + 1|2|3 +(1 row) + +select string_to_table('', '|'); + string_to_table +----------------- +(0 rows) + +select string_to_table('1|2|3', NULL); + string_to_table +----------------- +(0 rows) + +select string_to_table(NULL, '|') IS NULL; + ?column? +---------- +(0 rows) + +select string_to_table('abc', ''); + string_to_table +----------------- + abc +(1 row) + +select string_to_table('abc', '', 'abc'); + string_to_table +----------------- + +(1 row) + +select string_to_table('abc', ','); + string_to_table +----------------- + abc +(1 row) + +select string_to_table('abc', ',', 'abc'); + string_to_table +----------------- + +(1 row) + +select string_to_table('1,2,3,4,,6', ','); + string_to_table +----------------- + 1 + 2 + 3 + 4 + + 6 +(6 rows) + +select string_to_table('1,2,3,4,,6', ',', ''), string_to_table('1,2,3,4,,6', ',', '') IS NULL; + string_to_table | ?column? +-----------------+---------- + 1 | f + 2 | f + 3 | f + 4 | f + | t + 6 | f +(6 rows) + +select string_to_table('1,2,3,4,*,6', ',', '*'), string_to_table('1,2,3,4,*,6', ',', '*') IS NULL; + string_to_table | ?column? +-----------------+---------- + 1 | f + 2 | f + 3 | f + 4 | f + | t + 6 | f +(6 rows) + select array_to_string(NULL::int4[], ',') IS NULL; ?column? ---------- diff --git a/src/test/regress/sql/arrays.sql b/src/test/regress/sql/arrays.sql index 25dd4e2c6d..5c10156a71 100644 --- a/src/test/regress/sql/arrays.sql +++ b/src/test/regress/sql/arrays.sql @@ -544,6 +544,21 @@ select string_to_array('1,2,3,4,,6', ','); select string_to_array('1,2,3,4,,6', ',', ''); select string_to_array('1,2,3,4,*,6', ',', '*'); +select string_to_table('1|2|3', '|'); +select string_to_table('1|2|3|', '|'); +select string_to_table('1||2|3||', '||'); +select string_to_table('1|2|3', ''); +select string_to_table('', '|'); +select string_to_table('1|2|3', NULL); +select string_to_table(NULL, '|') IS NULL; +select string_to_table('abc', ''); +select string_to_table('abc', '', 'abc'); +select string_to_table('abc', ','); +select string_to_table('abc', ',', 'abc'); +select string_to_table('1,2,3,4,,6', ','); +select string_to_table('1,2,3,4,,6', ',', ''), string_to_table('1,2,3,4,,6', ',', '') IS NULL; +select string_to_table('1,2,3,4,*,6', ',', '*'), string_to_table('1,2,3,4,*,6', ',', '*') IS NULL; + select array_to_string(NULL::int4[], ',') IS NULL; select array_to_string('{}'::int4[], ','); select array_to_string(array[1,2,3,4,NULL,6], ',');