This is an automated email from the ASF dual-hosted git repository.
jgemignani pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-age.git
The following commit(s) were added to refs/heads/master by this push:
new 0333e5d Add openCypher split() string command.
0333e5d is described below
commit 0333e5d3568f3c2e3225ce3228f1c8470b609616
Author: John Gemignani <[email protected]>
AuthorDate: Thu Aug 13 15:49:42 2020 -0700
Add openCypher split() string command.
Added the openCypher split() string command.
Added regression tests.
---
age--0.2.0.sql | 7 ++
regress/expected/cypher_create.out | 16 ++--
regress/expected/expr.out | 126 +++++++++++++++++++++++++++++++
regress/sql/expr.sql | 52 +++++++++++++
src/backend/parser/cypher_expr.c | 3 +-
src/backend/utils/adt/agtype.c | 147 +++++++++++++++++++++++++++++++++++++
6 files changed, 342 insertions(+), 9 deletions(-)
diff --git a/age--0.2.0.sql b/age--0.2.0.sql
index 14b079d..e4ffe8b 100644
--- a/age--0.2.0.sql
+++ b/age--0.2.0.sql
@@ -979,6 +979,13 @@ STABLE
PARALLEL SAFE
AS 'MODULE_PATHNAME';
+CREATE FUNCTION split(variadic "any") -- openCypher split(string, delimiter); the C code requires exactly two arguments
+RETURNS agtype -- an agtype array of strings, or NULL when either argument is null
+LANGUAGE c
+STABLE
+PARALLEL SAFE
+AS 'MODULE_PATHNAME'; -- implemented by the C function split() in src/backend/utils/adt/agtype.c
+
--
-- function for typecasting an agtype value to another agtype value
--
diff --git a/regress/expected/cypher_create.out b/regress/expected/cypher_create.out
index f23e80d..fd0ad5c 100644
--- a/regress/expected/cypher_create.out
+++ b/regress/expected/cypher_create.out
@@ -375,14 +375,14 @@ SELECT * FROM cypher_create.e_var;
SELECT * FROM ag_label;
name | graph | id | kind | relation
------------------+-------+----+------+--------------------------------
- _ag_label_vertex | 17025 | 1 | v | cypher_create._ag_label_vertex
- _ag_label_edge | 17025 | 2 | e | cypher_create._ag_label_edge
- v | 17025 | 3 | v | cypher_create.v
- e | 17025 | 4 | e | cypher_create.e
- n_var | 17025 | 5 | v | cypher_create.n_var
- e_var | 17025 | 6 | e | cypher_create.e_var
- n_other_node | 17025 | 7 | v | cypher_create.n_other_node
- b_var | 17025 | 8 | e | cypher_create.b_var
+ _ag_label_vertex | 17027 | 1 | v | cypher_create._ag_label_vertex
+ _ag_label_edge | 17027 | 2 | e | cypher_create._ag_label_edge
+ v | 17027 | 3 | v | cypher_create.v
+ e | 17027 | 4 | e | cypher_create.e
+ n_var | 17027 | 5 | v | cypher_create.n_var
+ e_var | 17027 | 6 | e | cypher_create.e_var
+ n_other_node | 17027 | 7 | v | cypher_create.n_other_node
+ b_var | 17027 | 8 | e | cypher_create.b_var
(8 rows)
--Validate every vertex has the correct label
diff --git a/regress/expected/expr.out b/regress/expected/expr.out
index 8626e47..af0279d 100644
--- a/regress/expected/expr.out
+++ b/regress/expected/expr.out
@@ -2663,6 +2663,132 @@ LINE 1: SELECT * FROM b_substr();
^
HINT: No function matches the given name and argument types. You might need to add explicit type casts.
--
+-- split()
+--
+SELECT * FROM cypher('expr', $$
+ RETURN split("a,b,c,d,e,f", ",")
+$$) AS (results agtype);
+ results
+--------------------------------
+ ["a", "b", "c", "d", "e", "f"]
+(1 row)
+
+SELECT * FROM cypher('expr', $$
+ RETURN split("a,b,c,d,e,f", "")
+$$) AS (results agtype);
+ results
+---------------------------------------------------------
+ ["a", ",", "b", ",", "c", ",", "d", ",", "e", ",", "f"]
+(1 row)
+
+SELECT * FROM cypher('expr', $$
+ RETURN split("a,b,c,d,e,f", " ")
+$$) AS (results agtype);
+ results
+-----------------
+ ["a,b,c,d,e,f"]
+(1 row)
+
+SELECT * FROM cypher('expr', $$
+ RETURN split("a,b,cd  e,f", " ")
+$$) AS (results agtype);
+ results
+-----------------------
+ ["a,b,cd", "", "e,f"]
+(1 row)
+
+SELECT * FROM cypher('expr', $$
+ RETURN split("a,b,cd e,f", " ")
+$$) AS (results agtype);
+ results
+-------------------
+ ["a,b,cd", "e,f"]
+(1 row)
+
+SELECT * FROM cypher('expr', $$
+ RETURN split("a,b,c,d,e,f", "c,")
+$$) AS (results agtype);
+ results
+-------------------
+ ["a,b,", "d,e,f"]
+(1 row)
+
+-- should return null
+SELECT * FROM cypher('expr', $$
+ RETURN split(null, null)
+$$) AS (results agtype);
+ results
+---------
+
+(1 row)
+
+SELECT * FROM cypher('expr', $$
+ RETURN split("a,b,c,d,e,f", null)
+$$) AS (results agtype);
+ results
+---------
+
+(1 row)
+
+SELECT * FROM cypher('expr', $$
+ RETURN split(null, ",")
+$$) AS (results agtype);
+ results
+---------
+
+(1 row)
+
+SELECT * FROM split(null, null);
+ split
+-------
+
+(1 row)
+
+SELECT * FROM split('a,b,c,d,e,f', null);
+ split
+-------
+
+(1 row)
+
+SELECT * FROM split(null, ',');
+ split
+-------
+
+(1 row)
+
+-- should fail
+SELECT * FROM cypher('expr', $$
+ RETURN split(123456789, ",")
+$$) AS (results agtype);
+ERROR: split() unsuppoted argument agtype 3
+SELECT * FROM cypher('expr', $$
+ RETURN split("a,b,c,d,e,f", -1)
+$$) AS (results agtype);
+ERROR: split() unsuppoted argument agtype 3
+SELECT * FROM cypher('expr', $$
+ RETURN split("a,b,c,d,e,f")
+$$) AS (results agtype);
+ERROR: invalid number of input parameters for split()
+LINE 1: SELECT * FROM cypher('expr', $$
+ ^
+SELECT * FROM cypher('expr', $$
+ RETURN split()
+$$) AS (results agtype);
+ERROR: unrecognized or unsupported function
+LINE 1: SELECT * FROM cypher('expr', $$
+ ^
+SELECT * FROM split(123456789, ',');
+ERROR: split() unsuppoted argument type 23
+SELECT * FROM split('a,b,c,d,e,f', -1);
+ERROR: split() unsuppoted argument type 23
+SELECT * FROM split('a,b,c,d,e,f');
+ERROR: split() invalid number of arguments
+SELECT * FROM split();
+ERROR: function split() does not exist
+LINE 1: SELECT * FROM split();
+ ^
+HINT: No function matches the given name and argument types. You might need to add explicit type casts.
+--
-- Cleanup
--
SELECT * FROM drop_graph('expr', true);
diff --git a/regress/sql/expr.sql b/regress/sql/expr.sql
index 6d3f38c..e4a00f5 100644
--- a/regress/sql/expr.sql
+++ b/regress/sql/expr.sql
@@ -1153,6 +1153,58 @@ SELECT * FROM b_substr('123456789', -1);
SELECT * FROM b_substr();
--
+-- split()
+--
+SELECT * FROM cypher('expr', $$
+ RETURN split("a,b,c,d,e,f", ",")
+$$) AS (results agtype);
+SELECT * FROM cypher('expr', $$
+ RETURN split("a,b,c,d,e,f", "")
+$$) AS (results agtype);
+SELECT * FROM cypher('expr', $$
+ RETURN split("a,b,c,d,e,f", " ")
+$$) AS (results agtype);
+SELECT * FROM cypher('expr', $$
+ RETURN split("a,b,cd  e,f", " ")
+$$) AS (results agtype);
+SELECT * FROM cypher('expr', $$
+ RETURN split("a,b,cd e,f", " ")
+$$) AS (results agtype);
+SELECT * FROM cypher('expr', $$
+ RETURN split("a,b,c,d,e,f", "c,")
+$$) AS (results agtype);
+-- should return null
+SELECT * FROM cypher('expr', $$
+ RETURN split(null, null)
+$$) AS (results agtype);
+SELECT * FROM cypher('expr', $$
+ RETURN split("a,b,c,d,e,f", null)
+$$) AS (results agtype);
+SELECT * FROM cypher('expr', $$
+ RETURN split(null, ",")
+$$) AS (results agtype);
+SELECT * FROM split(null, null);
+SELECT * FROM split('a,b,c,d,e,f', null);
+SELECT * FROM split(null, ',');
+-- should fail
+SELECT * FROM cypher('expr', $$
+ RETURN split(123456789, ",")
+$$) AS (results agtype);
+SELECT * FROM cypher('expr', $$
+ RETURN split("a,b,c,d,e,f", -1)
+$$) AS (results agtype);
+SELECT * FROM cypher('expr', $$
+ RETURN split("a,b,c,d,e,f")
+$$) AS (results agtype);
+SELECT * FROM cypher('expr', $$
+ RETURN split()
+$$) AS (results agtype);
+SELECT * FROM split(123456789, ',');
+SELECT * FROM split('a,b,c,d,e,f', -1);
+SELECT * FROM split('a,b,c,d,e,f');
+SELECT * FROM split();
+
+--
-- Cleanup
--
SELECT * FROM drop_graph('expr', true);
diff --git a/src/backend/parser/cypher_expr.c b/src/backend/parser/cypher_expr.c
index 348f775..a48f03d 100644
--- a/src/backend/parser/cypher_expr.c
+++ b/src/backend/parser/cypher_expr.c
@@ -66,6 +66,7 @@
#define FUNC_RSUBSTR {"right", "r_substr", ANYOID, ANYOID, 0,
AGTYPEOID, 2, 1, false}
#define FUNC_LSUBSTR {"left", "l_substr", ANYOID, ANYOID, 0,
AGTYPEOID, 2, 1, false}
#define FUNC_BSUBSTR {"substring", "b_substr", ANYOID, ANYOID,
ANYOID, AGTYPEOID, -1, 1, false}
+/* entry for split(); same layout as the sibling FUNC_* entries (fields are declared in function_signature) */
+#define FUNC_SPLIT {"split", "split", ANYOID, ANYOID, 0, AGTYPEOID, 2, 1, false}
/* supported functions */
#define SUPPORTED_FUNCTIONS {FUNC_TYPE, FUNC_ENDNODE, FUNC_HEAD, FUNC_ID, \
@@ -75,7 +76,7 @@
FUNC_EXISTS, FUNC_TOSTRING, FUNC_REVERSE, \
FUNC_TOUPPER, FUNC_TOLOWER, FUNC_LTRIM, \
FUNC_RTRIM, FUNC_BTRIM, FUNC_RSUBSTR, \
- FUNC_LSUBSTR, FUNC_BSUBSTR}
+ FUNC_LSUBSTR, FUNC_BSUBSTR, FUNC_SPLIT}
/* structure for supported function signatures */
typedef struct function_signature
diff --git a/src/backend/utils/adt/agtype.c b/src/backend/utils/adt/agtype.c
index 1a5fcd3..e2a5291 100644
--- a/src/backend/utils/adt/agtype.c
+++ b/src/backend/utils/adt/agtype.c
@@ -27,6 +27,7 @@
#include "access/htup_details.h"
#include "catalog/namespace.h"
#include "catalog/pg_type.h"
+#include "catalog/pg_collation_d.h"
#include "funcapi.h"
#include "miscadmin.h"
#include "parser/parse_coerce.h"
@@ -5353,3 +5354,149 @@ Datum b_substr(PG_FUNCTION_ARGS)
PG_RETURN_POINTER(agtype_value_to_agtype(&agtv_result));
}
+
+PG_FUNCTION_INFO_V1(split);
+
+/*
+ * Helper for split(): build the pattern handed to the regular expression
+ * engine from a delimiter that must be matched literally.
+ *
+ * Every ASCII byte that is not a letter or a digit is preceded by a
+ * backslash, which the regular expression syntax treats as "this character,
+ * taken as an ordinary character". Letters, digits, and bytes with the high
+ * bit set (pieces of multibyte characters) are copied unchanged. An empty
+ * delimiter produces an empty pattern.
+ */
+static text *split_quote_delimiter(text *delimiter)
+{
+    char *src = VARDATA_ANY(delimiter);
+    int src_len = VARSIZE_ANY_EXHDR(delimiter);
+    char *quoted;
+    int quoted_len = 0;
+    int i;
+
+    /* worst case: every byte needs an escaping backslash */
+    quoted = palloc((Size) src_len * 2 + 1);
+
+    for (i = 0; i < src_len; i++)
+    {
+        unsigned char c = (unsigned char) src[i];
+        bool is_alnum = (c >= '0' && c <= '9') ||
+                        (c >= 'A' && c <= 'Z') ||
+                        (c >= 'a' && c <= 'z');
+
+        if (c < 0x80 && !is_alnum)
+            quoted[quoted_len++] = '\\';
+        quoted[quoted_len++] = src[i];
+    }
+
+    return cstring_to_text_with_len(quoted, quoted_len);
+}
+
+/*
+ * split(string, delimiter) - openCypher split() string function.
+ *
+ * Takes exactly two arguments, each of which may be a text, a cstring, or an
+ * agtype string scalar. Returns an agtype array containing the pieces of
+ * string found between occurrences of delimiter. The delimiter is matched
+ * literally; an empty delimiter is passed on as an empty pattern.
+ *
+ * Returns SQL NULL when either argument is SQL NULL or agtype null.
+ *
+ * Raises an error when the argument count is not two, when an argument has
+ * an unsupported type, or when an agtype argument is not a scalar.
+ */
+Datum split(PG_FUNCTION_ARGS)
+{
+    int nargs;
+    Datum *args;
+    bool *nulls;
+    Oid *types;
+    agtype_value *agtv_result = NULL;
+    text *param = NULL;
+    text *text_string = NULL;
+    text *text_delimiter = NULL;
+    Datum text_array;
+    int i;
+
+    /* extract argument values */
+    nargs = extract_variadic_args(fcinfo, 0, true, &args, &types, &nulls);
+
+    /* check number of args; this also rejects any negative count */
+    if (nargs != 2)
+        ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+                        errmsg("split() invalid number of arguments")));
+
+    /* check for a null string and delimiter */
+    if (nulls[0] || nulls[1])
+        PG_RETURN_NULL();
+
+    /*
+     * split() supports text, cstring, or the agtype string input for the
+     * string and delimiter values. The spelling of the "unsuppoted" messages
+     * is kept as is because the regression expected output matches it.
+     */
+    for (i = 0; i < 2; i++)
+    {
+        Datum arg = args[i];
+        Oid type = types[i];
+
+        if (type != AGTYPEOID)
+        {
+            if (type == CSTRINGOID)
+                param = cstring_to_text(DatumGetCString(arg));
+            else if (type == TEXTOID)
+                param = DatumGetTextPP(arg);
+            else
+                ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+                                errmsg("split() unsuppoted argument type %u",
+                                       type)));
+        }
+        else
+        {
+            agtype *agt_arg;
+            agtype_value *agtv_value;
+
+            /* get the agtype argument */
+            agt_arg = DATUM_GET_AGTYPE_P(arg);
+
+            if (!AGT_ROOT_IS_SCALAR(agt_arg))
+                ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+                                errmsg("split() only supports scalar arguments")));
+
+            agtv_value = get_ith_agtype_value_from_container(&agt_arg->root,
+                                                             0);
+
+            /* check for agtype null */
+            if (agtv_value->type == AGTV_NULL)
+                PG_RETURN_NULL();
+            if (agtv_value->type == AGTV_STRING)
+                param = cstring_to_text_with_len(agtv_value->val.string.val,
+                                                 agtv_value->val.string.len);
+            else
+                ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+                                errmsg("split() unsuppoted argument agtype %d",
+                                       agtv_value->type)));
+        }
+
+        if (i == 0)
+            text_string = param;
+        else
+            text_delimiter = param;
+    }
+
+    /*
+     * We need the strings as text so that we can let PG deal with multibyte
+     * characters in the string. The delimiter is quoted first so that
+     * characters such as '.', '|' or '*' split literally instead of acting
+     * as regular expression operators. The result is an ArrayType.
+     */
+    text_array = DirectFunctionCall2Coll(regexp_split_to_array,
+                                         DEFAULT_COLLATION_OID,
+                                         PointerGetDatum(text_string),
+                                         PointerGetDatum(split_quote_delimiter(text_delimiter)));
+
+    /* now build an agtype array of strings */
+    if (PointerIsValid(DatumGetPointer(text_array)))
+    {
+        ArrayType *array = DatumGetArrayTypeP(text_array);
+        agtype_in_state result;
+        Datum *elements;
+        int nelements;
+
+        /* zero the state and deconstruct the ArrayType to TEXTOID */
+        memset(&result, 0, sizeof(agtype_in_state));
+        deconstruct_array(array, TEXTOID, -1, false, 'i', &elements, NULL,
+                          &nelements);
+
+        /* open the agtype array */
+        result.res = push_agtype_value(&result.parse_state, WAGT_BEGIN_ARRAY,
+                                       NULL);
+
+        /* add the values */
+        for (i = 0; i < nelements; i++)
+        {
+            text *element;
+            char *string;
+            int string_len;
+            char *string_copy;
+            agtype_value agtv_string;
+            Datum d;
+
+            /* get the string element, whatever varlena header it carries */
+            element = DatumGetTextPP(elements[i]);
+            string = VARDATA_ANY(element);
+            string_len = VARSIZE_ANY_EXHDR(element);
+
+            /* make a copy */
+            string_copy = palloc(string_len);
+            memcpy(string_copy, string, string_len);
+
+            /* build the agtype string */
+            agtv_string.type = AGTV_STRING;
+            agtv_string.val.string.val = string_copy;
+            agtv_string.val.string.len = string_len;
+
+            /* get the datum */
+            d = PointerGetDatum(agtype_value_to_agtype(&agtv_string));
+
+            /* add the value */
+            add_agtype(d, false, &result, AGTYPEOID, false);
+        }
+
+        /* close the array */
+        result.res = push_agtype_value(&result.parse_state, WAGT_END_ARRAY,
+                                       NULL);
+
+        agtv_result = result.res;
+    }
+    else
+        elog(ERROR, "split() unexpected error");
+
+    PG_RETURN_POINTER(agtype_value_to_agtype(agtv_result));
+}