This is an automated email from the ASF dual-hosted git repository.

jgemignani pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-age.git


The following commit(s) were added to refs/heads/master by this push:
     new 0333e5d  Add openCypher split() string command.
0333e5d is described below

commit 0333e5d3568f3c2e3225ce3228f1c8470b609616
Author: John Gemignani <[email protected]>
AuthorDate: Thu Aug 13 15:49:42 2020 -0700

    Add openCypher split() string command.
    
    Added the openCypher split() string command.
    
    Added regression tests.
---
 age--0.2.0.sql                     |   7 ++
 regress/expected/cypher_create.out |  16 ++--
 regress/expected/expr.out          | 126 +++++++++++++++++++++++++++++++
 regress/sql/expr.sql               |  52 +++++++++++++
 src/backend/parser/cypher_expr.c   |   3 +-
 src/backend/utils/adt/agtype.c     | 147 +++++++++++++++++++++++++++++++++++++
 6 files changed, 342 insertions(+), 9 deletions(-)

diff --git a/age--0.2.0.sql b/age--0.2.0.sql
index 14b079d..e4ffe8b 100644
--- a/age--0.2.0.sql
+++ b/age--0.2.0.sql
@@ -979,6 +979,13 @@ STABLE
 PARALLEL SAFE
 AS 'MODULE_PATHNAME';
 
+--
+-- openCypher split(): splits a string around a delimiter and returns the
+-- pieces as an agtype array. Declared variadic "any" and not STRICT, so the
+-- C implementation checks the argument count, types, and NULLs itself.
+--
+CREATE FUNCTION split(variadic "any")
+RETURNS agtype
+LANGUAGE c
+STABLE
+PARALLEL SAFE
+AS 'MODULE_PATHNAME';
+
 --
 -- function for typecasting an agtype value to another agtype value
 --
diff --git a/regress/expected/cypher_create.out b/regress/expected/cypher_create.out
index f23e80d..fd0ad5c 100644
--- a/regress/expected/cypher_create.out
+++ b/regress/expected/cypher_create.out
@@ -375,14 +375,14 @@ SELECT * FROM cypher_create.e_var;
 SELECT * FROM ag_label;
        name       | graph | id | kind |            relation            
 ------------------+-------+----+------+--------------------------------
- _ag_label_vertex | 17025 |  1 | v    | cypher_create._ag_label_vertex
- _ag_label_edge   | 17025 |  2 | e    | cypher_create._ag_label_edge
- v                | 17025 |  3 | v    | cypher_create.v
- e                | 17025 |  4 | e    | cypher_create.e
- n_var            | 17025 |  5 | v    | cypher_create.n_var
- e_var            | 17025 |  6 | e    | cypher_create.e_var
- n_other_node     | 17025 |  7 | v    | cypher_create.n_other_node
- b_var            | 17025 |  8 | e    | cypher_create.b_var
+ _ag_label_vertex | 17027 |  1 | v    | cypher_create._ag_label_vertex
+ _ag_label_edge   | 17027 |  2 | e    | cypher_create._ag_label_edge
+ v                | 17027 |  3 | v    | cypher_create.v
+ e                | 17027 |  4 | e    | cypher_create.e
+ n_var            | 17027 |  5 | v    | cypher_create.n_var
+ e_var            | 17027 |  6 | e    | cypher_create.e_var
+ n_other_node     | 17027 |  7 | v    | cypher_create.n_other_node
+ b_var            | 17027 |  8 | e    | cypher_create.b_var
 (8 rows)
 
 --Validate every vertex has the correct label
diff --git a/regress/expected/expr.out b/regress/expected/expr.out
index 8626e47..af0279d 100644
--- a/regress/expected/expr.out
+++ b/regress/expected/expr.out
@@ -2663,6 +2663,132 @@ LINE 1: SELECT * FROM b_substr();
                       ^
 HINT:  No function matches the given name and argument types. You might need to add explicit type casts.
 --
+-- split()
+--
+SELECT * FROM cypher('expr', $$
+    RETURN split("a,b,c,d,e,f", ",")
+$$) AS (results agtype);
+            results             
+--------------------------------
+ ["a", "b", "c", "d", "e", "f"]
+(1 row)
+
+SELECT * FROM cypher('expr', $$
+    RETURN split("a,b,c,d,e,f", "")
+$$) AS (results agtype);
+                         results                         
+---------------------------------------------------------
+ ["a", ",", "b", ",", "c", ",", "d", ",", "e", ",", "f"]
+(1 row)
+
+SELECT * FROM cypher('expr', $$
+    RETURN split("a,b,c,d,e,f", " ")
+$$) AS (results agtype);
+     results     
+-----------------
+ ["a,b,c,d,e,f"]
+(1 row)
+
+SELECT * FROM cypher('expr', $$
+    RETURN split("a,b,cd  e,f", " ")
+$$) AS (results agtype);
+        results        
+-----------------------
+ ["a,b,cd", "", "e,f"]
+(1 row)
+
+SELECT * FROM cypher('expr', $$
+    RETURN split("a,b,cd  e,f", "  ")
+$$) AS (results agtype);
+      results      
+-------------------
+ ["a,b,cd", "e,f"]
+(1 row)
+
+SELECT * FROM cypher('expr', $$
+    RETURN split("a,b,c,d,e,f", "c,")
+$$) AS (results agtype);
+      results      
+-------------------
+ ["a,b,", "d,e,f"]
+(1 row)
+
+-- should return null
+SELECT * FROM cypher('expr', $$
+    RETURN split(null, null)
+$$) AS (results agtype);
+ results 
+---------
+ 
+(1 row)
+
+SELECT * FROM cypher('expr', $$
+    RETURN split("a,b,c,d,e,f", null)
+$$) AS (results agtype);
+ results 
+---------
+ 
+(1 row)
+
+SELECT * FROM cypher('expr', $$
+    RETURN split(null, ",")
+$$) AS (results agtype);
+ results 
+---------
+ 
+(1 row)
+
+SELECT * FROM split(null, null);
+ split 
+-------
+ 
+(1 row)
+
+SELECT * FROM split('a,b,c,d,e,f', null);
+ split 
+-------
+ 
+(1 row)
+
+SELECT * FROM split(null, ',');
+ split 
+-------
+ 
+(1 row)
+
+-- should fail
+SELECT * FROM cypher('expr', $$
+    RETURN split(123456789, ",")
+$$) AS (results agtype);
+ERROR:  split() unsuppoted argument agtype 3
+SELECT * FROM cypher('expr', $$
+    RETURN split("a,b,c,d,e,f", -1)
+$$) AS (results agtype);
+ERROR:  split() unsuppoted argument agtype 3
+SELECT * FROM cypher('expr', $$
+    RETURN split("a,b,c,d,e,f")
+$$) AS (results agtype);
+ERROR:  invalid number of input parameters for split()
+LINE 1: SELECT * FROM cypher('expr', $$
+                                      ^
+SELECT * FROM cypher('expr', $$
+    RETURN split()
+$$) AS (results agtype);
+ERROR:  unrecognized or unsupported function
+LINE 1: SELECT * FROM cypher('expr', $$
+                                      ^
+SELECT * FROM split(123456789, ',');
+ERROR:  split() unsuppoted argument type 23
+SELECT * FROM split('a,b,c,d,e,f', -1);
+ERROR:  split() unsuppoted argument type 23
+SELECT * FROM split('a,b,c,d,e,f');
+ERROR:  split() invalid number of arguments
+SELECT * FROM split();
+ERROR:  function split() does not exist
+LINE 1: SELECT * FROM split();
+                      ^
+HINT:  No function matches the given name and argument types. You might need to add explicit type casts.
+--
 -- Cleanup
 --
 SELECT * FROM drop_graph('expr', true);
diff --git a/regress/sql/expr.sql b/regress/sql/expr.sql
index 6d3f38c..e4a00f5 100644
--- a/regress/sql/expr.sql
+++ b/regress/sql/expr.sql
@@ -1153,6 +1153,58 @@ SELECT * FROM b_substr('123456789', -1);
 SELECT * FROM b_substr();
 
 --
+-- split()
+--
+SELECT * FROM cypher('expr', $$
+    RETURN split("a,b,c,d,e,f", ",")
+$$) AS (results agtype);
+SELECT * FROM cypher('expr', $$
+    RETURN split("a,b,c,d,e,f", "")
+$$) AS (results agtype);
+SELECT * FROM cypher('expr', $$
+    RETURN split("a,b,c,d,e,f", " ")
+$$) AS (results agtype);
+SELECT * FROM cypher('expr', $$
+    RETURN split("a,b,cd  e,f", " ")
+$$) AS (results agtype);
+SELECT * FROM cypher('expr', $$
+    RETURN split("a,b,cd  e,f", "  ")
+$$) AS (results agtype);
+SELECT * FROM cypher('expr', $$
+    RETURN split("a,b,c,d,e,f", "c,")
+$$) AS (results agtype);
+-- should return null
+SELECT * FROM cypher('expr', $$
+    RETURN split(null, null)
+$$) AS (results agtype);
+SELECT * FROM cypher('expr', $$
+    RETURN split("a,b,c,d,e,f", null)
+$$) AS (results agtype);
+SELECT * FROM cypher('expr', $$
+    RETURN split(null, ",")
+$$) AS (results agtype);
+SELECT * FROM split(null, null);
+SELECT * FROM split('a,b,c,d,e,f', null);
+SELECT * FROM split(null, ',');
+-- should fail
+SELECT * FROM cypher('expr', $$
+    RETURN split(123456789, ",")
+$$) AS (results agtype);
+SELECT * FROM cypher('expr', $$
+    RETURN split("a,b,c,d,e,f", -1)
+$$) AS (results agtype);
+SELECT * FROM cypher('expr', $$
+    RETURN split("a,b,c,d,e,f")
+$$) AS (results agtype);
+SELECT * FROM cypher('expr', $$
+    RETURN split()
+$$) AS (results agtype);
+SELECT * FROM split(123456789, ',');
+SELECT * FROM split('a,b,c,d,e,f', -1);
+SELECT * FROM split('a,b,c,d,e,f');
+SELECT * FROM split();
+
+--
 -- Cleanup
 --
 SELECT * FROM drop_graph('expr', true);
diff --git a/src/backend/parser/cypher_expr.c b/src/backend/parser/cypher_expr.c
index 348f775..a48f03d 100644
--- a/src/backend/parser/cypher_expr.c
+++ b/src/backend/parser/cypher_expr.c
@@ -66,6 +66,7 @@
 #define FUNC_RSUBSTR    {"right",      "r_substr",   ANYOID,    ANYOID, 0, AGTYPEOID, 2, 1, false}
 #define FUNC_LSUBSTR    {"left",       "l_substr",   ANYOID,    ANYOID, 0, AGTYPEOID, 2, 1, false}
 #define FUNC_BSUBSTR    {"substring",  "b_substr",   ANYOID,    ANYOID, ANYOID, AGTYPEOID, -1, 1, false}
+#define FUNC_SPLIT      {"split",      "split",      ANYOID,    ANYOID, 0, AGTYPEOID, 2, 1, false}
 
 /* supported functions */
 #define SUPPORTED_FUNCTIONS {FUNC_TYPE, FUNC_ENDNODE, FUNC_HEAD, FUNC_ID, \
@@ -75,7 +76,7 @@
                              FUNC_EXISTS, FUNC_TOSTRING, FUNC_REVERSE, \
                              FUNC_TOUPPER, FUNC_TOLOWER, FUNC_LTRIM, \
                              FUNC_RTRIM, FUNC_BTRIM, FUNC_RSUBSTR, \
-                             FUNC_LSUBSTR, FUNC_BSUBSTR}
+                             FUNC_LSUBSTR, FUNC_BSUBSTR, FUNC_SPLIT}
 
 /* structure for supported function signatures */
 typedef struct function_signature
diff --git a/src/backend/utils/adt/agtype.c b/src/backend/utils/adt/agtype.c
index 1a5fcd3..e2a5291 100644
--- a/src/backend/utils/adt/agtype.c
+++ b/src/backend/utils/adt/agtype.c
@@ -27,6 +27,7 @@
 #include "access/htup_details.h"
 #include "catalog/namespace.h"
 #include "catalog/pg_type.h"
+#include "catalog/pg_collation_d.h"
 #include "funcapi.h"
 #include "miscadmin.h"
 #include "parser/parse_coerce.h"
@@ -5353,3 +5354,149 @@ Datum b_substr(PG_FUNCTION_ARGS)
 
     PG_RETURN_POINTER(agtype_value_to_agtype(&agtv_result));
 }
+
+PG_FUNCTION_INFO_V1(split);
+
+/*
+ * split() - openCypher string function: split a string around a delimiter.
+ *
+ * Takes exactly two arguments, the string and the delimiter. Each may be a
+ * text, a cstring, or an agtype scalar string. The result is an agtype array
+ * of strings.
+ *
+ * Returns SQL NULL when either argument is SQL NULL or an agtype null.
+ *
+ * Raises ERRCODE_INVALID_PARAMETER_VALUE when the argument count is not 2,
+ * when an agtype argument is not a scalar, or when an argument is of any
+ * other type than the ones listed above.
+ *
+ * NOTE(review): the delimiter is handed to regexp_split_to_array(), which
+ * treats it as a regular expression rather than a literal string, so a
+ * delimiter containing regex metacharacters (".", "|", "*", ...) will not
+ * split literally. The regression tests rely on the current behavior for
+ * the empty delimiter - confirm the intended semantics before changing it.
+ */
+Datum split(PG_FUNCTION_ARGS)
+{
+    int nargs;
+    Datum *args;
+    bool *nulls;
+    Oid *types;
+    agtype_value *agtv_result = NULL;
+    text *text_string = NULL;
+    text *text_delimiter = NULL;
+    Datum text_array;
+    int i;
+
+    /* extract argument values */
+    nargs = extract_variadic_args(fcinfo, 0, true, &args, &types, &nulls);
+
+    /* check number of args */
+    if (nargs != 2)
+        ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+                        errmsg("split() invalid number of arguments")));
+
+    /*
+     * Check for a null string or delimiter. nargs is exactly 2 at this
+     * point, so only the per-argument null flags need to be tested.
+     */
+    if (nulls[0] || nulls[1])
+        PG_RETURN_NULL();
+
+    /*
+     * split() supports text, cstring, or the agtype string input for the
+     * string and delimiter values
+     */
+    for (i = 0; i < 2; i++)
+    {
+        Datum arg = args[i];
+        Oid type = types[i];
+        text *param = NULL;
+
+        if (type != AGTYPEOID)
+        {
+            if (type == CSTRINGOID)
+                param = cstring_to_text(DatumGetCString(arg));
+            else if (type == TEXTOID)
+                param = DatumGetTextPP(arg);
+            else
+                /* Oid is unsigned, so it is formatted with %u */
+                ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+                                errmsg("split() unsuppoted argument type %u",
+                                       type)));
+        }
+        else
+        {
+            agtype *agt_arg;
+            agtype_value *agtv_value;
+
+            /* get the agtype argument */
+            agt_arg = DATUM_GET_AGTYPE_P(arg);
+
+            if (!AGT_ROOT_IS_SCALAR(agt_arg))
+                ereport(ERROR,
+                        (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+                         errmsg("split() only supports scalar arguments")));
+
+            agtv_value = get_ith_agtype_value_from_container(&agt_arg->root,
+                                                             0);
+
+            /* an agtype null in either position makes the result null */
+            if (agtv_value->type == AGTV_NULL)
+                PG_RETURN_NULL();
+            if (agtv_value->type == AGTV_STRING)
+                param = cstring_to_text_with_len(agtv_value->val.string.val,
+                                                 agtv_value->val.string.len);
+            else
+                ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+                                errmsg("split() unsuppoted argument agtype %d",
+                                       agtv_value->type)));
+        }
+
+        /* first argument is the string, second is the delimiter */
+        if (i == 0)
+            text_string = param;
+        else
+            text_delimiter = param;
+    }
+
+    /*
+     * We need both values as text so that we can let PG deal with multibyte
+     * characters in the string. The result is an ArrayType of text.
+     */
+    text_array = DirectFunctionCall2Coll(regexp_split_to_array,
+                                         DEFAULT_COLLATION_OID,
+                                         PointerGetDatum(text_string),
+                                         PointerGetDatum(text_delimiter));
+
+    /* now build an agtype array of strings */
+    if (PointerIsValid(DatumGetPointer(text_array)))
+    {
+        ArrayType *array = DatumGetArrayTypeP(text_array);
+        agtype_in_state result;
+        Datum *elements;
+        int nelements;
+
+        /* zero the state and deconstruct the ArrayType to TEXTOID */
+        memset(&result, 0, sizeof(agtype_in_state));
+        deconstruct_array(array, TEXTOID, -1, false, 'i', &elements, NULL,
+                          &nelements);
+
+        /* open the agtype array */
+        result.res = push_agtype_value(&result.parse_state, WAGT_BEGIN_ARRAY,
+                                       NULL);
+        /* add the values */
+        for (i = 0; i < nelements; i++)
+        {
+            text *element;
+            char *string;
+            int string_len;
+            char *string_copy;
+            agtype_value agtv_string;
+            Datum d;
+
+            /*
+             * Get the string element from the array. The _ANY accessors
+             * are used so that the element is read correctly whichever
+             * varlena header form (1-byte or 4-byte) it carries.
+             */
+            element = DatumGetTextPP(elements[i]);
+            string = VARDATA_ANY(element);
+            string_len = VARSIZE_ANY_EXHDR(element);
+
+            /* make a copy */
+            string_copy = palloc(string_len);
+            memcpy(string_copy, string, string_len);
+
+            /* build the agtype string */
+            agtv_string.type = AGTV_STRING;
+            agtv_string.val.string.val = string_copy;
+            agtv_string.val.string.len = string_len;
+
+            /* get the datum */
+            d = PointerGetDatum(agtype_value_to_agtype(&agtv_string));
+
+            /* add the value */
+            add_agtype(d, false, &result, AGTYPEOID, false);
+        }
+
+        /* close the array */
+        result.res = push_agtype_value(&result.parse_state, WAGT_END_ARRAY,
+                                       NULL);
+
+        agtv_result = result.res;
+    }
+    else
+        elog(ERROR, "split() unexpected error");
+
+    PG_RETURN_POINTER(agtype_value_to_agtype(agtv_result));
+}

Reply via email to