This is an automated email from the ASF dual-hosted git repository.

jgemignani pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/age.git


The following commit(s) were added to refs/heads/master by this push:
     new 2191bead Update age_load to load scalar property values with appropriate type (#1519)
2191bead is described below

commit 2191beadf61ae00e021f534cf283231689784745
Author: Rafsun Masud <[email protected]>
AuthorDate: Wed Jan 24 13:25:00 2024 -0800

    Update age_load to load scalar property values with appropriate type (#1519)
    
    Previously, property values from csv files were always loaded as strings. This
    patch converts a value to an appropriate scalar type (i.e. string, bool,
    numeric, null) while loading. It uses the agtype_value_from_cstring()
    function for conversion.
    
    Additional change(s):
    -------------------
     - Fix: for csv rows in edge files, create_agtype_from_list_i()'s start_index
       is corrected to 4
---
 regress/age_load/data/conversion_edges.csv    |  7 +++
 regress/age_load/data/conversion_vertices.csv |  7 +++
 regress/expected/age_load.out                 | 84 +++++++++++++++++++++++++++
 regress/sql/age_load.sql                      | 18 ++++++
 src/backend/utils/adt/agtype.c                |  3 +-
 src/backend/utils/load/ag_load_edges.c        |  2 +-
 src/backend/utils/load/age_load.c             | 51 +++++++++++++++-
 src/include/utils/agtype.h                    |  1 +
 8 files changed, 168 insertions(+), 5 deletions(-)

diff --git a/regress/age_load/data/conversion_edges.csv b/regress/age_load/data/conversion_edges.csv
new file mode 100644
index 00000000..f207cdc5
--- /dev/null
+++ b/regress/age_load/data/conversion_edges.csv
@@ -0,0 +1,7 @@
+start_id,   start_vertex_type,  end_id, end_vertex_type,    string,           bool,        numeric,
+1,          Person1,            1,      Person2,            "John Smith",     "true",      1
+1,          Person1,            1,      Person2,            "John",           "false",     "-2"
+1,          Person1,            1,      Person2,            John Smith,       true,        1.4
+1,          Person1,            1,      Person2,            """John""",       false,       -1e10
+1,          Person1,            1,      Person2,            null,             false,       0
+1,          Person1,            1,      Person2,            nUll,             false,       "3.14"
diff --git a/regress/age_load/data/conversion_vertices.csv b/regress/age_load/data/conversion_vertices.csv
new file mode 100644
index 00000000..8e0a9c65
--- /dev/null
+++ b/regress/age_load/data/conversion_vertices.csv
@@ -0,0 +1,7 @@
+id,    string,           bool,        numeric,
+1,     "John Smith",     "true",      1
+2,     "John",           "false",     "-2"
+3,     John Smith,       true,        1.4
+4,     """John""",       false,       -1e10
+5,     null,             false,       0
+6,     nUll,             false,       "3.14"
diff --git a/regress/expected/age_load.out b/regress/expected/age_load.out
index 5e74eaae..b568980c 100644
--- a/regress/expected/age_load.out
+++ b/regress/expected/age_load.out
@@ -233,3 +233,87 @@ NOTICE:  graph "agload_test_graph" has been dropped
  
 (1 row)
 
+--
+-- Test property type conversion
+--
+SELECT create_graph('agload_conversion');
+NOTICE:  graph "agload_conversion" has been created
+ create_graph 
+--------------
+ 
+(1 row)
+
+SELECT create_vlabel('agload_conversion','Person1');
+NOTICE:  VLabel "Person1" has been created
+ create_vlabel 
+---------------
+ 
+(1 row)
+
+SELECT load_labels_from_file('agload_conversion', 'Person1', 'age_load/conversion_vertices.csv');
+ load_labels_from_file 
+-----------------------
+ 
+(1 row)
+
+SELECT * FROM cypher('agload_conversion', $$ MATCH (n) RETURN properties(n) $$) as (a agtype);
+                                         a                                     
     
+------------------------------------------------------------------------------------
+ {"id": 1, "bool": true, "__id__": 1, "string": "John Smith", "numeric": 1}
+ {"id": 2, "bool": false, "__id__": 2, "string": "John", "numeric": -2}
+ {"id": 3, "bool": true, "__id__": 3, "string": "John Smith", "numeric": 1.4}
+ {"id": 4, "bool": false, "__id__": 4, "string": "John", "numeric": -10000000000.0}
+ {"id": 5, "bool": false, "__id__": 5, "string": null, "numeric": 0}
+ {"id": 6, "bool": false, "__id__": 6, "string": "nUll", "numeric": 3.14}
+(6 rows)
+
+SELECT create_vlabel('agload_conversion','Person2');
+NOTICE:  VLabel "Person2" has been created
+ create_vlabel 
+---------------
+ 
+(1 row)
+
+SELECT load_labels_from_file('agload_conversion', 'Person2', 'age_load/conversion_vertices.csv');
+ load_labels_from_file 
+-----------------------
+ 
+(1 row)
+
+SELECT create_elabel('agload_conversion','Edges');
+NOTICE:  ELabel "Edges" has been created
+ create_elabel 
+---------------
+ 
+(1 row)
+
+SELECT load_edges_from_file('agload_conversion', 'Edges', 'age_load/conversion_edges.csv');
+ load_edges_from_file 
+----------------------
+ 
+(1 row)
+
+SELECT * FROM cypher('agload_conversion', $$ MATCH ()-[e]->() RETURN properties(e) $$) as (a agtype);
+                              a                               
+--------------------------------------------------------------
+ {"bool": true, "string": "John Smith", "numeric": 1}
+ {"bool": false, "string": "John", "numeric": -2}
+ {"bool": true, "string": "John Smith", "numeric": 1.4}
+ {"bool": false, "string": "John", "numeric": -10000000000.0}
+ {"bool": false, "string": null, "numeric": 0}
+ {"bool": false, "string": "nUll", "numeric": 3.14}
+(6 rows)
+
+SELECT drop_graph('agload_conversion', true);
+NOTICE:  drop cascades to 5 other objects
+DETAIL:  drop cascades to table agload_conversion._ag_label_vertex
+drop cascades to table agload_conversion._ag_label_edge
+drop cascades to table agload_conversion."Person1"
+drop cascades to table agload_conversion."Person2"
+drop cascades to table agload_conversion."Edges"
+NOTICE:  graph "agload_conversion" has been dropped
+ drop_graph 
+------------
+ 
+(1 row)
+
diff --git a/regress/sql/age_load.sql b/regress/sql/age_load.sql
index 105c2fa6..7805d181 100644
--- a/regress/sql/age_load.sql
+++ b/regress/sql/age_load.sql
@@ -79,3 +79,21 @@ SELECT * FROM cypher('agload_test_graph', $$
 $$) AS (result_1 agtype, result_2 agtype);
 
 SELECT drop_graph('agload_test_graph', true);
+
+--
+-- Test property type conversion
+--
+SELECT create_graph('agload_conversion');
+
+SELECT create_vlabel('agload_conversion','Person1');
+SELECT load_labels_from_file('agload_conversion', 'Person1', 'age_load/conversion_vertices.csv');
+SELECT * FROM cypher('agload_conversion', $$ MATCH (n) RETURN properties(n) $$) as (a agtype);
+
+SELECT create_vlabel('agload_conversion','Person2');
+SELECT load_labels_from_file('agload_conversion', 'Person2', 'age_load/conversion_vertices.csv');
+
+SELECT create_elabel('agload_conversion','Edges');
+SELECT load_edges_from_file('agload_conversion', 'Edges', 'age_load/conversion_edges.csv');
+SELECT * FROM cypher('agload_conversion', $$ MATCH ()-[e]->() RETURN properties(e) $$) as (a agtype);
+
+SELECT drop_graph('agload_conversion', true);
diff --git a/src/backend/utils/adt/agtype.c b/src/backend/utils/adt/agtype.c
index b91e5174..e65cca41 100644
--- a/src/backend/utils/adt/agtype.c
+++ b/src/backend/utils/adt/agtype.c
@@ -89,7 +89,6 @@ typedef enum /* type categories for datum_to_agtype */
 } agt_type_category;
 
 static inline Datum agtype_from_cstring(char *str, int len);
-static inline agtype_value *agtype_value_from_cstring(char *str, int len);
 size_t check_string_length(size_t len);
 static void agtype_in_agtype_annotation(void *pstate, char *annotation);
 static void agtype_in_object_start(void *pstate);
@@ -354,7 +353,7 @@ Datum agtype_out(PG_FUNCTION_ARGS)
  * Uses the agtype parser (with hooks) to construct an agtype.
  */
 
-static inline agtype_value *agtype_value_from_cstring(char *str, int len)
+agtype_value *agtype_value_from_cstring(char *str, int len)
 {
     agtype_lex_context *lex;
     agtype_in_state state;
diff --git a/src/backend/utils/load/ag_load_edges.c b/src/backend/utils/load/ag_load_edges.c
index 5a07c403..1a70a894 100644
--- a/src/backend/utils/load/ag_load_edges.c
+++ b/src/backend/utils/load/ag_load_edges.c
@@ -100,7 +100,7 @@ void edge_row_cb(int delim __attribute__((unused)), void *data)
         end_vertex_graph_id = make_graphid(end_vertex_type_id, end_id_int);
 
         props = create_agtype_from_list_i(cr->header, cr->fields,
-                                          n_fields, 3);
+                                          n_fields, 4);
 
         insert_edge_simple(cr->graph_oid, cr->object_name,
                            object_graph_id, start_vertex_graph_id,
diff --git a/src/backend/utils/load/age_load.c b/src/backend/utils/load/age_load.c
index a1eea48d..00ac80da 100644
--- a/src/backend/utils/load/age_load.c
+++ b/src/backend/utils/load/age_load.c
@@ -18,11 +18,14 @@
  */
 
 #include "postgres.h"
+#include "utils/json.h"
 
 #include "utils/load/ag_load_edges.h"
 #include "utils/load/ag_load_labels.h"
 #include "utils/load/age_load.h"
 
+static agtype_value *csv_value_to_agtype_value(char *csv_val);
+
 agtype *create_empty_agtype(void)
 {
     agtype* out;
@@ -40,6 +43,50 @@ agtype *create_empty_agtype(void)
     return out;
 }
 
+/*
+ * Converts a single csv field (a C string) into an agtype_value.
+ *
+ * agtype_value_from_cstring() does the parsing, so the text handed to it must
+ * be valid json. A field that already validates as json is passed unchanged;
+ * any other field is copied verbatim (no escaping) between two double-quotes
+ * so it parses as a string. A raw-scalar array result is unwrapped.
+ */
+static agtype_value *csv_value_to_agtype_value(char *csv_val)
+{
+    char *new_csv_val;
+    agtype_value *res;
+
+    if (!json_validate(cstring_to_text(csv_val), false, false))
+    {
+        // not valid json: build a double-quoted copy of csv_val
+        int oldlen;
+        int newlen;
+
+        oldlen = strlen(csv_val);
+        newlen = oldlen + 2; // +2 for the opening and closing quotes
+        new_csv_val = (char *)palloc(sizeof(char) * (newlen + 1));
+
+        new_csv_val[0] = '"';
+        strncpy(&new_csv_val[1], csv_val, oldlen);
+        new_csv_val[oldlen + 1] = '"';
+        new_csv_val[oldlen + 2] = '\0';
+    }
+    else
+    {
+        new_csv_val = csv_val;
+    }
+
+    res = agtype_value_from_cstring(new_csv_val, strlen(new_csv_val));
+
+    // a scalar comes back inside a raw_scalar array; take its first element
+    if (res->type == AGTV_ARRAY && res->val.array.raw_scalar)
+    {
+        res = &res->val.array.elems[0];
+    }
+
+    return res;
+}
+
 agtype *create_agtype_from_list(char **header, char **fields, size_t fields_len,
                                 int64 vertex_id)
 {
@@ -74,7 +121,7 @@ agtype *create_agtype_from_list(char **header, char **fields, size_t fields_len,
                                        WAGT_KEY,
                                        key_agtype);
 
-        value_agtype = string_to_agtype_value(fields[i]);
+        value_agtype = csv_value_to_agtype_value(fields[i]);
         result.res = push_agtype_value(&result.parse_state,
                                        WAGT_VALUE,
                                        value_agtype);
@@ -117,7 +164,7 @@ agtype* create_agtype_from_list_i(char **header, char **fields,
         result.res = push_agtype_value(&result.parse_state,
                                        WAGT_KEY,
                                        key_agtype);
-        value_agtype = string_to_agtype_value(fields[i]);
+        value_agtype = csv_value_to_agtype_value(fields[i]);
         result.res = push_agtype_value(&result.parse_state,
                                        WAGT_VALUE,
                                        value_agtype);
diff --git a/src/include/utils/agtype.h b/src/include/utils/agtype.h
index d430ab93..c2b09395 100644
--- a/src/include/utils/agtype.h
+++ b/src/include/utils/agtype.h
@@ -552,6 +552,7 @@ agtype_iterator *get_next_list_element(agtype_iterator *it,
 void pfree_agtype_value(agtype_value* value);
 void pfree_agtype_value_content(agtype_value* value);
 void pfree_agtype_in_state(agtype_in_state* value);
+agtype_value *agtype_value_from_cstring(char *str, int len);
 
 /* Oid accessors for AGTYPE */
 Oid get_AGTYPEOID(void);

Reply via email to