This is an automated email from the ASF dual-hosted git repository.

jgemignani pushed a commit to branch Dev_Multiple_Labels
in repository https://gitbox.apache.org/repos/asf/age.git


The following commit(s) were added to refs/heads/Dev_Multiple_Labels by this 
push:
     new e384a965 Optimize id() and properties() field access (#2284)
e384a965 is described below

commit e384a965d9508349b8b38fe41c145be99cdd0ba1
Author: John Gemignani <[email protected]>
AuthorDate: Mon Dec 22 17:29:08 2025 -0800

    Optimize id() and properties() field access (#2284)
    
    NOTE: This PR was created with AI tools and a human.
    
    Optimized id() and properties() field access on vertices and edges.
    
    When accessing id(v) or properties(v) on a vertex, the system was
    generating inefficient SQL that rebuilt the entire vertex agtype
    before extracting the field:
    
      age_id(_agtype_build_vertex(id, _label_name_from_table_oid(labels),
                                  properties))
    
    This forced full vertex reconstruction for every row, even though
    the data was already available in table columns.
    
    Added optimize_vertex_field_access() in cypher_expr.c to detect these
    patterns and optimize them to direct column access:
    
      - age_id(_agtype_build_vertex(id, ...)) → graphid_to_agtype(id)
      - age_properties(_agtype_build_vertex(..., props)) → props
      - age_id(_agtype_build_edge(id, ...)) → graphid_to_agtype(id)
      - age_start_id(_agtype_build_edge(...)) → graphid_to_agtype(start_id)
      - age_end_id(_agtype_build_edge(...)) → graphid_to_agtype(end_id)
      - age_properties(_agtype_build_edge(...)) → props
    
    Note: age_label() is intentionally not optimized due to complexity
    of cstring-to-agtype string conversion.
    
    Added regression tests in unified_vertex_table.sql to verify the
    optimization works correctly for both vertices and edges.
    
    modified:   regress/expected/unified_vertex_table.out
    modified:   regress/sql/unified_vertex_table.sql
    modified:   src/backend/parser/cypher_expr.c
---
 regress/expected/unified_vertex_table.out |  96 ++++++++++++++-
 regress/sql/unified_vertex_table.sql      |  61 ++++++++++
 src/backend/parser/cypher_expr.c          | 192 ++++++++++++++++++++++++++++++
 3 files changed, 348 insertions(+), 1 deletion(-)

diff --git a/regress/expected/unified_vertex_table.out b/regress/expected/unified_vertex_table.out
index 58a3f1e3..118f4a83 100644
--- a/regress/expected/unified_vertex_table.out
+++ b/regress/expected/unified_vertex_table.out
@@ -1219,11 +1219,101 @@ SELECT * FROM cypher('unified_test', $$
     RETURN e
 $$) AS (e agtype);
 ERROR:  SET/REMOVE label can only be used on vertices
+--
+-- Test 28: Verify id() and properties() optimization
+--
+-- The optimization avoids rebuilding the full vertex agtype when accessing
+-- id() or properties() on a vertex. Instead of:
+--   age_id(_agtype_build_vertex(id, _label_name_from_table_oid(labels), properties))
+-- It generates:
+--   graphid_to_agtype(id)
+--
+-- And for properties:
+--   age_properties(_agtype_build_vertex(...))
+-- It generates:
+--   properties (direct column access)
+--
+-- Create test data
+SELECT * FROM cypher('unified_test', $$
+    CREATE (:OptimizeTest {val: 1}),
+           (:OptimizeTest {val: 2}),
+           (:OptimizeTest {val: 3})
+$$) AS (v agtype);
+ v 
+---
+(0 rows)
+
+-- Test that id() works correctly with optimization
+SELECT * FROM cypher('unified_test', $$
+    MATCH (n:OptimizeTest)
+    RETURN id(n), n.val
+    ORDER BY n.val
+$$) AS (id agtype, val agtype);
+        id         | val 
+-------------------+-----
+ 10977524091715585 | 1
+ 10977524091715586 | 2
+ 10977524091715587 | 3
+(3 rows)
+
+-- Test that properties() works correctly with optimization
+SELECT * FROM cypher('unified_test', $$
+    MATCH (n:OptimizeTest)
+    RETURN properties(n), n.val
+    ORDER BY n.val
+$$) AS (props agtype, val agtype);
+   props    | val 
+------------+-----
+ {"val": 1} | 1
+ {"val": 2} | 2
+ {"val": 3} | 3
+(3 rows)
+
+-- Test id() in WHERE clause (common optimization target)
+SELECT * FROM cypher('unified_test', $$
+    MATCH (n:OptimizeTest)
+    WHERE id(n) % 10 = 0
+    RETURN n.val
+$$) AS (val agtype);
+ val 
+-----
+(0 rows)
+
+-- Test properties() access in expressions
+SELECT * FROM cypher('unified_test', $$
+    MATCH (n:OptimizeTest)
+    WHERE properties(n).val > 1
+    RETURN n.val
+    ORDER BY n.val
+$$) AS (val agtype);
+ val 
+-----
+ 2
+ 3
+(2 rows)
+
+-- Test edge id/properties optimization
+SELECT * FROM cypher('unified_test', $$
+    CREATE (:OptStart {x: 1})-[:OPT_EDGE {weight: 10}]->(:OptEnd {y: 2})
+$$) AS (v agtype);
+ v 
+---
+(0 rows)
+
+SELECT * FROM cypher('unified_test', $$
+    MATCH (a)-[e:OPT_EDGE]->(b)
+    RETURN id(e), properties(e), start_id(e), end_id(e)
+$$) AS (eid agtype, props agtype, sid agtype, eid2 agtype);
+        eid        |     props      |        sid        |       eid2        
+-------------------+----------------+-------------------+-------------------
+ 11540474045136897 | {"weight": 10} | 11258999068426241 | 11821949021847553
+(1 row)
+
 --
 -- Cleanup
 --
 SELECT drop_graph('unified_test', true);
-NOTICE:  drop cascades to 38 other objects
+NOTICE:  drop cascades to 42 other objects
 DETAIL:  drop cascades to table unified_test._ag_label_vertex
 drop cascades to table unified_test._ag_label_edge
 drop cascades to table unified_test."Person"
@@ -1262,6 +1352,10 @@ drop cascades to table unified_test."SameLabel"
 drop cascades to table unified_test."EdgeTest1"
 drop cascades to table unified_test."CONNECTS"
 drop cascades to table unified_test."EdgeTest2"
+drop cascades to table unified_test."OptimizeTest"
+drop cascades to table unified_test."OptStart"
+drop cascades to table unified_test."OPT_EDGE"
+drop cascades to table unified_test."OptEnd"
 NOTICE:  graph "unified_test" has been dropped
  drop_graph 
 ------------
diff --git a/regress/sql/unified_vertex_table.sql b/regress/sql/unified_vertex_table.sql
index 8eebf9c1..aebf6cd5 100644
--- a/regress/sql/unified_vertex_table.sql
+++ b/regress/sql/unified_vertex_table.sql
@@ -748,6 +748,67 @@ SELECT * FROM cypher('unified_test', $$
     RETURN e
 $$) AS (e agtype);
 
+--
+-- Test 28: Verify id() and properties() optimization
+--
+-- The optimization avoids rebuilding the full vertex agtype when accessing
+-- id() or properties() on a vertex. Instead of:
+--   age_id(_agtype_build_vertex(id, _label_name_from_table_oid(labels), properties))
+-- It generates:
+--   graphid_to_agtype(id)
+--
+-- And for properties:
+--   age_properties(_agtype_build_vertex(...))
+-- It generates:
+--   properties (direct column access)
+--
+
+-- Create test data
+SELECT * FROM cypher('unified_test', $$
+    CREATE (:OptimizeTest {val: 1}),
+           (:OptimizeTest {val: 2}),
+           (:OptimizeTest {val: 3})
+$$) AS (v agtype);
+
+-- Test that id() works correctly with optimization
+SELECT * FROM cypher('unified_test', $$
+    MATCH (n:OptimizeTest)
+    RETURN id(n), n.val
+    ORDER BY n.val
+$$) AS (id agtype, val agtype);
+
+-- Test that properties() works correctly with optimization
+SELECT * FROM cypher('unified_test', $$
+    MATCH (n:OptimizeTest)
+    RETURN properties(n), n.val
+    ORDER BY n.val
+$$) AS (props agtype, val agtype);
+
+-- Test id() in WHERE clause (common optimization target)
+SELECT * FROM cypher('unified_test', $$
+    MATCH (n:OptimizeTest)
+    WHERE id(n) % 10 = 0
+    RETURN n.val
+$$) AS (val agtype);
+
+-- Test properties() access in expressions
+SELECT * FROM cypher('unified_test', $$
+    MATCH (n:OptimizeTest)
+    WHERE properties(n).val > 1
+    RETURN n.val
+    ORDER BY n.val
+$$) AS (val agtype);
+
+-- Test edge id/properties optimization
+SELECT * FROM cypher('unified_test', $$
+    CREATE (:OptStart {x: 1})-[:OPT_EDGE {weight: 10}]->(:OptEnd {y: 2})
+$$) AS (v agtype);
+
+SELECT * FROM cypher('unified_test', $$
+    MATCH (a)-[e:OPT_EDGE]->(b)
+    RETURN id(e), properties(e), start_id(e), end_id(e)
+$$) AS (eid agtype, props agtype, sid agtype, eid2 agtype);
+
 --
 -- Cleanup
 --
diff --git a/src/backend/parser/cypher_expr.c b/src/backend/parser/cypher_expr.c
index 5f4de86b..cc6bf52e 100644
--- a/src/backend/parser/cypher_expr.c
+++ b/src/backend/parser/cypher_expr.c
@@ -116,6 +116,7 @@ static bool function_exists(char *funcname, char *extension);
 static Node *coerce_expr_flexible(ParseState *pstate, Node *expr,
                                   Oid source_oid, Oid target_oid,
                                   int32 t_typemod, bool error_out);
+static Node *optimize_vertex_field_access(Node *node);
 
 /* transform a cypher expression */
 Node *transform_cypher_expr(cypher_parsestate *cpstate, Node *expr,
@@ -2082,6 +2083,14 @@ static Node *transform_FuncCall(cypher_parsestate *cpstate, FuncCall *fn)
     retval = ParseFuncOrColumn(pstate, fname, targs, last_srf, fn, false,
                                fn->location);
 
+    /*
+     * Optimize vertex field access patterns. This detects cases like:
+     *   age_id(_agtype_build_vertex(id, label, props))
+     * and optimizes them to directly use the underlying column, avoiding
+     * the expensive reconstruction of the vertex agtype just to extract a field.
+     */
+    retval = optimize_vertex_field_access(retval);
+
     /* flag that an aggregate was found during a transform */
     if (retval != NULL && retval->type == T_Aggref)
     {
@@ -2407,3 +2416,186 @@ static Node *transform_SubLink(cypher_parsestate *cpstate, SubLink *sublink)
 
     return result;
 }
+/*
+ * optimize_vertex_field_access
+ *
+ * Rewrites an accessor call whose single argument is a vertex or edge that
+ * is being built from its underlying columns, so the requested field is
+ * read from the column directly instead of constructing the whole agtype
+ * value first. Recognized patterns:
+ *
+ *   age_id(_agtype_build_vertex(id, label, props))
+ *       -> graphid_to_agtype(id)
+ *   age_properties(_agtype_build_vertex(id, label, props))
+ *       -> props
+ *   age_id(_agtype_build_edge(id, start_id, end_id, label, props))
+ *       -> graphid_to_agtype(id)
+ *   age_start_id(_agtype_build_edge(...))
+ *       -> graphid_to_agtype(start_id)
+ *   age_end_id(_agtype_build_edge(...))
+ *       -> graphid_to_agtype(end_id)
+ *   age_properties(_agtype_build_edge(...))
+ *       -> props
+ *
+ * age_label() is intentionally NOT rewritten: the label argument is a
+ * cstring and turning it into an agtype string is not a simple cast.
+ *
+ * Matching is by function name, argument count and schema: both calls
+ * must resolve to functions in the schema that holds graphid_to_agtype(),
+ * so a same-named function defined elsewhere is never rewritten.
+ *
+ * NOTE(review): the arguments that are dropped are no longer evaluated.
+ * This assumes they are side-effect free (column references and
+ * _label_name_from_table_oid() in the generated SQL) - confirm.
+ *
+ * Returns the replacement expression, or 'node' itself when the pattern
+ * does not match.
+ */
+static Node *optimize_vertex_field_access(Node *node)
+{
+    FuncExpr *outer_func;
+    FuncExpr *inner_func;
+    char *outer_func_name;
+    char *inner_func_name;
+    Node *arg;
+    Node *extracted_arg;
+    Oid cast_func_oid;
+    Oid age_namespace;
+    int arg_index = -1;
+    bool needs_agtype_cast = false;
+
+    /* Only optimize FuncExpr nodes */
+    if (node == NULL || !IsA(node, FuncExpr))
+    {
+        return node;
+    }
+
+    outer_func = (FuncExpr *)node;
+
+    /* Every accessor handled here takes exactly one argument */
+    if (list_length(outer_func->args) != 1)
+    {
+        return node;
+    }
+
+    /*
+     * The argument must be a FuncExpr (the build function). Test the node
+     * tag before any catalog lookup so unrelated calls stay cheap.
+     */
+    arg = (Node *)linitial(outer_func->args);
+    if (arg == NULL || !IsA(arg, FuncExpr))
+    {
+        return node;
+    }
+
+    inner_func = (FuncExpr *)arg;
+
+    /* Look up both function names; bail out if either is unknown */
+    outer_func_name = get_func_name(outer_func->funcid);
+    if (outer_func_name == NULL)
+    {
+        return node;
+    }
+
+    inner_func_name = get_func_name(inner_func->funcid);
+    if (inner_func_name == NULL)
+    {
+        pfree(outer_func_name);
+        return node;
+    }
+
+    /*
+     * Check for _agtype_build_vertex(id, label_name, properties)
+     * Arguments: 0=id (graphid), 1=label_name (cstring), 2=properties (agtype)
+     */
+    if (strcmp(inner_func_name, "_agtype_build_vertex") == 0 &&
+        list_length(inner_func->args) == 3)
+    {
+        if (strcmp(outer_func_name, "age_id") == 0)
+        {
+            /* Extract id (arg 0), needs cast from graphid to agtype */
+            arg_index = 0;
+            needs_agtype_cast = true;
+        }
+        else if (strcmp(outer_func_name, "age_properties") == 0)
+        {
+            /* Extract properties (arg 2), already agtype */
+            arg_index = 2;
+        }
+    }
+    /*
+     * Check for _agtype_build_edge(id, startid, endid, label_name, properties)
+     * Arguments: 0=id (graphid), 1=start_id (graphid), 2=end_id (graphid),
+     *            3=label_name (cstring), 4=properties (agtype)
+     */
+    else if (strcmp(inner_func_name, "_agtype_build_edge") == 0 &&
+             list_length(inner_func->args) == 5)
+    {
+        if (strcmp(outer_func_name, "age_id") == 0)
+        {
+            /* Extract id (arg 0), needs cast from graphid to agtype */
+            arg_index = 0;
+            needs_agtype_cast = true;
+        }
+        else if (strcmp(outer_func_name, "age_start_id") == 0)
+        {
+            /* Extract start_id (arg 1), needs cast from graphid to agtype */
+            arg_index = 1;
+            needs_agtype_cast = true;
+        }
+        else if (strcmp(outer_func_name, "age_end_id") == 0)
+        {
+            /* Extract end_id (arg 2), needs cast from graphid to agtype */
+            arg_index = 2;
+            needs_agtype_cast = true;
+        }
+        else if (strcmp(outer_func_name, "age_properties") == 0)
+        {
+            /* Extract properties (arg 4), already agtype */
+            arg_index = 4;
+        }
+    }
+
+    /* get_func_name() hands back palloc'd copies; they are done with */
+    pfree(outer_func_name);
+    pfree(inner_func_name);
+
+    /* No optimization possible */
+    if (arg_index < 0)
+    {
+        return node;
+    }
+
+    /*
+     * Names alone do not identify a function. Require both calls to live in
+     * the same schema as graphid_to_agtype(), so a same-named function from
+     * another schema is left alone.
+     */
+    cast_func_oid = get_ag_func_oid("graphid_to_agtype", 1, GRAPHIDOID);
+    age_namespace = get_func_namespace(cast_func_oid);
+
+    if (get_func_namespace(outer_func->funcid) != age_namespace ||
+        get_func_namespace(inner_func->funcid) != age_namespace)
+    {
+        return node;
+    }
+
+    extracted_arg = (Node *)list_nth(inner_func->args, arg_index);
+
+    if (needs_agtype_cast)
+    {
+        /* graphid columns are wrapped in graphid_to_agtype() */
+        FuncExpr *cast_expr;
+
+        cast_expr = makeFuncExpr(cast_func_oid, AGTYPEOID,
+                                 list_make1(extracted_arg),
+                                 InvalidOid, InvalidOid,
+                                 COERCE_EXPLICIT_CALL);
+        cast_expr->location = outer_func->location;
+
+        return (Node *)cast_expr;
+    }
+
+    /* properties are already agtype; return the column expression as is */
+    return extracted_arg;
+}
\ No newline at end of file

Reply via email to