This is an automated email from the ASF dual-hosted git repository.
jgemignani pushed a commit to branch Dev_Multiple_Labels
in repository https://gitbox.apache.org/repos/asf/age.git
The following commit(s) were added to refs/heads/Dev_Multiple_Labels by this
push:
new e384a965 Optimize id() and properties() field access (#2284)
e384a965 is described below
commit e384a965d9508349b8b38fe41c145be99cdd0ba1
Author: John Gemignani <[email protected]>
AuthorDate: Mon Dec 22 17:29:08 2025 -0800
Optimize id() and properties() field access (#2284)
NOTE: This PR was created with AI tools and a human.
Optimized id() and properties() field access on vertices and edges.
When accessing id(v) or properties(v) on a vertex, the system was
generating inefficient SQL that rebuilt the entire vertex agtype
before extracting the field:
age_id(_agtype_build_vertex(id, _label_name_from_table_oid(labels),
properties))
This forced full vertex reconstruction for every row, even though
the data was already available in table columns.
Added optimize_vertex_field_access() in cypher_expr.c to detect these
patterns and optimize them to direct column access:
- age_id(_agtype_build_vertex(id, ...)) → graphid_to_agtype(id)
- age_properties(_agtype_build_vertex(..., props)) → props
- age_id(_agtype_build_edge(id, ...)) → graphid_to_agtype(id)
- age_start_id(_agtype_build_edge(...)) → graphid_to_agtype(start_id)
- age_end_id(_agtype_build_edge(...)) → graphid_to_agtype(end_id)
- age_properties(_agtype_build_edge(...)) → props
Note: age_label() is intentionally not optimized due to complexity
of cstring-to-agtype string conversion.
Added regression tests in unified_vertex_table.sql to verify the
optimization works correctly for both vertices and edges.
modified: regress/expected/unified_vertex_table.out
modified: regress/sql/unified_vertex_table.sql
modified: src/backend/parser/cypher_expr.c
---
regress/expected/unified_vertex_table.out | 96 ++++++++++++++-
regress/sql/unified_vertex_table.sql | 61 ++++++++++
src/backend/parser/cypher_expr.c | 192 ++++++++++++++++++++++++++++++
3 files changed, 348 insertions(+), 1 deletion(-)
diff --git a/regress/expected/unified_vertex_table.out
b/regress/expected/unified_vertex_table.out
index 58a3f1e3..118f4a83 100644
--- a/regress/expected/unified_vertex_table.out
+++ b/regress/expected/unified_vertex_table.out
@@ -1219,11 +1219,101 @@ SELECT * FROM cypher('unified_test', $$
RETURN e
$$) AS (e agtype);
ERROR: SET/REMOVE label can only be used on vertices
+--
+-- Test 28: Verify id() and properties() optimization
+--
+-- The optimization avoids rebuilding the full vertex agtype when accessing
+-- id() or properties() on a vertex. Instead of:
+-- age_id(_agtype_build_vertex(id, _label_name_from_table_oid(labels),
properties))
+-- It generates:
+-- graphid_to_agtype(id)
+--
+-- And for properties:
+-- age_properties(_agtype_build_vertex(...))
+-- It generates:
+-- properties (direct column access)
+--
+-- Create test data
+SELECT * FROM cypher('unified_test', $$
+ CREATE (:OptimizeTest {val: 1}),
+ (:OptimizeTest {val: 2}),
+ (:OptimizeTest {val: 3})
+$$) AS (v agtype);
+ v
+---
+(0 rows)
+
+-- Test that id() works correctly with optimization
+SELECT * FROM cypher('unified_test', $$
+ MATCH (n:OptimizeTest)
+ RETURN id(n), n.val
+ ORDER BY n.val
+$$) AS (id agtype, val agtype);
+ id | val
+-------------------+-----
+ 10977524091715585 | 1
+ 10977524091715586 | 2
+ 10977524091715587 | 3
+(3 rows)
+
+-- Test that properties() works correctly with optimization
+SELECT * FROM cypher('unified_test', $$
+ MATCH (n:OptimizeTest)
+ RETURN properties(n), n.val
+ ORDER BY n.val
+$$) AS (props agtype, val agtype);
+ props | val
+------------+-----
+ {"val": 1} | 1
+ {"val": 2} | 2
+ {"val": 3} | 3
+(3 rows)
+
+-- Test id() in WHERE clause (common optimization target)
+SELECT * FROM cypher('unified_test', $$
+ MATCH (n:OptimizeTest)
+ WHERE id(n) % 10 = 0
+ RETURN n.val
+$$) AS (val agtype);
+ val
+-----
+(0 rows)
+
+-- Test properties() access in expressions
+SELECT * FROM cypher('unified_test', $$
+ MATCH (n:OptimizeTest)
+ WHERE properties(n).val > 1
+ RETURN n.val
+ ORDER BY n.val
+$$) AS (val agtype);
+ val
+-----
+ 2
+ 3
+(2 rows)
+
+-- Test edge id/properties optimization
+SELECT * FROM cypher('unified_test', $$
+ CREATE (:OptStart {x: 1})-[:OPT_EDGE {weight: 10}]->(:OptEnd {y: 2})
+$$) AS (v agtype);
+ v
+---
+(0 rows)
+
+SELECT * FROM cypher('unified_test', $$
+ MATCH (a)-[e:OPT_EDGE]->(b)
+ RETURN id(e), properties(e), start_id(e), end_id(e)
+$$) AS (eid agtype, props agtype, sid agtype, eid2 agtype);
+ eid | props | sid | eid2
+-------------------+----------------+-------------------+-------------------
+ 11540474045136897 | {"weight": 10} | 11258999068426241 | 11821949021847553
+(1 row)
+
--
-- Cleanup
--
SELECT drop_graph('unified_test', true);
-NOTICE: drop cascades to 38 other objects
+NOTICE: drop cascades to 42 other objects
DETAIL: drop cascades to table unified_test._ag_label_vertex
drop cascades to table unified_test._ag_label_edge
drop cascades to table unified_test."Person"
@@ -1262,6 +1352,10 @@ drop cascades to table unified_test."SameLabel"
drop cascades to table unified_test."EdgeTest1"
drop cascades to table unified_test."CONNECTS"
drop cascades to table unified_test."EdgeTest2"
+drop cascades to table unified_test."OptimizeTest"
+drop cascades to table unified_test."OptStart"
+drop cascades to table unified_test."OPT_EDGE"
+drop cascades to table unified_test."OptEnd"
NOTICE: graph "unified_test" has been dropped
drop_graph
------------
diff --git a/regress/sql/unified_vertex_table.sql
b/regress/sql/unified_vertex_table.sql
index 8eebf9c1..aebf6cd5 100644
--- a/regress/sql/unified_vertex_table.sql
+++ b/regress/sql/unified_vertex_table.sql
@@ -748,6 +748,67 @@ SELECT * FROM cypher('unified_test', $$
RETURN e
$$) AS (e agtype);
+--
+-- Test 28: Verify id() and properties() optimization
+--
+-- The optimization avoids rebuilding the full vertex agtype when accessing
+-- id() or properties() on a vertex. Instead of:
+-- age_id(_agtype_build_vertex(id, _label_name_from_table_oid(labels),
properties))
+-- It generates:
+-- graphid_to_agtype(id)
+--
+-- And for properties:
+-- age_properties(_agtype_build_vertex(...))
+-- It generates:
+-- properties (direct column access)
+--
+
+-- Create test data
+SELECT * FROM cypher('unified_test', $$
+ CREATE (:OptimizeTest {val: 1}),
+ (:OptimizeTest {val: 2}),
+ (:OptimizeTest {val: 3})
+$$) AS (v agtype);
+
+-- Test that id() works correctly with optimization
+SELECT * FROM cypher('unified_test', $$
+ MATCH (n:OptimizeTest)
+ RETURN id(n), n.val
+ ORDER BY n.val
+$$) AS (id agtype, val agtype);
+
+-- Test that properties() works correctly with optimization
+SELECT * FROM cypher('unified_test', $$
+ MATCH (n:OptimizeTest)
+ RETURN properties(n), n.val
+ ORDER BY n.val
+$$) AS (props agtype, val agtype);
+
+-- Test id() in WHERE clause (common optimization target)
+SELECT * FROM cypher('unified_test', $$
+ MATCH (n:OptimizeTest)
+ WHERE id(n) % 10 = 0
+ RETURN n.val
+$$) AS (val agtype);
+
+-- Test properties() access in expressions
+SELECT * FROM cypher('unified_test', $$
+ MATCH (n:OptimizeTest)
+ WHERE properties(n).val > 1
+ RETURN n.val
+ ORDER BY n.val
+$$) AS (val agtype);
+
+-- Test edge id/properties optimization
+SELECT * FROM cypher('unified_test', $$
+ CREATE (:OptStart {x: 1})-[:OPT_EDGE {weight: 10}]->(:OptEnd {y: 2})
+$$) AS (v agtype);
+
+SELECT * FROM cypher('unified_test', $$
+ MATCH (a)-[e:OPT_EDGE]->(b)
+ RETURN id(e), properties(e), start_id(e), end_id(e)
+$$) AS (eid agtype, props agtype, sid agtype, eid2 agtype);
+
--
-- Cleanup
--
diff --git a/src/backend/parser/cypher_expr.c b/src/backend/parser/cypher_expr.c
index 5f4de86b..cc6bf52e 100644
--- a/src/backend/parser/cypher_expr.c
+++ b/src/backend/parser/cypher_expr.c
@@ -116,6 +116,7 @@ static bool function_exists(char *funcname, char
*extension);
static Node *coerce_expr_flexible(ParseState *pstate, Node *expr,
Oid source_oid, Oid target_oid,
int32 t_typemod, bool error_out);
+static Node *optimize_vertex_field_access(Node *node);
/* transform a cypher expression */
Node *transform_cypher_expr(cypher_parsestate *cpstate, Node *expr,
@@ -2082,6 +2083,14 @@ static Node *transform_FuncCall(cypher_parsestate
*cpstate, FuncCall *fn)
retval = ParseFuncOrColumn(pstate, fname, targs, last_srf, fn, false,
fn->location);
+ /*
+ * Optimize vertex field access patterns. This detects cases like:
+ * age_id(_agtype_build_vertex(id, label, props))
+ * and optimizes them to directly use the underlying column, avoiding
+ * the expensive reconstruction of the vertex agtype just to extract a
field.
+ */
+ retval = optimize_vertex_field_access(retval);
+
/* flag that an aggregate was found during a transform */
if (retval != NULL && retval->type == T_Aggref)
{
@@ -2407,3 +2416,186 @@ static Node *transform_SubLink(cypher_parsestate
*cpstate, SubLink *sublink)
return result;
}
+/*
+ * optimize_vertex_field_access
+ *
+ * Short-circuits an accessor that is applied directly to the call that
+ * builds a vertex or an edge. The builder receives every field as its own
+ * argument, so the accessor can be replaced by the matching argument and
+ * the whole agtype value never has to be assembled just to read one field.
+ *
+ * Example:
+ *   age_id(_agtype_build_vertex(id, label_name, properties))
+ *     becomes graphid_to_agtype(id)
+ *
+ * Rewrites for _agtype_build_vertex(id, label_name, properties):
+ *   - age_id(...)         -> graphid_to_agtype(id)
+ *   - age_properties(...) -> properties
+ *
+ * Rewrites for
+ * _agtype_build_edge(id, start_id, end_id, label_name, properties):
+ *   - age_id(...)         -> graphid_to_agtype(id)
+ *   - age_start_id(...)   -> graphid_to_agtype(start_id)
+ *   - age_end_id(...)     -> graphid_to_agtype(end_id)
+ *   - age_properties(...) -> properties
+ *
+ * age_label() is deliberately NOT rewritten for either builder: the label
+ * argument is a cstring and converting it to an agtype string is not a
+ * simple wrap, so that call is always returned unchanged.
+ *
+ * NOTE(review): functions are recognised by name and argument count only;
+ * the schema they live in is not checked.
+ *
+ * Parameters:
+ *   node - the expression built for a function call; may be NULL.
+ *
+ * Returns the replacement expression when one of the patterns above
+ * matches, otherwise 'node' itself, untouched.
+ */
+static Node *optimize_vertex_field_access(Node *node)
+{
+    FuncExpr *outer_func;
+    FuncExpr *inner_func;
+    FuncExpr *cast_expr;
+    Node *inner_node;
+    Node *field;
+    List *inner_args;
+    char *outer_func_name;
+    char *inner_func_name;
+    int arg_index = -1;
+    bool is_graphid = false;
+
+    /* Only a function call can be one of the accessors */
+    if (node == NULL || !IsA(node, FuncExpr))
+    {
+        return node;
+    }
+
+    outer_func = (FuncExpr *)node;
+
+    /* Every accessor handled here takes exactly one argument */
+    if (list_length(outer_func->args) != 1)
+    {
+        return node;
+    }
+
+    /*
+     * That argument has to be a function call too (the builder). This is
+     * tested before the names are looked up so that calls which cannot
+     * match skip both lookups.
+     */
+    inner_node = (Node *)linitial(outer_func->args);
+    if (inner_node == NULL || !IsA(inner_node, FuncExpr))
+    {
+        return node;
+    }
+
+    inner_func = (FuncExpr *)inner_node;
+    inner_args = inner_func->args;
+
+    /* Resolve both names; a failed lookup leaves nothing to compare */
+    outer_func_name = get_func_name(outer_func->funcid);
+    if (outer_func_name == NULL)
+    {
+        return node;
+    }
+
+    inner_func_name = get_func_name(inner_func->funcid);
+    if (inner_func_name == NULL)
+    {
+        pfree(outer_func_name);
+        return node;
+    }
+
+    /*
+     * _agtype_build_vertex(id, label_name, properties)
+     * Arguments: 0=id (graphid), 1=label_name (cstring),
+     *            2=properties (agtype)
+     */
+    if (strcmp(inner_func_name, "_agtype_build_vertex") == 0 &&
+        list_length(inner_args) == 3)
+    {
+        if (strcmp(outer_func_name, "age_id") == 0)
+        {
+            /* id: argument 0, a graphid */
+            arg_index = 0;
+            is_graphid = true;
+        }
+        else if (strcmp(outer_func_name, "age_properties") == 0)
+        {
+            /* properties: argument 2, already agtype */
+            arg_index = 2;
+        }
+        /* any other accessor, age_label() included, is left alone */
+    }
+    /*
+     * _agtype_build_edge(id, start_id, end_id, label_name, properties)
+     * Arguments: 0=id, 1=start_id, 2=end_id (all graphid),
+     *            3=label_name (cstring), 4=properties (agtype)
+     */
+    else if (strcmp(inner_func_name, "_agtype_build_edge") == 0 &&
+             list_length(inner_args) == 5)
+    {
+        if (strcmp(outer_func_name, "age_id") == 0)
+        {
+            /* id: argument 0, a graphid */
+            arg_index = 0;
+            is_graphid = true;
+        }
+        else if (strcmp(outer_func_name, "age_start_id") == 0)
+        {
+            /* start_id: argument 1, a graphid */
+            arg_index = 1;
+            is_graphid = true;
+        }
+        else if (strcmp(outer_func_name, "age_end_id") == 0)
+        {
+            /* end_id: argument 2, a graphid */
+            arg_index = 2;
+            is_graphid = true;
+        }
+        else if (strcmp(outer_func_name, "age_properties") == 0)
+        {
+            /* properties: argument 4, already agtype */
+            arg_index = 4;
+        }
+        /* any other accessor, age_label() included, is left alone */
+    }
+
+    /*
+     * get_func_name() returns a palloc'd copy of the name. The copies are
+     * only needed for the comparisons above, so they are released here
+     * rather than being left behind for every function call that is
+     * transformed.
+     */
+    pfree(outer_func_name);
+    pfree(inner_func_name);
+
+    /* No pattern matched: hand the original expression back */
+    if (arg_index < 0)
+    {
+        return node;
+    }
+
+    field = (Node *)list_nth(inner_args, arg_index);
+
+    /* The properties argument is already agtype and is used as it is */
+    if (!is_graphid)
+    {
+        return field;
+    }
+
+    /*
+     * A graphid argument is wrapped in graphid_to_agtype() so that the
+     * replacement is an agtype expression, as the accessor call was. The
+     * new call takes over the accessor's parse location.
+     */
+    cast_expr = makeFuncExpr(get_ag_func_oid("graphid_to_agtype", 1,
+                                             GRAPHIDOID),
+                             AGTYPEOID, list_make1(field),
+                             InvalidOid, InvalidOid,
+                             COERCE_EXPLICIT_CALL);
+    cast_expr->location = outer_func->location;
+
+    return (Node *)cast_expr;
+}
\ No newline at end of file