Copilot commented on code in PR #2295:
URL: https://github.com/apache/age/pull/2295#discussion_r2654108103
##########
src/backend/parser/cypher_clause.c:
##########
@@ -6410,6 +6427,273 @@ transform_cypher_clause_as_subquery(cypher_parsestate
*cpstate,
return pnsi;
}
+/*
+ * Export hidden columns for entities in the current clause.
+ *
+ * For each named vertex/edge entity, we export its id (and for edges:
+ * start_id, end_id) as target entries. These can be referenced by
+ * subsequent clauses without rebuilding the full vertex/edge.
+ *
+ * This function handles two cases:
+ * 1. Entities declared in the current clause: Extract id from _agtype_build_*
args
+ * 2. Entities from previous clauses (with id_var set): Re-export using their
Vars
+ *
+ * The column names use the AGE_VARNAME prefix pattern
(AGE_DEFAULT_VARNAME_PREFIX)
+ * which makes them hidden from SELECT * output. The expand_star() function in
+ * cypher_item.c filters out columns with this prefix.
+ *
+ * Note: We use resjunk=false so these columns are included in the subquery RTE
+ * when PostgreSQL builds the column list via addRangeTableEntryForSubquery().
+ * If resjunk=true, PostgreSQL would skip them entirely.
+ */
+static void export_entity_hidden_columns(cypher_parsestate *cpstate,
+ Query *query)
+{
+ ListCell *lc;
+ int resno;
+ Oid graphid_to_agtype_oid;
+ List *exported_names = NIL; /* Track entity names already exported */
+
+ /* Get the graphid_to_agtype function OID for id conversions */
+ graphid_to_agtype_oid = get_ag_func_oid("graphid_to_agtype", 1,
GRAPHIDOID);
+
+ /* find the next resno for target entries */
+ resno = list_length(query->targetList) + 1;
+
+ foreach (lc, cpstate->entities)
+ {
+ transform_entity *entity = lfirst(lc);
+ char *entity_name;
+ char *col_name;
+ TargetEntry *te;
+ ListCell *lc2;
+ bool already_exported;
+
+ /* skip entities without names - they aren't referenced */
+ entity_name = get_entity_name(entity);
+ if (entity_name == NULL)
+ {
+ continue;
+ }
+
+ /*
+ * Skip if we've already exported columns for an entity with this name.
+ * This can happen when the same variable appears multiple times in a
+ * pattern, e.g., MATCH (a)-[]->(), ()-[]->(a)
+ */
+ already_exported = false;
+ foreach (lc2, exported_names)
+ {
+ if (strcmp((char *)lfirst(lc2), entity_name) == 0)
+ {
+ already_exported = true;
+ break;
+ }
+ }
+ if (already_exported)
+ {
+ continue;
+ }
+ exported_names = lappend(exported_names, entity_name);
+
+ /* skip path entities - they don't have id/start_id/end_id */
+ if (entity->type == ENT_PATH)
+ {
+ continue;
+ }
+
+ /* skip VLE edges for now - they have different structure */
+ if (entity->type == ENT_VLE_EDGE)
+ {
+ continue;
+ }
+
+ /*
+ * Only export hidden columns for entities declared in the current
clause.
+ * Entities from previous clauses already have their
id_var/start_id_var/end_id_var
+ * pointing to the subquery's hidden columns from when they were
exported.
+ */
+ if (!entity->declared_in_current_clause)
+ {
+ continue;
+ }
+
+ /* Extract id from the entity's FuncExpr */
+ {
+ Expr *expr = entity->expr;
+ FuncExpr *func_expr;
+ Node *id_node;
+ Node *start_id_node;
+ Node *end_id_node;
+ FuncExpr *id_agtype_expr;
+ FuncExpr *start_id_agtype_expr;
+ FuncExpr *end_id_agtype_expr;
+
+ if (expr == NULL)
+ {
+ continue;
+ }
+
+ /*
+ * The expr must be a FuncExpr (_agtype_build_vertex or
_agtype_build_edge)
+ * for entities declared in the current clause.
+ */
+ if (!IsA(expr, FuncExpr))
+ {
+ continue;
+ }
+
+ func_expr = (FuncExpr *)expr;
+
+ /*
+ * For vertices: _agtype_build_vertex(id, label_name, properties)
+ * For edges: _agtype_build_edge(id, start_id, end_id, label_name,
properties)
+ */
+ if (entity->type == ENT_VERTEX)
+ {
+ /* vertex: args are (id, label_name, properties) */
+ if (list_length(func_expr->args) < 3)
+ {
+ continue;
+ }
+ id_node = (Node *)linitial(func_expr->args);
+ start_id_node = NULL;
+ end_id_node = NULL;
+ }
+ else if (entity->type == ENT_EDGE)
+ {
+ /* edge: args are (id, start_id, end_id, label_name,
properties) */
+ if (list_length(func_expr->args) < 5)
+ {
+ continue;
+ }
+ id_node = (Node *)linitial(func_expr->args);
+ start_id_node = (Node *)lsecond(func_expr->args);
+ end_id_node = (Node *)lthird(func_expr->args);
+ }
+ else
+ {
+ continue;
+ }
+
+ /*
+ * Create target entries for id (as agtype).
+ * We wrap id in graphid_to_agtype() since age_id() returns agtype.
+ */
+ id_agtype_expr = makeFuncExpr(graphid_to_agtype_oid, AGTYPEOID,
+ list_make1(copyObject(id_node)),
InvalidOid,
+ InvalidOid, COERCE_EXPLICIT_CALL);
+
+ col_name = psprintf("%s%s", AGE_VARNAME_ID_PREFIX, entity_name);
+ te = makeTargetEntry((Expr *)id_agtype_expr, resno++, col_name,
false);
+ query->targetList = lappend(query->targetList, te);
+
+ /* For edges, also export start_id and end_id */
+ if (entity->type == ENT_EDGE)
+ {
+ start_id_agtype_expr = makeFuncExpr(graphid_to_agtype_oid,
AGTYPEOID,
+
list_make1(copyObject(start_id_node)),
+ InvalidOid, InvalidOid,
+ COERCE_EXPLICIT_CALL);
+ col_name = psprintf("%s%s", AGE_VARNAME_START_ID_PREFIX,
entity_name);
+ te = makeTargetEntry((Expr *)start_id_agtype_expr, resno++,
col_name, false);
+ query->targetList = lappend(query->targetList, te);
+
+ end_id_agtype_expr = makeFuncExpr(graphid_to_agtype_oid,
AGTYPEOID,
+
list_make1(copyObject(end_id_node)),
+ InvalidOid, InvalidOid,
+ COERCE_EXPLICIT_CALL);
+ col_name = psprintf("%s%s", AGE_VARNAME_END_ID_PREFIX,
entity_name);
+ te = makeTargetEntry((Expr *)end_id_agtype_expr, resno++,
col_name, false);
+ query->targetList = lappend(query->targetList, te);
Review Comment:
Memory leak: col_name allocated with psprintf is never freed. The col_name
variable is assigned three times (lines 6587, 6598, 6606) without freeing the
previous allocations. Each psprintf call allocates memory that should be freed
with pfree. Add pfree(col_name) after using it in makeTargetEntry for each
assignment.
##########
src/backend/parser/cypher_expr.c:
##########
@@ -1975,6 +1976,343 @@ static bool function_exists(char *funcname, char
*extension)
return found;
}
+/*
+ * Try to optimize id(), start_id(), end_id() function calls.
+ *
+ * When the argument is a variable (vertex or edge) that came from a previous
+ * clause, we may have exposed column Vars (id_var, start_id_var, end_id_var)
+ * that can be used directly instead of calling the function on the full
+ * vertex/edge expression.
+ *
+ * This optimization avoids the expensive reconstruction of the vertex/edge
+ * object just to extract a single field like id.
+ *
+ * Returns NULL if optimization is not possible; otherwise returns the
+ * optimized expression.
+ */
+/*
+ * Helper function to find an entity only in the current cpstate's entities
list,
+ * not walking up to parent parsestates. This is important for the optimization
+ * because the entity's Var fields (id_var, etc.) are only valid in the context
+ * where they were set.
+ */
Review Comment:
Duplicate function documentation block. There are two consecutive comment
blocks (lines 1979-1992 and 1993-1998) before the
find_entity_in_current_cpstate function. The first block appears to be intended
for try_optimize_id_funcs (which is declared on line 96 and implemented on line
2149), but is placed in the wrong location. Move the first comment block to
line 2149 before the try_optimize_id_funcs implementation.
```suggestion
* Helper function used when optimizing id(), start_id(), end_id() function
* calls.
*
* When the argument to those functions is a variable (vertex or edge) that
* came from a previous clause, we may have exposed column Vars (id_var,
* start_id_var, end_id_var) that can be used directly instead of calling the
* function on the full vertex/edge expression. This function helps that
* optimization by finding an entity only in the current cpstate's entities
* list, not walking up to parent parsestates. This is important because the
* entity's Var fields (id_var, etc.) are only valid in the context where
they
* were set.
*/
```
##########
src/backend/parser/cypher_clause.c:
##########
@@ -6410,6 +6427,273 @@ transform_cypher_clause_as_subquery(cypher_parsestate
*cpstate,
return pnsi;
}
+/*
+ * Export hidden columns for entities in the current clause.
+ *
+ * For each named vertex/edge entity, we export its id (and for edges:
+ * start_id, end_id) as target entries. These can be referenced by
+ * subsequent clauses without rebuilding the full vertex/edge.
+ *
+ * This function handles two cases:
+ * 1. Entities declared in the current clause: Extract id from _agtype_build_*
args
+ * 2. Entities from previous clauses (with id_var set): Re-export using their
Vars
Review Comment:
Misleading documentation comment. The comment at lines 6437-6439 mentions
"This function handles two cases" including "2. Entities from previous clauses
(with id_var set): Re-export using their Vars", but the actual code at lines
6516-6519 skips entities from previous clauses entirely with the condition "if
(!entity->declared_in_current_clause) continue;". The function only exports
hidden columns for entities declared in the current clause. Update the comment
to remove case 2 or clarify that case 2 is intentionally skipped.
```suggestion
* For each named vertex/edge entity declared in the current clause, we
export
* its id (and for edges: start_id, end_id) as target entries. These can be
* referenced by subsequent clauses without rebuilding the full vertex/edge.
*
* Only entities marked as declared in the current clause are processed;
entities
* originating from previous clauses (even if they have id_var set) are
* intentionally skipped by this function.
```
##########
src/backend/parser/cypher_clause.c:
##########
@@ -6410,6 +6427,273 @@ transform_cypher_clause_as_subquery(cypher_parsestate
*cpstate,
return pnsi;
}
+/*
+ * Export hidden columns for entities in the current clause.
+ *
+ * For each named vertex/edge entity, we export its id (and for edges:
+ * start_id, end_id) as target entries. These can be referenced by
+ * subsequent clauses without rebuilding the full vertex/edge.
+ *
+ * This function handles two cases:
+ * 1. Entities declared in the current clause: Extract id from _agtype_build_*
args
+ * 2. Entities from previous clauses (with id_var set): Re-export using their
Vars
+ *
+ * The column names use the AGE_VARNAME prefix pattern
(AGE_DEFAULT_VARNAME_PREFIX)
+ * which makes them hidden from SELECT * output. The expand_star() function in
+ * cypher_item.c filters out columns with this prefix.
+ *
+ * Note: We use resjunk=false so these columns are included in the subquery RTE
+ * when PostgreSQL builds the column list via addRangeTableEntryForSubquery().
+ * If resjunk=true, PostgreSQL would skip them entirely.
+ */
+static void export_entity_hidden_columns(cypher_parsestate *cpstate,
+ Query *query)
+{
+ ListCell *lc;
+ int resno;
+ Oid graphid_to_agtype_oid;
+ List *exported_names = NIL; /* Track entity names already exported */
+
+ /* Get the graphid_to_agtype function OID for id conversions */
+ graphid_to_agtype_oid = get_ag_func_oid("graphid_to_agtype", 1,
GRAPHIDOID);
+
+ /* find the next resno for target entries */
+ resno = list_length(query->targetList) + 1;
+
+ foreach (lc, cpstate->entities)
+ {
+ transform_entity *entity = lfirst(lc);
+ char *entity_name;
+ char *col_name;
+ TargetEntry *te;
+ ListCell *lc2;
+ bool already_exported;
+
+ /* skip entities without names - they aren't referenced */
+ entity_name = get_entity_name(entity);
+ if (entity_name == NULL)
+ {
+ continue;
+ }
+
+ /*
+ * Skip if we've already exported columns for an entity with this name.
+ * This can happen when the same variable appears multiple times in a
+ * pattern, e.g., MATCH (a)-[]->(), ()-[]->(a)
+ */
+ already_exported = false;
+ foreach (lc2, exported_names)
+ {
+ if (strcmp((char *)lfirst(lc2), entity_name) == 0)
+ {
+ already_exported = true;
+ break;
+ }
+ }
+ if (already_exported)
+ {
+ continue;
+ }
+ exported_names = lappend(exported_names, entity_name);
Review Comment:
Potential issue with exported_names list. The exported_names list is used to
track entity names that have already been exported, but it appends entity_name
pointers directly without copying the strings. This could be problematic if
entity_name is freed or modified after being added to the list. While
get_entity_name likely returns a stable pointer from the entity structure, it
would be safer to use pstrdup to copy the string or rely on the fact that
entity names are stable for the lifetime of the function execution.
```suggestion
exported_names = lappend(exported_names, pstrdup(entity_name));
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]