Copilot commented on code in PR #2295:
URL: https://github.com/apache/age/pull/2295#discussion_r2654334472
##########
src/backend/parser/cypher_clause.c:
##########
@@ -6410,6 +6427,277 @@ transform_cypher_clause_as_subquery(cypher_parsestate
*cpstate,
return pnsi;
}
+/*
+ * export_entity_hidden_columns
+ *
+ * Add hidden target entries for entity id columns to the query's target list.
+ *
+ * For each named vertex/edge entity declared in the current clause, this
+ * exports its graphid values (id, and for edges: start_id, end_id) as
+ * additional target entries. These hidden columns allow subsequent clauses
+ * to access id values directly via Var references, avoiding the overhead of
+ * reconstructing the full vertex/edge object.
+ *
+ * The column names use the AGE_DEFAULT_VARNAME_PREFIX pattern (e.g.,
+ * "_age_default_varname_id_<name>") which makes them hidden from user-facing
+ * output. The expand_star() function in cypher_item.c filters out columns
+ * with this prefix from SELECT * results.
+ *
+ * Note: resjunk is set to false so PostgreSQL includes these columns when
+ * building the subquery RTE via addRangeTableEntryForSubquery(). Setting
+ * resjunk=true would cause PostgreSQL to omit them entirely.
+ *
+ * After this function runs, update_entity_vars_from_rte() is called to set
+ * each entity's id_var/start_id_var/end_id_var to reference these new columns.
+ */
+static void export_entity_hidden_columns(cypher_parsestate *cpstate,
+ Query *query)
+{
+ ListCell *lc;
+ int resno;
+ Oid graphid_to_agtype_oid;
+ List *exported_names = NIL; /* Track entity names already exported */
+
+ /* Get the graphid_to_agtype function OID for id conversions */
+ graphid_to_agtype_oid = get_ag_func_oid("graphid_to_agtype", 1,
GRAPHIDOID);
+
+ /* find the next resno for target entries */
+ resno = list_length(query->targetList) + 1;
+
+ foreach (lc, cpstate->entities)
+ {
+ transform_entity *entity = lfirst(lc);
+ char *entity_name;
+ char *col_name;
+ TargetEntry *te;
+ ListCell *lc2;
+ bool already_exported;
+
+ /* skip entities without names - they aren't referenced */
+ entity_name = get_entity_name(entity);
+ if (entity_name == NULL)
+ {
+ continue;
+ }
+
+ /*
+ * Skip if we've already exported columns for an entity with this name.
+ * This can happen when the same variable appears multiple times in a
+ * pattern, e.g., MATCH (a)-[]->(), ()-[]->(a)
+ */
+ already_exported = false;
+ foreach (lc2, exported_names)
+ {
+ if (strcmp((char *)lfirst(lc2), entity_name) == 0)
+ {
+ already_exported = true;
+ break;
+ }
+ }
+ if (already_exported)
+ {
+ continue;
+ }
+ exported_names = lappend(exported_names, entity_name);
+
+ /* skip path entities - they don't have id/start_id/end_id */
+ if (entity->type == ENT_PATH)
+ {
+ continue;
+ }
+
+ /* skip VLE edges for now - they have different structure */
+ if (entity->type == ENT_VLE_EDGE)
+ {
+ continue;
+ }
+
+ /*
+ * Only export hidden columns for entities declared in the current
clause.
+ * Entities from previous clauses already have their
id_var/start_id_var/end_id_var
+ * pointing to the subquery's hidden columns from when they were
exported.
+ */
+ if (!entity->declared_in_current_clause)
+ {
+ continue;
+ }
+
+ /* Extract id from the entity's FuncExpr */
+ {
+ Expr *expr = entity->expr;
+ FuncExpr *func_expr;
+ Node *id_node;
+ Node *start_id_node;
+ Node *end_id_node;
+ FuncExpr *id_agtype_expr;
+ FuncExpr *start_id_agtype_expr;
+ FuncExpr *end_id_agtype_expr;
+
+ if (expr == NULL)
+ {
+ continue;
+ }
+
+ /*
+ * The expr must be a FuncExpr (_agtype_build_vertex or
_agtype_build_edge)
+ * for entities declared in the current clause.
+ */
+ if (!IsA(expr, FuncExpr))
+ {
+ continue;
+ }
+
+ func_expr = (FuncExpr *)expr;
+
+ /*
+ * For vertices: _agtype_build_vertex(id, label_name, properties)
+ * For edges: _agtype_build_edge(id, start_id, end_id, label_name,
properties)
+ */
+ if (entity->type == ENT_VERTEX)
+ {
+ /* vertex: args are (id, label_name, properties) */
+ if (list_length(func_expr->args) < 3)
+ {
+ continue;
+ }
+ id_node = (Node *)linitial(func_expr->args);
+ start_id_node = NULL;
+ end_id_node = NULL;
+ }
+ else if (entity->type == ENT_EDGE)
+ {
+ /* edge: args are (id, start_id, end_id, label_name,
properties) */
+ if (list_length(func_expr->args) < 5)
+ {
+ continue;
+ }
+ id_node = (Node *)linitial(func_expr->args);
+ start_id_node = (Node *)lsecond(func_expr->args);
+ end_id_node = (Node *)lthird(func_expr->args);
+ }
+ else
+ {
+ continue;
+ }
+
+ /*
+ * Create target entries for id (as agtype).
+ * We wrap id in graphid_to_agtype() since age_id() returns agtype.
+ */
+ id_agtype_expr = makeFuncExpr(graphid_to_agtype_oid, AGTYPEOID,
+ list_make1(copyObject(id_node)),
InvalidOid,
+ InvalidOid, COERCE_EXPLICIT_CALL);
+
+ col_name = psprintf("%s%s", AGE_VARNAME_ID_PREFIX, entity_name);
+ te = makeTargetEntry((Expr *)id_agtype_expr, resno++, col_name,
false);
+ query->targetList = lappend(query->targetList, te);
+
+ /* For edges, also export start_id and end_id */
+ if (entity->type == ENT_EDGE)
+ {
+ start_id_agtype_expr = makeFuncExpr(graphid_to_agtype_oid,
AGTYPEOID,
+
list_make1(copyObject(start_id_node)),
+ InvalidOid, InvalidOid,
+ COERCE_EXPLICIT_CALL);
+ col_name = psprintf("%s%s", AGE_VARNAME_START_ID_PREFIX,
entity_name);
+ te = makeTargetEntry((Expr *)start_id_agtype_expr, resno++,
col_name, false);
+ query->targetList = lappend(query->targetList, te);
+
+ end_id_agtype_expr = makeFuncExpr(graphid_to_agtype_oid,
AGTYPEOID,
+
list_make1(copyObject(end_id_node)),
+ InvalidOid, InvalidOid,
+ COERCE_EXPLICIT_CALL);
+ col_name = psprintf("%s%s", AGE_VARNAME_END_ID_PREFIX,
entity_name);
+ te = makeTargetEntry((Expr *)end_id_agtype_expr, resno++,
col_name, false);
+ query->targetList = lappend(query->targetList, te);
+ }
+ }
+ }
Review Comment:
The `exported_names` list is allocated but never freed. In PostgreSQL's
memory context system, this is typically handled automatically when the memory
context is reset, but it's better practice to explicitly free the list at the
end of the function to avoid unnecessary memory usage during long-running
queries with many clauses. Consider adding `list_free(exported_names);` at the
end of the function.
```suggestion
}
/* Free the helper list of exported entity names */
list_free(exported_names);
```
##########
src/backend/parser/cypher_clause.c:
##########
@@ -6410,6 +6427,277 @@ transform_cypher_clause_as_subquery(cypher_parsestate
*cpstate,
return pnsi;
}
+/*
+ * export_entity_hidden_columns
+ *
+ * Add hidden target entries for entity id columns to the query's target list.
+ *
+ * For each named vertex/edge entity declared in the current clause, this
+ * exports its graphid values (id, and for edges: start_id, end_id) as
+ * additional target entries. These hidden columns allow subsequent clauses
+ * to access id values directly via Var references, avoiding the overhead of
+ * reconstructing the full vertex/edge object.
+ *
+ * The column names use the AGE_DEFAULT_VARNAME_PREFIX pattern (e.g.,
+ * "_age_default_varname_id_<name>") which makes them hidden from user-facing
+ * output. The expand_star() function in cypher_item.c filters out columns
+ * with this prefix from SELECT * results.
+ *
+ * Note: resjunk is set to false so PostgreSQL includes these columns when
+ * building the subquery RTE via addRangeTableEntryForSubquery(). Setting
+ * resjunk=true would cause PostgreSQL to omit them entirely.
+ *
+ * After this function runs, update_entity_vars_from_rte() is called to set
+ * each entity's id_var/start_id_var/end_id_var to reference these new columns.
+ */
+static void export_entity_hidden_columns(cypher_parsestate *cpstate,
+ Query *query)
+{
+ ListCell *lc;
+ int resno;
+ Oid graphid_to_agtype_oid;
+ List *exported_names = NIL; /* Track entity names already exported */
+
+ /* Get the graphid_to_agtype function OID for id conversions */
+ graphid_to_agtype_oid = get_ag_func_oid("graphid_to_agtype", 1,
GRAPHIDOID);
+
+ /* find the next resno for target entries */
+ resno = list_length(query->targetList) + 1;
+
+ foreach (lc, cpstate->entities)
+ {
+ transform_entity *entity = lfirst(lc);
+ char *entity_name;
+ char *col_name;
+ TargetEntry *te;
+ ListCell *lc2;
+ bool already_exported;
+
+ /* skip entities without names - they aren't referenced */
+ entity_name = get_entity_name(entity);
+ if (entity_name == NULL)
+ {
+ continue;
+ }
+
+ /*
+ * Skip if we've already exported columns for an entity with this name.
+ * This can happen when the same variable appears multiple times in a
+ * pattern, e.g., MATCH (a)-[]->(), ()-[]->(a)
+ */
+ already_exported = false;
+ foreach (lc2, exported_names)
+ {
+ if (strcmp((char *)lfirst(lc2), entity_name) == 0)
+ {
+ already_exported = true;
+ break;
+ }
+ }
+ if (already_exported)
+ {
+ continue;
+ }
+ exported_names = lappend(exported_names, entity_name);
+
+ /* skip path entities - they don't have id/start_id/end_id */
+ if (entity->type == ENT_PATH)
+ {
+ continue;
+ }
+
+ /* skip VLE edges for now - they have different structure */
+ if (entity->type == ENT_VLE_EDGE)
+ {
+ continue;
+ }
+
+ /*
+ * Only export hidden columns for entities declared in the current
clause.
+ * Entities from previous clauses already have their
id_var/start_id_var/end_id_var
+ * pointing to the subquery's hidden columns from when they were
exported.
+ */
+ if (!entity->declared_in_current_clause)
+ {
+ continue;
+ }
+
+ /* Extract id from the entity's FuncExpr */
+ {
+ Expr *expr = entity->expr;
+ FuncExpr *func_expr;
+ Node *id_node;
+ Node *start_id_node;
+ Node *end_id_node;
+ FuncExpr *id_agtype_expr;
+ FuncExpr *start_id_agtype_expr;
+ FuncExpr *end_id_agtype_expr;
+
+ if (expr == NULL)
+ {
+ continue;
+ }
+
+ /*
+ * The expr must be a FuncExpr (_agtype_build_vertex or
_agtype_build_edge)
+ * for entities declared in the current clause.
+ */
+ if (!IsA(expr, FuncExpr))
+ {
+ continue;
+ }
+
+ func_expr = (FuncExpr *)expr;
+
+ /*
+ * For vertices: _agtype_build_vertex(id, label_name, properties)
+ * For edges: _agtype_build_edge(id, start_id, end_id, label_name,
properties)
+ */
+ if (entity->type == ENT_VERTEX)
+ {
+ /* vertex: args are (id, label_name, properties) */
+ if (list_length(func_expr->args) < 3)
+ {
+ continue;
+ }
+ id_node = (Node *)linitial(func_expr->args);
+ start_id_node = NULL;
+ end_id_node = NULL;
+ }
+ else if (entity->type == ENT_EDGE)
+ {
+ /* edge: args are (id, start_id, end_id, label_name,
properties) */
+ if (list_length(func_expr->args) < 5)
+ {
+ continue;
+ }
+ id_node = (Node *)linitial(func_expr->args);
+ start_id_node = (Node *)lsecond(func_expr->args);
+ end_id_node = (Node *)lthird(func_expr->args);
+ }
+ else
+ {
+ continue;
+ }
+
+ /*
+ * Create target entries for id (as agtype).
+ * We wrap id in graphid_to_agtype() since age_id() returns agtype.
+ */
+ id_agtype_expr = makeFuncExpr(graphid_to_agtype_oid, AGTYPEOID,
+ list_make1(copyObject(id_node)),
InvalidOid,
+ InvalidOid, COERCE_EXPLICIT_CALL);
+
+ col_name = psprintf("%s%s", AGE_VARNAME_ID_PREFIX, entity_name);
+ te = makeTargetEntry((Expr *)id_agtype_expr, resno++, col_name,
false);
+ query->targetList = lappend(query->targetList, te);
+
+ /* For edges, also export start_id and end_id */
+ if (entity->type == ENT_EDGE)
+ {
+ start_id_agtype_expr = makeFuncExpr(graphid_to_agtype_oid,
AGTYPEOID,
+
list_make1(copyObject(start_id_node)),
+ InvalidOid, InvalidOid,
+ COERCE_EXPLICIT_CALL);
+ col_name = psprintf("%s%s", AGE_VARNAME_START_ID_PREFIX,
entity_name);
+ te = makeTargetEntry((Expr *)start_id_agtype_expr, resno++,
col_name, false);
+ query->targetList = lappend(query->targetList, te);
+
+ end_id_agtype_expr = makeFuncExpr(graphid_to_agtype_oid,
AGTYPEOID,
+
list_make1(copyObject(end_id_node)),
+ InvalidOid, InvalidOid,
+ COERCE_EXPLICIT_CALL);
+ col_name = psprintf("%s%s", AGE_VARNAME_END_ID_PREFIX,
entity_name);
+ te = makeTargetEntry((Expr *)end_id_agtype_expr, resno++,
col_name, false);
+ query->targetList = lappend(query->targetList, te);
+ }
Review Comment:
Memory leak: `col_name` is allocated with `psprintf` at line 6591 but is not
freed before being reassigned at line 6602. The first allocation will be
leaked. The variable should be freed with `pfree(col_name)` before each
subsequent `psprintf` assignment. The same issue occurs at line 6610 where
`col_name` from line 6602 is not freed before reassignment.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]