Copilot commented on code in PR #2295:
URL: https://github.com/apache/age/pull/2295#discussion_r2654334472


##########
src/backend/parser/cypher_clause.c:
##########
@@ -6410,6 +6427,277 @@ transform_cypher_clause_as_subquery(cypher_parsestate 
*cpstate,
     return pnsi;
 }
 
+/*
+ * export_entity_hidden_columns
+ *
+ * Add hidden target entries for entity id columns to the query's target list.
+ *
+ * For each named vertex/edge entity declared in the current clause, this
+ * exports its graphid values (id, and for edges: start_id, end_id) as
+ * additional target entries. These hidden columns allow subsequent clauses
+ * to access id values directly via Var references, avoiding the overhead of
+ * reconstructing the full vertex/edge object.
+ *
+ * The column names use the AGE_DEFAULT_VARNAME_PREFIX pattern (e.g.,
+ * "_age_default_varname_id_<name>") which makes them hidden from user-facing
+ * output. The expand_star() function in cypher_item.c filters out columns
+ * with this prefix from SELECT * results.
+ *
+ * Note: resjunk is set to false so PostgreSQL includes these columns when
+ * building the subquery RTE via addRangeTableEntryForSubquery(). Setting
+ * resjunk=true would cause PostgreSQL to omit them entirely.
+ *
+ * After this function runs, update_entity_vars_from_rte() is called to set
+ * each entity's id_var/start_id_var/end_id_var to reference these new columns.
+ */
+static void export_entity_hidden_columns(cypher_parsestate *cpstate,
+                                         Query *query)
+{
+    ListCell *lc;
+    int resno;
+    Oid graphid_to_agtype_oid;
+    List *exported_names = NIL;  /* Track entity names already exported */
+
+    /* Get the graphid_to_agtype function OID for id conversions */
+    graphid_to_agtype_oid = get_ag_func_oid("graphid_to_agtype", 1, 
GRAPHIDOID);
+
+    /* find the next resno for target entries */
+    resno = list_length(query->targetList) + 1;
+
+    foreach (lc, cpstate->entities)
+    {
+        transform_entity *entity = lfirst(lc);
+        char *entity_name;
+        char *col_name;
+        TargetEntry *te;
+        ListCell *lc2;
+        bool already_exported;
+
+        /* skip entities without names - they aren't referenced */
+        entity_name = get_entity_name(entity);
+        if (entity_name == NULL)
+        {
+            continue;
+        }
+
+        /*
+         * Skip if we've already exported columns for an entity with this name.
+         * This can happen when the same variable appears multiple times in a
+         * pattern, e.g., MATCH (a)-[]->(), ()-[]->(a)
+         */
+        already_exported = false;
+        foreach (lc2, exported_names)
+        {
+            if (strcmp((char *)lfirst(lc2), entity_name) == 0)
+            {
+                already_exported = true;
+                break;
+            }
+        }
+        if (already_exported)
+        {
+            continue;
+        }
+        exported_names = lappend(exported_names, entity_name);
+
+        /* skip path entities - they don't have id/start_id/end_id */
+        if (entity->type == ENT_PATH)
+        {
+            continue;
+        }
+
+        /* skip VLE edges for now - they have different structure */
+        if (entity->type == ENT_VLE_EDGE)
+        {
+            continue;
+        }
+
+        /*
+         * Only export hidden columns for entities declared in the current 
clause.
+         * Entities from previous clauses already have their 
id_var/start_id_var/end_id_var
+         * pointing to the subquery's hidden columns from when they were 
exported.
+         */
+        if (!entity->declared_in_current_clause)
+        {
+            continue;
+        }
+
+        /* Extract id from the entity's FuncExpr */
+        {
+            Expr *expr = entity->expr;
+            FuncExpr *func_expr;
+            Node *id_node;
+            Node *start_id_node;
+            Node *end_id_node;
+            FuncExpr *id_agtype_expr;
+            FuncExpr *start_id_agtype_expr;
+            FuncExpr *end_id_agtype_expr;
+
+            if (expr == NULL)
+            {
+                continue;
+            }
+
+            /*
+             * The expr must be a FuncExpr (_agtype_build_vertex or 
_agtype_build_edge)
+             * for entities declared in the current clause.
+             */
+            if (!IsA(expr, FuncExpr))
+            {
+                continue;
+            }
+
+            func_expr = (FuncExpr *)expr;
+
+            /*
+             * For vertices: _agtype_build_vertex(id, label_name, properties)
+             * For edges: _agtype_build_edge(id, start_id, end_id, label_name, 
properties)
+             */
+            if (entity->type == ENT_VERTEX)
+            {
+                /* vertex: args are (id, label_name, properties) */
+                if (list_length(func_expr->args) < 3)
+                {
+                    continue;
+                }
+                id_node = (Node *)linitial(func_expr->args);
+                start_id_node = NULL;
+                end_id_node = NULL;
+            }
+            else if (entity->type == ENT_EDGE)
+            {
+                /* edge: args are (id, start_id, end_id, label_name, 
properties) */
+                if (list_length(func_expr->args) < 5)
+                {
+                    continue;
+                }
+                id_node = (Node *)linitial(func_expr->args);
+                start_id_node = (Node *)lsecond(func_expr->args);
+                end_id_node = (Node *)lthird(func_expr->args);
+            }
+            else
+            {
+                continue;
+            }
+
+            /*
+             * Create target entries for id (as agtype).
+             * We wrap id in graphid_to_agtype() since age_id() returns agtype.
+             */
+            id_agtype_expr = makeFuncExpr(graphid_to_agtype_oid, AGTYPEOID,
+                                          list_make1(copyObject(id_node)), 
InvalidOid,
+                                          InvalidOid, COERCE_EXPLICIT_CALL);
+
+            col_name = psprintf("%s%s", AGE_VARNAME_ID_PREFIX, entity_name);
+            te = makeTargetEntry((Expr *)id_agtype_expr, resno++, col_name, 
false);
+            query->targetList = lappend(query->targetList, te);
+
+            /* For edges, also export start_id and end_id */
+            if (entity->type == ENT_EDGE)
+            {
+                start_id_agtype_expr = makeFuncExpr(graphid_to_agtype_oid, 
AGTYPEOID,
+                                                    
list_make1(copyObject(start_id_node)),
+                                                    InvalidOid, InvalidOid,
+                                                    COERCE_EXPLICIT_CALL);
+                col_name = psprintf("%s%s", AGE_VARNAME_START_ID_PREFIX, 
entity_name);
+                te = makeTargetEntry((Expr *)start_id_agtype_expr, resno++, 
col_name, false);
+                query->targetList = lappend(query->targetList, te);
+
+                end_id_agtype_expr = makeFuncExpr(graphid_to_agtype_oid, 
AGTYPEOID,
+                                                  
list_make1(copyObject(end_id_node)),
+                                                  InvalidOid, InvalidOid,
+                                                  COERCE_EXPLICIT_CALL);
+                col_name = psprintf("%s%s", AGE_VARNAME_END_ID_PREFIX, 
entity_name);
+                te = makeTargetEntry((Expr *)end_id_agtype_expr, resno++, 
col_name, false);
+                query->targetList = lappend(query->targetList, te);
+            }
+        }
+    }

Review Comment:
   The `exported_names` list is allocated but never freed. In PostgreSQL's 
memory context system, this is typically handled automatically when the memory 
context is reset, but it's better practice to explicitly free the list at the 
end of the function to avoid unnecessary memory usage during long-running 
queries with many clauses. Consider adding `list_free(exported_names);` at the 
end of the function.
   ```suggestion
       }
   
       /* Free the helper list of exported entity names */
       list_free(exported_names);
   ```



##########
src/backend/parser/cypher_clause.c:
##########
@@ -6410,6 +6427,277 @@ transform_cypher_clause_as_subquery(cypher_parsestate 
*cpstate,
     return pnsi;
 }
 
+/*
+ * export_entity_hidden_columns
+ *
+ * Add hidden target entries for entity id columns to the query's target list.
+ *
+ * For each named vertex/edge entity declared in the current clause, this
+ * exports its graphid values (id, and for edges: start_id, end_id) as
+ * additional target entries. These hidden columns allow subsequent clauses
+ * to access id values directly via Var references, avoiding the overhead of
+ * reconstructing the full vertex/edge object.
+ *
+ * The column names use the AGE_DEFAULT_VARNAME_PREFIX pattern (e.g.,
+ * "_age_default_varname_id_<name>") which makes them hidden from user-facing
+ * output. The expand_star() function in cypher_item.c filters out columns
+ * with this prefix from SELECT * results.
+ *
+ * Note: resjunk is set to false so PostgreSQL includes these columns when
+ * building the subquery RTE via addRangeTableEntryForSubquery(). Setting
+ * resjunk=true would cause PostgreSQL to omit them entirely.
+ *
+ * After this function runs, update_entity_vars_from_rte() is called to set
+ * each entity's id_var/start_id_var/end_id_var to reference these new columns.
+ */
+static void export_entity_hidden_columns(cypher_parsestate *cpstate,
+                                         Query *query)
+{
+    ListCell *lc;
+    int resno;
+    Oid graphid_to_agtype_oid;
+    List *exported_names = NIL;  /* Track entity names already exported */
+
+    /* Get the graphid_to_agtype function OID for id conversions */
+    graphid_to_agtype_oid = get_ag_func_oid("graphid_to_agtype", 1, 
GRAPHIDOID);
+
+    /* find the next resno for target entries */
+    resno = list_length(query->targetList) + 1;
+
+    foreach (lc, cpstate->entities)
+    {
+        transform_entity *entity = lfirst(lc);
+        char *entity_name;
+        char *col_name;
+        TargetEntry *te;
+        ListCell *lc2;
+        bool already_exported;
+
+        /* skip entities without names - they aren't referenced */
+        entity_name = get_entity_name(entity);
+        if (entity_name == NULL)
+        {
+            continue;
+        }
+
+        /*
+         * Skip if we've already exported columns for an entity with this name.
+         * This can happen when the same variable appears multiple times in a
+         * pattern, e.g., MATCH (a)-[]->(), ()-[]->(a)
+         */
+        already_exported = false;
+        foreach (lc2, exported_names)
+        {
+            if (strcmp((char *)lfirst(lc2), entity_name) == 0)
+            {
+                already_exported = true;
+                break;
+            }
+        }
+        if (already_exported)
+        {
+            continue;
+        }
+        exported_names = lappend(exported_names, entity_name);
+
+        /* skip path entities - they don't have id/start_id/end_id */
+        if (entity->type == ENT_PATH)
+        {
+            continue;
+        }
+
+        /* skip VLE edges for now - they have different structure */
+        if (entity->type == ENT_VLE_EDGE)
+        {
+            continue;
+        }
+
+        /*
+         * Only export hidden columns for entities declared in the current 
clause.
+         * Entities from previous clauses already have their 
id_var/start_id_var/end_id_var
+         * pointing to the subquery's hidden columns from when they were 
exported.
+         */
+        if (!entity->declared_in_current_clause)
+        {
+            continue;
+        }
+
+        /* Extract id from the entity's FuncExpr */
+        {
+            Expr *expr = entity->expr;
+            FuncExpr *func_expr;
+            Node *id_node;
+            Node *start_id_node;
+            Node *end_id_node;
+            FuncExpr *id_agtype_expr;
+            FuncExpr *start_id_agtype_expr;
+            FuncExpr *end_id_agtype_expr;
+
+            if (expr == NULL)
+            {
+                continue;
+            }
+
+            /*
+             * The expr must be a FuncExpr (_agtype_build_vertex or 
_agtype_build_edge)
+             * for entities declared in the current clause.
+             */
+            if (!IsA(expr, FuncExpr))
+            {
+                continue;
+            }
+
+            func_expr = (FuncExpr *)expr;
+
+            /*
+             * For vertices: _agtype_build_vertex(id, label_name, properties)
+             * For edges: _agtype_build_edge(id, start_id, end_id, label_name, 
properties)
+             */
+            if (entity->type == ENT_VERTEX)
+            {
+                /* vertex: args are (id, label_name, properties) */
+                if (list_length(func_expr->args) < 3)
+                {
+                    continue;
+                }
+                id_node = (Node *)linitial(func_expr->args);
+                start_id_node = NULL;
+                end_id_node = NULL;
+            }
+            else if (entity->type == ENT_EDGE)
+            {
+                /* edge: args are (id, start_id, end_id, label_name, 
properties) */
+                if (list_length(func_expr->args) < 5)
+                {
+                    continue;
+                }
+                id_node = (Node *)linitial(func_expr->args);
+                start_id_node = (Node *)lsecond(func_expr->args);
+                end_id_node = (Node *)lthird(func_expr->args);
+            }
+            else
+            {
+                continue;
+            }
+
+            /*
+             * Create target entries for id (as agtype).
+             * We wrap id in graphid_to_agtype() since age_id() returns agtype.
+             */
+            id_agtype_expr = makeFuncExpr(graphid_to_agtype_oid, AGTYPEOID,
+                                          list_make1(copyObject(id_node)), 
InvalidOid,
+                                          InvalidOid, COERCE_EXPLICIT_CALL);
+
+            col_name = psprintf("%s%s", AGE_VARNAME_ID_PREFIX, entity_name);
+            te = makeTargetEntry((Expr *)id_agtype_expr, resno++, col_name, 
false);
+            query->targetList = lappend(query->targetList, te);
+
+            /* For edges, also export start_id and end_id */
+            if (entity->type == ENT_EDGE)
+            {
+                start_id_agtype_expr = makeFuncExpr(graphid_to_agtype_oid, 
AGTYPEOID,
+                                                    
list_make1(copyObject(start_id_node)),
+                                                    InvalidOid, InvalidOid,
+                                                    COERCE_EXPLICIT_CALL);
+                col_name = psprintf("%s%s", AGE_VARNAME_START_ID_PREFIX, 
entity_name);
+                te = makeTargetEntry((Expr *)start_id_agtype_expr, resno++, 
col_name, false);
+                query->targetList = lappend(query->targetList, te);
+
+                end_id_agtype_expr = makeFuncExpr(graphid_to_agtype_oid, 
AGTYPEOID,
+                                                  
list_make1(copyObject(end_id_node)),
+                                                  InvalidOid, InvalidOid,
+                                                  COERCE_EXPLICIT_CALL);
+                col_name = psprintf("%s%s", AGE_VARNAME_END_ID_PREFIX, 
entity_name);
+                te = makeTargetEntry((Expr *)end_id_agtype_expr, resno++, 
col_name, false);
+                query->targetList = lappend(query->targetList, te);
+            }

Review Comment:
   Memory leak: `col_name` is allocated with `psprintf` at line 6591 but is not 
freed before being reassigned at line 6602. The first allocation will be 
leaked. The variable should be freed with `pfree(col_name)` before each 
subsequent `psprintf` assignment. The same issue occurs at line 6610 where 
`col_name` from line 6602 is not freed before reassignment.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to