This is an automated email from the ASF dual-hosted git repository.
zanmato pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/main by this push:
new 56436e8c37 GH-45564: [C++][Acero] Add size validation for names and
expressions vectors in ProjectNode (#45565)
56436e8c37 is described below
commit 56436e8c3733bfc1d556c32086c1397cba5461dd
Author: mroz45 <[email protected]>
AuthorDate: Tue Mar 11 08:00:04 2025 +0100
GH-45564: [C++][Acero] Add size validation for names and expressions
vectors in ProjectNode (#45565)
### What changes are included in this PR?
Added a check in ProjectNode that validates that the names and expressions
vectors have the same size; a mismatch now returns an Invalid status.
### Are these changes tested?
Yes
### Are there any user-facing changes?
No
* GitHub Issue: #45564
Lead-authored-by: kamilt <[email protected]>
Co-authored-by: mroz45 <[email protected]>
Co-authored-by: Rossi Sun <[email protected]>
Signed-off-by: Rossi Sun <[email protected]>
---
cpp/src/arrow/acero/plan_test.cc | 17 +++++++++++++++++
cpp/src/arrow/acero/project_node.cc | 7 ++++++-
2 files changed, 23 insertions(+), 1 deletion(-)
diff --git a/cpp/src/arrow/acero/plan_test.cc b/cpp/src/arrow/acero/plan_test.cc
index 61ab09f667..0759a1ab34 100644
--- a/cpp/src/arrow/acero/plan_test.cc
+++ b/cpp/src/arrow/acero/plan_test.cc
@@ -1266,6 +1266,23 @@ TEST(ExecPlanExecution, SourceFilterProjectGroupedSumFilter) {
}
}
+TEST(ExecPlanExecution, ProjectNamesSizeMismatch) {
+ auto input = MakeGroupableBatches();
+
+ Declaration plan = Declaration::Sequence(
+ {{"source", SourceNodeOptions{input.schema, input.gen(true, /*slow=*/false)}},
+ {"project", ProjectNodeOptions{
+ /*expressions=*/{field_ref("str"),
+ call("multiply", {field_ref("i32"), literal(2)})},
+ /*names=*/{"a"}}}}); // expected 2 names but only 1 provided
+
+ EXPECT_RAISES_WITH_MESSAGE_THAT(
+ Invalid,
+ ::testing::HasSubstr(
+ "Project node's size of names 1 doesn't match size of expressions 2"),
+ DeclarationToTable(std::move(plan)));
+}
+
TEST(ExecPlanExecution, SourceFilterProjectGroupedSumOrderBy) {
for (bool parallel : {false, true}) {
SCOPED_TRACE(parallel ? "parallel/merged" : "serial");
diff --git a/cpp/src/arrow/acero/project_node.cc b/cpp/src/arrow/acero/project_node.cc
index bcabe585b2..c810fc071f 100644
--- a/cpp/src/arrow/acero/project_node.cc
+++ b/cpp/src/arrow/acero/project_node.cc
@@ -59,8 +59,13 @@ class ProjectNode : public MapNode {
for (size_t i = 0; i < exprs.size(); ++i) {
names[i] = exprs[i].ToString();
}
+ } else {
+ ARROW_RETURN_IF(
+ names.size() != exprs.size(),
+ Status::Invalid("Project node's size of names " + std::to_string(names.size()) +
+ " doesn't match size of expressions " +
+ std::to_string(exprs.size())));
}
-
FieldVector fields(exprs.size());
int i = 0;
for (auto& expr : exprs) {