This is an automated email from the ASF dual-hosted git repository.
kou pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/main by this push:
new 9cb051b723 GH-39784: [C++][Gandiva] Fix decimal in_expr crash with
cached object code (#49951)
9cb051b723 is described below
commit 9cb051b72326135b5094b82d1771a4331284541d
Author: Aaditya Srinivasan <[email protected]>
AuthorDate: Thu Jun 18 14:03:13 2026 +0530
GH-39784: [C++][Gandiva] Fix decimal in_expr crash with cached object code
(#49951)
### Rationale for this change
The decimal-specific `VisitInExpression` LLVM generation path embedded a
raw holder pointer directly into generated object code. When Gandiva reused
cached object code, the embedded pointer could become stale, leading to crashes
during cached decimal `in_expr` evaluation.
### What changes are included in this PR?
- Updated decimal `in_expr` LLVM generation to load holder pointers
dynamically at runtime using `arg_holder_ptrs_`, matching the existing generic
`InExpression` implementation.
- Added a regression test covering cached decimal `in_expr` execution.
### Are these changes tested?
Yes.
- Reproduced the crash locally before the fix.
- Verified the new regression test passes after the fix.
- Ran:
- `TestIn.TestInDecimalCached`
- `TestIn.*`
- repeated cached execution using `--gtest_repeat=100`
### Are there any user-facing changes?
No.
**This PR contains a "Critical Fix".**
This change fixes a cache-path crash in Gandiva when evaluating decimal
`in_expr` expressions using cached object code.
* GitHub Issue: #39784
Authored-by: Aaditya Srinivasan <[email protected]>
Signed-off-by: Sutou Kouhei <[email protected]>
---
cpp/src/gandiva/llvm_generator.cc | 13 ++++++++++---
cpp/src/gandiva/tests/in_expr_test.cc | 35 +++++++++++++++++++----------------
2 files changed, 29 insertions(+), 19 deletions(-)
diff --git a/cpp/src/gandiva/llvm_generator.cc
b/cpp/src/gandiva/llvm_generator.cc
index 0f0918b3a1..a42f71a1f7 100644
--- a/cpp/src/gandiva/llvm_generator.cc
+++ b/cpp/src/gandiva/llvm_generator.cc
@@ -1118,9 +1118,16 @@ void
LLVMGenerator::Visitor::VisitInExpression<gandiva::DecimalScalar128>(
const InExprDex<gandiva::DecimalScalar128>& dex_instance =
dynamic_cast<const InExprDex<gandiva::DecimalScalar128>&>(dex);
/* add the holder at the beginning */
- llvm::Constant* ptr_int_cast =
- types->i64_constant((int64_t)(dex_instance.in_holder().get()));
- params.push_back(ptr_int_cast);
+ // Switch to the entry block and load the holder pointer
+ auto builder = ir_builder();
+ llvm::BasicBlock* saved_block = builder->GetInsertBlock();
+ builder->SetInsertPoint(entry_block_);
+
+ llvm::Value* in_holder = generator_->LoadVectorAtIndex(
+ arg_holder_ptrs_, types->i64_type(), dex_instance.get_holder_idx(),
"in_holder");
+
+ builder->SetInsertPoint(saved_block);
+ params.push_back(in_holder);
/* eval expr result */
for (auto& pair : dex.args()) {
diff --git a/cpp/src/gandiva/tests/in_expr_test.cc
b/cpp/src/gandiva/tests/in_expr_test.cc
index 9650203724..bd7f3756b2 100644
--- a/cpp/src/gandiva/tests/in_expr_test.cc
+++ b/cpp/src/gandiva/tests/in_expr_test.cc
@@ -184,41 +184,44 @@ TEST_F(TestIn, TestInDecimal) {
auto field0 = field("f0", arrow::decimal128(precision, scale));
auto schema = arrow::schema({field0});
- // Build In f0 + f1 in (6, 11)
auto node_f0 = TreeExprBuilder::MakeField(field0);
gandiva::DecimalScalar128 d0("6", precision, scale);
gandiva::DecimalScalar128 d1("12", precision, scale);
gandiva::DecimalScalar128 d2("11", precision, scale);
std::unordered_set<gandiva::DecimalScalar128> in_constants({d0, d1, d2});
+
auto in_expr = TreeExprBuilder::MakeInExpressionDecimal(node_f0,
in_constants);
auto condition = TreeExprBuilder::MakeCondition(in_expr);
- std::shared_ptr<Filter> filter;
- auto status = Filter::Make(schema, condition, TestConfiguration(), &filter);
- EXPECT_TRUE(status.ok());
-
- // Create a row-batch with some sample data
int num_records = 5;
auto values0 = MakeDecimalVector({"1", "2", "0", "-6", "6"});
auto array0 =
MakeArrowArrayDecimal(decimal_type, values0, {true, true, true, false,
true});
- // expected output (indices for which condition matches)
+
auto exp = MakeArrowArrayUint16({4});
- // prepare input record batch
auto in_batch = arrow::RecordBatch::Make(schema, num_records, {array0});
- std::shared_ptr<SelectionVector> selection_vector;
- status = SelectionVector::MakeInt16(num_records, pool_, &selection_vector);
- EXPECT_TRUE(status.ok());
+ // GH-39784: Verify decimal in_expr works correctly when the generated
+ // object code is reused from cache.
+ for (int i = 0; i < 2; ++i) {
+ std::shared_ptr<Filter> filter;
+ ASSERT_OK(Filter::Make(schema, condition, TestConfiguration(), &filter));
- // Evaluate expression
- status = filter->Evaluate(*in_batch, selection_vector);
- EXPECT_TRUE(status.ok());
+ if (i == 0) {
+ EXPECT_FALSE(filter->GetBuiltFromCache());
+ } else {
+ EXPECT_TRUE(filter->GetBuiltFromCache());
+ }
- // Validate results
- EXPECT_ARROW_ARRAY_EQUALS(exp, selection_vector->ToArray());
+ std::shared_ptr<SelectionVector> selection_vector;
+ ASSERT_OK(SelectionVector::MakeInt16(num_records, pool_,
&selection_vector));
+
+ ASSERT_OK(filter->Evaluate(*in_batch, selection_vector));
+
+ EXPECT_ARROW_ARRAY_EQUALS(exp, selection_vector->ToArray());
+ }
}
TEST_F(TestIn, TestInString) {