This is an automated email from the ASF dual-hosted git repository.

kou pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/main by this push:
     new 9cb051b723 GH-39784: [C++][Gandiva] Fix decimal in_expr crash with 
cached object code (#49951)
9cb051b723 is described below

commit 9cb051b72326135b5094b82d1771a4331284541d
Author: Aaditya Srinivasan <[email protected]>
AuthorDate: Thu Jun 18 14:03:13 2026 +0530

    GH-39784: [C++][Gandiva] Fix decimal in_expr crash with cached object code 
(#49951)
    
    ### Rationale for this change
    
    The decimal-specific `VisitInExpression` LLVM generation path embedded a 
raw holder pointer directly into generated object code. When Gandiva reused 
cached object code, the embedded pointer could become stale, leading to crashes 
during cached decimal `in_expr` evaluation.
    
    ### What changes are included in this PR?
    
    - Updated decimal `in_expr` LLVM generation to load holder pointers 
dynamically at runtime using `arg_holder_ptrs_`, matching the existing generic 
`InExpression` implementation.
    - Added a regression test covering cached decimal `in_expr` execution.
    
    ### Are these changes tested?
    
    Yes.
    
    - Reproduced the crash locally before the fix.
    - Verified the new regression test passes after the fix.
    - Ran:
      - `TestIn.TestInDecimalCached`
      - `TestIn.*`
      -  repeated cached execution using `--gtest_repeat=100`
    
    ### Are there any user-facing changes?
    
    No.
    
    **This PR contains a "Critical Fix".**
    
    This change fixes a cache-path crash in Gandiva when evaluating decimal 
`in_expr` expressions using cached object code.
    
    * GitHub Issue: #39784
    
    Authored-by: Aaditya Srinivasan <[email protected]>
    Signed-off-by: Sutou Kouhei <[email protected]>
---
 cpp/src/gandiva/llvm_generator.cc     | 13 ++++++++++---
 cpp/src/gandiva/tests/in_expr_test.cc | 35 +++++++++++++++++++----------------
 2 files changed, 29 insertions(+), 19 deletions(-)

diff --git a/cpp/src/gandiva/llvm_generator.cc 
b/cpp/src/gandiva/llvm_generator.cc
index 0f0918b3a1..a42f71a1f7 100644
--- a/cpp/src/gandiva/llvm_generator.cc
+++ b/cpp/src/gandiva/llvm_generator.cc
@@ -1118,9 +1118,16 @@ void 
LLVMGenerator::Visitor::VisitInExpression<gandiva::DecimalScalar128>(
   const InExprDex<gandiva::DecimalScalar128>& dex_instance =
       dynamic_cast<const InExprDex<gandiva::DecimalScalar128>&>(dex);
   /* add the holder at the beginning */
-  llvm::Constant* ptr_int_cast =
-      types->i64_constant((int64_t)(dex_instance.in_holder().get()));
-  params.push_back(ptr_int_cast);
+  // Switch to the entry block and load the holder pointer
+  auto builder = ir_builder();
+  llvm::BasicBlock* saved_block = builder->GetInsertBlock();
+  builder->SetInsertPoint(entry_block_);
+
+  llvm::Value* in_holder = generator_->LoadVectorAtIndex(
+      arg_holder_ptrs_, types->i64_type(), dex_instance.get_holder_idx(), 
"in_holder");
+
+  builder->SetInsertPoint(saved_block);
+  params.push_back(in_holder);
 
   /* eval expr result */
   for (auto& pair : dex.args()) {
diff --git a/cpp/src/gandiva/tests/in_expr_test.cc 
b/cpp/src/gandiva/tests/in_expr_test.cc
index 9650203724..bd7f3756b2 100644
--- a/cpp/src/gandiva/tests/in_expr_test.cc
+++ b/cpp/src/gandiva/tests/in_expr_test.cc
@@ -184,41 +184,44 @@ TEST_F(TestIn, TestInDecimal) {
   auto field0 = field("f0", arrow::decimal128(precision, scale));
   auto schema = arrow::schema({field0});
 
-  // Build In f0 + f1 in (6, 11)
   auto node_f0 = TreeExprBuilder::MakeField(field0);
 
   gandiva::DecimalScalar128 d0("6", precision, scale);
   gandiva::DecimalScalar128 d1("12", precision, scale);
   gandiva::DecimalScalar128 d2("11", precision, scale);
   std::unordered_set<gandiva::DecimalScalar128> in_constants({d0, d1, d2});
+
   auto in_expr = TreeExprBuilder::MakeInExpressionDecimal(node_f0, 
in_constants);
   auto condition = TreeExprBuilder::MakeCondition(in_expr);
 
-  std::shared_ptr<Filter> filter;
-  auto status = Filter::Make(schema, condition, TestConfiguration(), &filter);
-  EXPECT_TRUE(status.ok());
-
-  // Create a row-batch with some sample data
   int num_records = 5;
   auto values0 = MakeDecimalVector({"1", "2", "0", "-6", "6"});
   auto array0 =
       MakeArrowArrayDecimal(decimal_type, values0, {true, true, true, false, 
true});
-  // expected output (indices for which condition matches)
+
   auto exp = MakeArrowArrayUint16({4});
 
-  // prepare input record batch
   auto in_batch = arrow::RecordBatch::Make(schema, num_records, {array0});
 
-  std::shared_ptr<SelectionVector> selection_vector;
-  status = SelectionVector::MakeInt16(num_records, pool_, &selection_vector);
-  EXPECT_TRUE(status.ok());
+  // GH-39784: Verify decimal in_expr works correctly when the generated
+  // object code is reused from cache.
+  for (int i = 0; i < 2; ++i) {
+    std::shared_ptr<Filter> filter;
+    ASSERT_OK(Filter::Make(schema, condition, TestConfiguration(), &filter));
 
-  // Evaluate expression
-  status = filter->Evaluate(*in_batch, selection_vector);
-  EXPECT_TRUE(status.ok());
+    if (i == 0) {
+      EXPECT_FALSE(filter->GetBuiltFromCache());
+    } else {
+      EXPECT_TRUE(filter->GetBuiltFromCache());
+    }
 
-  // Validate results
-  EXPECT_ARROW_ARRAY_EQUALS(exp, selection_vector->ToArray());
+    std::shared_ptr<SelectionVector> selection_vector;
+    ASSERT_OK(SelectionVector::MakeInt16(num_records, pool_, 
&selection_vector));
+
+    ASSERT_OK(filter->Evaluate(*in_batch, selection_vector));
+
+    EXPECT_ARROW_ARRAY_EQUALS(exp, selection_vector->ToArray());
+  }
 }
 
 TEST_F(TestIn, TestInString) {

Reply via email to