This is an automated email from the ASF dual-hosted git repository.

pitrou pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/main by this push:
     new 1b3f313009 GH-49888: [C++][Compute] Fix count for run-end encoded arrays with nulls (#49908)
1b3f313009 is described below

commit 1b3f31300979cd6976a2125a8dccfa7c049706ca
Author: fenfeng9 <[email protected]>
AuthorDate: Wed May 6 22:24:03 2026 +0800

    GH-49888: [C++][Compute] Fix count for run-end encoded arrays with nulls (#49908)
    
    ### Rationale for this change
    
    The `count` kernel used `GetNullCount()`, which reports the physical null count. For run-end encoded arrays, this ignored nulls in the encoded values child.
    
    ### What changes are included in this PR?
    
    Use `ComputeLogicalNullCount()` in the `count` kernel so run-end encoded arrays are counted correctly. Add C++ and Python tests for this case.
    
    ### Are these changes tested?
    
    Yes.
    
    ### Are there any user-facing changes?
    
    No.
    
    * GitHub Issue: #49888
    
    Authored-by: fenfeng9 <[email protected]>
    Signed-off-by: Antoine Pitrou <[email protected]>
---
 cpp/src/arrow/compute/kernels/aggregate_basic.cc |  2 +-
 cpp/src/arrow/compute/kernels/aggregate_test.cc  | 10 ++++++++++
 python/pyarrow/tests/test_compute.py             | 12 ++++++++++++
 3 files changed, 23 insertions(+), 1 deletion(-)

diff --git a/cpp/src/arrow/compute/kernels/aggregate_basic.cc b/cpp/src/arrow/compute/kernels/aggregate_basic.cc
index 03fba53ac0..f7ff72bc11 100644
--- a/cpp/src/arrow/compute/kernels/aggregate_basic.cc
+++ b/cpp/src/arrow/compute/kernels/aggregate_basic.cc
@@ -103,7 +103,7 @@ struct CountImpl : public ScalarAggregator {
       this->non_nulls += batch.length;
     } else if (batch[0].is_array()) {
       const ArraySpan& input = batch[0].array;
-      const int64_t nulls = input.GetNullCount();
+      const int64_t nulls = input.ComputeLogicalNullCount();
       this->nulls += nulls;
       this->non_nulls += input.length - nulls;
     } else {
diff --git a/cpp/src/arrow/compute/kernels/aggregate_test.cc b/cpp/src/arrow/compute/kernels/aggregate_test.cc
index c5ba012d76..6783475db3 100644
--- a/cpp/src/arrow/compute/kernels/aggregate_test.cc
+++ b/cpp/src/arrow/compute/kernels/aggregate_test.cc
@@ -941,6 +941,16 @@ TYPED_TEST(TestCountKernel, SimpleCount) {
   EXPECT_THAT(Count(*MakeScalar(ty, 1), all), ResultWith(Datum(int64_t(1))));
 }
 
+TEST(TestCountKernel, RunEndEncodedNulls) {
+  auto input = ArrayFromJSON(int32(), "[1, 1, null, null, null, 2, 2, 2, null, 3]");
+  ASSERT_OK_AND_ASSIGN(auto encoded, RunEndEncode(input));
+
+  auto array = encoded.make_array();
+  ValidateCount(*array, {6, 4});
+  // Logical slice: [null, null, 2, 2, 2, null].
+  ValidateCount(*array->Slice(3, 6), {3, 3});
+}
+
 template <typename ArrowType>
 class TestRandomNumericCountKernel : public ::testing::Test {};
 
diff --git a/python/pyarrow/tests/test_compute.py b/python/pyarrow/tests/test_compute.py
index 8c3b09f612..4e44a912d9 100644
--- a/python/pyarrow/tests/test_compute.py
+++ b/python/pyarrow/tests/test_compute.py
@@ -2849,6 +2849,18 @@ def test_count():
         pc.count(arr, 'something else')
 
 
+def test_count_run_end_encoded_nulls():
+    arr = pc.run_end_encode(
+        pa.array([1, 1, None, None, None, 2, 2, 2, None, 3]))
+
+    assert pc.count(arr, mode="only_valid").as_py() == 6
+    assert pc.count(arr, mode="only_null").as_py() == 4
+    assert pc.count(arr, mode="all").as_py() == 10
+    # Slice crosses run boundaries: logical [None, None, 2, 2, 2, None].
+    assert pc.count(arr.slice(3, 6), mode="only_valid").as_py() == 3
+    assert pc.count(arr.slice(3, 6), mode="only_null").as_py() == 3
+
+
 def test_index():
     arr = pa.array([0, 1, None, 3, 4], type=pa.int64())
     assert pc.index(arr, pa.scalar(0)).as_py() == 0

Reply via email to