This is an automated email from the ASF dual-hosted git repository.

CurtHagenlocher pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-dotnet.git


The following commit(s) were added to refs/heads/main by this push:
     new 7949fb6  fix: handle file reader cancellation and empty nested concat 
(#327)
7949fb6 is described below

commit 7949fb671ccc0610a4589f58c2c5ff5e801751c0
Author: InCerryGit <[email protected]>
AuthorDate: Sat Apr 25 00:41:53 2026 +0800

    fix: handle file reader cancellation and empty nested concat (#327)
    
    ## Summary
    
    This PR fixes two correctness edge cases found while reviewing the IPC
    reader and array concatenation paths.
    
    ### File reader cancellation
    
    `ArrowFileReaderImplementation.ReadRecordBatchAsync(...)` and
    `ReadNextRecordBatchAsync(...)` accepted a caller cancellation token,
    but the
    schema/footer loading step still called `ReadSchemaAsync()` without
    passing that
    token.
    
    That meant a canceled async file read could still continue through
    schema/footer
    I/O before cancellation was observed later in the record batch path.
    This PR
    passes the caller token into schema loading so cancellation is honored
    before
    dictionary and record batch reads begin.
    
    ### Empty nested array concatenation
    
    `ArrayDataConcatenator` could drop the required child array structure
    when
    concatenating nested list arrays whose parent arrays were all empty.
    Nested Arrow
    arrays still need a valid zero-length child `ArrayData`; otherwise
    downstream
    array construction can fail or produce structurally invalid nested
    arrays.
    
    This PR preserves a zero-length child for all-empty nested/list inputs.
    
    ### Null-only ListView concatenation
    
    For `ListView` and `LargeListView`, null rows can have `size == 0`, and
    their
    offset values should not contribute to the child slice range. The
    previous logic
    used every offset when computing the child bounds, even for zero-size
    entries.
    
    This PR computes list-view child bounds only from entries with `size >
    0`, so
    null-only inputs keep an empty child slice instead of deriving a range
    from null
    rows.
    
    ## Validation
    
    - `dotnet test test/Apache.Arrow.Tests/Apache.Arrow.Tests.csproj -c
    Release --filter
    
"FullyQualifiedName~ArrowFileReaderTests.ReadRecordBatchAsync_HonorsPreCanceledTokenDuringSchemaRead|FullyQualifiedName~ArrowFileReaderTests.ReadNextRecordBatchAsync_HonorsPreCanceledTokenDuringSchemaRead|FullyQualifiedName~ArrowArrayConcatenatorTests.TestConcatenateAllEmpty|FullyQualifiedName~ArrowArrayConcatenatorTests.TestConcatenateNullOnly"`
    - `dotnet build Apache.Arrow.sln -c Release`
---
 src/Apache.Arrow/Arrays/ArrayDataConcatenator.cs   |  76 +++++++--
 .../Ipc/ArrowFileReaderImplementation.cs           |   4 +-
 .../ArrowArrayConcatenatorTests.cs                 | 189 +++++++++++++++++++++
 3 files changed, 255 insertions(+), 14 deletions(-)

diff --git a/src/Apache.Arrow/Arrays/ArrayDataConcatenator.cs 
b/src/Apache.Arrow/Arrays/ArrayDataConcatenator.cs
index 764292a..cc9572a 100644
--- a/src/Apache.Arrow/Arrays/ArrayDataConcatenator.cs
+++ b/src/Apache.Arrow/Arrays/ArrayDataConcatenator.cs
@@ -133,13 +133,29 @@ namespace Apache.Arrow
                     var child = arrayData.Children[0];
                     ReadOnlySpan<long> offsets = 
arrayData.Buffers[1].Span.CastTo<long>().Slice(arrayData.Offset, 
arrayData.Length);
                     ReadOnlySpan<long> sizes = 
arrayData.Buffers[2].Span.CastTo<long>().Slice(arrayData.Offset, 
arrayData.Length);
-                    var minOffset = offsets[0];
+                    long minOffset = 0;
                     long maxEnd = 0;
+                    bool hasNonEmptyValue = false;
 
                     for (int i = 0; i < arrayData.Length; ++i)
                     {
-                        minOffset = Math.Min(minOffset, offsets[i]);
-                        maxEnd = Math.Max(maxEnd, offsets[i] + sizes[i]);
+                        if (sizes[i] > 0)
+                        {
+                            minOffset = hasNonEmptyValue ? Math.Min(minOffset, 
offsets[i]) : offsets[i];
+                            maxEnd = Math.Max(maxEnd, offsets[i] + sizes[i]);
+                            hasNonEmptyValue = true;
+                        }
+                    }
+
+                    if (!hasNonEmptyValue)
+                    {
+                        for (int i = 0; i < offsets.Length; ++i)
+                        {
+                            offsetsBuilder.Append(baseOffset);
+                        }
+
+                        children.Add(child.Slice(0, 0));
+                        continue;
                     }
 
                     foreach (long offset in offsets)
@@ -158,7 +174,7 @@ namespace Apache.Arrow
                 }
 
                 ArrowBuffer offsetBuffer = offsetsBuilder.Build(_allocator);
-                ArrayData combinedChild = Concatenate(children, _allocator);
+                ArrayData combinedChild = ConcatenateChildrenOrEmpty(children);
 
                 Result = new ArrayData(type, _totalLength, _totalNullCount, 0, 
new ArrowBuffer[] { validityBuffer, offsetBuffer, sizesBuffer }, new[] { 
combinedChild });
             }
@@ -185,13 +201,29 @@ namespace Apache.Arrow
                     var child = arrayData.Children[0];
                     ReadOnlySpan<int> offsets = 
arrayData.Buffers[1].Span.CastTo<int>().Slice(arrayData.Offset, 
arrayData.Length);
                     ReadOnlySpan<int> sizes = 
arrayData.Buffers[2].Span.CastTo<int>().Slice(arrayData.Offset, 
arrayData.Length);
-                    var minOffset = offsets[0];
-                    var maxEnd = 0;
+                    int minOffset = 0;
+                    int maxEnd = 0;
+                    bool hasNonEmptyValue = false;
 
                     for (int i = 0; i < arrayData.Length; ++i)
                     {
-                        minOffset = Math.Min(minOffset, offsets[i]);
-                        maxEnd = Math.Max(maxEnd, offsets[i] + sizes[i]);
+                        if (sizes[i] > 0)
+                        {
+                            minOffset = hasNonEmptyValue ? Math.Min(minOffset, 
offsets[i]) : offsets[i];
+                            maxEnd = Math.Max(maxEnd, offsets[i] + sizes[i]);
+                            hasNonEmptyValue = true;
+                        }
+                    }
+
+                    if (!hasNonEmptyValue)
+                    {
+                        for (int i = 0; i < offsets.Length; ++i)
+                        {
+                            offsetsBuilder.Append(baseOffset);
+                        }
+
+                        children.Add(child.Slice(0, 0));
+                        continue;
                     }
 
                     foreach (int offset in offsets)
@@ -210,7 +242,7 @@ namespace Apache.Arrow
                 }
 
                 ArrowBuffer offsetBuffer = offsetsBuilder.Build(_allocator);
-                ArrayData combinedChild = Concatenate(children, _allocator);
+                ArrayData combinedChild = ConcatenateChildrenOrEmpty(children);
 
                 Result = new ArrayData(type, _totalLength, _totalNullCount, 0, 
new ArrowBuffer[] { validityBuffer, offsetBuffer, sizesBuffer }, new[] { 
combinedChild });
             }
@@ -237,7 +269,7 @@ namespace Apache.Arrow
                     children.Add(child);
                 }
 
-                ArrayData combinedChild = Concatenate(children, _allocator);
+                ArrayData combinedChild = ConcatenateChildrenOrEmpty(children);
 
                 Result = new ArrayData(type, _totalLength, _totalNullCount, 0, 
new ArrowBuffer[] { validityBuffer }, new[] { combinedChild });
             }
@@ -707,7 +739,7 @@ namespace Apache.Arrow
                     children.Add(child);
                 }
 
-                ArrayData combinedChild = Concatenate(children, _allocator);
+                ArrayData combinedChild = ConcatenateChildrenOrEmpty(children);
 
                 Result = new ArrayData(type, _totalLength, _totalNullCount, 0, 
new ArrowBuffer[] { validityBuffer, offsetBuffer }, new[] { combinedChild });
             }
@@ -748,11 +780,31 @@ namespace Apache.Arrow
                     children.Add(child);
                 }
 
-                ArrayData combinedChild = Concatenate(children, _allocator);
+                ArrayData combinedChild = ConcatenateChildrenOrEmpty(children);
 
                 Result = new ArrayData(type, _totalLength, _totalNullCount, 0, 
new ArrowBuffer[] { validityBuffer, offsetBuffer }, new[] { combinedChild });
             }
 
+            private ArrayData ConcatenateChildrenOrEmpty(List<ArrayData> 
children)
+            {
+                if (children.Count > 0)
+                {
+                    return Concatenate(children, _allocator);
+                }
+
+                foreach (ArrayData arrayData in _arrayDataList)
+                {
+                    if (arrayData.Children?.Length > 0 && 
arrayData.Children[0] != null)
+                    {
+                        // All parent arrays are empty, but the nested array 
still needs a real
+                        // zero-length child to preserve Arrow's structural 
invariant.
+                        return arrayData.Children[0].Slice(0, 0);
+                    }
+                }
+
+                throw new InvalidOperationException("Nested array data must 
contain child data.");
+            }
+
             private ArrowBuffer ConcatenateLargeOffsetBuffer()
             {
                 var builder = new ArrowBuffer.Builder<long>(_totalLength + 1);
diff --git a/src/Apache.Arrow/Ipc/ArrowFileReaderImplementation.cs 
b/src/Apache.Arrow/Ipc/ArrowFileReaderImplementation.cs
index 1498c85..7217b08 100644
--- a/src/Apache.Arrow/Ipc/ArrowFileReaderImplementation.cs
+++ b/src/Apache.Arrow/Ipc/ArrowFileReaderImplementation.cs
@@ -151,7 +151,7 @@ namespace Apache.Arrow.Ipc
 
         public async ValueTask<RecordBatch> ReadRecordBatchAsync(int index, 
CancellationToken cancellationToken)
         {
-            await ReadSchemaAsync().ConfigureAwait(false);
+            await ReadSchemaAsync(cancellationToken).ConfigureAwait(false);
             await 
ReadDictionariesAsync(cancellationToken).ConfigureAwait(false);
 
             if (index >= _footer.RecordBatchCount)
@@ -185,7 +185,7 @@ namespace Apache.Arrow.Ipc
 
         public override async ValueTask<RecordBatch> 
ReadNextRecordBatchAsync(CancellationToken cancellationToken)
         {
-            await ReadSchemaAsync().ConfigureAwait(false);
+            await ReadSchemaAsync(cancellationToken).ConfigureAwait(false);
             await 
ReadDictionariesAsync(cancellationToken).ConfigureAwait(false);
 
             if (_recordBatchIndex >= _footer.RecordBatchCount)
diff --git a/test/Apache.Arrow.Tests/ArrowArrayConcatenatorTests.cs 
b/test/Apache.Arrow.Tests/ArrowArrayConcatenatorTests.cs
index 14ae6ec..7702fde 100644
--- a/test/Apache.Arrow.Tests/ArrowArrayConcatenatorTests.cs
+++ b/test/Apache.Arrow.Tests/ArrowArrayConcatenatorTests.cs
@@ -60,6 +60,195 @@ namespace Apache.Arrow.Tests
             ArrowReaderVerifier.CompareArrays(array, actualArray);
         }
 
+        [Fact]
+        public void TestConcatenateAllEmptyListArraysKeepsEmptyChild()
+        {
+            AssertConcatenateAllEmptyNestedArraysKeepsChild(
+                new ListArray.Builder(Int32Type.Default).Build(),
+                new ListArray.Builder(Int32Type.Default).Build());
+        }
+
+        [Fact]
+        public void TestConcatenateAllEmptyLargeListArraysKeepsEmptyChild()
+        {
+            AssertConcatenateAllEmptyNestedArraysKeepsChild(
+                new LargeListArray.Builder(Int32Type.Default).Build(),
+                new LargeListArray.Builder(Int32Type.Default).Build());
+        }
+
+        [Fact]
+        public void TestConcatenateAllEmptyListViewArraysKeepsEmptyChild()
+        {
+            AssertConcatenateAllEmptyNestedArraysKeepsChild(
+                new ListViewArray.Builder(Int32Type.Default).Build(),
+                new ListViewArray.Builder(Int32Type.Default).Build());
+        }
+
+        [Fact]
+        public void TestConcatenateAllEmptyLargeListViewArraysKeepsEmptyChild()
+        {
+            AssertConcatenateAllEmptyNestedArraysKeepsChild(
+                new LargeListViewArray.Builder(Int32Type.Default).Build(),
+                new LargeListViewArray.Builder(Int32Type.Default).Build());
+        }
+
+        [Fact]
+        public void TestConcatenateNullOnlyListViewArraysKeepsEmptyChild()
+        {
+            var first = new 
ListViewArray.Builder(Int32Type.Default).AppendNull().Build();
+            var second = new 
ListViewArray.Builder(Int32Type.Default).AppendNull().Build();
+
+            AssertConcatenateNullOnlyListViewArraysKeepsChild(first, second);
+        }
+
+        [Fact]
+        public void TestConcatenateNullOnlyLargeListViewArraysKeepsEmptyChild()
+        {
+            var first = new 
LargeListViewArray.Builder(Int32Type.Default).AppendNull().Build();
+            var second = new 
LargeListViewArray.Builder(Int32Type.Default).AppendNull().Build();
+
+            AssertConcatenateNullOnlyListViewArraysKeepsChild(first, second);
+        }
+
+        [Fact]
+        public void 
TestConcatenateSlicedNullOnlyListViewArraysNormalizesOffsets()
+        {
+            var builder = new ListViewArray.Builder(Int32Type.Default);
+            var valueBuilder = (Int32Array.Builder)builder.ValueBuilder;
+            builder.Append();
+            valueBuilder.Append(42);
+            builder.AppendNull();
+            var sliced = ArrowArrayFactory.Slice(builder.Build(), 1, 1);
+
+            var result = 
Assert.IsType<ListViewArray>(ArrowArrayConcatenator.Concatenate(new[] { sliced, 
sliced }));
+
+            Assert.Equal(new[] { 0, 0 }, result.ValueOffsets.ToArray());
+            Assert.Equal(new[] { 0, 0 }, result.Sizes.ToArray());
+            Assert.Equal(0, result.Data.Children[0].Length);
+        }
+
+        [Fact]
+        public void 
TestConcatenateSlicedNullOnlyLargeListViewArraysNormalizesOffsets()
+        {
+            var builder = new LargeListViewArray.Builder(Int32Type.Default);
+            var valueBuilder = (Int32Array.Builder)builder.ValueBuilder;
+            builder.Append();
+            valueBuilder.Append(42);
+            builder.AppendNull();
+            var sliced = ArrowArrayFactory.Slice(builder.Build(), 1, 1);
+
+            var result = 
Assert.IsType<LargeListViewArray>(ArrowArrayConcatenator.Concatenate(new[] { 
sliced, sliced }));
+
+            Assert.Equal(new[] { 0L, 0L }, result.ValueOffsets.ToArray());
+            Assert.Equal(new[] { 0L, 0L }, result.Sizes.ToArray());
+            Assert.Equal(0, result.Data.Children[0].Length);
+        }
+
+        [Fact]
+        public void TestConcatenateSlicedEmptyListViewArraysNormalizesOffsets()
+        {
+            var builder = new ListViewArray.Builder(Int32Type.Default);
+            var valueBuilder = (Int32Array.Builder)builder.ValueBuilder;
+            builder.Append();
+            valueBuilder.Append(42);
+            builder.Append();
+            var sliced = ArrowArrayFactory.Slice(builder.Build(), 1, 1);
+
+            var result = 
Assert.IsType<ListViewArray>(ArrowArrayConcatenator.Concatenate(new[] { sliced, 
sliced }));
+
+            Assert.Equal(new[] { 0, 0 }, result.ValueOffsets.ToArray());
+            Assert.Equal(new[] { 0, 0 }, result.Sizes.ToArray());
+            Assert.Equal(0, result.NullCount);
+            Assert.Equal(0, result.Data.Children[0].Length);
+        }
+
+        [Fact]
+        public void 
TestConcatenateSlicedEmptyLargeListViewArraysNormalizesOffsets()
+        {
+            var builder = new LargeListViewArray.Builder(Int32Type.Default);
+            var valueBuilder = (Int32Array.Builder)builder.ValueBuilder;
+            builder.Append();
+            valueBuilder.Append(42);
+            builder.Append();
+            var sliced = ArrowArrayFactory.Slice(builder.Build(), 1, 1);
+
+            var result = 
Assert.IsType<LargeListViewArray>(ArrowArrayConcatenator.Concatenate(new[] { 
sliced, sliced }));
+
+            Assert.Equal(new[] { 0L, 0L }, result.ValueOffsets.ToArray());
+            Assert.Equal(new[] { 0L, 0L }, result.Sizes.ToArray());
+            Assert.Equal(0, result.NullCount);
+            Assert.Equal(0, result.Data.Children[0].Length);
+        }
+
+        [Fact]
+        public void 
TestConcatenateListViewZeroSizeSliceAfterNonEmptyUsesBaseOffset()
+        {
+            var nonEmptyBuilder = new ListViewArray.Builder(Int32Type.Default);
+            var nonEmptyValueBuilder = 
(Int32Array.Builder)nonEmptyBuilder.ValueBuilder;
+            nonEmptyBuilder.Append();
+            nonEmptyValueBuilder.Append(7);
+            var nonEmpty = nonEmptyBuilder.Build();
+
+            var emptyBuilder = new ListViewArray.Builder(Int32Type.Default);
+            var emptyValueBuilder = 
(Int32Array.Builder)emptyBuilder.ValueBuilder;
+            emptyBuilder.Append();
+            emptyValueBuilder.Append(42);
+            emptyBuilder.Append();
+            var sliced = ArrowArrayFactory.Slice(emptyBuilder.Build(), 1, 1);
+
+            var result = 
Assert.IsType<ListViewArray>(ArrowArrayConcatenator.Concatenate(new[] { 
nonEmpty, sliced, sliced }));
+
+            Assert.Equal(new[] { 0, 1, 1 }, result.ValueOffsets.ToArray());
+            Assert.Equal(new[] { 1, 0, 0 }, result.Sizes.ToArray());
+            Assert.Equal(1, result.Data.Children[0].Length);
+        }
+
+        [Fact]
+        public void 
TestConcatenateLargeListViewZeroSizeSliceAfterNonEmptyUsesBaseOffset()
+        {
+            var nonEmptyBuilder = new 
LargeListViewArray.Builder(Int32Type.Default);
+            var nonEmptyValueBuilder = 
(Int32Array.Builder)nonEmptyBuilder.ValueBuilder;
+            nonEmptyBuilder.Append();
+            nonEmptyValueBuilder.Append(7);
+            var nonEmpty = nonEmptyBuilder.Build();
+
+            var emptyBuilder = new 
LargeListViewArray.Builder(Int32Type.Default);
+            var emptyValueBuilder = 
(Int32Array.Builder)emptyBuilder.ValueBuilder;
+            emptyBuilder.Append();
+            emptyValueBuilder.Append(42);
+            emptyBuilder.Append();
+            var sliced = ArrowArrayFactory.Slice(emptyBuilder.Build(), 1, 1);
+
+            var result = 
Assert.IsType<LargeListViewArray>(ArrowArrayConcatenator.Concatenate(new[] { 
nonEmpty, sliced, sliced }));
+
+            Assert.Equal(new[] { 0L, 1L, 1L }, result.ValueOffsets.ToArray());
+            Assert.Equal(new[] { 1L, 0L, 0L }, result.Sizes.ToArray());
+            Assert.Equal(1, result.Data.Children[0].Length);
+        }
+
+        private static void 
AssertConcatenateAllEmptyNestedArraysKeepsChild(IArrowArray first, IArrowArray 
second)
+        {
+            IArrowArray result = ArrowArrayConcatenator.Concatenate(new[] { 
first, second });
+
+            Assert.Equal(0, result.Length);
+            Assert.NotNull(result.Data.Children);
+            Assert.Single(result.Data.Children);
+            Assert.NotNull(result.Data.Children[0]);
+            Assert.Equal(0, result.Data.Children[0].Length);
+        }
+
+        private static void 
AssertConcatenateNullOnlyListViewArraysKeepsChild(IArrowArray first, 
IArrowArray second)
+        {
+            IArrowArray result = ArrowArrayConcatenator.Concatenate(new[] { 
first, second });
+
+            Assert.Equal(2, result.Length);
+            Assert.Equal(2, result.NullCount);
+            Assert.NotNull(result.Data.Children);
+            Assert.Single(result.Data.Children);
+            Assert.NotNull(result.Data.Children[0]);
+            Assert.Equal(0, result.Data.Children[0].Length);
+        }
+
         [Fact]
         public void TestRunEndEncodedInt32RunEnds()
         {

Reply via email to