This is an automated email from the ASF dual-hosted git repository.

CurtHagenlocher pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-dotnet.git


The following commit(s) were added to refs/heads/main by this push:
     new 67b6db9  perf: reduce ArrowBuffer builder reallocation work (#330)
67b6db9 is described below

commit 67b6db956b9a823824f6d40d55fcc3434c9a66b8
Author: InCerryGit <[email protected]>
AuthorDate: Sun Apr 26 04:16:51 2026 +0800

    perf: reduce ArrowBuffer builder reallocation work (#330)
    
    ## Summary
    
    - Pre-reserve `ArrowBuffer.Builder<T>` capacity when `AppendRange`
    receives a known-count collection.
    - Copy only the populated `Length * sizeof(T)` portion when reallocating
    builder storage.
    - Add focused coverage for enumerable, read-only collection, and
    reallocation preservation paths.
    
    The reallocation behavior preserves appended/current-length contents.
    Capacity beyond `Length` remains undefined and is not copied across
    reallocations.
    
    ## Validation
    
    - `dotnet test test/Apache.Arrow.Tests/Apache.Arrow.Tests.csproj -c
    Release --filter
    "FullyQualifiedName~Apache.Arrow.Tests.ArrowBufferBuilderTests"`
    - `rtk dotnet build "Apache.Arrow.sln" -c Release`
    - LSP diagnostics clean on changed files
    - Code review completed before commit; no blockers found
---
 src/Apache.Arrow/ArrowBuffer.Builder.cs            | 11 +++-
 test/Apache.Arrow.Tests/ArrowBufferBuilderTests.cs | 74 ++++++++++++++++++++++
 2 files changed, 84 insertions(+), 1 deletion(-)

diff --git a/src/Apache.Arrow/ArrowBuffer.Builder.cs b/src/Apache.Arrow/ArrowBuffer.Builder.cs
index fad6ab7..cf14adf 100644
--- a/src/Apache.Arrow/ArrowBuffer.Builder.cs
+++ b/src/Apache.Arrow/ArrowBuffer.Builder.cs
@@ -132,6 +132,15 @@ namespace Apache.Arrow
             {
                 if (values != null)
                 {
+                    if (values is ICollection<T> collection)
+                    {
+                        EnsureAdditionalCapacity(collection.Count);
+                    }
+                    else if (values is IReadOnlyCollection<T> readOnlyCollection)
+                    {
+                        EnsureAdditionalCapacity(readOnlyCollection.Count);
+                    }
+
                     foreach (T v in values)
                     {
                         Append(v);
@@ -243,7 +252,7 @@ namespace Apache.Arrow
                 if (numBytes != 0)
                 {
                     var memory = new Memory<byte>(new byte[numBytes]);
-                    Memory.CopyTo(memory);
+                    Memory.Slice(0, Length * _size).CopyTo(memory);
 
                     Memory = memory;
                 }
diff --git a/test/Apache.Arrow.Tests/ArrowBufferBuilderTests.cs b/test/Apache.Arrow.Tests/ArrowBufferBuilderTests.cs
index 495fc2e..f1a072f 100644
--- a/test/Apache.Arrow.Tests/ArrowBufferBuilderTests.cs
+++ b/test/Apache.Arrow.Tests/ArrowBufferBuilderTests.cs
@@ -14,6 +14,8 @@
 // limitations under the License.
 
 using System;
+using System.Collections;
+using System.Collections.Generic;
 using System.Linq;
 using Xunit;
 
@@ -158,6 +160,65 @@ namespace Apache.Arrow.Tests
                     Assert.Equal(i, span[i]);
                 }
             }
+
+            [Fact]
+            public void BufferHasExpectedValuesForRangeEnumerable()
+            {
+                var builder = new ArrowBuffer.Builder<int>(1);
+
+                builder.AppendRange(Enumerable.Range(0, 10));
+
+                var buffer = builder.Build();
+                var span = buffer.Span.CastTo<int>();
+
+                for (var i = 0; i < 10; i++)
+                {
+                    Assert.Equal(i, span[i]);
+                }
+            }
+
+            [Fact]
+            public void BufferHasExpectedValuesForReadOnlyCollection()
+            {
+                var builder = new ArrowBuffer.Builder<int>(1);
+                var data = new CountingReadOnlyCollection(Enumerable.Range(0, 10).ToArray());
+
+                builder.AppendRange(data);
+
+                var buffer = builder.Build();
+                var span = buffer.Span.CastTo<int>();
+
+                Assert.Equal(1, data.CountAccesses);
+                for (var i = 0; i < 10; i++)
+                {
+                    Assert.Equal(i, span[i]);
+                }
+            }
+
+            private sealed class CountingReadOnlyCollection : IReadOnlyCollection<int>
+            {
+                private readonly int[] _values;
+
+                public CountingReadOnlyCollection(int[] values)
+                {
+                    _values = values;
+                }
+
+                public int CountAccesses { get; private set; }
+
+                public int Count
+                {
+                    get
+                    {
+                        CountAccesses++;
+                        return _values.Length;
+                    }
+                }
+
+                public IEnumerator<int> GetEnumerator() => ((IEnumerable<int>)_values).GetEnumerator();
+
+                IEnumerator IEnumerable.GetEnumerator() => GetEnumerator();
+            }
         }
 
         public class Clear
@@ -210,6 +271,19 @@ namespace Apache.Arrow.Tests
                 // Act/Assert
                 Assert.Throws<ArgumentOutOfRangeException>(() => builder.Resize(-1));
             }
+
+            [Fact]
+            public void ReservePreservesAppendedValuesAfterReallocate()
+            {
+                var builder = new ArrowBuffer.Builder<int>(1);
+                builder.Append(42);
+
+                builder.Reserve(builder.Capacity + 1);
+
+                var buffer = builder.Build();
+                var span = buffer.Span.CastTo<int>();
+                Assert.Equal(42, span[0]);
+            }
         }
 
     }

Reply via email to