This is an automated email from the ASF dual-hosted git repository.

curth pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/main by this push:
     new 9ba789d111 GH-43267: [C#] Correctly import sliced arrays through the C 
Data interface (#44117)
9ba789d111 is described below

commit 9ba789d1115494b00e6772a3170c8ba2f1a9a02c
Author: Curt Hagenlocher <c...@hagenlocher.org>
AuthorDate: Sun Sep 15 19:20:59 2024 -0700

    GH-43267: [C#] Correctly import sliced arrays through the C Data interface 
(#44117)
    
    ### What changes are included in this PR?
    
    Changes to the C Data importer to correctly handle nonzero offsets.
    
    ### Are these changes tested?
    
    Yes
    
    ### Are there any user-facing changes?
    
    No
    
    Closes #43267
    * GitHub Issue: #43267
    
    Authored-by: Curt Hagenlocher <c...@hagenlocher.org>
    Signed-off-by: Curt Hagenlocher <c...@hagenlocher.org>
---
 csharp/src/Apache.Arrow/Apache.Arrow.csproj        |  6 +--
 csharp/src/Apache.Arrow/C/CArrowArrayImporter.cs   | 22 +++++------
 csharp/src/Apache.Arrow/RecordBatch.cs             | 11 ++++++
 csharp/src/Apache.Arrow/Utility.cs                 |  2 -
 .../test/Apache.Arrow.Tests/ArrowReaderVerifier.cs |  4 +-
 .../Apache.Arrow.Tests/CDataInterfaceDataTests.cs  | 18 +++++++++
 .../CDataInterfacePythonTests.cs                   | 43 ++++++++++++++++++++++
 7 files changed, 88 insertions(+), 18 deletions(-)

diff --git a/csharp/src/Apache.Arrow/Apache.Arrow.csproj 
b/csharp/src/Apache.Arrow/Apache.Arrow.csproj
index 034876a114..a845f8e693 100644
--- a/csharp/src/Apache.Arrow/Apache.Arrow.csproj
+++ b/csharp/src/Apache.Arrow/Apache.Arrow.csproj
@@ -7,18 +7,16 @@
     <Description>Apache Arrow is a cross-language development platform for 
in-memory data. It specifies a standardized language-independent columnar 
memory format for flat and hierarchical data, organized for efficient analytic 
operations on modern hardware.</Description>
   </PropertyGroup>
 
-  <PropertyGroup Condition="'$(IsWindows)'=='true'">
+  <PropertyGroup>
     <TargetFrameworks>netstandard2.0;net6.0;net8.0;net462</TargetFrameworks>
   </PropertyGroup>
-  <PropertyGroup Condition="'$(IsWindows)'!='true'">
-    <TargetFrameworks>netstandard2.0;net6.0;net8.0</TargetFrameworks>
-  </PropertyGroup>
 
   <ItemGroup Condition="'$(TargetFrameworkIdentifier)' == '.NETStandard' or 
'$(TargetFramework)' == 'net462'">
     <PackageReference Include="System.Buffers" Version="4.5.1" />
     <PackageReference Include="System.Memory" Version="4.5.5" />
     <PackageReference Include="System.Runtime.CompilerServices.Unsafe" 
Version="4.7.1" />
     <PackageReference Include="System.Threading.Tasks.Extensions" 
Version="4.5.4" />
+    <PackageReference Include="System.ValueTuple" Version="4.5.0" />
   </ItemGroup>
 
   <ItemGroup>
diff --git a/csharp/src/Apache.Arrow/C/CArrowArrayImporter.cs 
b/csharp/src/Apache.Arrow/C/CArrowArrayImporter.cs
index 68b67f3d7c..c454380e17 100644
--- a/csharp/src/Apache.Arrow/C/CArrowArrayImporter.cs
+++ b/csharp/src/Apache.Arrow/C/CArrowArrayImporter.cs
@@ -260,7 +260,7 @@ namespace Apache.Arrow.C
 
             private ArrowBuffer ImportValidityBuffer(CArrowArray* cArray)
             {
-                int length = checked((int)cArray->length);
+                int length = checked((int)cArray->offset + 
(int)cArray->length);
                 int validityLength = 
checked((int)BitUtility.RoundUpToMultipleOf8(length) / 8);
                 return (cArray->buffers[0] == null) ? ArrowBuffer.Empty : new 
ArrowBuffer(AddMemory((IntPtr)cArray->buffers[0], 0, validityLength));
             }
@@ -285,7 +285,7 @@ namespace Apache.Arrow.C
                     throw new InvalidOperationException("Byte arrays are 
expected to have exactly three buffers");
                 }
 
-                int length = checked((int)cArray->length);
+                int length = checked((int)cArray->offset + 
(int)cArray->length);
                 int offsetsLength = (length + 1) * 4;
                 int* offsets = (int*)cArray->buffers[1];
                 Debug.Assert(offsets != null);
@@ -306,7 +306,7 @@ namespace Apache.Arrow.C
                     throw new InvalidOperationException("Byte array views are 
expected to have at least three buffers");
                 }
 
-                int length = checked((int)cArray->length);
+                int length = checked((int)cArray->offset + 
(int)cArray->length);
                 int viewsLength = length * 16;
 
                 long* bufferLengths = (long*)cArray->buffers[cArray->n_buffers 
- 1];
@@ -336,7 +336,7 @@ namespace Apache.Arrow.C
                         $"is greater than the maximum supported large byte 
array length ({maxLength})");
                 }
 
-                int length = (int)cArray->length;
+                int length = checked((int)cArray->offset + 
(int)cArray->length);
                 int offsetsLength = (length + 1) * 8;
                 long* offsets = (long*)cArray->buffers[1];
                 Debug.Assert(offsets != null);
@@ -364,7 +364,7 @@ namespace Apache.Arrow.C
                     throw new InvalidOperationException("List arrays are 
expected to have exactly two buffers");
                 }
 
-                int length = checked((int)cArray->length);
+                int length = checked((int)cArray->offset + 
(int)cArray->length);
                 int offsetsLength = (length + 1) * 4;
 
                 ArrowBuffer[] buffers = new ArrowBuffer[2];
@@ -381,7 +381,7 @@ namespace Apache.Arrow.C
                     throw new InvalidOperationException("List view arrays are 
expected to have exactly three buffers");
                 }
 
-                int length = checked((int)cArray->length);
+                int length = checked((int)cArray->offset + 
(int)cArray->length);
                 int offsetsLength = length * 4;
 
                 ArrowBuffer[] buffers = new ArrowBuffer[3];
@@ -407,7 +407,7 @@ namespace Apache.Arrow.C
                         $"is greater than the maximum supported large list 
array length ({maxLength})");
                 }
 
-                int length = (int)cArray->length;
+                int length = checked((int)cArray->offset + 
(int)cArray->length);
                 int offsetsLength = (length + 1) * 8;
 
                 ArrowBuffer[] buffers = new ArrowBuffer[2];
@@ -436,7 +436,7 @@ namespace Apache.Arrow.C
                 {
                     throw new InvalidOperationException("Dense union arrays 
are expected to have exactly two children");
                 }
-                int length = checked((int)cArray->length);
+                int length = checked((int)cArray->offset + 
(int)cArray->length);
                 int offsetsLength = length * 4;
 
                 ArrowBuffer[] buffers = new ArrowBuffer[2];
@@ -454,7 +454,7 @@ namespace Apache.Arrow.C
                 }
 
                 ArrowBuffer[] buffers = new ArrowBuffer[1];
-                buffers[0] = ImportCArrayBuffer(cArray, 0, 
checked((int)cArray->length));
+                buffers[0] = ImportCArrayBuffer(cArray, 0, 
checked((int)cArray->offset + (int)cArray->length));
 
                 return buffers;
             }
@@ -467,10 +467,10 @@ namespace Apache.Arrow.C
                 }
 
                 // validity, data
-                int length = checked((int)cArray->length);
+                int length = checked((int)cArray->offset + 
(int)cArray->length);
                 int valuesLength;
                 if (bitWidth >= 8)
-                    valuesLength = checked((int)(cArray->length * bitWidth / 
8));
+                    valuesLength = checked(length * bitWidth / 8);
                 else
                     valuesLength = 
checked((int)BitUtility.RoundUpToMultipleOf8(length) / 8);
 
diff --git a/csharp/src/Apache.Arrow/RecordBatch.cs 
b/csharp/src/Apache.Arrow/RecordBatch.cs
index 9cc81b1648..4067ba9ac6 100644
--- a/csharp/src/Apache.Arrow/RecordBatch.cs
+++ b/csharp/src/Apache.Arrow/RecordBatch.cs
@@ -100,6 +100,17 @@ namespace Apache.Arrow
             return new RecordBatch(Schema, arrays, Length);
         }
 
+        public RecordBatch Slice(int offset, int length)
+        {
+            if (offset > Length)
+            {
+                throw new ArgumentException($"Offset {offset} cannot be 
greater than Length {Length} for RecordBatch.Slice");
+            }
+
+            length = Math.Min(Length - offset, length);
+            return new RecordBatch(Schema, _arrays.Select(a => 
ArrowArrayFactory.Slice(a, offset, length)), length);
+        }
+
         public void Accept(IArrowArrayVisitor visitor)
         {
             switch (visitor)
diff --git a/csharp/src/Apache.Arrow/Utility.cs 
b/csharp/src/Apache.Arrow/Utility.cs
index c4e5732e6e..22b3ff15f1 100644
--- a/csharp/src/Apache.Arrow/Utility.cs
+++ b/csharp/src/Apache.Arrow/Utility.cs
@@ -13,10 +13,8 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-using Apache.Arrow.Flatbuf;
 using System;
 using System.Collections.Generic;
-using System.Text;
 
 namespace Apache.Arrow
 {
diff --git a/csharp/test/Apache.Arrow.Tests/ArrowReaderVerifier.cs 
b/csharp/test/Apache.Arrow.Tests/ArrowReaderVerifier.cs
index 85f7b75f93..35b2c4e7f2 100644
--- a/csharp/test/Apache.Arrow.Tests/ArrowReaderVerifier.cs
+++ b/csharp/test/Apache.Arrow.Tests/ArrowReaderVerifier.cs
@@ -566,7 +566,9 @@ namespace Apache.Arrow.Tests
                 var listSize = 
((FixedSizeListType)expectedArray.Data.DataType).ListSize;
                 var expectedValuesSlice = ArrowArrayFactory.Slice(
                     expectedArray.Values, expectedArray.Offset * listSize, 
expectedArray.Length * listSize);
-                actualArray.Values.Accept(new 
ArrayComparer(expectedValuesSlice, _strictCompare));
+                var actualValuesSlice = ArrowArrayFactory.Slice(
+                    actualArray.Values, actualArray.Offset * listSize, 
actualArray.Length * listSize);
+                actualValuesSlice.Accept(new 
ArrayComparer(expectedValuesSlice, _strictCompare));
             }
 
             private void CompareValidityBuffer(int nullCount, int arrayLength, 
ArrowBuffer expectedValidityBuffer, int expectedBufferOffset, ArrowBuffer 
actualValidityBuffer, int actualBufferOffset)
diff --git a/csharp/test/Apache.Arrow.Tests/CDataInterfaceDataTests.cs 
b/csharp/test/Apache.Arrow.Tests/CDataInterfaceDataTests.cs
index 2bd4d4d661..70ab1fdae2 100644
--- a/csharp/test/Apache.Arrow.Tests/CDataInterfaceDataTests.cs
+++ b/csharp/test/Apache.Arrow.Tests/CDataInterfaceDataTests.cs
@@ -92,5 +92,23 @@ namespace Apache.Arrow.Tests
             GC.KeepAlive(releaseCallback);
         }
 #endif
+
+        [Fact]
+        public unsafe void RoundTripInt32ArrayWithOffset()
+        {
+            Int32Array array = new Int32Array.Builder()
+                .AppendRange(new[] { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 })
+                .Build();
+            IArrowArray sliced = array.Slice(2, 6);
+            CArrowArray* cArray = CArrowArray.Create();
+            CArrowArrayExporter.ExportArray(sliced, cArray);
+            using (var importedSlice = 
(Int32Array)CArrowArrayImporter.ImportArray(cArray, array.Data.DataType))
+            {
+                Assert.Equal(6, importedSlice.Length);
+                Assert.Equal(2, importedSlice.Offset);
+                Assert.Equal(2, importedSlice.GetValue(0));
+            }
+            CArrowArray.Free(cArray);
+        }
     }
 }
diff --git a/csharp/test/Apache.Arrow.Tests/CDataInterfacePythonTests.cs 
b/csharp/test/Apache.Arrow.Tests/CDataInterfacePythonTests.cs
index fee18d165c..638cbfb272 100644
--- a/csharp/test/Apache.Arrow.Tests/CDataInterfacePythonTests.cs
+++ b/csharp/test/Apache.Arrow.Tests/CDataInterfacePythonTests.cs
@@ -792,6 +792,49 @@ namespace Apache.Arrow.Tests
             CArrowSchema.Free(cImportSchema);
         }
 
+        [SkippableFact]
+        public unsafe void RoundTripTestSlicedBatch()
+        {
+            // TODO: Enable these once this the version of pyarrow referenced 
during testing supports them
+            HashSet<ArrowTypeId> unsupported = new HashSet<ArrowTypeId> { 
ArrowTypeId.ListView, ArrowTypeId.BinaryView, ArrowTypeId.StringView };
+            RecordBatch batch1 = TestData.CreateSampleRecordBatch(4, 
excludedTypes: unsupported);
+            RecordBatch batch1slice = batch1.Slice(1, 2);
+            RecordBatch batch2 = batch1slice.Clone();
+
+            CArrowArray* cExportArray = CArrowArray.Create();
+            CArrowArrayExporter.ExportRecordBatch(batch1slice, cExportArray);
+
+            CArrowSchema* cExportSchema = CArrowSchema.Create();
+            CArrowSchemaExporter.ExportSchema(batch1.Schema, cExportSchema);
+
+            CArrowArray* cImportArray = CArrowArray.Create();
+            CArrowSchema* cImportSchema = CArrowSchema.Create();
+
+            // For Python, we need to provide the pointers
+            long exportArrayPtr = ((IntPtr)cExportArray).ToInt64();
+            long exportSchemaPtr = ((IntPtr)cExportSchema).ToInt64();
+            long importArrayPtr = ((IntPtr)cImportArray).ToInt64();
+            long importSchemaPtr = ((IntPtr)cImportSchema).ToInt64();
+
+            using (Py.GIL())
+            {
+                dynamic pa = Py.Import("pyarrow");
+                dynamic exportedPyArray = 
pa.RecordBatch._import_from_c(exportArrayPtr, exportSchemaPtr);
+                exportedPyArray._export_to_c(importArrayPtr, importSchemaPtr);
+            }
+
+            Schema schema = CArrowSchemaImporter.ImportSchema(cImportSchema);
+            RecordBatch importedBatch = 
CArrowArrayImporter.ImportRecordBatch(cImportArray, schema);
+
+            ArrowReaderVerifier.CompareBatches(batch2, importedBatch, 
strictCompare: false); // Non-strict because span lengths won't match.
+
+            // Since we allocated, we are responsible for freeing the pointer.
+            CArrowArray.Free(cExportArray);
+            CArrowSchema.Free(cExportSchema);
+            CArrowArray.Free(cImportArray);
+            CArrowSchema.Free(cImportSchema);
+        }
+
         [SkippableFact]
         public unsafe void ExportBatchReader()
         {

Reply via email to