This is an automated email from the ASF dual-hosted git repository.
CurtHagenlocher pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-dotnet.git
The following commit(s) were added to refs/heads/main by this push:
new 90aed52 Add VariantArray extension type and introduce
IBinaryArray/IIndexes interfaces (#325)
90aed52 is described below
commit 90aed52cc670c581f934184d1fc0f6f42cd1ba5b
Author: Curt Hagenlocher <[email protected]>
AuthorDate: Thu Apr 23 03:38:16 2026 -0700
Add VariantArray extension type and introduce IBinaryArray/IIndexes
interfaces (#325)
## What's Changed
- Adds a `VariantArray` extension type.
- Adds a `Builder` class for `VariantArray` that encodes `VariantValue`
instances into the variant binary format and constructs the backing
`StructArray`.
- Introduces internal interfaces `IBinaryArray` and `IIndexes` to
decouple `VariantArray` and other uses from concrete array types.
`IBinaryArray` unifies `BinaryArray`, `LargeBinaryArray`, and
`BinaryViewArray` behind a common `GetBytes` API. `IIndexes` abstracts
index resolution for `DictionaryArray` and `RunEndEncodedArray`,
enabling efficient sequential enumeration for REE arrays. Integer array
types implement `IIndexes` with `GetPhysicalIndex` and
`EnumeratePhysicalIndices` methods.
- Removes support for .NET 6.0, which has been deprecated, and replaces
it with .NET 8.0 where appropriate.
- Adds support for .NET Framework 4.6.2 (`net462`) to the `Scalars` and `Operations` assemblies and tests.
---------
Co-authored-by: Copilot <[email protected]>
---
Directory.Packages.props | 7 +-
dev/release/verify_rc.sh | 2 +-
.../Apache.Arrow.Flight.AspNetCore.csproj | 2 +-
.../Apache.Arrow.Operations.csproj | 2 +-
.../Apache.Arrow.Scalars.csproj | 7 +-
src/Apache.Arrow/Apache.Arrow.csproj | 6 +-
src/Apache.Arrow/Arrays/BinaryArray.cs | 2 +-
src/Apache.Arrow/Arrays/BinaryViewArray.cs | 2 +-
src/Apache.Arrow/Arrays/DictionaryArray.cs | 13 +-
src/Apache.Arrow/Arrays/Int16Array.cs | 12 +-
src/Apache.Arrow/Arrays/Int32Array.cs | 12 +-
src/Apache.Arrow/Arrays/Int64Array.cs | 13 +-
src/Apache.Arrow/Arrays/Int8Array.cs | 12 +-
src/Apache.Arrow/Arrays/LargeBinaryArray.cs | 2 +-
src/Apache.Arrow/Arrays/RunEndEncodedArray.cs | 4 +-
src/Apache.Arrow/Arrays/UInt16Array.cs | 15 +-
src/Apache.Arrow/Arrays/UInt32Array.cs | 12 +-
src/Apache.Arrow/Arrays/UInt64Array.cs | 12 +-
src/Apache.Arrow/Arrays/UInt8Array.cs | 13 +-
src/Apache.Arrow/Arrays/VariantArray.cs | 361 +++++++++++++++
.../Extensions/IArrowArrayExtensions.cs | 32 +-
src/Apache.Arrow/Interfaces/IBinaryArray.cs | 37 ++
src/Apache.Arrow/Interfaces/IIndexes.cs | 33 ++
.../Apache.Arrow.Operations.Tests.csproj | 12 +-
.../Apache.Arrow.Scalars.Tests.csproj | 10 +-
test/Apache.Arrow.Tests/Apache.Arrow.Tests.csproj | 15 +-
test/Apache.Arrow.Tests/VariantArrayTests.cs | 510 +++++++++++++++++++++
27 files changed, 1083 insertions(+), 77 deletions(-)
diff --git a/Directory.Packages.props b/Directory.Packages.props
index fd09de7..c6c5d4b 100644
--- a/Directory.Packages.props
+++ b/Directory.Packages.props
@@ -45,7 +45,12 @@
<PackageVersion Include="System.Threading.Tasks.Extensions"
Version="4.6.0" />
<PackageVersion Include="System.ValueTuple" Version="4.5.0" />
<PackageVersion Include="xunit" Version="2.9.3" />
- <PackageVersion Include="xunit.runner.visualstudio" Version="3.1.5" />
+ <PackageVersion Include="xunit.runner.visualstudio"
+ Version="3.1.5"
+ Condition="'$(TargetFramework)' != 'net462'" />
+ <PackageVersion Include="xunit.runner.visualstudio"
+ Version="2.8.2"
+ Condition="'$(TargetFramework)' == 'net462'" />
<PackageVersion Include="xunit.skippablefact" Version="1.5.61" />
<PackageVersion Include="ZstdSharp.Port" Version="0.8.5" />
</ItemGroup>
diff --git a/dev/release/verify_rc.sh b/dev/release/verify_rc.sh
index 2c5ad9a..e2c2294 100755
--- a/dev/release/verify_rc.sh
+++ b/dev/release/verify_rc.sh
@@ -185,7 +185,7 @@ test_binary_distribution() {
reference_package "Apache.Arrow.Flight.Sql" "Apache.Arrow.Flight.Sql.Tests"
"Apache.Arrow.Flight.TestWeb"
reference_package "Apache.Arrow.Flight.AspNetCore"
"Apache.Arrow.Flight.TestWeb"
reference_package "Apache.Arrow.Operations" "Apache.Arrow.Operations.Tests"
"Apache.Arrow.Scalars.Tests"
- reference_package "Apache.Arrow.Scalars" "Apache.Arrow.Scalars.Tests"
+ reference_package "Apache.Arrow.Scalars" "Apache.Arrow.Scalars.Tests"
"Apache.Arrow.Tests" "Apache.Arrow.Operations.Tests"
# Move src directory to ensure we are only testing against built packages
mv src src.backup
diff --git
a/src/Apache.Arrow.Flight.AspNetCore/Apache.Arrow.Flight.AspNetCore.csproj
b/src/Apache.Arrow.Flight.AspNetCore/Apache.Arrow.Flight.AspNetCore.csproj
index 6ea0e3c..00bb633 100644
--- a/src/Apache.Arrow.Flight.AspNetCore/Apache.Arrow.Flight.AspNetCore.csproj
+++ b/src/Apache.Arrow.Flight.AspNetCore/Apache.Arrow.Flight.AspNetCore.csproj
@@ -1,7 +1,7 @@
<Project Sdk="Microsoft.NET.Sdk">
<PropertyGroup>
- <TargetFramework>net6.0</TargetFramework>
+ <TargetFramework>net8.0</TargetFramework>
</PropertyGroup>
<ItemGroup>
diff --git a/src/Apache.Arrow.Operations/Apache.Arrow.Operations.csproj
b/src/Apache.Arrow.Operations/Apache.Arrow.Operations.csproj
index f715802..bb6cb08 100644
--- a/src/Apache.Arrow.Operations/Apache.Arrow.Operations.csproj
+++ b/src/Apache.Arrow.Operations/Apache.Arrow.Operations.csproj
@@ -2,7 +2,7 @@
<PropertyGroup>
<AllowUnsafeBlocks>true</AllowUnsafeBlocks>
- <TargetFrameworks>netstandard2.0;net8.0</TargetFrameworks>
+ <TargetFrameworks>netstandard2.0;net8.0;net462</TargetFrameworks>
<Description>Format-specific codecs for the Apache Arrow Variant binary
format, including JSON reader/writer.</Description>
</PropertyGroup>
diff --git a/src/Apache.Arrow.Scalars/Apache.Arrow.Scalars.csproj
b/src/Apache.Arrow.Scalars/Apache.Arrow.Scalars.csproj
index 17d7938..3c3bd62 100644
--- a/src/Apache.Arrow.Scalars/Apache.Arrow.Scalars.csproj
+++ b/src/Apache.Arrow.Scalars/Apache.Arrow.Scalars.csproj
@@ -2,16 +2,17 @@
<PropertyGroup>
<AllowUnsafeBlocks>true</AllowUnsafeBlocks>
- <TargetFrameworks>netstandard2.0;net8.0</TargetFrameworks>
+ <TargetFrameworks>netstandard2.0;net8.0;net462</TargetFrameworks>
- <Description>Apache Arrow Variant encoding support for .NET. Provides
readers, writers, and object model for the Parquet Variant binary format for
semi-structured data.</Description>
+ <Description>Apache Arrow scalar types, including Parquet-compatible
variants. Provides readers, writers, and object model for the Parquet Variant
binary format for semi-structured data.</Description>
</PropertyGroup>
- <ItemGroup Condition="'$(TargetFrameworkIdentifier)' == '.NETStandard'">
+ <ItemGroup Condition="'$(TargetFrameworkIdentifier)' == '.NETStandard' or
'$(TargetFramework)' == 'net462'">
<PackageReference Include="System.Buffers" />
<PackageReference Include="System.Data.Common" />
<PackageReference Include="System.Memory" />
<PackageReference Include="System.Runtime.CompilerServices.Unsafe" />
+ <PackageReference Include="System.ValueTuple" />
</ItemGroup>
<ItemGroup>
diff --git a/src/Apache.Arrow/Apache.Arrow.csproj
b/src/Apache.Arrow/Apache.Arrow.csproj
index f681a16..1425987 100644
--- a/src/Apache.Arrow/Apache.Arrow.csproj
+++ b/src/Apache.Arrow/Apache.Arrow.csproj
@@ -8,7 +8,7 @@
</PropertyGroup>
<PropertyGroup>
- <TargetFrameworks>netstandard2.0;net6.0;net8.0;net462</TargetFrameworks>
+ <TargetFrameworks>netstandard2.0;net8.0;net462</TargetFrameworks>
</PropertyGroup>
<ItemGroup Condition="'$(TargetFrameworkIdentifier)' == '.NETStandard' or
'$(TargetFramework)' == 'net462'">
@@ -56,4 +56,8 @@
<ItemGroup
Condition="$([MSBuild]::IsTargetFrameworkCompatible($(TargetFramework),
'net8.0'))">
<Compile Remove="Extensions\ExperimentalAttribute.Polyfill.cs" />
</ItemGroup>
+
+ <ItemGroup>
+ <ProjectReference
Include="..\Apache.Arrow.Scalars\Apache.Arrow.Scalars.csproj" />
+ </ItemGroup>
</Project>
diff --git a/src/Apache.Arrow/Arrays/BinaryArray.cs
b/src/Apache.Arrow/Arrays/BinaryArray.cs
index d096199..2d11207 100644
--- a/src/Apache.Arrow/Arrays/BinaryArray.cs
+++ b/src/Apache.Arrow/Arrays/BinaryArray.cs
@@ -22,7 +22,7 @@ using Apache.Arrow.Types;
namespace Apache.Arrow
{
- public class BinaryArray : Array, IReadOnlyList<byte[]>,
ICollection<byte[]>
+ public class BinaryArray : Array, IBinaryArray, IReadOnlyList<byte[]>,
ICollection<byte[]>
{
public class Builder : BuilderBase<BinaryArray, Builder>
{
diff --git a/src/Apache.Arrow/Arrays/BinaryViewArray.cs
b/src/Apache.Arrow/Arrays/BinaryViewArray.cs
index f9c8c0e..48ab312 100644
--- a/src/Apache.Arrow/Arrays/BinaryViewArray.cs
+++ b/src/Apache.Arrow/Arrays/BinaryViewArray.cs
@@ -23,7 +23,7 @@ using Apache.Arrow.Types;
namespace Apache.Arrow
{
- public class BinaryViewArray : Array, IReadOnlyList<byte[]>
+ public class BinaryViewArray : Array, IBinaryArray, IReadOnlyList<byte[]>
{
public class Builder : BuilderBase<BinaryViewArray, Builder>
{
diff --git a/src/Apache.Arrow/Arrays/DictionaryArray.cs
b/src/Apache.Arrow/Arrays/DictionaryArray.cs
index 29c0f5c..dc1bab3 100644
--- a/src/Apache.Arrow/Arrays/DictionaryArray.cs
+++ b/src/Apache.Arrow/Arrays/DictionaryArray.cs
@@ -14,7 +14,7 @@
// limitations under the License.
using System;
-using System.IO;
+using System.Collections.Generic;
using Apache.Arrow.Types;
namespace Apache.Arrow
@@ -57,5 +57,16 @@ namespace Apache.Arrow
}
public override void Accept(IArrowArrayVisitor visitor) =>
Accept(this, visitor);
+
+ public IEnumerable<int> EnumeratePhysicalIndices() =>
GetIndexes().EnumeratePhysicalIndices();
+
+ internal IIndexes GetIndexes()
+ {
+ if (Indices is IIndexes indexes)
+ {
+ return indexes;
+ }
+ throw new NotSupportedException($"Unsupported index array type:
{Indices.Data.DataType}");
+ }
}
}
diff --git a/src/Apache.Arrow/Arrays/Int16Array.cs
b/src/Apache.Arrow/Arrays/Int16Array.cs
index cb33a53..729e049 100644
--- a/src/Apache.Arrow/Arrays/Int16Array.cs
+++ b/src/Apache.Arrow/Arrays/Int16Array.cs
@@ -13,11 +13,12 @@
// See the License for the specific language governing permissions and
// limitations under the License.
+using System.Collections.Generic;
using Apache.Arrow.Types;
namespace Apache.Arrow
{
- public class Int16Array : PrimitiveArray<short>
+ public class Int16Array : PrimitiveArray<short>, IIndexes
{
public class Builder : PrimitiveArrayBuilder<short, Int16Array,
Builder>
{
@@ -42,5 +43,14 @@ namespace Apache.Arrow
public override void Accept(IArrowArrayVisitor visitor) =>
Accept(this, visitor);
+ int IIndexes.GetPhysicalIndex(int index) => this.GetValue(index) ?? -1;
+
+ IEnumerable<int> IIndexes.EnumeratePhysicalIndices()
+ {
+ for (int i = 0; i < Length; i++)
+ {
+ yield return this.GetValue(i) ?? -1;
+ }
+ }
}
}
diff --git a/src/Apache.Arrow/Arrays/Int32Array.cs
b/src/Apache.Arrow/Arrays/Int32Array.cs
index ef356c7..a1bd03c 100644
--- a/src/Apache.Arrow/Arrays/Int32Array.cs
+++ b/src/Apache.Arrow/Arrays/Int32Array.cs
@@ -13,11 +13,12 @@
// See the License for the specific language governing permissions and
// limitations under the License.
+using System.Collections.Generic;
using Apache.Arrow.Types;
namespace Apache.Arrow
{
- public class Int32Array : PrimitiveArray<int>
+ public class Int32Array : PrimitiveArray<int>, IIndexes
{
public class Builder : PrimitiveArrayBuilder<int, Int32Array, Builder>
{
@@ -42,5 +43,14 @@ namespace Apache.Arrow
public override void Accept(IArrowArrayVisitor visitor) =>
Accept(this, visitor);
+ int IIndexes.GetPhysicalIndex(int index) => this.GetValue(index) ?? -1;
+
+ IEnumerable<int> IIndexes.EnumeratePhysicalIndices()
+ {
+ for (int i = 0; i < Length; i++)
+ {
+ yield return this.GetValue(i) ?? -1;
+ }
+ }
}
}
diff --git a/src/Apache.Arrow/Arrays/Int64Array.cs
b/src/Apache.Arrow/Arrays/Int64Array.cs
index fe8fbc6..3c859c9 100644
--- a/src/Apache.Arrow/Arrays/Int64Array.cs
+++ b/src/Apache.Arrow/Arrays/Int64Array.cs
@@ -13,11 +13,12 @@
// See the License for the specific language governing permissions and
// limitations under the License.
+using System.Collections.Generic;
using Apache.Arrow.Types;
namespace Apache.Arrow
{
- public class Int64Array : PrimitiveArray<long>
+ public class Int64Array : PrimitiveArray<long>, IIndexes
{
public class Builder : PrimitiveArrayBuilder<long, Int64Array, Builder>
{
@@ -42,5 +43,15 @@ namespace Apache.Arrow
public override void Accept(IArrowArrayVisitor visitor) =>
Accept(this, visitor);
+
+ int IIndexes.GetPhysicalIndex(int index) =>
checked((int)(this.GetValue(index) ?? -1));
+
+ IEnumerable<int> IIndexes.EnumeratePhysicalIndices()
+ {
+ for (int i = 0; i < Length; i++)
+ {
+ yield return checked((int)(this.GetValue(i) ?? -1));
+ }
+ }
}
}
diff --git a/src/Apache.Arrow/Arrays/Int8Array.cs
b/src/Apache.Arrow/Arrays/Int8Array.cs
index 58d543a..47fe6e4 100644
--- a/src/Apache.Arrow/Arrays/Int8Array.cs
+++ b/src/Apache.Arrow/Arrays/Int8Array.cs
@@ -13,11 +13,12 @@
// See the License for the specific language governing permissions and
// limitations under the License.
+using System.Collections.Generic;
using Apache.Arrow.Types;
namespace Apache.Arrow
{
- public class Int8Array : PrimitiveArray<sbyte>
+ public class Int8Array : PrimitiveArray<sbyte>, IIndexes
{
public class Builder : PrimitiveArrayBuilder<sbyte, Int8Array, Builder>
{
@@ -42,5 +43,14 @@ namespace Apache.Arrow
public override void Accept(IArrowArrayVisitor visitor) =>
Accept(this, visitor);
+ int IIndexes.GetPhysicalIndex(int index) => this.GetValue(index) ?? -1;
+
+ IEnumerable<int> IIndexes.EnumeratePhysicalIndices()
+ {
+ for (int i = 0; i < Length; i++)
+ {
+ yield return this.GetValue(i) ?? -1;
+ }
+ }
}
}
diff --git a/src/Apache.Arrow/Arrays/LargeBinaryArray.cs
b/src/Apache.Arrow/Arrays/LargeBinaryArray.cs
index 09e1ad8..e980165 100644
--- a/src/Apache.Arrow/Arrays/LargeBinaryArray.cs
+++ b/src/Apache.Arrow/Arrays/LargeBinaryArray.cs
@@ -22,7 +22,7 @@ using Apache.Arrow.Types;
namespace Apache.Arrow;
-public class LargeBinaryArray : Array, IReadOnlyList<byte[]>,
ICollection<byte[]>
+public class LargeBinaryArray : Array, IBinaryArray, IReadOnlyList<byte[]>,
ICollection<byte[]>
{
public class Builder : BuilderBase<LargeBinaryArray, Builder>
{
diff --git a/src/Apache.Arrow/Arrays/RunEndEncodedArray.cs
b/src/Apache.Arrow/Arrays/RunEndEncodedArray.cs
index 388302f..c1aff92 100644
--- a/src/Apache.Arrow/Arrays/RunEndEncodedArray.cs
+++ b/src/Apache.Arrow/Arrays/RunEndEncodedArray.cs
@@ -26,7 +26,7 @@ namespace Apache.Arrow;
/// It contains two child arrays: run_ends (Int16/Int32/Int64) and values (any
type).
/// The run_ends array stores the cumulative end positions of each run.
/// </summary>
-public class RunEndEncodedArray : Array
+public class RunEndEncodedArray : Array, IIndexes
{
/// <summary>
/// Gets the run ends array (Int16Array, Int32Array, or Int64Array).
@@ -464,4 +464,6 @@ public class RunEndEncodedArray : Array
}
public override void Accept(IArrowArrayVisitor visitor) => Accept(this,
visitor);
+
+ int IIndexes.GetPhysicalIndex(int index) => FindPhysicalIndex(index);
}
diff --git a/src/Apache.Arrow/Arrays/UInt16Array.cs
b/src/Apache.Arrow/Arrays/UInt16Array.cs
index bba244f..a1cc591 100644
--- a/src/Apache.Arrow/Arrays/UInt16Array.cs
+++ b/src/Apache.Arrow/Arrays/UInt16Array.cs
@@ -13,11 +13,12 @@
// See the License for the specific language governing permissions and
// limitations under the License.
+using System.Collections.Generic;
using Apache.Arrow.Types;
namespace Apache.Arrow
{
- public class UInt16Array : PrimitiveArray<ushort>
+ public class UInt16Array : PrimitiveArray<ushort>, IIndexes
{
public class Builder : PrimitiveArrayBuilder<ushort, UInt16Array,
Builder>
{
@@ -41,6 +42,16 @@ namespace Apache.Arrow
}
public override void Accept(IArrowArrayVisitor visitor) =>
Accept(this, visitor);
- }
+
+ int IIndexes.GetPhysicalIndex(int index) => this.GetValue(index) ?? -1;
+
+ IEnumerable<int> IIndexes.EnumeratePhysicalIndices()
+ {
+ for (int i = 0; i < Length; i++)
+ {
+ yield return this.GetValue(i) ?? -1;
+ }
+ }
+ }
}
diff --git a/src/Apache.Arrow/Arrays/UInt32Array.cs
b/src/Apache.Arrow/Arrays/UInt32Array.cs
index 65320be..a6eb275 100644
--- a/src/Apache.Arrow/Arrays/UInt32Array.cs
+++ b/src/Apache.Arrow/Arrays/UInt32Array.cs
@@ -13,11 +13,12 @@
// See the License for the specific language governing permissions and
// limitations under the License.
+using System.Collections.Generic;
using Apache.Arrow.Types;
namespace Apache.Arrow
{
- public class UInt32Array : PrimitiveArray<uint>
+ public class UInt32Array : PrimitiveArray<uint>, IIndexes
{
public class Builder : PrimitiveArrayBuilder<uint, UInt32Array,
Builder>
{
@@ -42,5 +43,14 @@ namespace Apache.Arrow
public override void Accept(IArrowArrayVisitor visitor) =>
Accept(this, visitor);
+ int IIndexes.GetPhysicalIndex(int index) =>
checked((int?)this.GetValue(index)) ?? -1;
+
+ IEnumerable<int> IIndexes.EnumeratePhysicalIndices()
+ {
+ for (int i = 0; i < Length; i++)
+ {
+ yield return checked((int?)this.GetValue(i)) ?? -1;
+ }
+ }
}
}
diff --git a/src/Apache.Arrow/Arrays/UInt64Array.cs
b/src/Apache.Arrow/Arrays/UInt64Array.cs
index 617949f..c3d7701 100644
--- a/src/Apache.Arrow/Arrays/UInt64Array.cs
+++ b/src/Apache.Arrow/Arrays/UInt64Array.cs
@@ -13,11 +13,12 @@
// See the License for the specific language governing permissions and
// limitations under the License.
+using System.Collections.Generic;
using Apache.Arrow.Types;
namespace Apache.Arrow
{
- public class UInt64Array : PrimitiveArray<ulong>
+ public class UInt64Array : PrimitiveArray<ulong>, IIndexes
{
public class Builder : PrimitiveArrayBuilder<ulong, UInt64Array,
Builder>
{
@@ -42,5 +43,14 @@ namespace Apache.Arrow
public override void Accept(IArrowArrayVisitor visitor) =>
Accept(this, visitor);
+ int IIndexes.GetPhysicalIndex(int index) =>
checked((int?)this.GetValue(index)) ?? -1;
+
+ IEnumerable<int> IIndexes.EnumeratePhysicalIndices()
+ {
+ for (int i = 0; i < Length; i++)
+ {
+ yield return checked((int?)this.GetValue(i)) ?? -1;
+ }
+ }
}
}
diff --git a/src/Apache.Arrow/Arrays/UInt8Array.cs
b/src/Apache.Arrow/Arrays/UInt8Array.cs
index f68a5fa..8b6be25 100644
--- a/src/Apache.Arrow/Arrays/UInt8Array.cs
+++ b/src/Apache.Arrow/Arrays/UInt8Array.cs
@@ -13,11 +13,12 @@
// See the License for the specific language governing permissions and
// limitations under the License.
+using System.Collections.Generic;
using Apache.Arrow.Types;
namespace Apache.Arrow
{
- public class UInt8Array : PrimitiveArray<byte>
+ public class UInt8Array : PrimitiveArray<byte>, IIndexes
{
public class Builder : PrimitiveArrayBuilder<byte, UInt8Array, Builder>
{
@@ -42,5 +43,15 @@ namespace Apache.Arrow
public override void Accept(IArrowArrayVisitor visitor) =>
Accept(this, visitor);
+
+ int IIndexes.GetPhysicalIndex(int index) => this.GetValue(index) ?? -1;
+
+ IEnumerable<int> IIndexes.EnumeratePhysicalIndices()
+ {
+ for (int i = 0; i < Length; i++)
+ {
+ yield return this.GetValue(i) ?? -1;
+ }
+ }
}
}
diff --git a/src/Apache.Arrow/Arrays/VariantArray.cs
b/src/Apache.Arrow/Arrays/VariantArray.cs
new file mode 100644
index 0000000..c31a382
--- /dev/null
+++ b/src/Apache.Arrow/Arrays/VariantArray.cs
@@ -0,0 +1,361 @@
+// Licensed to the Apache Software Foundation (ASF) under one or more
+// contributor license agreements. See the NOTICE file distributed with
+// this work for additional information regarding copyright ownership.
+// The ASF licenses this file to You under the Apache License, Version 2.0
+// (the "License"); you may not use this file except in compliance with
+// the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+using System;
+using System.Collections;
+using System.Collections.Generic;
+using System.Linq;
+using Apache.Arrow.Memory;
+using Apache.Arrow.Scalars.Variant;
+using Apache.Arrow.Types;
+
+namespace Apache.Arrow
+{
+ /// <summary>
+ /// Extension definition for the "arrow.parquet.variant" extension type,
+ /// backed by a struct with "metadata" and "value" binary fields.
+ /// </summary>
+ public class VariantExtensionDefinition : ExtensionDefinition
+ {
+ public static VariantExtensionDefinition Instance = new
VariantExtensionDefinition();
+
+ public override string ExtensionName => VariantType.ExtensionName;
+
+ private VariantExtensionDefinition() { }
+
+ public override bool TryCreateType(IArrowType storageType, string
metadata, out ExtensionType type)
+ {
+ if (storageType is StructType structType &&
+ FindBinaryFieldIndex(structType, "metadata") >= 0 &&
+ FindBinaryFieldIndex(structType, "value") >= 0)
+ {
+ type = new VariantType(structType);
+ return true;
+ }
+ type = null;
+ return false;
+ }
+
+ internal static int FindBinaryFieldIndex(StructType structType, string
name)
+ {
+ int index = structType.GetFieldIndex(name);
+ if (index < 0)
+ {
+ return -1;
+ }
+
+ var fieldType = structType.Fields[index].DataType;
+ if (fieldType is BinaryType || fieldType is LargeBinaryType ||
fieldType is BinaryViewType)
+ {
+ return index;
+ }
+
+ return -1;
+ }
+ }
+
+ /// <summary>
+ /// Extension type representing Parquet Variant values, stored as
+ /// struct&lt;metadata: binary, value: binary&gt;.
+ /// </summary>
+ public class VariantType : ExtensionType
+ {
+ internal const string ExtensionName = "arrow.parquet.variant";
+
+ public static VariantType Default = new VariantType();
+
+ public override string Name => ExtensionName;
+ public override string ExtensionMetadata => "";
+
+ public VariantType() : base(new StructType(new[]
+ {
+ new Field("metadata", BinaryType.Default, false),
+ new Field("value", BinaryType.Default, false),
+ }))
+ { }
+
+ internal VariantType(StructType storageType) : base(storageType) { }
+
+ public override ExtensionArray CreateArray(IArrowArray storageArray)
+ {
+ return new VariantArray(this, storageArray);
+ }
+ }
+
+ /// <summary>
+ /// Extension array for Parquet Variant values, backed by a StructArray
+ /// containing "metadata" and "value" binary fields.
+ /// </summary>
+ public class VariantArray : ExtensionArray, IReadOnlyList<VariantValue>
+ {
+ private readonly IIndexes _metadataIndexes;
+ private readonly IBinaryArray _metadataArray;
+ private readonly IIndexes _valueIndexes;
+ private readonly IBinaryArray _valueArray;
+
+ public StructArray StorageArray => (StructArray)Storage;
+
+ public VariantArray(VariantType variantType, IArrowArray storage)
+ : base(variantType, storage)
+ {
+ var structType = (StructType)variantType.StorageType;
+ _metadataArray =
DecodeBinaryArray(StorageArray.Fields[structType.GetFieldIndex("metadata")],
out _metadataIndexes);
+ _valueArray =
DecodeBinaryArray(StorageArray.Fields[structType.GetFieldIndex("value")], out
_valueIndexes);
+ }
+
+ public VariantArray(IArrowArray storage) : this(VariantType.Default,
storage) { }
+
+ /// <summary>
+ /// Gets the metadata bytes for the element at the given index.
+ /// </summary>
+ public ReadOnlySpan<byte> GetMetadataBytes(int index)
+ {
+ int physicalIndex = _metadataIndexes.GetPhysicalIndex(index);
+ return _metadataArray.GetBytes(physicalIndex, out bool isNull);
+ }
+
+ /// <summary>
+ /// Gets the value bytes for the element at the given index.
+ /// </summary>
+ public ReadOnlySpan<byte> GetValueBytes(int index)
+ {
+ int physicalIndex = _valueIndexes.GetPhysicalIndex(index);
+ return _valueArray.GetBytes(physicalIndex, out bool isNull);
+ }
+
+ /// <summary>
+ /// Gets a zero-copy <see cref="VariantReader"/> for the element at
the given index.
+ /// The reader is only valid while the underlying array buffers are
alive.
+ /// </summary>
+ /// <exception cref="ArgumentOutOfRangeException">If <paramref
name="index"/> is out of range.</exception>
+ /// <exception cref="InvalidOperationException">If the element at
<paramref name="index"/> is null.</exception>
+ public VariantReader GetVariantReader(int index)
+ {
+ if (index < 0 || index >= Length)
+ throw new ArgumentOutOfRangeException(nameof(index));
+
+ if (IsNull(index))
+ throw new InvalidOperationException("Cannot create a
VariantReader for a null element.");
+
+ return new VariantReader(GetMetadataBytes(index),
GetValueBytes(index));
+ }
+
+ /// <summary>
+ /// Gets a materialized <see cref="VariantValue"/> for the element at
the given index.
+ /// </summary>
+ public VariantValue GetVariantValue(int index)
+ {
+ if (index < 0 || index >= Length)
+ throw new ArgumentOutOfRangeException(nameof(index));
+
+ if (IsNull(index))
+ return VariantValue.Null;
+
+ var metadata = GetMetadataBytes(index);
+ var value = GetValueBytes(index);
+ var reader = new VariantReader(metadata, value);
+ return reader.ToVariantValue();
+ }
+
+ public int Count => Length;
+ public VariantValue this[int index] => GetVariantValue(index);
+
+ public IEnumerator<VariantValue> GetEnumerator()
+ {
+ IEnumerator<int> metadataIdx =
_metadataIndexes.EnumeratePhysicalIndices().GetEnumerator();
+ IEnumerator<int> valueIdx =
_valueIndexes.EnumeratePhysicalIndices().GetEnumerator();
+ for (int i = 0; metadataIdx.MoveNext() && valueIdx.MoveNext(); i++)
+ {
+ if (IsNull(i))
+ {
+ yield return VariantValue.Null;
+ continue;
+ }
+ var metadata = _metadataArray.GetBytes(metadataIdx.Current,
out _);
+ var value = _valueArray.GetBytes(valueIdx.Current, out _);
+ yield return new VariantReader(metadata,
value).ToVariantValue();
+ }
+ }
+
+ IEnumerator IEnumerable.GetEnumerator() => GetEnumerator();
+
+ private static IBinaryArray DecodeBinaryArray(IArrowArray array, out
IIndexes indexes)
+ {
+ if (array == null)
+ throw new ArgumentNullException(nameof(array));
+
+ switch (array)
+ {
+ case IBinaryArray binary:
+ indexes = new SimpleIndexes(binary.Length);
+ return binary;
+
+ case DictionaryArray dict:
+ IBinaryArray values = dict.Dictionary as IBinaryArray;
+ if (values == null)
+ throw new ArgumentException(
+ $"Dictionary value type
{dict.Dictionary.Data.DataType.TypeId} cannot be read as binary.");
+ indexes = dict.GetIndexes();
+ return values;
+
+ case RunEndEncodedArray ree:
+ IBinaryArray reeValues = ree.Values as IBinaryArray;
+ if (reeValues == null)
+ throw new ArgumentException(
+ $"Run-end encoded value type
{ree.Values.Data.DataType.TypeId} cannot be read as binary.");
+ indexes = ree;
+ return reeValues;
+ default:
+ throw new ArgumentException(
+ $"Cannot create binary reader for array of type
{array.Data.DataType.TypeId}.",
+ nameof(array));
+ }
+ }
+
+ sealed class SimpleIndexes : IIndexes
+ {
+ public SimpleIndexes(int length)
+ {
+ Length = length;
+ }
+
+ public int Length { get; }
+ public IEnumerable<int> EnumeratePhysicalIndices() =>
Enumerable.Range(0, Length);
+ public int GetPhysicalIndex(int index) => index;
+ }
+
+ /// <summary>
+ /// Builder for constructing <see cref="VariantArray"/> instances.
+ /// </summary>
+ public class Builder
+ {
+ private readonly BinaryArray.Builder _metadataBuilder = new
BinaryArray.Builder();
+ private readonly BinaryArray.Builder _valueBuilder = new
BinaryArray.Builder();
+ private readonly ArrowBuffer.BitmapBuilder _validityBuilder = new
ArrowBuffer.BitmapBuilder();
+ private readonly VariantBuilder _encoder = new VariantBuilder();
+ private int _length;
+ private int _nullCount;
+
+ // Pre-encoded placeholder for struct-level nulls.
+ // We use encoded VariantValue.Null so child arrays always have
valid binary data.
+ private static readonly Lazy<(byte[] Metadata, byte[] Value)>
NullPlaceholder =
+ new Lazy<(byte[], byte[])>(() => new
VariantBuilder().Encode(VariantValue.Null));
+
+ /// <summary>
+ /// Gets the number of elements appended so far.
+ /// </summary>
+ public int Length => _length;
+
+ /// <summary>
+ /// Appends a <see cref="VariantValue"/> to the array.
+ /// </summary>
+ public Builder Append(VariantValue value)
+ {
+ var (metadata, valueBytes) = _encoder.Encode(value);
+ _metadataBuilder.Append((ReadOnlySpan<byte>)metadata);
+ _valueBuilder.Append((ReadOnlySpan<byte>)valueBytes);
+ _validityBuilder.Append(true);
+ _length++;
+ return this;
+ }
+
+ /// <summary>
+ /// Appends a nullable <see cref="VariantValue"/>. A null value
appends
+ /// a struct-level null (as opposed to a variant-encoded null).
+ /// </summary>
+ public Builder Append(VariantValue? value)
+ {
+ if (value == null)
+ return AppendNull();
+ return Append(value.Value);
+ }
+
+ /// <summary>
+ /// Appends a variant element from pre-encoded metadata and value
bytes.
+ /// The caller is responsible for providing valid variant-encoded
data.
+ /// </summary>
+ public Builder Append(ReadOnlySpan<byte> metadata,
ReadOnlySpan<byte> value)
+ {
+ _metadataBuilder.Append(metadata);
+ _valueBuilder.Append(value);
+ _validityBuilder.Append(true);
+ _length++;
+ return this;
+ }
+
+ /// <summary>
+ /// Appends a struct-level null element. This is distinct from
appending
+ /// <see cref="VariantValue.Null"/>, which represents a valid slot
+ /// containing a variant-encoded null value.
+ /// </summary>
+ public Builder AppendNull()
+ {
+ var placeholder = NullPlaceholder.Value;
+
_metadataBuilder.Append((ReadOnlySpan<byte>)placeholder.Metadata);
+ _valueBuilder.Append((ReadOnlySpan<byte>)placeholder.Value);
+ _validityBuilder.Append(false);
+ _length++;
+ _nullCount++;
+ return this;
+ }
+
+ /// <summary>
+ /// Appends a range of <see cref="VariantValue"/> elements.
+ /// </summary>
+ public Builder AppendRange(IEnumerable<VariantValue> values)
+ {
+ if (values == null)
+ throw new ArgumentNullException(nameof(values));
+
+ foreach (var value in values)
+ {
+ Append(value);
+ }
+ return this;
+ }
+
+ /// <summary>
+ /// Appends a range of nullable <see cref="VariantValue"/>
elements.
+ /// </summary>
+ public Builder AppendRange(IEnumerable<VariantValue?> values)
+ {
+ if (values == null)
+ throw new ArgumentNullException(nameof(values));
+
+ foreach (var value in values)
+ {
+ Append(value);
+ }
+ return this;
+ }
+
+ /// <summary>
+ /// Builds the <see cref="VariantArray"/> from appended values.
+ /// </summary>
+ public VariantArray Build(MemoryAllocator allocator = default)
+ {
+ var metadataArray = _metadataBuilder.Build(allocator);
+ var valueArray = _valueBuilder.Build(allocator);
+ var structType = (StructType)VariantType.Default.StorageType;
+ var nullBitmap = _nullCount > 0 ?
_validityBuilder.Build(allocator) : ArrowBuffer.Empty;
+ var structArray = new StructArray(
+ structType, _length,
+ new IArrowArray[] { metadataArray, valueArray },
+ nullBitmap, _nullCount);
+ return new VariantArray(VariantType.Default, structArray);
+ }
+ }
+ }
+}
diff --git a/src/Apache.Arrow/Extensions/IArrowArrayExtensions.cs
b/src/Apache.Arrow/Extensions/IArrowArrayExtensions.cs
index 98ee0d3..e377c40 100644
--- a/src/Apache.Arrow/Extensions/IArrowArrayExtensions.cs
+++ b/src/Apache.Arrow/Extensions/IArrowArrayExtensions.cs
@@ -16,7 +16,7 @@
using System;
using System.Collections;
using System.Collections.Generic;
-using Apache.Arrow.Types;
+using System.Linq;
namespace Apache.Arrow
{
@@ -68,13 +68,13 @@ namespace Apache.Arrow
{
private readonly IArrowArray _indices;
private readonly IReadOnlyList<T> _values;
- private readonly Func<IArrowArray, int, int> _indexLookup;
+ private readonly IIndexes _indexLookup;
public DictionaryReadOnlyList(DictionaryArray dict,
IReadOnlyList<T> values)
{
_indices = dict.Indices;
_values = values;
- _indexLookup = GetDictionaryIndex(dict.Indices.Data.DataType);
+ _indexLookup = dict.GetIndexes();
}
public int Count => _indices.Length;
@@ -91,15 +91,14 @@ namespace Apache.Arrow
if (_indices.IsNull(index))
return default;
- int dictIndex = _indexLookup(_indices, index);
+ int dictIndex = _indexLookup.GetPhysicalIndex(index);
return _values[dictIndex];
}
}
public IEnumerator<T> GetEnumerator()
{
- for (int i = 0; i < Count; i++)
- yield return this[i];
+ return _indexLookup.EnumeratePhysicalIndices().Select(index =>
index < 0 ? default : _values[index]).GetEnumerator();
}
IEnumerator IEnumerable.GetEnumerator() => GetEnumerator();
@@ -129,29 +128,10 @@ namespace Apache.Arrow
public IEnumerator<T> GetEnumerator()
{
- foreach (int physicalIndex in _ree.EnumeratePhysicalIndices())
- yield return _values[physicalIndex];
+ // Lazily projects each physical index onto its value via LINQ Select.
+ return _ree.EnumeratePhysicalIndices().Select(index =>
_values[index]).GetEnumerator();
}
IEnumerator IEnumerable.GetEnumerator() => GetEnumerator();
}
-
- private static Func<IArrowArray, int, int>
GetDictionaryIndex(IArrowType type)
- {
- switch (type.TypeId)
- {
- case ArrowTypeId.Int8: return (array, logicalIndex) =>
((Int8Array)array).GetValue(logicalIndex) ?? 0;
- case ArrowTypeId.Int16: return (array, logicalIndex) =>
((Int16Array)array).GetValue(logicalIndex) ?? 0;
- case ArrowTypeId.Int32: return (array, logicalIndex) =>
((Int32Array)array).GetValue(logicalIndex) ?? 0;
- case ArrowTypeId.Int64: return (array, logicalIndex) =>
checked((int)(((Int64Array)array).GetValue(logicalIndex) ?? 0));
- case ArrowTypeId.UInt8: return (array, logicalIndex) =>
((UInt8Array)array).GetValue(logicalIndex) ?? 0;
- case ArrowTypeId.UInt16: return (array, logicalIndex) =>
((UInt16Array)array).GetValue(logicalIndex) ?? 0;
- case ArrowTypeId.UInt32: return (array, logicalIndex) =>
checked((int)(((UInt32Array)array).GetValue(logicalIndex) ?? 0));
- case ArrowTypeId.UInt64: return (array, logicalIndex) =>
checked((int)(((UInt64Array)array).GetValue(logicalIndex) ?? 0));
- default:
- throw new InvalidOperationException(
- $"Unsupported dictionary index type: {type.TypeId}");
- }
- }
}
}
diff --git a/src/Apache.Arrow/Interfaces/IBinaryArray.cs
b/src/Apache.Arrow/Interfaces/IBinaryArray.cs
new file mode 100644
index 0000000..8b92ceb
--- /dev/null
+++ b/src/Apache.Arrow/Interfaces/IBinaryArray.cs
@@ -0,0 +1,37 @@
+// Licensed to the Apache Software Foundation (ASF) under one or more
+// contributor license agreements. See the NOTICE file distributed with
+// this work for additional information regarding copyright ownership.
+// The ASF licenses this file to You under the Apache License, Version 2.0
+// (the "License"); you may not use this file except in compliance with
+// the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+using System;
+
+namespace Apache.Arrow
+{
+ /// <summary>
+ /// Read-only access to the elements of a binary-valued array as byte spans.
+ /// </summary>
+ internal interface IBinaryArray
+ {
+     /// <summary>
+     /// The number of values in the array
+     /// </summary>
+     int Length { get; }
+
+     /// <summary>
+     /// Get the collection of bytes, as a read-only span, at a given index in the array.
+     /// </summary>
+     /// <param name="index">Index at which to get bytes.</param>
+     /// <param name="isNull">Set to <see langword="true"/> if the value at the given index is null.</param>
+     /// <returns>Returns a <see cref="ReadOnlySpan{Byte}"/> object.</returns>
+     /// <exception cref="ArgumentOutOfRangeException">If the index is negative or beyond the length of the array.
+     /// </exception>
+     ReadOnlySpan<byte> GetBytes(int index, out bool isNull);
+ }
+}
diff --git a/src/Apache.Arrow/Interfaces/IIndexes.cs
b/src/Apache.Arrow/Interfaces/IIndexes.cs
new file mode 100644
index 0000000..f200718
--- /dev/null
+++ b/src/Apache.Arrow/Interfaces/IIndexes.cs
@@ -0,0 +1,33 @@
+// Licensed to the Apache Software Foundation (ASF) under one or more
+// contributor license agreements. See the NOTICE file distributed with
+// this work for additional information regarding copyright ownership.
+// The ASF licenses this file to You under the Apache License, Version 2.0
+// (the "License"); you may not use this file except in compliance with
+// the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+using System.Collections.Generic;
+
+namespace Apache.Arrow
+{
+ /// <summary>
+ /// For classes that represent a collection of indices into an array. This is used by
+ /// <see cref="DictionaryArray"/> to represent the indices into the dictionary array
+ /// and by <see cref="RunEndEncodedArray"/> to represent the indices into the run-end encoded array.
+ /// </summary>
+ internal interface IIndexes
+ {
+ /// <summary>
+ /// Number of entries in this collection of indices (presumably the logical length
+ /// of the owning array; confirm against implementers).
+ /// </summary>
+ int Length { get; }
+
+ /// <summary>
+ /// Resolves a logical position to the physical index it refers to.
+ /// </summary>
+ /// <param name="index">Logical position to resolve.</param>
+ /// <returns>The physical index for <paramref name="index"/>.</returns>
+ int GetPhysicalIndex(int index);
+
+ /// <summary>
+ /// Enumerates the physical index for each logical position.
+ /// </summary>
+ /// <remarks>
+ /// NOTE(review): the dictionary enumerator in IArrowArrayExtensions treats a negative
+ /// value from this sequence as a null slot. That contract is not stated on this
+ /// interface; confirm every implementer yields a negative value for nulls.
+ /// </remarks>
+ IEnumerable<int> EnumeratePhysicalIndices();
+ }
+}
diff --git
a/test/Apache.Arrow.Operations.Tests/Apache.Arrow.Operations.Tests.csproj
b/test/Apache.Arrow.Operations.Tests/Apache.Arrow.Operations.Tests.csproj
index e23b7c4..2c42293 100644
--- a/test/Apache.Arrow.Operations.Tests/Apache.Arrow.Operations.Tests.csproj
+++ b/test/Apache.Arrow.Operations.Tests/Apache.Arrow.Operations.Tests.csproj
@@ -7,27 +7,23 @@
</PropertyGroup>
<PropertyGroup Condition="'$(IsWindows)'=='true'">
- <TargetFrameworks>net8.0;net472</TargetFrameworks>
+ <TargetFrameworks>net8.0;net472;net462</TargetFrameworks>
</PropertyGroup>
<PropertyGroup Condition="'$(IsWindows)'!='true'">
<TargetFrameworks>net8.0</TargetFrameworks>
</PropertyGroup>
- <ItemGroup>
- <PackageReference Include="xunit.runner.visualstudio">
- <PrivateAssets>all</PrivateAssets>
- <IncludeAssets>runtime; build; native; contentfiles;
analyzers</IncludeAssets>
- </PackageReference>
- </ItemGroup>
-
<ItemGroup>
<PackageReference Include="Microsoft.NET.Test.Sdk" />
<PackageReference Include="xunit" />
+ <PackageReference Include="xunit.runner.visualstudio" />
<PackageReference Include="xunit.skippablefact" />
+ <PackageReference Include="System.Text.Json" />
</ItemGroup>
<ItemGroup>
<ProjectReference
Include="..\..\src\Apache.Arrow.Operations\Apache.Arrow.Operations.csproj" />
+ <ProjectReference
Include="..\..\src\Apache.Arrow.Scalars\Apache.Arrow.Scalars.csproj" />
</ItemGroup>
</Project>
diff --git a/test/Apache.Arrow.Scalars.Tests/Apache.Arrow.Scalars.Tests.csproj
b/test/Apache.Arrow.Scalars.Tests/Apache.Arrow.Scalars.Tests.csproj
index ca1208e..9677fa6 100644
--- a/test/Apache.Arrow.Scalars.Tests/Apache.Arrow.Scalars.Tests.csproj
+++ b/test/Apache.Arrow.Scalars.Tests/Apache.Arrow.Scalars.Tests.csproj
@@ -7,22 +7,16 @@
</PropertyGroup>
<PropertyGroup Condition="'$(IsWindows)'=='true'">
- <TargetFrameworks>net8.0;net472</TargetFrameworks>
+ <TargetFrameworks>net8.0;net472;net462</TargetFrameworks>
</PropertyGroup>
<PropertyGroup Condition="'$(IsWindows)'!='true'">
<TargetFrameworks>net8.0</TargetFrameworks>
</PropertyGroup>
- <ItemGroup>
- <PackageReference Include="xunit.runner.visualstudio">
- <PrivateAssets>all</PrivateAssets>
- <IncludeAssets>runtime; build; native; contentfiles;
analyzers</IncludeAssets>
- </PackageReference>
- </ItemGroup>
-
<ItemGroup>
<PackageReference Include="Microsoft.NET.Test.Sdk" />
<PackageReference Include="xunit" />
+ <PackageReference Include="xunit.runner.visualstudio" />
<PackageReference Include="xunit.skippablefact" />
<PackageReference Include="System.Text.Json" />
</ItemGroup>
diff --git a/test/Apache.Arrow.Tests/Apache.Arrow.Tests.csproj
b/test/Apache.Arrow.Tests/Apache.Arrow.Tests.csproj
index 14ef96d..b45a55c 100644
--- a/test/Apache.Arrow.Tests/Apache.Arrow.Tests.csproj
+++ b/test/Apache.Arrow.Tests/Apache.Arrow.Tests.csproj
@@ -20,23 +20,12 @@
<PackageReference Include="xunit" />
<PackageReference Include="xunit.skippablefact" />
<PackageReference Include="pythonnet" />
- </ItemGroup>
-
- <ItemGroup Condition="'$(TargetFramework)' == 'net462'">
- <PackageReference Include="xunit.runner.visualstudio"
VersionOverride="2.8.2">
- <PrivateAssets>all</PrivateAssets>
- <IncludeAssets>runtime; build; native; contentfiles;
analyzers</IncludeAssets>
- </PackageReference>
- </ItemGroup>
- <ItemGroup Condition="'$(TargetFramework)' != 'net462'">
- <PackageReference Include="xunit.runner.visualstudio">
- <PrivateAssets>all</PrivateAssets>
- <IncludeAssets>runtime; build; native; contentfiles;
analyzers</IncludeAssets>
- </PackageReference>
+ <PackageReference Include="xunit.runner.visualstudio" />
</ItemGroup>
<ItemGroup>
<ProjectReference Include="..\..\src\Apache.Arrow\Apache.Arrow.csproj" />
+ <ProjectReference
Include="..\..\src\Apache.Arrow.Scalars\Apache.Arrow.Scalars.csproj" />
</ItemGroup>
<ItemGroup
Condition="!$([MSBuild]::IsTargetFrameworkCompatible($(TargetFramework),
'net6.0'))">
diff --git a/test/Apache.Arrow.Tests/VariantArrayTests.cs
b/test/Apache.Arrow.Tests/VariantArrayTests.cs
new file mode 100644
index 0000000..3299807
--- /dev/null
+++ b/test/Apache.Arrow.Tests/VariantArrayTests.cs
@@ -0,0 +1,510 @@
+// Licensed to the Apache Software Foundation (ASF) under one or more
+// contributor license agreements. See the NOTICE file distributed with
+// this work for additional information regarding copyright ownership.
+// The ASF licenses this file to You under the Apache License, Version 2.0
+// (the "License"); you may not use this file except in compliance with
+// the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+using System;
+using System.Collections.Generic;
+using System.IO;
+using System.Linq;
+using Apache.Arrow.Ipc;
+using Apache.Arrow.Scalars.Variant;
+using Apache.Arrow.Types;
+using Xunit;
+
+namespace Apache.Arrow.Tests
+{
+ /// <summary>
+ /// Tests for <see cref="VariantArray"/>, its builder, the variant extension type
+ /// definition, and an IPC stream round trip.
+ /// </summary>
+ public class VariantArrayTests
+ {
+     // A builder with nothing appended produces an array with no elements.
+     [Fact]
+     public void BuildEmptyArray()
+     {
+         var builder = new VariantArray.Builder();
+         var array = builder.Build();
+
+         Assert.Empty(array);
+     }
+
+     // One appended Int32 comes back as a single non-null element.
+     [Fact]
+     public void BuildSinglePrimitiveValue()
+     {
+         var builder = new VariantArray.Builder();
+         builder.Append(VariantValue.FromInt32(42));
+         var array = builder.Build();
+
+         Assert.Equal(1, array.Length);
+         Assert.Equal(0, array.NullCount);
+
+         var value = array.GetVariantValue(0);
+         Assert.Equal(42, value.AsInt32());
+     }
+
+     // Rows of different primitive kinds can live in the same array.
+     [Fact]
+     public void BuildMultiplePrimitiveTypes()
+     {
+         var builder = new VariantArray.Builder();
+         builder.Append(VariantValue.FromInt32(1));
+         builder.Append(VariantValue.FromString("hello"));
+         builder.Append(VariantValue.True);
+         builder.Append(VariantValue.FromDouble(3.14));
+         builder.Append(VariantValue.FromInt64(long.MaxValue));
+         var array = builder.Build();
+
+         Assert.Equal(5, array.Length);
+         Assert.Equal(0, array.NullCount);
+
+         Assert.Equal(1, array.GetVariantValue(0).AsInt32());
+         Assert.Equal("hello", array.GetVariantValue(1).AsString());
+         Assert.True(array.GetVariantValue(2).AsBoolean());
+         Assert.Equal(3.14, array.GetVariantValue(3).AsDouble());
+         Assert.Equal(long.MaxValue, array.GetVariantValue(4).AsInt64());
+     }
+
+     // A variant-encoded null occupies a valid slot; AppendNull() marks the slot itself null.
+     [Fact]
+     public void StructNullVsVariantNull()
+     {
+         var builder = new VariantArray.Builder();
+         builder.Append(VariantValue.Null); // variant-encoded null (valid slot)
+         builder.AppendNull();              // struct-level null (invalid slot)
+         var array = builder.Build();
+
+         Assert.Equal(2, array.Length);
+         Assert.Equal(1, array.NullCount);
+
+         // Row 0: valid slot containing variant null
+         Assert.False(array.IsNull(0));
+         var v0 = array.GetVariantValue(0);
+         Assert.True(v0.IsNull);
+
+         // Row 1: struct-level null
+         Assert.True(array.IsNull(1));
+         Assert.True(array.GetVariantValue(1).IsNull);
+     }
+
+     // A struct-level null must not be reported as a null in either child array.
+     [Fact]
+     public void StructNullDoesNotCreateChildNulls()
+     {
+         var builder = new VariantArray.Builder();
+         builder.Append(VariantValue.FromInt32(1));
+         builder.AppendNull();
+         builder.Append(VariantValue.FromString("test"));
+         var array = builder.Build();
+
+         Assert.Equal(3, array.Length);
+         Assert.Equal(1, array.NullCount);
+
+         // Child arrays should have zero nulls (placeholder bytes for struct null)
+         var structArray = array.StorageArray;
+         Assert.Equal(0, structArray.Fields[0].NullCount);
+         Assert.Equal(0, structArray.Fields[1].NullCount);
+     }
+
+     // The nullable Append overload records a null VariantValue? as a null row.
+     [Fact]
+     public void NullableAppend()
+     {
+         var builder = new VariantArray.Builder();
+         builder.Append((VariantValue?)VariantValue.FromInt32(1));
+         builder.Append((VariantValue?)null);
+         builder.Append((VariantValue?)VariantValue.FromString("test"));
+         var array = builder.Build();
+
+         Assert.Equal(3, array.Length);
+         Assert.Equal(1, array.NullCount);
+
+         Assert.Equal(1, array.GetVariantValue(0).AsInt32());
+         Assert.True(array.GetVariantValue(1).IsNull);
+         Assert.Equal("test", array.GetVariantValue(2).AsString());
+     }
+
+     // AppendRange over non-nullable values appends them in order.
+     [Fact]
+     public void AppendRangeNonNullable()
+     {
+         var values = new[]
+         {
+             VariantValue.FromInt32(10),
+             VariantValue.FromString("abc"),
+             VariantValue.False,
+         };
+
+         var builder = new VariantArray.Builder();
+         builder.AppendRange(values);
+         var array = builder.Build();
+
+         Assert.Equal(3, array.Length);
+         Assert.Equal(10, array.GetVariantValue(0).AsInt32());
+         Assert.Equal("abc", array.GetVariantValue(1).AsString());
+         Assert.False(array.GetVariantValue(2).AsBoolean());
+     }
+
+     // AppendRange over nullable values counts null entries as null rows.
+     [Fact]
+     public void AppendRangeNullable()
+     {
+         var values = new VariantValue?[]
+         {
+             VariantValue.FromInt32(10),
+             null,
+             VariantValue.FromString("abc"),
+         };
+
+         var builder = new VariantArray.Builder();
+         builder.AppendRange(values);
+         var array = builder.Build();
+
+         Assert.Equal(3, array.Length);
+         Assert.Equal(1, array.NullCount);
+         Assert.Equal(10, array.GetVariantValue(0).AsInt32());
+         Assert.True(array.GetVariantValue(1).IsNull);
+         Assert.Equal("abc", array.GetVariantValue(2).AsString());
+     }
+
+     // Both AppendRange overloads reject a null sequence.
+     [Fact]
+     public void AppendRangeThrowsOnNull()
+     {
+         var builder = new VariantArray.Builder();
+         Assert.Throws<ArgumentNullException>(() =>
+             builder.AppendRange((IEnumerable<VariantValue>)null));
+         Assert.Throws<ArgumentNullException>(() =>
+             builder.AppendRange((IEnumerable<VariantValue?>)null));
+     }
+
+     // Pre-encoded metadata/value bytes can be appended directly.
+     [Fact]
+     public void AppendRawBytes()
+     {
+         // Encode a value manually, then append raw bytes
+         var encoder = new VariantBuilder();
+         var (metadata, value) = encoder.Encode(VariantValue.FromInt32(99));
+
+         var builder = new VariantArray.Builder();
+         builder.Append((ReadOnlySpan<byte>)metadata, (ReadOnlySpan<byte>)value);
+         var array = builder.Build();
+
+         Assert.Equal(1, array.Length);
+         Assert.Equal(99, array.GetVariantValue(0).AsInt32());
+     }
+
+     // An object-valued variant keeps its fields through the array.
+     [Fact]
+     public void ComplexVariantObject()
+     {
+         var obj = VariantValue.FromObject(new Dictionary<string, VariantValue>
+         {
+             ["name"] = VariantValue.FromString("Alice"),
+             ["age"] = VariantValue.FromInt32(30),
+             ["active"] = VariantValue.True,
+         });
+
+         var builder = new VariantArray.Builder();
+         builder.Append(obj);
+         var array = builder.Build();
+
+         Assert.Equal(1, array.Length);
+         var result = array.GetVariantValue(0);
+         Assert.True(result.IsObject);
+         var fields = result.AsObject();
+         Assert.Equal("Alice", fields["name"].AsString());
+         Assert.Equal(30, fields["age"].AsInt32());
+         Assert.True(fields["active"].AsBoolean());
+     }
+
+     // An array-valued variant keeps its elements, in order, through the array.
+     [Fact]
+     public void ComplexVariantArray()
+     {
+         var variantArray = VariantValue.FromArray(
+             VariantValue.FromInt32(1),
+             VariantValue.FromInt32(2),
+             VariantValue.FromInt32(3)
+         );
+
+         var builder = new VariantArray.Builder();
+         builder.Append(variantArray);
+         var array = builder.Build();
+
+         Assert.Equal(1, array.Length);
+         var result = array.GetVariantValue(0);
+         Assert.True(result.IsArray);
+         var elements = result.AsArray();
+         Assert.Equal(3, elements.Count);
+         Assert.Equal(1, elements[0].AsInt32());
+         Assert.Equal(2, elements[1].AsInt32());
+         Assert.Equal(3, elements[2].AsInt32());
+     }
+
+     // An object containing an array field keeps the nested array.
+     [Fact]
+     public void NestedObjectWithArray()
+     {
+         var obj = VariantValue.FromObject(new Dictionary<string, VariantValue>
+         {
+             ["tags"] = VariantValue.FromArray(
+                 VariantValue.FromString("a"),
+                 VariantValue.FromString("b")
+             ),
+             ["count"] = VariantValue.FromInt32(2),
+         });
+
+         var builder = new VariantArray.Builder();
+         builder.Append(obj);
+         var array = builder.Build();
+
+         var result = array.GetVariantValue(0);
+         Assert.True(result.IsObject);
+         var fields = result.AsObject();
+         Assert.True(fields["tags"].IsArray);
+         Assert.Equal(2, fields["tags"].AsArray().Count);
+     }
+
+     // GetVariantReader exposes the same values that were appended.
+     [Fact]
+     public void GetVariantReaderRoundTrip()
+     {
+         var builder = new VariantArray.Builder();
+         builder.Append(VariantValue.FromString("test-reader"));
+         builder.Append(VariantValue.FromInt32(42));
+         var array = builder.Build();
+
+         var reader0 = array.GetVariantReader(0);
+         Assert.Equal("test-reader", reader0.ToVariantValue().AsString());
+
+         var reader1 = array.GetVariantReader(1);
+         Assert.Equal(42, reader1.ToVariantValue().AsInt32());
+     }
+
+     // Requesting a reader for a struct-level null row is an invalid operation.
+     [Fact]
+     public void GetVariantReaderThrowsOnNull()
+     {
+         var builder = new VariantArray.Builder();
+         builder.AppendNull();
+         var array = builder.Build();
+
+         Assert.Throws<InvalidOperationException>(() => array.GetVariantReader(0));
+     }
+
+     // GetVariantReader rejects indices below zero and at/after Length.
+     [Fact]
+     public void GetVariantReaderThrowsOnOutOfRange()
+     {
+         var builder = new VariantArray.Builder();
+         builder.Append(VariantValue.FromInt32(1));
+         var array = builder.Build();
+
+         Assert.Throws<ArgumentOutOfRangeException>(() => array.GetVariantReader(-1));
+         Assert.Throws<ArgumentOutOfRangeException>(() => array.GetVariantReader(1));
+     }
+
+     // GetVariantValue rejects indices below zero and at/after Length.
+     [Fact]
+     public void GetVariantValueThrowsOnOutOfRange()
+     {
+         var builder = new VariantArray.Builder();
+         builder.Append(VariantValue.FromInt32(1));
+         var array = builder.Build();
+
+         Assert.Throws<ArgumentOutOfRangeException>(() => array.GetVariantValue(-1));
+         Assert.Throws<ArgumentOutOfRangeException>(() => array.GetVariantValue(1));
+     }
+
+     // Count and the indexer agree with the appended rows, including a null row.
+     [Fact]
+     public void IReadOnlyListIndexer()
+     {
+         var builder = new VariantArray.Builder();
+         builder.Append(VariantValue.FromInt32(10));
+         builder.AppendNull();
+         builder.Append(VariantValue.FromString("hi"));
+         var array = builder.Build();
+
+         Assert.Equal(3, array.Count);
+
+         Assert.Equal(10, array[0].AsInt32());
+         Assert.True(array[1].IsNull);
+         Assert.Equal("hi", array[2].AsString());
+     }
+
+     // Enumerating the array yields every row in order, including a null row.
+     [Fact]
+     public void IReadOnlyListEnumerator()
+     {
+         var builder = new VariantArray.Builder();
+         builder.Append(VariantValue.FromInt32(1));
+         builder.AppendNull();
+         builder.Append(VariantValue.FromInt32(3));
+         var array = builder.Build();
+
+         var list = array.ToList();
+         Assert.Equal(3, list.Count);
+         Assert.Equal(1, list[0].AsInt32());
+         Assert.True(list[1].IsNull);
+         Assert.Equal(3, list[2].AsInt32());
+     }
+
+     // Append/AppendNull can be chained straight into Build.
+     [Fact]
+     public void FluentBuilderApi()
+     {
+         var array = new VariantArray.Builder()
+             .Append(VariantValue.FromInt32(1))
+             .Append(VariantValue.FromString("two"))
+             .AppendNull()
+             .Build();
+
+         Assert.Equal(3, array.Length);
+         Assert.Equal(1, array.NullCount);
+     }
+
+     // Builder.Length grows by one for every appended value or null.
+     [Fact]
+     public void BuilderLengthTracksAppends()
+     {
+         var builder = new VariantArray.Builder();
+         Assert.Equal(0, builder.Length);
+
+         builder.Append(VariantValue.FromInt32(1));
+         Assert.Equal(1, builder.Length);
+
+         builder.AppendNull();
+         Assert.Equal(2, builder.Length);
+
+         builder.Append(VariantValue.FromString("x"));
+         Assert.Equal(3, builder.Length);
+     }
+
+     // An array made only of struct-level nulls reports every row as null.
+     [Fact]
+     public void AllNullArray()
+     {
+         var builder = new VariantArray.Builder();
+         builder.AppendNull();
+         builder.AppendNull();
+         builder.AppendNull();
+         var array = builder.Build();
+
+         Assert.Equal(3, array.Length);
+         Assert.Equal(3, array.NullCount);
+
+         for (int i = 0; i < 3; i++)
+         {
+             Assert.True(array.IsNull(i));
+             Assert.True(array.GetVariantValue(i).IsNull);
+         }
+     }
+
+     // Rows may hold objects with unrelated key sets.
+     [Fact]
+     public void RowsWithDifferentObjectKeys()
+     {
+         var obj1 = VariantValue.FromObject(new Dictionary<string, VariantValue>
+         {
+             ["x"] = VariantValue.FromInt32(1),
+         });
+         var obj2 = VariantValue.FromObject(new Dictionary<string, VariantValue>
+         {
+             ["y"] = VariantValue.FromString("two"),
+             ["z"] = VariantValue.True,
+         });
+
+         var builder = new VariantArray.Builder();
+         builder.Append(obj1);
+         builder.Append(obj2);
+         var array = builder.Build();
+
+         Assert.Equal(2, array.Length);
+
+         var r0 = array.GetVariantValue(0);
+         Assert.True(r0.IsObject);
+         Assert.Equal(1, r0.AsObject()["x"].AsInt32());
+
+         var r1 = array.GetVariantValue(1);
+         Assert.True(r1.IsObject);
+         Assert.Equal("two", r1.AsObject()["y"].AsString());
+         Assert.True(r1.AsObject()["z"].AsBoolean());
+     }
+
+     // VariantType.Default is an extension type over a two-field struct (metadata, value).
+     [Fact]
+     public void ExtensionTypeProperties()
+     {
+         var variantType = VariantType.Default;
+         Assert.Equal(ArrowTypeId.Extension, variantType.TypeId);
+         Assert.Equal("arrow.parquet.variant", variantType.Name);
+         Assert.Equal("", variantType.ExtensionMetadata);
+
+         // Assert.IsType returns the typed instance, so no separate cast is needed.
+         var structType = Assert.IsType<StructType>(variantType.StorageType);
+         Assert.Equal(2, structType.Fields.Count);
+         Assert.Equal("metadata", structType.Fields[0].Name);
+         Assert.Equal("value", structType.Fields[1].Name);
+     }
+
+     // A struct of binary "metadata" and "value" fields is accepted as variant storage.
+     [Fact]
+     public void ExtensionDefinitionTryCreateType()
+     {
+         var structType = new StructType(new[]
+         {
+             new Field("metadata", BinaryType.Default, false),
+             new Field("value", BinaryType.Default, false),
+         });
+
+         Assert.True(VariantExtensionDefinition.Instance.TryCreateType(
+             structType, "", out var extType));
+         Assert.IsType<VariantType>(extType);
+     }
+
+     // Storage types with the wrong field names, or that are not structs, are rejected.
+     [Fact]
+     public void ExtensionDefinitionRejectsInvalidType()
+     {
+         // Wrong field names
+         var structType = new StructType(new[]
+         {
+             new Field("meta", BinaryType.Default, false),
+             new Field("val", BinaryType.Default, false),
+         });
+
+         Assert.False(VariantExtensionDefinition.Instance.TryCreateType(
+             structType, "", out _));
+
+         // Non-struct type
+         Assert.False(VariantExtensionDefinition.Instance.TryCreateType(
+             Int32Type.Default, "", out _));
+     }
+
+     // Writes a variant column to an Arrow IPC stream and reads it back while the
+     // variant extension definition is temporarily registered.
+     [Fact]
+     public void IpcRoundTrip()
+     {
+         using (ExtensionTypeRegistry.Default.RegisterTemporary(VariantExtensionDefinition.Instance))
+         {
+             var builder = new VariantArray.Builder();
+             builder.Append(VariantValue.FromInt32(42));
+             builder.Append(VariantValue.FromString("hello"));
+             builder.AppendNull();
+             builder.Append(VariantValue.Null);
+             var array = builder.Build();
+
+             var field = new Field("variants", VariantType.Default, true);
+             var schema = new Schema(new[] { field }, null);
+
+             // Dispose the written batch like every other disposable in this test.
+             using var batch = new RecordBatch(schema, new IArrowArray[] { array }, array.Length);
+
+             // Write to stream
+             using var stream = new MemoryStream();
+             using (var writer = new ArrowStreamWriter(stream, schema, leaveOpen: true))
+             {
+                 writer.WriteRecordBatch(batch);
+                 writer.WriteEnd();
+             }
+             stream.Position = 0;
+
+             // Read back
+             using var reader = new ArrowStreamReader(stream);
+             using var readBatch = reader.ReadNextRecordBatch();
+             Assert.NotNull(readBatch);
+
+             // Fails with a type-aware message if the column is not a VariantArray.
+             var readArray = Assert.IsAssignableFrom<VariantArray>(readBatch.Column(0));
+             Assert.Equal(4, readArray.Length);
+             Assert.Equal(1, readArray.NullCount);
+
+             Assert.Equal(42, readArray.GetVariantValue(0).AsInt32());
+             Assert.Equal("hello", readArray.GetVariantValue(1).AsString());
+
+             // Row 2 is a struct-level null; row 3 is a valid slot holding a variant null.
+             // Both read as IsNull values, so the slot validity is checked as well.
+             Assert.True(readArray.IsNull(2));
+             Assert.True(readArray.GetVariantValue(2).IsNull);
+             Assert.False(readArray.IsNull(3));
+             Assert.True(readArray.GetVariantValue(3).IsNull);
+         }
+     }
+ }
+}