This is an automated email from the ASF dual-hosted git repository.
lidavidm pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/main by this push:
new e55f912ecb GH-36795: [C#] Implement support for dense and sparse
unions (#36797)
e55f912ecb is described below
commit e55f912ecbce21ad46043bdffe32712272d6268a
Author: Curt Hagenlocher <[email protected]>
AuthorDate: Mon Sep 25 06:04:18 2023 -0700
GH-36795: [C#] Implement support for dense and sparse unions (#36797)
### What changes are included in this PR?
Support dense and sparse unions in the C# implementation.
Adds Archery support for C# unions.
### Are these changes tested?
Yes
### Are there any user-facing changes?
Unions are now supported in the C# implementation.
**This PR includes breaking changes to public APIs.**
The public APIs for the UnionArray and UnionType were changed fairly
substantially. As these were previously not implemented properly, the impact of
the changes ought to be minimal.
The ChunkedArray and Column classes were changed to hold IArrowArrays
instead of Arrays. To accommodate this, a constructor was added which may
introduce ambiguity in calling code. This could be avoided by changing the
overloaded constructor to instead be a factory method. This didn't seem
worthwhile but could be reconsidered.
The metadata version was finally increased to V5.
* Closes: #36795
Authored-by: Curt Hagenlocher <[email protected]>
Signed-off-by: David Li <[email protected]>
---
csharp/src/Apache.Arrow/Arrays/Array.cs | 13 +--
.../Apache.Arrow/Arrays/ArrayDataConcatenator.cs | 62 +++++++++++-
.../Apache.Arrow/Arrays/ArrayDataTypeComparer.cs | 12 ++-
.../src/Apache.Arrow/Arrays/ArrowArrayFactory.cs | 16 +++-
.../Arrays/{UnionArray.cs => DenseUnionArray.cs} | 47 +++++-----
.../Apache.Arrow/Arrays/PrimitiveArrayBuilder.cs | 3 +
.../Arrays/{UnionArray.cs => SparseUnionArray.cs} | 43 ++++-----
csharp/src/Apache.Arrow/Arrays/UnionArray.cs | 77 ++++++++++++---
csharp/src/Apache.Arrow/C/CArrowArrayImporter.cs | 38 ++++++++
csharp/src/Apache.Arrow/C/CArrowSchemaExporter.cs | 18 ++++
csharp/src/Apache.Arrow/C/CArrowSchemaImporter.cs | 56 ++++++++---
csharp/src/Apache.Arrow/ChunkedArray.cs | 30 ++++--
csharp/src/Apache.Arrow/Column.cs | 24 +++--
.../Apache.Arrow/Extensions/FlatbufExtensions.cs | 10 ++
csharp/src/Apache.Arrow/Interfaces/IArrowArray.cs | 4 -
.../Apache.Arrow/Ipc/ArrowReaderImplementation.cs | 75 +++++++++------
csharp/src/Apache.Arrow/Ipc/ArrowStreamWriter.cs | 19 +++-
.../Apache.Arrow/Ipc/ArrowTypeFlatbufferBuilder.cs | 14 ++-
csharp/src/Apache.Arrow/Ipc/MessageSerializer.cs | 4 +
csharp/src/Apache.Arrow/Table.cs | 4 +-
csharp/src/Apache.Arrow/Types/UnionType.cs | 11 ++-
.../IntegrationCommand.cs | 63 ++++++++++++-
.../test/Apache.Arrow.IntegrationTest/JsonFile.cs | 4 +
.../test/Apache.Arrow.Tests/ArrayTypeComparer.cs | 19 +++-
.../ArrowArrayConcatenatorTests.cs | 104 ++++++++++++++++++++-
.../test/Apache.Arrow.Tests/ArrowReaderVerifier.cs | 19 ++++
.../CDataInterfacePythonTests.cs | 36 +++++--
csharp/test/Apache.Arrow.Tests/ColumnTests.cs | 2 +-
csharp/test/Apache.Arrow.Tests/TableTests.cs | 10 +-
csharp/test/Apache.Arrow.Tests/TestData.cs | 64 +++++++++++++
dev/archery/archery/integration/datagen.py | 3 +-
docs/source/status.rst | 4 +-
32 files changed, 742 insertions(+), 166 deletions(-)
diff --git a/csharp/src/Apache.Arrow/Arrays/Array.cs
b/csharp/src/Apache.Arrow/Arrays/Array.cs
index a453b08072..0838134b19 100644
--- a/csharp/src/Apache.Arrow/Arrays/Array.cs
+++ b/csharp/src/Apache.Arrow/Arrays/Array.cs
@@ -62,16 +62,7 @@ namespace Apache.Arrow
public Array Slice(int offset, int length)
{
- if (offset > Length)
- {
- throw new ArgumentException($"Offset {offset} cannot be
greater than Length {Length} for Array.Slice");
- }
-
- length = Math.Min(Data.Length - offset, length);
- offset += Data.Offset;
-
- ArrayData newData = Data.Slice(offset, length);
- return ArrowArrayFactory.BuildArray(newData) as Array;
+ return ArrowArrayFactory.Slice(this, offset, length) as Array;
}
public void Dispose()
@@ -88,4 +79,4 @@ namespace Apache.Arrow
}
}
}
-}
\ No newline at end of file
+}
diff --git a/csharp/src/Apache.Arrow/Arrays/ArrayDataConcatenator.cs
b/csharp/src/Apache.Arrow/Arrays/ArrayDataConcatenator.cs
index 8859ecd7f0..806defdc7c 100644
--- a/csharp/src/Apache.Arrow/Arrays/ArrayDataConcatenator.cs
+++ b/csharp/src/Apache.Arrow/Arrays/ArrayDataConcatenator.cs
@@ -49,7 +49,8 @@ namespace Apache.Arrow
IArrowTypeVisitor<StringType>,
IArrowTypeVisitor<ListType>,
IArrowTypeVisitor<FixedSizeListType>,
- IArrowTypeVisitor<StructType>
+ IArrowTypeVisitor<StructType>,
+ IArrowTypeVisitor<UnionType>
{
public ArrayData Result { get; private set; }
private readonly IReadOnlyList<ArrayData> _arrayDataList;
@@ -123,6 +124,33 @@ namespace Apache.Arrow
Result = new ArrayData(type, _arrayDataList[0].Length,
_arrayDataList[0].NullCount, 0, _arrayDataList[0].Buffers, children);
}
+ public void Visit(UnionType type)
+ {
+ int bufferCount = type.Mode switch
+ {
+ UnionMode.Sparse => 1,
+ UnionMode.Dense => 2,
+ _ => throw new InvalidOperationException("TODO"),
+ };
+
+ CheckData(type, bufferCount);
+ List<ArrayData> children = new
List<ArrayData>(type.Fields.Count);
+
+ for (int i = 0; i < type.Fields.Count; i++)
+ {
+ children.Add(Concatenate(SelectChildren(i), _allocator));
+ }
+
+ ArrowBuffer[] buffers = new ArrowBuffer[bufferCount];
+ buffers[0] = ConcatenateUnionTypeBuffer();
+ if (bufferCount > 1)
+ {
+ buffers[1] = ConcatenateUnionOffsetBuffer();
+ }
+
+ Result = new ArrayData(type, _totalLength, _totalNullCount, 0,
buffers, children);
+ }
+
public void Visit(IArrowType type)
{
throw new NotImplementedException($"Concatenation for
{type.Name} is not supported yet.");
@@ -231,6 +259,38 @@ namespace Apache.Arrow
return builder.Build(_allocator);
}
+ private ArrowBuffer ConcatenateUnionTypeBuffer()
+ {
+ var builder = new ArrowBuffer.Builder<byte>(_totalLength);
+
+ foreach (ArrayData arrayData in _arrayDataList)
+ {
+ builder.Append(arrayData.Buffers[0]);
+ }
+
+ return builder.Build(_allocator);
+ }
+
+ private ArrowBuffer ConcatenateUnionOffsetBuffer()
+ {
+ var builder = new ArrowBuffer.Builder<int>(_totalLength);
+ int baseOffset = 0;
+
+ foreach (ArrayData arrayData in _arrayDataList)
+ {
+ ReadOnlySpan<int> span =
arrayData.Buffers[1].Span.CastTo<int>();
+ foreach (int offset in span)
+ {
+ builder.Append(baseOffset + offset);
+ }
+
+ // The next offset must start from the current last offset.
+ baseOffset += span[arrayData.Length];
+ }
+
+ return builder.Build(_allocator);
+ }
+
private List<ArrayData> SelectChildren(int index)
{
var children = new List<ArrayData>(_arrayDataList.Count);
diff --git a/csharp/src/Apache.Arrow/Arrays/ArrayDataTypeComparer.cs
b/csharp/src/Apache.Arrow/Arrays/ArrayDataTypeComparer.cs
index 8a6bfed29a..6b54ec1edb 100644
--- a/csharp/src/Apache.Arrow/Arrays/ArrayDataTypeComparer.cs
+++ b/csharp/src/Apache.Arrow/Arrays/ArrayDataTypeComparer.cs
@@ -27,7 +27,8 @@ namespace Apache.Arrow
IArrowTypeVisitor<FixedSizeBinaryType>,
IArrowTypeVisitor<ListType>,
IArrowTypeVisitor<FixedSizeListType>,
- IArrowTypeVisitor<StructType>
+ IArrowTypeVisitor<StructType>,
+ IArrowTypeVisitor<UnionType>
{
private readonly IArrowType _expectedType;
private bool _dataTypeMatch;
@@ -122,6 +123,15 @@ namespace Apache.Arrow
}
}
+ public void Visit(UnionType actualType)
+ {
+ if (_expectedType is UnionType expectedType
+ && CompareNested(expectedType, actualType))
+ {
+ _dataTypeMatch = true;
+ }
+ }
+
private static bool CompareNested(NestedType expectedType, NestedType
actualType)
{
if (expectedType.Fields.Count != actualType.Fields.Count)
diff --git a/csharp/src/Apache.Arrow/Arrays/ArrowArrayFactory.cs
b/csharp/src/Apache.Arrow/Arrays/ArrowArrayFactory.cs
index f82037bff4..aa407203d1 100644
--- a/csharp/src/Apache.Arrow/Arrays/ArrowArrayFactory.cs
+++ b/csharp/src/Apache.Arrow/Arrays/ArrowArrayFactory.cs
@@ -62,7 +62,7 @@ namespace Apache.Arrow
case ArrowTypeId.Struct:
return new StructArray(data);
case ArrowTypeId.Union:
- return new UnionArray(data);
+ return UnionArray.Create(data);
case ArrowTypeId.Date64:
return new Date64Array(data);
case ArrowTypeId.Date32:
@@ -91,5 +91,19 @@ namespace Apache.Arrow
throw new NotSupportedException($"An ArrowArray cannot be
built for type {data.DataType.TypeId}.");
}
}
+
+ public static IArrowArray Slice(IArrowArray array, int offset, int
length)
+ {
+ if (offset > array.Length)
+ {
+ throw new ArgumentException($"Offset {offset} cannot be
greater than Length {array.Length} for Array.Slice");
+ }
+
+ length = Math.Min(array.Data.Length - offset, length);
+ offset += array.Data.Offset;
+
+ ArrayData newData = array.Data.Slice(offset, length);
+ return BuildArray(newData);
+ }
}
}
diff --git a/csharp/src/Apache.Arrow/Arrays/UnionArray.cs
b/csharp/src/Apache.Arrow/Arrays/DenseUnionArray.cs
similarity index 53%
copy from csharp/src/Apache.Arrow/Arrays/UnionArray.cs
copy to csharp/src/Apache.Arrow/Arrays/DenseUnionArray.cs
index 8bccea2b59..1aacbe11f0 100644
--- a/csharp/src/Apache.Arrow/Arrays/UnionArray.cs
+++ b/csharp/src/Apache.Arrow/Arrays/DenseUnionArray.cs
@@ -15,37 +15,38 @@
using Apache.Arrow.Types;
using System;
+using System.Collections.Generic;
+using System.Linq;
namespace Apache.Arrow
{
- public class UnionArray: Array
+ public class DenseUnionArray : UnionArray
{
- public UnionType Type => Data.DataType as UnionType;
-
- public UnionMode Mode => Type.Mode;
-
- public ArrowBuffer TypeBuffer => Data.Buffers[1];
-
- public ArrowBuffer ValueOffsetBuffer => Data.Buffers[2];
-
- public ReadOnlySpan<byte> TypeIds => TypeBuffer.Span;
-
- public ReadOnlySpan<int> ValueOffsets =>
ValueOffsetBuffer.Span.CastTo<int>().Slice(0, Length + 1);
-
- public UnionArray(ArrayData data)
- : base(data)
+ public ArrowBuffer ValueOffsetBuffer => Data.Buffers[1];
+
+ public ReadOnlySpan<int> ValueOffsets =>
ValueOffsetBuffer.Span.CastTo<int>();
+
+ public DenseUnionArray(
+ IArrowType dataType,
+ int length,
+ IEnumerable<IArrowArray> children,
+ ArrowBuffer typeIds,
+ ArrowBuffer valuesOffsetBuffer,
+ int nullCount = 0,
+ int offset = 0)
+ : base(new ArrayData(
+ dataType, length, nullCount, offset, new[] { typeIds,
valuesOffsetBuffer },
+ children.Select(child => child.Data)))
{
- data.EnsureDataType(ArrowTypeId.Union);
- data.EnsureBufferCount(3);
+ _fields = children.ToArray();
+ ValidateMode(UnionMode.Dense, Type.Mode);
}
- public IArrowArray GetChild(int index)
+ public DenseUnionArray(ArrayData data)
+ : base(data)
{
- // TODO: Implement
- throw new NotImplementedException();
+ ValidateMode(UnionMode.Dense, Type.Mode);
+ data.EnsureBufferCount(2);
}
-
- public override void Accept(IArrowArrayVisitor visitor) =>
Accept(this, visitor);
-
}
}
diff --git a/csharp/src/Apache.Arrow/Arrays/PrimitiveArrayBuilder.cs
b/csharp/src/Apache.Arrow/Arrays/PrimitiveArrayBuilder.cs
index a50d4b52c3..67fe46633c 100644
--- a/csharp/src/Apache.Arrow/Arrays/PrimitiveArrayBuilder.cs
+++ b/csharp/src/Apache.Arrow/Arrays/PrimitiveArrayBuilder.cs
@@ -137,6 +137,9 @@ namespace Apache.Arrow
return Instance;
}
+ public TBuilder Append(T? value) =>
+ (value == null) ? AppendNull() : Append(value.Value);
+
public TBuilder Append(ReadOnlySpan<T> span)
{
int len = ValueBuffer.Length;
diff --git a/csharp/src/Apache.Arrow/Arrays/UnionArray.cs
b/csharp/src/Apache.Arrow/Arrays/SparseUnionArray.cs
similarity index 53%
copy from csharp/src/Apache.Arrow/Arrays/UnionArray.cs
copy to csharp/src/Apache.Arrow/Arrays/SparseUnionArray.cs
index 8bccea2b59..b79c44c979 100644
--- a/csharp/src/Apache.Arrow/Arrays/UnionArray.cs
+++ b/csharp/src/Apache.Arrow/Arrays/SparseUnionArray.cs
@@ -14,38 +14,33 @@
// limitations under the License.
using Apache.Arrow.Types;
-using System;
+using System.Collections.Generic;
+using System.Linq;
namespace Apache.Arrow
{
- public class UnionArray: Array
+ public class SparseUnionArray : UnionArray
{
- public UnionType Type => Data.DataType as UnionType;
-
- public UnionMode Mode => Type.Mode;
-
- public ArrowBuffer TypeBuffer => Data.Buffers[1];
-
- public ArrowBuffer ValueOffsetBuffer => Data.Buffers[2];
-
- public ReadOnlySpan<byte> TypeIds => TypeBuffer.Span;
-
- public ReadOnlySpan<int> ValueOffsets =>
ValueOffsetBuffer.Span.CastTo<int>().Slice(0, Length + 1);
-
- public UnionArray(ArrayData data)
- : base(data)
+ public SparseUnionArray(
+ IArrowType dataType,
+ int length,
+ IEnumerable<IArrowArray> children,
+ ArrowBuffer typeIds,
+ int nullCount = 0,
+ int offset = 0)
+ : base(new ArrayData(
+ dataType, length, nullCount, offset, new[] { typeIds },
+ children.Select(child => child.Data)))
{
- data.EnsureDataType(ArrowTypeId.Union);
- data.EnsureBufferCount(3);
+ _fields = children.ToArray();
+ ValidateMode(UnionMode.Sparse, Type.Mode);
}
- public IArrowArray GetChild(int index)
+ public SparseUnionArray(ArrayData data)
+ : base(data)
{
- // TODO: Implement
- throw new NotImplementedException();
+ ValidateMode(UnionMode.Sparse, Type.Mode);
+ data.EnsureBufferCount(1);
}
-
- public override void Accept(IArrowArrayVisitor visitor) =>
Accept(this, visitor);
-
}
}
diff --git a/csharp/src/Apache.Arrow/Arrays/UnionArray.cs
b/csharp/src/Apache.Arrow/Arrays/UnionArray.cs
index 8bccea2b59..0a7ae288fd 100644
--- a/csharp/src/Apache.Arrow/Arrays/UnionArray.cs
+++ b/csharp/src/Apache.Arrow/Arrays/UnionArray.cs
@@ -15,37 +15,88 @@
using Apache.Arrow.Types;
using System;
+using System.Collections.Generic;
+using System.Threading;
namespace Apache.Arrow
{
- public class UnionArray: Array
+ public abstract class UnionArray : IArrowArray
{
- public UnionType Type => Data.DataType as UnionType;
+ protected IReadOnlyList<IArrowArray> _fields;
- public UnionMode Mode => Type.Mode;
+ public IReadOnlyList<IArrowArray> Fields =>
+ LazyInitializer.EnsureInitialized(ref _fields, () =>
InitializeFields());
+
+ public ArrayData Data { get; }
- public ArrowBuffer TypeBuffer => Data.Buffers[1];
+ public UnionType Type => (UnionType)Data.DataType;
- public ArrowBuffer ValueOffsetBuffer => Data.Buffers[2];
+ public UnionMode Mode => Type.Mode;
+
+ public ArrowBuffer TypeBuffer => Data.Buffers[0];
public ReadOnlySpan<byte> TypeIds => TypeBuffer.Span;
- public ReadOnlySpan<int> ValueOffsets =>
ValueOffsetBuffer.Span.CastTo<int>().Slice(0, Length + 1);
+ public int Length => Data.Length;
+
+ public int Offset => Data.Offset;
- public UnionArray(ArrayData data)
- : base(data)
+ public int NullCount => Data.NullCount;
+
+ public bool IsValid(int index) => NullCount == 0 ||
Fields[TypeIds[index]].IsValid(index);
+
+ public bool IsNull(int index) => !IsValid(index);
+
+ protected UnionArray(ArrayData data)
{
+ Data = data;
data.EnsureDataType(ArrowTypeId.Union);
- data.EnsureBufferCount(3);
}
- public IArrowArray GetChild(int index)
+ public static UnionArray Create(ArrayData data)
{
- // TODO: Implement
- throw new NotImplementedException();
+ return ((UnionType)data.DataType).Mode switch
+ {
+ UnionMode.Dense => new DenseUnionArray(data),
+ UnionMode.Sparse => new SparseUnionArray(data),
+ _ => throw new InvalidOperationException("unknown union mode
in array creation")
+ };
}
- public override void Accept(IArrowArrayVisitor visitor) =>
Accept(this, visitor);
+ public void Accept(IArrowArrayVisitor visitor) => Array.Accept(this,
visitor);
+ public void Dispose()
+ {
+ Dispose(true);
+ GC.SuppressFinalize(this);
+ }
+
+ protected virtual void Dispose(bool disposing)
+ {
+ if (disposing)
+ {
+ Data.Dispose();
+ }
+ }
+
+ protected static void ValidateMode(UnionMode expected, UnionMode
actual)
+ {
+ if (expected != actual)
+ {
+ throw new ArgumentException(
+ $"Specified union mode <{actual}> does not match expected
mode <{expected}>",
+ "Mode");
+ }
+ }
+
+ private IReadOnlyList<IArrowArray> InitializeFields()
+ {
+ IArrowArray[] result = new IArrowArray[Data.Children.Length];
+ for (int i = 0; i < Data.Children.Length; i++)
+ {
+ result[i] = ArrowArrayFactory.BuildArray(Data.Children[i]);
+ }
+ return result;
+ }
}
}
diff --git a/csharp/src/Apache.Arrow/C/CArrowArrayImporter.cs
b/csharp/src/Apache.Arrow/C/CArrowArrayImporter.cs
index 9b7bcb7abe..da1b0f31b8 100644
--- a/csharp/src/Apache.Arrow/C/CArrowArrayImporter.cs
+++ b/csharp/src/Apache.Arrow/C/CArrowArrayImporter.cs
@@ -170,6 +170,15 @@ namespace Apache.Arrow.C
buffers = new ArrowBuffer[] {
ImportValidityBuffer(cArray) };
break;
case ArrowTypeId.Union:
+ UnionType unionType = (UnionType)type;
+ children = ProcessStructChildren(cArray,
unionType.Fields);
+ buffers = unionType.Mode switch
+ {
+ UnionMode.Dense => ImportDenseUnionBuffers(cArray),
+ UnionMode.Sparse =>
ImportSparseUnionBuffers(cArray),
+ _ => throw new InvalidOperationException("unknown
union mode in import")
+ }; ;
+ break;
case ArrowTypeId.Map:
break;
case ArrowTypeId.Null:
@@ -286,6 +295,35 @@ namespace Apache.Arrow.C
return buffers;
}
+ private ArrowBuffer[] ImportDenseUnionBuffers(CArrowArray* cArray)
+ {
+ if (cArray->n_buffers != 2)
+ {
+ throw new InvalidOperationException("Dense union arrays
are expected to have exactly two children");
+ }
+ int length = checked((int)cArray->length);
+ int offsetsLength = length * 4;
+
+ ArrowBuffer[] buffers = new ArrowBuffer[2];
+ buffers[0] = new
ArrowBuffer(AddMemory((IntPtr)cArray->buffers[0], 0, length));
+ buffers[1] = new
ArrowBuffer(AddMemory((IntPtr)cArray->buffers[1], 0, offsetsLength));
+
+ return buffers;
+ }
+
+ private ArrowBuffer[] ImportSparseUnionBuffers(CArrowArray* cArray)
+ {
+ if (cArray->n_buffers != 1)
+ {
+ throw new InvalidOperationException("Sparse union arrays
are expected to have exactly one child");
+ }
+
+ ArrowBuffer[] buffers = new ArrowBuffer[1];
+ buffers[0] = new
ArrowBuffer(AddMemory((IntPtr)cArray->buffers[0], 0,
checked((int)cArray->length)));
+
+ return buffers;
+ }
+
private ArrowBuffer[] ImportFixedWidthBuffers(CArrowArray* cArray,
int bitWidth)
{
if (cArray->n_buffers != 2)
diff --git a/csharp/src/Apache.Arrow/C/CArrowSchemaExporter.cs
b/csharp/src/Apache.Arrow/C/CArrowSchemaExporter.cs
index 66142da331..c1a12362a9 100644
--- a/csharp/src/Apache.Arrow/C/CArrowSchemaExporter.cs
+++ b/csharp/src/Apache.Arrow/C/CArrowSchemaExporter.cs
@@ -124,6 +124,23 @@ namespace Apache.Arrow.C
_ => throw new InvalidDataException($"Unsupported time unit for
export: {unit}"),
};
+ private static string FormatUnion(UnionType unionType)
+ {
+ StringBuilder builder = new StringBuilder();
+ builder.Append(unionType.Mode switch
+ {
+ UnionMode.Sparse => "+us:",
+ UnionMode.Dense => "+ud:",
+ _ => throw new InvalidDataException($"Unsupported union mode
for export: {unionType.Mode}"),
+ });
+ for (int i = 0; i < unionType.TypeIds.Length; i++)
+ {
+ if (i > 0) { builder.Append(','); }
+ builder.Append(unionType.TypeIds[i]);
+ }
+ return builder.ToString();
+ }
+
private static string GetFormat(IArrowType datatype)
{
switch (datatype)
@@ -170,6 +187,7 @@ namespace Apache.Arrow.C
case FixedSizeListType fixedListType:
return $"+w:{fixedListType.ListSize}";
case StructType _: return "+s";
+ case UnionType u: return FormatUnion(u);
// Dictionary
case DictionaryType dictionaryType:
return GetFormat(dictionaryType.IndexType);
diff --git a/csharp/src/Apache.Arrow/C/CArrowSchemaImporter.cs
b/csharp/src/Apache.Arrow/C/CArrowSchemaImporter.cs
index 2a750d5e82..42c8cdd5ef 100644
--- a/csharp/src/Apache.Arrow/C/CArrowSchemaImporter.cs
+++ b/csharp/src/Apache.Arrow/C/CArrowSchemaImporter.cs
@@ -184,21 +184,7 @@ namespace Apache.Arrow.C
}
else if (format == "+s")
{
- var child_schemas = new
ImportedArrowSchema[_cSchema->n_children];
-
- for (int i = 0; i < _cSchema->n_children; i++)
- {
- if (_cSchema->GetChild(i) == null)
- {
- throw new InvalidDataException("Expected struct
type child to be non-null.");
- }
- child_schemas[i] = new
ImportedArrowSchema(_cSchema->GetChild(i), isRoot: false);
- }
-
-
- List<Field> childFields = child_schemas.Select(schema =>
schema.GetAsField()).ToList();
-
- return new StructType(childFields);
+ return new StructType(ParseChildren("struct"));
}
else if (format.StartsWith("+w:"))
{
@@ -265,6 +251,30 @@ namespace Apache.Arrow.C
return new FixedSizeBinaryType(width);
}
+ // Unions
+ if (format.StartsWith("+ud:") || format.StartsWith("+us:"))
+ {
+ UnionMode unionMode = format[2] == 'd' ? UnionMode.Dense :
UnionMode.Sparse;
+ List<int> typeIds = new List<int>();
+ int pos = 4;
+ do
+ {
+ int next = format.IndexOf(',', pos);
+ if (next < 0) { next = format.Length; }
+
+ int code;
+ if (!int.TryParse(format.Substring(pos, next - pos),
out code))
+ {
+ throw new InvalidDataException($"Invalid type code
for union import: {format.Substring(pos, next - pos)}");
+ }
+ typeIds.Add(code);
+
+ pos = next + 1;
+ } while (pos < format.Length);
+
+ return new UnionType(ParseChildren("union"), typeIds,
unionMode);
+ }
+
return format switch
{
// Primitives
@@ -324,6 +334,22 @@ namespace Apache.Arrow.C
}
}
+ private List<Field> ParseChildren(string typeName)
+ {
+ var child_schemas = new
ImportedArrowSchema[_cSchema->n_children];
+
+ for (int i = 0; i < _cSchema->n_children; i++)
+ {
+ if (_cSchema->GetChild(i) == null)
+ {
+ throw new InvalidDataException($"Expected {typeName}
type child to be non-null.");
+ }
+ child_schemas[i] = new
ImportedArrowSchema(_cSchema->GetChild(i), isRoot: false);
+ }
+
+ return child_schemas.Select(schema =>
schema.GetAsField()).ToList();
+ }
+
private unsafe static IReadOnlyDictionary<string, string>
GetMetadata(byte* metadata)
{
if (metadata == null)
diff --git a/csharp/src/Apache.Arrow/ChunkedArray.cs
b/csharp/src/Apache.Arrow/ChunkedArray.cs
index 5f25acfe04..f5909f5adf 100644
--- a/csharp/src/Apache.Arrow/ChunkedArray.cs
+++ b/csharp/src/Apache.Arrow/ChunkedArray.cs
@@ -15,7 +15,6 @@
using System;
using System.Collections.Generic;
-using Apache.Arrow;
using Apache.Arrow.Types;
namespace Apache.Arrow
@@ -25,7 +24,7 @@ namespace Apache.Arrow
/// </summary>
public class ChunkedArray
{
- private IList<Array> Arrays { get; }
+ private IList<IArrowArray> Arrays { get; }
public IArrowType DataType { get; }
public long Length { get; }
public long NullCount { get; }
@@ -35,9 +34,16 @@ namespace Apache.Arrow
get => Arrays.Count;
}
- public Array Array(int index) => Arrays[index];
+ public Array Array(int index) => Arrays[index] as Array;
+
+ public IArrowArray ArrowArray(int index) => Arrays[index];
public ChunkedArray(IList<Array> arrays)
+ : this(Cast(arrays))
+ {
+ }
+
+ public ChunkedArray(IList<IArrowArray> arrays)
{
Arrays = arrays ?? throw new ArgumentNullException(nameof(arrays));
if (arrays.Count < 1)
@@ -45,14 +51,14 @@ namespace Apache.Arrow
throw new ArgumentException($"Count must be at least 1. Got
{arrays.Count} instead");
}
DataType = arrays[0].Data.DataType;
- foreach (Array array in arrays)
+ foreach (IArrowArray array in arrays)
{
Length += array.Length;
NullCount += array.NullCount;
}
}
- public ChunkedArray(Array array) : this(new[] { array }) { }
+ public ChunkedArray(Array array) : this(new IArrowArray[] { array }) {
}
public ChunkedArray Slice(long offset, long length)
{
@@ -69,10 +75,10 @@ namespace Apache.Arrow
curArrayIndex++;
}
- IList<Array> newArrays = new List<Array>();
+ IList<IArrowArray> newArrays = new List<IArrowArray>();
while (curArrayIndex < numArrays && length > 0)
{
- newArrays.Add(Arrays[curArrayIndex].Slice((int)offset,
+ newArrays.Add(ArrowArrayFactory.Slice(Arrays[curArrayIndex],
(int)offset,
length > Arrays[curArrayIndex].Length ?
Arrays[curArrayIndex].Length : (int)length));
length -= Arrays[curArrayIndex].Length - offset;
offset = 0;
@@ -86,6 +92,16 @@ namespace Apache.Arrow
return Slice(offset, Length - offset);
}
+ private static IArrowArray[] Cast(IList<Array> arrays)
+ {
+ IArrowArray[] arrowArrays = new IArrowArray[arrays.Count];
+ for (int i = 0; i < arrays.Count; i++)
+ {
+ arrowArrays[i] = arrays[i];
+ }
+ return arrowArrays;
+ }
+
// TODO: Flatten for Structs
}
}
diff --git a/csharp/src/Apache.Arrow/Column.cs
b/csharp/src/Apache.Arrow/Column.cs
index 4eaf9a559e..0709b9142c 100644
--- a/csharp/src/Apache.Arrow/Column.cs
+++ b/csharp/src/Apache.Arrow/Column.cs
@@ -28,19 +28,23 @@ namespace Apache.Arrow
public ChunkedArray Data { get; }
public Column(Field field, IList<Array> arrays)
+ : this(field, new ChunkedArray(arrays), doValidation: true)
+ {
+ }
+
+ public Column(Field field, IList<IArrowArray> arrays)
+ : this(field, new ChunkedArray(arrays), doValidation: true)
{
- Data = new ChunkedArray(arrays);
- Field = field;
- if (!ValidateArrayDataTypes())
- {
- throw new ArgumentException($"{Field.DataType} must match
{Data.DataType}");
- }
}
- private Column(Field field, ChunkedArray arrays)
+ private Column(Field field, ChunkedArray data, bool doValidation =
false)
{
+ Data = data;
Field = field;
- Data = arrays;
+ if (doValidation && !ValidateArrayDataTypes())
+ {
+ throw new ArgumentException($"{Field.DataType} must match
{Data.DataType}");
+ }
}
public long Length => Data.Length;
@@ -64,12 +68,12 @@ namespace Apache.Arrow
for (int i = 0; i < Data.ArrayCount; i++)
{
- if (Data.Array(i).Data.DataType.TypeId !=
Field.DataType.TypeId)
+ if (Data.ArrowArray(i).Data.DataType.TypeId !=
Field.DataType.TypeId)
{
return false;
}
- Data.Array(i).Data.DataType.Accept(dataTypeComparer);
+ Data.ArrowArray(i).Data.DataType.Accept(dataTypeComparer);
if (!dataTypeComparer.DataTypeMatch)
{
diff --git a/csharp/src/Apache.Arrow/Extensions/FlatbufExtensions.cs
b/csharp/src/Apache.Arrow/Extensions/FlatbufExtensions.cs
index d2a70bca9e..35c5b3e551 100644
--- a/csharp/src/Apache.Arrow/Extensions/FlatbufExtensions.cs
+++ b/csharp/src/Apache.Arrow/Extensions/FlatbufExtensions.cs
@@ -80,6 +80,16 @@ namespace Apache.Arrow
throw new ArgumentException($"Unexpected Flatbuf
TimeUnit", nameof(unit));
}
}
+
+ public static Types.UnionMode ToArrow(this Flatbuf.UnionMode mode)
+ {
+ return mode switch
+ {
+ Flatbuf.UnionMode.Dense => Types.UnionMode.Dense,
+ Flatbuf.UnionMode.Sparse => Types.UnionMode.Sparse,
+ _ => throw new ArgumentException($"Unsupported Flatbuf
UnionMode", nameof(mode)),
+ };
+ }
}
}
diff --git a/csharp/src/Apache.Arrow/Interfaces/IArrowArray.cs
b/csharp/src/Apache.Arrow/Interfaces/IArrowArray.cs
index 50fbc3af6d..9bcee36ef4 100644
--- a/csharp/src/Apache.Arrow/Interfaces/IArrowArray.cs
+++ b/csharp/src/Apache.Arrow/Interfaces/IArrowArray.cs
@@ -32,9 +32,5 @@ namespace Apache.Arrow
ArrayData Data { get; }
void Accept(IArrowArrayVisitor visitor);
-
- //IArrowArray Slice(int offset);
-
- //IArrowArray Slice(int offset, int length);
}
}
diff --git a/csharp/src/Apache.Arrow/Ipc/ArrowReaderImplementation.cs
b/csharp/src/Apache.Arrow/Ipc/ArrowReaderImplementation.cs
index c9c1b21673..d3115da52c 100644
--- a/csharp/src/Apache.Arrow/Ipc/ArrowReaderImplementation.cs
+++ b/csharp/src/Apache.Arrow/Ipc/ArrowReaderImplementation.cs
@@ -116,11 +116,11 @@ namespace Apache.Arrow.Ipc
break;
case Flatbuf.MessageHeader.DictionaryBatch:
Flatbuf.DictionaryBatch dictionaryBatch =
message.Header<Flatbuf.DictionaryBatch>().Value;
- ReadDictionaryBatch(dictionaryBatch, bodyByteBuffer,
memoryOwner);
+ ReadDictionaryBatch(message.Version, dictionaryBatch,
bodyByteBuffer, memoryOwner);
break;
case Flatbuf.MessageHeader.RecordBatch:
Flatbuf.RecordBatch rb =
message.Header<Flatbuf.RecordBatch>().Value;
- List<IArrowArray> arrays = BuildArrays(Schema,
bodyByteBuffer, rb);
+ List<IArrowArray> arrays = BuildArrays(message.Version,
Schema, bodyByteBuffer, rb);
return new RecordBatch(Schema, memoryOwner, arrays,
(int)rb.Length);
default:
// NOTE: Skip unsupported message type
@@ -136,7 +136,11 @@ namespace Apache.Arrow.Ipc
return new ByteBuffer(new ReadOnlyMemoryBufferAllocator(buffer),
0);
}
- private void ReadDictionaryBatch(Flatbuf.DictionaryBatch
dictionaryBatch, ByteBuffer bodyByteBuffer, IMemoryOwner<byte> memoryOwner)
+ private void ReadDictionaryBatch(
+ MetadataVersion version,
+ Flatbuf.DictionaryBatch dictionaryBatch,
+ ByteBuffer bodyByteBuffer,
+ IMemoryOwner<byte> memoryOwner)
{
long id = dictionaryBatch.Id;
IArrowType valueType = DictionaryMemo.GetDictionaryType(id);
@@ -149,7 +153,7 @@ namespace Apache.Arrow.Ipc
Field valueField = new Field("dummy", valueType, true);
var schema = new Schema(new[] { valueField }, default);
- IList<IArrowArray> arrays = BuildArrays(schema, bodyByteBuffer,
recordBatch.Value);
+ IList<IArrowArray> arrays = BuildArrays(version, schema,
bodyByteBuffer, recordBatch.Value);
if (arrays.Count != 1)
{
@@ -167,6 +171,7 @@ namespace Apache.Arrow.Ipc
}
private List<IArrowArray> BuildArrays(
+ MetadataVersion version,
Schema schema,
ByteBuffer messageBuffer,
Flatbuf.RecordBatch recordBatchMessage)
@@ -187,8 +192,8 @@ namespace Apache.Arrow.Ipc
Flatbuf.FieldNode fieldNode =
recordBatchEnumerator.CurrentNode;
ArrayData arrayData = field.DataType.IsFixedPrimitive()
- ? LoadPrimitiveField(ref recordBatchEnumerator, field, in
fieldNode, messageBuffer, bufferCreator)
- : LoadVariableField(ref recordBatchEnumerator, field, in
fieldNode, messageBuffer, bufferCreator);
+ ? LoadPrimitiveField(version, ref recordBatchEnumerator,
field, in fieldNode, messageBuffer, bufferCreator)
+ : LoadVariableField(version, ref recordBatchEnumerator,
field, in fieldNode, messageBuffer, bufferCreator);
arrays.Add(ArrowArrayFactory.BuildArray(arrayData));
} while (recordBatchEnumerator.MoveNextNode());
@@ -225,6 +230,7 @@ namespace Apache.Arrow.Ipc
}
private ArrayData LoadPrimitiveField(
+ MetadataVersion version,
ref RecordBatchEnumerator recordBatchEnumerator,
Field field,
in Flatbuf.FieldNode fieldNode,
@@ -245,31 +251,44 @@ namespace Apache.Arrow.Ipc
throw new InvalidDataException("Null count length must be >=
0"); // TODO:Localize exception message
}
- if (field.DataType.TypeId == ArrowTypeId.Null)
+ int buffers;
+ switch (field.DataType.TypeId)
{
- return new ArrayData(field.DataType, fieldLength,
fieldNullCount, 0, System.Array.Empty<ArrowBuffer>());
- }
-
- ArrowBuffer nullArrowBuffer = BuildArrowBuffer(bodyData,
recordBatchEnumerator.CurrentBuffer, bufferCreator);
- if (!recordBatchEnumerator.MoveNextBuffer())
- {
- throw new Exception("Unable to move to the next buffer.");
+ case ArrowTypeId.Null:
+ return new ArrayData(field.DataType, fieldLength,
fieldNullCount, 0, System.Array.Empty<ArrowBuffer>());
+ case ArrowTypeId.Union:
+ if (version < MetadataVersion.V5)
+ {
+ if (fieldNullCount > 0)
+ {
+ if (recordBatchEnumerator.CurrentBuffer.Length > 0)
+ {
+ // With older metadata we can get a validity
bitmap. Fixing up union data is hard,
+ // so we will just quit.
+ throw new NotSupportedException("Cannot read
pre-1.0.0 Union array with top-level validity bitmap");
+ }
+ }
+ recordBatchEnumerator.MoveNextBuffer();
+ }
+ buffers = ((UnionType)field.DataType).Mode ==
Types.UnionMode.Dense ? 2 : 1;
+ break;
+ case ArrowTypeId.Struct:
+ case ArrowTypeId.FixedSizeList:
+ buffers = 1;
+ break;
+ default:
+ buffers = 2;
+ break;
}
- ArrowBuffer[] arrowBuff;
- if (field.DataType.TypeId == ArrowTypeId.Struct ||
field.DataType.TypeId == ArrowTypeId.FixedSizeList)
+ ArrowBuffer[] arrowBuff = new ArrowBuffer[buffers];
+ for (int i = 0; i < buffers; i++)
{
- arrowBuff = new[] { nullArrowBuffer };
- }
- else
- {
- ArrowBuffer valueArrowBuffer = BuildArrowBuffer(bodyData,
recordBatchEnumerator.CurrentBuffer, bufferCreator);
+ arrowBuff[i] = BuildArrowBuffer(bodyData,
recordBatchEnumerator.CurrentBuffer, bufferCreator);
recordBatchEnumerator.MoveNextBuffer();
-
- arrowBuff = new[] { nullArrowBuffer, valueArrowBuffer };
}
- ArrayData[] children = GetChildren(ref recordBatchEnumerator,
field, bodyData, bufferCreator);
+ ArrayData[] children = GetChildren(version, ref
recordBatchEnumerator, field, bodyData, bufferCreator);
IArrowArray dictionary = null;
if (field.DataType.TypeId == ArrowTypeId.Dictionary)
@@ -282,6 +301,7 @@ namespace Apache.Arrow.Ipc
}
private ArrayData LoadVariableField(
+ MetadataVersion version,
ref RecordBatchEnumerator recordBatchEnumerator,
Field field,
in Flatbuf.FieldNode fieldNode,
@@ -316,7 +336,7 @@ namespace Apache.Arrow.Ipc
}
ArrowBuffer[] arrowBuff = new[] { nullArrowBuffer,
offsetArrowBuffer, valueArrowBuffer };
- ArrayData[] children = GetChildren(ref recordBatchEnumerator,
field, bodyData, bufferCreator);
+ ArrayData[] children = GetChildren(version, ref
recordBatchEnumerator, field, bodyData, bufferCreator);
IArrowArray dictionary = null;
if (field.DataType.TypeId == ArrowTypeId.Dictionary)
@@ -329,6 +349,7 @@ namespace Apache.Arrow.Ipc
}
private ArrayData[] GetChildren(
+ MetadataVersion version,
ref RecordBatchEnumerator recordBatchEnumerator,
Field field,
ByteBuffer bodyData,
@@ -345,8 +366,8 @@ namespace Apache.Arrow.Ipc
Field childField = type.Fields[index];
ArrayData child = childField.DataType.IsFixedPrimitive()
- ? LoadPrimitiveField(ref recordBatchEnumerator,
childField, in childFieldNode, bodyData, bufferCreator)
- : LoadVariableField(ref recordBatchEnumerator, childField,
in childFieldNode, bodyData, bufferCreator);
+ ? LoadPrimitiveField(version, ref recordBatchEnumerator,
childField, in childFieldNode, bodyData, bufferCreator)
+ : LoadVariableField(version, ref recordBatchEnumerator,
childField, in childFieldNode, bodyData, bufferCreator);
children[index] = child;
}
diff --git a/csharp/src/Apache.Arrow/Ipc/ArrowStreamWriter.cs b/csharp/src/Apache.Arrow/Ipc/ArrowStreamWriter.cs
index a5d8db3f50..2b3815af71 100644
--- a/csharp/src/Apache.Arrow/Ipc/ArrowStreamWriter.cs
+++ b/csharp/src/Apache.Arrow/Ipc/ArrowStreamWriter.cs
@@ -55,6 +55,7 @@ namespace Apache.Arrow.Ipc
IArrowArrayVisitor<BinaryArray>,
IArrowArrayVisitor<FixedSizeBinaryArray>,
IArrowArrayVisitor<StructArray>,
+ IArrowArrayVisitor<UnionArray>,
IArrowArrayVisitor<Decimal128Array>,
IArrowArrayVisitor<Decimal256Array>,
IArrowArrayVisitor<DictionaryArray>,
@@ -156,6 +157,22 @@ namespace Apache.Arrow.Ipc
}
}
+ public void Visit(UnionArray array)
+ {
+ _buffers.Add(CreateBuffer(array.TypeBuffer));
+
+ ArrowBuffer? offsets = (array as
DenseUnionArray)?.ValueOffsetBuffer;
+ if (offsets != null)
+ {
+ _buffers.Add(CreateBuffer(offsets.Value));
+ }
+
+ for (int i = 0; i < array.Fields.Count; i++)
+ {
+ array.Fields[i].Accept(this);
+ }
+ }
+
public void Visit(DictionaryArray array)
{
// Dictionary is serialized separately in Dictionary serialization.
@@ -218,7 +235,7 @@ namespace Apache.Arrow.Ipc
private readonly bool _leaveOpen;
private readonly IpcOptions _options;
- private protected const Flatbuf.MetadataVersion CurrentMetadataVersion
= Flatbuf.MetadataVersion.V4;
+ private protected const Flatbuf.MetadataVersion CurrentMetadataVersion
= Flatbuf.MetadataVersion.V5;
private static readonly byte[] s_padding = new byte[64];
diff --git a/csharp/src/Apache.Arrow/Ipc/ArrowTypeFlatbufferBuilder.cs b/csharp/src/Apache.Arrow/Ipc/ArrowTypeFlatbufferBuilder.cs
index 203aa72d93..b11467538d 100644
--- a/csharp/src/Apache.Arrow/Ipc/ArrowTypeFlatbufferBuilder.cs
+++ b/csharp/src/Apache.Arrow/Ipc/ArrowTypeFlatbufferBuilder.cs
@@ -120,7 +120,9 @@ namespace Apache.Arrow.Ipc
public void Visit(UnionType type)
{
- throw new NotImplementedException();
+ Result = FieldType.Build(
+ Flatbuf.Type.Union,
+ Flatbuf.Union.CreateUnion(Builder,
ToFlatBuffer(type.Mode), Flatbuf.Union.CreateTypeIdsVector(Builder,
type.TypeIds)));
}
public void Visit(StringType type)
@@ -279,5 +281,15 @@ namespace Apache.Arrow.Ipc
return result;
}
+
+ private static Flatbuf.UnionMode ToFlatBuffer(Types.UnionMode mode)
+ {
+ return mode switch
+ {
+ Types.UnionMode.Dense => Flatbuf.UnionMode.Dense,
+ Types.UnionMode.Sparse => Flatbuf.UnionMode.Sparse,
+ _ => throw new ArgumentException($"unsupported union mode <{mode}>", nameof(mode)),
+ };
+ }
}
}
diff --git a/csharp/src/Apache.Arrow/Ipc/MessageSerializer.cs b/csharp/src/Apache.Arrow/Ipc/MessageSerializer.cs
index 8ca69b6116..6249063ba8 100644
--- a/csharp/src/Apache.Arrow/Ipc/MessageSerializer.cs
+++ b/csharp/src/Apache.Arrow/Ipc/MessageSerializer.cs
@@ -203,6 +203,10 @@ namespace Apache.Arrow.Ipc
case Flatbuf.Type.Struct_:
Debug.Assert(childFields != null);
return new Types.StructType(childFields);
+ case Flatbuf.Type.Union:
+ Debug.Assert(childFields != null);
+ Flatbuf.Union unionMetadata =
field.Type<Flatbuf.Union>().Value;
+ return new Types.UnionType(childFields,
unionMetadata.GetTypeIdsArray(), unionMetadata.Mode.ToArrow());
default:
throw new InvalidDataException($"Arrow primitive '{field.TypeType}' is unsupported.");
}
diff --git a/csharp/src/Apache.Arrow/Table.cs b/csharp/src/Apache.Arrow/Table.cs
index 0b9f31557b..939ec23f54 100644
--- a/csharp/src/Apache.Arrow/Table.cs
+++ b/csharp/src/Apache.Arrow/Table.cs
@@ -37,10 +37,10 @@ namespace Apache.Arrow
List<Column> columns = new List<Column>(nColumns);
for (int icol = 0; icol < nColumns; icol++)
{
- List<Array> columnArrays = new List<Array>(nBatches);
+ List<IArrowArray> columnArrays = new
List<IArrowArray>(nBatches);
for (int jj = 0; jj < nBatches; jj++)
{
- columnArrays.Add(recordBatches[jj].Column(icol) as Array);
+ columnArrays.Add(recordBatches[jj].Column(icol));
}
columns.Add(new Column(schema.GetFieldByIndex(icol),
columnArrays));
}
diff --git a/csharp/src/Apache.Arrow/Types/UnionType.cs b/csharp/src/Apache.Arrow/Types/UnionType.cs
index 293271018a..23fa3b45ab 100644
--- a/csharp/src/Apache.Arrow/Types/UnionType.cs
+++ b/csharp/src/Apache.Arrow/Types/UnionType.cs
@@ -24,20 +24,21 @@ namespace Apache.Arrow.Types
Dense
}
- public sealed class UnionType : ArrowType
+ public sealed class UnionType : NestedType
{
public override ArrowTypeId TypeId => ArrowTypeId.Union;
public override string Name => "union";
public UnionMode Mode { get; }
-
- public IEnumerable<byte> TypeCodes { get; }
+
+ public int[] TypeIds { get; }
public UnionType(
- IEnumerable<Field> fields, IEnumerable<byte> typeCodes,
+ IEnumerable<Field> fields, IEnumerable<int> typeIds,
UnionMode mode = UnionMode.Sparse)
+ : base(fields.ToArray())
{
- TypeCodes = typeCodes.ToList();
+ TypeIds = typeIds.ToArray();
Mode = mode;
}
diff --git a/csharp/test/Apache.Arrow.IntegrationTest/IntegrationCommand.cs b/csharp/test/Apache.Arrow.IntegrationTest/IntegrationCommand.cs
index abf7451e5e..1e76ee505a 100644
--- a/csharp/test/Apache.Arrow.IntegrationTest/IntegrationCommand.cs
+++ b/csharp/test/Apache.Arrow.IntegrationTest/IntegrationCommand.cs
@@ -128,7 +128,7 @@ namespace Apache.Arrow.IntegrationTest
for (int i = 0; i < jsonRecordBatch.Columns.Count; i++)
{
JsonFieldData data = jsonRecordBatch.Columns[i];
- Field field = schema.GetFieldByName(data.Name);
+ Field field = schema.FieldsList[i];
ArrayCreator creator = new ArrayCreator(data);
field.DataType.Accept(creator);
arrays.Add(creator.Array);
@@ -188,6 +188,7 @@ namespace Apache.Arrow.IntegrationTest
"list" => ToListArrowType(type, children),
"fixedsizelist" => ToFixedSizeListArrowType(type, children),
"struct" => ToStructArrowType(type, children),
+ "union" => ToUnionArrowType(type, children),
"null" => NullType.Default,
_ => throw new NotSupportedException($"JsonArrowType not supported: {type.Name}")
};
@@ -281,6 +282,17 @@ namespace Apache.Arrow.IntegrationTest
return new StructType(children);
}
+ private static IArrowType ToUnionArrowType(JsonArrowType type, Field[]
children)
+ {
+ UnionMode mode = type.Mode switch
+ {
+ "SPARSE" => UnionMode.Sparse,
+ "DENSE" => UnionMode.Dense,
+ _ => throw new NotSupportedException($"Union mode not supported: {type.Mode}"),
+ };
+ return new UnionType(children, type.TypeIds, mode);
+ }
+
private class ArrayCreator :
IArrowTypeVisitor<BooleanType>,
IArrowTypeVisitor<Int8Type>,
@@ -306,6 +318,7 @@ namespace Apache.Arrow.IntegrationTest
IArrowTypeVisitor<ListType>,
IArrowTypeVisitor<FixedSizeListType>,
IArrowTypeVisitor<StructType>,
+ IArrowTypeVisitor<UnionType>,
IArrowTypeVisitor<NullType>
{
private JsonFieldData JsonFieldData { get; set; }
@@ -556,6 +569,43 @@ namespace Apache.Arrow.IntegrationTest
Array = new StructArray(arrayData);
}
+ public void Visit(UnionType type)
+ {
+ ArrowBuffer[] buffers;
+ if (type.Mode == UnionMode.Dense)
+ {
+ buffers = new ArrowBuffer[2];
+ buffers[1] = GetOffsetBuffer();
+ }
+ else
+ {
+ buffers = new ArrowBuffer[1];
+ }
+ buffers[0] = GetTypeIdBuffer();
+
+ ArrayData[] children = GetChildren(type);
+
+ int nullCount = 0;
+ ArrayData arrayData = new ArrayData(type, JsonFieldData.Count,
nullCount, 0, buffers, children);
+ Array = UnionArray.Create(arrayData);
+ }
+
+ private ArrayData[] GetChildren(NestedType type)
+ {
+ ArrayData[] children = new ArrayData[type.Fields.Count];
+
+ var data = JsonFieldData;
+ for (int i = 0; i < children.Length; i++)
+ {
+ JsonFieldData = data.Children[i];
+ type.Fields[i].DataType.Accept(this);
+ children[i] = Array.Data;
+ }
+ JsonFieldData = data;
+
+ return children;
+ }
+
private static byte[] ConvertHexStringToByteArray(string hexString)
{
byte[] data = new byte[hexString.Length / 2];
@@ -619,11 +669,22 @@ namespace Apache.Arrow.IntegrationTest
private ArrowBuffer GetOffsetBuffer()
{
+ if (JsonFieldData.Count == 0) { return ArrowBuffer.Empty; }
ArrowBuffer.Builder<int> valueOffsets = new
ArrowBuffer.Builder<int>(JsonFieldData.Offset.Length);
valueOffsets.AppendRange(JsonFieldData.Offset);
return valueOffsets.Build(default);
}
+ private ArrowBuffer GetTypeIdBuffer()
+ {
+ ArrowBuffer.Builder<byte> typeIds = new
ArrowBuffer.Builder<byte>(JsonFieldData.TypeId.Length);
+ for (int i = 0; i < JsonFieldData.TypeId.Length; i++)
+ {
+ typeIds.Append(checked((byte)JsonFieldData.TypeId[i]));
+ }
+ return typeIds.Build(default);
+ }
+
private ArrowBuffer GetValidityBuffer(out int nullCount)
{
if (JsonFieldData.Validity == null)
diff --git a/csharp/test/Apache.Arrow.IntegrationTest/JsonFile.cs b/csharp/test/Apache.Arrow.IntegrationTest/JsonFile.cs
index f0f63d3e19..112eeabcb9 100644
--- a/csharp/test/Apache.Arrow.IntegrationTest/JsonFile.cs
+++ b/csharp/test/Apache.Arrow.IntegrationTest/JsonFile.cs
@@ -71,6 +71,10 @@ namespace Apache.Arrow.IntegrationTest
// FixedSizeList fields
public int ListSize { get; set; }
+ // union fields
+ public string Mode { get; set; }
+ public int[] TypeIds { get; set; }
+
[JsonExtensionData]
public Dictionary<string, JsonElement> ExtensionData { get; set; }
}
diff --git a/csharp/test/Apache.Arrow.Tests/ArrayTypeComparer.cs b/csharp/test/Apache.Arrow.Tests/ArrayTypeComparer.cs
index 77584aefb1..c8bcc3cee0 100644
--- a/csharp/test/Apache.Arrow.Tests/ArrayTypeComparer.cs
+++ b/csharp/test/Apache.Arrow.Tests/ArrayTypeComparer.cs
@@ -28,7 +28,8 @@ namespace Apache.Arrow.Tests
IArrowTypeVisitor<FixedSizeBinaryType>,
IArrowTypeVisitor<ListType>,
IArrowTypeVisitor<FixedSizeListType>,
- IArrowTypeVisitor<StructType>
+ IArrowTypeVisitor<StructType>,
+ IArrowTypeVisitor<UnionType>
{
private readonly IArrowType _expectedType;
@@ -114,6 +115,22 @@ namespace Apache.Arrow.Tests
CompareNested(expectedType, actualType);
}
+ public void Visit(UnionType actualType)
+ {
+ Assert.IsAssignableFrom<UnionType>(_expectedType);
+ UnionType expectedType = (UnionType)_expectedType;
+
+ Assert.Equal(expectedType.Mode, actualType.Mode);
+
+ Assert.Equal(expectedType.TypeIds.Length,
actualType.TypeIds.Length);
+ for (int i = 0; i < expectedType.TypeIds.Length; i++)
+ {
+ Assert.Equal(expectedType.TypeIds[i], actualType.TypeIds[i]);
+ }
+
+ CompareNested(expectedType, actualType);
+ }
+
private static void CompareNested(NestedType expectedType, NestedType
actualType)
{
Assert.Equal(expectedType.Fields.Count, actualType.Fields.Count);
diff --git a/csharp/test/Apache.Arrow.Tests/ArrowArrayConcatenatorTests.cs b/csharp/test/Apache.Arrow.Tests/ArrowArrayConcatenatorTests.cs
index 36cffe7eb4..f5a2c345e2 100644
--- a/csharp/test/Apache.Arrow.Tests/ArrowArrayConcatenatorTests.cs
+++ b/csharp/test/Apache.Arrow.Tests/ArrowArrayConcatenatorTests.cs
@@ -77,6 +77,22 @@ namespace Apache.Arrow.Tests
new
Field.Builder().Name("Ints").DataType(Int32Type.Default).Nullable(true).Build()
}),
new FixedSizeListType(Int32Type.Default, 1),
+ new UnionType(
+ new List<Field>{
+ new
Field.Builder().Name("Strings").DataType(StringType.Default).Nullable(true).Build(),
+ new
Field.Builder().Name("Ints").DataType(Int32Type.Default).Nullable(true).Build()
+ },
+ new[] { 0, 1 },
+ UnionMode.Sparse
+ ),
+ new UnionType(
+ new List<Field>{
+ new
Field.Builder().Name("Strings").DataType(StringType.Default).Nullable(true).Build(),
+ new
Field.Builder().Name("Ints").DataType(Int32Type.Default).Nullable(true).Build()
+ },
+ new[] { 0, 1 },
+ UnionMode.Dense
+ ),
};
foreach (IArrowType type in targetTypes)
@@ -119,7 +135,8 @@ namespace Apache.Arrow.Tests
IArrowTypeVisitor<TimestampType>,
IArrowTypeVisitor<ListType>,
IArrowTypeVisitor<FixedSizeListType>,
- IArrowTypeVisitor<StructType>
+ IArrowTypeVisitor<StructType>,
+ IArrowTypeVisitor<UnionType>
{
private List<List<int?>> _baseData;
@@ -392,6 +409,91 @@ namespace Apache.Arrow.Tests
ExpectedArray = new StructArray(type, 3, new List<Array> {
resultStringArray, resultInt32Array }, nullBitmapBuffer, 1);
}
+ public void Visit(UnionType type)
+ {
+ bool isDense = type.Mode == UnionMode.Dense;
+
+ StringArray.Builder stringResultBuilder = new
StringArray.Builder().Reserve(_baseDataTotalElementCount);
+ Int32Array.Builder intResultBuilder = new
Int32Array.Builder().Reserve(_baseDataTotalElementCount);
+ ArrowBuffer.Builder<byte> typeResultBuilder = new
ArrowBuffer.Builder<byte>().Reserve(_baseDataTotalElementCount);
+ ArrowBuffer.Builder<int> offsetResultBuilder = new
ArrowBuffer.Builder<int>().Reserve(_baseDataTotalElementCount);
+ int resultNullCount = 0;
+
+ for (int i = 0; i < _baseDataListCount; i++)
+ {
+ List<int?> dataList = _baseData[i];
+ StringArray.Builder stringBuilder = new
StringArray.Builder().Reserve(dataList.Count);
+ Int32Array.Builder intBuilder = new
Int32Array.Builder().Reserve(dataList.Count);
+ ArrowBuffer.Builder<byte> typeBuilder = new
ArrowBuffer.Builder<byte>().Reserve(dataList.Count);
+ ArrowBuffer.Builder<int> offsetBuilder = new
ArrowBuffer.Builder<int>().Reserve(dataList.Count);
+ int nullCount = 0;
+
+ for (int j = 0; j < dataList.Count; j++)
+ {
+ byte index = (byte)Math.Max(j % 3, 1);
+ int? intValue = (index == 1) ? dataList[j] : null;
+ string stringValue = (index == 1) ? null :
dataList[j]?.ToString();
+ typeBuilder.Append(index);
+
+ if (isDense)
+ {
+ if (index == 0)
+ {
+ offsetBuilder.Append(stringBuilder.Length);
+ offsetResultBuilder.Append(stringResultBuilder.Length);
+ stringBuilder.Append(stringValue);
+ stringResultBuilder.Append(stringValue);
+ }
+ else
+ {
+ offsetBuilder.Append(intBuilder.Length);
+ offsetResultBuilder.Append(intResultBuilder.Length);
+ intBuilder.Append(intValue);
+ intResultBuilder.Append(intValue);
+ }
+ }
+ else
+ {
+ stringBuilder.Append(stringValue);
+ stringResultBuilder.Append(stringValue);
+ intBuilder.Append(intValue);
+ intResultBuilder.Append(intValue);
+ }
+
+ if (dataList[j] == null)
+ {
+ nullCount++;
+ resultNullCount++;
+ }
+ }
+
+ ArrowBuffer[] buffers;
+ if (isDense)
+ {
+ buffers = new[] { typeBuilder.Build(),
offsetBuilder.Build() };
+ }
+ else
+ {
+ buffers = new[] { typeBuilder.Build() };
+ }
+ TestTargetArrayList.Add(UnionArray.Create(new ArrayData(
+ type, dataList.Count, nullCount, 0, buffers,
+ new[] { stringBuilder.Build().Data,
intBuilder.Build().Data })));
+ }
+
+ ArrowBuffer[] resultBuffers;
+ if (isDense)
+ {
+ resultBuffers = new[] { typeResultBuilder.Build(),
offsetResultBuilder.Build() };
+ }
+ else
+ {
+ resultBuffers = new[] { typeResultBuilder.Build() };
+ }
+ ExpectedArray = UnionArray.Create(new ArrayData(
+ type, _baseDataTotalElementCount, resultNullCount, 0,
resultBuffers,
+ new[] { stringResultBuilder.Build().Data,
intResultBuilder.Build().Data }));
+ }
public void Visit(IArrowType type)
{
diff --git a/csharp/test/Apache.Arrow.Tests/ArrowReaderVerifier.cs b/csharp/test/Apache.Arrow.Tests/ArrowReaderVerifier.cs
index e588eab51e..8b41763a70 100644
--- a/csharp/test/Apache.Arrow.Tests/ArrowReaderVerifier.cs
+++ b/csharp/test/Apache.Arrow.Tests/ArrowReaderVerifier.cs
@@ -91,6 +91,7 @@ namespace Apache.Arrow.Tests
IArrowArrayVisitor<FixedSizeBinaryArray>,
IArrowArrayVisitor<BinaryArray>,
IArrowArrayVisitor<StructArray>,
+ IArrowArrayVisitor<UnionArray>,
IArrowArrayVisitor<Decimal128Array>,
IArrowArrayVisitor<Decimal256Array>,
IArrowArrayVisitor<DictionaryArray>,
@@ -151,6 +152,24 @@ namespace Apache.Arrow.Tests
}
}
+ public void Visit(UnionArray array)
+ {
+ Assert.IsAssignableFrom<UnionArray>(_expectedArray);
+ UnionArray expectedArray = (UnionArray)_expectedArray;
+
+ Assert.Equal(expectedArray.Mode, array.Mode);
+ Assert.Equal(expectedArray.Length, array.Length);
+ Assert.Equal(expectedArray.NullCount, array.NullCount);
+ Assert.Equal(expectedArray.Offset, array.Offset);
+ Assert.Equal(expectedArray.Data.Children.Length,
array.Data.Children.Length);
+ Assert.Equal(expectedArray.Fields.Count, array.Fields.Count);
+
+ for (int i = 0; i < array.Fields.Count; i++)
+ {
+ array.Fields[i].Accept(new
ArrayComparer(expectedArray.Fields[i], _strictCompare));
+ }
+ }
+
public void Visit(DictionaryArray array)
{
Assert.IsAssignableFrom<DictionaryArray>(_expectedArray);
diff --git a/csharp/test/Apache.Arrow.Tests/CDataInterfacePythonTests.cs b/csharp/test/Apache.Arrow.Tests/CDataInterfacePythonTests.cs
index 29b1b9e7db..f28b89a9cd 100644
--- a/csharp/test/Apache.Arrow.Tests/CDataInterfacePythonTests.cs
+++ b/csharp/test/Apache.Arrow.Tests/CDataInterfacePythonTests.cs
@@ -112,6 +112,9 @@ namespace Apache.Arrow.Tests
.Field(f => f.Name("dict_string_ordered").DataType(new
DictionaryType(Int32Type.Default, StringType.Default, true)).Nullable(false))
.Field(f => f.Name("list_dict_string").DataType(new
ListType(new DictionaryType(Int32Type.Default, StringType.Default,
false))).Nullable(false))
+ .Field(f => f.Name("dense_union").DataType(new
UnionType(new[] { new Field("i64", Int64Type.Default, false), new Field("f32",
FloatType.Default, true), }, new[] { 0, 1 }, UnionMode.Dense)))
+ .Field(f => f.Name("sparse_union").DataType(new
UnionType(new[] { new Field("i32", Int32Type.Default, true), new Field("f64",
DoubleType.Default, false), }, new[] { 0, 1 }, UnionMode.Sparse)))
+
// Checking wider characters.
.Field(f => f.Name("hello 你好 😄").DataType(BooleanType.Default).Nullable(true))
@@ -172,6 +175,9 @@ namespace Apache.Arrow.Tests
yield return pa.field("dict_string_ordered",
pa.dictionary(pa.int32(), pa.utf8(), true), false);
yield return pa.field("list_dict_string",
pa.list_(pa.dictionary(pa.int32(), pa.utf8(), false)), false);
+ yield return pa.field("dense_union",
pa.dense_union(List(pa.field("i64", pa.int64(), false), pa.field("f32",
pa.float32(), true))));
+ yield return pa.field("sparse_union",
pa.sparse_union(List(pa.field("i32", pa.int32(), true), pa.field("f64",
pa.float64(), false))));
+
yield return pa.field("hello 你好 😄", pa.bool_(), true);
}
}
@@ -485,22 +491,29 @@ namespace Apache.Arrow.Tests
pa.array(List(0.0, 1.4, 2.5, 3.6, 4.7)),
pa.array(new PyObject[] { List(1, 2), List(3, 4),
PyObject.None, PyObject.None, List(5, 4, 3) }),
pa.StructArray.from_arrays(
- new PyList(new PyObject[]
- {
+ List(
List(10, 9, null, null, null),
List("banana", "apple", "orange", "cherry",
"grape"),
- List(null, 4.3, -9, 123.456, 0),
- }),
+ List(null, 4.3, -9, 123.456, 0)
+ ),
new[] { "fld1", "fld2", "fld3" }),
pa.DictionaryArray.from_arrays(
pa.array(List(1, 0, 1, 1, null)),
- pa.array(List("foo", "bar"))
- ),
+ pa.array(List("foo", "bar"))),
pa.FixedSizeListArray.from_arrays(
pa.array(List(1, 2, 3, 4, null, 6, 7, null, null,
null)),
2),
+ pa.UnionArray.from_dense(
+ pa.array(List(0, 1, 1, 0, 0), type: "int8"),
+ pa.array(List(0, 0, 1, 1, 2), type: "int32"),
+ List(
+ pa.array(List(1, 4, null)),
+ pa.array(List("two", "three"))
+ ),
+ /* field name */ List("i32", "s"),
+ /* type codes */ List(3, 2)),
}),
- new[] { "col1", "col2", "col3", "col4", "col5", "col6",
"col7", "col8" });
+ new[] { "col1", "col2", "col3", "col4", "col5", "col6",
"col7", "col8", "col9" });
dynamic batch = table.to_batches()[0];
@@ -568,6 +581,10 @@ namespace Apache.Arrow.Tests
Assert.Equal(new long[] { 1, 2, 3, 4, 0, 6, 7, 0, 0, 0 },
col8a.Values.ToArray());
Assert.True(col8a.IsValid(3));
Assert.False(col8a.IsValid(9));
+
+ UnionArray col9 = (UnionArray)recordBatch.Column("col9");
+ Assert.Equal(5, col9.Length);
+ Assert.True(col9 is DenseUnionArray);
}
[SkippableFact]
@@ -789,6 +806,11 @@ namespace Apache.Arrow.Tests
return new PyList(values.Select(i => i == null ? PyObject.None :
new PyString(i)).ToArray());
}
+ private static PyObject List(params PyObject[] values)
+ {
+ return new PyList(values);
+ }
+
sealed class TestArrayStream : IArrowArrayStream
{
private readonly RecordBatch[] _batches;
diff --git a/csharp/test/Apache.Arrow.Tests/ColumnTests.cs b/csharp/test/Apache.Arrow.Tests/ColumnTests.cs
index b90c681622..2d867b7917 100644
--- a/csharp/test/Apache.Arrow.Tests/ColumnTests.cs
+++ b/csharp/test/Apache.Arrow.Tests/ColumnTests.cs
@@ -39,7 +39,7 @@ namespace Apache.Arrow.Tests
Array intArrayCopy = MakeIntArray(10);
Field field = new
Field.Builder().Name("f0").DataType(Int32Type.Default).Build();
- Column column = new Column(field, new[] { intArray, intArrayCopy
});
+ Column column = new Column(field, new IArrowArray[] { intArray,
intArrayCopy });
Assert.True(column.Name == field.Name);
Assert.True(column.Field == field);
diff --git a/csharp/test/Apache.Arrow.Tests/TableTests.cs b/csharp/test/Apache.Arrow.Tests/TableTests.cs
index b4c4b1faed..8b07a38c1b 100644
--- a/csharp/test/Apache.Arrow.Tests/TableTests.cs
+++ b/csharp/test/Apache.Arrow.Tests/TableTests.cs
@@ -30,7 +30,7 @@ namespace Apache.Arrow.Tests
Field field = new
Field.Builder().Name("f0").DataType(Int32Type.Default).Build();
Schema s0 = new Schema.Builder().Field(field).Build();
- Column column = new Column(field, new List<Array> { intArray,
intArrayCopy });
+ Column column = new Column(field, new List<IArrowArray> {
intArray, intArrayCopy });
Table table = new Table(s0, new List<Column> { column });
return table;
}
@@ -60,7 +60,7 @@ namespace Apache.Arrow.Tests
Table table1 = Table.TableFromRecordBatches(recordBatch1.Schema,
recordBatches);
Assert.Equal(20, table1.RowCount);
- Assert.Equal(24, table1.ColumnCount);
+ Assert.Equal(26, table1.ColumnCount);
FixedSizeBinaryType type = new FixedSizeBinaryType(17);
Field newField1 = new Field(type.Name, type, false);
@@ -86,13 +86,13 @@ namespace Apache.Arrow.Tests
Array nonEqualLengthIntArray = ColumnTests.MakeIntArray(10);
Field field1 = new
Field.Builder().Name("f1").DataType(Int32Type.Default).Build();
- Column nonEqualLengthColumn = new Column(field1, new[] {
nonEqualLengthIntArray});
+ Column nonEqualLengthColumn = new Column(field1, new IArrowArray[]
{ nonEqualLengthIntArray });
Assert.Throws<ArgumentException>(() => table.InsertColumn(-1,
nonEqualLengthColumn));
Assert.Throws<ArgumentException>(() => table.InsertColumn(1,
nonEqualLengthColumn));
Array equalLengthIntArray = ColumnTests.MakeIntArray(20);
Field field2 = new
Field.Builder().Name("f2").DataType(Int32Type.Default).Build();
- Column equalLengthColumn = new Column(field2, new[] {
equalLengthIntArray});
+ Column equalLengthColumn = new Column(field2, new IArrowArray[] {
equalLengthIntArray });
Column existingColumn = table.Column(0);
Table newTable = table.InsertColumn(0, equalLengthColumn);
@@ -118,7 +118,7 @@ namespace Apache.Arrow.Tests
RecordBatch batch = TestData.CreateSampleRecordBatch(schema, 10);
Table table = Table.TableFromRecordBatches(schema, new[] { batch
});
- Assert.NotNull(table.Column(0).Data.Array(0) as Int64Array);
+ Assert.NotNull(table.Column(0).Data.ArrowArray(0) as Int64Array);
}
}
diff --git a/csharp/test/Apache.Arrow.Tests/TestData.cs b/csharp/test/Apache.Arrow.Tests/TestData.cs
index 41507311f6..9e2061e342 100644
--- a/csharp/test/Apache.Arrow.Tests/TestData.cs
+++ b/csharp/test/Apache.Arrow.Tests/TestData.cs
@@ -60,6 +60,8 @@ namespace Apache.Arrow.Tests
builder.Field(CreateField(new
DictionaryType(Int32Type.Default, StringType.Default, false), i));
builder.Field(CreateField(new FixedSizeBinaryType(16), i));
builder.Field(CreateField(new
FixedSizeListType(Int32Type.Default, 3), i));
+ builder.Field(CreateField(new UnionType(new[] {
CreateField(StringType.Default, i), CreateField(Int32Type.Default, i) }, new[]
{ 0, 1 }, UnionMode.Sparse), i));
+ builder.Field(CreateField(new UnionType(new[] {
CreateField(StringType.Default, i), CreateField(Int32Type.Default, i) }, new[]
{ 0, 1 }, UnionMode.Dense), -i));
}
//builder.Field(CreateField(HalfFloatType.Default));
@@ -125,6 +127,7 @@ namespace Apache.Arrow.Tests
IArrowTypeVisitor<ListType>,
IArrowTypeVisitor<FixedSizeListType>,
IArrowTypeVisitor<StructType>,
+ IArrowTypeVisitor<UnionType>,
IArrowTypeVisitor<Decimal128Type>,
IArrowTypeVisitor<Decimal256Type>,
IArrowTypeVisitor<DictionaryType>,
@@ -315,6 +318,67 @@ namespace Apache.Arrow.Tests
Array = new StructArray(type, Length, childArrays,
nullBitmap.Build());
}
+ public void Visit(UnionType type)
+ {
+ int[] lengths = new int[type.Fields.Count];
+ if (type.Mode == UnionMode.Sparse)
+ {
+ for (int i = 0; i < lengths.Length; i++)
+ {
+ lengths[i] = Length;
+ }
+ }
+ else
+ {
+ int totalLength = Length;
+ int oneLength = Length / lengths.Length;
+ for (int i = 1; i < lengths.Length; i++)
+ {
+ lengths[i] = oneLength;
+ totalLength -= oneLength;
+ }
+ lengths[0] = totalLength;
+ }
+
+ ArrayData[] childArrays = new ArrayData[type.Fields.Count];
+ for (int i = 0; i < childArrays.Length; i++)
+ {
+ childArrays[i] = CreateArray(type.Fields[i],
lengths[i]).Data;
+ }
+
+ ArrowBuffer.Builder<byte> typeIdBuilder = new
ArrowBuffer.Builder<byte>(Length);
+ byte index = 0;
+ for (int i = 0; i < Length; i++)
+ {
+ typeIdBuilder.Append(index);
+ index++;
+ if (index == lengths.Length)
+ {
+ index = 0;
+ }
+ }
+
+ ArrowBuffer[] buffers;
+ if (type.Mode == UnionMode.Sparse)
+ {
+ buffers = new ArrowBuffer[1];
+ }
+ else
+ {
+ ArrowBuffer.Builder<int> offsetBuilder = new
ArrowBuffer.Builder<int>(Length);
+ for (int i = 0; i < Length; i++)
+ {
+ offsetBuilder.Append(i / lengths.Length);
+ }
+
+ buffers = new ArrowBuffer[2];
+ buffers[1] = offsetBuilder.Build();
+ }
+ buffers[0] = typeIdBuilder.Build();
+
+ Array = UnionArray.Create(new ArrayData(type, Length, 0, 0,
buffers, childArrays));
+ }
+
public void Visit(DictionaryType type)
{
Int32Array.Builder indicesBuilder = new
Int32Array.Builder().Reserve(Length);
diff --git a/dev/archery/archery/integration/datagen.py b/dev/archery/archery/integration/datagen.py
index 5ac32da56a..299881c4b6 100644
--- a/dev/archery/archery/integration/datagen.py
+++ b/dev/archery/archery/integration/datagen.py
@@ -1833,8 +1833,7 @@ def get_generated_json_files(tempdir=None):
.skip_tester('C#')
.skip_tester('JS'),
- generate_unions_case()
- .skip_tester('C#'),
+ generate_unions_case(),
generate_custom_metadata_case()
.skip_tester('C#'),
diff --git a/docs/source/status.rst b/docs/source/status.rst
index 36c29fcdc4..6314fd4c8d 100644
--- a/docs/source/status.rst
+++ b/docs/source/status.rst
@@ -83,9 +83,9 @@ Data Types
+-------------------+-------+-------+-------+------------+-------+-------+-------+-------+
| Map | ✓ | ✓ | ✓ | ✓ | | ✓ | ✓ | |
+-------------------+-------+-------+-------+------------+-------+-------+-------+-------+
-| Dense Union | ✓ | ✓ | ✓ | ✓ | | ✓ | ✓ | |
+| Dense Union | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | |
+-------------------+-------+-------+-------+------------+-------+-------+-------+-------+
-| Sparse Union | ✓ | ✓ | ✓ | ✓ | | ✓ | ✓ | |
+| Sparse Union | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | |
+-------------------+-------+-------+-------+------------+-------+-------+-------+-------+
+-------------------+-------+-------+-------+------------+-------+-------+-------+-------+