CurtHagenlocher commented on code in PR #325: URL: https://github.com/apache/arrow-dotnet/pull/325#discussion_r3125608033
########## src/Apache.Arrow/Arrays/VariantArray.cs: ########## @@ -0,0 +1,362 @@ +// Licensed to the Apache Software Foundation (ASF) under one or more +// contributor license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright ownership. +// The ASF licenses this file to You under the Apache License, Version 2.0 +// (the "License"); you may not use this file except in compliance with +// the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +using System; +using System.Collections; +using System.Collections.Generic; +using System.Linq; +using Apache.Arrow.Memory; +using Apache.Arrow.Scalars.Variant; +using Apache.Arrow.Types; + +namespace Apache.Arrow +{ + /// <summary> + /// Extension definition for the "arrow.parquet.variant" extension type, + /// backed by a struct with "metadata" and "value" binary fields. + /// </summary> + public class VariantExtensionDefinition : ExtensionDefinition + { + public static VariantExtensionDefinition Instance = new VariantExtensionDefinition(); + + public override string ExtensionName => VariantType.ExtensionName; + + private VariantExtensionDefinition() { } + + public override bool TryCreateType(IArrowType storageType, string metadata, out ExtensionType type) + { + if (storageType is StructType structType && + FindBinaryFieldIndex(structType, "metadata") >= 0 && + FindBinaryFieldIndex(structType, "value") >= 0) + { + type = new VariantType(structType); + return true; + } + type = null; + return false; + } + + internal static int FindBinaryFieldIndex(StructType structType, string name) + { + int index = structType.GetFieldIndex(name); + if (index < 0) + { + return -1; + } + + var fieldType = structType.Fields[index].DataType; + if (fieldType is BinaryType || fieldType is LargeBinaryType || fieldType is BinaryViewType) + { + return index; + } + + return -1; + } + } + + /// <summary> + /// Extension type representing Parquet Variant values, stored as + /// struct<metadata: binary, value: binary>. + /// </summary> + public class VariantType : ExtensionType + { + internal const string ExtensionName = "arrow.parquet.variant"; + + public static VariantType Default = new VariantType(); + + public override string Name => ExtensionName; + public override string ExtensionMetadata => ""; + + public VariantType() : base(new StructType(new[] + { + new Field("metadata", BinaryType.Default, false), + new Field("value", BinaryType.Default, false), + })) + { } + + internal VariantType(StructType storageType) : base(storageType) { } + + public override ExtensionArray CreateArray(IArrowArray storageArray) + { + return new VariantArray(this, storageArray); + } + } + + /// <summary> + /// Extension array for Parquet Variant values, backed by a StructArray + /// containing "metadata" and "value" binary fields. + /// </summary> + public class VariantArray : ExtensionArray, IReadOnlyList<VariantValue> + { + private readonly IIndexes _metadataIndexes; + private readonly IBinaryArray _metadataArray; + private readonly IIndexes _valueIndexes; + private readonly IBinaryArray _valueArray; + + public StructArray StorageArray => (StructArray)Storage; + + public VariantArray(VariantType variantType, IArrowArray storage) + : base(variantType, storage) + { + var structType = (StructType)variantType.StorageType; + _metadataArray = DecodeBinaryArray(StorageArray.Fields[structType.GetFieldIndex("metadata")], out _metadataIndexes); + _valueArray = DecodeBinaryArray(StorageArray.Fields[structType.GetFieldIndex("value")], out _valueIndexes); + } + + public VariantArray(IArrowArray storage) : this(VariantType.Default, storage) { } + + /// <summary> + /// Gets the metadata bytes for the element at the given index. + /// </summary> + public ReadOnlySpan<byte> GetMetadataBytes(int index) + { + int physicalIndex = _metadataIndexes.GetPhysicalIndex(index); + return _metadataArray.GetBytes(physicalIndex, out bool isNull); + } Review Comment: I'm fine with this consequence for malformed data. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected]
