This is an automated email from the ASF dual-hosted git repository.
CurtHagenlocher pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-dotnet.git
The following commit(s) were added to refs/heads/main by this push:
new 4f5b95f Add transparent encoding readers and TimestampWithOffset
extension type (#324)
4f5b95f is described below
commit 4f5b95f35243d233b8c7f5ea5ff997891f199158
Author: Curt Hagenlocher <[email protected]>
AuthorDate: Wed Apr 22 04:48:57 2026 -0700
Add transparent encoding readers and TimestampWithOffset extension type
(#324)
## What's Changed
- Adds an extension method `AsDecodedReadOnlyList<T>()` on `IArrowArray`
that returns an `IReadOnlyList<T>` view over plain, dictionary-encoded,
and run-end encoded arrays with zero overhead for plain arrays.
- Adds `EnumeratePhysicalIndices()` to `RunEndEncodedArray` for O(n+m)
sequential enumeration instead of O(n·log m) from repeated binary
searches.
- Implements the `arrow.timestamp_with_offset` canonical extension type
with `TimestampWithOffsetArray`, supporting
`IReadOnlyList<DateTimeOffset?>` and a builder. The offset_minutes field
accepts dictionary and REE encodings via `AsDecodedReadOnlyList`.
---------
Co-authored-by: Copilot <[email protected]>
---
src/Apache.Arrow/Arrays/RunEndEncodedArray.cs | 68 +++
.../Arrays/TimestampWithOffsetArray.cs | 278 +++++++++++
.../Extensions/IArrowArrayExtensions.cs | 157 +++++++
.../Apache.Arrow.Tests/ReadOnlyListAdapterTests.cs | 509 +++++++++++++++++++++
test/Apache.Arrow.Tests/RunEndEncodedArrayTests.cs | 105 +++++
.../TimestampWithOffsetArrayTests.cs | 415 +++++++++++++++++
6 files changed, 1532 insertions(+)
diff --git a/src/Apache.Arrow/Arrays/RunEndEncodedArray.cs
b/src/Apache.Arrow/Arrays/RunEndEncodedArray.cs
index 290c3a0..388302f 100644
--- a/src/Apache.Arrow/Arrays/RunEndEncodedArray.cs
+++ b/src/Apache.Arrow/Arrays/RunEndEncodedArray.cs
@@ -14,6 +14,7 @@
// limitations under the License.
using System;
+using System.Collections.Generic;
using Apache.Arrow.Memory;
using Apache.Arrow.Types;
@@ -395,5 +396,72 @@ public class RunEndEncodedArray : Array
return Math.Min(lo + 1, runEnds.Length);
}
+        /// <summary>
+        /// Lazily yields, for every logical position in order, the physical index of
+        /// the run that covers it.
+        /// </summary>
+        /// <remarks>
+        /// The first run is located once with <see cref="FindPhysicalIndex"/>; after that
+        /// the run-ends array is only walked forward, yielding O(n + m) total work instead
+        /// of the O(n·log m) of calling <see cref="FindPhysicalIndex"/> per element.
+        /// Because this is an iterator, nothing runs (and nothing is thrown) until the
+        /// result is enumerated.
+        /// </remarks>
+        /// <returns>One physical index per logical element; empty when the array is empty.</returns>
+        /// <exception cref="InvalidOperationException">
+        /// The run-ends array is not an Int16, Int32 or Int64 array.
+        /// </exception>
+        public IEnumerable<int> EnumeratePhysicalIndices()
+        {
+            int length = Length;
+            if (length == 0)
+                yield break;
+
+            // Widened to long so that "offset + logical" is compared against the run end
+            // without any risk of 32-bit wrap-around for large slice offsets.
+            long offset = Data.Offset;
+            int physicalIndex = FindPhysicalIndex(0);
+
+            // The three supported run-end widths differ only in how a run end is read,
+            // so pick the accessor once and share a single loop. The accessor is invoked
+            // once per run, not once per element.
+            Func<int, long> runEndAt;
+            switch (RunEnds)
+            {
+                case Int16Array int16RunEnds:
+                    runEndAt = i => int16RunEnds.GetValue(i).Value;
+                    break;
+                case Int32Array int32RunEnds:
+                    runEndAt = i => int32RunEnds.GetValue(i).Value;
+                    break;
+                case Int64Array int64RunEnds:
+                    runEndAt = i => int64RunEnds.GetValue(i).Value;
+                    break;
+                default:
+                    throw new InvalidOperationException(
+                        $"Unexpected run ends array type: {RunEnds.Data.DataType.TypeId}");
+            }
+
+            long currentRunEnd = runEndAt(physicalIndex);
+            for (int logical = 0; logical < length; logical++)
+            {
+                long position = offset + logical;
+
+                // Skip to the next run if we've reached the end of the current run.
+                // Unless the run ends are malformed this advances at most one run
+                // per logical position.
+                while (position >= currentRunEnd)
+                {
+                    physicalIndex++;
+                    currentRunEnd = runEndAt(physicalIndex);
+                }
+                yield return physicalIndex;
+            }
+        }
+
public override void Accept(IArrowArrayVisitor visitor) => Accept(this,
visitor);
}
diff --git a/src/Apache.Arrow/Arrays/TimestampWithOffsetArray.cs
b/src/Apache.Arrow/Arrays/TimestampWithOffsetArray.cs
new file mode 100644
index 0000000..49acbae
--- /dev/null
+++ b/src/Apache.Arrow/Arrays/TimestampWithOffsetArray.cs
@@ -0,0 +1,278 @@
+// Licensed to the Apache Software Foundation (ASF) under one or more
+// contributor license agreements. See the NOTICE file distributed with
+// this work for additional information regarding copyright ownership.
+// The ASF licenses this file to You under the Apache License, Version 2.0
+// (the "License"); you may not use this file except in compliance with
+// the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+using System;
+using System.Collections;
+using System.Collections.Generic;
+using Apache.Arrow.Types;
+
+namespace Apache.Arrow
+{
+    /// <summary>
+    /// Extension definition for the "arrow.timestamp_with_offset" canonical extension type.
+    /// </summary>
+    /// <remarks>
+    /// Accepted storage is a two-field struct: a non-nullable "timestamp" field of type
+    /// Timestamp(unit, "UTC") followed by a non-nullable "offset_minutes" field whose
+    /// values are Int16, stored either plainly or behind a dictionary or run-end encoding.
+    /// </remarks>
+    public class TimestampWithOffsetExtensionDefinition : ExtensionDefinition
+    {
+        public static readonly TimestampWithOffsetExtensionDefinition Instance = new TimestampWithOffsetExtensionDefinition();
+
+        public override string ExtensionName => "arrow.timestamp_with_offset";
+
+        private TimestampWithOffsetExtensionDefinition() { }
+
+        /// <summary>
+        /// Attempts to build a <see cref="TimestampWithOffsetType"/> over the given storage type.
+        /// </summary>
+        /// <param name="storageType">Candidate storage type.</param>
+        /// <param name="metadata">Serialized extension metadata; not consulted here.</param>
+        /// <param name="type">The created type on success, otherwise <c>null</c>.</param>
+        /// <returns><c>true</c> when the storage type has the expected shape.</returns>
+        public override bool TryCreateType(IArrowType storageType, string metadata, out ExtensionType type)
+        {
+            type = null;
+
+            var storage = storageType as StructType;
+            if (storage == null || storage.Fields.Count != 2)
+                return false;
+
+            // Field order, names and nullability are all part of the expected layout.
+            Field timestampField = storage.Fields[0];
+            Field offsetMinutesField = storage.Fields[1];
+
+            bool namesMatch = timestampField.Name == "timestamp"
+                && offsetMinutesField.Name == "offset_minutes";
+            if (!namesMatch)
+                return false;
+
+            var timestampType = timestampField.DataType as TimestampType;
+            if (timestampType == null || timestampType.Timezone != "UTC")
+                return false;
+
+            if (timestampField.IsNullable || offsetMinutesField.IsNullable)
+                return false;
+
+            // offset_minutes must logically be Int16, but may be dict/REE encoded
+            if (!IsLogicallyInt16(offsetMinutesField.DataType))
+                return false;
+
+            type = new TimestampWithOffsetType(timestampType.Unit, storage);
+            return true;
+        }
+
+        /// <summary>
+        /// Returns whether the type is Int16 itself, or a dictionary or run-end encoded
+        /// type whose values are Int16.
+        /// </summary>
+        private static bool IsLogicallyInt16(IArrowType type)
+        {
+            if (type is Int16Type)
+                return true;
+
+            if (type is DictionaryType dictionary)
+                return dictionary.ValueType is Int16Type;
+
+            if (type is RunEndEncodedType runEndEncoded)
+                return runEndEncoded.ValuesDataType is Int16Type;
+
+            return false;
+        }
+    }
+
+    /// <summary>
+    /// Extension type for timestamps that carry a per-value UTC offset. Storage is
+    /// Struct(timestamp: Timestamp(unit, "UTC"), offset_minutes: Int16).
+    /// </summary>
+    public class TimestampWithOffsetType : ExtensionType
+    {
+        /// <summary>Shared instance with microsecond resolution.</summary>
+        public static readonly TimestampWithOffsetType Default =
+            new TimestampWithOffsetType(TimeUnit.Microsecond);
+
+        public override string Name => "arrow.timestamp_with_offset";
+        public override string ExtensionMetadata => "";
+
+        /// <summary>Resolution of the "timestamp" storage field.</summary>
+        public TimeUnit Unit { get; }
+
+        /// <summary>
+        /// Creates the type over the default storage layout (plain Int16 offsets).
+        /// </summary>
+        /// <param name="unit">Timestamp resolution; microseconds when omitted.</param>
+        public TimestampWithOffsetType(TimeUnit unit = TimeUnit.Microsecond)
+            : base(CreateDefaultStorageType(unit))
+        {
+            Unit = unit;
+        }
+
+        /// <summary>
+        /// Creates the type over a caller-supplied storage struct, as done by
+        /// <see cref="TimestampWithOffsetExtensionDefinition.TryCreateType"/> after validation.
+        /// </summary>
+        internal TimestampWithOffsetType(TimeUnit unit, StructType storageType)
+            : base(storageType)
+        {
+            Unit = unit;
+        }
+
+        public override ExtensionArray CreateArray(IArrowArray storageArray)
+            => new TimestampWithOffsetArray(this, storageArray);
+
+        /// <summary>Builds the two-field, non-nullable storage struct for the given unit.</summary>
+        private static StructType CreateDefaultStorageType(TimeUnit unit)
+        {
+            var timestampField = new Field("timestamp", new TimestampType(unit, "UTC"), nullable: false);
+            var offsetMinutesField = new Field("offset_minutes", Int16Type.Default, nullable: false);
+
+            return new StructType(new[] { timestampField, offsetMinutesField });
+        }
+    }
+
+    /// <summary>
+    /// Extension array for the "arrow.timestamp_with_offset" canonical extension type.
+    /// Implements <see cref="IReadOnlyList{T}"/> of nullable <see cref="DateTimeOffset"/>.
+    /// </summary>
+    /// <remarks>
+    /// A slot reads as <c>null</c> when the storage struct marks it null or when the
+    /// timestamp child yields no value. Otherwise the UTC instant from the "timestamp"
+    /// child is re-expressed at the offset, in minutes, taken from the "offset_minutes"
+    /// child (a missing offset is treated as zero).
+    /// </remarks>
+    public class TimestampWithOffsetArray : ExtensionArray, IReadOnlyList<DateTimeOffset?>
+    {
+        private readonly StructArray _struct;
+        private readonly TimestampArray _timestamps;
+
+        // Decoded view, so plain, dictionary-encoded and run-end encoded offsets
+        // are all read the same way.
+        private readonly IReadOnlyList<short?> _offsetMinutes;
+
+        /// <summary>
+        /// Wraps a struct storage array that has "timestamp" and "offset_minutes" fields.
+        /// </summary>
+        /// <param name="type">The extension type of the new array.</param>
+        /// <param name="storage">The struct storage array; its fields are looked up by name.</param>
+        /// <exception cref="ArgumentException">
+        /// The storage struct lacks a "timestamp" or "offset_minutes" field.
+        /// </exception>
+        public TimestampWithOffsetArray(TimestampWithOffsetType type, IArrowArray storage)
+            : base(type, storage)
+        {
+            _struct = (StructArray)storage;
+            var structType = (StructType)storage.Data.DataType;
+
+            int tsIndex = structType.GetFieldIndex("timestamp");
+            int offsetIndex = structType.GetFieldIndex("offset_minutes");
+            if (tsIndex < 0 || offsetIndex < 0)
+                throw new ArgumentException("Storage struct must have 'timestamp' and 'offset_minutes' fields.");
+
+            _timestamps = (TimestampArray)_struct.Fields[tsIndex];
+            _offsetMinutes = _struct.Fields[offsetIndex].AsDecodedReadOnlyList<short?>();
+        }
+
+        /// <summary>
+        /// Gets the value at the specified index as a <see cref="DateTimeOffset"/>
+        /// with the original timezone offset preserved.
+        /// </summary>
+        /// <param name="index">Zero-based logical index.</param>
+        /// <returns>The value, or <c>null</c> for a null slot.</returns>
+        /// <exception cref="ArgumentOutOfRangeException">
+        /// <paramref name="index"/> is negative or not less than the array length.
+        /// </exception>
+        public DateTimeOffset? GetValue(int index)
+        {
+            if (index < 0 || index >= Length)
+                throw new ArgumentOutOfRangeException(nameof(index));
+
+            if (IsNull(index))
+                return null;
+
+            DateTimeOffset? utc = _timestamps.GetTimestamp(index);
+            if (utc == null)
+                return null;
+
+            short offsetMins = _offsetMinutes[index] ?? 0;
+            TimeSpan offset = TimeSpan.FromMinutes(offsetMins);
+            return utc.Value.ToOffset(offset);
+        }
+
+        public int Count => Length;
+        public DateTimeOffset? this[int index] => GetValue(index);
+
+        /// <summary>
+        /// Enumerates every slot in order, producing the same values as <see cref="GetValue"/>.
+        /// </summary>
+        /// <remarks>
+        /// The offsets are consumed through their own enumerator rather than by index so
+        /// that an encoded offsets child can be walked sequentially.
+        /// </remarks>
+        public IEnumerator<DateTimeOffset?> GetEnumerator()
+        {
+            int index = 0;
+            using (IEnumerator<short?> offsets = _offsetMinutes.GetEnumerator())
+            {
+                while (offsets.MoveNext() && index < Length)
+                {
+                    // Check struct-level validity first, exactly as GetValue does, so the
+                    // placeholder timestamp stored in a null slot is never decoded.
+                    DateTimeOffset? utc = IsNull(index) ? null : _timestamps.GetTimestamp(index);
+                    if (utc == null)
+                    {
+                        yield return null;
+                    }
+                    else
+                    {
+                        TimeSpan offset = TimeSpan.FromMinutes(offsets.Current ?? 0);
+                        yield return utc.Value.ToOffset(offset);
+                    }
+                    index++;
+                }
+            }
+        }
+
+        IEnumerator IEnumerable.GetEnumerator() => GetEnumerator();
+
+        /// <summary>
+        /// Builder for <see cref="TimestampWithOffsetArray"/>. Produces the default storage
+        /// layout: a UTC timestamp child and a plain Int16 offset child.
+        /// </summary>
+        public class Builder
+        {
+            private readonly TimestampArray.Builder _timestampBuilder;
+            private readonly Int16Array.Builder _offsetBuilder;
+            private readonly ArrowBuffer.BitmapBuilder _validityBuilder;
+            private readonly TimestampWithOffsetType _type;
+            private int _length;
+            private int _nullCount;
+
+            /// <summary>Creates a builder for the given timestamp resolution.</summary>
+            /// <param name="unit">Timestamp resolution; microseconds when omitted.</param>
+            public Builder(TimeUnit unit = TimeUnit.Microsecond)
+            {
+                _type = new TimestampWithOffsetType(unit);
+                _timestampBuilder = new TimestampArray.Builder(unit, "UTC");
+                _offsetBuilder = new Int16Array.Builder();
+                _validityBuilder = new ArrowBuffer.BitmapBuilder();
+            }
+
+            /// <summary>
+            /// Appends a value, storing its UTC instant and its offset in whole minutes.
+            /// </summary>
+            /// <returns>This builder, for chaining.</returns>
+            public Builder Append(DateTimeOffset value)
+            {
+                _timestampBuilder.Append(value.ToUniversalTime());
+                _offsetBuilder.Append(checked((short)value.Offset.TotalMinutes));
+                _validityBuilder.Append(true);
+                _length++;
+                return this;
+            }
+
+            /// <summary>Appends a null slot.</summary>
+            /// <returns>This builder, for chaining.</returns>
+            public Builder AppendNull()
+            {
+                // Both children are declared non-nullable, so placeholders are appended to
+                // keep every builder the same length; only the struct validity bit marks
+                // the slot as null.
+                _timestampBuilder.Append(default(DateTimeOffset));
+                _offsetBuilder.Append(0);
+                _validityBuilder.Append(false);
+                _length++;
+                _nullCount++;
+                return this;
+            }
+
+            /// <summary>Appends each value in order.</summary>
+            /// <exception cref="ArgumentNullException"><paramref name="values"/> is null.</exception>
+            public Builder AppendRange(IEnumerable<DateTimeOffset> values)
+            {
+                if (values == null)
+                    throw new ArgumentNullException(nameof(values));
+
+                foreach (var value in values)
+                    Append(value);
+
+                return this;
+            }
+
+            /// <summary>Appends each value in order; <c>null</c> entries become null slots.</summary>
+            /// <exception cref="ArgumentNullException"><paramref name="values"/> is null.</exception>
+            public Builder AppendRange(IEnumerable<DateTimeOffset?> values)
+            {
+                if (values == null)
+                    throw new ArgumentNullException(nameof(values));
+
+                foreach (var value in values)
+                {
+                    if (value.HasValue)
+                        Append(value.Value);
+                    else
+                        AppendNull();
+                }
+
+                return this;
+            }
+
+            /// <summary>
+            /// Builds the array from everything appended so far.
+            /// </summary>
+            public TimestampWithOffsetArray Build()
+            {
+                TimestampArray timestamps = _timestampBuilder.Build();
+                Int16Array offsets = _offsetBuilder.Build();
+
+                // An empty validity buffer is passed when no null slot was appended.
+                ArrowBuffer validityBuffer = _nullCount > 0 ? _validityBuilder.Build() : ArrowBuffer.Empty;
+
+                var structType = (StructType)_type.StorageType;
+                var structArray = new StructArray(
+                    structType, _length,
+                    new IArrowArray[] { timestamps, offsets },
+                    validityBuffer, _nullCount);
+
+                return new TimestampWithOffsetArray(_type, structArray);
+            }
+        }
+    }
+}
diff --git a/src/Apache.Arrow/Extensions/IArrowArrayExtensions.cs
b/src/Apache.Arrow/Extensions/IArrowArrayExtensions.cs
new file mode 100644
index 0000000..98ee0d3
--- /dev/null
+++ b/src/Apache.Arrow/Extensions/IArrowArrayExtensions.cs
@@ -0,0 +1,157 @@
+// Licensed to the Apache Software Foundation (ASF) under one or more
+// contributor license agreements. See the NOTICE file distributed with
+// this work for additional information regarding copyright ownership.
+// The ASF licenses this file to You under the Apache License, Version 2.0
+// (the "License"); you may not use this file except in compliance with
+// the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+using System;
+using System.Collections;
+using System.Collections.Generic;
+using Apache.Arrow.Types;
+
+namespace Apache.Arrow
+{
+    /// <summary>
+    /// Provides factory methods that return <see cref="IReadOnlyList{T}"/> views
+    /// over Arrow arrays, transparently handling plain, dictionary-encoded,
+    /// and run-end encoded layouts.
+    /// </summary>
+    public static class IArrowArrayExtensions
+    {
+        /// <summary>
+        /// Returns an <see cref="IReadOnlyList{T}"/> view for the given array,
+        /// regardless of encoding.
+        /// Null slots are represented as <c>default(T)</c>. Callers should use
+        /// nullable value types, as that's what the underlying <c>IArrowArray</c> uses.
+        /// </summary>
+        /// <typeparam name="T">Element type the caller wants to read.</typeparam>
+        /// <param name="array">
+        /// An array that itself implements <see cref="IReadOnlyList{T}"/> (returned
+        /// as-is), or a dictionary or run-end encoded array whose values array does.
+        /// </param>
+        /// <returns>The array itself for plain arrays, otherwise a decoding wrapper.</returns>
+        /// <exception cref="ArgumentNullException"><paramref name="array"/> is null.</exception>
+        /// <exception cref="ArgumentException">
+        /// The array, or the values behind its encoding, cannot be read as <typeparamref name="T"/>.
+        /// </exception>
+        public static IReadOnlyList<T> AsDecodedReadOnlyList<T>(this IArrowArray array)
+        {
+            if (array == null)
+                throw new ArgumentNullException(nameof(array));
+
+            // Plain arrays already are the list: hand them back without wrapping.
+            if (array is IReadOnlyList<T> plain)
+                return plain;
+
+            if (array is DictionaryArray dict)
+            {
+                if (dict.Dictionary is IReadOnlyList<T> dictionaryValues)
+                    return new DictionaryReadOnlyList<T>(dict, dictionaryValues);
+
+                throw new ArgumentException(
+                    $"Dictionary value type {dict.Dictionary.Data.DataType.TypeId} cannot be read as {typeof(T).Name}.",
+                    nameof(array));
+            }
+
+            if (array is RunEndEncodedArray ree)
+            {
+                if (ree.Values is IReadOnlyList<T> runValues)
+                    return new ReeReadOnlyList<T>(ree, runValues);
+
+                throw new ArgumentException(
+                    $"Run-end encoded value type {ree.Values.Data.DataType.TypeId} cannot be read as {typeof(T).Name}.",
+                    nameof(array));
+            }
+
+            throw new ArgumentException(
+                $"Cannot create {typeof(T).Name} reader for array of type {array.Data.DataType.TypeId}.",
+                nameof(array));
+        }
+
+        /// <summary>
+        /// Read-only list over a dictionary-encoded array: each element is the dictionary
+        /// value selected by the index stored at that position.
+        /// </summary>
+        private sealed class DictionaryReadOnlyList<T> : IReadOnlyList<T>
+        {
+            private readonly IArrowArray _indices;
+            private readonly IReadOnlyList<T> _values;
+
+            // Chosen once from the index type so element access does not re-dispatch on it.
+            private readonly Func<IArrowArray, int, int> _indexLookup;
+
+            public DictionaryReadOnlyList(DictionaryArray dict, IReadOnlyList<T> values)
+            {
+                _indices = dict.Indices;
+                _values = values;
+                _indexLookup = GetDictionaryIndex(dict.Indices.Data.DataType);
+            }
+
+            public int Count => _indices.Length;
+
+            public T this[int index]
+            {
+                get
+                {
+                    if (index < 0 || index >= _indices.Length)
+                    {
+                        throw new ArgumentOutOfRangeException(nameof(index));
+                    }
+
+                    // A null index means a null element.
+                    if (_indices.IsNull(index))
+                        return default;
+
+                    int dictIndex = _indexLookup(_indices, index);
+                    return _values[dictIndex];
+                }
+            }
+
+            public IEnumerator<T> GetEnumerator()
+            {
+                for (int i = 0; i < Count; i++)
+                    yield return this[i];
+            }
+
+            IEnumerator IEnumerable.GetEnumerator() => GetEnumerator();
+        }
+
+        /// <summary>
+        /// Read-only list over a run-end encoded array: each element is the value of the
+        /// run covering that logical position.
+        /// </summary>
+        private sealed class ReeReadOnlyList<T> : IReadOnlyList<T>
+        {
+            private readonly RunEndEncodedArray _ree;
+            private readonly IReadOnlyList<T> _values;
+
+            public ReeReadOnlyList(RunEndEncodedArray ree, IReadOnlyList<T> values)
+            {
+                _ree = ree;
+                _values = values;
+            }
+
+            public int Count => _ree.Length;
+
+            // Random access locates the run with FindPhysicalIndex on every call.
+            public T this[int index]
+            {
+                get
+                {
+                    int physicalIndex = _ree.FindPhysicalIndex(index);
+                    return _values[physicalIndex];
+                }
+            }
+
+            // Sequential access walks the runs once via EnumeratePhysicalIndices.
+            public IEnumerator<T> GetEnumerator()
+            {
+                foreach (int physicalIndex in _ree.EnumeratePhysicalIndices())
+                    yield return _values[physicalIndex];
+            }
+
+            IEnumerator IEnumerable.GetEnumerator() => GetEnumerator();
+        }
+
+        /// <summary>
+        /// Returns a delegate that reads the dictionary index stored at a logical position
+        /// of an indices array of the given integer type, as an <see cref="int"/>.
+        /// A null index reads as 0; values that do not fit in an <see cref="int"/> throw
+        /// <see cref="OverflowException"/> because the narrowing casts are checked.
+        /// </summary>
+        /// <exception cref="InvalidOperationException">The type is not a supported integer type.</exception>
+        private static Func<IArrowArray, int, int> GetDictionaryIndex(IArrowType type)
+        {
+            switch (type.TypeId)
+            {
+                case ArrowTypeId.Int8:
+                    return (array, logicalIndex) => ((Int8Array)array).GetValue(logicalIndex) ?? 0;
+                case ArrowTypeId.Int16:
+                    return (array, logicalIndex) => ((Int16Array)array).GetValue(logicalIndex) ?? 0;
+                case ArrowTypeId.Int32:
+                    return (array, logicalIndex) => ((Int32Array)array).GetValue(logicalIndex) ?? 0;
+                case ArrowTypeId.Int64:
+                    return (array, logicalIndex) => checked((int)(((Int64Array)array).GetValue(logicalIndex) ?? 0));
+                case ArrowTypeId.UInt8:
+                    return (array, logicalIndex) => ((UInt8Array)array).GetValue(logicalIndex) ?? 0;
+                case ArrowTypeId.UInt16:
+                    return (array, logicalIndex) => ((UInt16Array)array).GetValue(logicalIndex) ?? 0;
+                case ArrowTypeId.UInt32:
+                    return (array, logicalIndex) => checked((int)(((UInt32Array)array).GetValue(logicalIndex) ?? 0));
+                case ArrowTypeId.UInt64:
+                    return (array, logicalIndex) => checked((int)(((UInt64Array)array).GetValue(logicalIndex) ?? 0));
+                default:
+                    throw new InvalidOperationException(
+                        $"Unsupported dictionary index type: {type.TypeId}");
+            }
+        }
+    }
+}
diff --git a/test/Apache.Arrow.Tests/ReadOnlyListAdapterTests.cs
b/test/Apache.Arrow.Tests/ReadOnlyListAdapterTests.cs
new file mode 100644
index 0000000..13cd6dd
--- /dev/null
+++ b/test/Apache.Arrow.Tests/ReadOnlyListAdapterTests.cs
@@ -0,0 +1,509 @@
+// Licensed to the Apache Software Foundation (ASF) under one or more
+// contributor license agreements. See the NOTICE file distributed with
+// this work for additional information regarding copyright ownership.
+// The ASF licenses this file to You under the Apache License, Version 2.0
+// (the "License"); you may not use this file except in compliance with
+// the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+using System.Collections.Generic;
+using Apache.Arrow.Types;
+using Xunit;
+
+namespace Apache.Arrow.Tests
+{
+    /// <summary>
+    /// Tests for the transparent encoding readers returned by AsDecodedReadOnlyList,
+    /// covering plain, dictionary-encoded, and run-end encoded arrays
+    /// for Int32 and String value types.
+    /// </summary>
+    public class ReadOnlyListAdapterTests
+    {
+        // -------------------------------------------------------------
+        // Shared test data
+        // -------------------------------------------------------------
+
+        private static readonly int?[] Int32Values = { 10, 20, 20, null, 30, 30, 30, 10 };
+        private static readonly string[] StringValues = { "hello", "world", "world", null, "foo", "foo", "foo", "hello" };
+
+        // -------------------------------------------------------------
+        // Test data helpers
+        // -------------------------------------------------------------
+
+        /// <summary>Builds a plain Int32 array; null entries become null slots.</summary>
+        private static Int32Array BuildPlainInt32Array(int?[] values)
+        {
+            var builder = new Int32Array.Builder();
+            for (int i = 0; i < values.Length; i++)
+            {
+                int? item = values[i];
+                if (item == null)
+                    builder.AppendNull();
+                else
+                    builder.Append(item.Value);
+            }
+            return builder.Build();
+        }
+
+        /// <summary>Builds a plain string array; null entries become null slots.</summary>
+        private static StringArray BuildPlainStringArray(string[] values)
+        {
+            var builder = new StringArray.Builder();
+            for (int i = 0; i < values.Length; i++)
+            {
+                string item = values[i];
+                if (item == null)
+                    builder.AppendNull();
+                else
+                    builder.Append(item);
+            }
+            return builder.Build();
+        }
+
+        /// <summary>
+        /// Dictionary-encodes the values with Int32 indices. The dictionary holds the
+        /// distinct non-null values in order of first appearance; nulls become null indices.
+        /// </summary>
+        private static DictionaryArray BuildDictionaryInt32Array(int?[] logicalValues)
+        {
+            var slotByValue = new Dictionary<int, int>();
+            var dictionaryBuilder = new Int32Array.Builder();
+            var indexBuilder = new Int32Array.Builder();
+
+            foreach (int? item in logicalValues)
+            {
+                if (!item.HasValue)
+                {
+                    indexBuilder.AppendNull();
+                    continue;
+                }
+
+                if (!slotByValue.TryGetValue(item.Value, out int slot))
+                {
+                    slot = slotByValue.Count;
+                    slotByValue.Add(item.Value, slot);
+                    dictionaryBuilder.Append(item.Value);
+                }
+                indexBuilder.Append(slot);
+            }
+
+            Int32Array dictionary = dictionaryBuilder.Build();
+            Int32Array indices = indexBuilder.Build();
+
+            var dictType = new DictionaryType(Int32Type.Default, Int32Type.Default, false);
+            return new DictionaryArray(dictType, indices, dictionary);
+        }
+
+        /// <summary>
+        /// Dictionary-encodes the strings with Int32 indices. The dictionary holds the
+        /// distinct non-null strings in order of first appearance; nulls become null indices.
+        /// </summary>
+        private static DictionaryArray BuildDictionaryStringArray(string[] logicalValues)
+        {
+            var slotByValue = new Dictionary<string, int>();
+            var dictionaryBuilder = new StringArray.Builder();
+            var indexBuilder = new Int32Array.Builder();
+
+            foreach (string item in logicalValues)
+            {
+                if (item == null)
+                {
+                    indexBuilder.AppendNull();
+                    continue;
+                }
+
+                if (!slotByValue.TryGetValue(item, out int slot))
+                {
+                    slot = slotByValue.Count;
+                    slotByValue.Add(item, slot);
+                    dictionaryBuilder.Append(item);
+                }
+                indexBuilder.Append(slot);
+            }
+
+            StringArray dictionary = dictionaryBuilder.Build();
+            Int32Array indices = indexBuilder.Build();
+
+            var dictType = new DictionaryType(Int32Type.Default, StringType.Default, false);
+            return new DictionaryArray(dictType, indices, dictionary);
+        }
+
+        /// <summary>
+        /// Run-end encodes the values with Int32 run ends: consecutive equal values
+        /// (including consecutive nulls) form one run.
+        /// </summary>
+        private static RunEndEncodedArray BuildReeInt32Array(int?[] logicalValues)
+        {
+            var runEndsBuilder = new Int32Array.Builder();
+            var runValues = new List<int?>();
+
+            for (int i = 0; i < logicalValues.Length; i++)
+            {
+                // A run closes at the last element or right before a differing value.
+                bool closesRun = i + 1 == logicalValues.Length
+                    || logicalValues[i + 1] != logicalValues[i];
+                if (closesRun)
+                {
+                    runEndsBuilder.Append(i + 1);
+                    runValues.Add(logicalValues[i]);
+                }
+            }
+
+            Int32Array runEnds = runEndsBuilder.Build();
+            Int32Array values = BuildPlainInt32Array(runValues.ToArray());
+
+            return new RunEndEncodedArray(runEnds, values);
+        }
+
+        /// <summary>
+        /// Run-end encodes the strings with Int32 run ends: consecutive equal strings
+        /// (including consecutive nulls) form one run.
+        /// </summary>
+        private static RunEndEncodedArray BuildReeStringArray(string[] logicalValues)
+        {
+            var runEndsBuilder = new Int32Array.Builder();
+            var runValues = new List<string>();
+
+            for (int i = 0; i < logicalValues.Length; i++)
+            {
+                bool closesRun = i + 1 == logicalValues.Length
+                    || !string.Equals(logicalValues[i + 1], logicalValues[i]);
+                if (closesRun)
+                {
+                    runEndsBuilder.Append(i + 1);
+                    runValues.Add(logicalValues[i]);
+                }
+            }
+
+            Int32Array runEnds = runEndsBuilder.Build();
+            StringArray values = BuildPlainStringArray(runValues.ToArray());
+
+            return new RunEndEncodedArray(runEnds, values);
+        }
+
+        // -------------------------------------------------------------
+        // Plain / dictionary / run-end encoded readers
+        // -------------------------------------------------------------
+
+        [Fact]
+        public void PlainInt32()
+        {
+            Int32Array source = BuildPlainInt32Array(Int32Values);
+
+            IReadOnlyList<int?> view = source.AsDecodedReadOnlyList<int?>();
+
+            // A plain array is returned as-is (zero overhead)
+            Assert.Same(source, view);
+            AssertInt32Values(view);
+        }
+
+        [Fact]
+        public void DictionaryInt32()
+        {
+            DictionaryArray source = BuildDictionaryInt32Array(Int32Values);
+
+            AssertInt32Values(source.AsDecodedReadOnlyList<int?>());
+        }
+
+        [Fact]
+        public void ReeInt32()
+        {
+            RunEndEncodedArray source = BuildReeInt32Array(Int32Values);
+
+            AssertInt32Values(source.AsDecodedReadOnlyList<int?>());
+        }
+
+        [Fact]
+        public void PlainString()
+        {
+            StringArray source = BuildPlainStringArray(StringValues);
+
+            IReadOnlyList<string> view = source.AsDecodedReadOnlyList<string>();
+
+            // A plain array is returned as-is (zero overhead)
+            Assert.Same(source, view);
+            AssertStringValues(view);
+        }
+
+        [Fact]
+        public void DictionaryString()
+        {
+            DictionaryArray source = BuildDictionaryStringArray(StringValues);
+
+            AssertStringValues(source.AsDecodedReadOnlyList<string>());
+        }
+
+        [Fact]
+        public void ReeString()
+        {
+            RunEndEncodedArray source = BuildReeStringArray(StringValues);
+
+            AssertStringValues(source.AsDecodedReadOnlyList<string>());
+        }
+
+        // -------------------------------------------------------------
+        // Edge cases
+        // -------------------------------------------------------------
+
+        [Fact]
+        public void EmptyArrays()
+        {
+            Int32Array noInts = BuildPlainInt32Array(new int?[0]);
+            StringArray noStrings = BuildPlainStringArray(new string[0]);
+
+            Assert.Empty(noInts.AsDecodedReadOnlyList<int?>());
+            Assert.Empty(noStrings.AsDecodedReadOnlyList<string>());
+        }
+
+        [Fact]
+        public void AllNullsInt32()
+        {
+            var nulls = new int?[] { null, null, null };
+
+            // Plain
+            Int32Array plain = BuildPlainInt32Array(nulls);
+            AssertAllNullInt32(plain.AsDecodedReadOnlyList<int?>(), 3);
+
+            // Dictionary
+            DictionaryArray dictionary = BuildDictionaryInt32Array(nulls);
+            AssertAllNullInt32(dictionary.AsDecodedReadOnlyList<int?>(), 3);
+        }
+
+        [Fact]
+        public void AllNullsString()
+        {
+            var nulls = new string[] { null, null, null };
+
+            // Plain
+            StringArray plain = BuildPlainStringArray(nulls);
+            AssertAllNullString(plain.AsDecodedReadOnlyList<string>(), 3);
+
+            // Dictionary
+            DictionaryArray dictionary = BuildDictionaryStringArray(nulls);
+            AssertAllNullString(dictionary.AsDecodedReadOnlyList<string>(), 3);
+        }
+
+        [Fact]
+        public void SingleValueRuns()
+        {
+            // All same value => single run
+            RunEndEncodedArray ree = BuildReeInt32Array(new int?[] { 42, 42, 42, 42, 42 });
+
+            IReadOnlyList<int?> view = ree.AsDecodedReadOnlyList<int?>();
+
+            Assert.Equal(5, view.Count);
+            int position = 0;
+            while (position < 5)
+            {
+                Assert.Equal(42, view[position]);
+                position++;
+            }
+        }
+
+        [Fact]
+        public void EnumerationWorks()
+        {
+            Int32Array source = BuildPlainInt32Array(new int?[] { 1, 2, 3 });
+            IReadOnlyList<int?> view = source.AsDecodedReadOnlyList<int?>();
+
+            var copied = new List<int?>(view);
+
+            Assert.Equal(new int?[] { 1, 2, 3 }, copied.ToArray());
+        }
+
+        // -------------------------------------------------------------
+        // Sliced array tests
+        // -------------------------------------------------------------
+
+        // Full data: { 10, 20, 20, null, 30, 30, 30, 10 }
+        // Slice(2, 4) => { 20, null, 30, 30 }
+
+        [Fact]
+        public void SlicedPlainInt32()
+        {
+            Int32Array whole = BuildPlainInt32Array(Int32Values);
+            var window = (Int32Array)whole.Slice(2, 4);
+
+            IReadOnlyList<int?> view = window.AsDecodedReadOnlyList<int?>();
+
+            Assert.Same(window, view);
+            AssertIndexed(view, 20, null, 30, 30);
+        }
+
+        [Fact]
+        public void SlicedDictionaryInt32()
+        {
+            DictionaryArray whole = BuildDictionaryInt32Array(Int32Values);
+            // Slicing a DictionaryArray yields a new DictionaryArray with sliced indices
+            var window = (DictionaryArray)ArrowArrayFactory.Slice(whole, 2, 4);
+
+            IReadOnlyList<int?> view = window.AsDecodedReadOnlyList<int?>();
+
+            AssertIndexed(view, 20, null, 30, 30);
+        }
+
+        [Fact]
+        public void SlicedReeInt32()
+        {
+            RunEndEncodedArray whole = BuildReeInt32Array(Int32Values);
+            // REE slice adjusts the offset but keeps the same children
+            var window = (RunEndEncodedArray)ArrowArrayFactory.Slice(whole, 2, 4);
+
+            IReadOnlyList<int?> view = window.AsDecodedReadOnlyList<int?>();
+
+            AssertIndexed(view, 20, null, 30, 30);
+        }
+
+        // Full data: { "hello", "world", "world", null, "foo", "foo", "foo", "hello" }
+        // Slice(1, 5) => { "world", "world", null, "foo", "foo" }
+
+        [Fact]
+        public void SlicedPlainString()
+        {
+            StringArray whole = BuildPlainStringArray(StringValues);
+            var window = (StringArray)whole.Slice(1, 5);
+
+            IReadOnlyList<string> view = window.AsDecodedReadOnlyList<string>();
+
+            Assert.Same(window, view);
+            AssertIndexed(view, "world", "world", null, "foo", "foo");
+        }
+
+        [Fact]
+        public void SlicedDictionaryString()
+        {
+            DictionaryArray whole = BuildDictionaryStringArray(StringValues);
+            var window = (DictionaryArray)ArrowArrayFactory.Slice(whole, 1, 5);
+
+            IReadOnlyList<string> view = window.AsDecodedReadOnlyList<string>();
+
+            AssertIndexed(view, "world", "world", null, "foo", "foo");
+        }
+
+        [Fact]
+        public void SlicedReeString()
+        {
+            RunEndEncodedArray whole = BuildReeStringArray(StringValues);
+            var window = (RunEndEncodedArray)ArrowArrayFactory.Slice(whole, 1, 5);
+
+            IReadOnlyList<string> view = window.AsDecodedReadOnlyList<string>();
+
+            AssertIndexed(view, "world", "world", null, "foo", "foo");
+        }
+
+        [Fact]
+        public void SlicedReeEnumerationIsEfficient()
+        {
+            // Verify the enumerator produces the same results as indexed access
+            RunEndEncodedArray whole = BuildReeInt32Array(Int32Values);
+            var window = (RunEndEncodedArray)ArrowArrayFactory.Slice(whole, 2, 4);
+
+            IReadOnlyList<int?> view = window.AsDecodedReadOnlyList<int?>();
+            var enumerated = new List<int?>(view);
+
+            Assert.Equal(4, enumerated.Count);
+            for (int i = 0; i < 4; i++)
+            {
+                Assert.Equal(view[i], enumerated[i]);
+            }
+        }
+
+        // -------------------------------------------------------------
+        // Assertion helpers
+        // -------------------------------------------------------------
+
+        /// <summary>Asserts the element count and then every element by index.</summary>
+        private static void AssertIndexed<T>(IReadOnlyList<T> reader, params T[] expected)
+        {
+            Assert.Equal(expected.Length, reader.Count);
+            for (int i = 0; i < expected.Length; i++)
+            {
+                Assert.Equal(expected[i], reader[i]);
+            }
+        }
+
+        /// <summary>
+        /// Asserts that the reader matches the shared Int32 data, first through the
+        /// indexer and then through its enumerator.
+        /// </summary>
+        private static void AssertInt32Values(IReadOnlyList<int?> reader)
+        {
+            Assert.Equal(Int32Values.Length, reader.Count);
+
+            int index = 0;
+            while (index < Int32Values.Length)
+            {
+                Assert.Equal(Int32Values[index], reader[index]);
+                index++;
+            }
+
+            int seen = 0;
+            foreach (int? actual in reader)
+            {
+                Assert.Equal(Int32Values[seen], actual);
+                seen++;
+            }
+        }
+
+        /// <summary>
+        /// Asserts that the reader matches the shared string data, first through the
+        /// indexer and then through its enumerator.
+        /// </summary>
+        private static void AssertStringValues(IReadOnlyList<string> reader)
+        {
+            Assert.Equal(StringValues.Length, reader.Count);
+
+            int index = 0;
+            while (index < StringValues.Length)
+            {
+                Assert.Equal(StringValues[index], reader[index]);
+                index++;
+            }
+
+            int seen = 0;
+            foreach (string actual in reader)
+            {
+                Assert.Equal(StringValues[seen], actual);
+                seen++;
+            }
+        }
+
+        /// <summary>Asserts the count and that every element is null, by index and by enumeration.</summary>
+        private static void AssertAllNullInt32(IReadOnlyList<int?> reader, int count)
+        {
+            Assert.Equal(count, reader.Count);
+
+            int index = 0;
+            while (index < count)
+            {
+                Assert.Null(reader[index]);
+                index++;
+            }
+
+            foreach (int? actual in reader)
+            {
+                Assert.Null(actual);
+            }
+        }
+
+        /// <summary>Asserts the count and that every element is null, by index and by enumeration.</summary>
+        private static void AssertAllNullString(IReadOnlyList<string> reader, int count)
+        {
+            Assert.Equal(count, reader.Count);
+
+            int index = 0;
+            while (index < count)
+            {
+                Assert.Null(reader[index]);
+                index++;
+            }
+
+            foreach (string actual in reader)
+            {
+                Assert.Null(actual);
+            }
+        }
+    }
+}
diff --git a/test/Apache.Arrow.Tests/RunEndEncodedArrayTests.cs
b/test/Apache.Arrow.Tests/RunEndEncodedArrayTests.cs
index fdf5311..1e179a4 100644
--- a/test/Apache.Arrow.Tests/RunEndEncodedArrayTests.cs
+++ b/test/Apache.Arrow.Tests/RunEndEncodedArrayTests.cs
@@ -430,4 +430,109 @@ public class RunEndEncodedArrayTests
var reeArray = (RunEndEncodedArray)array;
Assert.Equal(6, reeArray.Length);
}
+
+ // =============================================================
+ // EnumeratePhysicalIndices tests
+ // =============================================================
+
+    [Fact]
+    public void EnumeratePhysicalIndicesInt32()
+    {
+        // Runs end at logical positions 3, 7 and 10, so logical 0-2 map to run 0,
+        // 3-6 to run 1 and 7-9 to run 2.
+        var reeArray = new RunEndEncodedArray(
+            new Int32Array.Builder().AppendRange([3, 7, 10]).Build(),
+            new StringArray.Builder().AppendRange(["A", "B", "C"]).Build());
+
+        int[] physicalIndices = [0, 0, 0, 1, 1, 1, 1, 2, 2, 2];
+
+        Assert.Equal(physicalIndices, reeArray.EnumeratePhysicalIndices());
+    }
+
+    [Fact]
+    public void EnumeratePhysicalIndicesInt16()
+    {
+        // 16-bit run ends 2, 5 and 8: logical 0-1 map to run 0, 2-4 to run 1
+        // and 5-7 to run 2.
+        var reeArray = new RunEndEncodedArray(
+            new Int16Array.Builder().AppendRange([2, 5, 8]).Build(),
+            new Int32Array.Builder().AppendRange([100, 200, 300]).Build());
+
+        int[] physicalIndices = [0, 0, 1, 1, 1, 2, 2, 2];
+
+        Assert.Equal(physicalIndices, reeArray.EnumeratePhysicalIndices());
+    }
+
+    [Fact]
+    public void EnumeratePhysicalIndicesInt64()
+    {
+        // 64-bit run ends 1, 4 and 6: logical 0 maps to run 0, 1-3 to run 1
+        // and 4-5 to run 2.
+        var reeArray = new RunEndEncodedArray(
+            new Int64Array.Builder().AppendRange([1, 4, 6]).Build(),
+            new DoubleArray.Builder().AppendRange([1.5, 2.5, 3.5]).Build());
+
+        int[] physicalIndices = [0, 1, 1, 1, 2, 2];
+
+        Assert.Equal(physicalIndices, reeArray.EnumeratePhysicalIndices());
+    }
+
+    [Fact]
+    public void EnumeratePhysicalIndicesEmpty()
+    {
+        // An array built from no runs and no values enumerates nothing.
+        var reeArray = new RunEndEncodedArray(
+            new Int32Array.Builder().Build(),
+            new StringArray.Builder().Build());
+
+        Assert.Empty(reeArray.EnumeratePhysicalIndices());
+    }
+
+    [Fact]
+    public void EnumeratePhysicalIndicesSingleRun()
+    {
+        // One run ending at 5: all five logical positions map to physical index 0.
+        var reeArray = new RunEndEncodedArray(
+            new Int32Array.Builder().Append(5).Build(),
+            new StringArray.Builder().Append("X").Build());
+
+        int[] physicalIndices = [0, 0, 0, 0, 0];
+
+        Assert.Equal(physicalIndices, reeArray.EnumeratePhysicalIndices());
+    }
+
+    [Fact]
+    public void EnumeratePhysicalIndicesSlicedInt32()
+    {
+        // Unsliced layout: A covers logical 0-2, B covers 3-6, C covers 7-9.
+        var unsliced = new RunEndEncodedArray(
+            new Int32Array.Builder().AppendRange([3, 7, 10]).Build(),
+            new StringArray.Builder().AppendRange(["A", "B", "C"]).Build());
+
+        // Offset 2 with length 6 keeps logical positions 2-7 of the original.
+        var window = (RunEndEncodedArray)ArrowArrayFactory.Slice(unsliced, 2, 6);
+
+        // Position 2 is still in A, 3-6 fall in B, and 7 is the first of C.
+        int[] physicalIndices = [0, 1, 1, 1, 1, 2];
+        Assert.Equal(physicalIndices, window.EnumeratePhysicalIndices());
+    }
+
+    [Fact]
+    public void EnumeratePhysicalIndicesMatchesFindPhysicalIndex()
+    {
+        // The sequential enumeration must agree with per-position lookups for
+        // each of the three run-end widths (16, 32 and 64 bit).
+        Int32Array values = new Int32Array.Builder().AppendRange([10, 20, 30]).Build();
+
+        RunEndEncodedArray[] candidates =
+        {
+            new RunEndEncodedArray(new Int16Array.Builder().AppendRange([2, 5, 8]).Build(), values),
+            new RunEndEncodedArray(new Int32Array.Builder().AppendRange([2, 5, 8]).Build(), values),
+            new RunEndEncodedArray(new Int64Array.Builder().AppendRange([2, 5, 8]).Build(), values),
+        };
+
+        foreach (RunEndEncodedArray candidate in candidates)
+        {
+            int logicalIndex = 0;
+            foreach (int physicalIndex in candidate.EnumeratePhysicalIndices())
+            {
+                Assert.Equal(candidate.FindPhysicalIndex(logicalIndex), physicalIndex);
+                logicalIndex++;
+            }
+
+            // Every logical position must have been visited exactly once.
+            Assert.Equal(candidate.Length, logicalIndex);
+        }
+    }
}
diff --git a/test/Apache.Arrow.Tests/TimestampWithOffsetArrayTests.cs
b/test/Apache.Arrow.Tests/TimestampWithOffsetArrayTests.cs
new file mode 100644
index 0000000..be37cf6
--- /dev/null
+++ b/test/Apache.Arrow.Tests/TimestampWithOffsetArrayTests.cs
@@ -0,0 +1,415 @@
+// Licensed to the Apache Software Foundation (ASF) under one or more
+// contributor license agreements. See the NOTICE file distributed with
+// this work for additional information regarding copyright ownership.
+// The ASF licenses this file to You under the Apache License, Version 2.0
+// (the "License"); you may not use this file except in compliance with
+// the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using Apache.Arrow.Types;
+using Xunit;
+
+namespace Apache.Arrow.Tests
+{
+ public class TimestampWithOffsetArrayTests
+ {
+ // =============================================================
+ // Builder tests
+ // =============================================================
+
+        [Fact]
+        public void BuilderAppendAndRead()
+        {
+            // A mix of positive, negative, zero and fractional-hour offsets.
+            DateTimeOffset[] expected =
+            {
+                new DateTimeOffset(2024, 3, 15, 10, 30, 0, TimeSpan.FromHours(5)),
+                new DateTimeOffset(2024, 6, 1, 14, 0, 0, TimeSpan.FromHours(-8)),
+                new DateTimeOffset(2024, 12, 31, 23, 59, 59, TimeSpan.Zero),
+                new DateTimeOffset(2024, 1, 1, 0, 0, 0, TimeSpan.FromHours(5.5)),
+            };
+
+            var builder = new TimestampWithOffsetArray.Builder();
+            for (int i = 0; i < expected.Length; i++)
+            {
+                builder.Append(expected[i]);
+            }
+            var array = builder.Build();
+
+            Assert.Equal(4, array.Length);
+            Assert.Equal(0, array.NullCount);
+
+            int index = 0;
+            foreach (DateTimeOffset want in expected)
+            {
+                DateTimeOffset? got = array.GetValue(index);
+                Assert.NotNull(got);
+                Assert.Equal(want, got.Value);
+                Assert.Equal(want.Offset, got.Value.Offset);
+                index++;
+            }
+        }
+
+        [Fact]
+        public void BuilderAppendNull()
+        {
+            // value, null, value: the null must be counted and land in slot 1.
+            var first = new DateTimeOffset(2024, 1, 1, 0, 0, 0, TimeSpan.Zero);
+            var last = new DateTimeOffset(2024, 6, 15, 12, 0, 0, TimeSpan.FromHours(3));
+
+            var builder = new TimestampWithOffsetArray.Builder();
+            builder.Append(first);
+            builder.AppendNull();
+            builder.Append(last);
+            var array = builder.Build();
+
+            Assert.Equal(3, array.Length);
+            Assert.Equal(1, array.NullCount);
+
+            Assert.NotNull(array.GetValue(0));
+            Assert.Null(array.GetValue(1));
+            Assert.NotNull(array.GetValue(2));
+        }
+
+        [Fact]
+        public void BuilderAppendRange()
+        {
+            // Appending a non-nullable range must round-trip each value, including
+            // its offset.
+            var values = new DateTimeOffset[]
+            {
+                new DateTimeOffset(2024, 1, 1, 0, 0, 0, TimeSpan.FromHours(1)),
+                new DateTimeOffset(2024, 2, 1, 0, 0, 0, TimeSpan.FromHours(-5)),
+            };
+
+            var builder = new TimestampWithOffsetArray.Builder();
+            builder.AppendRange(values);
+            var array = builder.Build();
+
+            Assert.Equal(2, array.Length);
+            for (int i = 0; i < values.Length; i++)
+            {
+                DateTimeOffset? result = array.GetValue(i);
+                Assert.NotNull(result);
+                Assert.Equal(values[i], result.Value);
+
+                // DateTimeOffset equality compares only the UTC instant, so the
+                // offset has to be asserted separately.
+                Assert.Equal(values[i].Offset, result.Value.Offset);
+            }
+        }
+
+        [Fact]
+        public void BuilderAppendRangeNullable()
+        {
+            // A nullable range with a null in the middle slot.
+            DateTimeOffset?[] values =
+            {
+                new DateTimeOffset(2024, 1, 1, 0, 0, 0, TimeSpan.FromHours(1)),
+                null,
+                new DateTimeOffset(2024, 3, 1, 0, 0, 0, TimeSpan.FromHours(-5)),
+            };
+
+            var builder = new TimestampWithOffsetArray.Builder();
+            builder.AppendRange(values);
+            var array = builder.Build();
+
+            Assert.Equal(3, array.Length);
+            Assert.Equal(1, array.NullCount);
+
+            Assert.NotNull(array.GetValue(0));
+            Assert.Null(array.GetValue(1));
+            Assert.NotNull(array.GetValue(2));
+        }
+
+        [Fact]
+        public void EmptyArray()
+        {
+            // Building without appending anything gives a zero-length array
+            // with no nulls.
+            var array = new TimestampWithOffsetArray.Builder().Build();
+
+            Assert.Equal(0, array.Length);
+            Assert.Equal(0, array.NullCount);
+        }
+
+ // =============================================================
+ // TimeUnit tests
+ // =============================================================
+
+        [Theory]
+        [InlineData(TimeUnit.Second)]
+        [InlineData(TimeUnit.Millisecond)]
+        [InlineData(TimeUnit.Microsecond)]
+        [InlineData(TimeUnit.Nanosecond)]
+        public void AllTimeUnitsWork(TimeUnit unit)
+        {
+            var original = new DateTimeOffset(2024, 6, 15, 12, 30, 45, TimeSpan.FromHours(5));
+
+            var builder = new TimestampWithOffsetArray.Builder(unit);
+            builder.Append(original);
+            var array = builder.Build();
+
+            var roundTripped = array.GetValue(0);
+            Assert.NotNull(roundTripped);
+            Assert.Equal(original.Offset, roundTripped.Value.Offset);
+
+            // Compare the UTC instants after truncating both to the unit's
+            // precision.
+            long ticksPerUnit = GetTicksPerUnit(unit);
+            long expectedUnits = original.ToUniversalTime().Ticks / ticksPerUnit;
+            long actualUnits = roundTripped.Value.ToUniversalTime().Ticks / ticksPerUnit;
+            Assert.Equal(expectedUnits, actualUnits);
+        }
+
+        // Returns the number of .NET ticks (100 ns each) used as the comparison
+        // granularity for the given unit. Nanosecond maps to a single tick.
+        private static long GetTicksPerUnit(TimeUnit unit) => unit switch
+        {
+            TimeUnit.Second => TimeSpan.TicksPerSecond,
+            TimeUnit.Millisecond => TimeSpan.TicksPerMillisecond,
+            TimeUnit.Microsecond => 10,
+            TimeUnit.Nanosecond => 1,
+            _ => throw new ArgumentOutOfRangeException(nameof(unit)),
+        };
+
+ // =============================================================
+ // Extension type tests
+ // =============================================================
+
+        [Fact]
+        public void ExtensionTypeProperties()
+        {
+            // The default type carries the canonical name, empty metadata and a
+            // two-field struct as storage.
+            var defaultType = TimestampWithOffsetType.Default;
+
+            Assert.Equal("arrow.timestamp_with_offset", defaultType.Name);
+            Assert.Equal("", defaultType.ExtensionMetadata);
+
+            // IsType returns the value cast to the asserted type.
+            StructType storage = Assert.IsType<StructType>(defaultType.StorageType);
+            Assert.Equal(2, storage.Fields.Count);
+            Assert.Equal("timestamp", storage.Fields[0].Name);
+            Assert.Equal("offset_minutes", storage.Fields[1].Name);
+        }
+
+        [Fact]
+        public void ExtensionDefinitionCreatesType()
+        {
+            // Well-formed storage: a UTC microsecond timestamp plus an Int16 offset.
+            var timestampField = new Field("timestamp", new TimestampType(TimeUnit.Microsecond, "UTC"), nullable: false);
+            var offsetField = new Field("offset_minutes", Int16Type.Default, nullable: false);
+            var storageType = new StructType(new[] { timestampField, offsetField });
+
+            Assert.True(TimestampWithOffsetExtensionDefinition.Instance.TryCreateType(
+                storageType, "", out ExtensionType created));
+
+            // The created type must pick up the unit of the timestamp field.
+            var typed = Assert.IsType<TimestampWithOffsetType>(created);
+            Assert.Equal(TimeUnit.Microsecond, typed.Unit);
+        }
+
+        [Fact]
+        public void ExtensionDefinitionRejectsInvalidStorage()
+        {
+            var definition = TimestampWithOffsetExtensionDefinition.Instance;
+
+            // Storage that is not a struct at all.
+            bool acceptedNonStruct = definition.TryCreateType(Int32Type.Default, "", out _);
+            Assert.False(acceptedNonStruct);
+
+            // Correct field types under the wrong field names.
+            var misnamedFields = new StructType(new[]
+            {
+                new Field("ts", new TimestampType(TimeUnit.Microsecond, "UTC"), nullable: false),
+                new Field("offset", Int16Type.Default, nullable: false),
+            });
+            bool acceptedMisnamed = definition.TryCreateType(misnamedFields, "", out _);
+            Assert.False(acceptedMisnamed);
+
+            // Timestamp whose timezone string is "+00:00" rather than "UTC".
+            var nonUtcTimestamp = new StructType(new[]
+            {
+                new Field("timestamp", new TimestampType(TimeUnit.Microsecond, "+00:00"), nullable: false),
+                new Field("offset_minutes", Int16Type.Default, nullable: false),
+            });
+            bool acceptedNonUtc = definition.TryCreateType(nonUtcTimestamp, "", out _);
+            Assert.False(acceptedNonUtc);
+
+            // Offset stored as Int32 instead of Int16.
+            var wideOffset = new StructType(new[]
+            {
+                new Field("timestamp", new TimestampType(TimeUnit.Microsecond, "UTC"), nullable: false),
+                new Field("offset_minutes", Int32Type.Default, nullable: false),
+            });
+            bool acceptedWideOffset = definition.TryCreateType(wideOffset, "", out _);
+            Assert.False(acceptedWideOffset);
+        }
+
+        [Fact]
+        public void ExtensionDefinitionAcceptsDictionaryEncodedOffset()
+        {
+            // offset_minutes is a dictionary with Int32 indices and Int16 values.
+            var encodedOffset = new DictionaryType(Int32Type.Default, Int16Type.Default, false);
+            var timestampField = new Field("timestamp", new TimestampType(TimeUnit.Second, "UTC"), nullable: false);
+            var offsetField = new Field("offset_minutes", encodedOffset, nullable: false);
+            var storageType = new StructType(new[] { timestampField, offsetField });
+
+            Assert.True(TimestampWithOffsetExtensionDefinition.Instance.TryCreateType(
+                storageType, "", out ExtensionType created));
+
+            var typed = Assert.IsType<TimestampWithOffsetType>(created);
+            Assert.Equal(TimeUnit.Second, typed.Unit);
+        }
+
+ // =============================================================
+ // IReadOnlyList tests
+ // =============================================================
+
+        [Fact]
+        public void ReadOnlyListInterface()
+        {
+            // The array must be usable as IReadOnlyList<DateTimeOffset?> and return
+            // the appended values, including their offsets, through the indexer.
+            var values = new DateTimeOffset[]
+            {
+                new DateTimeOffset(2024, 1, 1, 0, 0, 0, TimeSpan.FromHours(1)),
+                new DateTimeOffset(2024, 6, 15, 12, 0, 0, TimeSpan.FromHours(-5)),
+            };
+
+            var builder = new TimestampWithOffsetArray.Builder();
+            builder.AppendRange(values);
+            var array = builder.Build();
+
+            IReadOnlyList<DateTimeOffset?> list = array;
+
+            Assert.Equal(2, list.Count);
+            for (int i = 0; i < values.Length; i++)
+            {
+                DateTimeOffset? item = list[i];
+                Assert.NotNull(item);
+                Assert.Equal(values[i], item.Value);
+
+                // DateTimeOffset equality compares only the UTC instant, so the
+                // offset has to be asserted separately.
+                Assert.Equal(values[i].Offset, item.Value.Offset);
+            }
+        }
+
+        [Fact]
+        public void Enumeration()
+        {
+            // value, null, value: enumeration must keep both order and nullness.
+            var first = new DateTimeOffset(2024, 1, 1, 0, 0, 0, TimeSpan.FromHours(1));
+            var last = new DateTimeOffset(2024, 6, 15, 12, 0, 0, TimeSpan.FromHours(-5));
+
+            var builder = new TimestampWithOffsetArray.Builder();
+            builder.Append(first);
+            builder.AppendNull();
+            builder.Append(last);
+            var array = builder.Build();
+
+            var enumerated = array.ToList();
+
+            Assert.Equal(3, enumerated.Count);
+            Assert.NotNull(enumerated[0]);
+            Assert.Null(enumerated[1]);
+            Assert.NotNull(enumerated[2]);
+        }
+
+ // =============================================================
+ // Slicing tests
+ // =============================================================
+
+        [Fact]
+        public void SlicedArray()
+        {
+            // Each value has a different offset, so a slice that mis-aligns the
+            // offset_minutes child would produce a wrong offset.
+            var values = new DateTimeOffset[]
+            {
+                new DateTimeOffset(2024, 1, 1, 0, 0, 0, TimeSpan.FromHours(1)),
+                new DateTimeOffset(2024, 3, 15, 10, 30, 0, TimeSpan.FromHours(5)),
+                new DateTimeOffset(2024, 6, 1, 14, 0, 0, TimeSpan.FromHours(-8)),
+                new DateTimeOffset(2024, 12, 31, 23, 59, 59, TimeSpan.Zero),
+            };
+
+            var builder = new TimestampWithOffsetArray.Builder();
+            builder.AppendRange(values);
+            var array = builder.Build();
+
+            // Slice(1, 2) => { values[1], values[2] }
+            var slicedStorage = ArrowArrayFactory.Slice(array.Storage, 1, 2);
+            var sliced = new TimestampWithOffsetArray((TimestampWithOffsetType)array.ExtensionType, slicedStorage);
+
+            Assert.Equal(2, sliced.Length);
+            for (int i = 0; i < sliced.Length; i++)
+            {
+                DateTimeOffset expected = values[i + 1];
+                DateTimeOffset? actual = sliced.GetValue(i);
+                Assert.NotNull(actual);
+                Assert.Equal(expected, actual.Value);
+
+                // DateTimeOffset equality compares only the UTC instant, so the
+                // offset has to be asserted separately.
+                Assert.Equal(expected.Offset, actual.Value.Offset);
+            }
+        }
+
+ // =============================================================
+ // Edge cases
+ // =============================================================
+
+        [Fact]
+        public void NegativeOffset()
+        {
+            // A -12:00 offset must survive the round trip.
+            var original = new DateTimeOffset(2024, 6, 15, 8, 0, 0, TimeSpan.FromHours(-12));
+
+            var builder = new TimestampWithOffsetArray.Builder();
+            builder.Append(original);
+            var array = builder.Build();
+
+            var roundTripped = array.GetValue(0);
+
+            Assert.Equal(original, roundTripped);
+            Assert.Equal(TimeSpan.FromHours(-12), roundTripped.Value.Offset);
+        }
+
+        [Fact]
+        public void HalfHourOffset()
+        {
+            // +05:30 (India Standard Time) is an offset that is not a whole hour.
+            var original = new DateTimeOffset(2024, 6, 15, 12, 0, 0, TimeSpan.FromMinutes(330));
+
+            var builder = new TimestampWithOffsetArray.Builder();
+            builder.Append(original);
+            var array = builder.Build();
+
+            var roundTripped = array.GetValue(0);
+
+            Assert.Equal(original, roundTripped);
+            Assert.Equal(TimeSpan.FromMinutes(330), roundTripped.Value.Offset);
+        }
+
+        [Fact]
+        public void UtcOffset()
+        {
+            // A zero offset must come back as exactly zero.
+            var original = new DateTimeOffset(2024, 6, 15, 12, 0, 0, TimeSpan.Zero);
+
+            var builder = new TimestampWithOffsetArray.Builder();
+            builder.Append(original);
+            var array = builder.Build();
+
+            var roundTripped = array.GetValue(0);
+
+            Assert.Equal(original, roundTripped);
+            Assert.Equal(TimeSpan.Zero, roundTripped.Value.Offset);
+        }
+
+        [Fact]
+        public void AllNulls()
+        {
+            // An array made only of nulls reports every slot as null.
+            const int nullCount = 3;
+
+            var builder = new TimestampWithOffsetArray.Builder();
+            for (int n = 0; n < nullCount; n++)
+            {
+                builder.AppendNull();
+            }
+            var array = builder.Build();
+
+            Assert.Equal(nullCount, array.Length);
+            Assert.Equal(nullCount, array.NullCount);
+
+            int index = 0;
+            while (index < nullCount)
+            {
+                Assert.Null(array.GetValue(index));
+                index++;
+            }
+        }
+
+        [Fact]
+        public void GetValueOutOfRangeThrows()
+        {
+            // Single-element array: index -1 is below the range and index 1 is
+            // one past the end.
+            var only = new DateTimeOffset(2024, 1, 1, 0, 0, 0, TimeSpan.Zero);
+
+            var builder = new TimestampWithOffsetArray.Builder();
+            builder.Append(only);
+            var array = builder.Build();
+
+            Assert.Throws<ArgumentOutOfRangeException>(() => array.GetValue(-1));
+            Assert.Throws<ArgumentOutOfRangeException>(() => array.GetValue(1));
+        }
+
+        [Fact]
+        public void PreservesUtcInstant()
+        {
+            // One instant expressed at three offsets: UTC, +05:00 and -03:00.
+            var atUtc = new DateTimeOffset(2024, 6, 15, 12, 0, 0, TimeSpan.Zero);
+            var atEast = atUtc.ToOffset(TimeSpan.FromHours(5));
+            var atWest = atUtc.ToOffset(TimeSpan.FromHours(-3));
+
+            var builder = new TimestampWithOffsetArray.Builder();
+            builder.Append(atUtc);
+            builder.Append(atEast);
+            builder.Append(atWest);
+            var array = builder.Build();
+
+            DateTimeOffset storedUtc = array.GetValue(0).Value;
+            DateTimeOffset storedEast = array.GetValue(1).Value;
+            DateTimeOffset storedWest = array.GetValue(2).Value;
+
+            // All three stored values denote the same instant...
+            Assert.Equal(storedUtc.ToUniversalTime(), storedEast.ToUniversalTime());
+            Assert.Equal(storedUtc.ToUniversalTime(), storedWest.ToUniversalTime());
+
+            // ...while each keeps the offset it was appended with.
+            Assert.Equal(TimeSpan.Zero, storedUtc.Offset);
+            Assert.Equal(TimeSpan.FromHours(5), storedEast.Offset);
+            Assert.Equal(TimeSpan.FromHours(-3), storedWest.Offset);
+        }
+ }
+}