lidavidm commented on code in PR #344:
URL: https://github.com/apache/arrow-go/pull/344#discussion_r2105717305


##########
parquet/variant/variant.go:
##########
@@ -0,0 +1,725 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package variant
+
+import (
+       "bytes"
+       "encoding/binary"
+       "encoding/json"
+       "errors"
+       "fmt"
+       "iter"
+       "maps"
+       "slices"
+       "strings"
+       "time"
+       "unsafe"
+
+       "github.com/apache/arrow-go/v18/arrow"
+       "github.com/apache/arrow-go/v18/arrow/decimal"
+       "github.com/apache/arrow-go/v18/arrow/decimal128"
+       "github.com/apache/arrow-go/v18/parquet/internal/debug"
+       "github.com/google/uuid"
+)
+
+//go:generate go tool stringer -type=BasicType -linecomment 
-output=basic_type_string.go
+//go:generate go tool stringer -type=PrimitiveType -linecomment 
-output=primitive_type_string.go
+
+// BasicType represents the fundamental type category of a variant value.
+type BasicType int
+
+const (
+       BasicUndefined   BasicType = iota - 1 // Unknown
+       BasicPrimitive                        // Primitive
+       BasicShortString                      // ShortString
+       BasicObject                           // Object
+       BasicArray                            // Array
+)
+
+func basicTypeFromHeader(hdr byte) BasicType {
+       return BasicType(hdr & basicTypeMask)

Review Comment:
   worth having a debug.Assert perhaps?



##########
parquet/variant/builder.go:
##########
@@ -0,0 +1,847 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package variant
+
+import (
+       "bytes"
+       "cmp"
+       "encoding/binary"
+       "errors"
+       "fmt"
+       "io"
+       "math"
+       "reflect"
+       "slices"
+       "strings"
+       "time"
+       "unsafe"
+
+       "github.com/apache/arrow-go/v18/arrow"
+       "github.com/apache/arrow-go/v18/arrow/decimal"
+       "github.com/google/uuid"
+)
+
+// Builder is used to construct Variant values by appending data of various 
types.
+// It manages an internal buffer for the value data and a dictionary for field 
keys.
+type Builder struct {
+       buf             bytes.Buffer
+       dict            map[string]uint32
+       dictKeys        [][]byte
+       allowDuplicates bool
+}
+
+// SetAllowDuplicates controls whether duplicate keys are allowed in objects.
+// When true, the last value for a key is used. When false, an error is 
returned
+// if a duplicate key is detected.
+func (b *Builder) SetAllowDuplicates(allow bool) {
+       b.allowDuplicates = allow
+}
+
+// AddKey adds a key to the builder's dictionary and returns its ID.
+// If the key already exists in the dictionary, its existing ID is returned.
+func (b *Builder) AddKey(key string) (id uint32) {
+       if b.dict == nil {
+               b.dict = make(map[string]uint32)
+               b.dictKeys = make([][]byte, 0, 16)
+       }
+
+       var ok bool
+       if id, ok = b.dict[key]; ok {
+               return id
+       }
+
+       id = uint32(len(b.dictKeys))
+       b.dict[key] = id
+       b.dictKeys = append(b.dictKeys, unsafe.Slice(unsafe.StringData(key), 
len(key)))
+
+       return id
+}
+
+// AppendOpt represents options for appending time-related values. These are 
only
+// used when using the generic Append method that takes an interface{}.
+type AppendOpt int16
+
+const (
+       // OptTimestampNano specifies that timestamps should use nanosecond 
precision,
+       // otherwise microsecond precision is used.
+       OptTimestampNano AppendOpt = 1 << iota
+       // OptTimestampUTC specifies that timestamps should be in UTC timezone, 
otherwise
+       // no time zone (NTZ) is used.
+       OptTimestampUTC
+       // OptTimeAsDate specifies that time.Time values should be encoded as 
dates
+       OptTimeAsDate
+       // OptTimeAsTime specifies that time.Time values should be encoded as a 
time value
+       OptTimeAsTime
+)
+
+func extractFieldInfo(f reflect.StructField) (name string, o AppendOpt) {
+       tag := f.Tag.Get("variant")
+       if tag == "" {
+               return f.Name, 0
+       }
+
+       parts := strings.Split(tag, ",")
+       if len(parts) == 1 {
+               return parts[0], 0
+       }
+
+       name = parts[0]
+       if name == "" {
+               name = f.Name
+       }
+
+       for _, opt := range parts[1:] {
+               switch strings.ToLower(opt) {
+               case "nanos":
+                       o |= OptTimestampNano
+               case "utc":
+                       o |= OptTimestampUTC
+               case "date":
+                       o |= OptTimeAsDate
+               case "time":
+                       o |= OptTimeAsTime
+               }
+       }
+
+       return name, o
+}
+
+// Append adds a value of any supported type to the builder.
+//
+// Any basic primitive type is supported, the AppendOpt options are used to 
control how
+// timestamps are appended (e.g., as microseconds or nanoseconds and 
timezone). The options
+// also control how a [time.Time] value is appended (e.g., as a date, 
timestamp, or time).
+//
+// Appending a value with type `[]any` will construct an array appropriately, 
appending
+// each element. Calling with a map[string]any will construct an object, 
recursively calling
+// Append for each value, propagating the options.
+//
+// For other types (arbitrary slices, arrays, maps and structs), reflection is 
used to determine
+// the type and whether we can append it. A nil pointer will append a null, 
while a non-nil
+// pointer will append the value that it points to.
+//
+// For structs, field tags can be used to control the field names and options. 
Only exported
+// fields are considered, with the field name being used as the key. A struct 
tag of `variant`
+// can be used with the following format and options:
+//
+//                     type MyStruct struct {
+//                             Field1    string    `variant:"key"`           
// Use "key" instead of "Field1" as the field name
+//                             Field2    time.Time `variant:"day,date"`      
// Use "day" instead of "Field2" as the field name
+//                                                                   // append 
this value as a "date" value
+//              Time      time.Time `variant:",time"`         // Use "Time" as 
the field name, append the value as
+//                                                                   // a 
"time" value
+//              Field3    int       `variant:"-"`             // Ignore this 
field
+//              Timestamp time.Time `variant:"ts"`            // Use "ts" as 
the field name, append value as a
+//                                                                   // 
timestamp(UTC=false,MICROS)
+//                         Ts2       time.Time `variant:"ts2,nanos,utc"` // 
Use "ts2" as the field name, append value as a
+//                                                                             
                                          // timestamp(UTC=true,NANOS)
+//                     }
+//
+// Options specified in the struct tags will be OR'd with any options passed 
to the original call

Review Comment:
   Can we document the behavior for conflicting options? (What if the struct 
specifies OptTimeAsDate but we pass OptTimeAsTime?)



##########
parquet/variant/variant_test.go:
##########


Review Comment:
   Also is it worth testing examples of invalid variants too?



##########
parquet/variant/variant_test.go:
##########


Review Comment:
   More out of curiosity but do we have fuzzing set up for variants like we do 
for Parquet in general?



##########
parquet/variant/utils.go:
##########


Review Comment:
   Is a utils_test.go worth having?



##########
parquet/variant/builder.go:
##########
@@ -0,0 +1,847 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package variant
+
+import (
+       "bytes"
+       "cmp"
+       "encoding/binary"
+       "errors"
+       "fmt"
+       "io"
+       "math"
+       "reflect"
+       "slices"
+       "strings"
+       "time"
+       "unsafe"
+
+       "github.com/apache/arrow-go/v18/arrow"
+       "github.com/apache/arrow-go/v18/arrow/decimal"
+       "github.com/google/uuid"
+)
+
+// Builder is used to construct Variant values by appending data of various 
types.
+// It manages an internal buffer for the value data and a dictionary for field 
keys.
+type Builder struct {
+       buf             bytes.Buffer
+       dict            map[string]uint32
+       dictKeys        [][]byte
+       allowDuplicates bool
+}
+
+// SetAllowDuplicates controls whether duplicate keys are allowed in objects.
+// When true, the last value for a key is used. When false, an error is 
returned
+// if a duplicate key is detected.
+func (b *Builder) SetAllowDuplicates(allow bool) {
+       b.allowDuplicates = allow
+}

Review Comment:
   Will we ever want options beyond 'last value wins' and 'error'?



##########
parquet/variant/variant.go:
##########
@@ -0,0 +1,725 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package variant
+
+import (
+       "bytes"
+       "encoding/binary"
+       "encoding/json"
+       "errors"
+       "fmt"
+       "iter"
+       "maps"
+       "slices"
+       "strings"
+       "time"
+       "unsafe"
+
+       "github.com/apache/arrow-go/v18/arrow"
+       "github.com/apache/arrow-go/v18/arrow/decimal"
+       "github.com/apache/arrow-go/v18/arrow/decimal128"
+       "github.com/apache/arrow-go/v18/parquet/internal/debug"
+       "github.com/google/uuid"
+)
+
+//go:generate go tool stringer -type=BasicType -linecomment 
-output=basic_type_string.go
+//go:generate go tool stringer -type=PrimitiveType -linecomment 
-output=primitive_type_string.go
+
+// BasicType represents the fundamental type category of a variant value.
+type BasicType int
+
+const (
+       BasicUndefined   BasicType = iota - 1 // Unknown
+       BasicPrimitive                        // Primitive
+       BasicShortString                      // ShortString
+       BasicObject                           // Object
+       BasicArray                            // Array
+)
+
+func basicTypeFromHeader(hdr byte) BasicType {
+       return BasicType(hdr & basicTypeMask)
+}
+
+// PrimitiveType represents specific primitive data types within the variant 
format.
+type PrimitiveType int
+
+const (
+       PrimitiveInvalid            PrimitiveType = iota - 1 // Unknown
+       PrimitiveNull                                        // Null
+       PrimitiveBoolTrue                                    // BoolTrue
+       PrimitiveBoolFalse                                   // BoolFalse
+       PrimitiveInt8                                        // Int8
+       PrimitiveInt16                                       // Int16
+       PrimitiveInt32                                       // Int32
+       PrimitiveInt64                                       // Int64
+       PrimitiveDouble                                      // Double
+       PrimitiveDecimal4                                    // Decimal32
+       PrimitiveDecimal8                                    // Decimal64
+       PrimitiveDecimal16                                   // Decimal128
+       PrimitiveDate                                        // Date
+       PrimitiveTimestampMicros                             // 
Timestamp(micros)
+       PrimitiveTimestampMicrosNTZ                          // 
TimestampNTZ(micros)
+       PrimitiveFloat                                       // Float
+       PrimitiveBinary                                      // Binary
+       PrimitiveString                                      // String
+       PrimitiveTimeMicrosNTZ                               // TimeNTZ(micros)
+       PrimitiveTimestampNanos                              // Timestamp(nanos)
+       PrimitiveTimestampNanosNTZ                           // 
TimestampNTZ(nanos)
+       PrimitiveUUID                                        // UUID
+)
+
+func primitiveTypeFromHeader(hdr byte) PrimitiveType {
+       return PrimitiveType((hdr >> basicTypeBits) & typeInfoMask)
+}
+
+// Type represents the high-level variant data type.
+// This is what applications typically use to identify the type of a variant 
value.
+type Type int
+
+const (
+       Object Type = iota
+       Array
+       Null
+       Bool
+       Int8
+       Int16
+       Int32
+       Int64
+       String
+       Double
+       Decimal4
+       Decimal8
+       Decimal16
+       Date
+       TimestampMicros
+       TimestampMicrosNTZ
+       Float
+       Binary
+       Time
+       TimestampNanos
+       TimestampNanosNTZ
+       UUID
+)
+
+const (
+       versionMask        uint8 = 0x0F
+       sortedStrMask      uint8 = 0b10000
+       basicTypeMask      uint8 = 0x3
+       basicTypeBits      uint8 = 2
+       typeInfoMask       uint8 = 0x3F
+       hdrSizeBytes             = 1
+       minOffsetSizeBytes       = 1
+       maxOffsetSizeBytes       = 4
+
+       // mask is applied after shift
+       offsetSizeMask     uint8 = 0b11
+       offsetSizeBitShift uint8 = 6
+       supportedVersion         = 1
+       maxShortStringSize       = 0x3F
+       maxSizeLimit             = 128 * 1024 * 1024 // 128MB
+)
+
+var (
+       // EmptyMetadataBytes contains a minimal valid metadata section with no 
dictionary entries.
+       EmptyMetadataBytes = [3]byte{0x1, 0, 0}
+)
+
+// Metadata represents the dictionary part of a variant value, which stores
+// the keys used in object values.
+type Metadata struct {
+       data []byte
+       keys [][]byte
+}
+
+// NewMetadata creates a Metadata instance from a raw byte slice.
+// It validates the metadata format and loads the key dictionary.
+func NewMetadata(data []byte) (Metadata, error) {
+       m := Metadata{data: data}
+       if len(data) < hdrSizeBytes+minOffsetSizeBytes*2 {
+               return m, fmt.Errorf("invalid variant metadata: too short: 
size=%d", len(data))
+       }
+
+       if m.Version() != supportedVersion {
+               return m, fmt.Errorf("invalid variant metadata: unsupported 
version: %d", m.Version())
+       }
+
+       offsetSz := m.OffsetSize()
+       if offsetSz < minOffsetSizeBytes || offsetSz > maxOffsetSizeBytes {
+               return m, fmt.Errorf("invalid variant metadata: invalid offset 
size: %d", offsetSz)
+       }
+
+       dictSize, err := m.loadDictionary(offsetSz)
+       if err != nil {
+               return m, err
+       }
+
+       if hdrSizeBytes+int(dictSize+1)*int(offsetSz) > len(m.data) {
+               return m, fmt.Errorf("invalid variant metadata: offset out of 
range: %d > %d",
+                       (dictSize+hdrSizeBytes)*uint32(offsetSz), len(m.data))
+       }
+
+       return m, nil
+}
+
+// Clone creates a deep copy of the metadata.
+func (m *Metadata) Clone() Metadata {
+       return Metadata{
+               data: bytes.Clone(m.data),
+               // shallow copy of the values, but the slice is copied
+               // more efficient, and nothing should be mutating the keys
+               // so it's probably safe, but something we should keep in mind
+               keys: slices.Clone(m.keys),
+       }
+}
+
+func (m *Metadata) loadDictionary(offsetSz uint8) (uint32, error) {
+       if int(offsetSz+hdrSizeBytes) > len(m.data) {
+               return 0, errors.New("invalid variant metadata: too short for 
dictionary size")
+       }
+
+       dictSize := readLEU32(m.data[hdrSizeBytes : hdrSizeBytes+offsetSz])
+       m.keys = make([][]byte, dictSize)
+
+       if dictSize == 0 {
+               return 0, nil
+       }
+
+       // first offset is always 0
+       offsetStart, offsetPos := uint32(0), hdrSizeBytes+offsetSz
+       valuesStart := hdrSizeBytes + (dictSize+2)*uint32(offsetSz)
+       for i := range dictSize {
+               offsetPos += offsetSz
+               end := readLEU32(m.data[offsetPos : offsetPos+offsetSz])
+
+               keySize := end - offsetStart
+               valStart := valuesStart + offsetStart
+               if valStart+keySize > uint32(len(m.data)) {
+                       return 0, fmt.Errorf("invalid variant metadata: string 
data out of range: %d + %d > %d",
+                               valStart, keySize, len(m.data))
+               }
+               m.keys[i] = m.data[valStart : valStart+keySize]
+               offsetStart += keySize
+       }
+
+       return dictSize, nil
+}
+
+// Bytes returns the raw byte representation of the metadata.
+func (m Metadata) Bytes() []byte { return m.data }
+
+// Version returns the metadata format version.
+func (m Metadata) Version() uint8 { return m.data[0] & versionMask }
+
+// SortedAndUnique returns whether the keys in the metadata dictionary are 
sorted and unique.
+func (m Metadata) SortedAndUnique() bool { return m.data[0]&sortedStrMask != 0 
}
+
+// OffsetSize returns the size in bytes used to store offsets in the metadata.
+func (m Metadata) OffsetSize() uint8 {
+       return ((m.data[0] >> offsetSizeBitShift) & offsetSizeMask) + 1
+}
+
+// DictionarySize returns the number of keys in the metadata dictionary.
+func (m Metadata) DictionarySize() uint32 { return uint32(len(m.keys)) }
+
+// KeyAt returns the string key at the given dictionary ID.
+// Returns an error if the ID is out of range.
+func (m Metadata) KeyAt(id uint32) (string, error) {
+       if id >= uint32(len(m.keys)) {
+               return "", fmt.Errorf("invalid variant metadata: id out of 
range: %d >= %d",
+                       id, len(m.keys))
+       }
+
+       return unsafe.String(&m.keys[id][0], len(m.keys[id])), nil
+}
+
+// IdFor returns the dictionary IDs for the given key.
+// If the metadata is sorted and unique, this performs a binary search.
+// Otherwise, it performs a linear search.
+//
+// If the metadata is not sorted and unique, then it's possible that multiple
+// IDs will be returned for the same key.
+func (m Metadata) IdFor(key string) []uint32 {
+       k := unsafe.Slice(unsafe.StringData(key), len(key))
+
+       var ret []uint32
+       if m.SortedAndUnique() {
+               idx, found := slices.BinarySearchFunc(m.keys, k, bytes.Compare)
+               if found {
+                       ret = append(ret, uint32(idx))
+               }
+
+               return ret
+       }
+
+       for i, k := range m.keys {
+               if bytes.Equal(k, k) {
+                       ret = append(ret, uint32(i))
+               }
+       }
+
+       return ret
+}
+
+// DecimalValue represents a decimal number with a specified scale.
+// The generic parameter T can be any supported variant decimal type 
(Decimal32, Decimal64, Decimal128).
+type DecimalValue[T decimal.DecimalTypes] struct {
+       Scale uint8
+       Value decimal.Num[T]
+}
+
+// MarshalJSON implements the json.Marshaler interface for DecimalValue.
+func (v DecimalValue[T]) MarshalJSON() ([]byte, error) {
+       return []byte(v.Value.ToString(int32(v.Scale))), nil
+}
+
+// ArrayValue represents an array of variant values.
+type ArrayValue struct {
+       value []byte
+       meta  Metadata
+
+       numElements uint32
+       dataStart   uint32
+       offsetSize  uint8
+       offsetStart uint8
+}
+
+// MarshalJSON implements the json.Marshaler interface for ArrayValue.
+func (v ArrayValue) MarshalJSON() ([]byte, error) {
+       return json.Marshal(slices.Collect(v.Values()))
+}
+
+// Len returns the number of elements in the array.
+func (v ArrayValue) Len() uint32 { return v.numElements }
+
+// Values returns an iterator for the elements in the array, allowing
+// for lazy evaluation of the offsets (for the situation where not all elements
+// are iterated).
+func (v ArrayValue) Values() iter.Seq[Value] {
+       return func(yield func(Value) bool) {
+               for i := range v.numElements {
+                       idx := uint32(v.offsetStart) + i*uint32(v.offsetSize)
+                       offset := readLEU32(v.value[idx : 
idx+uint32(v.offsetSize)])
+                       if !yield(Value{value: v.value[v.dataStart+offset:], 
meta: v.meta}) {
+                               return
+                       }
+               }
+       }
+}
+
+// Value returns the Value at the specified index.
+// Returns an error if the index is out of range.
+func (v ArrayValue) Value(i uint32) (Value, error) {
+       if i >= v.numElements {
+               return Value{}, fmt.Errorf("%w: invalid array value: index out 
of range: %d >= %d",
+                       arrow.ErrIndex, i, v.numElements)
+       }
+
+       idx := uint32(v.offsetStart) + i*uint32(v.offsetSize)
+       offset := readLEU32(v.value[idx : idx+uint32(v.offsetSize)])
+
+       return Value{meta: v.meta, value: v.value[v.dataStart+offset:]}, nil
+}
+
+// ObjectValue represents an object (map/dictionary) of key-value pairs.
+type ObjectValue struct {
+       value []byte
+       meta  Metadata
+
+       numElements uint32
+       offsetStart uint32
+       dataStart   uint32
+       idSize      uint8
+       offsetSize  uint8
+       idStart     uint8
+}
+
+// ObjectField represents a key-value pair in an object.
+type ObjectField struct {
+       Key   string
+       Value Value
+}
+
+// NumElements returns the number of fields in the object.
+func (v ObjectValue) NumElements() uint32 { return v.numElements }
+
+// ValueByKey returns the field with the specified key.
+// Returns arrow.ErrNotFound if the key doesn't exist.
+func (v ObjectValue) ValueByKey(key string) (ObjectField, error) {
+       n := v.numElements
+
+       // if total list size is smaller than threshold, linear search will
+       // likely be faster than a binary search
+       const binarySearchThreshold = 32
+       if n < binarySearchThreshold {
+               for i := range n {
+                       idx := uint32(v.idStart) + i*uint32(v.idSize)
+                       id := readLEU32(v.value[idx : idx+uint32(v.idSize)])
+                       k, err := v.meta.KeyAt(id)
+                       if err != nil {
+                               return ObjectField{}, fmt.Errorf("invalid 
object value: fieldID at idx %d is not in metadata", idx)
+                       }
+                       if k == key {
+                               idx := uint32(v.offsetStart) + 
uint32(v.offsetSize)*i
+                               offset := readLEU32(v.value[idx : 
idx+uint32(v.offsetSize)])
+                               return ObjectField{
+                                       Key:   key,
+                                       Value: Value{value: 
v.value[v.dataStart+offset:], meta: v.meta}}, nil
+                       }
+               }
+               return ObjectField{}, arrow.ErrNotFound
+       }
+
+       i, j := uint32(0), n
+       for i < j {
+               mid := (i + j) >> 1
+               idx := uint32(v.idStart) + mid*uint32(v.idSize)
+               id := readLEU32(v.value[idx : idx+uint32(v.idSize)])
+               k, err := v.meta.KeyAt(id)
+               if err != nil {
+                       return ObjectField{}, fmt.Errorf("invalid object value: 
fieldID at idx %d is not in metadata", idx)
+               }
+
+               switch strings.Compare(k, key) {
+               case -1:
+                       i = mid + 1
+               case 0:
+                       idx := uint32(v.offsetStart) + uint32(v.offsetSize)*mid
+                       offset := readLEU32(v.value[idx : 
idx+uint32(v.offsetSize)])
+
+                       return ObjectField{
+                               Key:   key,
+                               Value: Value{value: 
v.value[v.dataStart+offset:], meta: v.meta}}, nil
+               case 1:
+                       j = mid - 1
+               }
+       }
+
+       return ObjectField{}, arrow.ErrNotFound
+}
+
+// FieldAt returns the field at the specified index.
+// Returns an error if the index is out of range.
+func (v ObjectValue) FieldAt(i uint32) (ObjectField, error) {
+       if i >= v.numElements {
+               return ObjectField{}, fmt.Errorf("%w: invalid object value: 
index out of range: %d >= %d",
+                       arrow.ErrIndex, i, v.numElements)
+       }
+
+       idx := uint32(v.idStart) + i*uint32(v.idSize)
+       id := readLEU32(v.value[idx : idx+uint32(v.idSize)])
+       k, err := v.meta.KeyAt(id)
+       if err != nil {
+               return ObjectField{}, fmt.Errorf("invalid object value: fieldID 
at idx %d is not in metadata", idx)
+       }
+
+       offsetIdx := uint32(v.offsetStart) + i*uint32(v.offsetSize)
+       offset := readLEU32(v.value[offsetIdx : offsetIdx+uint32(v.offsetSize)])
+
+       return ObjectField{
+               Key:   k,
+               Value: Value{value: v.value[v.dataStart+offset:], meta: 
v.meta}}, nil
+}
+
+// Values returns an iterator over all key-value pairs in the object.
+func (v ObjectValue) Values() iter.Seq2[string, Value] {
+       return func(yield func(string, Value) bool) {
+               for i := range v.numElements {
+                       idx := uint32(v.idStart) + i*uint32(v.idSize)
+                       id := readLEU32(v.value[idx : idx+uint32(v.idSize)])
+                       k, err := v.meta.KeyAt(id)
+                       if err != nil {
+                               return
+                       }
+
+                       offsetIdx := uint32(v.offsetStart) + 
i*uint32(v.offsetSize)
+                       offset := readLEU32(v.value[offsetIdx : 
offsetIdx+uint32(v.offsetSize)])
+
+                       if !yield(k, Value{value: v.value[v.dataStart+offset:], 
meta: v.meta}) {
+                               return
+                       }
+               }
+       }
+}
+
+// MarshalJSON implements the json.Marshaler interface for ObjectValue.
+func (v ObjectValue) MarshalJSON() ([]byte, error) {
+       // for now we'll use a naive approach and just build a map
+       // then marshal it. This is not the most efficient way to do this
+       // but it is the simplest and most straightforward.
+       mapping := make(map[string]Value)
+       maps.Insert(mapping, v.Values())
+       return json.Marshal(mapping)
+}
+
+// Value represents a variant value of any type.
+type Value struct {
+       value []byte
+       meta  Metadata
+}
+
+// NewWithMetadata creates a Value with the provided metadata and value bytes.
+func NewWithMetadata(meta Metadata, value []byte) (Value, error) {
+       if len(value) == 0 {
+               return Value{}, errors.New("invalid variant value: empty")
+       }
+
+       return Value{value: value, meta: meta}, nil
+}
+
+// New creates a Value by parsing both the metadata and value bytes.
+func New(meta, value []byte) (Value, error) {
+       m, err := NewMetadata(meta)
+       if err != nil {
+               return Value{}, err
+       }
+
+       return NewWithMetadata(m, value)
+}
+
+// Bytes returns the raw byte representation of the value (excluding metadata).
+func (v Value) Bytes() []byte { return v.value }
+
+// Clone creates a deep copy of the value including its metadata.
+func (v Value) Clone() Value {
+       return Value{
+               meta:  v.meta.Clone(),
+               value: bytes.Clone(v.value)}

Review Comment:
   ```suggestion
                value: bytes.Clone(v.value),
        }
   ```
   
   nit



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscr...@arrow.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org

Reply via email to