lidavidm commented on code in PR #344: URL: https://github.com/apache/arrow-go/pull/344#discussion_r2105717305
########## parquet/variant/variant.go: ########## @@ -0,0 +1,725 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package variant + +import ( + "bytes" + "encoding/binary" + "encoding/json" + "errors" + "fmt" + "iter" + "maps" + "slices" + "strings" + "time" + "unsafe" + + "github.com/apache/arrow-go/v18/arrow" + "github.com/apache/arrow-go/v18/arrow/decimal" + "github.com/apache/arrow-go/v18/arrow/decimal128" + "github.com/apache/arrow-go/v18/parquet/internal/debug" + "github.com/google/uuid" +) + +//go:generate go tool stringer -type=BasicType -linecomment -output=basic_type_string.go +//go:generate go tool stringer -type=PrimitiveType -linecomment -output=primitive_type_string.go + +// BasicType represents the fundamental type category of a variant value. +type BasicType int + +const ( + BasicUndefined BasicType = iota - 1 // Unknown + BasicPrimitive // Primitive + BasicShortString // ShortString + BasicObject // Object + BasicArray // Array +) + +func basicTypeFromHeader(hdr byte) BasicType { + return BasicType(hdr & basicTypeMask) Review Comment: worth having a debug.Assert perhaps? ########## parquet/variant/builder.go: ########## @@ -0,0 +1,847 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package variant + +import ( + "bytes" + "cmp" + "encoding/binary" + "errors" + "fmt" + "io" + "math" + "reflect" + "slices" + "strings" + "time" + "unsafe" + + "github.com/apache/arrow-go/v18/arrow" + "github.com/apache/arrow-go/v18/arrow/decimal" + "github.com/google/uuid" +) + +// Builder is used to construct Variant values by appending data of various types. +// It manages an internal buffer for the value data and a dictionary for field keys. +type Builder struct { + buf bytes.Buffer + dict map[string]uint32 + dictKeys [][]byte + allowDuplicates bool +} + +// SetAllowDuplicates controls whether duplicate keys are allowed in objects. +// When true, the last value for a key is used. When false, an error is returned +// if a duplicate key is detected. +func (b *Builder) SetAllowDuplicates(allow bool) { + b.allowDuplicates = allow +} + +// AddKey adds a key to the builder's dictionary and returns its ID. +// If the key already exists in the dictionary, its existing ID is returned. +func (b *Builder) AddKey(key string) (id uint32) { + if b.dict == nil { + b.dict = make(map[string]uint32) + b.dictKeys = make([][]byte, 0, 16) + } + + var ok bool + if id, ok = b.dict[key]; ok { + return id + } + + id = uint32(len(b.dictKeys)) + b.dict[key] = id + b.dictKeys = append(b.dictKeys, unsafe.Slice(unsafe.StringData(key), len(key))) + + return id +} + +// AppendOpt represents options for appending time-related values. These are only +// used when using the generic Append method that takes an interface{}. +type AppendOpt int16 + +const ( + // OptTimestampNano specifies that timestamps should use nanosecond precision, + // otherwise microsecond precision is used. + OptTimestampNano AppendOpt = 1 << iota + // OptTimestampUTC specifies that timestamps should be in UTC timezone, otherwise + // no time zone (NTZ) is used. + OptTimestampUTC + // OptTimeAsDate specifies that time.Time values should be encoded as dates + OptTimeAsDate + // OptTimeAsTime specifies that time.Time values should be encoded as a time value + OptTimeAsTime +) + +func extractFieldInfo(f reflect.StructField) (name string, o AppendOpt) { + tag := f.Tag.Get("variant") + if tag == "" { + return f.Name, 0 + } + + parts := strings.Split(tag, ",") + if len(parts) == 1 { + return parts[0], 0 + } + + name = parts[0] + if name == "" { + name = f.Name + } + + for _, opt := range parts[1:] { + switch strings.ToLower(opt) { + case "nanos": + o |= OptTimestampNano + case "utc": + o |= OptTimestampUTC + case "date": + o |= OptTimeAsDate + case "time": + o |= OptTimeAsTime + } + } + + return name, o +} + +// Append adds a value of any supported type to the builder. +// +// Any basic primitive type is supported, the AppendOpt options are used to control how +// timestamps are appended (e.g., as microseconds or nanoseconds and timezone). The options +// also control how a [time.Time] value is appended (e.g., as a date, timestamp, or time). +// +// Appending a value with type `[]any` will construct an array appropriately, appending +// each element. Calling with a map[string]any will construct an object, recursively calling +// Append for each value, propagating the options. +// +// For other types (arbitrary slices, arrays, maps and structs), reflection is used to determine +// the type and whether we can append it. A nil pointer will append a null, while a non-nil +// pointer will append the value that it points to. +// +// For structs, field tags can be used to control the field names and options. Only exported +// fields are considered, with the field name being used as the key. A struct tag of `variant` +// can be used with the following format and options: +// +// type MyStruct struct { +// Field1 string `variant:"key"` // Use "key" instead of "Field1" as the field name +// Field2 time.Time `variant:"day,date"` // Use "day" instead of "Field2" as the field name +// // append this value as a "date" value +// Time time.Time `variant:",time"` // Use "Time" as the field name, append the value as +// // a "time" value +// Field3 int `variant:"-"` // Ignore this field +// Timestamp time.Time `variant:"ts"` // Use "ts" as the field name, append value as a +// // timestamp(UTC=false,MICROS) +// Ts2 time.Time `variant:"ts2,nanos,utc"` // Use "ts2" as the field name, append value as a +// // timestamp(UTC=true,NANOS) +// } +// +// Options specified in the struct tags will be OR'd with any options passed to the original call Review Comment: Can we document the behavior for conflicting options? (What if the struct specifies OptTimeAsDate but we pass OptTimeAsTime?) ########## parquet/variant/variant_test.go: ########## Review Comment: Also is it worth testing examples of invalid variants too? ########## parquet/variant/variant_test.go: ########## Review Comment: More out of curiosity but do we have fuzzing set up for variants like we do for Parquet in general? ########## parquet/variant/utils.go: ########## Review Comment: Is a utils_test.go worth having? ########## parquet/variant/builder.go: ########## @@ -0,0 +1,847 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package variant + +import ( + "bytes" + "cmp" + "encoding/binary" + "errors" + "fmt" + "io" + "math" + "reflect" + "slices" + "strings" + "time" + "unsafe" + + "github.com/apache/arrow-go/v18/arrow" + "github.com/apache/arrow-go/v18/arrow/decimal" + "github.com/google/uuid" +) + +// Builder is used to construct Variant values by appending data of various types. +// It manages an internal buffer for the value data and a dictionary for field keys. +type Builder struct { + buf bytes.Buffer + dict map[string]uint32 + dictKeys [][]byte + allowDuplicates bool +} + +// SetAllowDuplicates controls whether duplicate keys are allowed in objects. +// When true, the last value for a key is used. When false, an error is returned +// if a duplicate key is detected. +func (b *Builder) SetAllowDuplicates(allow bool) { + b.allowDuplicates = allow +} Review Comment: Will we ever want options beyond 'last value wins' and 'error'? ########## parquet/variant/variant.go: ########## @@ -0,0 +1,725 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package variant + +import ( + "bytes" + "encoding/binary" + "encoding/json" + "errors" + "fmt" + "iter" + "maps" + "slices" + "strings" + "time" + "unsafe" + + "github.com/apache/arrow-go/v18/arrow" + "github.com/apache/arrow-go/v18/arrow/decimal" + "github.com/apache/arrow-go/v18/arrow/decimal128" + "github.com/apache/arrow-go/v18/parquet/internal/debug" + "github.com/google/uuid" +) + +//go:generate go tool stringer -type=BasicType -linecomment -output=basic_type_string.go +//go:generate go tool stringer -type=PrimitiveType -linecomment -output=primitive_type_string.go + +// BasicType represents the fundamental type category of a variant value. +type BasicType int + +const ( + BasicUndefined BasicType = iota - 1 // Unknown + BasicPrimitive // Primitive + BasicShortString // ShortString + BasicObject // Object + BasicArray // Array +) + +func basicTypeFromHeader(hdr byte) BasicType { + return BasicType(hdr & basicTypeMask) +} + +// PrimitiveType represents specific primitive data types within the variant format. +type PrimitiveType int + +const ( + PrimitiveInvalid PrimitiveType = iota - 1 // Unknown + PrimitiveNull // Null + PrimitiveBoolTrue // BoolTrue + PrimitiveBoolFalse // BoolFalse + PrimitiveInt8 // Int8 + PrimitiveInt16 // Int16 + PrimitiveInt32 // Int32 + PrimitiveInt64 // Int64 + PrimitiveDouble // Double + PrimitiveDecimal4 // Decimal32 + PrimitiveDecimal8 // Decimal64 + PrimitiveDecimal16 // Decimal128 + PrimitiveDate // Date + PrimitiveTimestampMicros // Timestamp(micros) + PrimitiveTimestampMicrosNTZ // TimestampNTZ(micros) + PrimitiveFloat // Float + PrimitiveBinary // Binary + PrimitiveString // String + PrimitiveTimeMicrosNTZ // TimeNTZ(micros) + PrimitiveTimestampNanos // Timestamp(nanos) + PrimitiveTimestampNanosNTZ // TimestampNTZ(nanos) + PrimitiveUUID // UUID +) + +func primitiveTypeFromHeader(hdr byte) PrimitiveType { + return PrimitiveType((hdr >> basicTypeBits) & typeInfoMask) +} + +// Type represents the high-level variant data type. +// This is what applications typically use to identify the type of a variant value. +type Type int + +const ( + Object Type = iota + Array + Null + Bool + Int8 + Int16 + Int32 + Int64 + String + Double + Decimal4 + Decimal8 + Decimal16 + Date + TimestampMicros + TimestampMicrosNTZ + Float + Binary + Time + TimestampNanos + TimestampNanosNTZ + UUID +) + +const ( + versionMask uint8 = 0x0F + sortedStrMask uint8 = 0b10000 + basicTypeMask uint8 = 0x3 + basicTypeBits uint8 = 2 + typeInfoMask uint8 = 0x3F + hdrSizeBytes = 1 + minOffsetSizeBytes = 1 + maxOffsetSizeBytes = 4 + + // mask is applied after shift + offsetSizeMask uint8 = 0b11 + offsetSizeBitShift uint8 = 6 + supportedVersion = 1 + maxShortStringSize = 0x3F + maxSizeLimit = 128 * 1024 * 1024 // 128MB +) + +var ( + // EmptyMetadataBytes contains a minimal valid metadata section with no dictionary entries. + EmptyMetadataBytes = [3]byte{0x1, 0, 0} +) + +// Metadata represents the dictionary part of a variant value, which stores +// the keys used in object values. +type Metadata struct { + data []byte + keys [][]byte +} + +// NewMetadata creates a Metadata instance from a raw byte slice. +// It validates the metadata format and loads the key dictionary. +func NewMetadata(data []byte) (Metadata, error) { + m := Metadata{data: data} + if len(data) < hdrSizeBytes+minOffsetSizeBytes*2 { + return m, fmt.Errorf("invalid variant metadata: too short: size=%d", len(data)) + } + + if m.Version() != supportedVersion { + return m, fmt.Errorf("invalid variant metadata: unsupported version: %d", m.Version()) + } + + offsetSz := m.OffsetSize() + if offsetSz < minOffsetSizeBytes || offsetSz > maxOffsetSizeBytes { + return m, fmt.Errorf("invalid variant metadata: invalid offset size: %d", offsetSz) + } + + dictSize, err := m.loadDictionary(offsetSz) + if err != nil { + return m, err + } + + if hdrSizeBytes+int(dictSize+1)*int(offsetSz) > len(m.data) { + return m, fmt.Errorf("invalid variant metadata: offset out of range: %d > %d", + (dictSize+hdrSizeBytes)*uint32(offsetSz), len(m.data)) + } + + return m, nil +} + +// Clone creates a deep copy of the metadata. +func (m *Metadata) Clone() Metadata { + return Metadata{ + data: bytes.Clone(m.data), + // shallow copy of the values, but the slice is copied + // more efficient, and nothing should be mutating the keys + // so it's probably safe, but something we should keep in mind + keys: slices.Clone(m.keys), + } +} + +func (m *Metadata) loadDictionary(offsetSz uint8) (uint32, error) { + if int(offsetSz+hdrSizeBytes) > len(m.data) { + return 0, errors.New("invalid variant metadata: too short for dictionary size") + } + + dictSize := readLEU32(m.data[hdrSizeBytes : hdrSizeBytes+offsetSz]) + m.keys = make([][]byte, dictSize) + + if dictSize == 0 { + return 0, nil + } + + // first offset is always 0 + offsetStart, offsetPos := uint32(0), hdrSizeBytes+offsetSz + valuesStart := hdrSizeBytes + (dictSize+2)*uint32(offsetSz) + for i := range dictSize { + offsetPos += offsetSz + end := readLEU32(m.data[offsetPos : offsetPos+offsetSz]) + + keySize := end - offsetStart + valStart := valuesStart + offsetStart + if valStart+keySize > uint32(len(m.data)) { + return 0, fmt.Errorf("invalid variant metadata: string data out of range: %d + %d > %d", + valStart, keySize, len(m.data)) + } + m.keys[i] = m.data[valStart : valStart+keySize] + offsetStart += keySize + } + + return dictSize, nil +} + +// Bytes returns the raw byte representation of the metadata. +func (m Metadata) Bytes() []byte { return m.data } + +// Version returns the metadata format version. +func (m Metadata) Version() uint8 { return m.data[0] & versionMask } + +// SortedAndUnique returns whether the keys in the metadata dictionary are sorted and unique. +func (m Metadata) SortedAndUnique() bool { return m.data[0]&sortedStrMask != 0 } + +// OffsetSize returns the size in bytes used to store offsets in the metadata. +func (m Metadata) OffsetSize() uint8 { + return ((m.data[0] >> offsetSizeBitShift) & offsetSizeMask) + 1 +} + +// DictionarySize returns the number of keys in the metadata dictionary. +func (m Metadata) DictionarySize() uint32 { return uint32(len(m.keys)) } + +// KeyAt returns the string key at the given dictionary ID. +// Returns an error if the ID is out of range. +func (m Metadata) KeyAt(id uint32) (string, error) { + if id >= uint32(len(m.keys)) { + return "", fmt.Errorf("invalid variant metadata: id out of range: %d >= %d", + id, len(m.keys)) + } + + return unsafe.String(&m.keys[id][0], len(m.keys[id])), nil +} + +// IdFor returns the dictionary IDs for the given key. +// If the metadata is sorted and unique, this performs a binary search. +// Otherwise, it performs a linear search. +// +// If the metadata is not sorted and unique, then it's possible that multiple +// IDs will be returned for the same key. +func (m Metadata) IdFor(key string) []uint32 { + k := unsafe.Slice(unsafe.StringData(key), len(key)) + + var ret []uint32 + if m.SortedAndUnique() { + idx, found := slices.BinarySearchFunc(m.keys, k, bytes.Compare) + if found { + ret = append(ret, uint32(idx)) + } + + return ret + } + + for i, k := range m.keys { + if bytes.Equal(k, k) { + ret = append(ret, uint32(i)) + } + } + + return ret +} + +// DecimalValue represents a decimal number with a specified scale. +// The generic parameter T can be any supported variant decimal type (Decimal32, Decimal64, Decimal128). +type DecimalValue[T decimal.DecimalTypes] struct { + Scale uint8 + Value decimal.Num[T] +} + +// MarshalJSON implements the json.Marshaler interface for DecimalValue. +func (v DecimalValue[T]) MarshalJSON() ([]byte, error) { + return []byte(v.Value.ToString(int32(v.Scale))), nil +} + +// ArrayValue represents an array of variant values. +type ArrayValue struct { + value []byte + meta Metadata + + numElements uint32 + dataStart uint32 + offsetSize uint8 + offsetStart uint8 +} + +// MarshalJSON implements the json.Marshaler interface for ArrayValue. +func (v ArrayValue) MarshalJSON() ([]byte, error) { + return json.Marshal(slices.Collect(v.Values())) +} + +// Len returns the number of elements in the array. +func (v ArrayValue) Len() uint32 { return v.numElements } + +// Values returns an iterator for the elements in the array, allowing +// for lazy evaluation of the offsets (for the situation where not all elements +// are iterated). +func (v ArrayValue) Values() iter.Seq[Value] { + return func(yield func(Value) bool) { + for i := range v.numElements { + idx := uint32(v.offsetStart) + i*uint32(v.offsetSize) + offset := readLEU32(v.value[idx : idx+uint32(v.offsetSize)]) + if !yield(Value{value: v.value[v.dataStart+offset:], meta: v.meta}) { + return + } + } + } +} + +// Value returns the Value at the specified index. +// Returns an error if the index is out of range. +func (v ArrayValue) Value(i uint32) (Value, error) { + if i >= v.numElements { + return Value{}, fmt.Errorf("%w: invalid array value: index out of range: %d >= %d", + arrow.ErrIndex, i, v.numElements) + } + + idx := uint32(v.offsetStart) + i*uint32(v.offsetSize) + offset := readLEU32(v.value[idx : idx+uint32(v.offsetSize)]) + + return Value{meta: v.meta, value: v.value[v.dataStart+offset:]}, nil +} + +// ObjectValue represents an object (map/dictionary) of key-value pairs. +type ObjectValue struct { + value []byte + meta Metadata + + numElements uint32 + offsetStart uint32 + dataStart uint32 + idSize uint8 + offsetSize uint8 + idStart uint8 +} + +// ObjectField represents a key-value pair in an object. +type ObjectField struct { + Key string + Value Value +} + +// NumElements returns the number of fields in the object. +func (v ObjectValue) NumElements() uint32 { return v.numElements } + +// ValueByKey returns the field with the specified key. +// Returns arrow.ErrNotFound if the key doesn't exist. +func (v ObjectValue) ValueByKey(key string) (ObjectField, error) { + n := v.numElements + + // if total list size is smaller than threshold, linear search will + // likely be faster than a binary search + const binarySearchThreshold = 32 + if n < binarySearchThreshold { + for i := range n { + idx := uint32(v.idStart) + i*uint32(v.idSize) + id := readLEU32(v.value[idx : idx+uint32(v.idSize)]) + k, err := v.meta.KeyAt(id) + if err != nil { + return ObjectField{}, fmt.Errorf("invalid object value: fieldID at idx %d is not in metadata", idx) + } + if k == key { + idx := uint32(v.offsetStart) + uint32(v.offsetSize)*i + offset := readLEU32(v.value[idx : idx+uint32(v.offsetSize)]) + return ObjectField{ + Key: key, + Value: Value{value: v.value[v.dataStart+offset:], meta: v.meta}}, nil + } + } + return ObjectField{}, arrow.ErrNotFound + } + + i, j := uint32(0), n + for i < j { + mid := (i + j) >> 1 + idx := uint32(v.idStart) + mid*uint32(v.idSize) + id := readLEU32(v.value[idx : idx+uint32(v.idSize)]) + k, err := v.meta.KeyAt(id) + if err != nil { + return ObjectField{}, fmt.Errorf("invalid object value: fieldID at idx %d is not in metadata", idx) + } + + switch strings.Compare(k, key) { + case -1: + i = mid + 1 + case 0: + idx := uint32(v.offsetStart) + uint32(v.offsetSize)*mid + offset := readLEU32(v.value[idx : idx+uint32(v.offsetSize)]) + + return ObjectField{ + Key: key, + Value: Value{value: v.value[v.dataStart+offset:], meta: v.meta}}, nil + case 1: + j = mid - 1 + } + } + + return ObjectField{}, arrow.ErrNotFound +} + +// FieldAt returns the field at the specified index. +// Returns an error if the index is out of range. +func (v ObjectValue) FieldAt(i uint32) (ObjectField, error) { + if i >= v.numElements { + return ObjectField{}, fmt.Errorf("%w: invalid object value: index out of range: %d >= %d", + arrow.ErrIndex, i, v.numElements) + } + + idx := uint32(v.idStart) + i*uint32(v.idSize) + id := readLEU32(v.value[idx : idx+uint32(v.idSize)]) + k, err := v.meta.KeyAt(id) + if err != nil { + return ObjectField{}, fmt.Errorf("invalid object value: fieldID at idx %d is not in metadata", idx) + } + + offsetIdx := uint32(v.offsetStart) + i*uint32(v.offsetSize) + offset := readLEU32(v.value[offsetIdx : offsetIdx+uint32(v.offsetSize)]) + + return ObjectField{ + Key: k, + Value: Value{value: v.value[v.dataStart+offset:], meta: v.meta}}, nil +} + +// Values returns an iterator over all key-value pairs in the object. +func (v ObjectValue) Values() iter.Seq2[string, Value] { + return func(yield func(string, Value) bool) { + for i := range v.numElements { + idx := uint32(v.idStart) + i*uint32(v.idSize) + id := readLEU32(v.value[idx : idx+uint32(v.idSize)]) + k, err := v.meta.KeyAt(id) + if err != nil { + return + } + + offsetIdx := uint32(v.offsetStart) + i*uint32(v.offsetSize) + offset := readLEU32(v.value[offsetIdx : offsetIdx+uint32(v.offsetSize)]) + + if !yield(k, Value{value: v.value[v.dataStart+offset:], meta: v.meta}) { + return + } + } + } +} + +// MarshalJSON implements the json.Marshaler interface for ObjectValue. +func (v ObjectValue) MarshalJSON() ([]byte, error) { + // for now we'll use a naive approach and just build a map + // then marshal it. This is not the most efficient way to do this + // but it is the simplest and most straightforward. + mapping := make(map[string]Value) + maps.Insert(mapping, v.Values()) + return json.Marshal(mapping) +} + +// Value represents a variant value of any type. +type Value struct { + value []byte + meta Metadata +} + +// NewWithMetadata creates a Value with the provided metadata and value bytes. +func NewWithMetadata(meta Metadata, value []byte) (Value, error) { + if len(value) == 0 { + return Value{}, errors.New("invalid variant value: empty") + } + + return Value{value: value, meta: meta}, nil +} + +// New creates a Value by parsing both the metadata and value bytes. +func New(meta, value []byte) (Value, error) { + m, err := NewMetadata(meta) + if err != nil { + return Value{}, err + } + + return NewWithMetadata(m, value) +} + +// Bytes returns the raw byte representation of the value (excluding metadata). +func (v Value) Bytes() []byte { return v.value } + +// Clone creates a deep copy of the value including its metadata. +func (v Value) Clone() Value { + return Value{ + meta: v.meta.Clone(), + value: bytes.Clone(v.value)} Review Comment: ```suggestion value: bytes.Clone(v.value), } ``` nit -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: github-unsubscr...@arrow.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org