zeroshade commented on code in PR #344: URL: https://github.com/apache/arrow-go/pull/344#discussion_r2110087873
########## parquet/variant/doc.go: ########## @@ -0,0 +1,142 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Package variant provides an implementation of the Apache Parquet Variant data type. +// +// The Variant type is a flexible binary format designed to represent complex nested +// data structures with minimal overhead. It supports a wide range of primitive types +// as well as nested arrays and objects (similar to JSON). The format uses a memory-efficient +// binary representation with a separate metadata section for dictionary encoding of keys. +// +// # Key Components +// +// - [Value]: The primary type representing a variant value +// - [Metadata]: Contains information about the dictionary of keys +// - [Builder]: Used to construct variant values +// +// # Format Overview +// +// The variant format consists of two parts: +// +// 1. Metadata: A dictionary of keys used in objects +// 2. Value: The actual data payload +// +// Values can be one of the following types: +// +// - Primitive values (null, bool, int8/16/32/64, float32/64, etc.) 
+// - Short strings (less than 64 bytes) +// - Long strings and binary data +// - Date, time and timestamp values +// - Decimal values (4, 8, or 16 bytes) +// - Arrays of any variant value +// - Objects with key-value pairs +// +// # Working with Variants +// +// To create a variant value, use the Builder: +// +// var b variant.Builder +// b.Append(map[string]any{ +// "id": 123, +// "name": "example", +// "data": []any{1, 2, 3}, +// }) +// value, err := b.Build() +// +// To parse an existing variant value: +// +// v, err := variant.New(metadataBytes, valueBytes) +// +// You can access the data using the [Value.Value] method which returns the appropriate Go type: +// +// switch v.Type() { +// case variant.Object: +// obj := v.Value().(variant.ObjectValue) +// field, err := obj.ValueByKey("name") +// case variant.Array: +// arr := v.Value().(variant.ArrayValue) +// elem, err := arr.Value(0) +// case variant.String: +// s := v.Value().(string) +// case variant.Int64: +// i := v.Value().(int64) +// } +// +// You can also switch on the type of the result value from the [Value.Value] method: +// +// switch val := v.Value().(type) { +// case nil: +// // ... +// case int32: +// // ... +// case string: +// // ... 
+// case variant.ArrayValue: +// for i, item := range val.Values() { +// // item is a variant.Value +// } +// case variant.ObjectValue: +// for k, item := range val.Values() { +// // k is the field key +// // item is a variant.Value for that field +// } +// } +// +// Values can also be converted to JSON: +// +// jsonBytes, err := json.Marshal(v) +// +// # Low-level Construction +// +// For direct construction of complex nested structures, you can use the low-level +// methods: +// +// var b variant.Builder Review Comment: If we instead use a `StartObject`/`StartArray` pattern to construct child builders, then the user ends up having to keep track of the child builders instead of keeping track of the offsets, so I'm not sure the added complexity of having to check and enforce that all the nested builders are completed before calling `Finish`/`Build` is worth it or saves that much. I also couldn't think of a clean way to keep the ability to build the whole object with a single buffer (instead of multiple buffers) while handling the nested builders and enforcing all the checks without a ton of repetitive code. I'll think about this a bit more and see if I can come up with something I like. @lidavid @mapleFU what do you think? -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: github-unsubscr...@arrow.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org