sfc-gh-mbojanczyk commented on code in PR #344: URL: https://github.com/apache/arrow-go/pull/344#discussion_r2064947485
########## parquet/variants/primitive.go: ########## @@ -0,0 +1,678 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package variants + +import ( + "fmt" + "io" + "math" + "reflect" + "strings" + "time" +) + +// Variant primitive type IDs. +type primitiveType int + +const ( + primitiveInvalid primitiveType = -1 + primitiveNull primitiveType = 0 + primitiveTrue primitiveType = 1 + primitiveFalse primitiveType = 2 + primitiveInt8 primitiveType = 3 + primitiveInt16 primitiveType = 4 + primitiveInt32 primitiveType = 5 + primitiveInt64 primitiveType = 6 + primitiveDouble primitiveType = 7 + primitiveDecimal4 primitiveType = 8 // TODO + primitiveDecimal8 primitiveType = 9 // TODO + primitiveDecimal16 primitiveType = 10 // TODO + primitiveDate primitiveType = 11 + primitiveTimestampMicros primitiveType = 12 + primitiveTimestampNTZMicros primitiveType = 13 + primitiveFloat primitiveType = 14 + primitiveBinary primitiveType = 15 + primitiveString primitiveType = 16 + primitiveTimeNTZ primitiveType = 17 + primitiveTimestampNanos primitiveType = 18 + primitiveTimestampNTZNanos primitiveType = 19 + primitiveUUID primitiveType = 20 +) + +func (pt primitiveType) String() string { + switch pt { + case primitiveNull: + return "Null" + case primitiveFalse, primitiveTrue: + return "Boolean" + case primitiveInt8: + return "Int8" + case primitiveInt16: + return "Int16" + case primitiveInt32: + return "Int32" + case primitiveInt64: + return "Int64" + case primitiveDouble: + return "Double" + case primitiveDecimal4: + return "Decimal4" + case primitiveDecimal8: + return "Decimal8" + case primitiveDecimal16: + return "Decimal16" + case primitiveDate: + return "Date" + case primitiveTimestampMicros: + return "Timestamp(micros)" + case primitiveTimestampNTZMicros: + return "TimestampNTZ(micros)" + case primitiveFloat: + return "Float" + case primitiveBinary: + return "Binary" + case primitiveString: + return "String" + case primitiveTimeNTZ: + return "TimeNTZ" + case primitiveTimestampNanos: + return "Timestamp(nanos)" + case primitiveTimestampNTZNanos: + return "TimestampNTZ(nanos)" + case primitiveUUID: + return "UUID" + } + return "Invalid" +} + +func validPrimitiveValue(prim primitiveType) error { + if prim < primitiveNull || prim > primitiveUUID { + return fmt.Errorf("invalid primitive type: %d", prim) + } + return nil +} + +func primitiveFromHeader(hdr byte) (primitiveType, error) { + // Special case the basic type of Short String and call it a Primitive String. + bt := BasicTypeFromHeader(hdr) + if bt == BasicShortString { + return primitiveString, nil + } else if bt == BasicPrimitive { + prim := primitiveType(hdr >> 2) + if err := validPrimitiveValue(prim); err != nil { + return primitiveInvalid, err + } + return prim, nil + } + return primitiveInvalid, fmt.Errorf("header is not of a primitive or short string basic type: %s", bt) +} + +func primitiveHeader(prim primitiveType) (byte, error) { + if err := validPrimitiveValue(prim); err != nil { + return 0, err + } + hdr := byte(prim << 2) + hdr |= byte(BasicPrimitive) + return hdr, nil +} + +// marshalPrimitive takes in a primitive value, asserts its type, then marshals the data according to the Variant spec +// into the provided writer, returning the number of bytes written. +// +// Time can be provided in various ways- either by a time.Time struct, or by an int64 when the EncodeAs{Date,Time,Timestamp} +// options are provided. By default, timestamps are written as microseconds- to use nanoseconds pass in EncodeTimeAsNanos. +// Timezone information can be determined from a time.Time struct. Otherwise, by default, timestamps will be written with +// local timezone set. +func marshalPrimitive(v any, w io.Writer, opts ...MarshalOpts) (int, error) { + var allOpts MarshalOpts + for _, o := range opts { + allOpts |= o + } + switch val := v.(type) { + case bool: + return marshalBoolean(val, w), nil + case int: + return marshalInt(int64(val), w), nil + case int8: + return marshalInt(int64(val), w), nil + case int16: + return marshalInt(int64(val), w), nil + case int32: + return marshalInt(int64(val), w), nil Review Comment: Please do! I'm not territorial about this :) I appreciate any insight you've got. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: github-unsubscr...@arrow.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org