zeroshade commented on code in PR #35654: URL: https://github.com/apache/arrow/pull/35654#discussion_r1221876814
########## go/arrow/compute/exprs/builders.go: ########## @@ -0,0 +1,445 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//go:build go1.18 + +package exprs + +import ( + "fmt" + "strconv" + "strings" + "unicode" + + "github.com/apache/arrow/go/v13/arrow" + "github.com/apache/arrow/go/v13/arrow/compute" + "github.com/substrait-io/substrait-go/expr" + "github.com/substrait-io/substrait-go/extensions" + "github.com/substrait-io/substrait-go/types" +) + +// NewDefaultExtensionSet constructs an empty extension set using the default +// Arrow Extension registry and the default collection of substrait extensions +// from the Substrait-go repo. +func NewDefaultExtensionSet() ExtensionIDSet { + return NewExtensionSetDefault(expr.NewEmptyExtensionRegistry(&extensions.DefaultCollection)) +} + +// NewScalarCall constructs a substrait ScalarFunction expression with the provided +// options and arguments. +// +// The function name (fn) is looked up in the internal Arrow DefaultExtensionIDRegistry +// to ensure it exists and to convert from the Arrow function name to the substrait +// function name. It is then looked up using the DefaultCollection from the +// substrait extensions module to find the declaration. 
If it cannot be found, +// we try constructing the compound signature name by getting the types of the +// arguments which were passed and appending them to the function name appropriately. +// +// An error is returned if the function cannot be resolved. +func NewScalarCall(reg ExtensionIDSet, fn string, opts []*types.FunctionOption, args ...types.FuncArg) (*expr.ScalarFunction, error) { + conv, ok := reg.GetArrowRegistry().GetArrowToSubstrait(fn) + if !ok { + return nil, arrow.ErrNotFound + } + + id, convOpts, err := conv(fn) + if err != nil { + return nil, err + } + + opts = append(opts, convOpts...) + return expr.NewScalarFunc(reg.GetSubstraitRegistry(), id, opts, args...) +} + +// NewFieldRefFromDotPath constructs a substrait reference segment from +// a dot path and the base schema. +// +// dot_path = '.' name +// +// | '[' digit+ ']' +// | dot_path+ +// +// # Examples +// +// Assume root schema of {alpha: i32, beta: struct<gamma: list<i32>>, delta: map<string, i32>} +// +// ".alpha" => StructFieldRef(0) +// "[2]" => StructFieldRef(2) +// ".beta[0]" => StructFieldRef(1, StructFieldRef(0)) +// "[1].gamma[3]" => StructFieldRef(1, StructFieldRef(0, ListElementRef(3))) +// ".delta.foobar" => StructFieldRef(2, MapKeyRef("foobar")) +// +// Note: when parsing a name, a '\' preceding any other character +// will be dropped from the resulting name. Therefore if a name must +// contain the characters '.', '\', '[', or ']' then they must be escaped +// with a preceding '\'. 
+func NewFieldRefFromDotPath(dotpath string, rootSchema *arrow.Schema) (expr.ReferenceSegment, error) { + if len(dotpath) == 0 { + return nil, fmt.Errorf("%w dotpath was empty", arrow.ErrInvalid) + } + + parseName := func() string { + var name string + for { + idx := strings.IndexAny(dotpath, `\[.`) + if idx == -1 { + name += dotpath + dotpath = "" + break + } + + if dotpath[idx] != '\\' { + // subscript for a new field ref Review Comment: On line 120, the `name += dotpath[:idx] + string(dotpath[idx+1])` will end up adding the `]` to the `name`, and then we consume the `]` on the next line by doing `dotpath = dotpath[idx+2:]`. For this function (`parseName`), there isn't necessarily a requirement that there be a matching `]`, since you only end up with a `[` here if it's escaped with `\\`. For the index case of something like `[2]`, the `]` is consumed by line 199 when we do `dotpath = dotpath[subend+1:]` to skip past the `]` (as `dotpath[subend]` is the closing `]`). -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: github-unsubscr...@arrow.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org