[ https://issues.apache.org/jira/browse/BEAM-3612?focusedWorklogId=164597&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-164597 ]
ASF GitHub Bot logged work on BEAM-3612: ---------------------------------------- Author: ASF GitHub Bot Created on: 10/Nov/18 01:10 Start Date: 10/Nov/18 01:10 Worklog Time Spent: 10m Work Description: lostluck commented on a change in pull request #7000: [BEAM-3612] Add a shim generator tool URL: https://github.com/apache/beam/pull/7000#discussion_r232434065 ########## File path: sdks/go/pkg/beam/util/starcgenx/starcgenx.go ########## @@ -0,0 +1,565 @@ +// Licensed to the Apache Software Foundation (ASF) under one or more +// contributor license agreements. See the NOTICE file distributed with +// this work for additional information regarding copyright ownership. +// The ASF licenses this file to You under the Apache License, Version 2.0 +// (the "License"); you may not use this file except in compliance with +// the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Package starcgenx is a Static Analysis Type Assertion shim and Registration Code Generator +// which provides an extractor to extract types from a package, in order to generate +// appropriate shims for a package so code can be generated for it. +// +// It's written for use by the starcgen tool, but separate to permit +// alternative "go/importer" Importers for accessing types from imported packages. +package starcgenx + +import ( + "bytes" + "fmt" + "go/ast" + "go/token" + "go/types" + "strings" + + "github.com/apache/beam/sdks/go/pkg/beam/util/shimx" +) + +// NewExtractor returns an extractor for the given package. 
+func NewExtractor(pkg string) *Extractor { + return &Extractor{ + Package: pkg, + functions: make(map[string]struct{}), + types: make(map[string]struct{}), + funcs: make(map[string]*types.Signature), + emits: make(map[string]shimx.Emitter), + iters: make(map[string]shimx.Input), + imports: make(map[string]struct{}), + allExported: true, + } +} + +// Extractor contains and uniquifies the cache of types and things that need to be generated. +type Extractor struct { + w bytes.Buffer + Package string + debug bool + + // Ids is an optional slice of package local identifiers + Ids []string + + // Register and uniquify the needed shims for each kind. + // Functions to Register + functions map[string]struct{} + // Types to Register (structs, essentially) + types map[string]struct{} + // FuncShims needed + funcs map[string]*types.Signature + // Emitter Shims needed + emits map[string]shimx.Emitter + // Iterator Shims needed + iters map[string]shimx.Input + + // list of packages we need to import. + imports map[string]struct{} + + allExported bool // Marks if all ptransforms are exported and available in main. +} + +// Summary prints out a summary of the shims and registrations to +// be generated to the buffer. +func (e *Extractor) Summary() { + e.Print("\n") + e.Print("Summary\n") + e.Printf("All exported?: %v\n", e.allExported) + e.Printf("%d\t Functions\n", len(e.functions)) + e.Printf("%d\t Types\n", len(e.types)) + e.Printf("%d\t Shims\n", len(e.funcs)) + e.Printf("%d\t Emits\n", len(e.emits)) + e.Printf("%d\t Inputs\n", len(e.iters)) +} + +// lifecycleMethodName returns if the passed in string is one of the lifecycle method names used +// by the Go SDK as DoFn or CombineFn lifecycle methods. These are the only methods that need +// shims generated for them, as per beam/core/graph/fn.go +// TODO(lostluck): Move this to beam/core/graph/fn.go, so it can stay up to date. 
+func lifecycleMethodName(n string) bool { + switch n { + case "ProcessElement", "StartBundle", "FinishBundle", "Setup", "Teardown", "CreateAccumulator", "AddInput", "MergeAccumulators", "ExtractOutput", "Compact": + return true + default: + return false + } +} + +// Bytes returns the contents of the extractor buffer. +func (e *Extractor) Bytes() []byte { + return e.w.Bytes() +} + +// Print forwards to fmt.Fprint to the extractor buffer. +func (e *Extractor) Print(s string) { + if e.debug { + fmt.Fprint(&e.w, s) + } +} + +// Printf forwards to fmt.Fprintf to the extractor buffer. +func (e *Extractor) Printf(f string, args ...interface{}) { + if e.debug { + fmt.Fprintf(&e.w, f, args...) + } +} + +// FromAsts analyses the contents of a package +func (e *Extractor) FromAsts(imp types.Importer, fset *token.FileSet, files []*ast.File) error { + conf := types.Config{ + Importer: imp, + IgnoreFuncBodies: true, + DisableUnusedImportCheck: true, + } + info := &types.Info{ + Defs: make(map[*ast.Ident]types.Object), + } + if len(e.Ids) != 0 { + // TODO(lostluck): This becomes unnecessary iff we can figure out + // which ParDos are being passed to beam.ParDo or beam.Combine. + // If there are ids, we need to also look at function bodies, and uses. 
+ var checkFuncBodies bool + for _, v := range e.Ids { + if strings.Contains(v, ".") { + checkFuncBodies = true + break + } + } + conf.IgnoreFuncBodies = !checkFuncBodies + info.Uses = make(map[*ast.Ident]types.Object) + } + + if _, err := conf.Check(e.Package, fset, files, info); err != nil { + return fmt.Errorf("failed to type check package %s : %v", e.Package, err) + } + + e.Print("/*\n") + var idsRequired, idsFound map[string]bool + if len(e.Ids) > 0 { + e.Printf("Filtering by %d identifiers: %q\n", len(e.Ids), strings.Join(e.Ids, ", ")) + idsRequired = make(map[string]bool) + idsFound = make(map[string]bool) + for _, id := range e.Ids { + idsRequired[id] = true + } + } + // TODO(rebo): Need to sort out struct types and their methods, so we only + // register structs that are used as function parameters, or that are clearly + // DoFns or CombineFns. + e.Print("CHECKING DEFS\n") + for id, obj := range info.Defs { + e.fromObj(fset, id, obj, idsRequired, idsFound) + } + e.Print("CHECKING USES\n") + for id, obj := range info.Uses { + e.fromObj(fset, id, obj, idsRequired, idsFound) + } + var notFound []string + for _, k := range e.Ids { + if !idsFound[k] { + notFound = append(notFound, k) + } + } + if len(notFound) > 0 { + return fmt.Errorf("couldn't find the following identifiers; please check for typos, or remove them: %v", strings.Join(notFound, ", ")) + } + e.Print("*/\n") + + return nil +} + +func (e *Extractor) isRequired(ident string, obj types.Object, idsRequired, idsFound map[string]bool) bool { + if idsRequired == nil { + return true + } + if idsFound == nil { + panic("broken invariant: idsFound map is nil, but idsRequired map exists") + } + // If we're filtering IDs, then it needs to be in the filtered identifiers, + // or its receiver type identifier needs to be in the filtered identifiers. + if idsRequired[ident] { + idsFound[ident] = true + return true + } + // Check if this is a function. 
+ sig, ok := obj.Type().(*types.Signature) + if !ok { + return false + } + // If this is a function, and it has a receiver, it's a method. + if recv := sig.Recv(); recv != nil && lifecycleMethodName(ident) { + // We don't want to care about pointers, so dereference to value type. + t := recv.Type() + p, ok := t.(*types.Pointer) + for ok { + t = p.Elem() + p, ok = t.(*types.Pointer) + } + ts := types.TypeString(t, e.qualifier) + e.Printf("RRR has %v, ts: %s %s--- ", sig, ts, ident) + if !idsRequired[ts] { + e.Print("IGNORE\n") + return false + } + e.Print("KEEP\n") + idsFound[ts] = true + return true + } + return false +} + +func (e *Extractor) fromObj(fset *token.FileSet, id *ast.Ident, obj types.Object, idsRequired, idsFound map[string]bool) { + if obj == nil { // Omit the package declaration. + e.Printf("%s: %q has no object, probably a package\n", + fset.Position(id.Pos()), id.Name) + return + } + + pkg := obj.Pkg() + if pkg == nil { + e.Printf("%s: %q has no package \n", + fset.Position(id.Pos()), id.Name) + // No meaningful identifier. + return + } + ident := fmt.Sprintf("%s.%s", pkg.Name(), obj.Name()) + if pkg.Name() == e.Package { + ident = obj.Name() + } + if !e.isRequired(ident, obj, idsRequired, idsFound) { + return + } + + switch ot := obj.(type) { + case *types.Var: + // Vars are tricky since they could be anything, and anywhere (package scope, parameters, etc) + // eg. Flags, or Field Tags, among others. + // I'm increasingly convinced that we should simply ignore vars. + // Do nothing for vars. + case *types.Func: + sig := obj.Type().(*types.Signature) + if recv := sig.Recv(); recv != nil { + // Methods don't need registering, but they do need shim generation. + e.Printf("%s: %q is a method of %v -> %v--- %T %v %v %v\n", + fset.Position(id.Pos()), id.Name, recv.Type(), obj, obj, id, obj.Pkg(), obj.Type()) + if !lifecycleMethodName(id.Name) { + // If this is not a lifecycle method, we should ignore it. 
+ return + } + } else if id.Name != "init" { + // init functions are special and should be ignored. + // Functions need registering, as well as shim generation. + e.Printf("%s: %q is a top level func %v --- %T %v %v %v\n", + fset.Position(id.Pos()), ident, obj, obj, id, obj.Pkg(), obj.Type()) + e.functions[ident] = struct{}{} + } + // For functions from other packages. + if pkg.Name() != e.Package { + e.imports[pkg.Path()] = struct{}{} + } + + e.funcs[e.sigKey(sig)] = sig + e.extractParameters(sig) + e.Printf("\t%v\n", sig) + case *types.TypeName: + e.Printf("%s: %q is a type %v --- %T %v %v %v %v\n", + fset.Position(id.Pos()), id.Name, obj, obj, id, obj.Pkg(), obj.Type(), obj.Name()) + // Probably need to sanity check that this type actually is/has a ProcessElement + // or MergeAccumulators defined for this type so unnecessary registrations don't happen, + // and can explicitly produce an error if an explicitly named type *isn't* a DoFn or CombineFn. + e.extractType(ot) + default: + e.Printf("%s: %q defines %v --- %T %v %v %v\n", + fset.Position(id.Pos()), types.ObjectString(obj, e.qualifier), obj, obj, id, obj.Pkg(), obj.Type()) + } +} + +func (e *Extractor) extractType(ot *types.TypeName) { + name := types.TypeString(ot.Type(), e.qualifier) + // Unwrap an alias by one level. + // Attempting to dereference a full chain of aliases runs the risk of crossing + // a visibility boundary such as internal packages. + // A single level is safe since the code we're analysing imports it, + // so we can assume the generated code can access it too. + if ot.IsAlias() { + if t, ok := ot.Type().(*types.Named); ok { + ot = t.Obj() + name = types.TypeString(t, e.qualifier) + + if pkg := t.Obj().Pkg(); pkg != nil { + e.imports[pkg.Path()] = struct{}{} + } + } + } + e.types[name] = struct{}{} +} + +// Examines the signature and extracts types of parameters for generating +// necessary imports and emitter and iterator code. 
+func (e *Extractor) extractParameters(sig *types.Signature) { + in := sig.Params() // *types.Tuple + for i := 0; i < in.Len(); i++ { + s := in.At(i) // *types.Var + + // Pointer types need to be iteratively unwrapped until we're at the base type, + // so we can get the import if necessary. + t := s.Type() + p, ok := t.(*types.Pointer) + for ok { + t = p.Elem() + p, ok = t.(*types.Pointer) + } + // Here's where we ensure we register new imports. + if t, ok := t.(*types.Named); ok { + if pkg := t.Obj().Pkg(); pkg != nil { + e.imports[pkg.Path()] = struct{}{} Review comment: 1. Exactly, empty struct, being initialized. 2. an empty struct has size 0, so this makes maps functionally a set, with only the map bits. This is useful for de-duplicating entries by key. The alternative is to use booleans, which is more concise when subsequently checking presence eg. foo := make(map[string]bool) if foo["bar"] { Baz() } vs foo := make(map[string]struct{}) if _, ok := foo["bar"]; ok { Baz() } ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking ------------------- Worklog Id: (was: 164597) Time Spent: 5h (was: 4h 50m) > Make it easy to generate type-specialized Go SDK reflectx.Funcs > --------------------------------------------------------------- > > Key: BEAM-3612 > URL: https://issues.apache.org/jira/browse/BEAM-3612 > Project: Beam > Issue Type: Improvement > Components: sdk-go > Reporter: Henning Rohde > Assignee: Robert Burke > Priority: Major > Time Spent: 5h > Remaining Estimate: 0h > -- This message was sent by Atlassian JIRA (v7.6.3#76005)