[ 
https://issues.apache.org/jira/browse/BEAM-3612?focusedWorklogId=164597&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-164597
 ]

ASF GitHub Bot logged work on BEAM-3612:
----------------------------------------

                Author: ASF GitHub Bot
            Created on: 10/Nov/18 01:10
            Start Date: 10/Nov/18 01:10
    Worklog Time Spent: 10m 
      Work Description: lostluck commented on a change in pull request #7000: 
[BEAM-3612] Add a shim generator tool
URL: https://github.com/apache/beam/pull/7000#discussion_r232434065
 
 

 ##########
 File path: sdks/go/pkg/beam/util/starcgenx/starcgenx.go
 ##########
 @@ -0,0 +1,565 @@
+// Licensed to the Apache Software Foundation (ASF) under one or more
+// contributor license agreements.  See the NOTICE file distributed with
+// this work for additional information regarding copyright ownership.
+// The ASF licenses this file to You under the Apache License, Version 2.0
+// (the "License"); you may not use this file except in compliance with
+// the License.  You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Package starcgenx is a Static Analysis Type Assertion shim and Registration 
Code Generator
+// which provides an extractor to extract types from a package, in order to 
generate
+// appropriate shims for a package so code can be generated for it.
+//
+// It's written for use by the starcgen tool, but separate to permit
+// alternative "go/importer" Importers for accessing types from imported 
packages.
+package starcgenx
+
+import (
+       "bytes"
+       "fmt"
+       "go/ast"
+       "go/token"
+       "go/types"
+       "strings"
+
+       "github.com/apache/beam/sdks/go/pkg/beam/util/shimx"
+)
+
+// NewExtractor returns an extractor for the given package.
+func NewExtractor(pkg string) *Extractor {
+       return &Extractor{
+               Package:     pkg,
+               functions:   make(map[string]struct{}),
+               types:       make(map[string]struct{}),
+               funcs:       make(map[string]*types.Signature),
+               emits:       make(map[string]shimx.Emitter),
+               iters:       make(map[string]shimx.Input),
+               imports:     make(map[string]struct{}),
+               allExported: true,
+       }
+}
+
+// Extractor contains and uniquifies the cache of types and things that need 
to be generated.
+type Extractor struct {
+       w       bytes.Buffer
+       Package string
+       debug   bool
+
+       // Ids is an optional slice of package local identifiers
+       Ids []string
+
+       // Register and uniquify the needed shims for each kind.
+       // Functions to Register
+       functions map[string]struct{}
+       // Types to Register (structs, essentially)
+       types map[string]struct{}
+       // FuncShims needed
+       funcs map[string]*types.Signature
+       // Emitter Shims needed
+       emits map[string]shimx.Emitter
+       // Iterator Shims needed
+       iters map[string]shimx.Input
+
+       // list of packages we need to import.
+       imports map[string]struct{}
+
+       allExported bool // Marks if all ptransforms are exported and available 
in main.
+}
+
+// Summary prints out a summary of the shims and registrations to
+// be generated to the buffer.
+func (e *Extractor) Summary() {
+       e.Print("\n")
+       e.Print("Summary\n")
+       e.Printf("All exported?: %v\n", e.allExported)
+       e.Printf("%d\t Functions\n", len(e.functions))
+       e.Printf("%d\t Types\n", len(e.types))
+       e.Printf("%d\t Shims\n", len(e.funcs))
+       e.Printf("%d\t Emits\n", len(e.emits))
+       e.Printf("%d\t Inputs\n", len(e.iters))
+}
+
+// lifecycleMethodName returns if the passed in string is one of the lifecycle 
method names used
+// by the Go SDK as DoFn or CombineFn lifecycle methods. These are the only 
methods that need
+// shims generated for them, as per beam/core/graph/fn.go
+// TODO(lostluck): Move this to beam/core/graph/fn.go, so it can stay up to 
date.
+func lifecycleMethodName(n string) bool {
+       switch n {
+       case "ProcessElement", "StartBundle", "FinishBundle", "Setup", 
"Teardown", "CreateAccumulator", "AddInput", "MergeAccumulators", 
"ExtractOutput", "Compact":
+               return true
+       default:
+               return false
+       }
+}
+
+// Bytes returns the contents of the extractor buffer.
+func (e *Extractor) Bytes() []byte {
+       return e.w.Bytes()
+}
+
+// Print forwards to fmt.Fprint to the extractor buffer.
+func (e *Extractor) Print(s string) {
+       if e.debug {
+               fmt.Fprint(&e.w, s)
+       }
+}
+
+// Printf forwards to fmt.Fprintf to the extractor buffer.
+func (e *Extractor) Printf(f string, args ...interface{}) {
+       if e.debug {
+               fmt.Fprintf(&e.w, f, args...)
+       }
+}
+
+// FromAsts analyses the contents of a package
+func (e *Extractor) FromAsts(imp types.Importer, fset *token.FileSet, files 
[]*ast.File) error {
+       conf := types.Config{
+               Importer:                 imp,
+               IgnoreFuncBodies:         true,
+               DisableUnusedImportCheck: true,
+       }
+       info := &types.Info{
+               Defs: make(map[*ast.Ident]types.Object),
+       }
+       if len(e.Ids) != 0 {
+               // TODO(lostluck): This becomes unnecessary iff we can figure 
out
+               // which ParDos are being passed to beam.ParDo or beam.Combine.
+               // If there are ids, we need to also look at function bodies, 
and uses.
+               var checkFuncBodies bool
+               for _, v := range e.Ids {
+                       if strings.Contains(v, ".") {
+                               checkFuncBodies = true
+                               break
+                       }
+               }
+               conf.IgnoreFuncBodies = !checkFuncBodies
+               info.Uses = make(map[*ast.Ident]types.Object)
+       }
+
+       if _, err := conf.Check(e.Package, fset, files, info); err != nil {
+               return fmt.Errorf("failed to type check package %s : %v", 
e.Package, err)
+       }
+
+       e.Print("/*\n")
+       var idsRequired, idsFound map[string]bool
+       if len(e.Ids) > 0 {
+               e.Printf("Filtering by %d identifiers: %q\n", len(e.Ids), 
strings.Join(e.Ids, ", "))
+               idsRequired = make(map[string]bool)
+               idsFound = make(map[string]bool)
+               for _, id := range e.Ids {
+                       idsRequired[id] = true
+               }
+       }
+       // TODO(rebo): Need to sort out struct types and their methods, so we 
only
+       // register structs that are used as function parameters, or that are 
clearly
+       // DoFns or CombineFns.
+       e.Print("CHECKING DEFS\n")
+       for id, obj := range info.Defs {
+               e.fromObj(fset, id, obj, idsRequired, idsFound)
+       }
+       e.Print("CHECKING USES\n")
+       for id, obj := range info.Uses {
+               e.fromObj(fset, id, obj, idsRequired, idsFound)
+       }
+       var notFound []string
+       for _, k := range e.Ids {
+               if !idsFound[k] {
+                       notFound = append(notFound, k)
+               }
+       }
+       if len(notFound) > 0 {
+               return fmt.Errorf("couldn't find the following identifiers; 
please check for typos, or remove them: %v", strings.Join(notFound, ", "))
+       }
+       e.Print("*/\n")
+
+       return nil
+}
+
+func (e *Extractor) isRequired(ident string, obj types.Object, idsRequired, 
idsFound map[string]bool) bool {
+       if idsRequired == nil {
+               return true
+       }
+       if idsFound == nil {
+               panic("broken invariant: idsFound map is nil, but idsRequired 
map exists")
+       }
+       // If we're filtering IDs, then it needs to be in the filtered 
identifiers,
+       // or its receiver type identifier needs to be in the filtered 
identifiers.
+       if idsRequired[ident] {
+               idsFound[ident] = true
+               return true
+       }
+       // Check if this is a function.
+       sig, ok := obj.Type().(*types.Signature)
+       if !ok {
+               return false
+       }
+       // If this is a function, and it has a receiver, it's a method.
+       if recv := sig.Recv(); recv != nil && lifecycleMethodName(ident) {
+               // We don't want to care about pointers, so dereference to 
value type.
+               t := recv.Type()
+               p, ok := t.(*types.Pointer)
+               for ok {
+                       t = p.Elem()
+                       p, ok = t.(*types.Pointer)
+               }
+               ts := types.TypeString(t, e.qualifier)
+               e.Printf("RRR has %v, ts: %s %s--- ", sig, ts, ident)
+               if !idsRequired[ts] {
+                       e.Print("IGNORE\n")
+                       return false
+               }
+               e.Print("KEEP\n")
+               idsFound[ts] = true
+               return true
+       }
+       return false
+}
+
+func (e *Extractor) fromObj(fset *token.FileSet, id *ast.Ident, obj 
types.Object, idsRequired, idsFound map[string]bool) {
+       if obj == nil { // Omit the package declaration.
+               e.Printf("%s: %q has no object, probably a package\n",
+                       fset.Position(id.Pos()), id.Name)
+               return
+       }
+
+       pkg := obj.Pkg()
+       if pkg == nil {
+               e.Printf("%s: %q has no package \n",
+                       fset.Position(id.Pos()), id.Name)
+               // No meaningful identifier.
+               return
+       }
+       ident := fmt.Sprintf("%s.%s", pkg.Name(), obj.Name())
+       if pkg.Name() == e.Package {
+               ident = obj.Name()
+       }
+       if !e.isRequired(ident, obj, idsRequired, idsFound) {
+               return
+       }
+
+       switch ot := obj.(type) {
+       case *types.Var:
+               // Vars are tricky since they could be anything, and anywhere 
(package scope, parameters, etc)
+               // eg. Flags, or Field Tags, among others.
+               // I'm increasingly convinced that we should simply ignore 
vars.
+               // Do nothing for vars.
+       case *types.Func:
+               sig := obj.Type().(*types.Signature)
+               if recv := sig.Recv(); recv != nil {
+                       // Methods don't need registering, but they do need 
shim generation.
+                       e.Printf("%s: %q is a method of %v -> %v--- %T %v %v 
%v\n",
+                               fset.Position(id.Pos()), id.Name, recv.Type(), 
obj, obj, id, obj.Pkg(), obj.Type())
+                       if !lifecycleMethodName(id.Name) {
+                               // If this is not a lifecycle method, we should 
ignore it.
+                               return
+                       }
+               } else if id.Name != "init" {
+                       // init functions are special and should be ignored.
+                       // Functions need registering, as well as shim 
generation.
+                       e.Printf("%s: %q is a top level func %v --- %T %v %v 
%v\n",
+                               fset.Position(id.Pos()), ident, obj, obj, id, 
obj.Pkg(), obj.Type())
+                       e.functions[ident] = struct{}{}
+               }
+               // For functions from other packages.
+               if pkg.Name() != e.Package {
+                       e.imports[pkg.Path()] = struct{}{}
+               }
+
+               e.funcs[e.sigKey(sig)] = sig
+               e.extractParameters(sig)
+               e.Printf("\t%v\n", sig)
+       case *types.TypeName:
+               e.Printf("%s: %q is a type %v --- %T %v %v %v %v\n",
+                       fset.Position(id.Pos()), id.Name, obj, obj, id, 
obj.Pkg(), obj.Type(), obj.Name())
+               // Probably need to sanity check that this type actually is/has 
a ProcessElement
+               // or MergeAccumulators defined for this type so unnecessary 
registrations don't happen,
+               // and can explicitly produce an error if an explicitly named 
type *isn't* a DoFn or CombineFn.
+               e.extractType(ot)
+       default:
+               e.Printf("%s: %q defines %v --- %T %v %v %v\n",
+                       fset.Position(id.Pos()), types.ObjectString(obj, 
e.qualifier), obj, obj, id, obj.Pkg(), obj.Type())
+       }
+}
+
+func (e *Extractor) extractType(ot *types.TypeName) {
+       name := types.TypeString(ot.Type(), e.qualifier)
+       // Unwrap an alias by one level.
+       // Attempting to dereference a full chain of aliases runs the risk of 
crossing
+       // a visibility boundary such as internal packages.
+       // A single level is safe since the code we're analysing imports it,
+       // so we can assume the generated code can access it too.
+       if ot.IsAlias() {
+               if t, ok := ot.Type().(*types.Named); ok {
+                       ot = t.Obj()
+                       name = types.TypeString(t, e.qualifier)
+
+                       if pkg := t.Obj().Pkg(); pkg != nil {
+                               e.imports[pkg.Path()] = struct{}{}
+                       }
+               }
+       }
+       e.types[name] = struct{}{}
+}
+
+// Examines the signature and extracts types of parameters for generating
+// necessary imports and emitter and iterator code.
+func (e *Extractor) extractParameters(sig *types.Signature) {
+       in := sig.Params() // *types.Tuple
+       for i := 0; i < in.Len(); i++ {
+               s := in.At(i) // *types.Var
+
+               // Pointer types need to be iteratively unwrapped until we're 
at the base type,
+               // so we can get the import if necessary.
+               t := s.Type()
+               p, ok := t.(*types.Pointer)
+               for ok {
+                       t = p.Elem()
+                       p, ok = t.(*types.Pointer)
+               }
+               // Here's where we ensure we register new imports.
+               if t, ok := t.(*types.Named); ok {
+                       if pkg := t.Obj().Pkg(); pkg != nil {
+                               e.imports[pkg.Path()] = struct{}{}
 
 Review comment:
   1. Exactly: it's an empty struct, being initialized. 
   2. An empty struct has size 0, so using it as the value type makes the map 
functionally a set, carrying only the map's own overhead for the keys. 
   This is useful for de-duplicating entries by key.
   
   The alternative is to use booleans, which is more concise when subsequently 
checking presence
   eg.
   foo := make(map[string]bool)
   if foo["bar"] { 
    Baz()
   }
   
   vs  
   foo := make(map[string]struct{})
   if _, ok := foo["bar"]; ok {
     Baz()
   }
   

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


Issue Time Tracking
-------------------

    Worklog Id:     (was: 164597)
    Time Spent: 5h  (was: 4h 50m)

> Make it easy to generate type-specialized Go SDK reflectx.Funcs
> ---------------------------------------------------------------
>
>                 Key: BEAM-3612
>                 URL: https://issues.apache.org/jira/browse/BEAM-3612
>             Project: Beam
>          Issue Type: Improvement
>          Components: sdk-go
>            Reporter: Henning Rohde
>            Assignee: Robert Burke
>            Priority: Major
>          Time Spent: 5h
>  Remaining Estimate: 0h
>




--
This message was sent by Atlassian JIRA
(v7.6.3#76005)

Reply via email to