[ https://issues.apache.org/jira/browse/BEAM-9746?focusedWorklogId=422286&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-422286 ]
ASF GitHub Bot logged work on BEAM-9746: ---------------------------------------- Author: ASF GitHub Bot Created on: 14/Apr/20 20:24 Start Date: 14/Apr/20 20:24 Worklog Time Spent: 10m Work Description: thetorpedodog commented on pull request #11413: [BEAM-9746] check for 0 length copies from state URL: https://github.com/apache/beam/pull/11413#discussion_r408401030 ########## File path: sdks/go/pkg/beam/core/runtime/harness/statemgr_test.go ########## @@ -258,6 +261,167 @@ func TestStateChannel(t *testing.T) { } } +// TestStateKeyReader validates ordinary Read cases +func TestStateKeyReader(t *testing.T) { + const readLen = 4 + tests := []struct { + name string + buflens []int // sizes of the buffers received on the state channel. + numReads int + closed bool // tries to read from closed reader + noGet bool // tries to read from nil get response reader + }{ + { + name: "emptyData", + buflens: []int{-1}, + numReads: 1, + }, { + name: "singleBufferSingleRead", + buflens: []int{readLen}, + numReads: 2, + }, { + name: "singleBufferMultipleReads", + buflens: []int{2 * readLen}, + numReads: 3, + }, { + name: "singleBufferShortRead", + buflens: []int{readLen - 1}, + numReads: 2, + }, { + name: "multiBuffer", + buflens: []int{readLen, readLen}, + numReads: 3, + }, { + name: "multiBuffer-short-reads", + buflens: []int{readLen - 1, readLen - 1, readLen - 2}, + numReads: 4, + }, { + name: "emptyDataFirst", // Shouldn't happen, but not unreasonable to handle. + buflens: []int{-1, readLen, readLen}, + numReads: 4, + }, { + name: "emptyDataMid", // Shouldn't happen, but not unreasonable to handle. + buflens: []int{readLen, readLen, -1, readLen}, + numReads: 5, + }, { + name: "emptyDataLast", // Shouldn't happen, but not unreasonable to handle. 
+ buflens: []int{readLen, readLen, -1}, + numReads: 3, + }, { + name: "emptyDataLast-short", + buflens: []int{3*readLen - 2, -1}, + numReads: 4, + }, { + name: "closed", + buflens: []int{-1, -1}, + numReads: 1, + closed: true, + }, { + name: "noGet", + buflens: []int{-1}, + numReads: 1, + noGet: true, + }, + } + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + ctx, cancelFn := context.WithCancel(context.Background()) + ch := &StateChannel{ + id: "test", + requests: make(chan *fnpb.StateRequest), + responses: make(map[string]chan<- *fnpb.StateResponse), + cancelFn: cancelFn, + DoneCh: ctx.Done(), + } + + // Handle the channel behavior asynchronously. + go func() { + for i := 0; i < len(test.buflens); i++ { + token := []byte(strconv.Itoa(i)) + var buf []byte + if test.buflens[i] >= 0 { + buf = bytes.Repeat([]byte{42}, test.buflens[i]) + } + // On the last request response pair, send no token. + if i+1 == len(test.buflens) { + token = nil + } + + req := <-ch.requests + + if test.noGet { + ch.responses[req.Id] <- &fnpb.StateResponse{ + Id: req.Id, + } + return + } Review comment: Since this doesn't depend on the rest of what is going on in the loop, pulling this up to the top might be a good idea. ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. 
For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking ------------------- Worklog Id: (was: 422286) Time Spent: 1h 20m (was: 1h 10m) > [Go SDK] Empty side inputs causing spurious zero elements > --------------------------------------------------------- > > Key: BEAM-9746 > URL: https://issues.apache.org/jira/browse/BEAM-9746 > Project: Beam > Issue Type: Improvement > Components: sdk-go > Reporter: Robert Burke > Assignee: Robert Burke > Priority: Major > Time Spent: 1h 20m > Remaining Estimate: 0h > > A user discovered that empty side inputs would spuriously provide a single > zero element. > The error was narrowed down to the Go SDK's state manager code copying the > stateGetResponse data wasn't checking that the original data source even had > any bytes in it, leading it in particular to interpret length prefixed data > as having 0 length, which would cause zero value elements to be generated. > Notably, this caused empty strings. -- This message was sent by Atlassian Jira (v8.3.4#803005)