[ https://issues.apache.org/jira/browse/BEAM-4745?focusedWorklogId=120996&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-120996 ]
ASF GitHub Bot logged work on BEAM-4745: ---------------------------------------- Author: ASF GitHub Bot Created on: 09/Jul/18 20:12 Start Date: 09/Jul/18 20:12 Worklog Time Spent: 10m Work Description: iemejia closed pull request #5905: [BEAM-4745] Revert "[BEAM-4016] Invoke Setup and TearDown on SplitRestrictionFn and PairWithRestrictionFn" URL: https://github.com/apache/beam/pull/5905 This is a PR merged from a forked repository. As GitHub hides the original diff on merge, it is displayed below for the sake of provenance: As this is a foreign pull request (from a fork), the diff is supplied below (as it won't show otherwise due to GitHub magic): diff --git a/runners/core-construction-java/src/main/java/org/apache/beam/runners/core/construction/SplittableParDo.java b/runners/core-construction-java/src/main/java/org/apache/beam/runners/core/construction/SplittableParDo.java index 8d8da216afa..b581eecf414 100644 --- a/runners/core-construction-java/src/main/java/org/apache/beam/runners/core/construction/SplittableParDo.java +++ b/runners/core-construction-java/src/main/java/org/apache/beam/runners/core/construction/SplittableParDo.java @@ -415,7 +415,6 @@ public String apply(T input) { @Setup public void setup() { invoker = DoFnInvokers.invokerFor(fn); - invoker.invokeSetup(); } @ProcessElement @@ -423,12 +422,6 @@ public void processElement(ProcessContext context) { context.output( KV.of(context.element(), invoker.invokeGetInitialRestriction(context.element()))); } - - @Teardown - public void tearDown() { - invoker.invokeTeardown(); - invoker = null; - } } /** Splits the restriction using the given {@link SplitRestriction} method. 
*/ @@ -446,7 +439,6 @@ public void tearDown() { @Setup public void setup() { invoker = DoFnInvokers.invokerFor(splittableFn); - invoker.invokeSetup(); } @ProcessElement @@ -467,11 +459,5 @@ public void outputWithTimestamp(RestrictionT part, Instant timestamp) { } }); } - - @Teardown - public void tearDown() { - invoker.invokeTeardown(); - invoker = null; - } } } diff --git a/sdks/java/core/src/test/java/org/apache/beam/sdk/transforms/SplittableDoFnTest.java b/sdks/java/core/src/test/java/org/apache/beam/sdk/transforms/SplittableDoFnTest.java index fe33b1ab5b8..b7f0c10d046 100644 --- a/sdks/java/core/src/test/java/org/apache/beam/sdk/transforms/SplittableDoFnTest.java +++ b/sdks/java/core/src/test/java/org/apache/beam/sdk/transforms/SplittableDoFnTest.java @@ -513,19 +513,18 @@ public void testLateData() throws Exception { private State state = State.BEFORE_SETUP; + @ProcessElement + public void processElement(ProcessContext c, OffsetRangeTracker tracker) { + assertEquals(State.INSIDE_BUNDLE, state); + assertTrue(tracker.tryClaim(0L)); + c.output(c.element()); + } + @GetInitialRestriction public OffsetRange getInitialRestriction(String value) { - assertEquals(State.OUTSIDE_BUNDLE, state); return new OffsetRange(0, 1); } - @SplitRestriction - public void splitRestriction( - String value, OffsetRange range, OutputReceiver<OffsetRange> receiver) { - assertEquals(State.OUTSIDE_BUNDLE, state); - receiver.output(range); - } - @Setup public void setUp() { assertEquals(State.BEFORE_SETUP, state); @@ -538,13 +537,6 @@ public void startBundle() { state = State.INSIDE_BUNDLE; } - @ProcessElement - public void processElement(ProcessContext c, OffsetRangeTracker tracker) { - assertEquals(State.INSIDE_BUNDLE, state); - assertTrue(tracker.tryClaim(0L)); - c.output(c.element()); - } - @FinishBundle public void finishBundle() { assertEquals(State.INSIDE_BUNDLE, state); @@ -561,9 +553,12 @@ public void tearDown() { @Test @Category({ValidatesRunner.class, UsesSplittableParDo.class}) 
public void testLifecycleMethods() throws Exception { + PCollection<String> res = p.apply(Create.of("a", "b", "c")).apply(ParDo.of(new SDFWithLifecycle())); + PAssert.that(res).containsInAnyOrder("a", "b", "c"); + p.run(); } ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org Issue Time Tracking ------------------- Worklog Id: (was: 120996) Time Spent: 20m (was: 10m) > SDF tests broken by innocent change due to Dataflow worker dependencies > ----------------------------------------------------------------------- > > Key: BEAM-4745 > URL: https://issues.apache.org/jira/browse/BEAM-4745 > Project: Beam > Issue Type: Bug > Components: runner-dataflow > Reporter: Eugene Kirpichov > Assignee: Eugene Kirpichov > Priority: Critical > Time Spent: 20m > Remaining Estimate: 0h > > https://github.com/apache/beam/pull/5894 broke SDF in the Dataflow streaming > runner; using SDFs fails with the error below. > The reason is that the Dataflow worker has a staged copy of some stuff including > runners-core-construction, and it comes before user code in the classpath. So > the pipeline includes a serialized SplittableParDo from master, but the > worker deserializes it using a stale class file. > This needs to be fixed on the Dataflow side. Filing this JIRA just to track the > externally facing issue. > Meanwhile, to stop the bleeding, I'm going to revert the change. By > itself it's a correct change, but it's better to have SDFs not invoke > setup/teardown than to have them not work at all. 
> CC: [~iemejia] > java.lang.RuntimeException: > com.google.cloud.dataflow.worker.repackaged.com.google.common.util.concurrent.UncheckedExecutionException: > java.lang.IllegalArgumentException: unable to deserialize Serialized DoFnInfo > > com.google.cloud.dataflow.worker.IntrinsicMapTaskExecutorFactory$1.typedApply(IntrinsicMapTaskExecutorFactory.java:192) > > com.google.cloud.dataflow.worker.IntrinsicMapTaskExecutorFactory$1.typedApply(IntrinsicMapTaskExecutorFactory.java:163) > > com.google.cloud.dataflow.worker.graph.Networks$TypeSafeNodeFunction.apply(Networks.java:63) > > com.google.cloud.dataflow.worker.graph.Networks$TypeSafeNodeFunction.apply(Networks.java:50) > > com.google.cloud.dataflow.worker.graph.Networks.replaceDirectedNetworkNodes(Networks.java:87) > > com.google.cloud.dataflow.worker.IntrinsicMapTaskExecutorFactory.create(IntrinsicMapTaskExecutorFactory.java:123) > > com.google.cloud.dataflow.worker.StreamingDataflowWorker.process(StreamingDataflowWorker.java:1143) > > com.google.cloud.dataflow.worker.StreamingDataflowWorker.access$1000(StreamingDataflowWorker.java:136) > > com.google.cloud.dataflow.worker.StreamingDataflowWorker$6.run(StreamingDataflowWorker.java:966) > > java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142) > > java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617) > java.lang.Thread.run(Thread.java:745) > Caused by: > com.google.cloud.dataflow.worker.repackaged.com.google.common.util.concurrent.UncheckedExecutionException: > java.lang.IllegalArgumentException: unable to deserialize Serialized DoFnInfo > > com.google.cloud.dataflow.worker.repackaged.com.google.common.cache.LocalCache$Segment.get(LocalCache.java:2214) > > com.google.cloud.dataflow.worker.repackaged.com.google.common.cache.LocalCache.get(LocalCache.java:4053) > > com.google.cloud.dataflow.worker.repackaged.com.google.common.cache.LocalCache$LocalManualCache.get(LocalCache.java:4899) > > 
com.google.cloud.dataflow.worker.UserParDoFnFactory.create(UserParDoFnFactory.java:90) > > com.google.cloud.dataflow.worker.DefaultParDoFnFactory.create(DefaultParDoFnFactory.java:74) > > com.google.cloud.dataflow.worker.IntrinsicMapTaskExecutorFactory.createParDoOperation(IntrinsicMapTaskExecutorFactory.java:262) > > com.google.cloud.dataflow.worker.IntrinsicMapTaskExecutorFactory.access$000(IntrinsicMapTaskExecutorFactory.java:84) > > com.google.cloud.dataflow.worker.IntrinsicMapTaskExecutorFactory$1.typedApply(IntrinsicMapTaskExecutorFactory.java:181) > > com.google.cloud.dataflow.worker.IntrinsicMapTaskExecutorFactory$1.typedApply(IntrinsicMapTaskExecutorFactory.java:163) > > com.google.cloud.dataflow.worker.graph.Networks$TypeSafeNodeFunction.apply(Networks.java:63) > > com.google.cloud.dataflow.worker.graph.Networks$TypeSafeNodeFunction.apply(Networks.java:50) > > com.google.cloud.dataflow.worker.graph.Networks.replaceDirectedNetworkNodes(Networks.java:87) > > com.google.cloud.dataflow.worker.IntrinsicMapTaskExecutorFactory.create(IntrinsicMapTaskExecutorFactory.java:123) > > com.google.cloud.dataflow.worker.StreamingDataflowWorker.process(StreamingDataflowWorker.java:1143) > > com.google.cloud.dataflow.worker.StreamingDataflowWorker.access$1000(StreamingDataflowWorker.java:136) > > com.google.cloud.dataflow.worker.StreamingDataflowWorker$6.run(StreamingDataflowWorker.java:966) > > java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142) > > java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617) > java.lang.Thread.run(Thread.java:745) > Caused by: java.lang.IllegalArgumentException: unable to deserialize > Serialized DoFnInfo > > org.apache.beam.sdk.util.SerializableUtils.deserializeFromByteArray(SerializableUtils.java:74) > > com.google.cloud.dataflow.worker.UserParDoFnFactory$UserDoFnExtractor.getDoFnInfo(UserParDoFnFactory.java:61) > > 
com.google.cloud.dataflow.worker.UserParDoFnFactory.lambda$create$0(UserParDoFnFactory.java:92) > > com.google.cloud.dataflow.worker.repackaged.com.google.common.cache.LocalCache$LocalManualCache$1.load(LocalCache.java:4904) > > com.google.cloud.dataflow.worker.repackaged.com.google.common.cache.LocalCache$LoadingValueReference.loadFuture(LocalCache.java:3628) > > com.google.cloud.dataflow.worker.repackaged.com.google.common.cache.LocalCache$Segment.loadSync(LocalCache.java:2336) > > com.google.cloud.dataflow.worker.repackaged.com.google.common.cache.LocalCache$Segment.lockedGetOrLoad(LocalCache.java:2295) > > com.google.cloud.dataflow.worker.repackaged.com.google.common.cache.LocalCache$Segment.get(LocalCache.java:2208) > > com.google.cloud.dataflow.worker.repackaged.com.google.common.cache.LocalCache.get(LocalCache.java:4053) > > com.google.cloud.dataflow.worker.repackaged.com.google.common.cache.LocalCache$LocalManualCache.get(LocalCache.java:4899) > > com.google.cloud.dataflow.worker.UserParDoFnFactory.create(UserParDoFnFactory.java:90) > > com.google.cloud.dataflow.worker.DefaultParDoFnFactory.create(DefaultParDoFnFactory.java:74) > > com.google.cloud.dataflow.worker.IntrinsicMapTaskExecutorFactory.createParDoOperation(IntrinsicMapTaskExecutorFactory.java:262) > > com.google.cloud.dataflow.worker.IntrinsicMapTaskExecutorFactory.access$000(IntrinsicMapTaskExecutorFactory.java:84) > > com.google.cloud.dataflow.worker.IntrinsicMapTaskExecutorFactory$1.typedApply(IntrinsicMapTaskExecutorFactory.java:181) > > com.google.cloud.dataflow.worker.IntrinsicMapTaskExecutorFactory$1.typedApply(IntrinsicMapTaskExecutorFactory.java:163) > > com.google.cloud.dataflow.worker.graph.Networks$TypeSafeNodeFunction.apply(Networks.java:63) > > com.google.cloud.dataflow.worker.graph.Networks$TypeSafeNodeFunction.apply(Networks.java:50) > > com.google.cloud.dataflow.worker.graph.Networks.replaceDirectedNetworkNodes(Networks.java:87) > > 
com.google.cloud.dataflow.worker.IntrinsicMapTaskExecutorFactory.create(IntrinsicMapTaskExecutorFactory.java:123) > > com.google.cloud.dataflow.worker.StreamingDataflowWorker.process(StreamingDataflowWorker.java:1143) > > com.google.cloud.dataflow.worker.StreamingDataflowWorker.access$1000(StreamingDataflowWorker.java:136) > > com.google.cloud.dataflow.worker.StreamingDataflowWorker$6.run(StreamingDataflowWorker.java:966) > > java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142) > > java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617) > java.lang.Thread.run(Thread.java:745) > Caused by: java.io.InvalidClassException: > org.apache.beam.runners.core.construction.SplittableParDo$PairWithRestrictionFn; > local class incompatible: stream classdesc serialVersionUID = > -2216501394657530686, local class serialVersionUID = -6277163835950193211 > java.io.ObjectStreamClass.initNonProxy(ObjectStreamClass.java:616) > > java.io.ObjectInputStream.readNonProxyDesc(ObjectInputStream.java:1630) > java.io.ObjectInputStream.readClassDesc(ObjectInputStream.java:1521) > > java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:1781) > java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1353) > > java.io.ObjectInputStream.defaultReadFields(ObjectInputStream.java:2018) > java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:1942) > > java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:1808) > java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1353) > java.io.ObjectInputStream.readObject(ObjectInputStream.java:373) > > org.apache.beam.sdk.util.SerializableUtils.deserializeFromByteArray(SerializableUtils.java:71) > > com.google.cloud.dataflow.worker.UserParDoFnFactory$UserDoFnExtractor.getDoFnInfo(UserParDoFnFactory.java:61) > > com.google.cloud.dataflow.worker.UserParDoFnFactory.lambda$create$0(UserParDoFnFactory.java:92) > > 
com.google.cloud.dataflow.worker.repackaged.com.google.common.cache.LocalCache$LocalManualCache$1.load(LocalCache.java:4904) > > com.google.cloud.dataflow.worker.repackaged.com.google.common.cache.LocalCache$LoadingValueReference.loadFuture(LocalCache.java:3628) > > com.google.cloud.dataflow.worker.repackaged.com.google.common.cache.LocalCache$Segment.loadSync(LocalCache.java:2336) > > com.google.cloud.dataflow.worker.repackaged.com.google.common.cache.LocalCache$Segment.lockedGetOrLoad(LocalCache.java:2295) > > com.google.cloud.dataflow.worker.repackaged.com.google.common.cache.LocalCache$Segment.get(LocalCache.java:2208) > > com.google.cloud.dataflow.worker.repackaged.com.google.common.cache.LocalCache.get(LocalCache.java:4053) > > com.google.cloud.dataflow.worker.repackaged.com.google.common.cache.LocalCache$LocalManualCache.get(LocalCache.java:4899) > > com.google.cloud.dataflow.worker.UserParDoFnFactory.create(UserParDoFnFactory.java:90) > > com.google.cloud.dataflow.worker.DefaultParDoFnFactory.create(DefaultParDoFnFactory.java:74) > > com.google.cloud.dataflow.worker.IntrinsicMapTaskExecutorFactory.createParDoOperation(IntrinsicMapTaskExecutorFactory.java:262) > > com.google.cloud.dataflow.worker.IntrinsicMapTaskExecutorFactory.access$000(IntrinsicMapTaskExecutorFactory.java:84) > > com.google.cloud.dataflow.worker.IntrinsicMapTaskExecutorFactory$1.typedApply(IntrinsicMapTaskExecutorFactory.java:181) > > com.google.cloud.dataflow.worker.IntrinsicMapTaskExecutorFactory$1.typedApply(IntrinsicMapTaskExecutorFactory.java:163) > > com.google.cloud.dataflow.worker.graph.Networks$TypeSafeNodeFunction.apply(Networks.java:63) > > com.google.cloud.dataflow.worker.graph.Networks$TypeSafeNodeFunction.apply(Networks.java:50) > > com.google.cloud.dataflow.worker.graph.Networks.replaceDirectedNetworkNodes(Networks.java:87) > > com.google.cloud.dataflow.worker.IntrinsicMapTaskExecutorFactory.create(IntrinsicMapTaskExecutorFactory.java:123) > > 
com.google.cloud.dataflow.worker.StreamingDataflowWorker.process(StreamingDataflowWorker.java:1143) > > com.google.cloud.dataflow.worker.StreamingDataflowWorker.access$1000(StreamingDataflowWorker.java:136) > > com.google.cloud.dataflow.worker.StreamingDataflowWorker$6.run(StreamingDataflowWorker.java:966) > > java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142) > > java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617) > java.lang.Thread.run(Thread.java:745) -- This message was sent by Atlassian JIRA (v7.6.3#76005)