[
https://issues.apache.org/jira/browse/FLINK-38403?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=18023062#comment-18023062
]
Zakelly Lan commented on FLINK-38403:
-------------------------------------
In
https://dev.azure.com/apache-flink/apache-flink/_build/results?buildId=69874&view=logs&j=5c8e7682-d68f-54d1-16a2-a09310218a49&t=9d734c8c-6253-55e6-3bce-47e7cdf68ac4
I found:
{code:java}
ERROR org.apache.flink.runtime.minicluster.MiniCluster [] -
TaskManager #0 failed.
java.lang.RuntimeException:
org.apache.flink.runtime.io.network.netty.exception.RemoteTransportException:
Error at remote task manager 'localhost/127.0.0.1:45253 [
b9be95dc-38ff-46cf-9706-33c27a822bb0 ] '.
at
org.apache.flink.util.ExceptionUtils.rethrow(ExceptionUtils.java:321)
~[flink-core-2.2-SNAPSHOT.jar:2.2-SNAPSHOT]
at
org.apache.flink.runtime.io.network.partition.consumer.BufferManager.recycle(BufferManager.java:237)
~[flink-runtime-2.2-SNAPSHOT.jar:2.2-SNAPSHOT]
at
org.apache.flink.runtime.io.network.buffer.NetworkBuffer.deallocate(NetworkBuffer.java:189)
~[flink-runtime-2.2-SNAPSHOT.jar:2.2-SNAPSHOT]
at
org.apache.flink.shaded.netty4.io.netty.buffer.AbstractReferenceCountedByteBuf.handleRelease(AbstractReferenceCountedByteBuf.java:111)
~[flink-shaded-netty-4.1.100.Final-20.0.jar:?]
at
org.apache.flink.shaded.netty4.io.netty.buffer.AbstractReferenceCountedByteBuf.release(AbstractReferenceCountedByteBuf.java:101)
~[flink-shaded-netty-4.1.100.Final-20.0.jar:?]
at
org.apache.flink.runtime.io.network.buffer.NetworkBuffer.recycleBuffer(NetworkBuffer.java:161)
~[flink-runtime-2.2-SNAPSHOT.jar:2.2-SNAPSHOT]
at
org.apache.flink.runtime.io.network.api.serialization.SpillingAdaptiveSpanningRecordDeserializer.clear(SpillingAdaptiveSpanningRecordDeserializer.java:140)
~[flink-runtime-2.2-SNAPSHOT.jar:2.2-SNAPSHOT]
at
org.apache.flink.streaming.runtime.io.AbstractStreamTaskNetworkInput.releaseDeserializer(AbstractStreamTaskNetworkInput.java:328)
~[flink-runtime-2.2-SNAPSHOT.jar:2.2-SNAPSHOT]
at
org.apache.flink.streaming.runtime.io.AbstractStreamTaskNetworkInput.close(AbstractStreamTaskNetworkInput.java:320)
~[flink-runtime-2.2-SNAPSHOT.jar:2.2-SNAPSHOT]
at
org.apache.flink.streaming.runtime.io.StreamTaskNetworkInput.close(StreamTaskNetworkInput.java:142)
~[flink-runtime-2.2-SNAPSHOT.jar:2.2-SNAPSHOT]
at
org.apache.flink.streaming.runtime.io.StreamOneInputProcessor.close(StreamOneInputProcessor.java:88)
~[flink-runtime-2.2-SNAPSHOT.jar:2.2-SNAPSHOT]
at
org.apache.flink.streaming.runtime.tasks.StreamTask.cleanUpInternal(StreamTask.java:1110)
~[flink-runtime-2.2-SNAPSHOT.jar:2.2-SNAPSHOT]
at org.apache.flink.util.IOUtils.closeAll(IOUtils.java:257)
~[flink-core-2.2-SNAPSHOT.jar:2.2-SNAPSHOT]
at
org.apache.flink.core.fs.AutoCloseableRegistry.doClose(AutoCloseableRegistry.java:83)
~[flink-core-2.2-SNAPSHOT.jar:2.2-SNAPSHOT]
at
org.apache.flink.util.AbstractAutoCloseableRegistry.close(AbstractAutoCloseableRegistry.java:127)
~[flink-core-2.2-SNAPSHOT.jar:2.2-SNAPSHOT]
at
org.apache.flink.streaming.runtime.tasks.StreamTask.cleanUp(StreamTask.java:1101)
~[flink-runtime-2.2-SNAPSHOT.jar:2.2-SNAPSHOT]
at
org.apache.flink.runtime.taskmanager.Task.lambda$restoreAndInvoke$2(Task.java:958)
~[flink-runtime-2.2-SNAPSHOT.jar:2.2-SNAPSHOT]
at
org.apache.flink.runtime.taskmanager.Task.runWithSystemExitMonitoring(Task.java:973)
~[flink-runtime-2.2-SNAPSHOT.jar:2.2-SNAPSHOT]
at
org.apache.flink.runtime.taskmanager.Task.lambda$restoreAndInvoke$3(Task.java:958)
[flink-runtime-2.2-SNAPSHOT.jar:2.2-SNAPSHOT]
at org.apache.flink.util.IOUtils.closeAll(IOUtils.java:257)
~[flink-core-2.2-SNAPSHOT.jar:2.2-SNAPSHOT]
at
org.apache.flink.core.fs.AutoCloseableRegistry.doClose(AutoCloseableRegistry.java:83)
~[flink-core-2.2-SNAPSHOT.jar:2.2-SNAPSHOT]
at
org.apache.flink.util.AbstractAutoCloseableRegistry.close(AbstractAutoCloseableRegistry.java:127)
~[flink-core-2.2-SNAPSHOT.jar:2.2-SNAPSHOT]
at org.apache.flink.runtime.taskmanager.Task.doRun(Task.java:794)
[flink-runtime-2.2-SNAPSHOT.jar:2.2-SNAPSHOT]
at org.apache.flink.runtime.taskmanager.Task.run(Task.java:569)
[flink-runtime-2.2-SNAPSHOT.jar:2.2-SNAPSHOT]
at java.base/java.lang.Thread.run(Thread.java:833) [?:?]
Caused by:
org.apache.flink.runtime.io.network.netty.exception.RemoteTransportException:
Error at remote task manager 'localhost/127.0.0.1:45253 [
b9be95dc-38ff-46cf-9706-33c27a822bb0 ] '.
at
org.apache.flink.runtime.io.network.netty.CreditBasedPartitionRequestClientHandler.decodeMsg(CreditBasedPartitionRequestClientHandler.java:333)
~[flink-runtime-2.2-SNAPSHOT.jar:2.2-SNAPSHOT]
at
org.apache.flink.runtime.io.network.netty.CreditBasedPartitionRequestClientHandler.channelRead(CreditBasedPartitionRequestClientHandler.java:197)
~[flink-runtime-2.2-SNAPSHOT.jar:2.2-SNAPSHOT]
at
org.apache.flink.shaded.netty4.io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:444)
~[flink-shaded-netty-4.1.100.Final-20.0.jar:?]
at
org.apache.flink.shaded.netty4.io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:420)
~[flink-shaded-netty-4.1.100.Final-20.0.jar:?]
at
org.apache.flink.shaded.netty4.io.netty.channel.AbstractChannelHandlerContext.fireChannelRead(AbstractChannelHandlerContext.java:412)
~[flink-shaded-netty-4.1.100.Final-20.0.jar:?]
at
org.apache.flink.runtime.io.network.netty.NettyMessageClientDecoderDelegate.channelRead(NettyMessageClientDecoderDelegate.java:112)
~[flink-runtime-2.2-SNAPSHOT.jar:2.2-SNAPSHOT]
at
org.apache.flink.shaded.netty4.io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:444)
~[flink-shaded-netty-4.1.100.Final-20.0.jar:?]
at
org.apache.flink.shaded.netty4.io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:420)
~[flink-shaded-netty-4.1.100.Final-20.0.jar:?]
at
org.apache.flink.shaded.netty4.io.netty.channel.AbstractChannelHandlerContext.fireChannelRead(AbstractChannelHandlerContext.java:412)
~[flink-shaded-netty-4.1.100.Final-20.0.jar:?]
at
org.apache.flink.shaded.netty4.io.netty.channel.DefaultChannelPipeline$HeadContext.channelRead(DefaultChannelPipeline.java:1410)
~[flink-shaded-netty-4.1.100.Final-20.0.jar:?]
at
org.apache.flink.shaded.netty4.io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:440)
~[flink-shaded-netty-4.1.100.Final-20.0.jar:?]
at
org.apache.flink.shaded.netty4.io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:420)
~[flink-shaded-netty-4.1.100.Final-20.0.jar:?]
at
org.apache.flink.shaded.netty4.io.netty.channel.DefaultChannelPipeline.fireChannelRead(DefaultChannelPipeline.java:919)
~[flink-shaded-netty-4.1.100.Final-20.0.jar:?]
at
org.apache.flink.shaded.netty4.io.netty.channel.epoll.AbstractEpollStreamChannel$EpollStreamUnsafe.epollInReady(AbstractEpollStreamChannel.java:800)
~[flink-shaded-netty-4.1.100.Final-20.0.jar:?]
at
org.apache.flink.shaded.netty4.io.netty.channel.epoll.EpollEventLoop.processReady(EpollEventLoop.java:509)
~[flink-shaded-netty-4.1.100.Final-20.0.jar:?]
at
org.apache.flink.shaded.netty4.io.netty.channel.epoll.EpollEventLoop.run(EpollEventLoop.java:407)
~[flink-shaded-netty-4.1.100.Final-20.0.jar:?]
at
org.apache.flink.shaded.netty4.io.netty.util.concurrent.SingleThreadEventExecutor$4.run(SingleThreadEventExecutor.java:997)
~[flink-shaded-netty-4.1.100.Final-20.0.jar:?]
at
org.apache.flink.shaded.netty4.io.netty.util.internal.ThreadExecutorMap$2.run(ThreadExecutorMap.java:74)
~[flink-shaded-netty-4.1.100.Final-20.0.jar:?]
... 1 more
Caused by:
org.apache.flink.runtime.io.network.partition.ProducerFailedException:
org.apache.flink.test.checkpointing.UnalignedCheckpointTestBase$TestException:
org.apache.flink.test.checkpointing.UnalignedCheckpointTestBase$TestException:
Failing map @ 7 (2 attempt); last value 48914
at
org.apache.flink.runtime.io.network.partition.PipelinedSubpartitionView.getFailureCause(PipelinedSubpartitionView.java:96)
~[flink-runtime-2.2-SNAPSHOT.jar:2.2-SNAPSHOT]
at
org.apache.flink.runtime.io.network.netty.CreditBasedSequenceNumberingViewReader.getFailureCause(CreditBasedSequenceNumberingViewReader.java:282)
~[flink-runtime-2.2-SNAPSHOT.jar:2.2-SNAPSHOT]
at
org.apache.flink.runtime.io.network.netty.PartitionRequestQueue.writeAndFlushNextMessageIfPossible(PartitionRequestQueue.java:325)
~[flink-runtime-2.2-SNAPSHOT.jar:2.2-SNAPSHOT]
at
org.apache.flink.runtime.io.network.netty.PartitionRequestQueue.enqueueAvailableReader(PartitionRequestQueue.java:126)
~[flink-runtime-2.2-SNAPSHOT.jar:2.2-SNAPSHOT]
at
org.apache.flink.runtime.io.network.netty.PartitionRequestQueue.userEventTriggered(PartitionRequestQueue.java:254)
~[flink-runtime-2.2-SNAPSHOT.jar:2.2-SNAPSHOT]
at
org.apache.flink.shaded.netty4.io.netty.channel.AbstractChannelHandlerContext.invokeUserEventTriggered(AbstractChannelHandlerContext.java:400)
~[flink-shaded-netty-4.1.100.Final-20.0.jar:?]
at
org.apache.flink.shaded.netty4.io.netty.channel.AbstractChannelHandlerContext.invokeUserEventTriggered(AbstractChannelHandlerContext.java:376)
~[flink-shaded-netty-4.1.100.Final-20.0.jar:?]
at
org.apache.flink.shaded.netty4.io.netty.channel.AbstractChannelHandlerContext.fireUserEventTriggered(AbstractChannelHandlerContext.java:368)
~[flink-shaded-netty-4.1.100.Final-20.0.jar:?]
at
org.apache.flink.shaded.netty4.io.netty.channel.ChannelInboundHandlerAdapter.userEventTriggered(ChannelInboundHandlerAdapter.java:117)
~[flink-shaded-netty-4.1.100.Final-20.0.jar:?]
at
org.apache.flink.shaded.netty4.io.netty.handler.codec.ByteToMessageDecoder.userEventTriggered(ByteToMessageDecoder.java:387)
~[flink-shaded-netty-4.1.100.Final-20.0.jar:?]
at
org.apache.flink.shaded.netty4.io.netty.channel.AbstractChannelHandlerContext.invokeUserEventTriggered(AbstractChannelHandlerContext.java:400)
~[flink-shaded-netty-4.1.100.Final-20.0.jar:?]
at
org.apache.flink.shaded.netty4.io.netty.channel.AbstractChannelHandlerContext.invokeUserEventTriggered(AbstractChannelHandlerContext.java:376)
~[flink-shaded-netty-4.1.100.Final-20.0.jar:?]
at
org.apache.flink.shaded.netty4.io.netty.channel.AbstractChannelHandlerContext.fireUserEventTriggered(AbstractChannelHandlerContext.java:368)
~[flink-shaded-netty-4.1.100.Final-20.0.jar:?]
at
org.apache.flink.shaded.netty4.io.netty.channel.DefaultChannelPipeline$HeadContext.userEventTriggered(DefaultChannelPipeline.java:1428)
~[flink-shaded-netty-4.1.100.Final-20.0.jar:?]
at
org.apache.flink.shaded.netty4.io.netty.channel.AbstractChannelHandlerContext.invokeUserEventTriggered(AbstractChannelHandlerContext.java:396)
~[flink-shaded-netty-4.1.100.Final-20.0.jar:?]
at
org.apache.flink.shaded.netty4.io.netty.channel.AbstractChannelHandlerContext.invokeUserEventTriggered(AbstractChannelHandlerContext.java:376)
~[flink-shaded-netty-4.1.100.Final-20.0.jar:?]
at
org.apache.flink.shaded.netty4.io.netty.channel.DefaultChannelPipeline.fireUserEventTriggered(DefaultChannelPipeline.java:913)
~[flink-shaded-netty-4.1.100.Final-20.0.jar:?]
at
org.apache.flink.runtime.io.network.netty.PartitionRequestQueue.lambda$notifyReaderNonEmpty$0(PartitionRequestQueue.java:94)
~[flink-runtime-2.2-SNAPSHOT.jar:2.2-SNAPSHOT]
at
org.apache.flink.shaded.netty4.io.netty.util.concurrent.AbstractEventExecutor.runTask(AbstractEventExecutor.java:173)
~[flink-shaded-netty-4.1.100.Final-20.0.jar:?]
at
org.apache.flink.shaded.netty4.io.netty.util.concurrent.AbstractEventExecutor.safeExecute(AbstractEventExecutor.java:166)
~[flink-shaded-netty-4.1.100.Final-20.0.jar:?]
at
org.apache.flink.shaded.netty4.io.netty.util.concurrent.SingleThreadEventExecutor.runAllTasks(SingleThreadEventExecutor.java:470)
~[flink-shaded-netty-4.1.100.Final-20.0.jar:?]
at
org.apache.flink.shaded.netty4.io.netty.channel.epoll.EpollEventLoop.run(EpollEventLoop.java:413)
~[flink-shaded-netty-4.1.100.Final-20.0.jar:?]
at
org.apache.flink.shaded.netty4.io.netty.util.concurrent.SingleThreadEventExecutor$4.run(SingleThreadEventExecutor.java:997)
~[flink-shaded-netty-4.1.100.Final-20.0.jar:?]
at
org.apache.flink.shaded.netty4.io.netty.util.internal.ThreadExecutorMap$2.run(ThreadExecutorMap.java:74)
~[flink-shaded-netty-4.1.100.Final-20.0.jar:?]
... 1 more
Caused by: org.apache.flink.util.SerializedThrowable:
org.apache.flink.test.checkpointing.UnalignedCheckpointTestBase$TestException:
Failing map @ 7 (2 attempt); last value 48914
at
org.apache.flink.test.checkpointing.UnalignedCheckpointTestBase$FailingMapper.failMapper(UnalignedCheckpointTestBase.java:875)
~[test-classes/:?]
at
org.apache.flink.test.checkpointing.UnalignedCheckpointTestBase$FailingMapper.checkFail(UnalignedCheckpointTestBase.java:870)
~[test-classes/:?]
at
org.apache.flink.test.checkpointing.UnalignedCheckpointTestBase$FailingMapper.map(UnalignedCheckpointTestBase.java:863)
~[test-classes/:?]
at
org.apache.flink.streaming.api.operators.StreamMap.processElement(StreamMap.java:37)
~[flink-runtime-2.2-SNAPSHOT.jar:2.2-SNAPSHOT]
at
org.apache.flink.streaming.runtime.tasks.OneInputStreamTask$StreamTaskNetworkOutput.emitRecord(OneInputStreamTask.java:247)
~[flink-runtime-2.2-SNAPSHOT.jar:2.2-SNAPSHOT]
at
org.apache.flink.streaming.runtime.io.AbstractStreamTaskNetworkInput.processElement(AbstractStreamTaskNetworkInput.java:206)
~[flink-runtime-2.2-SNAPSHOT.jar:2.2-SNAPSHOT]
at
org.apache.flink.streaming.runtime.io.AbstractStreamTaskNetworkInput.emitNext(AbstractStreamTaskNetworkInput.java:163)
~[flink-runtime-2.2-SNAPSHOT.jar:2.2-SNAPSHOT]
at
org.apache.flink.streaming.runtime.io.StreamOneInputProcessor.processInput(StreamOneInputProcessor.java:65)
~[flink-runtime-2.2-SNAPSHOT.jar:2.2-SNAPSHOT]
at
org.apache.flink.streaming.runtime.tasks.StreamTask.processInput(StreamTask.java:646)
~[flink-runtime-2.2-SNAPSHOT.jar:2.2-SNAPSHOT]
at
org.apache.flink.streaming.runtime.tasks.mailbox.MailboxProcessor.runMailboxLoop(MailboxProcessor.java:231)
~[flink-runtime-2.2-SNAPSHOT.jar:2.2-SNAPSHOT]
at
org.apache.flink.streaming.runtime.tasks.StreamTask.runMailboxLoop(StreamTask.java:988)
~[flink-runtime-2.2-SNAPSHOT.jar:2.2-SNAPSHOT]
at
org.apache.flink.streaming.runtime.tasks.StreamTask.invoke(StreamTask.java:925)
~[flink-runtime-2.2-SNAPSHOT.jar:2.2-SNAPSHOT]
at
org.apache.flink.runtime.taskmanager.Task.runWithSystemExitMonitoring(Task.java:973)
~[flink-runtime-2.2-SNAPSHOT.jar:2.2-SNAPSHOT]
at
org.apache.flink.runtime.taskmanager.Task.restoreAndInvoke(Task.java:955)
~[flink-runtime-2.2-SNAPSHOT.jar:2.2-SNAPSHOT]
at org.apache.flink.runtime.taskmanager.Task.doRun(Task.java:760)
~[flink-runtime-2.2-SNAPSHOT.jar:2.2-SNAPSHOT]
... 2 more
{code}
It seems that the artificial error (the TestException intentionally thrown by the test's FailingMapper) caused the TaskManager (TM) to exit?
> UnalignedCheckpointITCase failed in test_cron_hadoop313 tests
> -------------------------------------------------------------
>
> Key: FLINK-38403
> URL: https://issues.apache.org/jira/browse/FLINK-38403
> Project: Flink
> Issue Type: Bug
> Components: Tests
> Affects Versions: 2.2.0
> Reporter: Ruan Hang
> Priority: Major
>
> Details:
> https://dev.azure.com/apache-flink/apache-flink/_build/results?buildId=69810&view=logs&j=baf26b34-3c6a-54e8-f93f-cf269b32f802&t=b380e762-00fc-5c06-e76c-b8e53634ca34
--
This message was sent by Atlassian Jira
(v8.20.10#820010)