[ https://issues.apache.org/jira/browse/FLINK-33184?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17810661#comment-17810661 ]
Xingcan Cui commented on FLINK-33184: ------------------------------------- I just hit a similar issue in Flink 1.18.1. If [https://github.com/apache/flink/pull/23532] resolved the issue, it would be good to backport it. {code:java} ERROR org.apache.flink.runtime.taskmanager.Task [] - Error in the task canceler for task KeyedProcess (112/128)#1. java.lang.IllegalStateException: Leaking buffers. at org.apache.flink.util.Preconditions.checkState(Preconditions.java:193) ~[flink-dist-1.18.1.jar:1.18.1] at org.apache.flink.runtime.io.network.partition.hybrid.tiered.tier.disk.SubpartitionDiskCacheManager.release(SubpartitionDiskCacheManager.java:113) ~[flink-dist-1.18.1.jar:1.18.1] at java.util.Spliterators$ArraySpliterator.forEachRemaining(Unknown Source) ~[?:?] at java.util.stream.ReferencePipeline$Head.forEach(Unknown Source) ~[?:?] at org.apache.flink.runtime.io.network.partition.hybrid.tiered.tier.disk.DiskCacheManager.release(DiskCacheManager.java:128) ~[flink-dist-1.18.1.jar:1.18.1] at org.apache.flink.runtime.io.network.partition.hybrid.tiered.tier.disk.DiskTierProducerAgent.releaseResources(DiskTierProducerAgent.java:222) ~[flink-dist-1.18.1.jar:1.18.1] at java.util.ArrayList.forEach(Unknown Source) ~[?:?] 
at org.apache.flink.runtime.io.network.partition.hybrid.tiered.storage.TieredStorageResourceRegistry.clearResourceFor(TieredStorageResourceRegistry.java:59) ~[flink-dist-1.18.1.jar:1.18.1] at org.apache.flink.runtime.io.network.partition.hybrid.tiered.shuffle.TieredResultPartition.releaseInternal(TieredResultPartition.java:195) ~[flink-dist-1.18.1.jar:1.18.1] at org.apache.flink.runtime.io.network.partition.ResultPartition.release(ResultPartition.java:262) ~[flink-dist-1.18.1.jar:1.18.1] at org.apache.flink.runtime.io.network.partition.ResultPartitionManager.releasePartition(ResultPartitionManager.java:88) ~[flink-dist-1.18.1.jar:1.18.1] at org.apache.flink.runtime.io.network.partition.ResultPartition.fail(ResultPartition.java:284) ~[flink-dist-1.18.1.jar:1.18.1] at org.apache.flink.runtime.taskmanager.Task.failAllResultPartitions(Task.java:1004) ~[flink-dist-1.18.1.jar:1.18.1] at org.apache.flink.runtime.taskmanager.Task.access$100(Task.java:139) ~[flink-dist-1.18.1.jar:1.18.1] at org.apache.flink.runtime.taskmanager.Task$TaskCanceler.run(Task.java:1677) [flink-dist-1.18.1.jar:1.18.1] at java.lang.Thread.run(Unknown Source) [?:?] 2024-01-25 03:44:21 [KeyedProcess (112/128)#1] INFO org.apache.flink.runtime.taskmanager.Task [] - KeyedProcess (112/128)#1 (7bb761e84f2d7957d3b927e49a6b28b3_e0d77c22cedd08ffffdc719831d914bf_111_1) switched from CANCELING to CANCELED. 
{code} > HybridShuffleITCase fails with exception in resource cleanup of task Map on > AZP > ------------------------------------------------------------------------------- > > Key: FLINK-33184 > URL: https://issues.apache.org/jira/browse/FLINK-33184 > Project: Flink > Issue Type: Bug > Components: Runtime / Network > Affects Versions: 1.19.0 > Reporter: Sergey Nuyanzin > Priority: Critical > Labels: test-stability > > This build fails > https://dev.azure.com/apache-flink/apache-flink/_build/results?buildId=53548&view=logs&j=baf26b34-3c6a-54e8-f93f-cf269b32f802&t=8c9d126d-57d2-5a9e-a8c8-ff53f7b35cd9&l=8710 > {noformat} > Map (5/10)#0] ERROR org.apache.flink.runtime.taskmanager.Task > [] - FATAL - exception in resource cleanup of task Map (5/10)#0 > (159f887fbd200ea7cfa4aaeb1127c4ab_0a448493b4782967b150582570326227_4_0) > . > java.lang.IllegalStateException: Leaking buffers. > at > org.apache.flink.util.Preconditions.checkState(Preconditions.java:193) > ~[flink-core-1.19-SNAPSHOT.jar:1.19-SNAPSHOT] > at > org.apache.flink.runtime.io.network.partition.hybrid.tiered.storage.TieredStorageMemoryManagerImpl.release(TieredStorageMemoryManagerImpl.java:236) > ~[flink-runtime-1.19-SNAPSHOT.jar:1.19-SNAPSHOT] > at java.util.ArrayList.forEach(ArrayList.java:1259) ~[?:1.8.0_292] > at > org.apache.flink.runtime.io.network.partition.hybrid.tiered.storage.TieredStorageResourceRegistry.clearResourceFor(TieredStorageResourceRegistry.java:59) > ~[flink-runtime-1.19-SNAPSHOT.jar:1.19-SNAPSHOT] > at > org.apache.flink.runtime.io.network.partition.hybrid.tiered.shuffle.TieredResultPartition.releaseInternal(TieredResultPartition.java:195) > ~[flink-runtime-1.19-SNAPSHOT.jar:1.19-SNAPSHOT] > at > org.apache.flink.runtime.io.network.partition.ResultPartition.release(ResultPartition.java:262) > ~[flink-runtime-1.19-SNAPSHOT.jar:1.19-SNAPSHOT] > at > org.apache.flink.runtime.io.network.partition.ResultPartitionManager.releasePartition(ResultPartitionManager.java:88) > 
~[flink-runtime-1.19-SNAPSHOT.jar:1.19-SNAPSHOT] > at > org.apache.flink.runtime.io.network.partition.ResultPartition.fail(ResultPartition.java:284) > ~[flink-runtime-1.19-SNAPSHOT.jar:1.19-SNAPSHOT] > at > org.apache.flink.runtime.taskmanager.Task.failAllResultPartitions(Task.java:1004) > ~[flink-runtime-1.19-SNAPSHOT.jar:1.19-SNAPSHOT] > at > org.apache.flink.runtime.taskmanager.Task.releaseResources(Task.java:990) > ~[flink-runtime-1.19-SNAPSHOT.jar:1.19-SNAPSHOT] > at org.apache.flink.runtime.taskmanager.Task.doRun(Task.java:838) > [flink-runtime-1.19-SNAPSHOT.jar:1.19-SNAPSHOT] > at org.apache.flink.runtime.taskmanager.Task.run(Task.java:562) > [flink-runtime-1.19-SNAPSHOT.jar:1.19-SNAPSHOT] > at java.lang.Thread.run(Thread.java:748) [?:1.8.0_292] > 01:17:22,375 [flink-pekko.actor.default-dispatcher-5] INFO > org.apache.flink.runtime.taskmanager.Task [] - Task Sink: > Unnamed (3/10)#0 is already in state CANCELING > 01:17:22,375 [ Map (5/10)#0] ERROR > org.apache.flink.runtime.taskexecutor.TaskExecutor [] - FATAL - > exception in resource cleanup of task Map (5/10)#0 > (159f887fbd200ea7cfa4aaeb1127c4ab_0a448493b4782967b150582570326227_4_0) > . > java.lang.IllegalStateException: Leaking buffers. 
> at > org.apache.flink.util.Preconditions.checkState(Preconditions.java:193) > ~[flink-core-1.19-SNAPSHOT.jar:1.19-SNAPSHOT] > at > org.apache.flink.runtime.io.network.partition.hybrid.tiered.storage.TieredStorageMemoryManagerImpl.release(TieredStorageMemoryManagerImpl.java:236) > ~[flink-runtime-1.19-SNAPSHOT.jar:1.19-SNAPSHOT] > at java.util.ArrayList.forEach(ArrayList.java:1259) ~[?:1.8.0_292] > at > org.apache.flink.runtime.io.network.partition.hybrid.tiered.storage.TieredStorageResourceRegistry.clearResourceFor(TieredStorageResourceRegistry.java:59) > ~[flink-runtime-1.19-SNAPSHOT.jar:1.19-SNAPSHOT] > at > org.apache.flink.runtime.io.network.partition.hybrid.tiered.shuffle.TieredResultPartition.releaseInternal(TieredResultPartition.java:195) > ~[flink-runtime-1.19-SNAPSHOT.jar:1.19-SNAPSHOT] > at > org.apache.flink.runtime.io.network.partition.ResultPartition.release(ResultPartition.java:262) > ~[flink-runtime-1.19-SNAPSHOT.jar:1.19-SNAPSHOT] > at > org.apache.flink.runtime.io.network.partition.ResultPartitionManager.releasePartition(ResultPartitionManager.java:88) > ~[flink-runtime-1.19-SNAPSHOT.jar:1.19-SNAPSHOT] > at > org.apache.flink.runtime.io.network.partition.ResultPartition.fail(ResultPartition.java:284) > ~[flink-runtime-1.19-SNAPSHOT.jar:1.19-SNAPSHOT] > at > org.apache.flink.runtime.taskmanager.Task.failAllResultPartitions(Task.java:1004) > ~[flink-runtime-1.19-SNAPSHOT.jar:1.19-SNAPSHOT] > at > org.apache.flink.runtime.taskmanager.Task.releaseResources(Task.java:990) > [flink-runtime-1.19-SNAPSHOT.jar:1.19-SNAPSHOT] > at org.apache.flink.runtime.taskmanager.Task.doRun(Task.java:838) > [flink-runtime-1.19-SNAPSHOT.jar:1.19-SNAPSHOT] > at org.apache.flink.runtime.taskmanager.Task.run(Task.java:562) > [flink-runtime-1.19-SNAPSHOT.jar:1.19-SNAPSHOT] > at java.lang.Thread.run(Thread.java:748) [?:1.8.0_292] > {noformat} -- This message was sent by Atlassian Jira (v8.20.10#820010)