[ 
https://issues.apache.org/jira/browse/FLINK-33184?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17810661#comment-17810661
 ] 

Xingcan Cui commented on FLINK-33184:
-------------------------------------

I just hit a similar issue in Flink 1.18.1. If 
[https://github.com/apache/flink/pull/23532] fixes this issue, it would be good to 
backport it.
{code:java}
ERROR org.apache.flink.runtime.taskmanager.Task                    [] - Error 
in the task canceler for task KeyedProcess (112/128)#1.
java.lang.IllegalStateException: Leaking buffers.
    at org.apache.flink.util.Preconditions.checkState(Preconditions.java:193) 
~[flink-dist-1.18.1.jar:1.18.1]
    at 
org.apache.flink.runtime.io.network.partition.hybrid.tiered.tier.disk.SubpartitionDiskCacheManager.release(SubpartitionDiskCacheManager.java:113)
 ~[flink-dist-1.18.1.jar:1.18.1]
    at java.util.Spliterators$ArraySpliterator.forEachRemaining(Unknown Source) 
~[?:?]
    at java.util.stream.ReferencePipeline$Head.forEach(Unknown Source) ~[?:?]
    at 
org.apache.flink.runtime.io.network.partition.hybrid.tiered.tier.disk.DiskCacheManager.release(DiskCacheManager.java:128)
 ~[flink-dist-1.18.1.jar:1.18.1]
    at 
org.apache.flink.runtime.io.network.partition.hybrid.tiered.tier.disk.DiskTierProducerAgent.releaseResources(DiskTierProducerAgent.java:222)
 ~[flink-dist-1.18.1.jar:1.18.1]
    at java.util.ArrayList.forEach(Unknown Source) ~[?:?]
    at 
org.apache.flink.runtime.io.network.partition.hybrid.tiered.storage.TieredStorageResourceRegistry.clearResourceFor(TieredStorageResourceRegistry.java:59)
 ~[flink-dist-1.18.1.jar:1.18.1]
    at 
org.apache.flink.runtime.io.network.partition.hybrid.tiered.shuffle.TieredResultPartition.releaseInternal(TieredResultPartition.java:195)
 ~[flink-dist-1.18.1.jar:1.18.1]
    at 
org.apache.flink.runtime.io.network.partition.ResultPartition.release(ResultPartition.java:262)
 ~[flink-dist-1.18.1.jar:1.18.1]
    at 
org.apache.flink.runtime.io.network.partition.ResultPartitionManager.releasePartition(ResultPartitionManager.java:88)
 ~[flink-dist-1.18.1.jar:1.18.1]
    at 
org.apache.flink.runtime.io.network.partition.ResultPartition.fail(ResultPartition.java:284)
 ~[flink-dist-1.18.1.jar:1.18.1]
    at 
org.apache.flink.runtime.taskmanager.Task.failAllResultPartitions(Task.java:1004)
 ~[flink-dist-1.18.1.jar:1.18.1]
    at org.apache.flink.runtime.taskmanager.Task.access$100(Task.java:139) 
~[flink-dist-1.18.1.jar:1.18.1]
    at 
org.apache.flink.runtime.taskmanager.Task$TaskCanceler.run(Task.java:1677) 
[flink-dist-1.18.1.jar:1.18.1]
    at java.lang.Thread.run(Unknown Source) [?:?]
2024-01-25 03:44:21 [KeyedProcess (112/128)#1] INFO  
org.apache.flink.runtime.taskmanager.Task                    [] - KeyedProcess 
(112/128)#1 
(7bb761e84f2d7957d3b927e49a6b28b3_e0d77c22cedd08ffffdc719831d914bf_111_1) 
switched from CANCELING to CANCELED. {code}

> HybridShuffleITCase fails with exception in resource cleanup of task Map on 
> AZP
> -------------------------------------------------------------------------------
>
>                 Key: FLINK-33184
>                 URL: https://issues.apache.org/jira/browse/FLINK-33184
>             Project: Flink
>          Issue Type: Bug
>          Components: Runtime / Network
>    Affects Versions: 1.19.0
>            Reporter: Sergey Nuyanzin
>            Priority: Critical
>              Labels: test-stability
>
> This build fails with the exception shown below: 
> https://dev.azure.com/apache-flink/apache-flink/_build/results?buildId=53548&view=logs&j=baf26b34-3c6a-54e8-f93f-cf269b32f802&t=8c9d126d-57d2-5a9e-a8c8-ff53f7b35cd9&l=8710
> {noformat} 
> Map (5/10)#0] ERROR org.apache.flink.runtime.taskmanager.Task                 
>    [] - FATAL - exception in resource cleanup of task Map (5/10)#0 
> (159f887fbd200ea7cfa4aaeb1127c4ab_0a448493b4782967b150582570326227_4_0)
> .
> java.lang.IllegalStateException: Leaking buffers.
>         at 
> org.apache.flink.util.Preconditions.checkState(Preconditions.java:193) 
> ~[flink-core-1.19-SNAPSHOT.jar:1.19-SNAPSHOT]
>         at 
> org.apache.flink.runtime.io.network.partition.hybrid.tiered.storage.TieredStorageMemoryManagerImpl.release(TieredStorageMemoryManagerImpl.java:236)
>  ~[flink-runtime-1.19-SNAPSHOT.jar:1.19-SNAPSHOT]
>         at java.util.ArrayList.forEach(ArrayList.java:1259) ~[?:1.8.0_292]
>         at 
> org.apache.flink.runtime.io.network.partition.hybrid.tiered.storage.TieredStorageResourceRegistry.clearResourceFor(TieredStorageResourceRegistry.java:59)
>  ~[flink-runtime-1.19-SNAPSHOT.jar:1.19-SNAPSHOT]
>         at 
> org.apache.flink.runtime.io.network.partition.hybrid.tiered.shuffle.TieredResultPartition.releaseInternal(TieredResultPartition.java:195)
>  ~[flink-runtime-1.19-SNAPSHOT.jar:1.19-SNAPSHOT]
>         at 
> org.apache.flink.runtime.io.network.partition.ResultPartition.release(ResultPartition.java:262)
>  ~[flink-runtime-1.19-SNAPSHOT.jar:1.19-SNAPSHOT]
>         at 
> org.apache.flink.runtime.io.network.partition.ResultPartitionManager.releasePartition(ResultPartitionManager.java:88)
>  ~[flink-runtime-1.19-SNAPSHOT.jar:1.19-SNAPSHOT]
>         at 
> org.apache.flink.runtime.io.network.partition.ResultPartition.fail(ResultPartition.java:284)
>  ~[flink-runtime-1.19-SNAPSHOT.jar:1.19-SNAPSHOT]
>         at 
> org.apache.flink.runtime.taskmanager.Task.failAllResultPartitions(Task.java:1004)
>  ~[flink-runtime-1.19-SNAPSHOT.jar:1.19-SNAPSHOT]
>         at 
> org.apache.flink.runtime.taskmanager.Task.releaseResources(Task.java:990) 
> ~[flink-runtime-1.19-SNAPSHOT.jar:1.19-SNAPSHOT]
>         at org.apache.flink.runtime.taskmanager.Task.doRun(Task.java:838) 
> [flink-runtime-1.19-SNAPSHOT.jar:1.19-SNAPSHOT]
>         at org.apache.flink.runtime.taskmanager.Task.run(Task.java:562) 
> [flink-runtime-1.19-SNAPSHOT.jar:1.19-SNAPSHOT]
>         at java.lang.Thread.run(Thread.java:748) [?:1.8.0_292]
> 01:17:22,375 [flink-pekko.actor.default-dispatcher-5] INFO  
> org.apache.flink.runtime.taskmanager.Task                    [] - Task Sink: 
> Unnamed (3/10)#0 is already in state CANCELING
> 01:17:22,375 [        Map (5/10)#0] ERROR 
> org.apache.flink.runtime.taskexecutor.TaskExecutor           [] - FATAL - 
> exception in resource cleanup of task Map (5/10)#0 
> (159f887fbd200ea7cfa4aaeb1127c4ab_0a448493b4782967b150582570326227_4_0)
> .
> java.lang.IllegalStateException: Leaking buffers.
>         at 
> org.apache.flink.util.Preconditions.checkState(Preconditions.java:193) 
> ~[flink-core-1.19-SNAPSHOT.jar:1.19-SNAPSHOT]
>         at 
> org.apache.flink.runtime.io.network.partition.hybrid.tiered.storage.TieredStorageMemoryManagerImpl.release(TieredStorageMemoryManagerImpl.java:236)
>  ~[flink-runtime-1.19-SNAPSHOT.jar:1.19-SNAPSHOT]
>         at java.util.ArrayList.forEach(ArrayList.java:1259) ~[?:1.8.0_292]
>         at 
> org.apache.flink.runtime.io.network.partition.hybrid.tiered.storage.TieredStorageResourceRegistry.clearResourceFor(TieredStorageResourceRegistry.java:59)
>  ~[flink-runtime-1.19-SNAPSHOT.jar:1.19-SNAPSHOT]
>         at 
> org.apache.flink.runtime.io.network.partition.hybrid.tiered.shuffle.TieredResultPartition.releaseInternal(TieredResultPartition.java:195)
>  ~[flink-runtime-1.19-SNAPSHOT.jar:1.19-SNAPSHOT]
>         at 
> org.apache.flink.runtime.io.network.partition.ResultPartition.release(ResultPartition.java:262)
>  ~[flink-runtime-1.19-SNAPSHOT.jar:1.19-SNAPSHOT]
>         at 
> org.apache.flink.runtime.io.network.partition.ResultPartitionManager.releasePartition(ResultPartitionManager.java:88)
>  ~[flink-runtime-1.19-SNAPSHOT.jar:1.19-SNAPSHOT]
>         at 
> org.apache.flink.runtime.io.network.partition.ResultPartition.fail(ResultPartition.java:284)
>  ~[flink-runtime-1.19-SNAPSHOT.jar:1.19-SNAPSHOT]
>         at 
> org.apache.flink.runtime.taskmanager.Task.failAllResultPartitions(Task.java:1004)
>  ~[flink-runtime-1.19-SNAPSHOT.jar:1.19-SNAPSHOT]
>         at 
> org.apache.flink.runtime.taskmanager.Task.releaseResources(Task.java:990) 
> [flink-runtime-1.19-SNAPSHOT.jar:1.19-SNAPSHOT]
>         at org.apache.flink.runtime.taskmanager.Task.doRun(Task.java:838) 
> [flink-runtime-1.19-SNAPSHOT.jar:1.19-SNAPSHOT]
>         at org.apache.flink.runtime.taskmanager.Task.run(Task.java:562) 
> [flink-runtime-1.19-SNAPSHOT.jar:1.19-SNAPSHOT]
>         at java.lang.Thread.run(Thread.java:748) [?:1.8.0_292]
> {noformat}



--
This message was sent by Atlassian Jira
(v8.20.10#820010)

Reply via email to