[ 
https://issues.apache.org/jira/browse/FLINK-4485?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15436917#comment-15436917
 ] 

Niels Basjes commented on FLINK-4485:
-------------------------------------

I just reproduced the effect on a non-secure Yarn cluster.
After having run a few jobs I see this on the node where the jobmanager runs:

{code}
[root@node1 ~]# lsof | fgrep '/tmp/blobStore'
java      15358          yarn  mem       REG                8,3  70243224   
25936270 
/tmp/blobStore-0864a537-f6fa-4b27-9b7f-8cb5a3722c3e/cache/blob_501262b25ff9158ff07ee1f4264b5e3afeaaf69f
java      15358          yarn  DEL       REG                8,3             
25936269 
/tmp/blobStore-0864a537-f6fa-4b27-9b7f-8cb5a3722c3e/incoming/temp-00000027
java      15358          yarn  DEL       REG                8,3             
25936268 
/tmp/blobStore-0864a537-f6fa-4b27-9b7f-8cb5a3722c3e/incoming/temp-00000026
java      15358          yarn  DEL       REG                8,3             
25936267 
/tmp/blobStore-0864a537-f6fa-4b27-9b7f-8cb5a3722c3e/incoming/temp-00000025
java      15358          yarn  DEL       REG                8,3             
25936266 
/tmp/blobStore-0864a537-f6fa-4b27-9b7f-8cb5a3722c3e/incoming/temp-00000024
java      15358          yarn  DEL       REG                8,3             
25936265 
/tmp/blobStore-0864a537-f6fa-4b27-9b7f-8cb5a3722c3e/incoming/temp-00000023
java      15358          yarn  DEL       REG                8,3             
25936264 
/tmp/blobStore-0864a537-f6fa-4b27-9b7f-8cb5a3722c3e/incoming/temp-00000022
java      15358          yarn  DEL       REG                8,3             
25936263 
/tmp/blobStore-0864a537-f6fa-4b27-9b7f-8cb5a3722c3e/incoming/temp-00000021
java      15358          yarn  DEL       REG                8,3             
25936258 
/tmp/blobStore-0864a537-f6fa-4b27-9b7f-8cb5a3722c3e/incoming/temp-00000020
java      15358          yarn  DEL       REG                8,3             
25936257 
/tmp/blobStore-0864a537-f6fa-4b27-9b7f-8cb5a3722c3e/incoming/temp-00000019
java      15358          yarn  DEL       REG                8,3             
25936260 
/tmp/blobStore-0864a537-f6fa-4b27-9b7f-8cb5a3722c3e/incoming/temp-00000018
java      15358          yarn  DEL       REG                8,3             
25936259 
/tmp/blobStore-0864a537-f6fa-4b27-9b7f-8cb5a3722c3e/incoming/temp-00000017
java      15358          yarn  DEL       REG                8,3             
25936256 
/tmp/blobStore-0864a537-f6fa-4b27-9b7f-8cb5a3722c3e/incoming/temp-00000016
java      15358          yarn  DEL       REG                8,3             
25936255 
/tmp/blobStore-0864a537-f6fa-4b27-9b7f-8cb5a3722c3e/incoming/temp-00000015
java      15358          yarn  DEL       REG                8,3             
25936254 
/tmp/blobStore-0864a537-f6fa-4b27-9b7f-8cb5a3722c3e/incoming/temp-00000014
java      15358          yarn  DEL       REG                8,3             
25936253 
/tmp/blobStore-0864a537-f6fa-4b27-9b7f-8cb5a3722c3e/incoming/temp-00000013
java      15358          yarn  DEL       REG                8,3             
25936252 
/tmp/blobStore-0864a537-f6fa-4b27-9b7f-8cb5a3722c3e/incoming/temp-00000012
java      15358          yarn  DEL       REG                8,3             
25936251 
/tmp/blobStore-0864a537-f6fa-4b27-9b7f-8cb5a3722c3e/incoming/temp-00000011
java      15358          yarn  DEL       REG                8,3             
25936250 
/tmp/blobStore-0864a537-f6fa-4b27-9b7f-8cb5a3722c3e/incoming/temp-00000010
java      15358          yarn  DEL       REG                8,3             
25936249 
/tmp/blobStore-0864a537-f6fa-4b27-9b7f-8cb5a3722c3e/incoming/temp-00000009
java      15358          yarn  DEL       REG                8,3             
25936248 
/tmp/blobStore-0864a537-f6fa-4b27-9b7f-8cb5a3722c3e/incoming/temp-00000008
java      15358          yarn  DEL       REG                8,3             
25936247 
/tmp/blobStore-0864a537-f6fa-4b27-9b7f-8cb5a3722c3e/incoming/temp-00000007
java      15358          yarn  DEL       REG                8,3             
25936246 
/tmp/blobStore-0864a537-f6fa-4b27-9b7f-8cb5a3722c3e/incoming/temp-00000006
java      15358          yarn  DEL       REG                8,3             
25936244 
/tmp/blobStore-0864a537-f6fa-4b27-9b7f-8cb5a3722c3e/incoming/temp-00000005
java      15358          yarn  DEL       REG                8,3             
25936222 
/tmp/blobStore-0864a537-f6fa-4b27-9b7f-8cb5a3722c3e/incoming/temp-00000004
java      15358          yarn  DEL       REG                8,3             
25936221 
/tmp/blobStore-0864a537-f6fa-4b27-9b7f-8cb5a3722c3e/incoming/temp-00000003
java      15358          yarn  DEL       REG                8,3             
25936220 
/tmp/blobStore-0864a537-f6fa-4b27-9b7f-8cb5a3722c3e/incoming/temp-00000002
java      15358          yarn  DEL       REG                8,3             
25936215 
/tmp/blobStore-0864a537-f6fa-4b27-9b7f-8cb5a3722c3e/incoming/temp-00000001
java      15358          yarn  422r      REG                8,3  70243224   
25936222 
/tmp/blobStore-0864a537-f6fa-4b27-9b7f-8cb5a3722c3e/incoming/temp-00000004 
(deleted)
java      15358          yarn  581u      REG                8,3  70243224   
25936265 
/tmp/blobStore-0864a537-f6fa-4b27-9b7f-8cb5a3722c3e/incoming/temp-00000023 
(deleted)
java      15358          yarn  582u      REG                8,3  70243224   
25936267 
/tmp/blobStore-0864a537-f6fa-4b27-9b7f-8cb5a3722c3e/incoming/temp-00000025 
(deleted)
java      15358          yarn  583r      REG                8,3  70243224   
25936246 
/tmp/blobStore-0864a537-f6fa-4b27-9b7f-8cb5a3722c3e/incoming/temp-00000006 
(deleted)
java      15358          yarn  584r      REG                8,3  70243224   
25936215 
/tmp/blobStore-0864a537-f6fa-4b27-9b7f-8cb5a3722c3e/incoming/temp-00000001 
(deleted)
java      15358          yarn  590u      REG                8,3  70243224   
25936266 
/tmp/blobStore-0864a537-f6fa-4b27-9b7f-8cb5a3722c3e/incoming/temp-00000024 
(deleted)
java      15358          yarn  591r      REG                8,3  70243224   
25936220 
/tmp/blobStore-0864a537-f6fa-4b27-9b7f-8cb5a3722c3e/incoming/temp-00000002 
(deleted)
java      15358          yarn  593r      REG                8,3  70243224   
25936221 
/tmp/blobStore-0864a537-f6fa-4b27-9b7f-8cb5a3722c3e/incoming/temp-00000003 
(deleted)
java      15358          yarn  594u      REG                8,3  70243224   
25936268 
/tmp/blobStore-0864a537-f6fa-4b27-9b7f-8cb5a3722c3e/incoming/temp-00000026 
(deleted)
java      15358          yarn  595u      REG                8,3  70243224   
25936270 
/tmp/blobStore-0864a537-f6fa-4b27-9b7f-8cb5a3722c3e/cache/blob_501262b25ff9158ff07ee1f4264b5e3afeaaf69f
java      15358          yarn  597r      REG                8,3  70243224   
25936255 
/tmp/blobStore-0864a537-f6fa-4b27-9b7f-8cb5a3722c3e/incoming/temp-00000015 
(deleted)
java      15358          yarn  598u      REG                8,3  70243224   
25936269 
/tmp/blobStore-0864a537-f6fa-4b27-9b7f-8cb5a3722c3e/incoming/temp-00000027 
(deleted)
java      15358          yarn  599r      REG                8,3  70243224   
25936252 
/tmp/blobStore-0864a537-f6fa-4b27-9b7f-8cb5a3722c3e/incoming/temp-00000012 
(deleted)
java      15358          yarn  600r      REG                8,3  70243224   
25936250 
/tmp/blobStore-0864a537-f6fa-4b27-9b7f-8cb5a3722c3e/incoming/temp-00000010 
(deleted)
java      15358          yarn  601r      REG                8,3  70243224   
25936254 
/tmp/blobStore-0864a537-f6fa-4b27-9b7f-8cb5a3722c3e/incoming/temp-00000014 
(deleted)
java      15358          yarn  602r      REG                8,3  70243224   
25936244 
/tmp/blobStore-0864a537-f6fa-4b27-9b7f-8cb5a3722c3e/incoming/temp-00000005 
(deleted)
java      15358          yarn  603r      REG                8,3  70243224   
25936259 
/tmp/blobStore-0864a537-f6fa-4b27-9b7f-8cb5a3722c3e/incoming/temp-00000017 
(deleted)
java      15358          yarn  604r      REG                8,3  70243224   
25936248 
/tmp/blobStore-0864a537-f6fa-4b27-9b7f-8cb5a3722c3e/incoming/temp-00000008 
(deleted)
java      15358          yarn  605r      REG                8,3  70243224   
25936260 
/tmp/blobStore-0864a537-f6fa-4b27-9b7f-8cb5a3722c3e/incoming/temp-00000018 
(deleted)
java      15358          yarn  607r      REG                8,3  70243224   
25936257 
/tmp/blobStore-0864a537-f6fa-4b27-9b7f-8cb5a3722c3e/incoming/temp-00000019 
(deleted)
java      15358          yarn  608r      REG                8,3  70243224   
25936258 
/tmp/blobStore-0864a537-f6fa-4b27-9b7f-8cb5a3722c3e/incoming/temp-00000020 
(deleted)
java      15358          yarn  609r      REG                8,3  70243224   
25936263 
/tmp/blobStore-0864a537-f6fa-4b27-9b7f-8cb5a3722c3e/incoming/temp-00000021 
(deleted)
java      15358          yarn  610r      REG                8,3  70243224   
25936264 
/tmp/blobStore-0864a537-f6fa-4b27-9b7f-8cb5a3722c3e/incoming/temp-00000022 
(deleted)
java      15358          yarn  613r      REG                8,3  70243224   
25936247 
/tmp/blobStore-0864a537-f6fa-4b27-9b7f-8cb5a3722c3e/incoming/temp-00000007 
(deleted)
java      15358          yarn  617r      REG                8,3  70243224   
25936253 
/tmp/blobStore-0864a537-f6fa-4b27-9b7f-8cb5a3722c3e/incoming/temp-00000013 
(deleted)
java      15358          yarn  618r      REG                8,3  70243224   
25936251 
/tmp/blobStore-0864a537-f6fa-4b27-9b7f-8cb5a3722c3e/incoming/temp-00000011 
(deleted)
java      15358          yarn  619r      REG                8,3  70243224   
25936249 
/tmp/blobStore-0864a537-f6fa-4b27-9b7f-8cb5a3722c3e/incoming/temp-00000009 
(deleted)
java      15358          yarn  631r      REG                8,3  70243224   
25936256 
/tmp/blobStore-0864a537-f6fa-4b27-9b7f-8cb5a3722c3e/incoming/temp-00000016 
(deleted)
java      15454          yarn  mem       REG                8,3  70243224   
25936219 
/tmp/blobStore-087a0b08-ee59-4d21-8523-c78a79984a4a/cache/blob_501262b25ff9158ff07ee1f4264b5e3afeaaf69f
java      15454          yarn  490r      REG                8,3  70243224   
25936219 
/tmp/blobStore-087a0b08-ee59-4d21-8523-c78a79984a4a/cache/blob_501262b25ff9158ff07ee1f4264b5e3afeaaf69f
{code}

The two process ids you see here are:
{code}yarn     15358  4.9  0.3 1362160 431128 ?      Sl   15:24   1:52  |       
\_ /usr/lib/jvm/jre/bin/java -Xmx424M 
-Dlog.file=/var/log/hadoop-yarn/containers/application_1464009968005_2639/container_1464009968005_2639_01_000001/jobmanager.log
 -Dlogback.configurationFile=file:logback.xml 
-Dlog4j.configuration=file:log4j.properties 
org.apache.flink.yarn.YarnApplicationMasterRunner{code}

{code}yarn     15454 10.1  0.6 1306404 801228 ?      Sl   15:24   3:51          
\_ /usr/lib/jvm/jre/bin/java -Xms424m -Xmx424m -XX:MaxDirectMemorySize=424m 
-Dlog.file=/var/log/hadoop-yarn/containers/application_1464009968005_2639/container_1464009968005_2639_01_000002/taskmanager.log
 -Dlogback.configurationFile=file:./logback.xml 
-Dlog4j.configuration=file:./log4j.properties 
org.apache.flink.yarn.YarnTaskManager --configDir .{code}





> Finished jobs in yarn session fill /tmp filesystem
> --------------------------------------------------
>
>                 Key: FLINK-4485
>                 URL: https://issues.apache.org/jira/browse/FLINK-4485
>             Project: Flink
>          Issue Type: Bug
>          Components: JobManager
>    Affects Versions: 1.1.0
>            Reporter: Niels Basjes
>            Priority: Blocker
>
> On a Yarn cluster I start a yarn-session with a few containers and task slots.
> Then I fire a 'large' number of Flink batch jobs in sequence against this 
> yarn session. It is the exact same job (java code) yet it gets different 
> parameters.
> In this scenario it is exporting HBase tables to files in HDFS and the 
> parameters are about which data from which tables and the name of the target 
> directory.
> After running several dozen jobs, job submissions started to fail and we 
> investigated.
> We found that the cause was that on the Yarn node which was hosting the 
> jobmanager the /tmp file system was full (4GB was 100% full).
> However, the output of {{du -hcs /tmp}} showed only 200MB in use.
> We found that a very large file (we guess it is the jar of the job) was put 
> in /tmp, used, and deleted, yet the file handle was not closed by the jobmanager.
> As soon as we killed the jobmanager the disk space was freed.
> The summary of the impact of this is that a yarn-session that receives enough 
> jobs brings down the Yarn node for all users.
> See parts of the output we got from {{lsof}} below.
> {code}
> COMMAND     PID      USER   FD      TYPE             DEVICE      SIZE       
> NODE NAME
> java      15034   nbasjes  550r      REG             253,17  66219695        
> 245 
> /tmp/blobStore-fbe9c4cf-1f85-48cb-aad9-180e8d4ec7ce/incoming/temp-00000003 
> (deleted)
> java      15034   nbasjes  551r      REG             253,17  66219695        
> 252 
> /tmp/blobStore-fbe9c4cf-1f85-48cb-aad9-180e8d4ec7ce/incoming/temp-00000007 
> (deleted)
> java      15034   nbasjes  552r      REG             253,17  66219695        
> 267 
> /tmp/blobStore-fbe9c4cf-1f85-48cb-aad9-180e8d4ec7ce/incoming/temp-00000012 
> (deleted)
> java      15034   nbasjes  553r      REG             253,17  66219695        
> 250 
> /tmp/blobStore-fbe9c4cf-1f85-48cb-aad9-180e8d4ec7ce/incoming/temp-00000005 
> (deleted)
> java      15034   nbasjes  554r      REG             253,17  66219695        
> 288 
> /tmp/blobStore-fbe9c4cf-1f85-48cb-aad9-180e8d4ec7ce/incoming/temp-00000018 
> (deleted)
> java      15034   nbasjes  555r      REG             253,17  66219695        
> 298 
> /tmp/blobStore-fbe9c4cf-1f85-48cb-aad9-180e8d4ec7ce/incoming/temp-00000025 
> (deleted)
> java      15034   nbasjes  557r      REG             253,17  66219695        
> 254 
> /tmp/blobStore-fbe9c4cf-1f85-48cb-aad9-180e8d4ec7ce/incoming/temp-00000008 
> (deleted)
> java      15034   nbasjes  558r      REG             253,17  66219695        
> 292 
> /tmp/blobStore-fbe9c4cf-1f85-48cb-aad9-180e8d4ec7ce/incoming/temp-00000019 
> (deleted)
> java      15034   nbasjes  559r      REG             253,17  66219695        
> 275 
> /tmp/blobStore-fbe9c4cf-1f85-48cb-aad9-180e8d4ec7ce/incoming/temp-00000013 
> (deleted)
> java      15034   nbasjes  560r      REG             253,17  66219695        
> 159 
> /tmp/blobStore-fbe9c4cf-1f85-48cb-aad9-180e8d4ec7ce/incoming/temp-00000002 
> (deleted)
> java      15034   nbasjes  562r      REG             253,17  66219695        
> 238 
> /tmp/blobStore-fbe9c4cf-1f85-48cb-aad9-180e8d4ec7ce/incoming/temp-00000001 
> (deleted)
> java      15034   nbasjes  568r      REG             253,17  66219695        
> 246 
> /tmp/blobStore-fbe9c4cf-1f85-48cb-aad9-180e8d4ec7ce/incoming/temp-00000004 
> (deleted)
> java      15034   nbasjes  569r      REG             253,17  66219695        
> 255 
> /tmp/blobStore-fbe9c4cf-1f85-48cb-aad9-180e8d4ec7ce/incoming/temp-00000009 
> (deleted)
> java      15034   nbasjes  571r      REG             253,17  66219695        
> 299 
> /tmp/blobStore-fbe9c4cf-1f85-48cb-aad9-180e8d4ec7ce/incoming/temp-00000026 
> (deleted)
> java      15034   nbasjes  572r      REG             253,17  66219695        
> 293 
> /tmp/blobStore-fbe9c4cf-1f85-48cb-aad9-180e8d4ec7ce/incoming/temp-00000020 
> (deleted)
> java      15034   nbasjes  574r      REG             253,17  66219695        
> 256 
> /tmp/blobStore-fbe9c4cf-1f85-48cb-aad9-180e8d4ec7ce/incoming/temp-00000010 
> (deleted)
> java      15034   nbasjes  575r      REG             253,17  66219695        
> 302 
> /tmp/blobStore-fbe9c4cf-1f85-48cb-aad9-180e8d4ec7ce/incoming/temp-00000029 
> (deleted)
> java      15034   nbasjes  576r      REG             253,17  66219695        
> 294 
> /tmp/blobStore-fbe9c4cf-1f85-48cb-aad9-180e8d4ec7ce/incoming/temp-00000021 
> (deleted)
> java      15034   nbasjes  577r      REG             253,17  66219695        
> 262 
> /tmp/blobStore-fbe9c4cf-1f85-48cb-aad9-180e8d4ec7ce/incoming/temp-00000011 
> (deleted)
> java      15034   nbasjes  578r      REG             253,17  66219695        
> 251 
> /tmp/blobStore-fbe9c4cf-1f85-48cb-aad9-180e8d4ec7ce/incoming/temp-00000006 
> (deleted)
> java      15034   nbasjes  580r      REG             253,17  66219695        
> 295 
> /tmp/blobStore-fbe9c4cf-1f85-48cb-aad9-180e8d4ec7ce/incoming/temp-00000022 
> (deleted)
> java      15034   nbasjes  581r      REG             253,17  66219695        
> 300 
> /tmp/blobStore-fbe9c4cf-1f85-48cb-aad9-180e8d4ec7ce/incoming/temp-00000027 
> (deleted)
> java      15034   nbasjes  582r      REG             253,17  66219695        
> 188 
> /tmp/blobStore-fbe9c4cf-1f85-48cb-aad9-180e8d4ec7ce/cache/blob_e318d1698aa6e7dc91e5f4a9f8ba29781aebd8c4
>  (deleted)
> java      15034   nbasjes  585r      REG             253,17  66219695        
> 279 
> /tmp/blobStore-fbe9c4cf-1f85-48cb-aad9-180e8d4ec7ce/incoming/temp-00000014 
> (deleted)
> java      15034   nbasjes  586r      REG             253,17  66219695        
> 296 
> /tmp/blobStore-fbe9c4cf-1f85-48cb-aad9-180e8d4ec7ce/incoming/temp-00000023 
> (deleted)
> java      15034   nbasjes  588r      REG             253,17  66219695        
> 301 
> /tmp/blobStore-fbe9c4cf-1f85-48cb-aad9-180e8d4ec7ce/incoming/temp-00000028 
> (deleted)
> java      15034   nbasjes  589r      REG             253,17  66219695        
> 297 
> /tmp/blobStore-fbe9c4cf-1f85-48cb-aad9-180e8d4ec7ce/incoming/temp-00000024 
> (deleted)
> java      15034   nbasjes  598r      REG             253,17  66219695        
> 280 
> /tmp/blobStore-fbe9c4cf-1f85-48cb-aad9-180e8d4ec7ce/incoming/temp-00000015 
> (deleted)
> java      15034   nbasjes  601r      REG             253,17  66219695        
> 289 
> /tmp/blobStore-fbe9c4cf-1f85-48cb-aad9-180e8d4ec7ce/incoming/temp-00000016 
> (deleted)
> java      15034   nbasjes  604r      REG             253,17  66219695        
> 284 
> /tmp/blobStore-fbe9c4cf-1f85-48cb-aad9-180e8d4ec7ce/incoming/temp-00000017 
> (deleted)
> {code}



--
This message was sent by Atlassian JIRA
(v6.3.4#6332)

Reply via email to