[jira] [Updated] (HADOOP-18911) DfsClientShmManager allocSlot hang on awaitUninterruptibly itself
[ https://issues.apache.org/jira/browse/HADOOP-18911?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] konwu updated HADOOP-18911: --- Description: This may be the same issue as HADOOP-14451. A Spark task Driver Thread hangs on awaitUninterruptibly; the thread stack is shown below: {code:java} sun.misc.Unsafe.park(Native Method) java.util.concurrent.locks.LockSupport.park(LockSupport.java:175) java.util.concurrent.locks.AbstractQueuedSynchronizer$ConditionObject.awaitUninterruptibly(AbstractQueuedSynchronizer.java:1976) org.apache.hadoop.hdfs.shortcircuit.DfsClientShmManager$EndpointShmManager.allocSlot(DfsClientShmManager.java:244) org.apache.hadoop.hdfs.shortcircuit.DfsClientShmManager.allocSlot(DfsClientShmManager.java:417) org.apache.hadoop.hdfs.shortcircuit.ShortCircuitCache.allocShmSlot(ShortCircuitCache.java:1006) org.apache.hadoop.hdfs.client.impl.BlockReaderFactory.createShortCircuitReplicaInfo(BlockReaderFactory.java:535) org.apache.hadoop.hdfs.shortcircuit.ShortCircuitCache.create(ShortCircuitCache.java:786) org.apache.hadoop.hdfs.shortcircuit.ShortCircuitCache.fetchOrCreate(ShortCircuitCache.java:723) org.apache.hadoop.hdfs.client.impl.BlockReaderFactory.getBlockReaderLocal(BlockReaderFactory.java:483) org.apache.hadoop.hdfs.client.impl.BlockReaderFactory.build(BlockReaderFactory.java:360) org.apache.hadoop.hdfs.DFSInputStream.getBlockReader(DFSInputStream.java:755) org.apache.hadoop.hdfs.DFSInputStream.blockSeekTo(DFSInputStream.java:685) => holding Monitor(org.apache.hadoop.hdfs.DFSInputStream@59925309}) org.apache.hadoop.hdfs.DFSInputStream.readWithStrategy(DFSInputStream.java:884) => holding Monitor(org.apache.hadoop.hdfs.DFSInputStream@59925309}) org.apache.hadoop.hdfs.DFSInputStream.read(DFSInputStream.java:957) => holding Monitor(org.apache.hadoop.hdfs.DFSInputStream@59925309}) java.io.DataInputStream.read(DataInputStream.java:100) java.nio.file.Files.copy(Files.java:2908) java.nio.file.Files.copy(Files.java:3027) 
sun.net.www.protocol.jar.URLJarFile$1.run(URLJarFile.java:220) sun.net.www.protocol.jar.URLJarFile$1.run(URLJarFile.java:216) java.security.AccessController.doPrivileged(Native Method) sun.net.www.protocol.jar.URLJarFile.retrieve(URLJarFile.java:215) sun.net.www.protocol.jar.URLJarFile.getJarFile(URLJarFile.java:71) sun.net.www.protocol.jar.JarFileFactory.get(JarFileFactory.java:84) sun.net.www.protocol.jar.JarURLConnection.connect(JarURLConnection.java:122) sun.net.www.protocol.jar.JarURLConnection.getJarFile(JarURLConnection.java:89) sun.misc.URLClassPath$JarLoader.getJarFile(URLClassPath.java:944) sun.misc.URLClassPath$JarLoader.access$800(URLClassPath.java:801) sun.misc.URLClassPath$JarLoader$1.run(URLClassPath.java:886) sun.misc.URLClassPath$JarLoader$1.run(URLClassPath.java:879) java.security.AccessController.doPrivileged(Native Method) sun.misc.URLClassPath$JarLoader.ensureOpen(URLClassPath.java:878) sun.misc.URLClassPath$JarLoader.(URLClassPath.java:829) sun.misc.URLClassPath$3.run(URLClassPath.java:575) sun.misc.URLClassPath$3.run(URLClassPath.java:565) java.security.AccessController.doPrivileged(Native Method) sun.misc.URLClassPath.getLoader(URLClassPath.java:564) sun.misc.URLClassPath.getLoader(URLClassPath.java:529) sun.misc.URLClassPath.getNextLoader(URLClassPath.java:494) => holding Monitor(sun.misc.URLClassPath@929546251}) sun.misc.URLClassPath.findResource(URLClassPath.java:224) java.net.URLClassLoader$2.run(URLClassLoader.java:572) java.net.URLClassLoader$2.run(URLClassLoader.java:570) java.security.AccessController.doPrivileged(Native Method) java.net.URLClassLoader.findResource(URLClassLoader.java:569) java.lang.ClassLoader.getResource(ClassLoader.java:1096) java.lang.ClassLoader.getResource(ClassLoader.java:1091) org.apache.hadoop.conf.Configuration.getResource(Configuration.java:2809) org.apache.hadoop.conf.Configuration.getStreamReader(Configuration.java:3081) org.apache.hadoop.conf.Configuration.loadResource(Configuration.java:3040) 
org.apache.hadoop.conf.Configuration.loadResources(Configuration.java:3013) org.apache.hadoop.conf.Configuration.loadProps(Configuration.java:2893) => holding Monitor(org.apache.hadoop.conf.Configuration@1190238736}) org.apache.hadoop.conf.Configuration.getProps(Configuration.java:2875) => holding Monitor(org.apache.hadoop.conf.Configuration@1190238736}) org.apache.hadoop.conf.Configuration.get(Configuration.java:1225) org.apache.hadoop.conf.Configuration.getTrimmed(Configuration.java:1279) org.apache.hadoop.conf.Configuration.getBoolean(Configuration.java:1686) org.apache.hadoop.io.nativeio.NativeIO$POSIX.(NativeIO.java:334) org.apache.hadoop.io.nativeio.NativeIO.initNative(Native Method) org.apache.hadoop.io.nativeio.NativeIO.(NativeIO.java:831) org.apache.hadoop.hdfs.shortcircuit.ShortCircuitShm.(ShortCircuitShm.java:469) org.apache.hadoop.hdfs.shortcircuit.DfsClientShm.(DfsClientShm.java:70)
[jira] [Updated] (HADOOP-18911) DfsClientShmManager allocSlot hang on awaitUninterruptibly itself
[ https://issues.apache.org/jira/browse/HADOOP-18911?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] konwu updated HADOOP-18911: --- Environment: hadoop 3.3.1 yarn 3.3.1 spark 3.1.3 was: hadoop 3.3.1 yarn 3.3.1 > DfsClientShmManager allocSlot hang on awaitUninterruptibly itself > - > > Key: HADOOP-18911 > URL: https://issues.apache.org/jira/browse/HADOOP-18911 > Project: Hadoop Common > Issue Type: Bug > Components: hdfs-client >Affects Versions: 3.3.1 > Environment: hadoop 3.3.1 > yarn 3.3.1 > spark 3.1.3 > >Reporter: konwu >Priority: Major > > Spark task Driver Thread hang on awaitUninterruptibly > thread stack like below: > {code:java} > sun.misc.Unsafe.park(Native Method) > java.util.concurrent.locks.LockSupport.park(LockSupport.java:175) > java.util.concurrent.locks.AbstractQueuedSynchronizer$ConditionObject.awaitUninterruptibly(AbstractQueuedSynchronizer.java:1976) > org.apache.hadoop.hdfs.shortcircuit.DfsClientShmManager$EndpointShmManager.allocSlot(DfsClientShmManager.java:244) > org.apache.hadoop.hdfs.shortcircuit.DfsClientShmManager.allocSlot(DfsClientShmManager.java:417) > org.apache.hadoop.hdfs.shortcircuit.ShortCircuitCache.allocShmSlot(ShortCircuitCache.java:1006) > org.apache.hadoop.hdfs.client.impl.BlockReaderFactory.createShortCircuitReplicaInfo(BlockReaderFactory.java:535) > org.apache.hadoop.hdfs.shortcircuit.ShortCircuitCache.create(ShortCircuitCache.java:786) > org.apache.hadoop.hdfs.shortcircuit.ShortCircuitCache.fetchOrCreate(ShortCircuitCache.java:723) > org.apache.hadoop.hdfs.client.impl.BlockReaderFactory.getBlockReaderLocal(BlockReaderFactory.java:483) > org.apache.hadoop.hdfs.client.impl.BlockReaderFactory.build(BlockReaderFactory.java:360) > org.apache.hadoop.hdfs.DFSInputStream.getBlockReader(DFSInputStream.java:755) > org.apache.hadoop.hdfs.DFSInputStream.blockSeekTo(DFSInputStream.java:685) => > holding Monitor(org.apache.hadoop.hdfs.DFSInputStream@59925309}) > 
org.apache.hadoop.hdfs.DFSInputStream.readWithStrategy(DFSInputStream.java:884) > => holding Monitor(org.apache.hadoop.hdfs.DFSInputStream@59925309}) > org.apache.hadoop.hdfs.DFSInputStream.read(DFSInputStream.java:957) => > holding Monitor(org.apache.hadoop.hdfs.DFSInputStream@59925309}) > java.io.DataInputStream.read(DataInputStream.java:100) > java.nio.file.Files.copy(Files.java:2908) > java.nio.file.Files.copy(Files.java:3027) > sun.net.www.protocol.jar.URLJarFile$1.run(URLJarFile.java:220) > sun.net.www.protocol.jar.URLJarFile$1.run(URLJarFile.java:216) > java.security.AccessController.doPrivileged(Native Method) > sun.net.www.protocol.jar.URLJarFile.retrieve(URLJarFile.java:215) > sun.net.www.protocol.jar.URLJarFile.getJarFile(URLJarFile.java:71) > sun.net.www.protocol.jar.JarFileFactory.get(JarFileFactory.java:84) > sun.net.www.protocol.jar.JarURLConnection.connect(JarURLConnection.java:122) > sun.net.www.protocol.jar.JarURLConnection.getJarFile(JarURLConnection.java:89) > sun.misc.URLClassPath$JarLoader.getJarFile(URLClassPath.java:944) > sun.misc.URLClassPath$JarLoader.access$800(URLClassPath.java:801) > sun.misc.URLClassPath$JarLoader$1.run(URLClassPath.java:886) > sun.misc.URLClassPath$JarLoader$1.run(URLClassPath.java:879) > java.security.AccessController.doPrivileged(Native Method) > sun.misc.URLClassPath$JarLoader.ensureOpen(URLClassPath.java:878) > sun.misc.URLClassPath$JarLoader.(URLClassPath.java:829) > sun.misc.URLClassPath$3.run(URLClassPath.java:575) > sun.misc.URLClassPath$3.run(URLClassPath.java:565) > java.security.AccessController.doPrivileged(Native Method) > sun.misc.URLClassPath.getLoader(URLClassPath.java:564) > sun.misc.URLClassPath.getLoader(URLClassPath.java:529) > sun.misc.URLClassPath.getNextLoader(URLClassPath.java:494) => holding > Monitor(sun.misc.URLClassPath@929546251}) > sun.misc.URLClassPath.findResource(URLClassPath.java:224) > java.net.URLClassLoader$2.run(URLClassLoader.java:572) > 
java.net.URLClassLoader$2.run(URLClassLoader.java:570) > java.security.AccessController.doPrivileged(Native Method) > java.net.URLClassLoader.findResource(URLClassLoader.java:569) > java.lang.ClassLoader.getResource(ClassLoader.java:1096) > java.lang.ClassLoader.getResource(ClassLoader.java:1091) > org.apache.hadoop.conf.Configuration.getResource(Configuration.java:2809) > org.apache.hadoop.conf.Configuration.getStreamReader(Configuration.java:3081) > org.apache.hadoop.conf.Configuration.loadResource(Configuration.java:3040) > org.apache.hadoop.conf.Configuration.loadResources(Configuration.java:3013) > org.apache.hadoop.conf.Configuration.loadProps(Configuration.java:2893) => > holding Monitor(org.apache.hadoop.conf.Configuration@1190238736}) > org.apache.hadoop.conf.Configuration.getProps(Configuration.java:2875) => > holding
[jira] [Updated] (HADOOP-18911) DfsClientShmManager allocSlot hang on awaitUninterruptibly itself
[ https://issues.apache.org/jira/browse/HADOOP-18911?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] konwu updated HADOOP-18911: --- Description: Spark task Driver Thread hang on awaitUninterruptibly thread stack like below: {code:java} sun.misc.Unsafe.park(Native Method) java.util.concurrent.locks.LockSupport.park(LockSupport.java:175) java.util.concurrent.locks.AbstractQueuedSynchronizer$ConditionObject.awaitUninterruptibly(AbstractQueuedSynchronizer.java:1976) org.apache.hadoop.hdfs.shortcircuit.DfsClientShmManager$EndpointShmManager.allocSlot(DfsClientShmManager.java:244) org.apache.hadoop.hdfs.shortcircuit.DfsClientShmManager.allocSlot(DfsClientShmManager.java:417) org.apache.hadoop.hdfs.shortcircuit.ShortCircuitCache.allocShmSlot(ShortCircuitCache.java:1006) org.apache.hadoop.hdfs.client.impl.BlockReaderFactory.createShortCircuitReplicaInfo(BlockReaderFactory.java:535) org.apache.hadoop.hdfs.shortcircuit.ShortCircuitCache.create(ShortCircuitCache.java:786) org.apache.hadoop.hdfs.shortcircuit.ShortCircuitCache.fetchOrCreate(ShortCircuitCache.java:723) org.apache.hadoop.hdfs.client.impl.BlockReaderFactory.getBlockReaderLocal(BlockReaderFactory.java:483) org.apache.hadoop.hdfs.client.impl.BlockReaderFactory.build(BlockReaderFactory.java:360) org.apache.hadoop.hdfs.DFSInputStream.getBlockReader(DFSInputStream.java:755) org.apache.hadoop.hdfs.DFSInputStream.blockSeekTo(DFSInputStream.java:685) => holding Monitor(org.apache.hadoop.hdfs.DFSInputStream@59925309}) org.apache.hadoop.hdfs.DFSInputStream.readWithStrategy(DFSInputStream.java:884) => holding Monitor(org.apache.hadoop.hdfs.DFSInputStream@59925309}) org.apache.hadoop.hdfs.DFSInputStream.read(DFSInputStream.java:957) => holding Monitor(org.apache.hadoop.hdfs.DFSInputStream@59925309}) java.io.DataInputStream.read(DataInputStream.java:100) java.nio.file.Files.copy(Files.java:2908) java.nio.file.Files.copy(Files.java:3027) sun.net.www.protocol.jar.URLJarFile$1.run(URLJarFile.java:220) 
sun.net.www.protocol.jar.URLJarFile$1.run(URLJarFile.java:216) java.security.AccessController.doPrivileged(Native Method) sun.net.www.protocol.jar.URLJarFile.retrieve(URLJarFile.java:215) sun.net.www.protocol.jar.URLJarFile.getJarFile(URLJarFile.java:71) sun.net.www.protocol.jar.JarFileFactory.get(JarFileFactory.java:84) sun.net.www.protocol.jar.JarURLConnection.connect(JarURLConnection.java:122) sun.net.www.protocol.jar.JarURLConnection.getJarFile(JarURLConnection.java:89) sun.misc.URLClassPath$JarLoader.getJarFile(URLClassPath.java:944) sun.misc.URLClassPath$JarLoader.access$800(URLClassPath.java:801) sun.misc.URLClassPath$JarLoader$1.run(URLClassPath.java:886) sun.misc.URLClassPath$JarLoader$1.run(URLClassPath.java:879) java.security.AccessController.doPrivileged(Native Method) sun.misc.URLClassPath$JarLoader.ensureOpen(URLClassPath.java:878) sun.misc.URLClassPath$JarLoader.(URLClassPath.java:829) sun.misc.URLClassPath$3.run(URLClassPath.java:575) sun.misc.URLClassPath$3.run(URLClassPath.java:565) java.security.AccessController.doPrivileged(Native Method) sun.misc.URLClassPath.getLoader(URLClassPath.java:564) sun.misc.URLClassPath.getLoader(URLClassPath.java:529) sun.misc.URLClassPath.getNextLoader(URLClassPath.java:494) => holding Monitor(sun.misc.URLClassPath@929546251}) sun.misc.URLClassPath.findResource(URLClassPath.java:224) java.net.URLClassLoader$2.run(URLClassLoader.java:572) java.net.URLClassLoader$2.run(URLClassLoader.java:570) java.security.AccessController.doPrivileged(Native Method) java.net.URLClassLoader.findResource(URLClassLoader.java:569) java.lang.ClassLoader.getResource(ClassLoader.java:1096) java.lang.ClassLoader.getResource(ClassLoader.java:1091) org.apache.hadoop.conf.Configuration.getResource(Configuration.java:2809) org.apache.hadoop.conf.Configuration.getStreamReader(Configuration.java:3081) org.apache.hadoop.conf.Configuration.loadResource(Configuration.java:3040) 
org.apache.hadoop.conf.Configuration.loadResources(Configuration.java:3013) org.apache.hadoop.conf.Configuration.loadProps(Configuration.java:2893) => holding Monitor(org.apache.hadoop.conf.Configuration@1190238736}) org.apache.hadoop.conf.Configuration.getProps(Configuration.java:2875) => holding Monitor(org.apache.hadoop.conf.Configuration@1190238736}) org.apache.hadoop.conf.Configuration.get(Configuration.java:1225) org.apache.hadoop.conf.Configuration.getTrimmed(Configuration.java:1279) org.apache.hadoop.conf.Configuration.getBoolean(Configuration.java:1686) org.apache.hadoop.io.nativeio.NativeIO$POSIX.(NativeIO.java:334) org.apache.hadoop.io.nativeio.NativeIO.initNative(Native Method) org.apache.hadoop.io.nativeio.NativeIO.(NativeIO.java:831) org.apache.hadoop.hdfs.shortcircuit.ShortCircuitShm.(ShortCircuitShm.java:469) org.apache.hadoop.hdfs.shortcircuit.DfsClientShm.(DfsClientShm.java:70) org.apache.hadoop.hdfs.shortcircuit.DfsClientShmManager$EndpointShmManager.requestNewShm(DfsClientShmManager.java:181)
[jira] [Created] (HADOOP-18911) DfsClientShmManager allocSlot hang on awaitUninterruptibly itself
konwu created HADOOP-18911: -- Summary: DfsClientShmManager allocSlot hang on awaitUninterruptibly itself Key: HADOOP-18911 URL: https://issues.apache.org/jira/browse/HADOOP-18911 Project: Hadoop Common Issue Type: Bug Components: hdfs-client Affects Versions: 3.3.1 Environment: hadoop 3.3.1 yarn 3.3.1 Reporter: konwu Spark task Driver Thread hang on awaitUninterruptibly thread stack like below: ```java sun.misc.Unsafe.park(Native Method) java.util.concurrent.locks.LockSupport.park(LockSupport.java:175) java.util.concurrent.locks.AbstractQueuedSynchronizer$ConditionObject.awaitUninterruptibly(AbstractQueuedSynchronizer.java:1976) org.apache.hadoop.hdfs.shortcircuit.DfsClientShmManager$EndpointShmManager.allocSlot(DfsClientShmManager.java:244) org.apache.hadoop.hdfs.shortcircuit.DfsClientShmManager.allocSlot(DfsClientShmManager.java:417) org.apache.hadoop.hdfs.shortcircuit.ShortCircuitCache.allocShmSlot(ShortCircuitCache.java:1006) org.apache.hadoop.hdfs.client.impl.BlockReaderFactory.createShortCircuitReplicaInfo(BlockReaderFactory.java:535) org.apache.hadoop.hdfs.shortcircuit.ShortCircuitCache.create(ShortCircuitCache.java:786) org.apache.hadoop.hdfs.shortcircuit.ShortCircuitCache.fetchOrCreate(ShortCircuitCache.java:723) org.apache.hadoop.hdfs.client.impl.BlockReaderFactory.getBlockReaderLocal(BlockReaderFactory.java:483) org.apache.hadoop.hdfs.client.impl.BlockReaderFactory.build(BlockReaderFactory.java:360) org.apache.hadoop.hdfs.DFSInputStream.getBlockReader(DFSInputStream.java:755) org.apache.hadoop.hdfs.DFSInputStream.blockSeekTo(DFSInputStream.java:685) => holding Monitor(org.apache.hadoop.hdfs.DFSInputStream@59925309}) org.apache.hadoop.hdfs.DFSInputStream.readWithStrategy(DFSInputStream.java:884) => holding Monitor(org.apache.hadoop.hdfs.DFSInputStream@59925309}) org.apache.hadoop.hdfs.DFSInputStream.read(DFSInputStream.java:957) => holding Monitor(org.apache.hadoop.hdfs.DFSInputStream@59925309}) java.io.DataInputStream.read(DataInputStream.java:100) 
java.nio.file.Files.copy(Files.java:2908) java.nio.file.Files.copy(Files.java:3027) sun.net.www.protocol.jar.URLJarFile$1.run(URLJarFile.java:220) sun.net.www.protocol.jar.URLJarFile$1.run(URLJarFile.java:216) java.security.AccessController.doPrivileged(Native Method) sun.net.www.protocol.jar.URLJarFile.retrieve(URLJarFile.java:215) sun.net.www.protocol.jar.URLJarFile.getJarFile(URLJarFile.java:71) sun.net.www.protocol.jar.JarFileFactory.get(JarFileFactory.java:84) sun.net.www.protocol.jar.JarURLConnection.connect(JarURLConnection.java:122) sun.net.www.protocol.jar.JarURLConnection.getJarFile(JarURLConnection.java:89) sun.misc.URLClassPath$JarLoader.getJarFile(URLClassPath.java:944) sun.misc.URLClassPath$JarLoader.access$800(URLClassPath.java:801) sun.misc.URLClassPath$JarLoader$1.run(URLClassPath.java:886) sun.misc.URLClassPath$JarLoader$1.run(URLClassPath.java:879) java.security.AccessController.doPrivileged(Native Method) sun.misc.URLClassPath$JarLoader.ensureOpen(URLClassPath.java:878) sun.misc.URLClassPath$JarLoader.(URLClassPath.java:829) sun.misc.URLClassPath$3.run(URLClassPath.java:575) sun.misc.URLClassPath$3.run(URLClassPath.java:565) java.security.AccessController.doPrivileged(Native Method) sun.misc.URLClassPath.getLoader(URLClassPath.java:564) sun.misc.URLClassPath.getLoader(URLClassPath.java:529) sun.misc.URLClassPath.getNextLoader(URLClassPath.java:494) => holding Monitor(sun.misc.URLClassPath@929546251}) sun.misc.URLClassPath.findResource(URLClassPath.java:224) java.net.URLClassLoader$2.run(URLClassLoader.java:572) java.net.URLClassLoader$2.run(URLClassLoader.java:570) java.security.AccessController.doPrivileged(Native Method) java.net.URLClassLoader.findResource(URLClassLoader.java:569) java.lang.ClassLoader.getResource(ClassLoader.java:1096) java.lang.ClassLoader.getResource(ClassLoader.java:1091) org.apache.hadoop.conf.Configuration.getResource(Configuration.java:2809) org.apache.hadoop.conf.Configuration.getStreamReader(Configuration.java:3081) 
org.apache.hadoop.conf.Configuration.loadResource(Configuration.java:3040) org.apache.hadoop.conf.Configuration.loadResources(Configuration.java:3013) org.apache.hadoop.conf.Configuration.loadProps(Configuration.java:2893) => holding Monitor(org.apache.hadoop.conf.Configuration@1190238736}) org.apache.hadoop.conf.Configuration.getProps(Configuration.java:2875) => holding Monitor(org.apache.hadoop.conf.Configuration@1190238736}) org.apache.hadoop.conf.Configuration.get(Configuration.java:1225) org.apache.hadoop.conf.Configuration.getTrimmed(Configuration.java:1279) org.apache.hadoop.conf.Configuration.getBoolean(Configuration.java:1686) org.apache.hadoop.io.nativeio.NativeIO$POSIX.(NativeIO.java:334) org.apache.hadoop.io.nativeio.NativeIO.initNative(Native Method) org.apache.hadoop.io.nativeio.NativeIO.(NativeIO.java:831)