konwu created HADOOP-18911:
------------------------------

             Summary: DfsClientShmManager allocSlot hang on 
awaitUninterruptibly itself
                 Key: HADOOP-18911
                 URL: https://issues.apache.org/jira/browse/HADOOP-18911
             Project: Hadoop Common
          Issue Type: Bug
          Components: hdfs-client
    Affects Versions: 3.3.1
         Environment: hadoop 3.3.1

yarn 3.3.1

 
            Reporter: konwu


A Spark task driver thread hangs on awaitUninterruptibly.

The thread stack is shown below:

```java

sun.misc.Unsafe.park(Native Method)
java.util.concurrent.locks.LockSupport.park(LockSupport.java:175)
java.util.concurrent.locks.AbstractQueuedSynchronizer$ConditionObject.awaitUninterruptibly(AbstractQueuedSynchronizer.java:1976)
org.apache.hadoop.hdfs.shortcircuit.DfsClientShmManager$EndpointShmManager.allocSlot(DfsClientShmManager.java:244)
org.apache.hadoop.hdfs.shortcircuit.DfsClientShmManager.allocSlot(DfsClientShmManager.java:417)
org.apache.hadoop.hdfs.shortcircuit.ShortCircuitCache.allocShmSlot(ShortCircuitCache.java:1006)
org.apache.hadoop.hdfs.client.impl.BlockReaderFactory.createShortCircuitReplicaInfo(BlockReaderFactory.java:535)
org.apache.hadoop.hdfs.shortcircuit.ShortCircuitCache.create(ShortCircuitCache.java:786)
org.apache.hadoop.hdfs.shortcircuit.ShortCircuitCache.fetchOrCreate(ShortCircuitCache.java:723)
org.apache.hadoop.hdfs.client.impl.BlockReaderFactory.getBlockReaderLocal(BlockReaderFactory.java:483)
org.apache.hadoop.hdfs.client.impl.BlockReaderFactory.build(BlockReaderFactory.java:360)
org.apache.hadoop.hdfs.DFSInputStream.getBlockReader(DFSInputStream.java:755)
org.apache.hadoop.hdfs.DFSInputStream.blockSeekTo(DFSInputStream.java:685) => 
holding Monitor(org.apache.hadoop.hdfs.DFSInputStream@59925309})
org.apache.hadoop.hdfs.DFSInputStream.readWithStrategy(DFSInputStream.java:884) 
=> holding Monitor(org.apache.hadoop.hdfs.DFSInputStream@59925309})
org.apache.hadoop.hdfs.DFSInputStream.read(DFSInputStream.java:957) => holding 
Monitor(org.apache.hadoop.hdfs.DFSInputStream@59925309})
java.io.DataInputStream.read(DataInputStream.java:100)
java.nio.file.Files.copy(Files.java:2908)
java.nio.file.Files.copy(Files.java:3027)
sun.net.www.protocol.jar.URLJarFile$1.run(URLJarFile.java:220)
sun.net.www.protocol.jar.URLJarFile$1.run(URLJarFile.java:216)
java.security.AccessController.doPrivileged(Native Method)
sun.net.www.protocol.jar.URLJarFile.retrieve(URLJarFile.java:215)
sun.net.www.protocol.jar.URLJarFile.getJarFile(URLJarFile.java:71)
sun.net.www.protocol.jar.JarFileFactory.get(JarFileFactory.java:84)
sun.net.www.protocol.jar.JarURLConnection.connect(JarURLConnection.java:122)
sun.net.www.protocol.jar.JarURLConnection.getJarFile(JarURLConnection.java:89)
sun.misc.URLClassPath$JarLoader.getJarFile(URLClassPath.java:944)
sun.misc.URLClassPath$JarLoader.access$800(URLClassPath.java:801)
sun.misc.URLClassPath$JarLoader$1.run(URLClassPath.java:886)
sun.misc.URLClassPath$JarLoader$1.run(URLClassPath.java:879)
java.security.AccessController.doPrivileged(Native Method)
sun.misc.URLClassPath$JarLoader.ensureOpen(URLClassPath.java:878)
sun.misc.URLClassPath$JarLoader.(URLClassPath.java:829)
sun.misc.URLClassPath$3.run(URLClassPath.java:575)
sun.misc.URLClassPath$3.run(URLClassPath.java:565)
java.security.AccessController.doPrivileged(Native Method)
sun.misc.URLClassPath.getLoader(URLClassPath.java:564)
sun.misc.URLClassPath.getLoader(URLClassPath.java:529)
sun.misc.URLClassPath.getNextLoader(URLClassPath.java:494) => holding 
Monitor(sun.misc.URLClassPath@929546251})
sun.misc.URLClassPath.findResource(URLClassPath.java:224)
java.net.URLClassLoader$2.run(URLClassLoader.java:572)
java.net.URLClassLoader$2.run(URLClassLoader.java:570)
java.security.AccessController.doPrivileged(Native Method)
java.net.URLClassLoader.findResource(URLClassLoader.java:569)
java.lang.ClassLoader.getResource(ClassLoader.java:1096)
java.lang.ClassLoader.getResource(ClassLoader.java:1091)
org.apache.hadoop.conf.Configuration.getResource(Configuration.java:2809)
org.apache.hadoop.conf.Configuration.getStreamReader(Configuration.java:3081)
org.apache.hadoop.conf.Configuration.loadResource(Configuration.java:3040)
org.apache.hadoop.conf.Configuration.loadResources(Configuration.java:3013)
org.apache.hadoop.conf.Configuration.loadProps(Configuration.java:2893) => 
holding Monitor(org.apache.hadoop.conf.Configuration@1190238736})
org.apache.hadoop.conf.Configuration.getProps(Configuration.java:2875) => 
holding Monitor(org.apache.hadoop.conf.Configuration@1190238736})
org.apache.hadoop.conf.Configuration.get(Configuration.java:1225)
org.apache.hadoop.conf.Configuration.getTrimmed(Configuration.java:1279)
org.apache.hadoop.conf.Configuration.getBoolean(Configuration.java:1686)
org.apache.hadoop.io.nativeio.NativeIO$POSIX.(NativeIO.java:334)
org.apache.hadoop.io.nativeio.NativeIO.initNative(Native Method)
org.apache.hadoop.io.nativeio.NativeIO.(NativeIO.java:831)
org.apache.hadoop.hdfs.shortcircuit.ShortCircuitShm.(ShortCircuitShm.java:469)
org.apache.hadoop.hdfs.shortcircuit.DfsClientShm.(DfsClientShm.java:70)
org.apache.hadoop.hdfs.shortcircuit.DfsClientShmManager$EndpointShmManager.requestNewShm(DfsClientShmManager.java:181)
org.apache.hadoop.hdfs.shortcircuit.DfsClientShmManager$EndpointShmManager.allocSlot(DfsClientShmManager.java:251)
org.apache.hadoop.hdfs.shortcircuit.DfsClientShmManager.allocSlot(DfsClientShmManager.java:417)
org.apache.hadoop.hdfs.shortcircuit.ShortCircuitCache.allocShmSlot(ShortCircuitCache.java:1006)
org.apache.hadoop.hdfs.client.impl.BlockReaderFactory.createShortCircuitReplicaInfo(BlockReaderFactory.java:535)
org.apache.hadoop.hdfs.shortcircuit.ShortCircuitCache.create(ShortCircuitCache.java:786)
org.apache.hadoop.hdfs.shortcircuit.ShortCircuitCache.fetchOrCreate(ShortCircuitCache.java:723)
org.apache.hadoop.hdfs.client.impl.BlockReaderFactory.getBlockReaderLocal(BlockReaderFactory.java:483)
org.apache.hadoop.hdfs.client.impl.BlockReaderFactory.build(BlockReaderFactory.java:360)
org.apache.hadoop.hdfs.DFSInputStream.getBlockReader(DFSInputStream.java:755)
org.apache.hadoop.hdfs.DFSInputStream.blockSeekTo(DFSInputStream.java:685) => 
holding Monitor(org.apache.hadoop.hdfs.DFSInputStream@59925309})
org.apache.hadoop.hdfs.DFSInputStream.readWithStrategy(DFSInputStream.java:884) 
=> holding Monitor(org.apache.hadoop.hdfs.DFSInputStream@59925309})
org.apache.hadoop.hdfs.DFSInputStream.read(DFSInputStream.java:957) => holding 
Monitor(org.apache.hadoop.hdfs.DFSInputStream@59925309})
java.io.DataInputStream.read(DataInputStream.java:100)
java.nio.file.Files.copy(Files.java:2908)
java.nio.file.Files.copy(Files.java:3027)
sun.net.www.protocol.jar.URLJarFile$1.run(URLJarFile.java:220)
sun.net.www.protocol.jar.URLJarFile$1.run(URLJarFile.java:216)
java.security.AccessController.doPrivileged(Native Method)
sun.net.www.protocol.jar.URLJarFile.retrieve(URLJarFile.java:215)
sun.net.www.protocol.jar.URLJarFile.getJarFile(URLJarFile.java:71)
sun.net.www.protocol.jar.JarFileFactory.get(JarFileFactory.java:84)
sun.net.www.protocol.jar.JarURLConnection.connect(JarURLConnection.java:122)
sun.net.www.protocol.jar.JarURLConnection.getJarFile(JarURLConnection.java:89)
sun.misc.URLClassPath$JarLoader.getJarFile(URLClassPath.java:944)
sun.misc.URLClassPath$JarLoader.access$800(URLClassPath.java:801)
sun.misc.URLClassPath$JarLoader$1.run(URLClassPath.java:886)
sun.misc.URLClassPath$JarLoader$1.run(URLClassPath.java:879)
java.security.AccessController.doPrivileged(Native Method)
sun.misc.URLClassPath$JarLoader.ensureOpen(URLClassPath.java:878)
sun.misc.URLClassPath$JarLoader.(URLClassPath.java:829)
sun.misc.URLClassPath$3.run(URLClassPath.java:575)
sun.misc.URLClassPath$3.run(URLClassPath.java:565)
java.security.AccessController.doPrivileged(Native Method)
sun.misc.URLClassPath.getLoader(URLClassPath.java:564)
sun.misc.URLClassPath.getLoader(URLClassPath.java:529)
sun.misc.URLClassPath.getNextLoader(URLClassPath.java:494) => holding 
Monitor(sun.misc.URLClassPath@929546251})
sun.misc.URLClassPath.access$100(URLClassPath.java:66)
sun.misc.URLClassPath$1.next(URLClassPath.java:276)
sun.misc.URLClassPath$1.hasMoreElements(URLClassPath.java:287)
java.net.URLClassLoader$3$1.run(URLClassLoader.java:604)
java.net.URLClassLoader$3$1.run(URLClassLoader.java:602)
java.security.AccessController.doPrivileged(Native Method)
java.net.URLClassLoader$3.next(URLClassLoader.java:601)
java.net.URLClassLoader$3.hasMoreElements(URLClassLoader.java:626)
sun.misc.CompoundEnumeration.next(CompoundEnumeration.java:45)
sun.misc.CompoundEnumeration.hasMoreElements(CompoundEnumeration.java:54)
org.aspectj.weaver.loadtime.ClassLoaderWeavingAdaptor.parseDefinitions(ClassLoaderWeavingAdaptor.java:282)
org.aspectj.weaver.loadtime.DefaultWeavingContext.getDefinitions(DefaultWeavingContext.java:130)
org.aspectj.weaver.loadtime.ClassLoaderWeavingAdaptor.initialize(ClassLoaderWeavingAdaptor.java:173)
org.aspectj.weaver.loadtime.Aj$ExplicitlyInitializedClassLoaderWeavingAdaptor.initialize(Aj.java:344)
org.aspectj.weaver.loadtime.Aj$ExplicitlyInitializedClassLoaderWeavingAdaptor.getWeavingAdaptor(Aj.java:349)
org.aspectj.weaver.loadtime.Aj$WeaverContainer.getWeaver(Aj.java:323)
org.aspectj.weaver.loadtime.Aj.preProcess(Aj.java:115) => holding 
Monitor(org.apache.spark.sql.hive.client.IsolatedClientLoader$$anon$1@879400492})
org.aspectj.weaver.loadtime.ClassPreProcessorAgentAdapter.transform(ClassPreProcessorAgentAdapter.java:51)
sun.instrument.TransformerManager.transform(TransformerManager.java:188)
sun.instrument.InstrumentationImpl.transform(InstrumentationImpl.java:428)
java.lang.ClassLoader.defineClass1(Native Method)
java.lang.ClassLoader.defineClass(ClassLoader.java:763)
java.lang.ClassLoader.defineClass(ClassLoader.java:642)
org.apache.spark.sql.hive.client.IsolatedClientLoader$$anon$1.doLoadClass(IsolatedClientLoader.scala:244)
org.apache.spark.sql.hive.client.IsolatedClientLoader$$anon$1.loadClass(IsolatedClientLoader.scala:236)
java.lang.ClassLoader.loadClass(ClassLoader.java:411) => holding 
Monitor(java.lang.Object@815422741})
java.lang.ClassLoader.loadClass(ClassLoader.java:357)
org.apache.spark.sql.hive.client.IsolatedClientLoader.createClient(IsolatedClientLoader.scala:291)
 => holding 
Monitor(org.apache.spark.sql.hive.client.IsolatedClientLoader@1509517497})
org.apache.spark.sql.hive.HiveUtils$.newClientForMetadata(HiveUtils.scala:492)
org.apache.spark.sql.hive.HiveUtils$.newClientForMetadata(HiveUtils.scala:352)
org.apache.spark.sql.hive.HiveExternalCatalog.client$lzycompute(HiveExternalCatalog.scala:71)
 => holding Monitor(org.apache.spark.sql.hive.HiveExternalCatalog@1343720469})
org.apache.spark.sql.hive.HiveExternalCatalog.client(HiveExternalCatalog.scala:70)

```



--
This message was sent by Atlassian Jira
(v8.20.10#820010)

---------------------------------------------------------------------
To unsubscribe, e-mail: common-issues-unsubscr...@hadoop.apache.org
For additional commands, e-mail: common-issues-h...@hadoop.apache.org

Reply via email to