konwu created HADOOP-18911: ------------------------------ Summary: DfsClientShmManager allocSlot hangs on awaitUninterruptibly against itself Key: HADOOP-18911 URL: https://issues.apache.org/jira/browse/HADOOP-18911 Project: Hadoop Common Issue Type: Bug Components: hdfs-client Affects Versions: 3.3.1 Environment: hadoop 3.3.1
yarn 3.3.1 Reporter: konwu A Spark task driver thread hangs on awaitUninterruptibly; the thread stack is shown below: ```java sun.misc.Unsafe.park(Native Method) java.util.concurrent.locks.LockSupport.park(LockSupport.java:175) java.util.concurrent.locks.AbstractQueuedSynchronizer$ConditionObject.awaitUninterruptibly(AbstractQueuedSynchronizer.java:1976) org.apache.hadoop.hdfs.shortcircuit.DfsClientShmManager$EndpointShmManager.allocSlot(DfsClientShmManager.java:244) org.apache.hadoop.hdfs.shortcircuit.DfsClientShmManager.allocSlot(DfsClientShmManager.java:417) org.apache.hadoop.hdfs.shortcircuit.ShortCircuitCache.allocShmSlot(ShortCircuitCache.java:1006) org.apache.hadoop.hdfs.client.impl.BlockReaderFactory.createShortCircuitReplicaInfo(BlockReaderFactory.java:535) org.apache.hadoop.hdfs.shortcircuit.ShortCircuitCache.create(ShortCircuitCache.java:786) org.apache.hadoop.hdfs.shortcircuit.ShortCircuitCache.fetchOrCreate(ShortCircuitCache.java:723) org.apache.hadoop.hdfs.client.impl.BlockReaderFactory.getBlockReaderLocal(BlockReaderFactory.java:483) org.apache.hadoop.hdfs.client.impl.BlockReaderFactory.build(BlockReaderFactory.java:360) org.apache.hadoop.hdfs.DFSInputStream.getBlockReader(DFSInputStream.java:755) org.apache.hadoop.hdfs.DFSInputStream.blockSeekTo(DFSInputStream.java:685) => holding Monitor(org.apache.hadoop.hdfs.DFSInputStream@59925309}) org.apache.hadoop.hdfs.DFSInputStream.readWithStrategy(DFSInputStream.java:884) => holding Monitor(org.apache.hadoop.hdfs.DFSInputStream@59925309}) org.apache.hadoop.hdfs.DFSInputStream.read(DFSInputStream.java:957) => holding Monitor(org.apache.hadoop.hdfs.DFSInputStream@59925309}) java.io.DataInputStream.read(DataInputStream.java:100) java.nio.file.Files.copy(Files.java:2908) java.nio.file.Files.copy(Files.java:3027) sun.net.www.protocol.jar.URLJarFile$1.run(URLJarFile.java:220) sun.net.www.protocol.jar.URLJarFile$1.run(URLJarFile.java:216) java.security.AccessController.doPrivileged(Native Method) 
sun.net.www.protocol.jar.URLJarFile.retrieve(URLJarFile.java:215) sun.net.www.protocol.jar.URLJarFile.getJarFile(URLJarFile.java:71) sun.net.www.protocol.jar.JarFileFactory.get(JarFileFactory.java:84) sun.net.www.protocol.jar.JarURLConnection.connect(JarURLConnection.java:122) sun.net.www.protocol.jar.JarURLConnection.getJarFile(JarURLConnection.java:89) sun.misc.URLClassPath$JarLoader.getJarFile(URLClassPath.java:944) sun.misc.URLClassPath$JarLoader.access$800(URLClassPath.java:801) sun.misc.URLClassPath$JarLoader$1.run(URLClassPath.java:886) sun.misc.URLClassPath$JarLoader$1.run(URLClassPath.java:879) java.security.AccessController.doPrivileged(Native Method) sun.misc.URLClassPath$JarLoader.ensureOpen(URLClassPath.java:878) sun.misc.URLClassPath$JarLoader.(URLClassPath.java:829) sun.misc.URLClassPath$3.run(URLClassPath.java:575) sun.misc.URLClassPath$3.run(URLClassPath.java:565) java.security.AccessController.doPrivileged(Native Method) sun.misc.URLClassPath.getLoader(URLClassPath.java:564) sun.misc.URLClassPath.getLoader(URLClassPath.java:529) sun.misc.URLClassPath.getNextLoader(URLClassPath.java:494) => holding Monitor(sun.misc.URLClassPath@929546251}) sun.misc.URLClassPath.findResource(URLClassPath.java:224) java.net.URLClassLoader$2.run(URLClassLoader.java:572) java.net.URLClassLoader$2.run(URLClassLoader.java:570) java.security.AccessController.doPrivileged(Native Method) java.net.URLClassLoader.findResource(URLClassLoader.java:569) java.lang.ClassLoader.getResource(ClassLoader.java:1096) java.lang.ClassLoader.getResource(ClassLoader.java:1091) org.apache.hadoop.conf.Configuration.getResource(Configuration.java:2809) org.apache.hadoop.conf.Configuration.getStreamReader(Configuration.java:3081) org.apache.hadoop.conf.Configuration.loadResource(Configuration.java:3040) org.apache.hadoop.conf.Configuration.loadResources(Configuration.java:3013) org.apache.hadoop.conf.Configuration.loadProps(Configuration.java:2893) => holding 
Monitor(org.apache.hadoop.conf.Configuration@1190238736}) org.apache.hadoop.conf.Configuration.getProps(Configuration.java:2875) => holding Monitor(org.apache.hadoop.conf.Configuration@1190238736}) org.apache.hadoop.conf.Configuration.get(Configuration.java:1225) org.apache.hadoop.conf.Configuration.getTrimmed(Configuration.java:1279) org.apache.hadoop.conf.Configuration.getBoolean(Configuration.java:1686) org.apache.hadoop.io.nativeio.NativeIO$POSIX.(NativeIO.java:334) org.apache.hadoop.io.nativeio.NativeIO.initNative(Native Method) org.apache.hadoop.io.nativeio.NativeIO.(NativeIO.java:831) org.apache.hadoop.hdfs.shortcircuit.ShortCircuitShm.(ShortCircuitShm.java:469) org.apache.hadoop.hdfs.shortcircuit.DfsClientShm.(DfsClientShm.java:70) org.apache.hadoop.hdfs.shortcircuit.DfsClientShmManager$EndpointShmManager.requestNewShm(DfsClientShmManager.java:181) org.apache.hadoop.hdfs.shortcircuit.DfsClientShmManager$EndpointShmManager.allocSlot(DfsClientShmManager.java:251) org.apache.hadoop.hdfs.shortcircuit.DfsClientShmManager.allocSlot(DfsClientShmManager.java:417) org.apache.hadoop.hdfs.shortcircuit.ShortCircuitCache.allocShmSlot(ShortCircuitCache.java:1006) org.apache.hadoop.hdfs.client.impl.BlockReaderFactory.createShortCircuitReplicaInfo(BlockReaderFactory.java:535) org.apache.hadoop.hdfs.shortcircuit.ShortCircuitCache.create(ShortCircuitCache.java:786) org.apache.hadoop.hdfs.shortcircuit.ShortCircuitCache.fetchOrCreate(ShortCircuitCache.java:723) org.apache.hadoop.hdfs.client.impl.BlockReaderFactory.getBlockReaderLocal(BlockReaderFactory.java:483) org.apache.hadoop.hdfs.client.impl.BlockReaderFactory.build(BlockReaderFactory.java:360) org.apache.hadoop.hdfs.DFSInputStream.getBlockReader(DFSInputStream.java:755) org.apache.hadoop.hdfs.DFSInputStream.blockSeekTo(DFSInputStream.java:685) => holding Monitor(org.apache.hadoop.hdfs.DFSInputStream@59925309}) org.apache.hadoop.hdfs.DFSInputStream.readWithStrategy(DFSInputStream.java:884) => holding 
Monitor(org.apache.hadoop.hdfs.DFSInputStream@59925309}) org.apache.hadoop.hdfs.DFSInputStream.read(DFSInputStream.java:957) => holding Monitor(org.apache.hadoop.hdfs.DFSInputStream@59925309}) java.io.DataInputStream.read(DataInputStream.java:100) java.nio.file.Files.copy(Files.java:2908) java.nio.file.Files.copy(Files.java:3027) sun.net.www.protocol.jar.URLJarFile$1.run(URLJarFile.java:220) sun.net.www.protocol.jar.URLJarFile$1.run(URLJarFile.java:216) java.security.AccessController.doPrivileged(Native Method) sun.net.www.protocol.jar.URLJarFile.retrieve(URLJarFile.java:215) sun.net.www.protocol.jar.URLJarFile.getJarFile(URLJarFile.java:71) sun.net.www.protocol.jar.JarFileFactory.get(JarFileFactory.java:84) sun.net.www.protocol.jar.JarURLConnection.connect(JarURLConnection.java:122) sun.net.www.protocol.jar.JarURLConnection.getJarFile(JarURLConnection.java:89) sun.misc.URLClassPath$JarLoader.getJarFile(URLClassPath.java:944) sun.misc.URLClassPath$JarLoader.access$800(URLClassPath.java:801) sun.misc.URLClassPath$JarLoader$1.run(URLClassPath.java:886) sun.misc.URLClassPath$JarLoader$1.run(URLClassPath.java:879) java.security.AccessController.doPrivileged(Native Method) sun.misc.URLClassPath$JarLoader.ensureOpen(URLClassPath.java:878) sun.misc.URLClassPath$JarLoader.(URLClassPath.java:829) sun.misc.URLClassPath$3.run(URLClassPath.java:575) sun.misc.URLClassPath$3.run(URLClassPath.java:565) java.security.AccessController.doPrivileged(Native Method) sun.misc.URLClassPath.getLoader(URLClassPath.java:564) sun.misc.URLClassPath.getLoader(URLClassPath.java:529) sun.misc.URLClassPath.getNextLoader(URLClassPath.java:494) => holding Monitor(sun.misc.URLClassPath@929546251}) sun.misc.URLClassPath.access$100(URLClassPath.java:66) sun.misc.URLClassPath$1.next(URLClassPath.java:276) sun.misc.URLClassPath$1.hasMoreElements(URLClassPath.java:287) java.net.URLClassLoader$3$1.run(URLClassLoader.java:604) java.net.URLClassLoader$3$1.run(URLClassLoader.java:602) 
java.security.AccessController.doPrivileged(Native Method) java.net.URLClassLoader$3.next(URLClassLoader.java:601) java.net.URLClassLoader$3.hasMoreElements(URLClassLoader.java:626) sun.misc.CompoundEnumeration.next(CompoundEnumeration.java:45) sun.misc.CompoundEnumeration.hasMoreElements(CompoundEnumeration.java:54) org.aspectj.weaver.loadtime.ClassLoaderWeavingAdaptor.parseDefinitions(ClassLoaderWeavingAdaptor.java:282) org.aspectj.weaver.loadtime.DefaultWeavingContext.getDefinitions(DefaultWeavingContext.java:130) org.aspectj.weaver.loadtime.ClassLoaderWeavingAdaptor.initialize(ClassLoaderWeavingAdaptor.java:173) org.aspectj.weaver.loadtime.Aj$ExplicitlyInitializedClassLoaderWeavingAdaptor.initialize(Aj.java:344) org.aspectj.weaver.loadtime.Aj$ExplicitlyInitializedClassLoaderWeavingAdaptor.getWeavingAdaptor(Aj.java:349) org.aspectj.weaver.loadtime.Aj$WeaverContainer.getWeaver(Aj.java:323) org.aspectj.weaver.loadtime.Aj.preProcess(Aj.java:115) => holding Monitor(org.apache.spark.sql.hive.client.IsolatedClientLoader$$anon$1@879400492}) org.aspectj.weaver.loadtime.ClassPreProcessorAgentAdapter.transform(ClassPreProcessorAgentAdapter.java:51) sun.instrument.TransformerManager.transform(TransformerManager.java:188) sun.instrument.InstrumentationImpl.transform(InstrumentationImpl.java:428) java.lang.ClassLoader.defineClass1(Native Method) java.lang.ClassLoader.defineClass(ClassLoader.java:763) java.lang.ClassLoader.defineClass(ClassLoader.java:642) org.apache.spark.sql.hive.client.IsolatedClientLoader$$anon$1.doLoadClass(IsolatedClientLoader.scala:244) org.apache.spark.sql.hive.client.IsolatedClientLoader$$anon$1.loadClass(IsolatedClientLoader.scala:236) java.lang.ClassLoader.loadClass(ClassLoader.java:411) => holding Monitor(java.lang.Object@815422741}) java.lang.ClassLoader.loadClass(ClassLoader.java:357) org.apache.spark.sql.hive.client.IsolatedClientLoader.createClient(IsolatedClientLoader.scala:291) => holding 
Monitor(org.apache.spark.sql.hive.client.IsolatedClientLoader@1509517497}) org.apache.spark.sql.hive.HiveUtils$.newClientForMetadata(HiveUtils.scala:492) org.apache.spark.sql.hive.HiveUtils$.newClientForMetadata(HiveUtils.scala:352) org.apache.spark.sql.hive.HiveExternalCatalog.client$lzycompute(HiveExternalCatalog.scala:71) => holding Monitor(org.apache.spark.sql.hive.HiveExternalCatalog@1343720469}) org.apache.spark.sql.hive.HiveExternalCatalog.client(HiveExternalCatalog.scala:70) ``` -- This message was sent by Atlassian Jira (v8.20.10#820010) --------------------------------------------------------------------- To unsubscribe, e-mail: common-issues-unsubscr...@hadoop.apache.org For additional commands, e-mail: common-issues-h...@hadoop.apache.org