[ 
https://issues.apache.org/jira/browse/HIVE-13730?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15279098#comment-15279098
 ] 

Wei Zheng commented on HIVE-13730:
----------------------------------

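The "TezChild" task thread is stuck in an infinite while loop in 
BytesBytesMultiHashMap.findKeySlotToWrite(), reached via 
MapJoinOperator.reloadHashTable() (see the jstack output below).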
{code}
$ jps
90673 TezChild
90976 TezChild
90855 TezChild
91225 Jps
82923 RemoteMavenServer
90205 surefirebooter3625226115924096543.jar
90191 Launcher
90542 DAGAppMaster
$ jstack 90673
2016-05-10 15:13:47
Full thread dump Java HotSpot(TM) 64-Bit Server VM (25.74-b02 mixed mode):

"Attach Listener" #138 daemon prio=9 os_prio=31 tid=0x00007feea4800000 
nid=0x3d3b waiting on condition [0x0000000000000000]
   java.lang.Thread.State: RUNNABLE

"TezTaskEventRouter{attempt_1462916018098_0001_32_01_000000_0}" #134 daemon 
prio=5 os_prio=31 tid=0x00007feea684f000 nid=0x692f waiting on condition 
[0x0000700001be7000]
   java.lang.Thread.State: WAITING (parking)
        at sun.misc.Unsafe.park(Native Method)
        - parking to wait for  <0x00000007bc9d6490> (a 
java.util.concurrent.locks.AbstractQueuedSynchronizer$ConditionObject)
        at java.util.concurrent.locks.LockSupport.park(LockSupport.java:175)
        at 
java.util.concurrent.locks.AbstractQueuedSynchronizer$ConditionObject.await(AbstractQueuedSynchronizer.java:2039)
        at 
java.util.concurrent.LinkedBlockingQueue.take(LinkedBlockingQueue.java:442)
        at 
org.apache.tez.runtime.LogicalIOProcessorRuntimeTask$1.runInternal(LogicalIOProcessorRuntimeTask.java:773)
        at org.apache.tez.common.RunnableWithNdc.run(RunnableWithNdc.java:35)
        at java.lang.Thread.run(Thread.java:745)

"org.apache.hadoop.hdfs.PeerCache@35f41fc9" #22 daemon prio=5 os_prio=31 
tid=0x00007feea686d800 nid=0x6a03 waiting on condition [0x0000700001cea000]
   java.lang.Thread.State: TIMED_WAITING (sleeping)
        at java.lang.Thread.sleep(Native Method)
        at org.apache.hadoop.hdfs.PeerCache.run(PeerCache.java:244)
        at org.apache.hadoop.hdfs.PeerCache.access$000(PeerCache.java:41)
        at org.apache.hadoop.hdfs.PeerCache$1.run(PeerCache.java:119)
        at java.lang.Thread.run(Thread.java:745)

"TaskHeartbeatThread" #15 daemon prio=5 os_prio=31 tid=0x00007feea310c000 
nid=0x6403 waiting on condition [0x00007000019e1000]
   java.lang.Thread.State: TIMED_WAITING (parking)
        at sun.misc.Unsafe.park(Native Method)
        - parking to wait for  <0x00000007bcb6aa40> (a 
java.util.concurrent.locks.AbstractQueuedSynchronizer$ConditionObject)
        at 
java.util.concurrent.locks.LockSupport.parkNanos(LockSupport.java:215)
        at 
java.util.concurrent.locks.AbstractQueuedSynchronizer$ConditionObject.await(AbstractQueuedSynchronizer.java:2163)
        at 
org.apache.tez.runtime.task.TaskReporter$HeartbeatCallable.call(TaskReporter.java:200)
        at 
org.apache.tez.runtime.task.TaskReporter$HeartbeatCallable.call(TaskReporter.java:128)
        at java.util.concurrent.FutureTask.run(FutureTask.java:266)
        at 
java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
        at 
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
        at java.lang.Thread.run(Thread.java:745)

"IPC Parameter Sending Thread #0" #14 daemon prio=5 os_prio=31 
tid=0x00007feea0979000 nid=0x6203 waiting on condition [0x00007000018de000]
   java.lang.Thread.State: TIMED_WAITING (parking)
        at sun.misc.Unsafe.park(Native Method)
        - parking to wait for  <0x000000078df78428> (a 
java.util.concurrent.SynchronousQueue$TransferStack)
        at 
java.util.concurrent.locks.LockSupport.parkNanos(LockSupport.java:215)
        at 
java.util.concurrent.SynchronousQueue$TransferStack.awaitFulfill(SynchronousQueue.java:460)
        at 
java.util.concurrent.SynchronousQueue$TransferStack.transfer(SynchronousQueue.java:362)
        at java.util.concurrent.SynchronousQueue.poll(SynchronousQueue.java:941)
        at 
java.util.concurrent.ThreadPoolExecutor.getTask(ThreadPoolExecutor.java:1066)
        at 
java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1127)
        at 
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
        at java.lang.Thread.run(Thread.java:745)

"IPC Client (1617838096) connection to /10.22.27.129:64289 from 
application_1462916018098_0001" #13 daemon prio=5 os_prio=31 
tid=0x00007feea11f6800 nid=0x6003 in Object.wait() [0x00007000017db000]
   java.lang.Thread.State: TIMED_WAITING (on object monitor)
        at java.lang.Object.wait(Native Method)
        at org.apache.hadoop.ipc.Client$Connection.waitForWork(Client.java:920)
        - locked <0x000000078df52318> (a 
org.apache.hadoop.ipc.Client$Connection)
        at org.apache.hadoop.ipc.Client$Connection.run(Client.java:965)

"TezChild" #12 daemon prio=5 os_prio=31 tid=0x00007feea0a65000 nid=0x5e07 
runnable [0x00007000016d7000]
   java.lang.Thread.State: RUNNABLE
        at org.apache.hadoop.hive.ql.exec.persistence.BytesBytesMultiHashMap.findKeySlotToWrite(BytesBytesMultiHashMap.java:602)
        at org.apache.hadoop.hive.ql.exec.persistence.BytesBytesMultiHashMap.put(BytesBytesMultiHashMap.java:454)
        at org.apache.hadoop.hive.ql.exec.MapJoinOperator.reloadHashTable(MapJoinOperator.java:646)
        at org.apache.hadoop.hive.ql.exec.MapJoinOperator.continueProcess(MapJoinOperator.java:591)
        at org.apache.hadoop.hive.ql.exec.MapJoinOperator.closeOp(MapJoinOperator.java:528)
        at org.apache.hadoop.hive.ql.exec.Operator.close(Operator.java:641)
        at org.apache.hadoop.hive.ql.exec.Operator.close(Operator.java:655)
        at org.apache.hadoop.hive.ql.exec.tez.MapRecordProcessor.close(MapRecordProcessor.java:413)
        at org.apache.hadoop.hive.ql.exec.tez.TezProcessor.initializeAndRunProcessor(TezProcessor.java:186)
        at org.apache.hadoop.hive.ql.exec.tez.TezProcessor.run(TezProcessor.java:160)
        at org.apache.tez.runtime.LogicalIOProcessorRuntimeTask.run(LogicalIOProcessorRuntimeTask.java:355)
        at org.apache.tez.runtime.task.TaskRunner2Callable$1.run(TaskRunner2Callable.java:72)
        at org.apache.tez.runtime.task.TaskRunner2Callable$1.run(TaskRunner2Callable.java:60)
        at java.security.AccessController.doPrivileged(Native Method)
        at javax.security.auth.Subject.doAs(Subject.java:422)
        at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1628)
        at org.apache.tez.runtime.task.TaskRunner2Callable.callInternal(TaskRunner2Callable.java:60)
        at org.apache.tez.runtime.task.TaskRunner2Callable.callInternal(TaskRunner2Callable.java:36)
        at org.apache.tez.common.CallableWithNdc.call(CallableWithNdc.java:36)
        at java.util.concurrent.FutureTask.run(FutureTask.java:266)
        at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
        at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
        at java.lang.Thread.run(Thread.java:745)

"AsyncLogger-1" #11 daemon prio=5 os_prio=31 tid=0x00007feea1235000 nid=0x5a0f 
waiting on condition [0x00007000015d5000]
   java.lang.Thread.State: WAITING (parking)
        at sun.misc.Unsafe.park(Native Method)
        - parking to wait for  <0x000000078e0657c8> (a 
java.util.concurrent.locks.AbstractQueuedSynchronizer$ConditionObject)
        at java.util.concurrent.locks.LockSupport.park(LockSupport.java:175)
        at 
java.util.concurrent.locks.AbstractQueuedSynchronizer$ConditionObject.await(AbstractQueuedSynchronizer.java:2039)
        at 
com.lmax.disruptor.BlockingWaitStrategy.waitFor(BlockingWaitStrategy.java:45)
        at 
com.lmax.disruptor.ProcessingSequenceBarrier.waitFor(ProcessingSequenceBarrier.java:55)
        at 
com.lmax.disruptor.BatchEventProcessor.run(BatchEventProcessor.java:123)
        at 
java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
        at 
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
        at java.lang.Thread.run(Thread.java:745)

"Service Thread" #9 daemon prio=9 os_prio=31 tid=0x00007feea4801000 nid=0x5203 
runnable [0x0000000000000000]
   java.lang.Thread.State: RUNNABLE

"C1 CompilerThread3" #8 daemon prio=9 os_prio=31 tid=0x00007feea3004800 
nid=0x5003 waiting on condition [0x0000000000000000]
   java.lang.Thread.State: RUNNABLE

"C2 CompilerThread2" #7 daemon prio=9 os_prio=31 tid=0x00007feea102c800 
nid=0x4e03 waiting on condition [0x0000000000000000]
   java.lang.Thread.State: RUNNABLE

"C2 CompilerThread1" #6 daemon prio=9 os_prio=31 tid=0x00007feea1803800 
nid=0x4c03 waiting on condition [0x0000000000000000]
   java.lang.Thread.State: RUNNABLE

"C2 CompilerThread0" #5 daemon prio=9 os_prio=31 tid=0x00007feea1801000 
nid=0x4a03 waiting on condition [0x0000000000000000]
   java.lang.Thread.State: RUNNABLE

"Signal Dispatcher" #4 daemon prio=9 os_prio=31 tid=0x00007feea081c800 
nid=0x3e0f runnable [0x0000000000000000]
   java.lang.Thread.State: RUNNABLE

"Finalizer" #3 daemon prio=8 os_prio=31 tid=0x00007feea080f800 nid=0x3803 in 
Object.wait() [0x0000700000d3a000]
   java.lang.Thread.State: WAITING (on object monitor)
        at java.lang.Object.wait(Native Method)
        at java.lang.ref.ReferenceQueue.remove(ReferenceQueue.java:143)
        - locked <0x000000078e1a8a90> (a java.lang.ref.ReferenceQueue$Lock)
        at java.lang.ref.ReferenceQueue.remove(ReferenceQueue.java:164)
        at java.lang.ref.Finalizer$FinalizerThread.run(Finalizer.java:209)

"Reference Handler" #2 daemon prio=10 os_prio=31 tid=0x00007feea3845000 
nid=0x3603 in Object.wait() [0x0000700000c37000]
   java.lang.Thread.State: WAITING (on object monitor)
        at java.lang.Object.wait(Native Method)
        at java.lang.Object.wait(Object.java:502)
        at java.lang.ref.Reference.tryHandlePending(Reference.java:191)
        - locked <0x000000078e1a8b28> (a java.lang.ref.Reference$Lock)
        at java.lang.ref.Reference$ReferenceHandler.run(Reference.java:153)

"main" #1 prio=5 os_prio=31 tid=0x00007feea2802000 nid=0x1703 waiting on 
condition [0x0000700000219000]
   java.lang.Thread.State: WAITING (parking)
        at sun.misc.Unsafe.park(Native Method)
        - parking to wait for  <0x00000007bcb6b0d8> (a 
com.google.common.util.concurrent.ListenableFutureTask)
        at java.util.concurrent.locks.LockSupport.park(LockSupport.java:175)
        at java.util.concurrent.FutureTask.awaitDone(FutureTask.java:429)
        at java.util.concurrent.FutureTask.get(FutureTask.java:191)
        at 
org.apache.tez.runtime.task.TezTaskRunner2.run(TezTaskRunner2.java:158)
        at org.apache.tez.runtime.task.TezChild.run(TezChild.java:264)
        at org.apache.tez.runtime.task.TezChild.main(TezChild.java:508)

"VM Thread" os_prio=31 tid=0x00007feea102c000 nid=0x3403 runnable

"GC task thread#0 (ParallelGC)" os_prio=31 tid=0x00007feea101d000 nid=0x2403 
runnable

"GC task thread#1 (ParallelGC)" os_prio=31 tid=0x00007feea080a800 nid=0x2603 
runnable

"GC task thread#2 (ParallelGC)" os_prio=31 tid=0x00007feea3000000 nid=0x2803 
runnable

"GC task thread#3 (ParallelGC)" os_prio=31 tid=0x00007feea0804000 nid=0x2a03 
runnable

"GC task thread#4 (ParallelGC)" os_prio=31 tid=0x00007feea080d000 nid=0x2c03 
runnable

"GC task thread#5 (ParallelGC)" os_prio=31 tid=0x00007feea080d800 nid=0x2e03 
runnable

"GC task thread#6 (ParallelGC)" os_prio=31 tid=0x00007feea080e800 nid=0x3003 
runnable

"GC task thread#7 (ParallelGC)" os_prio=31 tid=0x00007feea080f000 nid=0x3203 
runnable

"VM Periodic Task Thread" os_prio=31 tid=0x00007feea481c800 nid=0x5403 waiting 
on condition

JNI global references: 273
{code}

> hybridgrace_hashjoin_1.q test gets stuck
> ----------------------------------------
>
>                 Key: HIVE-13730
>                 URL: https://issues.apache.org/jira/browse/HIVE-13730
>             Project: Hive
>          Issue Type: Bug
>          Components: Tez
>    Affects Versions: 2.1.0
>            Reporter: Vikram Dixit K
>            Assignee: Wei Zheng
>            Priority: Blocker
>
> I am seeing hybridgrace_hashjoin_1.q getting stuck on master.



--
This message was sent by Atlassian JIRA
(v6.3.4#6332)

Reply via email to