[ https://issues.apache.org/jira/browse/HBASE-14274?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=14705605#comment-14705605 ]
Andrew Purtell commented on HBASE-14274: ---------------------------------------- JMX cache buster came in on HBASE-14166 > Deadlock in region metrics on shutdown: MetricsRegionSourceImpl vs > MetricsRegionAggregateSourceImpl > --------------------------------------------------------------------------------------------------- > > Key: HBASE-14274 > URL: https://issues.apache.org/jira/browse/HBASE-14274 > Project: HBase > Issue Type: Sub-task > Components: test > Reporter: stack > Attachments: 23612.stack > > > Looking into parent issue, got a hang locally of TestDistributedLogReplay. > We have region closes here: > {code} > "RS_CLOSE_META-localhost:59610-0" prio=5 tid=0x00007ff65c03f800 nid=0x54347 > waiting on condition [0x000000011f7ac000] > java.lang.Thread.State: WAITING (parking) > at sun.misc.Unsafe.park(Native Method) > - parking to wait for <0x000000075636d8c0> (a > java.util.concurrent.locks.ReentrantReadWriteLock$NonfairSync) > at java.util.concurrent.locks.LockSupport.park(LockSupport.java:186) > at > java.util.concurrent.locks.AbstractQueuedSynchronizer.parkAndCheckInterrupt(AbstractQueuedSynchronizer.java:834) > at > java.util.concurrent.locks.AbstractQueuedSynchronizer.acquireQueued(AbstractQueuedSynchronizer.java:867) > at > java.util.concurrent.locks.AbstractQueuedSynchronizer.acquire(AbstractQueuedSynchronizer.java:1197) > at > java.util.concurrent.locks.ReentrantReadWriteLock$WriteLock.lock(ReentrantReadWriteLock.java:945) > at > org.apache.hadoop.hbase.regionserver.MetricsRegionAggregateSourceImpl.deregister(MetricsRegionAggregateSourceImpl.java:78) > at > org.apache.hadoop.hbase.regionserver.MetricsRegionSourceImpl.close(MetricsRegionSourceImpl.java:120) > at > org.apache.hadoop.hbase.regionserver.MetricsRegion.close(MetricsRegion.java:41) > at > org.apache.hadoop.hbase.regionserver.HRegion.doClose(HRegion.java:1500) > at org.apache.hadoop.hbase.regionserver.HRegion.close(HRegion.java:1344) > - locked <0x00000007ff878190> (a java.lang.Object) > 
at > org.apache.hadoop.hbase.regionserver.handler.CloseRegionHandler.process(CloseRegionHandler.java:102) > at > org.apache.hadoop.hbase.executor.EventHandler.run(EventHandler.java:103) > at > java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145) > at > java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615) > at java.lang.Thread.run(Thread.java:744) > {code} > They are trying to call MetricsRegionAggregateSourceImpl.deregister. They want to > get a write lock on this class's local ReentrantReadWriteLock while holding > MetricsRegionSourceImpl's readWriteLock write lock. > Then, elsewhere, the JmxCacheBuster is running, trying to get metrics with > the above locks acquired in the reverse order: > {code} > "HBase-Metrics2-1" daemon prio=5 tid=0x00007ff65e14b000 nid=0x59a03 waiting > on condition [0x0000000140ea5000] > java.lang.Thread.State: WAITING (parking) > at sun.misc.Unsafe.park(Native Method) > - parking to wait for <0x00000007cade1480> (a > java.util.concurrent.locks.ReentrantReadWriteLock$NonfairSync) > at java.util.concurrent.locks.LockSupport.park(LockSupport.java:186) > at > java.util.concurrent.locks.AbstractQueuedSynchronizer.parkAndCheckInterrupt(AbstractQueuedSynchronizer.java:834) > at > java.util.concurrent.locks.AbstractQueuedSynchronizer.doAcquireShared(AbstractQueuedSynchronizer.java:964) > at > java.util.concurrent.locks.AbstractQueuedSynchronizer.acquireShared(AbstractQueuedSynchronizer.java:1282) > at > java.util.concurrent.locks.ReentrantReadWriteLock$ReadLock.lock(ReentrantReadWriteLock.java:731) > at > org.apache.hadoop.hbase.regionserver.MetricsRegionSourceImpl.snapshot(MetricsRegionSourceImpl.java:193) > at > org.apache.hadoop.hbase.regionserver.MetricsRegionAggregateSourceImpl.getMetrics(MetricsRegionAggregateSourceImpl.java:115) > at > org.apache.hadoop.metrics2.impl.MetricsSourceAdapter.getMetrics(MetricsSourceAdapter.java:195) > at > 
org.apache.hadoop.metrics2.impl.MetricsSourceAdapter.updateJmxCache(MetricsSourceAdapter.java:172) > at > org.apache.hadoop.metrics2.impl.MetricsSourceAdapter.getMBeanInfo(MetricsSourceAdapter.java:151) > at > com.sun.jmx.interceptor.DefaultMBeanServerInterceptor.getNewMBeanClassName(DefaultMBeanServerInterceptor.java:333) > at > com.sun.jmx.interceptor.DefaultMBeanServerInterceptor.registerMBean(DefaultMBeanServerInterceptor.java:319) > at > com.sun.jmx.mbeanserver.JmxMBeanServer.registerMBean(JmxMBeanServer.java:522) > at org.apache.hadoop.metrics2.util.MBeans.register(MBeans.java:57) > at > org.apache.hadoop.metrics2.impl.MetricsSourceAdapter.startMBeans(MetricsSourceAdapter.java:221) > - locked <0x00000007e654bdc0> (a > org.apache.hadoop.metrics2.impl.MetricsSourceAdapter) > at > org.apache.hadoop.metrics2.impl.MetricsSourceAdapter.start(MetricsSourceAdapter.java:96) > at > org.apache.hadoop.metrics2.impl.MetricsSystemImpl.registerSource(MetricsSystemImpl.java:245) > - locked <0x0000000754302660> (a > org.apache.hadoop.metrics2.impl.MetricsSystemImpl) > at > org.apache.hadoop.metrics2.impl.MetricsSystemImpl$1.postStart(MetricsSystemImpl.java:229) > at sun.reflect.GeneratedMethodAccessor45.invoke(Unknown Source) > at > sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) > at java.lang.reflect.Method.invoke(Method.java:606) > at > org.apache.hadoop.metrics2.impl.MetricsSystemImpl$3.invoke(MetricsSystemImpl.java:290) > at com.sun.proxy.$Proxy13.postStart(Unknown Source) > at > org.apache.hadoop.metrics2.impl.MetricsSystemImpl.start(MetricsSystemImpl.java:185) > - locked <0x0000000754302660> (a > org.apache.hadoop.metrics2.impl.MetricsSystemImpl) > at > org.apache.hadoop.metrics2.impl.JmxCacheBuster$JmxCacheBusterRunnable.run(JmxCacheBuster.java:81) > {code} -- This message was sent by Atlassian JIRA (v6.3.4#6332)