Found it: in hbase-site.xml: <property> <name>hbase.io.compress.snappy.codec</name> <value>org.apache.hadoop.hbase.io.compress.xerial.SnappyCodec</value> </property>
> Am 30.04.2024 um 16:32 schrieb Udo Offermann <udo.offerm...@zfabrik.de>: > > I think we finally made it. > There were a few more problems: First, I made sure that the class paths were > clean - Classpath hygiene in Java is the be-all and end-all ;-) > Then I saw that the region servers had problems with Snappy compression. I'm > not sure, but I believe the native Snappy libs were part of the previous > Hadoop distribution, at least they are not included in the current one. After > copying them over it seems to work now. But what is the recommended way to > enable snappy compression in Hbase now? > > > I noticed another small error on the Master Web UI: The „Regions in > transition“ JSP throws a NullPointerException when clicking on the link: > > http://master1ct:16010/rits.jsp <http://gmd9mtsma1ct:16010/rits.jsp> > HTTP ERROR 500 java.lang.NullPointerException > URI: /rits.jsp > STATUS: 500 > MESSAGE: java.lang.NullPointerException > SERVLET: org.apache.hadoop.hbase.generated.master.rits_jsp > CAUSED BY: java.lang.NullPointerException > Caused by: > > java.lang.NullPointerException > at > org.apache.hadoop.hbase.generated.master.rits_jsp._jspService(rits_jsp.java:113) > at org.apache.jasper.runtime.HttpJspBase.service(HttpJspBase.java:111) > at javax.servlet.http.HttpServlet.service(HttpServlet.java:790) > at > org.apache.hbase.thirdparty.org.eclipse.jetty.servlet.ServletHolder$NotAsync.service(ServletHolder.java:1450) > at > org.apache.hbase.thirdparty.org.eclipse.jetty.servlet.ServletHolder.handle(ServletHolder.java:799) > at > org.apache.hbase.thirdparty.org.eclipse.jetty.servlet.ServletHandler$ChainEnd.doFilter(ServletHandler.java:1656) > at > org.apache.hadoop.hbase.http.lib.StaticUserWebFilter$StaticUserFilter.doFilter(StaticUserWebFilter.java:117) > at > org.apache.hbase.thirdparty.org.eclipse.jetty.servlet.FilterHolder.doFilter(FilterHolder.java:193) > at > 
org.apache.hbase.thirdparty.org.eclipse.jetty.servlet.ServletHandler$Chain.doFilter(ServletHandler.java:1626) > at > org.apache.hadoop.hbase.http.SecurityHeadersFilter.doFilter(SecurityHeadersFilter.java:65) > at > org.apache.hbase.thirdparty.org.eclipse.jetty.servlet.FilterHolder.doFilter(FilterHolder.java:193) > at > org.apache.hbase.thirdparty.org.eclipse.jetty.servlet.ServletHandler$Chain.doFilter(ServletHandler.java:1626) > at > org.apache.hadoop.hbase.http.ClickjackingPreventionFilter.doFilter(ClickjackingPreventionFilter.java:49) > at > org.apache.hbase.thirdparty.org.eclipse.jetty.servlet.FilterHolder.doFilter(FilterHolder.java:193) > at > org.apache.hbase.thirdparty.org.eclipse.jetty.servlet.ServletHandler$Chain.doFilter(ServletHandler.java:1626) > at > org.apache.hadoop.hbase.http.HttpServer$QuotingInputFilter.doFilter(HttpServer.java:1521) > at > org.apache.hbase.thirdparty.org.eclipse.jetty.servlet.FilterHolder.doFilter(FilterHolder.java:193) > at > org.apache.hbase.thirdparty.org.eclipse.jetty.servlet.ServletHandler$Chain.doFilter(ServletHandler.java:1626) > at > org.apache.hadoop.hbase.http.NoCacheFilter.doFilter(NoCacheFilter.java:47) > at > org.apache.hbase.thirdparty.org.eclipse.jetty.servlet.FilterHolder.doFilter(FilterHolder.java:193) > at > org.apache.hbase.thirdparty.org.eclipse.jetty.servlet.ServletHandler$Chain.doFilter(ServletHandler.java:1626) > at > org.apache.hbase.thirdparty.org.eclipse.jetty.servlet.ServletHandler.doHandle(ServletHandler.java:552) > at > org.apache.hbase.thirdparty.org.eclipse.jetty.server.handler.ScopedHandler.handle(ScopedHandler.java:143) > at > org.apache.hbase.thirdparty.org.eclipse.jetty.security.SecurityHandler.handle(SecurityHandler.java:600) > at > org.apache.hbase.thirdparty.org.eclipse.jetty.server.handler.HandlerWrapper.handle(HandlerWrapper.java:127) > at > org.apache.hbase.thirdparty.org.eclipse.jetty.server.handler.ScopedHandler.nextHandle(ScopedHandler.java:235) > at > 
org.apache.hbase.thirdparty.org.eclipse.jetty.server.session.SessionHandler.doHandle(SessionHandler.java:1624) > at > org.apache.hbase.thirdparty.org.eclipse.jetty.server.handler.ScopedHandler.nextHandle(ScopedHandler.java:233) > at > org.apache.hbase.thirdparty.org.eclipse.jetty.server.handler.ContextHandler.doHandle(ContextHandler.java:1440) > at > org.apache.hbase.thirdparty.org.eclipse.jetty.server.handler.ScopedHandler.nextScope(ScopedHandler.java:188) > at > org.apache.hbase.thirdparty.org.eclipse.jetty.servlet.ServletHandler.doScope(ServletHandler.java:505) > at > org.apache.hbase.thirdparty.org.eclipse.jetty.server.session.SessionHandler.doScope(SessionHandler.java:1594) > at > org.apache.hbase.thirdparty.org.eclipse.jetty.server.handler.ScopedHandler.nextScope(ScopedHandler.java:186) > at > org.apache.hbase.thirdparty.org.eclipse.jetty.server.handler.ContextHandler.doScope(ContextHandler.java:1355) > at > org.apache.hbase.thirdparty.org.eclipse.jetty.server.handler.ScopedHandler.handle(ScopedHandler.java:141) > at > org.apache.hbase.thirdparty.org.eclipse.jetty.server.handler.HandlerCollection.handle(HandlerCollection.java:146) > at > org.apache.hbase.thirdparty.org.eclipse.jetty.server.handler.gzip.GzipHandler.handle(GzipHandler.java:772) > at > org.apache.hbase.thirdparty.org.eclipse.jetty.server.handler.HandlerWrapper.handle(HandlerWrapper.java:127) > at > org.apache.hbase.thirdparty.org.eclipse.jetty.server.Server.handle(Server.java:516) > at > org.apache.hbase.thirdparty.org.eclipse.jetty.server.HttpChannel.lambda$handle$1(HttpChannel.java:487) > at > org.apache.hbase.thirdparty.org.eclipse.jetty.server.HttpChannel.dispatch(HttpChannel.java:732) > at > org.apache.hbase.thirdparty.org.eclipse.jetty.server.HttpChannel.handle(HttpChannel.java:479) > at > org.apache.hbase.thirdparty.org.eclipse.jetty.server.HttpConnection.onFillable(HttpConnection.java:277) > at > 
org.apache.hbase.thirdparty.org.eclipse.jetty.io.AbstractConnection$ReadCallback.succeeded(AbstractConnection.java:311) > at > org.apache.hbase.thirdparty.org.eclipse.jetty.io.FillInterest.fillable(FillInterest.java:105) > at > org.apache.hbase.thirdparty.org.eclipse.jetty.io.ChannelEndPoint$1.run(ChannelEndPoint.java:104) > at > org.apache.hbase.thirdparty.org.eclipse.jetty.util.thread.strategy.EatWhatYouKill.runTask(EatWhatYouKill.java:338) > at > org.apache.hbase.thirdparty.org.eclipse.jetty.util.thread.strategy.EatWhatYouKill.doProduce(EatWhatYouKill.java:315) > at > org.apache.hbase.thirdparty.org.eclipse.jetty.util.thread.strategy.EatWhatYouKill.tryProduce(EatWhatYouKill.java:173) > at > org.apache.hbase.thirdparty.org.eclipse.jetty.util.thread.strategy.EatWhatYouKill.run(EatWhatYouKill.java:131) > at > org.apache.hbase.thirdparty.org.eclipse.jetty.util.thread.ReservedThreadExecutor$ReservedThread.run(ReservedThreadExecutor.java:409) > at > org.apache.hbase.thirdparty.org.eclipse.jetty.util.thread.QueuedThreadPool.runJob(QueuedThreadPool.java:883) > at > org.apache.hbase.thirdparty.org.eclipse.jetty.util.thread.QueuedThreadPool$Runner.run(QueuedThreadPool.java:1034) > at java.lang.Thread.run(Thread.java:750) > > >> Am 30.04.2024 um 04:42 schrieb 张铎(Duo Zhang) <palomino...@gmail.com>: >> >> Oh, there is a typo, I mean the ServerCrashProcedure should not block other >> procedures if it is in claim replication queue stage. >> >> 张铎(Duo Zhang) <palomino...@gmail.com>于2024年4月30日 周二10:41写道: >> >>> Sorry to be a pain as the procedure store is a big problem before HBase >>> 2.3 so we have done a big refactoring on HBase 2.3+ so we have a migration >>> which makes the upgrading a bit complicated. >>> >>> And on the upgrading, you do not need to mix up HBase and Hadoop, you can >>> upgrading them separately. 
Second, rolling upgrading is also a bit >>> complicated, so I suggest you try fully down/up upgrading first, if you >>> have successfully done an upgrading, then you can start to try rolling >>> upgrading. >>> >>> To your scenario, I suggest, you first upgrading Hadoop, including >>> namenode and datanode, HBase should be functional after the upgrading. And >>> then, as discussed above, turn off the balancer, view the master page to >>> make sure there are no RITs and no procedures, then shutdown master, and >>> then shutdown all the region servers. And then, start master(do not need to >>> wait the master finishes start up, as it relies on meta region online, >>> where we must have at least one region server), and then all the region >>> servers, to see if the cluster can go back to normal. >>> >>> On the ServerCrashProcedure, it is blocked in claim replication queue, >>> which should be blocked other procedures as the region assignment should >>> have already been finished. Does your cluster has replication peers? If >>> not, it is a bit strange that why your procedure is blocked in the claim >>> replication queue stage… >>> >>> Thanks. >>> >>> Udo Offermann <udo.offerm...@zfabrik.de>于2024年4月29日 周一21:26写道: >>> >>>> This time we made progress. >>>> I first upgraded the Master Hadoop and HBase wise (after making sure that >>>> there are no regions in transition and no running procedures) with keeping >>>> Zookeeper running. Master was started with new version 2.8.5 telling that >>>> there are 6 nodes with inconsistent version (what was to be expected). Now >>>> the startup process completes with "Starting cluster schema service >>>> COMPLETE“, >>>> all regions were assigned and the cluster seemed to be stable. >>>> >>>> Again there were no regions in transitions and no procedures running and >>>> so I started to upgrade the data nodes one by one. 
>>>> The problem now is that the new region servers are not assigned regions >>>> except of 3: hbase:namespace, hbase:meta and one of our application level >>>> tables (which is empty most of the time). >>>> The more data nodes I migrated, the more regions were accumulated on the >>>> nodes running the old version until the last old data node has managed all >>>> regions except for 3. >>>> >>>> >>>> >>>> After all regions have been transitioned I migrated the last node which >>>> yields that all regions are in transition and look like this one: >>>> >>>> 2185 2184 WAITING_TIMEOUT seritrack >>>> TransitRegionStateProcedure table=tt_items, >>>> region=d7a411647663dd9e0fc972c7e14088a5, ASSIGN Mon Apr 29 14:12:36 >>>> CEST 2024 Mon Apr 29 14:59:44 CEST 2024 pid=2185, ppid=2184, >>>> state=WAITING_TIMEOUT:REGION_STATE_TRANSITION_GET_ASSIGN_CANDIDATE, >>>> locked=true; TransitRegionStateProcedure table=tt_items, >>>> region=d7a411647663dd9e0fc972c7e14088a5, ASSIGN >>>> >>>> They are all waiting on this one: >>>> >>>> 2184 WAITING seritrack ServerCrashProcedure >>>> datanode06ct.gmd9.intern,16020,1714378085579 Mon Apr 29 14:12:36 CEST >>>> 2024 Mon Apr 29 14:12:36 CEST 2024 pid=2184, >>>> state=WAITING:SERVER_CRASH_CLAIM_REPLICATION_QUEUES, locked=true; >>>> ServerCrashProcedure datanode06ct.gmd9.intern,16020,1714378085579, >>>> splitWal=true, meta=false >>>> >>>> Again „ServerCrashProcedure“! Why are they not processed? >>>> Why is it so hard to upgrade the cluster? Is it worthwhile to take the >>>> next stable version 2.5.8? >>>> And - btw- what is the difference between the two distributions „bin“ and >>>> „hadoop3-bin“? >>>> >>>> Best regards >>>> Udo >>>> >>>> >>>> >>>> >>>> >>>>> Am 28.04.2024 um 03:03 schrieb 张铎(Duo Zhang) <palomino...@gmail.com>: >>>>> >>>>> Better turn it off, and observe the master page until there is no RITs >>>>> and no other procedures, then call hbase-daemon.sh stop master, and >>>>> then hbase-daemon.sh stop regionserver. 
>>>>> >>>>> I'm not 100% sure about the shell command, you'd better search try it >>>>> by yourself. The key here is to stop master first and make sure there >>>>> is no procedure, so we can safely remove MasterProcWALs, and then stop >>>>> all region servers. >>>>> >>>>> Thanks. >>>>> >>>>> Udo Offermann <udo.offerm...@zfabrik.de> 于2024年4月26日周五 23:34写道: >>>>>> >>>>>> I know, but is it necessary or beneficial to turn it off - and if so - >>>> when? >>>>>> And what is your recommendation about stopping the region servers? Just >>>>>> hbase-daemon.sh stop regionserver >>>>>> or >>>>>> graceful_stop.sh localhost >>>>>> ? >>>>>>> Am 26.04.2024 um 17:22 schrieb 张铎(Duo Zhang) <palomino...@gmail.com>: >>>>>>> >>>>>>> Turning off balancer is to make sure that the balancer will not >>>>>>> schedule any procedures to balance the cluster. >>>>>>> >>>>>>> Udo Offermann <udo.offerm...@zfabrik.de> 于2024年4月26日周五 23:03写道: >>>>>>>> >>>>>>>> and what’s about turning off HBase balancer before stopping hmaster? >>>>>>>> >>>>>>>>> Am 26.04.2024 um 17:00 schrieb Udo Offermann < >>>> udo.offerm...@zfabrik.de>: >>>>>>>>> >>>>>>>>> So there is no need for >>>>>>>>> >>>>>>>>> hbase/bin/graceful_stop.sh localhost >>>>>>>>> >>>>>>>>> in order to stop the region servers? >>>>>>>>> >>>>>>>>>> Am 26.04.2024 um 16:51 schrieb 张铎(Duo Zhang) < >>>> palomino...@gmail.com>: >>>>>>>>>> >>>>>>>>>> The key here is to make sure there are no procedures in HBase so we >>>>>>>>>> are safe to move MasterProcWALs. >>>>>>>>>> >>>>>>>>>> And procedures can only be scheduled by master. >>>>>>>>>> >>>>>>>>>> So once there are no procedures in HBase, you should stop master >>>>>>>>>> first, and then you are free to stop all the regionservers. And >>>> then >>>>>>>>>> you can proceed with the upgrading of hdfs/hadoop, and then restart >>>>>>>>>> master and region servers with new versions. >>>>>>>>>> >>>>>>>>>> You can have a try. 
>>>>>>>>>> >>>>>>>>>> Udo Offermann <udo.offerm...@zfabrik.de> 于2024年4月26日周五 22:47写道: >>>>>>>>>>> >>>>>>>>>>> Ah, this sounds interesting! >>>>>>>>>>> >>>>>>>>>>> I need to think about how I'm going to manage this together with >>>> upgrading Hadoop. My strategy was to first upgrade Hadoop on all machines >>>> and then start HBase with the new version on all machines. But now I have >>>> to upgrade the master first - Hadoop and Hbase wise - and then the data >>>> nodes one by one - again Hadoop and Hbase wise. Is it also safe to do the >>>> Hbase upgrade „inside“ a rolling Hadoop upgrade? >>>>>>>>>>> >>>>>>>>>>> I mean: >>>>>>>>>>> >>>>>>>>>>> 1) Upgrade master >>>>>>>>>>> >>>>>>>>>>> make sure there are no hbase procedures running >>>>>>>>>>> >>>>>>>>>>> hdfs dfsadmin -safemode enter >>>>>>>>>>> hdfs dfsadmin -rollingUpgrade prepare >>>>>>>>>>> kill hmaster >>>>>>>>>>> kill/stop zookeeper ??? >>>>>>>>>>> hdfs dfs -rm /hbase/MasterProcWALs/* >>>>>>>>>>> stop secondary and namenode >>>>>>>>>>> SWITCH-TO-NEW-VERSION >>>>>>>>>>> hadoop-daemon.sh start namenode -rollingUpgrade started >>>>>>>>>>> start secondary >>>>>>>>>>> start zookeeper >>>>>>>>>>> start hmaster >>>>>>>>>>>> The cluster should be in an intermediate state, where master >>>>>>>>>>>> is in new version but region servers remain in old version, but >>>> it >>>>>>>>>>>> should be functional. >>>>>>>>>>> >>>>>>>>>>> 2) Upgrade data node 1..6 >>>>>>>>>>> stop / kill region server ??? >>>>>>>>>>> hdfs dfsadmin -shutdownDatanode localhost:50020 upgrade >>>>>>>>>>> SWITCH-TO-NEW-VERSION >>>>>>>>>>> start datanode >>>>>>>>>>> start region server >>>>>>>>>>> >>>>>>>>>>> 3) Finalize upgrade >>>>>>>>>>> hdfs dfsadmin -rollingUpgrade finalize >>>>>>>>>>> start yarn processes >>>>>>>>>>> >>>>>>>>>>> Hmm, sounds like a plan, what do you think? 
>>>>>>>>>>> >>>>>>>>>>> >>>>>>>>>>>> Am 26.04.2024 um 16:25 schrieb 张铎(Duo Zhang) < >>>> palomino...@gmail.com>: >>>>>>>>>>>> >>>>>>>>>>>> I think the cluster is not in a correct state, none of the SCPs >>>> has >>>>>>>>>>>> carrying meta = true but meta is not online... >>>>>>>>>>>> >>>>>>>>>>>> If you have gracefully shutdown all the region servers, you >>>> should not >>>>>>>>>>>> delete all the MasterWALProcs, as there are already SCPs in it. >>>> This >>>>>>>>>>>> is how we deal with graceful shutdown, master just does not >>>> process >>>>>>>>>>>> the SCPs, but we do have already scheduled the SCPs... >>>>>>>>>>>> >>>>>>>>>>>> What I said above, is to make sure that there are no procedures >>>> in the >>>>>>>>>>>> system, then kill the master directly, without shutting down all >>>> the >>>>>>>>>>>> region servers, remove MasterWALProcs, and then restart master >>>> with >>>>>>>>>>>> new code. The cluster should be in an intermediate state, where >>>> master >>>>>>>>>>>> is in new version but region servers remain in old version, but >>>> it >>>>>>>>>>>> should be functional. And then you can rolling upgrade the region >>>>>>>>>>>> servers one by one. >>>>>>>>>>>> >>>>>>>>>>>> You could try it again. >>>>>>>>>>>> >>>>>>>>>>>> Thanks. >>>>>>>>>>>> >>>>>>>>>>>> >>>>>>>>>>>> Udo Offermann <udo.offerm...@zfabrik.de> 于2024年4月26日周五 22:03写道: >>>>>>>>>>>>> >>>>>>>>>>>>>> I think hostnames should be case insensitive? So why is there a >>>>>>>>>>>>>> 'DATANODE01CT' and then a 'DATANODE01ct'? >>>>>>>>>>>>> Well observed ;-) I was asked by our customer to disguise the >>>> server names, and I missed some of them when searching and replacing, but I >>>> can assure you that all server names are correct and we have never had any >>>> problems with them. >>>>>>>>>>>>> >>>>>>>>>>>>> The cluster consists of 7 servers: one master and 6 data nodes >>>> running on Alma Linux (version 8 I believe) and Java 8 (updated only some >>>> weeks ago). 
Master is running Hadoop name node, secondary name node, yarn >>>> resource manager and history server as well as Hbase Zookeeper and Master. >>>> The data nodes are running data node, region server and Yarn node manager. >>>> They're all virtual machines at the same size ram (16GB) and cpu wise (4 >>>> cores). The basic setup is from 2015 (with hbase 0.9 and we never change it >>>> except upgrading to HBase 1.0 and to Hbase 2.2.5 in 2020), thus we have >>>> been running Hadoop/HBase for almost 10 years now without any major >>>> problems. >>>>>>>>>>>>> >>>>>>>>>>>>> The HBCKServerCrashProcedure comes from my attempt to recover >>>> the cluster as you advised me the other day: >>>>>>>>>>>>> >>>>>>>>>>>>>>>>>>> Then use HBCK2, to schedule a SCP for this region server, >>>> to see if it >>>>>>>>>>>>>>>>>>> can fix the problem. >>>>>>>>>>>>>>>>>>> >>>>>>>>>>>>>>>>>>> >>>> https://github.com/apache/hbase-operator-tools/blob/master/hbase-hbck2/README.md >>>>>>>>>>>>>>>>>>> >>>>>>>>>>>>>>>>>>> This is the document for HBCK2, you should use the >>>> scheduleRecoveries command. >>>>>>>>>>>>> >>>>>>>>>>>>> You can take it as an act of desperation ;-) >>>>>>>>>>>>> >>>>>>>>>>>>> I will take care about log4j2 but how can I get the cluster up >>>> and running? >>>>>>>>>>>>> >>>>>>>>>>>>> Best regards >>>>>>>>>>>>> Udo >>>>>>>>>>>>> >>>>>>>>>>>>> >>>>>>>>>>>>>> Am 26.04.2024 um 15:29 schrieb 张铎(Duo Zhang) < >>>> palomino...@gmail.com>: >>>>>>>>>>>>>> >>>>>>>>>>>>>> It is a bit strange that why do you have a >>>> HBCKServerCrashProcedure? >>>>>>>>>>>>>> It should only appear when you use HBCK2 to force schedule a >>>> SCP. >>>>>>>>>>>>>> And it is also a bit strange that all the SCPs are marked as >>>> not >>>>>>>>>>>>>> carrying meta... How many region servers do you have in your >>>> cluster? >>>>>>>>>>>>>> >>>>>>>>>>>>>> I think hostnames should be case insensitive? So why is there a >>>>>>>>>>>>>> 'DATANODE01CT' and then a 'DATANODE01ct'? 
>>>>>>>>>>>>>> >>>>>>>>>>>>>> And for hbase 2.5.x, we have switched to use log4j2, instead >>>> of log4j. >>>>>>>>>>>>>> >>>> https://github.com/apache/hbase/blob/branch-2.5/conf/log4j2.properties >>>>>>>>>>>>>> >>>>>>>>>>>>>> Udo Offermann <udo.offerm...@zfabrik.de> 于2024年4月26日周五 >>>> 19:59写道: >>>>>>>>>>>>>>> >>>>>>>>>>>>>>> After resetting the VMs, we started a new upgrade attempt. >>>>>>>>>>>>>>> The Hadoop part ran smoothly again, but we got stuck again >>>> with HBase. >>>>>>>>>>>>>>> >>>>>>>>>>>>>>> Before upgrading HBase I turned off the balancer and stopped >>>> all region servers gracefully. I also deleted the MasterProcWALs folder in >>>> hdfs. >>>>>>>>>>>>>>> Then I started the master and region servers with version >>>> 2.5.7. Again the master stops at „Starting assignment manager“ task. >>>>>>>>>>>>>>> >>>>>>>>>>>>>>> There are a number of server crash procedures that do not >>>> appear to be processed: >>>>>>>>>>>>>>> >>>>>>>>>>>>>>> HBase Shell >>>>>>>>>>>>>>> Use "help" to get list of supported commands. >>>>>>>>>>>>>>> Use "exit" to quit this interactive shell. 
>>>>>>>>>>>>>>> For Reference, please visit: >>>> http://hbase.apache.org/2.0/book.html#shell >>>>>>>>>>>>>>> Version 2.5.7, r6788f98356dd70b4a7ff766ea7a8298e022e7b95, Thu >>>> Dec 14 15:59:16 PST 2023 >>>>>>>>>>>>>>> Took 0.0016 seconds >>>>>>>>>>>>>>> hbase:001:0> list_procedures >>>>>>>>>>>>>>> PID Name State Submitted Last_Update Parameters >>>>>>>>>>>>>>> 1 >>>> org.apache.hadoop.hbase.master.procedure.ServerCrashProcedure RUNNABLE >>>> 2024-04-26 12:22:12 +0200 2024-04-26 13:16:57 +0200 [{"state"=>[1, 3]}, >>>> {"serverName"=>{"hostName"=>"DATANODE01CT", "port"=>16020, >>>> "startCode"=>"1714126714199"}, "carryingMeta"=>false, >>>> "shouldSplitWal"=>true}] >>>>>>>>>>>>>>> 2 >>>> org.apache.hadoop.hbase.master.procedure.ServerCrashProcedure RUNNABLE >>>> 2024-04-26 12:22:18 +0200 2024-04-26 13:16:57 +0200 [{"state"=>[1, 3]}, >>>> {"serverName"=>{"hostName"=>"DATANODE02CT", "port"=>16020, >>>> "startCode"=>"1714126737220"}, "carryingMeta"=>false, >>>> "shouldSplitWal"=>true}] >>>>>>>>>>>>>>> 3 >>>> org.apache.hadoop.hbase.master.procedure.ServerCrashProcedure RUNNABLE >>>> 2024-04-26 12:22:24 +0200 2024-04-26 13:16:57 +0200 [{"state"=>[1, 3]}, >>>> {"serverName"=>{"hostName"=>"DATANODE03CT", "port"=>16020, >>>> "startCode"=>"1714126742645"}, "carryingMeta"=>false, >>>> "shouldSplitWal"=>true}] >>>>>>>>>>>>>>> 4 >>>> org.apache.hadoop.hbase.master.procedure.ServerCrashProcedure RUNNABLE >>>> 2024-04-26 12:22:37 +0200 2024-04-26 13:16:57 +0200 [{"state"=>[1, 3]}, >>>> {"serverName"=>{"hostName"=>"DATANODE05CT", "port"=>16020, >>>> "startCode"=>"1714126754579"}, "carryingMeta"=>false, >>>> "shouldSplitWal"=>true}] >>>>>>>>>>>>>>> 5 >>>> org.apache.hadoop.hbase.master.procedure.ServerCrashProcedure RUNNABLE >>>> 2024-04-26 12:22:44 +0200 2024-04-26 13:16:57 +0200 [{"state"=>[1, 3]}, >>>> {"serverName"=>{"hostName"=>"DATANODE06CT", "port"=>16020, >>>> "startCode"=>"1714126762089"}, "carryingMeta"=>false, >>>> "shouldSplitWal"=>true}] >>>>>>>>>>>>>>> 6 >>>> 
org.apache.hadoop.hbase.master.procedure.ServerCrashProcedure RUNNABLE >>>> 2024-04-26 13:13:43 +0200 2024-04-26 13:16:57 +0200 [{"state"=>[1, 3]}, >>>> {"serverName"=>{"hostName"=>"DATANODE01ct", "port"=>16020, >>>> "startCode"=>"1714127123596"}, "carryingMeta"=>false, >>>> "shouldSplitWal"=>true}] >>>>>>>>>>>>>>> 7 >>>> org.apache.hadoop.hbase.master.procedure.ServerCrashProcedure RUNNABLE >>>> 2024-04-26 13:13:53 +0200 2024-04-26 13:16:57 +0200 [{"state"=>[1, 3]}, >>>> {"serverName"=>{"hostName"=>"DATANODE02ct", "port"=>16020, >>>> "startCode"=>"1714127133136"}, "carryingMeta"=>false, >>>> "shouldSplitWal"=>true}] >>>>>>>>>>>>>>> 8 >>>> org.apache.hadoop.hbase.master.procedure.ServerCrashProcedure RUNNABLE >>>> 2024-04-26 13:14:07 +0200 2024-04-26 13:16:57 +0200 [{"state"=>[1, 3]}, >>>> {"serverName"=>{"hostName"=>"DATANODE03ct", "port"=>16020, >>>> "startCode"=>"1714127138682"}, "carryingMeta"=>false, >>>> "shouldSplitWal"=>true}] >>>>>>>>>>>>>>> 9 >>>> org.apache.hadoop.hbase.master.procedure.ServerCrashProcedure RUNNABLE >>>> 2024-04-26 13:14:17 +0200 2024-04-26 13:16:57 +0200 [{"state"=>[1, 3]}, >>>> {"serverName"=>{"hostName"=>"DATANODE05ct", "port"=>16020, >>>> "startCode"=>"1714127155080"}, "carryingMeta"=>false, >>>> "shouldSplitWal"=>true}] >>>>>>>>>>>>>>> 10 >>>> org.apache.hadoop.hbase.master.procedure.ServerCrashProcedure RUNNABLE >>>> 2024-04-26 13:14:30 +0200 2024-04-26 13:16:57 +0200 [{"state"=>[1, 3]}, >>>> {"serverName"=>{"hostName"=>"DATANODE06ct", "port"=>16020, >>>> "startCode"=>"1714127158551"}, "carryingMeta"=>false, >>>> "shouldSplitWal"=>true}] >>>>>>>>>>>>>>> 11 >>>> org.apache.hadoop.hbase.master.procedure.ServerCrashProcedure RUNNABLE >>>> 2024-04-26 13:16:57 +0200 2024-04-26 13:16:57 +0200 [{"state"=>[1, 3]}, >>>> {"serverName"=>{"hostName"=>"DATANODE04ct", "port"=>16020, >>>> "startCode"=>"1714126747741"}, "carryingMeta"=>false, >>>> "shouldSplitWal"=>true}] >>>>>>>>>>>>>>> 12 >>>> 
org.apache.hadoop.hbase.master.procedure.HBCKServerCrashProcedure RUNNABLE >>>> 2024-04-26 13:22:16 +0200 2024-04-26 13:22:16 +0200 [{"state"=>[1, 3]}, >>>> {"serverName"=>{"hostName"=>"DATANODE03CT", "port"=>16020, >>>> "startCode"=>"1714130315364"}, "carryingMeta"=>false, >>>> "shouldSplitWal"=>true}] >>>>>>>>>>>>>>> 12 row(s) >>>>>>>>>>>>>>> Took 0.6564 seconds >>>>>>>>>>>>>>> >>>>>>>>>>>>>>> Strangely enough, the log files are empty: >>>>>>>>>>>>>>> >>>>>>>>>>>>>>> cat logs/hbase-seritrack-master-server.out >>>>>>>>>>>>>>> 13:31:57.280 >>>> [ActiveMasterInitializationMonitor-1714130217278] ERROR >>>> org.apache.hadoop.hbase.master.MasterInitializationMonitor - Master failed >>>> to complete initialization after 900000ms. Please consider submitting a bug >>>> report including a thread dump of this process. >>>>>>>>>>>>>>> >>>>>>>>>>>>>>> cat logs/hbase-seritrack-master-server.log >>>>>>>>>>>>>>> Fri Apr 26 13:16:47 CEST 2024 Starting master on master-server >>>>>>>>>>>>>>> core file size (blocks, -c) 0 >>>>>>>>>>>>>>> data seg size (kbytes, -d) unlimited >>>>>>>>>>>>>>> scheduling priority (-e) 0 >>>>>>>>>>>>>>> file size (blocks, -f) unlimited >>>>>>>>>>>>>>> pending signals (-i) 95119 >>>>>>>>>>>>>>> max locked memory (kbytes, -l) 64 >>>>>>>>>>>>>>> max memory size (kbytes, -m) unlimited >>>>>>>>>>>>>>> open files (-n) 1024 >>>>>>>>>>>>>>> pipe size (512 bytes, -p) 8 >>>>>>>>>>>>>>> POSIX message queues (bytes, -q) 819200 >>>>>>>>>>>>>>> real-time priority (-r) 0 >>>>>>>>>>>>>>> stack size (kbytes, -s) 8192 >>>>>>>>>>>>>>> cpu time (seconds, -t) unlimited >>>>>>>>>>>>>>> max user processes (-u) 95119 >>>>>>>>>>>>>>> virtual memory (kbytes, -v) unlimited >>>>>>>>>>>>>>> file locks (-x) unlimited >>>>>>>>>>>>>>> >>>>>>>>>>>>>>> >>>>>>>>>>>>>>> I have checked the settings: >>>>>>>>>>>>>>> >>>>>>>>>>>>>>> Submitted Log Name: org.apache.hadoop.hbase >>>>>>>>>>>>>>> Log Class: org.apache.logging.slf4j.Log4jLogger >>>>>>>>>>>>>>> Effective level: ERROR 
>>>>>>>>>>>>>>> >>>>>>>>>>>>>>> I then explicitly set the log level again: >>>>>>>>>>>>>>> >>>>>>>>>>>>>>> cat hbase/conf/log4j.properties >>>>>>>>>>>>>>> [...] >>>>>>>>>>>>>>> log4j.logger.org.apache.hadoop.hbase=INFO >>>>>>>>>>>>>>> >>>>>>>>>>>>>>> and >>>>>>>>>>>>>>> export HBASE_ROOT_LOGGER=hbase.root.logger=INFO,console >>>>>>>>>>>>>>> >>>>>>>>>>>>>>> And then restarted HMaster - without success. >>>>>>>>>>>>>>> >>>>>>>>>>>>>>> Why does the log level remain at ERROR? >>>>>>>>>>>>>>> I'm pretty sure that the levels will be set to INFO at some >>>> point later on but they remain at level ERROR during the startup phase. >>>>>>>>>>>>>>> >>>>>>>>>>>>>>> >>>>>>>>>>>>>>> Here I post the Zookeeper Dump: >>>>>>>>>>>>>>> >>>>>>>>>>>>>>> HBase is rooted at /hbase >>>>>>>>>>>>>>> Active master address: >>>>>>>>>>>>>>> master-server,16000,1714130208769 >>>>>>>>>>>>>>> Backup master addresses: >>>>>>>>>>>>>>> Region server holding hbase:meta: >>>>>>>>>>>>>>> DATANODE03ct,16020,1714122680513 >>>>>>>>>>>>>>> Region servers: >>>>>>>>>>>>>>> DATANODE06ct,16020,1714130693358 >>>>>>>>>>>>>>> DATANODE03ct,16020,1714130672936 >>>>>>>>>>>>>>> DATANODE02ct,16020,1714130665456 >>>>>>>>>>>>>>> DATANODE01ct,16020,1714130653350 >>>>>>>>>>>>>>> DATANODE04ct,16020,1714130248620 >>>>>>>>>>>>>>> Quorum Server Statistics: >>>>>>>>>>>>>>> master-server:2181 >>>>>>>>>>>>>>> stat is not executed because it is not in the whitelist. 
>>>>>>>>>>>>>>> >>>>>>>>>>>>>>> Best regards >>>>>>>>>>>>>>> Udo >>>>>>>>>>>>>>> >>>>>>>>>>>>>>> >>>>>>>>>>>>>>> >>>>>>>>>>>>>>> >>>>>>>>>>>>>>> >>>>>>>>>>>>>>>> Am 23.04.2024 um 09:36 schrieb 张铎(Duo Zhang) < >>>> palomino...@gmail.com>: >>>>>>>>>>>>>>>> >>>>>>>>>>>>>>>> Strange, I checked the code, it seems we get NPE on this line >>>>>>>>>>>>>>>> >>>>>>>>>>>>>>>> >>>> https://github.com/apache/hbase/blob/4d7ce1aac724fbf09e526fc422b5a11e530c32f0/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterRpcServices.java#L2872 >>>>>>>>>>>>>>>> >>>>>>>>>>>>>>>> Could you please confirm that you connect to the correct >>>> active master >>>>>>>>>>>>>>>> which is hanging? It seems that you are connecting the backup >>>>>>>>>>>>>>>> master... >>>>>>>>>>>>>>>> >>>>>>>>>>>>>>>> Thanks. >>>>>>>>>>>>>>>> >>>>>>>>>>>>>>>> 张铎(Duo Zhang) <palomino...@gmail.com> 于2024年4月23日周二 15:31写道: >>>>>>>>>>>>>>>>> >>>>>>>>>>>>>>>>> Ah, NPE usually means a code bug, then there is no simple >>>> way to fix >>>>>>>>>>>>>>>>> it, need to take a deep look on the code :( >>>>>>>>>>>>>>>>> >>>>>>>>>>>>>>>>> Sorry. >>>>>>>>>>>>>>>>> >>>>>>>>>>>>>>>>> Udo Offermann <udo.offerm...@zfabrik.de> 于2024年4月22日周一 >>>> 15:32写道: >>>>>>>>>>>>>>>>>> >>>>>>>>>>>>>>>>>> Unfortunately not. >>>>>>>>>>>>>>>>>> I’ve found the node hosting the meta region and was able >>>> to run hack scheduleRecoveries using hbase-operator-tools-1.2.0. >>>>>>>>>>>>>>>>>> The tool however stops with an NPE: >>>>>>>>>>>>>>>>>> >>>>>>>>>>>>>>>>>> 09:22:00.532 [main] WARN >>>> org.apache.hadoop.util.NativeCodeLoader - Unable to load native-hadoop >>>> library for your platform... using builtin-java classes where applicable >>>>>>>>>>>>>>>>>> 09:22:00.703 [main] INFO >>>> org.apache.hadoop.conf.Configuration.deprecation - hbase.client.pause.cqtbe >>>> is deprecated. 
Instead, use hbase.client.pause.server.overloaded >>>>>>>>>>>>>>>>>> 09:22:00.765 [ReadOnlyZKClient-HBaseMaster:2181@0x7d9f158f] >>>> INFO org.apache.hadoop.hbase.shaded.org.apache.zookeeper.ZooKeeper - >>>> Client >>>> environment:zookeeper.version=3.8.3-6ad6d364c7c0bcf0de452d54ebefa3058098ab56, >>>> built on 2023-10-05 10:34 UTC >>>>>>>>>>>>>>>>>> 09:22:00.765 [ReadOnlyZKClient-HBaseMaster:2181@0x7d9f158f] >>>> INFO org.apache.hadoop.hbase.shaded.org.apache.zookeeper.ZooKeeper - >>>> Client environment:host.name=HBaseMaster.gmd9.intern >>>>>>>>>>>>>>>>>> 09:22:00.765 [ReadOnlyZKClient-HBaseMaster:2181@0x7d9f158f] >>>> INFO org.apache.hadoop.hbase.shaded.org.apache.zookeeper.ZooKeeper - >>>> Client environment:java.version=1.8.0_402 >>>>>>>>>>>>>>>>>> 09:22:00.766 [ReadOnlyZKClient-HBaseMaster:2181@0x7d9f158f] >>>> INFO org.apache.hadoop.hbase.shaded.org.apache.zookeeper.ZooKeeper - >>>> Client environment:java.vendor=Red Hat, Inc. >>>>>>>>>>>>>>>>>> 09:22:00.766 [ReadOnlyZKClient-HBaseMaster:2181@0x7d9f158f] >>>> INFO org.apache.hadoop.hbase.shaded.org.apache.zookeeper.ZooKeeper - >>>> Client >>>> environment:java.home=/usr/lib/jvm/java-1.8.0-openjdk-1.8.0.402.b06-2.el8.x86_64/jre >>>>>>>>>>>>>>>>>> 09:22:00.766 [ReadOnlyZKClient-HBaseMaster:2181@0x7d9f158f] >>>> INFO org.apache.hadoop.hbase.shaded.org.apache.zookeeper.ZooKeeper - >>>> Client >>>> 
environment:java.class.path=hbase-operator-tools-1.2.0/hbase-hbck2/hbase-hbck2-1.2.0.jar:hbase/conf:/opt/seritrack/tt/jdk/lib/tools.jar:/opt/seritrack/tt/nosql/hbase:/opt/seritrack/tt/nosql/hbase/lib/shaded-clients/hbase-shaded-mapreduce-2.5.7.jar:/opt/seritrack/tt/nosql/hbase/lib/client-facing-thirdparty/audience-annotations-0.13.0.jar:/opt/seritrack/tt/nosql/hbase/lib/client-facing-thirdparty/commons-logging-1.2.jar:/opt/seritrack/tt/nosql/hbase/lib/client-facing-thirdparty/htrace-core4-4.1.0-incubating.jar:/opt/seritrack/tt/nosql/hbase/lib/client-facing-thirdparty/jcl-over-slf4j-1.7.33.jar:/opt/seritrack/tt/nosql/hbase/lib/client-facing-thirdparty/jul-to-slf4j-1.7.33.jar:/opt/seritrack/tt/nosql/hbase/lib/client-facing-thirdparty/opentelemetry-api-1.15.0.jar:/opt/seritrack/tt/nosql/hbase/lib/client-facing-thirdparty/opentelemetry-context-1.15.0.jar:/opt/seritrack/tt/nosql/hbase/lib/client-facing-thirdparty/opentelemetry-semconv-1.15.0-alpha.jar:/opt/seritrack/tt/nosql/hbase/lib/client-facing-thirdparty/slf4j-api-1.7.33.jar:/opt/seritrack/tt/nosql/hbase/lib/shaded-clients/hbase-shaded-client-2.5.7.jar:/opt/seritrack/tt/nosql/pl_nosql_ext/libs/pl_nosql_ext-3.0.jar:/opt/seritrack/tt/nosql/hbase/lib/client-facing-thirdparty/log4j-1.2-api-2.17.2.jar:/opt/seritrack/tt/nosql/hbase/lib/client-facing-thirdparty/log4j-api-2.17.2.jar:/opt/seritrack/tt/nosql/hbase/lib/client-facing-thirdparty/log4j-core-2.17.2.jar:/opt/seritrack/tt/nosql/hbase/lib/client-facing-thirdparty/log4j-slf4j-impl-2.17.2.jar:/opt/seritrack/tt/prometheus_exporters/jmx_exporter/jmx_prometheus_javaagent.jar >>>>>>>>>>>>>>>>>> 09:22:00.766 [ReadOnlyZKClient-HBaseMaster:2181@0x7d9f158f] >>>> INFO org.apache.hadoop.hbase.shaded.org.apache.zookeeper.ZooKeeper - >>>> Client >>>> environment:java.library.path=/opt/seritrack/tt/nosql/hadoop/lib/native >>>>>>>>>>>>>>>>>> 09:22:00.766 [ReadOnlyZKClient-HBaseMaster:2181@0x7d9f158f] >>>> INFO org.apache.hadoop.hbase.shaded.org.apache.zookeeper.ZooKeeper - >>>> 
Client environment:java.io.tmpdir=/tmp >>>>>>>>>>>>>>>>>> 09:22:00.766 [ReadOnlyZKClient-HBaseMaster:2181@0x7d9f158f] >>>> INFO org.apache.hadoop.hbase.shaded.org.apache.zookeeper.ZooKeeper - >>>> Client environment:java.compiler=<NA> >>>>>>>>>>>>>>>>>> 09:22:00.766 [ReadOnlyZKClient-HBaseMaster:2181@0x7d9f158f] >>>> INFO org.apache.hadoop.hbase.shaded.org.apache.zookeeper.ZooKeeper - >>>> Client environment:os.name=Linux >>>>>>>>>>>>>>>>>> 09:22:00.766 [ReadOnlyZKClient-HBaseMaster:2181@0x7d9f158f] >>>> INFO org.apache.hadoop.hbase.shaded.org.apache.zookeeper.ZooKeeper - >>>> Client environment:os.arch=amd64 >>>>>>>>>>>>>>>>>> 09:22:00.766 [ReadOnlyZKClient-HBaseMaster:2181@0x7d9f158f] >>>> INFO org.apache.hadoop.hbase.shaded.org.apache.zookeeper.ZooKeeper - >>>> Client environment:os.version=4.18.0-513.18.1.el8_9.x86_64 >>>>>>>>>>>>>>>>>> 09:22:00.766 [ReadOnlyZKClient-HBaseMaster:2181@0x7d9f158f] >>>> INFO org.apache.hadoop.hbase.shaded.org.apache.zookeeper.ZooKeeper - >>>> Client environment:user.name=seritrack >>>>>>>>>>>>>>>>>> 09:22:00.766 [ReadOnlyZKClient-HBaseMaster:2181@0x7d9f158f] >>>> INFO org.apache.hadoop.hbase.shaded.org.apache.zookeeper.ZooKeeper - >>>> Client environment:user.home=/opt/seritrack >>>>>>>>>>>>>>>>>> 09:22:00.766 [ReadOnlyZKClient-HBaseMaster:2181@0x7d9f158f] >>>> INFO org.apache.hadoop.hbase.shaded.org.apache.zookeeper.ZooKeeper - >>>> Client environment:user.dir=/opt/seritrack/tt/nosql_3.0 >>>>>>>>>>>>>>>>>> 09:22:00.766 [ReadOnlyZKClient-HBaseMaster:2181@0x7d9f158f] >>>> INFO org.apache.hadoop.hbase.shaded.org.apache.zookeeper.ZooKeeper - >>>> Client environment:os.memory.free=275MB >>>>>>>>>>>>>>>>>> 09:22:00.766 [ReadOnlyZKClient-HBaseMaster:2181@0x7d9f158f] >>>> INFO org.apache.hadoop.hbase.shaded.org.apache.zookeeper.ZooKeeper - >>>> Client environment:os.memory.max=2966MB >>>>>>>>>>>>>>>>>> 09:22:00.766 [ReadOnlyZKClient-HBaseMaster:2181@0x7d9f158f] >>>> INFO org.apache.hadoop.hbase.shaded.org.apache.zookeeper.ZooKeeper - 
>>>> Client environment:os.memory.total=361MB >>>>>>>>>>>>>>>>>> 09:22:00.771 [ReadOnlyZKClient-HBaseMaster:2181@0x7d9f158f] >>>> INFO org.apache.hadoop.hbase.shaded.org.apache.zookeeper.ZooKeeper - >>>> Initiating client connection, connectString=HBaseMaster:2181 >>>> sessionTimeout=90000 >>>> watcher=org.apache.hadoop.hbase.zookeeper.ReadOnlyZKClient$$Lambda$45/1091799416@aed32c5 >>>>>>>>>>>>>>>>>> 09:22:00.774 [ReadOnlyZKClient-HBaseMaster:2181@0x7d9f158f] >>>> INFO org.apache.hadoop.hbase.shaded.org.apache.zookeeper.common.X509Util - >>>> Setting -D jdk.tls.rejectClientInitiatedRenegotiation=true to disable >>>> client-initiated TLS renegotiation >>>>>>>>>>>>>>>>>> 09:22:00.777 [ReadOnlyZKClient-HBaseMaster:2181@0x7d9f158f] >>>> INFO org.apache.hadoop.hbase.shaded.org.apache.zookeeper.ClientCnxnSocket >>>> - jute.maxbuffer value is 1048575 Bytes >>>>>>>>>>>>>>>>>> 09:22:00.785 [ReadOnlyZKClient-HBaseMaster:2181@0x7d9f158f] >>>> INFO org.apache.hadoop.hbase.shaded.org.apache.zookeeper.ClientCnxn - >>>> zookeeper.request.timeout value is 0. feature enabled=false >>>>>>>>>>>>>>>>>> 09:22:00.793 >>>> [ReadOnlyZKClient-HBaseMaster:2181@0x7d9f158f-SendThread(HBaseMaster:2181)] >>>> INFO org.apache.hadoop.hbase.shaded.org.apache.zookeeper.ClientCnxn - >>>> Opening socket connection to server HBaseMaster/10.21.204.230:2181. 
>>>>>>>>>>>>>>>>>> 09:22:00.793 >>>> [ReadOnlyZKClient-HBaseMaster:2181@0x7d9f158f-SendThread(HBaseMaster:2181)] >>>> INFO org.apache.hadoop.hbase.shaded.org.apache.zookeeper.ClientCnxn - SASL >>>> config status: Will not attempt to authenticate using SASL (unknown error) >>>>>>>>>>>>>>>>>> 09:22:00.797 >>>> [ReadOnlyZKClient-HBaseMaster:2181@0x7d9f158f-SendThread(HBaseMaster:2181)] >>>> INFO org.apache.hadoop.hbase.shaded.org.apache.zookeeper.ClientCnxn - >>>> Socket connection established, initiating session, client: / >>>> 10.21.204.230:41072, server: HBaseMaster/10.21.204.230:2181 >>>>>>>>>>>>>>>>>> 09:22:00.801 >>>> [ReadOnlyZKClient-HBaseMaster:2181@0x7d9f158f-SendThread(HBaseMaster:2181)] >>>> INFO org.apache.hadoop.hbase.shaded.org.apache.zookeeper.ClientCnxn - >>>> Session establishment complete on server HBaseMaster/10.21.204.230:2181, >>>> session id = 0x10009a4f379001e, negotiated timeout = 90000 >>>>>>>>>>>>>>>>>> -1 >>>>>>>>>>>>>>>>>> Exception in thread "main" java.io.IOException: >>>> org.apache.hbase.thirdparty.com.google.protobuf.ServiceException: >>>> org.apache.hadoop.hbase.ipc.RemoteWithExtrasException(java.io.IOException): >>>> java.io.IOException >>>>>>>>>>>>>>>>>> at >>>> org.apache.hadoop.hbase.ipc.RpcServer.call(RpcServer.java:479) >>>>>>>>>>>>>>>>>> at >>>> org.apache.hadoop.hbase.ipc.CallRunner.run(CallRunner.java:124) >>>>>>>>>>>>>>>>>> at >>>> org.apache.hadoop.hbase.ipc.RpcHandler.run(RpcHandler.java:102) >>>>>>>>>>>>>>>>>> at >>>> org.apache.hadoop.hbase.ipc.RpcHandler.run(RpcHandler.java:82) >>>>>>>>>>>>>>>>>> Caused by: java.lang.NullPointerException >>>>>>>>>>>>>>>>>> at >>>> org.apache.hadoop.hbase.master.MasterRpcServices.shouldSubmitSCP(MasterRpcServices.java:2872) >>>>>>>>>>>>>>>>>> at >>>> org.apache.hadoop.hbase.master.MasterRpcServices.scheduleServerCrashProcedure(MasterRpcServices.java:2600) >>>>>>>>>>>>>>>>>> at >>>> 
org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos$HbckService$2.callBlockingMethod(MasterProtos.java) >>>>>>>>>>>>>>>>>> at >>>> org.apache.hadoop.hbase.ipc.RpcServer.call(RpcServer.java:415) >>>>>>>>>>>>>>>>>> ... 3 more >>>>>>>>>>>>>>>>>> >>>>>>>>>>>>>>>>>> at >>>> org.apache.hadoop.hbase.client.HBaseHbck.scheduleServerCrashProcedures(HBaseHbck.java:198) >>>>>>>>>>>>>>>>>> at >>>> org.apache.hadoop.hbase.client.Hbck.scheduleServerCrashProcedure(Hbck.java:128) >>>>>>>>>>>>>>>>>> at >>>> org.apache.hbase.HBCK2.scheduleRecoveries(HBCK2.java:418) >>>>>>>>>>>>>>>>>> at org.apache.hbase.HBCK2.doCommandLine(HBCK2.java:960) >>>>>>>>>>>>>>>>>> at org.apache.hbase.HBCK2.run(HBCK2.java:830) >>>>>>>>>>>>>>>>>> at >>>> org.apache.hadoop.util.ToolRunner.run(ToolRunner.java:76) >>>>>>>>>>>>>>>>>> at >>>> org.apache.hadoop.util.ToolRunner.run(ToolRunner.java:90) >>>>>>>>>>>>>>>>>> at org.apache.hbase.HBCK2.main(HBCK2.java:1145) >>>>>>>>>>>>>>>>>> Caused by: >>>> org.apache.hbase.thirdparty.com.google.protobuf.ServiceException: >>>> org.apache.hadoop.hbase.ipc.RemoteWithExtrasException(java.io.IOException): >>>> java.io.IOException >>>>>>>>>>>>>>>>>> at >>>> org.apache.hadoop.hbase.ipc.RpcServer.call(RpcServer.java:479) >>>>>>>>>>>>>>>>>> at >>>> org.apache.hadoop.hbase.ipc.CallRunner.run(CallRunner.java:124) >>>>>>>>>>>>>>>>>> at >>>> org.apache.hadoop.hbase.ipc.RpcHandler.run(RpcHandler.java:102) >>>>>>>>>>>>>>>>>> at >>>> org.apache.hadoop.hbase.ipc.RpcHandler.run(RpcHandler.java:82) >>>>>>>>>>>>>>>>>> Caused by: java.lang.NullPointerException >>>>>>>>>>>>>>>>>> at >>>> org.apache.hadoop.hbase.master.MasterRpcServices.shouldSubmitSCP(MasterRpcServices.java:2872) >>>>>>>>>>>>>>>>>> at >>>> org.apache.hadoop.hbase.master.MasterRpcServices.scheduleServerCrashProcedure(MasterRpcServices.java:2600) >>>>>>>>>>>>>>>>>> at >>>> org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos$HbckService$2.callBlockingMethod(MasterProtos.java) >>>>>>>>>>>>>>>>>> at >>>> 
org.apache.hadoop.hbase.ipc.RpcServer.call(RpcServer.java:415) >>>>>>>>>>>>>>>>>> ... 3 more >>>>>>>>>>>>>>>>>> >>>>>>>>>>>>>>>>>> at >>>> org.apache.hadoop.hbase.ipc.AbstractRpcClient.callBlockingMethod(AbstractRpcClient.java:340) >>>>>>>>>>>>>>>>>> at >>>> org.apache.hadoop.hbase.ipc.AbstractRpcClient.access$200(AbstractRpcClient.java:92) >>>>>>>>>>>>>>>>>> at >>>> org.apache.hadoop.hbase.ipc.AbstractRpcClient$BlockingRpcChannelImplementation.callBlockingMethod(AbstractRpcClient.java:595) >>>>>>>>>>>>>>>>>> at >>>> org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos$HbckService$BlockingStub.scheduleServerCrashProcedure(MasterProtos.java) >>>>>>>>>>>>>>>>>> at >>>> org.apache.hadoop.hbase.client.HBaseHbck.scheduleServerCrashProcedures(HBaseHbck.java:190) >>>>>>>>>>>>>>>>>> ... 7 more >>>>>>>>>>>>>>>>>> Caused by: >>>> org.apache.hadoop.hbase.ipc.RemoteWithExtrasException(java.io.IOException): >>>> java.io.IOException >>>>>>>>>>>>>>>>>> at >>>> org.apache.hadoop.hbase.ipc.RpcServer.call(RpcServer.java:479) >>>>>>>>>>>>>>>>>> at >>>> org.apache.hadoop.hbase.ipc.CallRunner.run(CallRunner.java:124) >>>>>>>>>>>>>>>>>> at >>>> org.apache.hadoop.hbase.ipc.RpcHandler.run(RpcHandler.java:102) >>>>>>>>>>>>>>>>>> at >>>> org.apache.hadoop.hbase.ipc.RpcHandler.run(RpcHandler.java:82) >>>>>>>>>>>>>>>>>> Caused by: java.lang.NullPointerException >>>>>>>>>>>>>>>>>> at >>>> org.apache.hadoop.hbase.master.MasterRpcServices.shouldSubmitSCP(MasterRpcServices.java:2872) >>>>>>>>>>>>>>>>>> at >>>> org.apache.hadoop.hbase.master.MasterRpcServices.scheduleServerCrashProcedure(MasterRpcServices.java:2600) >>>>>>>>>>>>>>>>>> at >>>> org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos$HbckService$2.callBlockingMethod(MasterProtos.java) >>>>>>>>>>>>>>>>>> at >>>> org.apache.hadoop.hbase.ipc.RpcServer.call(RpcServer.java:415) >>>>>>>>>>>>>>>>>> ... 
3 more >>>>>>>>>>>>>>>>>> >>>>>>>>>>>>>>>>>> at >>>> org.apache.hadoop.hbase.ipc.AbstractRpcClient.onCallFinished(AbstractRpcClient.java:388) >>>>>>>>>>>>>>>>>> at >>>> org.apache.hadoop.hbase.ipc.AbstractRpcClient.access$100(AbstractRpcClient.java:92) >>>>>>>>>>>>>>>>>> at >>>> org.apache.hadoop.hbase.ipc.AbstractRpcClient$3.run(AbstractRpcClient.java:425) >>>>>>>>>>>>>>>>>> at >>>> org.apache.hadoop.hbase.ipc.AbstractRpcClient$3.run(AbstractRpcClient.java:420) >>>>>>>>>>>>>>>>>> at >>>> org.apache.hadoop.hbase.ipc.Call.callComplete(Call.java:114) >>>>>>>>>>>>>>>>>> at >>>> org.apache.hadoop.hbase.ipc.Call.setException(Call.java:129) >>>>>>>>>>>>>>>>>> at >>>> org.apache.hadoop.hbase.ipc.NettyRpcDuplexHandler.readResponse(NettyRpcDuplexHandler.java:199) >>>>>>>>>>>>>>>>>> at >>>> org.apache.hadoop.hbase.ipc.NettyRpcDuplexHandler.channelRead(NettyRpcDuplexHandler.java:220) >>>>>>>>>>>>>>>>>> at >>>> org.apache.hbase.thirdparty.io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:442) >>>>>>>>>>>>>>>>>> at >>>> org.apache.hbase.thirdparty.io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:420) >>>>>>>>>>>>>>>>>> at >>>> org.apache.hbase.thirdparty.io.netty.channel.AbstractChannelHandlerContext.fireChannelRead(AbstractChannelHandlerContext.java:412) >>>>>>>>>>>>>>>>>> at >>>> org.apache.hbase.thirdparty.io.netty.handler.codec.ByteToMessageDecoder.fireChannelRead(ByteToMessageDecoder.java:346) >>>>>>>>>>>>>>>>>> at >>>> org.apache.hbase.thirdparty.io.netty.handler.codec.ByteToMessageDecoder.channelRead(ByteToMessageDecoder.java:318) >>>>>>>>>>>>>>>>>> at >>>> org.apache.hbase.thirdparty.io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:444) >>>>>>>>>>>>>>>>>> at >>>> org.apache.hbase.thirdparty.io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:420) >>>>>>>>>>>>>>>>>> at >>>> 
org.apache.hbase.thirdparty.io.netty.channel.AbstractChannelHandlerContext.fireChannelRead(AbstractChannelHandlerContext.java:412) >>>>>>>>>>>>>>>>>> at >>>> org.apache.hbase.thirdparty.io.netty.handler.timeout.IdleStateHandler.channelRead(IdleStateHandler.java:286) >>>>>>>>>>>>>>>>>> at >>>> org.apache.hbase.thirdparty.io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:442) >>>>>>>>>>>>>>>>>> at >>>> org.apache.hbase.thirdparty.io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:420) >>>>>>>>>>>>>>>>>> at >>>> org.apache.hbase.thirdparty.io.netty.channel.AbstractChannelHandlerContext.fireChannelRead(AbstractChannelHandlerContext.java:412) >>>>>>>>>>>>>>>>>> at >>>> org.apache.hbase.thirdparty.io.netty.channel.DefaultChannelPipeline$HeadContext.channelRead(DefaultChannelPipeline.java:1410) >>>>>>>>>>>>>>>>>> at >>>> org.apache.hbase.thirdparty.io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:440) >>>>>>>>>>>>>>>>>> at >>>> org.apache.hbase.thirdparty.io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:420) >>>>>>>>>>>>>>>>>> at >>>> org.apache.hbase.thirdparty.io.netty.channel.DefaultChannelPipeline.fireChannelRead(DefaultChannelPipeline.java:919) >>>>>>>>>>>>>>>>>> at >>>> org.apache.hbase.thirdparty.io.netty.channel.nio.AbstractNioByteChannel$NioByteUnsafe.read(AbstractNioByteChannel.java:166) >>>>>>>>>>>>>>>>>> at >>>> org.apache.hbase.thirdparty.io.netty.channel.nio.NioEventLoop.processSelectedKey(NioEventLoop.java:788) >>>>>>>>>>>>>>>>>> at >>>> org.apache.hbase.thirdparty.io.netty.channel.nio.NioEventLoop.processSelectedKeysOptimized(NioEventLoop.java:724) >>>>>>>>>>>>>>>>>> at >>>> org.apache.hbase.thirdparty.io.netty.channel.nio.NioEventLoop.processSelectedKeys(NioEventLoop.java:650) >>>>>>>>>>>>>>>>>> at >>>> 
org.apache.hbase.thirdparty.io.netty.channel.nio.NioEventLoop.run(NioEventLoop.java:562) >>>>>>>>>>>>>>>>>> at >>>> org.apache.hbase.thirdparty.io.netty.util.concurrent.SingleThreadEventExecutor$4.run(SingleThreadEventExecutor.java:997) >>>>>>>>>>>>>>>>>> at >>>> org.apache.hbase.thirdparty.io.netty.util.internal.ThreadExecutorMap$2.run(ThreadExecutorMap.java:74) >>>>>>>>>>>>>>>>>> at >>>> org.apache.hbase.thirdparty.io.netty.util.concurrent.FastThreadLocalRunnable.run(FastThreadLocalRunnable.java:30) >>>>>>>>>>>>>>>>>> at java.lang.Thread.run(Thread.java:750) >>>>>>>>>>>>>>>>>> >>>>>>>>>>>>>>>>>> >>>>>>>>>>>>>>>>>> >>>>>>>>>>>>>>>>>> >>>>>>>>>>>>>>>>>>> Am 20.04.2024 um 15:53 schrieb 张铎(Duo Zhang) < >>>> palomino...@gmail.com>: >>>>>>>>>>>>>>>>>>> >>>>>>>>>>>>>>>>>>> OK, it was waitForMetaOnline. >>>>>>>>>>>>>>>>>>> >>>>>>>>>>>>>>>>>>> Maybe the problem is that you do have some correct >>>> procedures before >>>>>>>>>>>>>>>>>>> upgrading, like ServerCrashProcedure, but then you delete >>>> all the >>>>>>>>>>>>>>>>>>> procedure wals so the ServerCrashProcedure is also gone, >>>> so meta can >>>>>>>>>>>>>>>>>>> never be online. >>>>>>>>>>>>>>>>>>> >>>>>>>>>>>>>>>>>>> Please check the /hbase/meta-region-server znode on >>>> zookeeper, dump >>>>>>>>>>>>>>>>>>> its content, it is protobuf based but anyway, you could >>>> see the >>>>>>>>>>>>>>>>>>> encoded server name which hosts meta region. >>>>>>>>>>>>>>>>>>> >>>>>>>>>>>>>>>>>>> Then use HBCK2, to schedule a SCP for this region server, >>>> to see if it >>>>>>>>>>>>>>>>>>> can fix the problem. >>>>>>>>>>>>>>>>>>> >>>>>>>>>>>>>>>>>>> >>>> https://github.com/apache/hbase-operator-tools/blob/master/hbase-hbck2/README.md >>>>>>>>>>>>>>>>>>> >>>>>>>>>>>>>>>>>>> This is the document for HBCK2, you should use the >>>> scheduleRecoveries command. >>>>>>>>>>>>>>>>>>> >>>>>>>>>>>>>>>>>>> Hope this could fix your problem. 
>>>>>>>>>>>>>>>>>>> >>>>>>>>>>>>>>>>>>> Thread 92 (master/masterserver:16000:becomeActiveMaster): >>>>>>>>>>>>>>>>>>> State: TIMED_WAITING >>>>>>>>>>>>>>>>>>> Blocked count: 165 >>>>>>>>>>>>>>>>>>> Waited count: 404 >>>>>>>>>>>>>>>>>>> Stack: >>>>>>>>>>>>>>>>>>> java.lang.Thread.sleep(Native Method) >>>>>>>>>>>>>>>>>>> >>>> org.apache.hadoop.hbase.util.Threads.sleep(Threads.java:125) >>>>>>>>>>>>>>>>>>> >>>> org.apache.hadoop.hbase.master.HMaster.isRegionOnline(HMaster.java:1358) >>>>>>>>>>>>>>>>>>> >>>>>>>>>>>>>>>>>>> >>>> org.apache.hadoop.hbase.master.HMaster.waitForMetaOnline(HMaster.java:1328) >>>>>>>>>>>>>>>>>>> >>>>>>>>>>>>>>>>>>> >>>> org.apache.hadoop.hbase.master.HMaster.finishActiveMasterInitialization(HMaster.java:1069) >>>>>>>>>>>>>>>>>>> >>>>>>>>>>>>>>>>>>> >>>> org.apache.hadoop.hbase.master.HMaster.startActiveMasterManager(HMaster.java:2405) >>>>>>>>>>>>>>>>>>> >>>> org.apache.hadoop.hbase.master.HMaster.lambda$null$0(HMaster.java:565) >>>>>>>>>>>>>>>>>>> >>>>>>>>>>>>>>>>>>> >>>> org.apache.hadoop.hbase.master.HMaster$$Lambda$265/1598878738.run(Unknown >>>>>>>>>>>>>>>>>>> Source) >>>>>>>>>>>>>>>>>>> >>>> org.apache.hadoop.hbase.trace.TraceUtil.trace(TraceUtil.java:187) >>>>>>>>>>>>>>>>>>> >>>> org.apache.hadoop.hbase.trace.TraceUtil.trace(TraceUtil.java:177) >>>>>>>>>>>>>>>>>>> >>>> org.apache.hadoop.hbase.master.HMaster.lambda$run$1(HMaster.java:562) >>>>>>>>>>>>>>>>>>> >>>>>>>>>>>>>>>>>>> >>>> org.apache.hadoop.hbase.master.HMaster$$Lambda$264/1129144214.run(Unknown >>>>>>>>>>>>>>>>>>> Source) >>>>>>>>>>>>>>>>>>> java.lang.Thread.run(Thread.java:750) >>>>>>>>>>>>>>>>>>> >>>>>>>>>>>>>>>>>>> Udo Offermann <udo.offerm...@zfabrik.de <mailto: >>>> udo.offerm...@zfabrik.de>> 于2024年4月20日周六 21:13写道: >>>>>>>>>>>>>>>>>>>> >>>>>>>>>>>>>>>>>>>> Master status for >>>> masterserver.gmd9.intern,16000,1713515965162 as of Fri >>>>>>>>>>>>>>>>>>>> Apr 19 10:55:22 CEST 2024 >>>>>>>>>>>>>>>>>>>> >>>>>>>>>>>>>>>>>>>> >>>>>>>>>>>>>>>>>>>> Version Info: 
>>>>>>>>>>>>>>>>>>>> >>>> =========================================================== >>>>>>>>>>>>>>>>>>>> HBase 2.5.7 >>>>>>>>>>>>>>>>>>>> Source code repository >>>>>>>>>>>>>>>>>>>> git://buildbox.localdomain/home/apurtell/tmp/RM/hbase >>>>>>>>>>>>>>>>>>>> revision=6788f98356dd70b4a7ff766ea7a8298e022e7b95 >>>>>>>>>>>>>>>>>>>> Compiled by apurtell on Thu Dec 14 15:59:16 PST 2023 >>>>>>>>>>>>>>>>>>>> From source with checksum >>>>>>>>>>>>>>>>>>>> >>>> 1501d7fdf72398791ee335a229d099fc972cea7c2a952da7622eb087ddf975361f107cbbbee5d0ad6f603466e9afa1f4fd242ffccbd4371eb0b56059bb3b5402 >>>>>>>>>>>>>>>>>>>> Hadoop 2.10.2 >>>>>>>>>>>>>>>>>>>> Source code repository Unknown >>>>>>>>>>>>>>>>>>>> revision=965fd380006fa78b2315668fbc7eb432e1d8200f >>>>>>>>>>>>>>>>>>>> Compiled by ubuntu on 2022-05-25T00:12Z >>>>>>>>>>>>>>>>>>>> >>>>>>>>>>>>>>>>>>>> >>>>>>>>>>>>>>>>>>>> Tasks: >>>>>>>>>>>>>>>>>>>> >>>> =========================================================== >>>>>>>>>>>>>>>>>>>> Task: Master startup >>>>>>>>>>>>>>>>>>>> Status: RUNNING:Starting assignment manager >>>>>>>>>>>>>>>>>>>> Running for 954s >>>>>>>>>>>>>>>>>>>> >>>>>>>>>>>>>>>>>>>> Task: Flushing >>>> master:store,,1.1595e783b53d99cd5eef43b6debb2682. 
>>>>>>>>>>>>>>>>>>>> Status: COMPLETE:Flush successful flush >>>> result:CANNOT_FLUSH_MEMSTORE_EMPTY, >>>>>>>>>>>>>>>>>>>> failureReason:Nothing to flush,flush seq id14 >>>>>>>>>>>>>>>>>>>> Completed 49s ago >>>>>>>>>>>>>>>>>>>> Ran for 0s >>>>>>>>>>>>>>>>>>>> >>>>>>>>>>>>>>>>>>>> Task: >>>> RpcServer.priority.RWQ.Fifo.write.handler=0,queue=0,port=16000 >>>>>>>>>>>>>>>>>>>> Status: WAITING:Waiting for a call >>>>>>>>>>>>>>>>>>>> Running for 951s >>>>>>>>>>>>>>>>>>>> >>>>>>>>>>>>>>>>>>>> Task: >>>> RpcServer.priority.RWQ.Fifo.write.handler=1,queue=0,port=16000 >>>>>>>>>>>>>>>>>>>> Status: WAITING:Waiting for a call >>>>>>>>>>>>>>>>>>>> Running for 951s >>>>>>>>>>>>>>>>>>>> >>>>>>>>>>>>>>>>>>>> >>>>>>>>>>>>>>>>>>>> >>>>>>>>>>>>>>>>>>>> Servers: >>>>>>>>>>>>>>>>>>>> >>>> =========================================================== >>>>>>>>>>>>>>>>>>>> servername1ct.gmd9.intern,16020,1713514863737: >>>> requestsPerSecond=0.0, >>>>>>>>>>>>>>>>>>>> numberOfOnlineRegions=0, usedHeapMB=37.0MB, >>>> maxHeapMB=2966.0MB, >>>>>>>>>>>>>>>>>>>> numberOfStores=0, numberOfStorefiles=0, storeRefCount=0, >>>>>>>>>>>>>>>>>>>> maxCompactedStoreFileRefCount=0, >>>> storefileUncompressedSizeMB=0, >>>>>>>>>>>>>>>>>>>> storefileSizeMB=0, memstoreSizeMB=0, readRequestsCount=0, >>>>>>>>>>>>>>>>>>>> filteredReadRequestsCount=0, writeRequestsCount=0, >>>> rootIndexSizeKB=0, >>>>>>>>>>>>>>>>>>>> totalStaticIndexSizeKB=0, totalStaticBloomSizeKB=0, >>>> totalCompactingKVs=0, >>>>>>>>>>>>>>>>>>>> currentCompactedKVs=0, compactionProgressPct=NaN, >>>> coprocessors=[] >>>>>>>>>>>>>>>>>>>> servername2ct.gmd9.intern,16020,1713514925960: >>>> requestsPerSecond=0.0, >>>>>>>>>>>>>>>>>>>> numberOfOnlineRegions=0, usedHeapMB=20.0MB, >>>> maxHeapMB=2966.0MB, >>>>>>>>>>>>>>>>>>>> numberOfStores=0, numberOfStorefiles=0, storeRefCount=0, >>>>>>>>>>>>>>>>>>>> maxCompactedStoreFileRefCount=0, >>>> storefileUncompressedSizeMB=0, >>>>>>>>>>>>>>>>>>>> storefileSizeMB=0, memstoreSizeMB=0, readRequestsCount=0, 
>>>>>>>>>>>>>>>>>>>> filteredReadRequestsCount=0, writeRequestsCount=0, >>>> rootIndexSizeKB=0, >>>>>>>>>>>>>>>>>>>> totalStaticIndexSizeKB=0, totalStaticBloomSizeKB=0, >>>> totalCompactingKVs=0, >>>>>>>>>>>>>>>>>>>> currentCompactedKVs=0, compactionProgressPct=NaN, >>>> coprocessors=[] >>>>>>>>>>>>>>>>>>>> servername3ct.gmd9.intern,16020,1713514937151: >>>> requestsPerSecond=0.0, >>>>>>>>>>>>>>>>>>>> numberOfOnlineRegions=0, usedHeapMB=67.0MB, >>>> maxHeapMB=2966.0MB, >>>>>>>>>>>>>>>>>>>> numberOfStores=0, numberOfStorefiles=0, storeRefCount=0, >>>>>>>>>>>>>>>>>>>> maxCompactedStoreFileRefCount=0, >>>> storefileUncompressedSizeMB=0, >>>>>>>>>>>>>>>>>>>> storefileSizeMB=0, memstoreSizeMB=0, readRequestsCount=0, >>>>>>>>>>>>>>>>>>>> filteredReadRequestsCount=0, writeRequestsCount=0, >>>> rootIndexSizeKB=0, >>>>>>>>>>>>>>>>>>>> totalStaticIndexSizeKB=0, totalStaticBloomSizeKB=0, >>>> totalCompactingKVs=0, >>>>>>>>>>>>>>>>>>>> currentCompactedKVs=0, compactionProgressPct=NaN, >>>> coprocessors=[] >>>>>>>>>>>>>>>>>>>> servername4ct.gmd9.intern,16020,1713514968019: >>>> requestsPerSecond=0.0, >>>>>>>>>>>>>>>>>>>> numberOfOnlineRegions=0, usedHeapMB=24.0MB, >>>> maxHeapMB=2966.0MB, >>>>>>>>>>>>>>>>>>>> numberOfStores=0, numberOfStorefiles=0, storeRefCount=0, >>>>>>>>>>>>>>>>>>>> maxCompactedStoreFileRefCount=0, >>>> storefileUncompressedSizeMB=0, >>>>>>>>>>>>>>>>>>>> storefileSizeMB=0, memstoreSizeMB=0, readRequestsCount=0, >>>>>>>>>>>>>>>>>>>> filteredReadRequestsCount=0, writeRequestsCount=0, >>>> rootIndexSizeKB=0, >>>>>>>>>>>>>>>>>>>> totalStaticIndexSizeKB=0, totalStaticBloomSizeKB=0, >>>> totalCompactingKVs=0, >>>>>>>>>>>>>>>>>>>> currentCompactedKVs=0, compactionProgressPct=NaN, >>>> coprocessors=[] >>>>>>>>>>>>>>>>>>>> servername5ct.gmd9.intern,16020,1713514979294: >>>> requestsPerSecond=0.0, >>>>>>>>>>>>>>>>>>>> numberOfOnlineRegions=0, usedHeapMB=58.0MB, >>>> maxHeapMB=2966.0MB, >>>>>>>>>>>>>>>>>>>> numberOfStores=0, numberOfStorefiles=0, storeRefCount=0, 
>>>>>>>>>>>>>>>>>>>> maxCompactedStoreFileRefCount=0, >>>> storefileUncompressedSizeMB=0, >>>>>>>>>>>>>>>>>>>> storefileSizeMB=0, memstoreSizeMB=0, readRequestsCount=0, >>>>>>>>>>>>>>>>>>>> filteredReadRequestsCount=0, writeRequestsCount=0, >>>> rootIndexSizeKB=0, >>>>>>>>>>>>>>>>>>>> totalStaticIndexSizeKB=0, totalStaticBloomSizeKB=0, >>>> totalCompactingKVs=0, >>>>>>>>>>>>>>>>>>>> currentCompactedKVs=0, compactionProgressPct=NaN, >>>> coprocessors=[] >>>>>>>>>>>>>>>>>>>> servername6ct.gmd9.intern,16020,1713514994770: >>>> requestsPerSecond=0.0, >>>>>>>>>>>>>>>>>>>> numberOfOnlineRegions=0, usedHeapMB=31.0MB, >>>> maxHeapMB=2966.0MB, >>>>>>>>>>>>>>>>>>>> numberOfStores=0, numberOfStorefiles=0, storeRefCount=0, >>>>>>>>>>>>>>>>>>>> maxCompactedStoreFileRefCount=0, >>>> storefileUncompressedSizeMB=0, >>>>>>>>>>>>>>>>>>>> storefileSizeMB=0, memstoreSizeMB=0, readRequestsCount=0, >>>>>>>>>>>>>>>>>>>> filteredReadRequestsCount=0, writeRequestsCount=0, >>>> rootIndexSizeKB=0, >>>>>>>>>>>>>>>>>>>> totalStaticIndexSizeKB=0, totalStaticBloomSizeKB=0, >>>> totalCompactingKVs=0, >>>>>>>>>>>>>>>>>>>> currentCompactedKVs=0, compactionProgressPct=NaN, >>>> coprocessors=[] >>>>>>>>>>>>>>>>>>>> >>>>>>>>>>>>>>>>>>>> >>>>>>>>>>>>>>>>>>>> Regions-in-transition: >>>>>>>>>>>>>>>>>>>> >>>> =========================================================== >>>>>>>>>>>>>>>>>>>> >>>>>>>>>>>>>>>>>>>> >>>>>>>>>>>>>>>>>>>> Executors: >>>>>>>>>>>>>>>>>>>> >>>> =========================================================== >>>>>>>>>>>>>>>>>>>> Status for executor: >>>>>>>>>>>>>>>>>>>> >>>> Executor-4-MASTER_META_SERVER_OPERATIONS-master/masterserver:16000 >>>>>>>>>>>>>>>>>>>> ======================================= >>>>>>>>>>>>>>>>>>>> 0 events queued, 0 running >>>>>>>>>>>>>>>>>>>> Status for executor: >>>>>>>>>>>>>>>>>>>> >>>> Executor-6-MASTER_SNAPSHOT_OPERATIONS-master/masterserver:16000 >>>>>>>>>>>>>>>>>>>> ======================================= >>>>>>>>>>>>>>>>>>>> 0 events queued, 0 running 
>>>>>>>>>>>>>>>>>>>> Status for executor: >>>>>>>>>>>>>>>>>>>> >>>> Executor-3-MASTER_SERVER_OPERATIONS-master/masterserver:16000 >>>>>>>>>>>>>>>>>>>> ======================================= >>>>>>>>>>>>>>>>>>>> 0 events queued, 0 running >>>>>>>>>>>>>>>>>>>> Status for executor: >>>> Executor-5-M_LOG_REPLAY_OPS-master/masterserver:16000 >>>>>>>>>>>>>>>>>>>> ======================================= >>>>>>>>>>>>>>>>>>>> 0 events queued, 0 running >>>>>>>>>>>>>>>>>>>> Status for executor: >>>>>>>>>>>>>>>>>>>> Executor-2-MASTER_CLOSE_REGION-master/masterserver:16000 >>>>>>>>>>>>>>>>>>>> ======================================= >>>>>>>>>>>>>>>>>>>> 0 events queued, 0 running >>>>>>>>>>>>>>>>>>>> Status for executor: >>>>>>>>>>>>>>>>>>>> >>>> Executor-7-MASTER_MERGE_OPERATIONS-master/masterserver:16000 >>>>>>>>>>>>>>>>>>>> ======================================= >>>>>>>>>>>>>>>>>>>> 0 events queued, 0 running >>>>>>>>>>>>>>>>>>>> Status for executor: >>>>>>>>>>>>>>>>>>>> >>>> Executor-8-MASTER_TABLE_OPERATIONS-master/masterserver:16000 >>>>>>>>>>>>>>>>>>>> ======================================= >>>>>>>>>>>>>>>>>>>> 0 events queued, 0 running >>>>>>>>>>>>>>>>>>>> Status for executor: >>>>>>>>>>>>>>>>>>>> Executor-1-MASTER_OPEN_REGION-master/masterserver:16000 >>>>>>>>>>>>>>>>>>>> ======================================= >>>>>>>>>>>>>>>>>>>> 0 events queued, 0 running >>>>>>>>>>>>>>>>>>>> >>>>>>>>>>>>>>>>>>>> >>>>>>>>>>>>>>>>>>>> Stacks: >>>>>>>>>>>>>>>>>>>> >>>> =========================================================== >>>>>>>>>>>>>>>>>>>> Process Thread Dump: >>>>>>>>>>>>>>>>>>>> 131 active threads >>>>>>>>>>>>>>>>>>>> Thread 186 (WAL-Archive-0): >>>>>>>>>>>>>>>>>>>> State: WAITING >>>>>>>>>>>>>>>>>>>> Blocked count: 5 >>>>>>>>>>>>>>>>>>>> Waited count: 11 >>>>>>>>>>>>>>>>>>>> Waiting on >>>>>>>>>>>>>>>>>>>> >>>> java.util.concurrent.locks.AbstractQueuedSynchronizer$ConditionObject@42f44d41 >>>>>>>>>>>>>>>>>>>> Stack: >>>>>>>>>>>>>>>>>>>> sun.misc.Unsafe.park(Native 
Method) >>>>>>>>>>>>>>>>>>>> >>>> java.util.concurrent.locks.LockSupport.park(LockSupport.java:175) >>>>>>>>>>>>>>>>>>>> >>>>>>>>>>>>>>>>>>>> >>>> java.util.concurrent.locks.AbstractQueuedSynchronizer$ConditionObject.await(AbstractQueuedSynchronizer.java:2039) >>>>>>>>>>>>>>>>>>>> >>>>>>>>>>>>>>>>>>>> >>>> java.util.concurrent.LinkedBlockingQueue.take(LinkedBlockingQueue.java:442) >>>>>>>>>>>>>>>>>>>> >>>>>>>>>>>>>>>>>>>> >>>> java.util.concurrent.ThreadPoolExecutor.getTask(ThreadPoolExecutor.java:1074) >>>>>>>>>>>>>>>>>>>> >>>>>>>>>>>>>>>>>>>> >>>> java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1134) >>>>>>>>>>>>>>>>>>>> >>>>>>>>>>>>>>>>>>>> >>>> java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) >>>>>>>>>>>>>>>>>>>> java.lang.Thread.run(Thread.java:750) >>>>>>>>>>>>>>>>>>>> Thread 185 (Close-WAL-Writer-0): >>>>>>>>>>>>>>>>>>>> State: TIMED_WAITING >>>>>>>>>>>>>>>>>>>> Blocked count: 2 >>>>>>>>>>>>>>>>>>>> Waited count: 6 >>>>>>>>>>>>>>>>>>>> Stack: >>>>>>>>>>>>>>>>>>>> sun.misc.Unsafe.park(Native Method) >>>>>>>>>>>>>>>>>>>> >>>> java.util.concurrent.locks.LockSupport.parkNanos(LockSupport.java:215) >>>>>>>>>>>>>>>>>>>> >>>>>>>>>>>>>>>>>>>> >>>> java.util.concurrent.SynchronousQueue$TransferStack.awaitFulfill(SynchronousQueue.java:460) >>>>>>>>>>>>>>>>>>>> >>>>>>>>>>>>>>>>>>>> >>>> java.util.concurrent.SynchronousQueue$TransferStack.transfer(SynchronousQueue.java:362) >>>>>>>>>>>>>>>>>>>> >>>> java.util.concurrent.SynchronousQueue.poll(SynchronousQueue.java:941) >>>>>>>>>>>>>>>>>>>> >>>>>>>>>>>>>>>>>>>> >>>> java.util.concurrent.ThreadPoolExecutor.getTask(ThreadPoolExecutor.java:1073) >>>>>>>>>>>>>>>>>>>> >>>>>>>>>>>>>>>>>>>> >>>> java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1134) >>>>>>>>>>>>>>>>>>>> >>>>>>>>>>>>>>>>>>>> >>>> java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) >>>>>>>>>>>>>>>>>>>> java.lang.Thread.run(Thread.java:750) 
>>>>>>>>>>>>>>>>>>>> Thread 152 (Session-Scheduler-3bc4ef12-1): >>>>>>>>>>>>>>>>>>>> State: TIMED_WAITING >>>>>>>>>>>>>>>>>>>> Blocked count: 0 >>>>>>>>>>>>>>>>>>>> Waited count: 1 >>>>>>>>>>>>>>>>>>>> Stack: >>>>>>>>>>>>>>>>>>>> sun.misc.Unsafe.park(Native Method) >>>>>>>>>>>>>>>>>>>> >>>> java.util.concurrent.locks.LockSupport.parkNanos(LockSupport.java:215) >>>>>>>>>>>>>>>>>>>> >>>>>>>>>>>>>>>>>>>> >>>> java.util.concurrent.locks.AbstractQueuedSynchronizer$ConditionObject.awaitNanos(AbstractQueuedSynchronizer.java:2078) >>>>>>>>>>>>>>>>>>>> >>>>>>>>>>>>>>>>>>>> >>>> java.util.concurrent.ScheduledThreadPoolExecutor$DelayedWorkQueue.take(ScheduledThreadPoolExecutor.java:1093) >>>>>>>>>>>>>>>>>>>> >>>>>>>>>>>>>>>>>>>> >>>> java.util.concurrent.ScheduledThreadPoolExecutor$DelayedWorkQueue.take(ScheduledThreadPoolExecutor.java:809) >>>>>>>>>>>>>>>>>>>> >>>>>>>>>>>>>>>>>>>> >>>> java.util.concurrent.ThreadPoolExecutor.getTask(ThreadPoolExecutor.java:1074) >>>>>>>>>>>>>>>>>>>> >>>>>>>>>>>>>>>>>>>> >>>> java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1134) >>>>>>>>>>>>>>>>>>>> >>>>>>>>>>>>>>>>>>>> >>>> java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) >>>>>>>>>>>>>>>>>>>> java.lang.Thread.run(Thread.java:750) >>>>>>>>>>>>>>>>>>>> Thread 151 >>>>>>>>>>>>>>>>>>>> >>>> (master/masterserver:16000:becomeActiveMaster-HFileCleaner.small.0-1713515973400): >>>>>>>>>>>>>>>>>>>> State: WAITING >>>>>>>>>>>>>>>>>>>> Blocked count: 0 >>>>>>>>>>>>>>>>>>>> Waited count: 1 >>>>>>>>>>>>>>>>>>>> Waiting on >>>>>>>>>>>>>>>>>>>> >>>> java.util.concurrent.locks.AbstractQueuedSynchronizer$ConditionObject@58626ec5 >>>>>>>>>>>>>>>>>>>> Stack: >>>>>>>>>>>>>>>>>>>> sun.misc.Unsafe.park(Native Method) >>>>>>>>>>>>>>>>>>>> >>>> java.util.concurrent.locks.LockSupport.park(LockSupport.java:175) >>>>>>>>>>>>>>>>>>>> >>>>>>>>>>>>>>>>>>>> >>>> 
java.util.concurrent.locks.AbstractQueuedSynchronizer$ConditionObject.await(AbstractQueuedSynchronizer.java:2039) >>>>>>>>>>>>>>>>>>>> >>>>>>>>>>>>>>>>>>>> >>>> java.util.concurrent.PriorityBlockingQueue.take(PriorityBlockingQueue.java:549) >>>>>>>>>>>>>>>>>>>> >>>>>>>>>>>>>>>>>>>> >>>> org.apache.hadoop.hbase.master.cleaner.HFileCleaner.consumerLoop(HFileCleaner.java:285) >>>>>>>>>>>>>>>>>>>> >>>>>>>>>>>>>>>>>>>> >>>> org.apache.hadoop.hbase.master.cleaner.HFileCleaner$2.run(HFileCleaner.java:269) >>>>>>>>>>>>>>>>>>>> Thread 150 >>>>>>>>>>>>>>>>>>>> >>>> (master/masterserver:16000:becomeActiveMaster-HFileCleaner.large.0-1713515973400): >>>>>>>>>>>>>>>>>>>> State: WAITING >>>>>>>>>>>>>>>>>>>> Blocked count: 0 >>>>>>>>>>>>>>>>>>>> Waited count: 1 >>>>>>>>>>>>>>>>>>>> Waiting on >>>>>>>>>>>>>>>>>>>> >>>> java.util.concurrent.locks.AbstractQueuedSynchronizer$ConditionObject@18916420 >>>>>>>>>>>>>>>>>>>> Stack: >>>>>>>>>>>>>>>>>>>> sun.misc.Unsafe.park(Native Method) >>>>>>>>>>>>>>>>>>>> >>>> java.util.concurrent.locks.LockSupport.park(LockSupport.java:175) >>>>>>>>>>>>>>>>>>>> >>>>>>>>>>>>>>>>>>>> >>>> java.util.concurrent.locks.AbstractQueuedSynchronizer$ConditionObject.await(AbstractQueuedSynchronizer.java:2039) >>>>>>>>>>>>>>>>>>>> >>>> org.apache.hadoop.hbase.util.StealJobQueue.take(StealJobQueue.java:101) >>>>>>>>>>>>>>>>>>>> >>>>>>>>>>>>>>>>>>>> >>>> org.apache.hadoop.hbase.master.cleaner.HFileCleaner.consumerLoop(HFileCleaner.java:285) >>>>>>>>>>>>>>>>>>>> >>>>>>>>>>>>>>>>>>>> >>>> org.apache.hadoop.hbase.master.cleaner.HFileCleaner$1.run(HFileCleaner.java:254) >>>>>>>>>>>>>>>>>>>> Thread 149 (snapshot-hfile-cleaner-cache-refresher): >>>>>>>>>>>>>>>>>>>> State: TIMED_WAITING >>>>>>>>>>>>>>>>>>>> Blocked count: 4 >>>>>>>>>>>>>>>>>>>> Waited count: 11 >>>>>>>>>>>>>>>>>>>> Stack: >>>>>>>>>>>>>>>>>>>> java.lang.Object.wait(Native Method) >>>>>>>>>>>>>>>>>>>> java.util.TimerThread.mainLoop(Timer.java:552) >>>>>>>>>>>>>>>>>>>> 
java.util.TimerThread.run(Timer.java:505) >>>>>>>>>>>>>>>>>>>> Thread 148 (master/masterserver:16000.Chore.1): >>>>>>>>>>>>>>>>>>>> State: TIMED_WAITING >>>>>>>>>>>>>>>>>>>> Blocked count: 2 >>>>>>>>>>>>>>>>>>>> Waited count: 10 >>>>>>>>>>>>>>>>>>>> Stack: >>>>>>>>>>>>>>>>>>>> sun.misc.Unsafe.park(Native Method) >>>>>>>>>>>>>>>>>>>> >>>> java.util.concurrent.locks.LockSupport.parkNanos(LockSupport.java:215) >>>>>>>>>>>>>>>>>>>> >>>>>>>>>>>>>>>>>>>> >>>> java.util.concurrent.locks.AbstractQueuedSynchronizer$ConditionObject.awaitNanos(AbstractQueuedSynchronizer.java:2078) >>>>>>>>>>>>>>>>>>>> >>>>>>>>>>>>>>>>>>>> >>>> java.util.concurrent.ScheduledThreadPoolExecutor$DelayedWorkQueue.take(ScheduledThreadPoolExecutor.java:1093) >>>>>>>>>>>>>>>>>>>> >>>>>>>>>>>>>>>>>>>> >>>> java.util.concurrent.ScheduledThreadPoolExecutor$DelayedWorkQueue.take(ScheduledThreadPoolExecutor.java:809) >>>>>>>>>>>>>>>>>>>> >>>>>>>>>>>>>>>>>>>> >>>> java.util.concurrent.ThreadPoolExecutor.getTask(ThreadPoolExecutor.java:1074) >>>>>>>>>>>>>>>>>>>> >>>>>>>>>>>>>>>>>>>> >>>> java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1134) >>>>>>>>>>>>>>>>>>>> >>>>>>>>>>>>>>>>>>>> >>>> java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) >>>>>>>>>>>>>>>>>>>> java.lang.Thread.run(Thread.java:750) >>>>>>>>>>>>>>>>>>>> Thread 147 (OldWALsCleaner-1): >>>>>>>>>>>>>>>>>>>> State: WAITING >>>>>>>>>>>>>>>>>>>> Blocked count: 0 >>>>>>>>>>>>>>>>>>>> Waited count: 1 >>>>>>>>>>>>>>>>>>>> Waiting on >>>>>>>>>>>>>>>>>>>> >>>> java.util.concurrent.locks.AbstractQueuedSynchronizer$ConditionObject@7a6a3b7e >>>>>>>>>>>>>>>>>>>> Stack: >>>>>>>>>>>>>>>>>>>> sun.misc.Unsafe.park(Native Method) >>>>>>>>>>>>>>>>>>>> >>>> java.util.concurrent.locks.LockSupport.park(LockSupport.java:175) >>>>>>>>>>>>>>>>>>>> >>>>>>>>>>>>>>>>>>>> >>>> java.util.concurrent.locks.AbstractQueuedSynchronizer$ConditionObject.await(AbstractQueuedSynchronizer.java:2039) >>>>>>>>>>>>>>>>>>>> 
>>>>>>>>>>>>>>>>>>>> >>>> java.util.concurrent.LinkedBlockingQueue.take(LinkedBlockingQueue.java:442) >>>>>>>>>>>>>>>>>>>> >>>>>>>>>>>>>>>>>>>> >>>> org.apache.hadoop.hbase.master.cleaner.LogCleaner.deleteFile(LogCleaner.java:172) >>>>>>>>>>>>>>>>>>>> >>>>>>>>>>>>>>>>>>>> >>>> org.apache.hadoop.hbase.master.cleaner.LogCleaner.lambda$createOldWalsCleaner$1(LogCleaner.java:152) >>>>>>>>>>>>>>>>>>>> >>>>>>>>>>>>>>>>>>>> >>>> org.apache.hadoop.hbase.master.cleaner.LogCleaner$$Lambda$494/556458560.run(Unknown >>>>>>>>>>>>>>>>>>>> Source) >>>>>>>>>>>>>>>>>>>> java.lang.Thread.run(Thread.java:750) >>>>>>>>>>>>>>>>>>>> Thread 146 (OldWALsCleaner-0): >>>>>>>>>>>>>>>>>>>> State: WAITING >>>>>>>>>>>>>>>>>>>> Blocked count: 0 >>>>>>>>>>>>>>>>>>>> Waited count: 1 >>>>>>>>>>>>>>>>>>>> Waiting on >>>>>>>>>>>>>>>>>>>> >>>> java.util.concurrent.locks.AbstractQueuedSynchronizer$ConditionObject@7a6a3b7e >>>>>>>>>>>>>>>>>>>> Stack: >>>>>>>>>>>>>>>>>>>> sun.misc.Unsafe.park(Native Method) >>>>>>>>>>>>>>>>>>>> >>>> java.util.concurrent.locks.LockSupport.park(LockSupport.java:175) >>>>>>>>>>>>>>>>>>>> >>>>>>>>>>>>>>>>>>>> >>>> java.util.concurrent.locks.AbstractQueuedSynchronizer$ConditionObject.await(AbstractQueuedSynchronizer.java:2039) >>>>>>>>>>>>>>>>>>>> >>>>>>>>>>>>>>>>>>>> >>>> java.util.concurrent.LinkedBlockingQueue.take(LinkedBlockingQueue.java:442) >>>>>>>>>>>>>>>>>>>> >>>>>>>>>>>>>>>>>>>> >>>> org.apache.hadoop.hbase.master.cleaner.LogCleaner.deleteFile(LogCleaner.java:172) >>>>>>>>>>>>>>>>>>>> >>>>>>>>>>>>>>>>>>>> >>>> org.apache.hadoop.hbase.master.cleaner.LogCleaner.lambda$createOldWalsCleaner$1(LogCleaner.java:152) >>>>>>>>>>>>>>>>>>>> >>>>>>>>>>>>>>>>>>>> >>>> org.apache.hadoop.hbase.master.cleaner.LogCleaner$$Lambda$494/556458560.run(Unknown >>>>>>>>>>>>>>>>>>>> Source) >>>>>>>>>>>>>>>>>>>> java.lang.Thread.run(Thread.java:750) >>>>>>>>>>>>>>>>>>>> Thread 139 (PEWorker-16): >>>>>>>>>>>>>>>>>>>> State: TIMED_WAITING >>>>>>>>>>>>>>>>>>>> Blocked count: 0 
>>>>>>>>>>>>>>>>>>>> Waited count: 16 >>>>>>>>>>>>>>>>>>>> Stack: >>>>>>>>>>>>>>>>>>>> sun.misc.Unsafe.park(Native Method) >>>>>>>>>>>>>>>>>>>> >>>> java.util.concurrent.locks.LockSupport.parkNanos(LockSupport.java:215) >>>>>>>>>>>>>>>>>>>> >>>>>>>>>>>>>>>>>>>> >>>> java.util.concurrent.locks.AbstractQueuedSynchronizer$ConditionObject.awaitNanos(AbstractQueuedSynchronizer.java:2078) >>>>>>>>>>>>>>>>>>>> >>>>>>>>>>>>>>>>>>>> >>>> org.apache.hadoop.hbase.procedure2.AbstractProcedureScheduler.poll(AbstractProcedureScheduler.java:165) >>>>>>>>>>>>>>>>>>>> >>>>>>>>>>>>>>>>>>>> >>>> org.apache.hadoop.hbase.procedure2.AbstractProcedureScheduler.poll(AbstractProcedureScheduler.java:147) >>>>>>>>>>>>>>>>>>>> >>>>>>>>>>>>>>>>>>>> >>>> org.apache.hadoop.hbase.procedure2.ProcedureExecutor$WorkerThread.run(ProcedureExecutor.java:2113) >>>>>>>>>>>>>>>>>>>> Thread 138 (PEWorker-15): >>>>>>>>>>>>>>>>>>>> State: TIMED_WAITING >>>>>>>>>>>>>>>>>>>> Blocked count: 0 >>>>>>>>>>>>>>>>>>>> Waited count: 16 >>>>>>>>>>>>>>>>>>>> Stack: >>>>>>>>>>>>>>>>>>>> sun.misc.Unsafe.park(Native Method) >>>>>>>>>>>>>>>>>>>> >>>> java.util.concurrent.locks.LockSupport.parkNanos(LockSupport.java:215) >>>>>>>>>>>>>>>>>>>> >>>>>>>>>>>>>>>>>>>> >>>> java.util.concurrent.locks.AbstractQueuedSynchronizer$ConditionObject.awaitNanos(AbstractQueuedSynchronizer.java:2078) >>>>>>>>>>>>>>>>>>>> >>>>>>>>>>>>>>>>>>>> >>>> org.apache.hadoop.hbase.procedure2.AbstractProcedureScheduler.poll(AbstractProcedureScheduler.java:165) >>>>>>>>>>>>>>>>>>>> >>>>>>>>>>>>>>>>>>>> >>>> org.apache.hadoop.hbase.procedure2.AbstractProcedureScheduler.poll(AbstractProcedureScheduler.java:147) >>>>>>>>>>>>>>>>>>>> >>>>>>>>>>>>>>>>>>>> >>>> org.apache.hadoop.hbase.procedure2.ProcedureExecutor$WorkerThread.run(ProcedureExecutor.java:2113) >>>>>>>>>>>>>>>>>>>> Thread 137 (PEWorker-14): >>>>>>>>>>>>>>>>>>>> State: TIMED_WAITING >>>>>>>>>>>>>>>>>>>> Blocked count: 0 >>>>>>>>>>>>>>>>>>>> Waited count: 16 >>>>>>>>>>>>>>>>>>>> Stack: 
>>>>>>>>>>>>>>>>>>>> sun.misc.Unsafe.park(Native Method) >>>>>>>>>>>>>>>>>>>> >>>> java.util.concurrent.locks.LockSupport.parkNanos(LockSupport.java:215) >>>>>>>>>>>>>>>>>>>> >>>>>>>>>>>>>>>>>>>> >>>> java.util.concurrent.locks.AbstractQueuedSynchronizer$ConditionObject.awaitNanos(AbstractQueuedSynchronizer.java:2078) >>>>>>>>>>>>>>>>>>>> >>>>>>>>>>>>>>>>>>>> >>>> org.apache.hadoop.hbase.procedure2.AbstractProcedureScheduler.poll(AbstractProcedureScheduler.java:165) >>>>>>>>>>>>>>>>>>>> >>>>>>>>>>>>>>>>>>>> >>>> org.apache.hadoop.hbase.procedure2.AbstractProcedureScheduler.poll(AbstractProcedureScheduler.java:147) >>>>>>>>>>>>>>>>>>>> >>>>>>>>>>>>>>>>>>>> >>>> org.apache.hadoop.hbase.procedure2.ProcedureExecutor$WorkerThread.run(ProcedureExecutor.java:2113) >>>>>>>>>>>>>>>>>>>> Thread 136 (PEWorker-13): >>>>>>>>>>>>>>>>>>>> State: TIMED_WAITING >>>>>>>>>>>>>>>>>>>> Blocked count: 0 >>>>>>>>>>>>>>>>>>>> Waited count: 16 >>>>>>>>>>>>>>>>>>>> Stack: >>>>>>>>>>>>>>>>>>>> sun.misc.Unsafe.park(Native Method) >>>>>>>>>>>>>>>>>>>> >>>> java.util.concurrent.locks.LockSupport.parkNanos(LockSupport.java:215) >>>>>>>>>> >>> >>> >