[ 
https://issues.apache.org/jira/browse/HBASE-16393?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15416295#comment-15416295
 ] 

binlijin commented on HBASE-16393:
----------------------------------

jstack of the master during the balancer call:
{code}
hbase(main):002:0> balancer

ERROR: Call id=3, waitTime=180001, operationTimeout=180000 expired.
{code}

{code}
"B.defaultRpcServer.handler=31,queue=5,port=60100" daemon prio=10 
tid=0x00007f3e2aec1800 nid=0x369b2 in Object.wait() [0x00007f3e1affd000]
   java.lang.Thread.State: WAITING (on object monitor)
        at java.lang.Object.wait(Native Method)
        at java.lang.Object.wait(Object.java:503)
        at org.apache.hadoop.ipc.Client.call(Client.java:1484)
        - locked <0x0000000603eb5738> (a org.apache.hadoop.ipc.Client$Call)
        at org.apache.hadoop.ipc.Client.call(Client.java:1429)
        at 
org.apache.hadoop.ipc.ProtobufRpcEngine$Invoker.invoke(ProtobufRpcEngine.java:232)
        at com.sun.proxy.$Proxy15.getBlockLocations(Unknown Source)
        at 
org.apache.hadoop.hdfs.protocolPB.ClientNamenodeProtocolTranslatorPB.getBlockLocations(ClientNamenodeProtocolTranslatorPB.java:254)
        at sun.reflect.GeneratedMethodAccessor75.invoke(Unknown Source)
        at 
sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
        at java.lang.reflect.Method.invoke(Method.java:606)
        at 
org.apache.hadoop.io.retry.RetryInvocationHandler.invokeMethod(RetryInvocationHandler.java:187)
        at 
org.apache.hadoop.io.retry.RetryInvocationHandler.invoke(RetryInvocationHandler.java:102)
        at com.sun.proxy.$Proxy16.getBlockLocations(Unknown Source)
        at sun.reflect.GeneratedMethodAccessor75.invoke(Unknown Source)
        at 
sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
        at java.lang.reflect.Method.invoke(Method.java:606)
        at org.apache.hadoop.hbase.fs.HFileSystem$1.invoke(HFileSystem.java:330)
        at com.sun.proxy.$Proxy17.getBlockLocations(Unknown Source)
        at sun.reflect.GeneratedMethodAccessor75.invoke(Unknown Source)
        at 
sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
        at java.lang.reflect.Method.invoke(Method.java:606)
        at org.apache.hadoop.hbase.fs.HFileSystem$1.invoke(HFileSystem.java:330)
        at com.sun.proxy.$Proxy17.getBlockLocations(Unknown Source)
        at 
org.apache.hadoop.hdfs.DFSClient.callGetBlockLocations(DFSClient.java:1205)
        at 
org.apache.hadoop.hdfs.DFSClient.getLocatedBlocks(DFSClient.java:1195)
        at 
org.apache.hadoop.hdfs.DFSClient.getBlockLocations(DFSClient.java:1245)
        at 
org.apache.hadoop.hdfs.DistributedFileSystem$1.doCall(DistributedFileSystem.java:220)
        at 
org.apache.hadoop.hdfs.DistributedFileSystem$1.doCall(DistributedFileSystem.java:216)
        at 
org.apache.hadoop.fs.FileSystemLinkResolver.resolve(FileSystemLinkResolver.java:81)
        at 
org.apache.hadoop.hdfs.DistributedFileSystem.getFileBlockLocations(DistributedFileSystem.java:216)
        at 
org.apache.hadoop.hdfs.DistributedFileSystem.getFileBlockLocations(DistributedFileSystem.java:208)
        at 
org.apache.hadoop.hbase.util.FSUtils.computeHDFSBlocksDistribution(FSUtils.java:1042)
        at 
org.apache.hadoop.hbase.regionserver.StoreFileInfo.computeHDFSBlocksDistributionInternal(StoreFileInfo.java:294)
        at 
org.apache.hadoop.hbase.regionserver.StoreFileInfo.computeHDFSBlocksDistribution(StoreFileInfo.java:284)
        at 
org.apache.hadoop.hbase.regionserver.HRegion.computeHDFSBlocksDistribution(HRegion.java:1083)
        at 
org.apache.hadoop.hbase.regionserver.HRegion.computeHDFSBlocksDistribution(HRegion.java:1058)
        at 
org.apache.hadoop.hbase.master.balancer.RegionLocationFinder.internalGetTopBlockLocation(RegionLocationFinder.java:127)
        at 
org.apache.hadoop.hbase.master.balancer.RegionLocationFinder$1.load(RegionLocationFinder.java:65)
        at 
org.apache.hadoop.hbase.master.balancer.RegionLocationFinder$1.load(RegionLocationFinder.java:61)
        at 
com.google.common.cache.LocalCache$LoadingValueReference.loadFuture(LocalCache.java:3584)
        at 
com.google.common.cache.LocalCache$Segment.loadSync(LocalCache.java:2372)
        at 
com.google.common.cache.LocalCache$Segment.lockedGetOrLoad(LocalCache.java:2335)
        - locked <0x0000000603eabc40> (a 
com.google.common.cache.LocalCache$StrongAccessEntry)
        at com.google.common.cache.LocalCache$Segment.get(LocalCache.java:2250)
        at com.google.common.cache.LocalCache.get(LocalCache.java:3985)
        at com.google.common.cache.LocalCache.getOrLoad(LocalCache.java:3989)
        at 
com.google.common.cache.LocalCache$LocalLoadingCache.get(LocalCache.java:4873)
        at 
org.apache.hadoop.hbase.master.balancer.RegionLocationFinder.getTopBlockLocations(RegionLocationFinder.java:105)
        at 
org.apache.hadoop.hbase.master.balancer.BaseLoadBalancer$Cluster.registerRegion(BaseLoadBalancer.java:433)
        at 
org.apache.hadoop.hbase.master.balancer.BaseLoadBalancer$Cluster.<init>(BaseLoadBalancer.java:274)
        at 
org.apache.hadoop.hbase.master.balancer.BaseLoadBalancer$Cluster.<init>(BaseLoadBalancer.java:148)
        at 
org.apache.hadoop.hbase.master.balancer.SimpleLoadBalancer.balanceCluster(SimpleLoadBalancer.java:201)
        at org.apache.hadoop.hbase.master.HMaster.balance(HMaster.java:1322)
        - locked <0x000000061bafbb00> (a 
org.apache.hadoop.hbase.master.balancer.SimpleLoadBalancer)
        at 
org.apache.hadoop.hbase.master.MasterRpcServices.balance(MasterRpcServices.java:395)
        at 
org.apache.hadoop.hbase.protobuf.generated.MasterProtos$MasterService$2.callBlockingMethod(MasterProtos.java:48508)
        at org.apache.hadoop.hbase.ipc.RpcServer.call(RpcServer.java:2188)
        at org.apache.hadoop.hbase.ipc.CallRunner.run(CallRunner.java:102)
        at 
org.apache.hadoop.hbase.ipc.RpcExecutor.consumerLoop(RpcExecutor.java:133)
        at org.apache.hadoop.hbase.ipc.RpcExecutor$1.run(RpcExecutor.java:108)
        at java.lang.Thread.run(Thread.java:756)
{code}

> Improve computeHDFSBlocksDistribution
> -------------------------------------
>
>                 Key: HBASE-16393
>                 URL: https://issues.apache.org/jira/browse/HBASE-16393
>             Project: HBase
>          Issue Type: Improvement
>            Reporter: binlijin
>
> Because our cluster is big, I can see that the balancer is slow from time to 
> time. The balancer is also called on master startup, so the startup is slow 
> as well. 
> First, I wonder whether we can compute the HDFSBlocksDistribution of 
> different regions in parallel. 
> Second, I think we can improve the computation of a single region's 
> HDFSBlocksDistribution.
> To compute a storefile's HDFSBlocksDistribution, we first call 
> FileSystem#getFileStatus(path) and then 
> FileSystem#getFileBlockLocations(status, start, length), which means two 
> namenode RPC calls for every storefile. Instead, we can use 
> FileSystem#listLocatedStatus to get a LocatedFileStatus containing the 
> information we need, which reduces the namenode RPC calls to one.



--
This message was sent by Atlassian JIRA
(v6.3.4#6332)

Reply via email to