[ https://issues.apache.org/jira/browse/HBASE-16993?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16083457#comment-16083457 ]
Anoop Sam John commented on HBASE-16993: ---------------------------------------- bq. if a data block is 45kb and bucket value(s) is [...,32k,48k,....] then the datablcok will be in bucket 48k and the offset is multiples of 48k,one block will waster 3kb Yes that would happen any way even if this patch is committed no? In such cases, the user is expected to change the config and change the 48 to 45 or 46. This patch is changing the 4 +1 bytes way of offset declare to use a long. That will help when the block size to be NOT multiple of 256. That much lesser granular is ok IMO. What we waste max is few bytes (<255). That is my point saying here. This change of offset type to be long is not needed. We can add proper documentation for the user and add check in code to fail fast > BucketCache throw java.io.IOException: Invalid HFile block magic when > DATA_BLOCK_ENCODING set to DIFF > ----------------------------------------------------------------------------------------------------- > > Key: HBASE-16993 > URL: https://issues.apache.org/jira/browse/HBASE-16993 > Project: HBase > Issue Type: Bug > Components: BucketCache, io > Affects Versions: 1.1.3 > Environment: hbase version 1.1.3 > Reporter: liubangchen > Assignee: liubangchen > Fix For: 2.0.0 > > Attachments: HBASE-16993.000.patch, HBASE-16993.001.patch, > HBASE-16993.master.001.patch, HBASE-16993.master.002.patch, > HBASE-16993.master.003.patch, HBASE-16993.master.004.patch, > HBASE-16993.master.005.patch > > Original Estimate: 336h > Remaining Estimate: 336h > > hbase-site.xml setting > <property> > <name>hbase.bucketcache.bucket.sizes</name> > <value>16384,32768,40960, > 46000,49152,51200,65536,131072,524288</value> > </property> > <property> > <name>hbase.bucketcache.size</name> > <value>16384</value> > </property> > <property> > <name>hbase.bucketcache.ioengine</name> > <value>offheap</value> > </property> > <property> > <name>hfile.block.cache.size</name> > <value>0.3</value> > </property> > <property> > <name>hfile.block.bloom.cacheonwrite</name> > <value>true</value> > </property> > <property> > <name>hbase.rs.cacheblocksonwrite</name> > <value>true</value> > </property> > <property> > <name>hfile.block.index.cacheonwrite</name> > <value>true</value> > </property > n_splits = 200 > create 'usertable',{NAME =>'family', COMPRESSION => 'snappy', VERSIONS => > 1,DATA_BLOCK_ENCODING => 'DIFF',CONFIGURATION => > {'hbase.hregion.memstore.block.multiplier' => 5}},{DURABILITY => > 'SKIP_WAL'},{SPLITS => (1..n_splits).map {|i| > "user#{1000+i*(9999-1000)/n_splits}"}} > load data > bin/ycsb load hbase10 -P workloads/workloada -p table=usertable -p > columnfamily=family -p fieldcount=10 -p fieldlength=100 -p > recordcount=200000000 -p insertorder=hashed -p insertstart=0 -p > clientbuffering=true -p durability=SKIP_WAL -threads 20 -s > run > bin/ycsb run hbase10 -P workloads/workloadb -p table=usertable -p > columnfamily=family -p fieldcount=10 -p fieldlength=100 -p > operationcount=20000000 -p readallfields=true -p clientbuffering=true -p > requestdistribution=zipfian -threads 10 -s > log info > 2016-11-02 20:20:20,261 ERROR > [RW.default.readRpcServer.handler=36,queue=21,port=6020] bucket.BucketCache: > Failed reading block fdcc7ed6f3b2498b9ef316cc8206c233_44819759 from bucket > cache > java.io.IOException: Invalid HFile block magic: > \x00\x00\x00\x00\x00\x00\x00\x00 > at > org.apache.hadoop.hbase.io.hfile.BlockType.parse(BlockType.java:154) > at org.apache.hadoop.hbase.io.hfile.BlockType.read(BlockType.java:167) > at > org.apache.hadoop.hbase.io.hfile.HFileBlock.<init>(HFileBlock.java:273) > at > org.apache.hadoop.hbase.io.hfile.HFileBlock$1.deserialize(HFileBlock.java:134) > at > org.apache.hadoop.hbase.io.hfile.HFileBlock$1.deserialize(HFileBlock.java:121) > at > org.apache.hadoop.hbase.io.hfile.bucket.BucketCache.getBlock(BucketCache.java:427) > at > org.apache.hadoop.hbase.io.hfile.CombinedBlockCache.getBlock(CombinedBlockCache.java:85) > at > org.apache.hadoop.hbase.io.hfile.HFileReaderV2.getCachedBlock(HFileReaderV2.java:266) > at > org.apache.hadoop.hbase.io.hfile.HFileReaderV2.readBlock(HFileReaderV2.java:403) > at > org.apache.hadoop.hbase.io.hfile.HFileBlockIndex$BlockIndexReader.loadDataBlockWithScanInfo(HFileBlockIndex.java:269) > at > org.apache.hadoop.hbase.io.hfile.HFileReaderV2$AbstractScannerV2.seekTo(HFileReaderV2.java:634) > at > org.apache.hadoop.hbase.io.hfile.HFileReaderV2$AbstractScannerV2.seekTo(HFileReaderV2.java:584) > at > org.apache.hadoop.hbase.regionserver.StoreFileScanner.seekAtOrAfter(StoreFileScanner.java:247) > at > org.apache.hadoop.hbase.regionserver.StoreFileScanner.seek(StoreFileScanner.java:156) > at > org.apache.hadoop.hbase.regionserver.StoreScanner.seekScanners(StoreScanner.java:363) > at > org.apache.hadoop.hbase.regionserver.StoreScanner.<init>(StoreScanner.java:217) > at > org.apache.hadoop.hbase.regionserver.HStore.getScanner(HStore.java:2071) > at > org.apache.hadoop.hbase.regionserver.HRegion$RegionScannerImpl.<init>(HRegion.java:5369) > at > org.apache.hadoop.hbase.regionserver.HRegion.instantiateRegionScanner(HRegion.java:2546) > at > org.apache.hadoop.hbase.regionserver.HRegion.getScanner(HRegion.java:2532) > at > org.apache.hadoop.hbase.regionserver.HRegion.getScanner(HRegion.java:2514) > at org.apache.hadoop.hbase.regionserver.HRegion.get(HRegion.java:6558) > at org.apache.hadoop.hbase.regionserver.HRegion.get(HRegion.java:6537) > at > org.apache.hadoop.hbase.regionserver.RSRpcServices.get(RSRpcServices.java:1935) > at > org.apache.hadoop.hbase.protobuf.generated.ClientProtos$ClientService$2.callBlockingMethod(ClientProtos.java:32381) > at org.apache.hadoop.hbase.ipc.RpcServer.call(RpcServer.java:2117) > at org.apache.hadoop.hbase.ipc.CallRunner.run(CallRunner.java:104) > at > org.apache.hadoop.hbase.ipc.RpcExecutor.consumerLoop(RpcExecutor.java:133) > at org.apache.hadoop.hbase.ipc.RpcExecutor$1.run(RpcExecutor.java:108) > at java.lang.Thread.run(Thread.java:745) > 2016-11-02 20:20:20,263 ERROR > [RW.default.readRpcServer.handler=50,queue=20,port=6020] bucket.BucketCache: > Failed reading block c45d6b14789546b785bae94c69c683d5_34198622 from bucket > cache > java.io.IOException: Invalid HFile block magic: > \x00\x00\x00\x00\x00\x00\x00\x00 > at > org.apache.hadoop.hbase.io.hfile.BlockType.parse(BlockType.java:154) > at org.apache.hadoop.hbase.io.hfile.BlockType.read(BlockType.java:167) > at > org.apache.hadoop.hbase.io.hfile.HFileBlock.<init>(HFileBlock.java:273) > at > org.apache.hadoop.hbase.io.hfile.HFileBlock$1.deserialize(HFileBlock.java:134) > at > org.apache.hadoop.hbase.io.hfile.HFileBlock$1.deserialize(HFileBlock.java:121) > at > org.apache.hadoop.hbase.io.hfile.bucket.BucketCache.getBlock(BucketCache.java:427) > at > org.apache.hadoop.hbase.io.hfile.CombinedBlockCache.getBlock(CombinedBlockCache.java:85) > at > org.apache.hadoop.hbase.io.hfile.HFileReaderV2.getCachedBlock(HFileReaderV2.java:266) > at > org.apache.hadoop.hbase.io.hfile.HFileReaderV2.readBlock(HFileReaderV2.java:403) > at > org.apache.hadoop.hbase.io.hfile.HFileBlockIndex$BlockIndexReader.loadDataBlockWithScanInfo(HFileBlockIndex.java:269) > at > org.apache.hadoop.hbase.io.hfile.HFileReaderV2$AbstractScannerV2.seekTo(HFileReaderV2.java:634) > at > org.apache.hadoop.hbase.io.hfile.HFileReaderV2$AbstractScannerV2.seekTo(HFileReaderV2.java:584) > at > org.apache.hadoop.hbase.regionserver.StoreFileScanner.seekAtOrAfter(StoreFileScanner.java:247) > at > org.apache.hadoop.hbase.regionserver.StoreFileScanner.seek(StoreFileScanner.java:156) > at > org.apache.hadoop.hbase.regionserver.StoreScanner.seekScanners(StoreScanner.java:363) > at > org.apache.hadoop.hbase.regionserver.StoreScanner.<init>(StoreScanner.java:217) > at > org.apache.hadoop.hbase.regionserver.HStore.getScanner(HStore.java:2071) > at > org.apache.hadoop.hbase.regionserver.HRegion$RegionScannerImpl.<init>(HRegion.java:5369) > at > org.apache.hadoop.hbase.regionserver.HRegion.instantiateRegionScanner(HRegion.java:2546) > at > org.apache.hadoop.hbase.regionserver.HRegion.getScanner(HRegion.java:2532) > at > org.apache.hadoop.hbase.regionserver.HRegion.getScanner(HRegion.java:2514) > at org.apache.hadoop.hbase.regionserver.HRegion.get(HRegion.java:6558) > at org.apache.hadoop.hbase.regionserver.HRegion.get(HRegion.java:6537) > at > org.apache.hadoop.hbase.regionserver.RSRpcServices.get(RSRpcServices.java:1935) > at > org.apache.hadoop.hbase.protobuf.generated.ClientProtos$ClientService$2.callBlockingMethod(ClientProtos.java:32381) > at org.apache.hadoop.hbase.ipc.RpcServer.call(RpcServer.java:2117) > at org.apache.hadoop.hbase.ipc.CallRunner.run(CallRunner.java:104) > at > org.apache.hadoop.hbase.ipc.RpcExecutor.consumerLoop(RpcExecutor.java:133) > at org.apache.hadoop.hbase.ipc.RpcExecutor$1.run(RpcExecutor.java:108) > at java.lang.Thread.run(Thread.java:745) -- This message was sent by Atlassian JIRA (v6.4.14#64029)