This is an automated email from the ASF dual-hosted git repository.
wchevreuil pushed a commit to branch branch-2
in repository https://gitbox.apache.org/repos/asf/hbase.git
The following commit(s) were added to refs/heads/branch-2 by this push:
new 80c33e0d46f HBASE-27232 Fix checking for encoded block size when deciding if bloc… (#4640)
80c33e0d46f is described below
commit 80c33e0d46feffb5390f792d848c5d9c38eb7a71
Author: Wellington Ramos Chevreuil <[email protected]>
AuthorDate: Mon Jul 25 11:17:16 2022 +0100
HBASE-27232 Fix checking for encoded block size when deciding if bloc… (#4640)
Signed-off-by: Andor Molnár <[email protected]>
Signed-off-by: Bryan Beaudreault <[email protected]>
Signed-off-by: Ankit Singhal <[email protected]>
Signed-off-by: Duo Zhang <[email protected]>
(cherry picked from commit d5ed8f59e582272c2e3cd9b6b33488fa0f265cbf)
---
.../hadoop/hbase/io/hfile/HFileWriterImpl.java | 19 +++++---
.../hadoop/hbase/regionserver/TestHStoreFile.java | 52 ++++++++++++++++++++++
2 files changed, 64 insertions(+), 7 deletions(-)
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileWriterImpl.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileWriterImpl.java
index a638a443fc4..80e333050c6 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileWriterImpl.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileWriterImpl.java
@@ -172,8 +172,9 @@ public class HFileWriterImpl implements HFile.Writer {
}
closeOutputStream = path != null;
this.cacheConf = cacheConf;
- float encodeBlockSizeRatio =
conf.getFloat(UNIFIED_ENCODED_BLOCKSIZE_RATIO, 1f);
+ float encodeBlockSizeRatio =
conf.getFloat(UNIFIED_ENCODED_BLOCKSIZE_RATIO, 0f);
this.encodedBlockSizeLimit = (int) (hFileContext.getBlocksize() *
encodeBlockSizeRatio);
+
finishInit(conf);
if (LOG.isTraceEnabled()) {
LOG.trace("Writer" + (path != null ? " for " + path : "") + "
initialized with cacheConf: "
@@ -309,12 +310,16 @@ public class HFileWriterImpl implements HFile.Writer {
* At a block boundary, write all the inline blocks and opens new block.
*/
protected void checkBlockBoundary() throws IOException {
- // For encoder like prefixTree, encoded size is not available, so we have
to compare both
- // encoded size and unencoded size to blocksize limit.
- if (
- blockWriter.encodedBlockSizeWritten() >= encodedBlockSizeLimit
- || blockWriter.blockSizeWritten() >= hFileContext.getBlocksize()
- ) {
+ boolean shouldFinishBlock = false;
+ // This means hbase.writer.unified.encoded.blocksize.ratio was set to
something different from 0
+ // and we should use the encoding ratio
+ if (encodedBlockSizeLimit > 0) {
+ shouldFinishBlock = blockWriter.encodedBlockSizeWritten() >=
encodedBlockSizeLimit;
+ } else {
+ shouldFinishBlock = blockWriter.encodedBlockSizeWritten() >=
hFileContext.getBlocksize()
+ || blockWriter.blockSizeWritten() >= hFileContext.getBlocksize();
+ }
+ if (shouldFinishBlock) {
finishBlock();
writeInlineBlocks(false);
newBlock();
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestHStoreFile.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestHStoreFile.java
index be8e0403d5e..2addbd59ad1 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestHStoreFile.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestHStoreFile.java
@@ -67,6 +67,9 @@ import org.apache.hadoop.hbase.io.hfile.BlockCache;
import org.apache.hadoop.hbase.io.hfile.BlockCacheFactory;
import org.apache.hadoop.hbase.io.hfile.CacheConfig;
import org.apache.hadoop.hbase.io.hfile.CacheStats;
+import org.apache.hadoop.hbase.io.hfile.FixedFileTrailer;
+import org.apache.hadoop.hbase.io.hfile.HFile;
+import org.apache.hadoop.hbase.io.hfile.HFileBlock;
import org.apache.hadoop.hbase.io.hfile.HFileContext;
import org.apache.hadoop.hbase.io.hfile.HFileContextBuilder;
import org.apache.hadoop.hbase.io.hfile.HFileDataBlockEncoder;
@@ -1137,4 +1140,53 @@ public class TestHStoreFile {
byte[] value = fileInfo.get(HFileDataBlockEncoder.DATA_BLOCK_ENCODING);
assertArrayEquals(dataBlockEncoderAlgo.getNameInBytes(), value);
}
+
+ @Test
+ public void testDataBlockSizeEncoded() throws Exception {
+ // Make up a directory hierarchy that has a regiondir ("7e0102") and
familyname.
+ Path dir = new Path(new Path(this.testDir, "7e0102"), "familyname");
+ Path path = new Path(dir, "1234567890");
+
+ DataBlockEncoding dataBlockEncoderAlgo = DataBlockEncoding.FAST_DIFF;
+
+ conf.setDouble("hbase.writer.unified.encoded.blocksize.ratio", 1);
+
+ cacheConf = new CacheConfig(conf);
+ HFileContext meta =
+ new
HFileContextBuilder().withBlockSize(BLOCKSIZE_SMALL).withChecksumType(CKTYPE)
+
.withBytesPerCheckSum(CKBYTES).withDataBlockEncoding(dataBlockEncoderAlgo).build();
+ // Make a store file and write data to it.
+ StoreFileWriter writer = new StoreFileWriter.Builder(conf, cacheConf,
this.fs)
+ .withFilePath(path).withMaxKeyCount(2000).withFileContext(meta).build();
+ writeStoreFile(writer);
+
+ HStoreFile storeFile =
+ new HStoreFile(fs, writer.getPath(), conf, cacheConf, BloomType.NONE,
true);
+ storeFile.initReader();
+ StoreFileReader reader = storeFile.getReader();
+
+ Map<byte[], byte[]> fileInfo = reader.loadFileInfo();
+ byte[] value = fileInfo.get(HFileDataBlockEncoder.DATA_BLOCK_ENCODING);
+ assertEquals(dataBlockEncoderAlgo.name(), Bytes.toString(value));
+
+ HFile.Reader fReader =
+ HFile.createReader(fs, writer.getPath(), storeFile.getCacheConf(), true,
conf);
+
+ FSDataInputStreamWrapper fsdis = new FSDataInputStreamWrapper(fs,
writer.getPath());
+ long fileSize = fs.getFileStatus(writer.getPath()).getLen();
+ FixedFileTrailer trailer =
FixedFileTrailer.readFromStream(fsdis.getStream(false), fileSize);
+ long offset = trailer.getFirstDataBlockOffset(), max =
trailer.getLastDataBlockOffset();
+ HFileBlock block;
+ while (offset <= max) {
+ block = fReader.readBlock(offset, -1, /* cacheBlock */
+ false, /* pread */ false, /* isCompaction */ false, /*
updateCacheMetrics */
+ false, null, null);
+ offset += block.getOnDiskSizeWithHeader();
+ double diff = block.getOnDiskSizeWithHeader() - BLOCKSIZE_SMALL;
+ if (offset <= max) {
+ assertTrue(diff >= 0 && diff < (BLOCKSIZE_SMALL * 0.05));
+ }
+ }
+ }
+
}