This is an automated email from the ASF dual-hosted git repository.

wchevreuil pushed a commit to branch branch-2
in repository https://gitbox.apache.org/repos/asf/hbase.git
commit da62666b84500c91e3e62a10454866057153bcf8
Author: vinayak hegde <[email protected]>
AuthorDate: Wed May 22 18:59:49 2024 +0530

    HBASE-28469: Integration of time-based priority caching into compaction paths (#5866)

    Signed-off-by: Wellington Chevreuil <[email protected]>
    Reviewed-by: Janardhan Hugund <[email protected]>
    Change-Id: Ib992689f769774a2af5fc3f98af892e926b0f7bf
---
 .../apache/hadoop/hbase/io/hfile/BlockCache.java   |  17 +++
 .../hadoop/hbase/io/hfile/BlockCacheKey.java       |   1 -
 .../hadoop/hbase/io/hfile/CombinedBlockCache.java  |  20 +++-
 .../org/apache/hadoop/hbase/io/hfile/HFile.java    |   5 +
 .../hadoop/hbase/io/hfile/HFileWriterImpl.java     |  42 ++++++++
 .../hadoop/hbase/io/hfile/bucket/BucketCache.java  |  13 +++
 .../hbase/regionserver/DataTieringManager.java     |  50 ++++++++-
 .../hbase/regionserver/HRegionFileSystem.java      |  25 +++++
 .../hadoop/hbase/regionserver/StoreFileWriter.java |  23 +---
 .../hbase/regionserver/TimeRangeTracker.java       |   4 +-
 .../hbase/regionserver/TestDataTieringManager.java | 120 ++++++++++++++++++---
 11 files changed, 271 insertions(+), 49 deletions(-)

diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/BlockCache.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/BlockCache.java
index e01b34cdc5d..d0efd6dde8a 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/BlockCache.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/BlockCache.java
@@ -23,6 +23,7 @@ import java.util.Optional;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hbase.conf.ConfigurationObserver;
+import org.apache.hadoop.hbase.regionserver.TimeRangeTracker;
 import org.apache.hadoop.hbase.util.Pair;
 import org.apache.yetus.audience.InterfaceAudience;
 
@@ -193,6 +194,22 @@ public interface BlockCache extends Iterable<CachedBlock>, ConfigurationObserver
     return Optional.empty();
   }
 
+  /**
+   * Checks whether the block represented by the given key should be cached or not. This method may
+   * not be overridden by all implementing classes. In such cases, the returned Optional will be
+   * empty. For subclasses implementing this logic, the returned Optional would contain the boolean
+   * value reflecting if the passed block should indeed be cached.
+   * @param key              The key representing the block to check if it should be cached.
+   * @param timeRangeTracker the time range tracker containing the timestamps
+   * @param conf             The configuration object to use for determining caching behavior.
+   * @return An empty Optional if this method is not supported; otherwise, the returned Optional
+   *         contains the boolean value indicating if the block should be cached.
+   */
+  default Optional<Boolean> shouldCacheBlock(BlockCacheKey key, TimeRangeTracker timeRangeTracker,
+    Configuration conf) {
+    return Optional.empty();
+  }
+
   /**
    * Checks whether the block for the passed key is already cached. This method may not be
   * overridden by all implementing classes. In such cases, the returned Optional will be empty. For
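A note on the semantics of the new hook above: the default is deliberately non-committal, an empty Optional means "no opinion", so call sites can fall back to caching. A minimal caller-side sketch (variable names assumed for illustration, not taken from the patch):

    // Cache the block unless some BlockCache implementation explicitly vetoes it.
    boolean cacheIt = cache.shouldCacheBlock(key, timeRangeTracker, conf).orElse(true);
    if (cacheIt) {
      cache.cacheBlock(key, block, cacheConf.isInMemory(), true);
    }

This is the same orElse(true) pattern HFileWriterImpl adopts later in this commit.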
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/BlockCacheKey.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/BlockCacheKey.java
index bf22d38e373..bcc1f58ba5e 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/BlockCacheKey.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/BlockCacheKey.java
@@ -116,5 +116,4 @@ public class BlockCacheKey implements HeapSize, java.io.Serializable {
   public Path getFilePath() {
     return filePath;
   }
-
 }
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/CombinedBlockCache.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/CombinedBlockCache.java
index a618000c27d..ecb8d39e9b9 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/CombinedBlockCache.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/CombinedBlockCache.java
@@ -26,6 +26,7 @@ import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hbase.io.HeapSize;
 import org.apache.hadoop.hbase.io.hfile.bucket.BucketCache;
+import org.apache.hadoop.hbase.regionserver.TimeRangeTracker;
 import org.apache.hadoop.hbase.util.Pair;
 import org.apache.yetus.audience.InterfaceAudience;
 import org.slf4j.Logger;
@@ -478,11 +479,22 @@ public class CombinedBlockCache implements ResizableBlockCache, HeapSize {
 
   @Override
   public Optional<Boolean> shouldCacheFile(HFileInfo hFileInfo, Configuration conf) {
-    Optional<Boolean> l1Result = l1Cache.shouldCacheFile(hFileInfo, conf);
-    Optional<Boolean> l2Result = l2Cache.shouldCacheFile(hFileInfo, conf);
+    return combineCacheResults(l1Cache.shouldCacheFile(hFileInfo, conf),
+      l2Cache.shouldCacheFile(hFileInfo, conf));
+  }
+
+  @Override
+  public Optional<Boolean> shouldCacheBlock(BlockCacheKey key, TimeRangeTracker timeRangeTracker,
+    Configuration conf) {
+    return combineCacheResults(l1Cache.shouldCacheBlock(key, timeRangeTracker, conf),
+      l2Cache.shouldCacheBlock(key, timeRangeTracker, conf));
+  }
+
+  private Optional<Boolean> combineCacheResults(Optional<Boolean> result1,
+    Optional<Boolean> result2) {
     final Mutable<Boolean> combinedResult = new MutableBoolean(true);
-    l1Result.ifPresent(b -> combinedResult.setValue(b && combinedResult.getValue()));
-    l2Result.ifPresent(b -> combinedResult.setValue(b && combinedResult.getValue()));
+    result1.ifPresent(b -> combinedResult.setValue(b && combinedResult.getValue()));
+    result2.ifPresent(b -> combinedResult.setValue(b && combinedResult.getValue()));
     return Optional.of(combinedResult.getValue());
   }
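The extracted combineCacheResults helper ANDs the two tiers' opinions, treating "no opinion" as agreement. A standalone restatement of the semantics (illustrative only, not the class itself):

    static Optional<Boolean> combine(Optional<Boolean> l1, Optional<Boolean> l2) {
      boolean value = true;                          // default: cache
      if (l1.isPresent()) value = value && l1.get(); // L1 can only veto
      if (l2.isPresent()) value = value && l2.get(); // L2 can only veto
      return Optional.of(value);                     // combined cache always answers
    }

So the combined result is false only when at least one tier explicitly returns false, and it is never empty, even when both inputs are.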
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFile.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFile.java
index 135c6cfecbc..d8dffce59e8 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFile.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFile.java
@@ -212,6 +212,11 @@ public final class HFile {
     /** Add an element to the file info map. */
     void appendFileInfo(byte[] key, byte[] value) throws IOException;
 
+    /**
+     * Add TimestampRange and earliest put timestamp to Metadata
+     */
+    void appendTrackedTimestampsToMetadata() throws IOException;
+
     /** Returns the path to this {@link HFile} */
     Path getPath();
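With appendTrackedTimestampsToMetadata() on the Writer interface, timestamp bookkeeping moves from StoreFileWriter down into the HFile writer itself. The expected call order, paraphrased from the StoreFileWriter change later in this diff (writer stands for any HFile.Writer):

    writer.append(cell);                        // the writer now tracks timestamps itself
    // ... more appends ...
    writer.appendTrackedTimestampsToMetadata(); // persists TIMERANGE_KEY + EARLIEST_PUT_TS
    writer.close();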
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileWriterImpl.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileWriterImpl.java
index c8c21e0625c..96bfe42f1fd 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileWriterImpl.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileWriterImpl.java
@@ -18,6 +18,8 @@
 package org.apache.hadoop.hbase.io.hfile;
 
 import static org.apache.hadoop.hbase.io.hfile.BlockCompressedSizePredicator.MAX_BLOCK_SIZE_UNCOMPRESSED;
+import static org.apache.hadoop.hbase.regionserver.HStoreFile.EARLIEST_PUT_TS;
+import static org.apache.hadoop.hbase.regionserver.HStoreFile.TIMERANGE_KEY;
 
 import java.io.DataOutput;
 import java.io.DataOutputStream;
@@ -26,6 +28,7 @@ import java.net.InetSocketAddress;
 import java.nio.ByteBuffer;
 import java.util.ArrayList;
 import java.util.List;
+import java.util.Optional;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FSDataOutputStream;
 import org.apache.hadoop.fs.FileSystem;
@@ -46,6 +49,7 @@ import org.apache.hadoop.hbase.io.crypto.Encryption;
 import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
 import org.apache.hadoop.hbase.io.encoding.IndexBlockEncoding;
 import org.apache.hadoop.hbase.io.hfile.HFileBlock.BlockWritable;
+import org.apache.hadoop.hbase.regionserver.TimeRangeTracker;
 import org.apache.hadoop.hbase.security.EncryptionUtil;
 import org.apache.hadoop.hbase.security.User;
 import org.apache.hadoop.hbase.util.BloomFilterWriter;
@@ -118,6 +122,8 @@ public class HFileWriterImpl implements HFile.Writer {
   /** May be null if we were passed a stream. */
   protected final Path path;
 
+  protected final Configuration conf;
+
   /** Cache configuration for caching data on write. */
   protected final CacheConfig cacheConf;
 
@@ -171,12 +177,16 @@ public class HFileWriterImpl implements HFile.Writer {
 
   protected long maxMemstoreTS = 0;
 
+  private final TimeRangeTracker timeRangeTracker;
+  private long earliestPutTs = HConstants.LATEST_TIMESTAMP;
+
   public HFileWriterImpl(final Configuration conf, CacheConfig cacheConf, Path path,
     FSDataOutputStream outputStream, HFileContext fileContext) {
     this.outputStream = outputStream;
     this.path = path;
     this.name = path != null ? path.getName() : outputStream.toString();
     this.hFileContext = fileContext;
+    this.timeRangeTracker = TimeRangeTracker.create(TimeRangeTracker.Type.NON_SYNC);
     DataBlockEncoding encoding = hFileContext.getDataBlockEncoding();
     if (encoding != DataBlockEncoding.NONE) {
       this.blockEncoder = new HFileDataBlockEncoderImpl(encoding);
@@ -191,6 +201,7 @@ public class HFileWriterImpl implements HFile.Writer {
     }
     closeOutputStream = path != null;
     this.cacheConf = cacheConf;
+    this.conf = conf;
     float encodeBlockSizeRatio = conf.getFloat(UNIFIED_ENCODED_BLOCKSIZE_RATIO, 0f);
     this.encodedBlockSizeLimit = (int) (hFileContext.getBlocksize() * encodeBlockSizeRatio);
 
@@ -557,6 +568,9 @@ public class HFileWriterImpl implements HFile.Writer {
     cacheConf.getBlockCache().ifPresent(cache -> {
       HFileBlock cacheFormatBlock = blockWriter.getBlockForCaching(cacheConf);
       BlockCacheKey key = buildCacheBlockKey(offset, cacheFormatBlock.getBlockType());
+      if (!shouldCacheBlock(cache, key)) {
+        return;
+      }
       try {
         cache.cacheBlock(key, cacheFormatBlock, cacheConf.isInMemory(), true);
       } finally {
@@ -573,6 +587,11 @@ public class HFileWriterImpl implements HFile.Writer {
     return new BlockCacheKey(name, offset, true, blockType);
   }
 
+  private boolean shouldCacheBlock(BlockCache cache, BlockCacheKey key) {
+    Optional<Boolean> result = cache.shouldCacheBlock(key, timeRangeTracker, conf);
+    return result.orElse(true);
+  }
+
   /**
    * Ready a new block for writing.
   */
@@ -775,6 +794,8 @@ public class HFileWriterImpl implements HFile.Writer {
     if (tagsLength > this.maxTagsLength) {
       this.maxTagsLength = tagsLength;
     }
+
+    trackTimestamps(cell);
   }
 
   @Override
@@ -867,4 +888,25 @@ public class HFileWriterImpl implements HFile.Writer {
       outputStream = null;
     }
   }
+
+  /**
+   * Add TimestampRange and earliest put timestamp to Metadata
+   */
+  public void appendTrackedTimestampsToMetadata() throws IOException {
+    // TODO: The StoreFileReader always converts the byte[] to TimeRange
+    // via TimeRangeTracker, so we should write the serialization data of TimeRange directly.
+    appendFileInfo(TIMERANGE_KEY, TimeRangeTracker.toByteArray(timeRangeTracker));
+    appendFileInfo(EARLIEST_PUT_TS, Bytes.toBytes(earliestPutTs));
+  }
+
+  /**
+   * Record the earliest Put timestamp. If the timeRangeTracker is not set, update TimeRangeTracker
+   * to include the timestamp of this key
+   */
+  private void trackTimestamps(final ExtendedCell cell) {
+    if (Cell.Type.Put == cell.getType()) {
+      earliestPutTs = Math.min(earliestPutTs, cell.getTimestamp());
+    }
+    timeRangeTracker.includeTimestamp(cell);
+  }
 }
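To make the new bookkeeping concrete, here is the writer state after a few appends (illustrative numbers only):

    // Put    @ t=100 -> earliestPutTs = 100, tracked range [100,100]
    // Put    @ t=90  -> earliestPutTs = 90,  tracked range [90,100]
    // Delete @ t=80  -> earliestPutTs stays 90 (only Puts count), range [80,100]
    //
    // appendTrackedTimestampsToMetadata() then writes:
    //   TIMERANGE_KEY   = serialized tracker covering [80,100]
    //   EARLIEST_PUT_TS = 90

Note that the tracked time range covers every cell type; only earliestPutTs is Put-specific.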
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/BucketCache.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/BucketCache.java
index f5b29b4d747..7c5d8a82f83 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/BucketCache.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/BucketCache.java
@@ -86,6 +86,7 @@ import org.apache.hadoop.hbase.protobuf.ProtobufMagic;
 import org.apache.hadoop.hbase.regionserver.DataTieringManager;
 import org.apache.hadoop.hbase.regionserver.HRegion;
 import org.apache.hadoop.hbase.regionserver.StoreFileInfo;
+import org.apache.hadoop.hbase.regionserver.TimeRangeTracker;
 import org.apache.hadoop.hbase.util.Bytes;
 import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
 import org.apache.hadoop.hbase.util.IdReadWriteLock;
@@ -2444,6 +2445,18 @@ public class BucketCache implements BlockCache, HeapSize {
     return Optional.of(!fullyCachedFiles.containsKey(fileName));
   }
 
+  @Override
+  public Optional<Boolean> shouldCacheBlock(BlockCacheKey key, TimeRangeTracker timeRangeTracker,
+    Configuration conf) {
+    DataTieringManager dataTieringManager = DataTieringManager.getInstance();
+    if (dataTieringManager != null && !dataTieringManager.isHotData(timeRangeTracker, conf)) {
+      LOG.debug("Data tiering is enabled for file: '{}' and it is not hot data",
+        key.getHfileName());
+      return Optional.of(false);
+    }
+    return Optional.of(true);
+  }
+
   @Override
   public Optional<Boolean> isAlreadyCached(BlockCacheKey key) {
     boolean foundKey = backingMap.containsKey(key);
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DataTieringManager.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DataTieringManager.java
index d3bdbf330cc..f71bc5e43aa 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DataTieringManager.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DataTieringManager.java
@@ -131,6 +131,27 @@ public class DataTieringManager {
     return isHotData(hFilePath);
   }
 
+  /**
+   * Determines whether the data associated with the given time range tracker is considered hot. If
+   * the data tiering type is set to {@link DataTieringType#TIME_RANGE}, it uses the maximum
+   * timestamp from the time range tracker to determine if the data is hot. Otherwise, it considers
+   * the data as hot by default.
+   * @param timeRangeTracker the time range tracker containing the timestamps
+   * @param conf             The configuration object to use for determining hot data criteria.
+   * @return {@code true} if the data is hot, {@code false} otherwise
+   */
+  public boolean isHotData(TimeRangeTracker timeRangeTracker, Configuration conf) {
+    DataTieringType dataTieringType = getDataTieringType(conf);
+    if (
+      dataTieringType.equals(DataTieringType.TIME_RANGE)
+        && timeRangeTracker.getMax() != TimeRangeTracker.INITIAL_MAX_TIMESTAMP
+    ) {
+      return hotDataValidator(timeRangeTracker.getMax(), getDataTieringHotDataAge(conf));
+    }
+    // DataTieringType.NONE or other types are considered hot by default
+    return true;
+  }
+
   /**
    * Determines whether the data in the HFile at the given path is considered hot based on the
    * configured data tiering type and hot data age. If the data tiering type is set to
@@ -151,6 +172,27 @@ public class DataTieringManager {
     return true;
   }
 
+  /**
+   * Determines whether the data in the HFile at the given path is considered hot based on the
+   * configured data tiering type and hot data age. If the data tiering type is set to
+   * {@link DataTieringType#TIME_RANGE}, it validates the data against the provided maximum
+   * timestamp.
+   * @param hFilePath    the path to the HFile
+   * @param maxTimestamp the maximum timestamp to validate against
+   * @return {@code true} if the data is hot, {@code false} otherwise
+   * @throws DataTieringException if there is an error retrieving data tiering information
+   */
+  public boolean isHotData(Path hFilePath, long maxTimestamp) throws DataTieringException {
+    Configuration configuration = getConfiguration(hFilePath);
+    DataTieringType dataTieringType = getDataTieringType(configuration);
+
+    if (dataTieringType.equals(DataTieringType.TIME_RANGE)) {
+      return hotDataValidator(maxTimestamp, getDataTieringHotDataAge(configuration));
+    }
+    // DataTieringType.NONE or other types are considered hot by default
+    return true;
+  }
+
   /**
    * Determines whether the data in the HFile being read is considered hot based on the configured
    * data tiering type and hot data age. If the data tiering type is set to
@@ -231,10 +273,12 @@ public class DataTieringManager {
   }
 
   private HRegion getHRegion(Path hFilePath) throws DataTieringException {
-    if (hFilePath.getParent() == null || hFilePath.getParent().getParent() == null) {
-      throw new DataTieringException("Incorrect HFile Path: " + hFilePath);
+    String regionId;
+    try {
+      regionId = HRegionFileSystem.getRegionId(hFilePath);
+    } catch (IOException e) {
+      throw new DataTieringException(e.getMessage());
     }
-    String regionId = hFilePath.getParent().getParent().getName();
     HRegion hRegion = this.onlineRegions.get(regionId);
     if (hRegion == null) {
       throw new DataTieringException("HRegion corresponding to " + hFilePath + " doesn't exist");
     }
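hotDataValidator itself is outside these hunks, but from the call sites the check reduces to an age comparison against the configured hot-data age. A hedged sketch of its likely shape (assumed, not quoted from the class):

    // Data is hot while its newest timestamp falls inside the hot-data-age window.
    private boolean hotDataValidator(long maxTimestamp, long hotDataAge) {
      return EnvironmentEdgeManager.currentTime() - maxTimestamp <= hotDataAge;
    }

The INITIAL_MAX_TIMESTAMP guard in the new isHotData overload matters here: a tracker that never saw a cell still has max = -1, which this arithmetic would misread as very old data, so such trackers are short-circuited to hot.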
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionFileSystem.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionFileSystem.java
index 06c36853b67..4570eac9ec8 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionFileSystem.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionFileSystem.java
@@ -1011,6 +1011,31 @@ public class HRegionFileSystem {
     }
   }
 
+  /**
+   * Retrieves the Region ID from the given HFile path.
+   * @param hFilePath The path of the HFile.
+   * @return The Region ID extracted from the HFile path.
+   * @throws IOException If an I/O error occurs or if the HFile path is incorrect.
+   */
+  public static String getRegionId(Path hFilePath) throws IOException {
+    if (hFilePath.getParent() == null || hFilePath.getParent().getParent() == null) {
+      throw new IOException("Incorrect HFile Path: " + hFilePath);
+    }
+    Path dir = hFilePath.getParent().getParent();
+    if (isTemporaryDirectoryName(dir.getName())) {
+      if (dir.getParent() == null) {
+        throw new IOException("Incorrect HFile Path: " + hFilePath);
+      }
+      return dir.getParent().getName();
+    }
+    return dir.getName();
+  }
+
+  private static boolean isTemporaryDirectoryName(String dirName) {
+    return REGION_MERGES_DIR.equals(dirName) || REGION_SPLITS_DIR.equals(dirName)
+      || REGION_TEMP_DIR.equals(dirName);
+  }
+
   /**
    * Creates a directory. Assumes the user has already checked for this directory existence.
    * @return the result of fs.mkdirs(). In case underlying fs throws an IOException, it checks
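The temporary-directory handling is what makes getRegionId safe on the compaction, split and merge paths, where an HFile may not yet sit directly under the region directory. Worked examples (layouts abbreviated, encoded names invented):

    // <table>/<encodedRegion>/<family>/<hfile>
    //   -> two levels up is <encodedRegion>, returned directly
    // <table>/<encodedRegion>/.tmp/<family>/<hfile>   (same for .splits / .merges)
    //   -> two levels up is ".tmp", so the method steps one more
    //      level up and returns <encodedRegion>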
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFileWriter.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFileWriter.java
index 7b4279fb70e..18b758d040b 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFileWriter.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFileWriter.java
@@ -22,13 +22,11 @@ import static org.apache.hadoop.hbase.regionserver.HStoreFile.BLOOM_FILTER_PARAM
 import static org.apache.hadoop.hbase.regionserver.HStoreFile.BLOOM_FILTER_TYPE_KEY;
 import static org.apache.hadoop.hbase.regionserver.HStoreFile.COMPACTION_EVENT_KEY;
 import static org.apache.hadoop.hbase.regionserver.HStoreFile.DELETE_FAMILY_COUNT;
-import static org.apache.hadoop.hbase.regionserver.HStoreFile.EARLIEST_PUT_TS;
 import static org.apache.hadoop.hbase.regionserver.HStoreFile.HISTORICAL_KEY;
 import static org.apache.hadoop.hbase.regionserver.HStoreFile.MAJOR_COMPACTION_KEY;
 import static org.apache.hadoop.hbase.regionserver.HStoreFile.MAX_SEQ_ID_KEY;
 import static org.apache.hadoop.hbase.regionserver.HStoreFile.MOB_CELLS_COUNT;
 import static org.apache.hadoop.hbase.regionserver.HStoreFile.MOB_FILE_REFS;
-import static org.apache.hadoop.hbase.regionserver.HStoreFile.TIMERANGE_KEY;
 import static org.apache.hadoop.hbase.regionserver.StoreEngine.STORE_ENGINE_CLASS_KEY;
 
 import java.io.IOException;
@@ -53,7 +51,6 @@ import org.apache.hadoop.hbase.CellUtil;
 import org.apache.hadoop.hbase.ExtendedCell;
 import org.apache.hadoop.hbase.HBaseInterfaceAudience;
 import org.apache.hadoop.hbase.HConstants;
-import org.apache.hadoop.hbase.KeyValue;
 import org.apache.hadoop.hbase.PrivateCellUtil;
 import org.apache.hadoop.hbase.TableName;
 import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder;
@@ -498,11 +495,9 @@ public class StoreFileWriter implements CellSink, ShipperListener {
   private final BloomFilterWriter deleteFamilyBloomFilterWriter;
   private final BloomType bloomType;
   private byte[] bloomParam = null;
-  private long earliestPutTs = HConstants.LATEST_TIMESTAMP;
   private long deleteFamilyCnt = 0;
   private BloomContext bloomContext = null;
   private BloomContext deleteFamilyBloomContext = null;
-  private final TimeRangeTracker timeRangeTracker;
   private final Supplier<Collection<HStoreFile>> compactedFilesSupplier;
 
   private HFile.Writer writer;
@@ -526,7 +521,6 @@ public class StoreFileWriter implements CellSink, ShipperListener {
     HFileContext fileContext, boolean shouldDropCacheBehind,
     Supplier<Collection<HStoreFile>> compactedFilesSupplier) throws IOException {
     this.compactedFilesSupplier = compactedFilesSupplier;
-    this.timeRangeTracker = TimeRangeTracker.create(TimeRangeTracker.Type.NON_SYNC);
     // TODO : Change all writers to be specifically created for compaction context
     writer =
       HFile.getWriterFactory(conf, cacheConf).withPath(fs, path).withFavoredNodes(favoredNodes)
@@ -668,21 +662,7 @@
    * Add TimestampRange and earliest put timestamp to Metadata
    */
   private void appendTrackedTimestampsToMetadata() throws IOException {
-    // TODO: The StoreFileReader always converts the byte[] to TimeRange
-    // via TimeRangeTracker, so we should write the serialization data of TimeRange directly.
-    appendFileInfo(TIMERANGE_KEY, TimeRangeTracker.toByteArray(timeRangeTracker));
-    appendFileInfo(EARLIEST_PUT_TS, Bytes.toBytes(earliestPutTs));
-  }
-
-  /**
-   * Record the earlest Put timestamp. If the timeRangeTracker is not set, update TimeRangeTracker
-   * to include the timestamp of this key
-   */
-  private void trackTimestamps(final ExtendedCell cell) {
-    if (KeyValue.Type.Put.getCode() == cell.getTypeByte()) {
-      earliestPutTs = Math.min(earliestPutTs, cell.getTimestamp());
-    }
-    timeRangeTracker.includeTimestamp(cell);
+    writer.appendTrackedTimestampsToMetadata();
   }
 
   private void appendGeneralBloomfilter(final ExtendedCell cell) throws IOException {
@@ -713,7 +693,6 @@
     appendGeneralBloomfilter(cell);
     appendDeleteFamilyBloomFilter(cell);
     writer.append(cell);
-    trackTimestamps(cell);
   }
 
   private void beforeShipped() throws IOException {
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/TimeRangeTracker.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/TimeRangeTracker.java
index 6846f077c72..5de13650b05 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/TimeRangeTracker.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/TimeRangeTracker.java
@@ -53,8 +53,8 @@ public abstract class TimeRangeTracker {
     SYNC
   }
 
-  static final long INITIAL_MIN_TIMESTAMP = Long.MAX_VALUE;
-  static final long INITIAL_MAX_TIMESTAMP = -1L;
+  public static final long INITIAL_MIN_TIMESTAMP = Long.MAX_VALUE;
+  public static final long INITIAL_MAX_TIMESTAMP = -1L;
 
   public static TimeRangeTracker create(Type type) {
     switch (type) {
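The two sentinels become public because BucketCache and DataTieringManager now inspect trackers that may never have seen a cell. Basic lifecycle for reference (NON_SYNC is the unsynchronized variant the HFile writer uses; cell stands for any cell being appended):

    TimeRangeTracker tracker = TimeRangeTracker.create(TimeRangeTracker.Type.NON_SYNC);
    // fresh tracker: min == INITIAL_MIN_TIMESTAMP (Long.MAX_VALUE),
    //                max == INITIAL_MAX_TIMESTAMP (-1)
    tracker.includeTimestamp(cell); // widens the range to cover this cell's timestamp
    long newest = tracker.getMax(); // what the tiering check compares against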
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestDataTieringManager.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestDataTieringManager.java
index b74937bf94d..881c47d1bfe 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestDataTieringManager.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestDataTieringManager.java
@@ -99,6 +99,7 @@ public class TestDataTieringManager {
 
   private static final HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
   private static final Logger LOG = LoggerFactory.getLogger(TestDataTieringManager.class);
+  private static final long DAY = 24 * 60 * 60 * 1000;
   private static Configuration defaultConf;
   private static FileSystem fs;
   private static BlockCache blockCache;
@@ -120,16 +121,16 @@
   public static void setupBeforeClass() throws Exception {
     testDir = TEST_UTIL.getDataTestDir(TestDataTieringManager.class.getSimpleName());
     defaultConf = TEST_UTIL.getConfiguration();
-    defaultConf.setBoolean(CacheConfig.PREFETCH_BLOCKS_ON_OPEN_KEY, true);
-    defaultConf.setStrings(HConstants.BUCKET_CACHE_IOENGINE_KEY, "offheap");
-    defaultConf.setLong(BUCKET_CACHE_SIZE_KEY, 32);
-    defaultConf.setBoolean(DataTieringManager.GLOBAL_DATA_TIERING_ENABLED_KEY, true);
-    fs = HFileSystem.get(defaultConf);
-    blockCache = BlockCacheFactory.createBlockCache(defaultConf);
-    cacheConf = new CacheConfig(defaultConf, blockCache);
+    updateCommonConfigurations();
     assertTrue(DataTieringManager.instantiate(defaultConf, testOnlineRegions));
-    setupOnlineRegions();
     dataTieringManager = DataTieringManager.getInstance();
+    rowKeyString = "";
+  }
+
+  private static void updateCommonConfigurations() {
+    defaultConf.setBoolean(DataTieringManager.GLOBAL_DATA_TIERING_ENABLED_KEY, true);
+    defaultConf.setStrings(HConstants.BUCKET_CACHE_IOENGINE_KEY, "offheap");
+    defaultConf.setLong(BUCKET_CACHE_SIZE_KEY, 32);
   }
 
   @FunctionalInterface
@@ -143,7 +144,8 @@
   }
 
   @Test
-  public void testDataTieringEnabledWithKey() {
+  public void testDataTieringEnabledWithKey() throws IOException {
+    initializeTestEnvironment();
     DataTieringMethodCallerWithKey methodCallerWithKey = DataTieringManager::isDataTieringEnabled;
 
     // Test with valid key
@@ -161,7 +163,8 @@
   }
 
   @Test
-  public void testDataTieringEnabledWithPath() {
+  public void testDataTieringEnabledWithPath() throws IOException {
+    initializeTestEnvironment();
     DataTieringMethodCallerWithPath methodCallerWithPath = DataTieringManager::isDataTieringEnabled;
 
     // Test with valid path
@@ -191,7 +194,8 @@
   }
 
   @Test
-  public void testHotDataWithKey() {
+  public void testHotDataWithKey() throws IOException {
+    initializeTestEnvironment();
     DataTieringMethodCallerWithKey methodCallerWithKey = DataTieringManager::isHotData;
 
     // Test with valid key
@@ -204,7 +208,8 @@
   }
 
   @Test
-  public void testHotDataWithPath() {
+  public void testHotDataWithPath() throws IOException {
+    initializeTestEnvironment();
     DataTieringMethodCallerWithPath methodCallerWithPath = DataTieringManager::isHotData;
 
     // Test with valid path
@@ -244,7 +249,8 @@
   }
 
   @Test
-  public void testColdDataFiles() {
+  public void testColdDataFiles() throws IOException {
+    initializeTestEnvironment();
     Set<BlockCacheKey> allCachedBlocks = new HashSet<>();
     for (HStoreFile file : hStoreFiles) {
       allCachedBlocks.add(new BlockCacheKey(file.getPath(), 0, true, BlockType.DATA));
@@ -270,7 +276,74 @@
   }
 
   @Test
-  public void testPickColdDataFiles() {
+  public void testCacheCompactedBlocksOnWriteDataTieringDisabled() throws IOException {
+    setCacheCompactBlocksOnWrite();
+    initializeTestEnvironment();
+
+    HRegion region = createHRegion("table3");
+    testCacheCompactedBlocksOnWrite(region, true);
+  }
+
+  @Test
+  public void testCacheCompactedBlocksOnWriteWithHotData() throws IOException {
+    setCacheCompactBlocksOnWrite();
+    initializeTestEnvironment();
+
+    HRegion region = createHRegion("table3", getConfWithTimeRangeDataTieringEnabled(5 * DAY));
+    testCacheCompactedBlocksOnWrite(region, true);
+  }
+
+  @Test
+  public void testCacheCompactedBlocksOnWriteWithColdData() throws IOException {
+    setCacheCompactBlocksOnWrite();
+    initializeTestEnvironment();
+
+    HRegion region = createHRegion("table3", getConfWithTimeRangeDataTieringEnabled(DAY));
+    testCacheCompactedBlocksOnWrite(region, false);
+  }
+
+  private void setCacheCompactBlocksOnWrite() {
+    defaultConf.setBoolean(CacheConfig.CACHE_COMPACTED_BLOCKS_ON_WRITE_KEY, true);
+  }
+
+  private void testCacheCompactedBlocksOnWrite(HRegion region, boolean expectDataBlocksCached)
+    throws IOException {
+    HStore hStore = createHStore(region, "cf1");
+    createTestFilesForCompaction(hStore);
+    hStore.refreshStoreFiles();
+
+    region.stores.put(Bytes.toBytes("cf1"), hStore);
+    testOnlineRegions.put(region.getRegionInfo().getEncodedName(), region);
+
+    long initialStoreFilesCount = hStore.getStorefilesCount();
+    long initialCacheDataBlockCount = blockCache.getDataBlockCount();
+    assertEquals(3, initialStoreFilesCount);
+    assertEquals(0, initialCacheDataBlockCount);
+
+    region.compact(true);
+
+    long compactedStoreFilesCount = hStore.getStorefilesCount();
+    long compactedCacheDataBlockCount = blockCache.getDataBlockCount();
+    assertEquals(1, compactedStoreFilesCount);
+    assertEquals(expectDataBlocksCached, compactedCacheDataBlockCount > 0);
+  }
+
+  private void createTestFilesForCompaction(HStore hStore) throws IOException {
+    long currentTime = System.currentTimeMillis();
+    Path storeDir = hStore.getStoreContext().getFamilyStoreDirectoryPath();
+    Configuration configuration = hStore.getReadOnlyConfiguration();
+
+    createHStoreFile(storeDir, configuration, currentTime - 2 * DAY,
+      hStore.getHRegion().getRegionFileSystem());
+    createHStoreFile(storeDir, configuration, currentTime - 3 * DAY,
+      hStore.getHRegion().getRegionFileSystem());
+    createHStoreFile(storeDir, configuration, currentTime - 4 * DAY,
+      hStore.getHRegion().getRegionFileSystem());
+  }
+
+  @Test
+  public void testPickColdDataFiles() throws IOException {
+    initializeTestEnvironment();
     Map<String, String> coldDataFiles = dataTieringManager.getColdFilesList();
     assertEquals(1, coldDataFiles.size());
     // hStoreFiles[3] is the cold file.
@@ -283,6 +356,7 @@
    */
   @Test
   public void testBlockEvictions() throws Exception {
+    initializeTestEnvironment();
     long capacitySize = 40 * 1024;
     int writeThreads = 3;
     int writerQLen = 64;
@@ -333,6 +407,7 @@
    */
   @Test
   public void testBlockEvictionsAllColdBlocks() throws Exception {
+    initializeTestEnvironment();
     long capacitySize = 40 * 1024;
     int writeThreads = 3;
     int writerQLen = 64;
@@ -380,6 +455,7 @@
    */
   @Test
   public void testBlockEvictionsHotBlocks() throws Exception {
+    initializeTestEnvironment();
     long capacitySize = 40 * 1024;
     int writeThreads = 3;
     int writerQLen = 64;
@@ -427,6 +503,8 @@
   public void testFeatureKeyDisabled() throws Exception {
     DataTieringManager.resetForTestingOnly();
     defaultConf.setBoolean(DataTieringManager.GLOBAL_DATA_TIERING_ENABLED_KEY, false);
+    initializeTestEnvironment();
+
     try {
       assertFalse(DataTieringManager.instantiate(defaultConf, testOnlineRegions));
       // Verify that the DataTieringManager instance is not instantiated in the
@@ -632,7 +710,12 @@
     HRegionFileSystem regionFs = HRegionFileSystem.createRegionOnFileSystem(testConf, fs,
       CommonFSUtils.getTableDir(testDir, hri.getTable()), hri);
-    return new HRegion(regionFs, null, conf, htd, null);
+    HRegion region = new HRegion(regionFs, null, conf, htd, null);
+    // Manually sets the BlockCache for the HRegion instance.
+    // This is necessary because the region server is not started within this method,
+    // and therefore the BlockCache needs to be explicitly configured.
+    region.setBlockCache(blockCache);
+    return region;
   }
 
   private static HStore createHStore(HRegion region, String columnFamily) throws IOException {
@@ -675,6 +758,11 @@
     return new HStoreFile(fs, storeFileWriter.getPath(), conf, cacheConf, BloomType.NONE, true, sft);
   }
 
+  /**
+   * Writes random data to a store file with rows arranged in lexicographically increasing order.
+   * Each row is generated using the {@link #nextString()} method, ensuring that each subsequent row
+   * is lexicographically larger than the previous one.
+   */
   private static void writeStoreFileRandomData(final StoreFileWriter writer, byte[] columnFamily,
     long timestamp) throws IOException {
     int cellsPerFile = 10;
@@ -691,7 +779,6 @@
     }
   }
 
-
   private static byte[] generateRandomBytes(int sizeInBytes) {
     Random random = new Random();
     byte[] randomBytes = new byte[sizeInBytes];
@@ -714,5 +801,4 @@
     }
     return rowKeyString;
   }
-
 }
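As a quick sanity check on the new compaction tests: createTestFilesForCompaction writes three files whose newest cells are 2, 3 and 4 days old. With a hot-data age of 5 * DAY the compacted output is still inside the window, so its blocks land in the cache (expectDataBlocksCached = true); with 1 * DAY everything falls outside the window and the compacted blocks are skipped; and with tiering disabled the code falls back to "hot by default", so blocks are cached unconditionally.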
