This is an automated email from the ASF dual-hosted git repository.

wchevreuil pushed a commit to branch branch-2.6
in repository https://gitbox.apache.org/repos/asf/hbase.git

commit 56dc8bb6baf187d02c8c672310cf3ed3786fad0f
Author: vinayak hegde <[email protected]>
AuthorDate: Wed May 22 18:59:49 2024 +0530

    HBASE-28469: Integration of time-based priority caching into compaction paths (#5866)
    
    Signed-off-by: Wellington Chevreuil <[email protected]>
    Reviewed-by: Janardhan Hugund <[email protected]>
    Change-Id: Ib992689f769774a2af5fc3f98af892e926b0f7bf
---
 .../apache/hadoop/hbase/io/hfile/BlockCache.java   |  17 +++
 .../hadoop/hbase/io/hfile/BlockCacheKey.java       |   1 -
 .../hadoop/hbase/io/hfile/CombinedBlockCache.java  |  20 +++-
 .../org/apache/hadoop/hbase/io/hfile/HFile.java    |   5 +
 .../hadoop/hbase/io/hfile/HFileWriterImpl.java     |  50 ++++++++
 .../hadoop/hbase/io/hfile/bucket/BucketCache.java  |  13 +++
 .../hbase/regionserver/DataTieringManager.java     |  50 +++++++-
 .../hbase/regionserver/HRegionFileSystem.java      |  25 ++++
 .../hadoop/hbase/regionserver/StoreFileWriter.java |  23 +---
 .../hbase/regionserver/TimeRangeTracker.java       |   4 +-
 .../hbase/regionserver/TestDataTieringManager.java | 130 +++++++++++++++++----
 11 files changed, 281 insertions(+), 57 deletions(-)

diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/BlockCache.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/BlockCache.java
index 8380fc194e7..90fb7ce3491 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/BlockCache.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/BlockCache.java
@@ -23,6 +23,7 @@ import java.util.Optional;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hbase.conf.ConfigurationObserver;
+import org.apache.hadoop.hbase.regionserver.TimeRangeTracker;
 import org.apache.hadoop.hbase.util.Pair;
 import org.apache.yetus.audience.InterfaceAudience;
 
@@ -193,6 +194,22 @@ public interface BlockCache extends Iterable<CachedBlock>, ConfigurationObserver
     return Optional.empty();
   }
 
+  /**
+   * Checks whether the block represented by the given key should be cached or not. This method may
+   * not be overridden by all implementing classes. In such cases, the returned Optional will be
+   * empty. For subclasses implementing this logic, the returned Optional would contain the boolean
+   * value reflecting if the passed block should indeed be cached.
+   * @param key              The key representing the block to check if it should be cached.
+   * @param timeRangeTracker the time range tracker containing the timestamps
+   * @param conf             The configuration object to use for determining caching behavior.
+   * @return An empty Optional if this method is not supported; otherwise, the returned Optional
+   *         contains the boolean value indicating if the block should be cached.
+   */
+  default Optional<Boolean> shouldCacheBlock(BlockCacheKey key, TimeRangeTracker timeRangeTracker,
+    Configuration conf) {
+    return Optional.empty();
+  }
+
   /**
    * Checks whether the block for the passed key is already cached. This method may not be
    * overridden by all implementing classes. In such cases, the returned Optional will be empty. For
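
A minimal sketch of how a caller resolves this tri-state Optional, mirroring the
private HFileWriterImpl.shouldCacheBlock helper added later in this commit (the
cache, key, timeRangeTracker and conf variables are assumed to be in scope):

    // An empty Optional means the implementation has no opinion; historical
    // behavior is to cache, so default to true.
    Optional<Boolean> decision = cache.shouldCacheBlock(key, timeRangeTracker, conf);
    boolean cacheIt = decision.orElse(true);
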
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/BlockCacheKey.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/BlockCacheKey.java
index bf22d38e373..bcc1f58ba5e 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/BlockCacheKey.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/BlockCacheKey.java
@@ -116,5 +116,4 @@ public class BlockCacheKey implements HeapSize, java.io.Serializable {
   public Path getFilePath() {
     return filePath;
   }
-
 }
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/CombinedBlockCache.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/CombinedBlockCache.java
index fe675aade7b..cb3000dda19 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/CombinedBlockCache.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/CombinedBlockCache.java
@@ -26,6 +26,7 @@ import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hbase.io.HeapSize;
 import org.apache.hadoop.hbase.io.hfile.bucket.BucketCache;
+import org.apache.hadoop.hbase.regionserver.TimeRangeTracker;
 import org.apache.hadoop.hbase.util.Pair;
 import org.apache.yetus.audience.InterfaceAudience;
 import org.slf4j.Logger;
@@ -478,11 +479,22 @@ public class CombinedBlockCache implements ResizableBlockCache, HeapSize {
 
   @Override
   public Optional<Boolean> shouldCacheFile(HFileInfo hFileInfo, Configuration conf) {
-    Optional<Boolean> l1Result = l1Cache.shouldCacheFile(hFileInfo, conf);
-    Optional<Boolean> l2Result = l2Cache.shouldCacheFile(hFileInfo, conf);
+    return combineCacheResults(l1Cache.shouldCacheFile(hFileInfo, conf),
+      l2Cache.shouldCacheFile(hFileInfo, conf));
+  }
+
+  @Override
+  public Optional<Boolean> shouldCacheBlock(BlockCacheKey key, TimeRangeTracker timeRangeTracker,
+    Configuration conf) {
+    return combineCacheResults(l1Cache.shouldCacheBlock(key, timeRangeTracker, conf),
+      l2Cache.shouldCacheBlock(key, timeRangeTracker, conf));
+  }
+
+  private Optional<Boolean> combineCacheResults(Optional<Boolean> result1,
+    Optional<Boolean> result2) {
     final Mutable<Boolean> combinedResult = new MutableBoolean(true);
-    l1Result.ifPresent(b -> combinedResult.setValue(b && combinedResult.getValue()));
-    l2Result.ifPresent(b -> combinedResult.setValue(b && combinedResult.getValue()));
+    result1.ifPresent(b -> combinedResult.setValue(b && combinedResult.getValue()));
+    result2.ifPresent(b -> combinedResult.setValue(b && combinedResult.getValue()));
     return Optional.of(combinedResult.getValue());
   }
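
The helper gives either tier veto power: the combined value starts at true, an
empty Optional leaves it untouched, and an explicit false from either L1 or L2
forces the final answer to false. A small sketch of the same semantics, using
org.apache.commons.lang3.mutable.MutableBoolean as the method above does (the
inputs here are illustrative):

    Optional<Boolean> l1 = Optional.empty();    // L1 expresses no opinion
    Optional<Boolean> l2 = Optional.of(false);  // L2, e.g. BucketCache, flags cold data
    MutableBoolean combined = new MutableBoolean(true);
    l1.ifPresent(b -> combined.setValue(b && combined.getValue()));
    l2.ifPresent(b -> combined.setValue(b && combined.getValue()));
    // combined.getValue() is now false: a single veto is enough to skip caching.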
 
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFile.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFile.java
index ae79ad85724..2c3908aa33f 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFile.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFile.java
@@ -212,6 +212,11 @@ public final class HFile {
     /** Add an element to the file info map. */
     void appendFileInfo(byte[] key, byte[] value) throws IOException;
 
+    /**
+     * Add TimestampRange and earliest put timestamp to Metadata
+     */
+    void appendTrackedTimestampsToMetadata() throws IOException;
+
     /** Returns the path to this {@link HFile} */
     Path getPath();
 
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileWriterImpl.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileWriterImpl.java
index d2dfaf62106..44ec324686e 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileWriterImpl.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/HFileWriterImpl.java
@@ -18,6 +18,8 @@
 package org.apache.hadoop.hbase.io.hfile;
 
 import static org.apache.hadoop.hbase.io.hfile.BlockCompressedSizePredicator.MAX_BLOCK_SIZE_UNCOMPRESSED;
+import static org.apache.hadoop.hbase.regionserver.HStoreFile.EARLIEST_PUT_TS;
+import static org.apache.hadoop.hbase.regionserver.HStoreFile.TIMERANGE_KEY;
 
 import java.io.DataOutput;
 import java.io.DataOutputStream;
@@ -26,6 +28,7 @@ import java.net.InetSocketAddress;
 import java.nio.ByteBuffer;
 import java.util.ArrayList;
 import java.util.List;
+import java.util.Optional;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FSDataOutputStream;
 import org.apache.hadoop.fs.FileSystem;
@@ -45,6 +48,7 @@ import org.apache.hadoop.hbase.io.crypto.Encryption;
 import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
 import org.apache.hadoop.hbase.io.encoding.IndexBlockEncoding;
 import org.apache.hadoop.hbase.io.hfile.HFileBlock.BlockWritable;
+import org.apache.hadoop.hbase.regionserver.TimeRangeTracker;
 import org.apache.hadoop.hbase.security.EncryptionUtil;
 import org.apache.hadoop.hbase.security.User;
 import org.apache.hadoop.hbase.util.BloomFilterWriter;
@@ -117,6 +121,8 @@ public class HFileWriterImpl implements HFile.Writer {
   /** May be null if we were passed a stream. */
   protected final Path path;
 
+  protected final Configuration conf;
+
   /** Cache configuration for caching data on write. */
   protected final CacheConfig cacheConf;
 
@@ -170,12 +176,16 @@ public class HFileWriterImpl implements HFile.Writer {
 
   protected long maxMemstoreTS = 0;
 
+  private final TimeRangeTracker timeRangeTracker;
+  private long earliestPutTs = HConstants.LATEST_TIMESTAMP;
+
   public HFileWriterImpl(final Configuration conf, CacheConfig cacheConf, Path path,
     FSDataOutputStream outputStream, HFileContext fileContext) {
     this.outputStream = outputStream;
     this.path = path;
     this.name = path != null ? path.getName() : outputStream.toString();
     this.hFileContext = fileContext;
+    this.timeRangeTracker = TimeRangeTracker.create(TimeRangeTracker.Type.NON_SYNC);
     DataBlockEncoding encoding = hFileContext.getDataBlockEncoding();
     if (encoding != DataBlockEncoding.NONE) {
       this.blockEncoder = new HFileDataBlockEncoderImpl(encoding);
@@ -190,6 +200,7 @@ public class HFileWriterImpl implements HFile.Writer {
     }
     closeOutputStream = path != null;
     this.cacheConf = cacheConf;
+    this.conf = conf;
     float encodeBlockSizeRatio = conf.getFloat(UNIFIED_ENCODED_BLOCKSIZE_RATIO, 0f);
     this.encodedBlockSizeLimit = (int) (hFileContext.getBlocksize() * encodeBlockSizeRatio);
 
@@ -555,6 +566,10 @@ public class HFileWriterImpl implements HFile.Writer {
   private void doCacheOnWrite(long offset) {
     cacheConf.getBlockCache().ifPresent(cache -> {
       HFileBlock cacheFormatBlock = blockWriter.getBlockForCaching(cacheConf);
+      BlockCacheKey key = buildCacheBlockKey(offset, cacheFormatBlock.getBlockType());
+      if (!shouldCacheBlock(cache, key)) {
+        return;
+      }
       try {
         cache.cacheBlock(new BlockCacheKey(name, offset, true, cacheFormatBlock.getBlockType()),
           cacheFormatBlock, cacheConf.isInMemory(), true);
@@ -565,6 +580,18 @@ public class HFileWriterImpl implements HFile.Writer {
     });
   }
 
+  private BlockCacheKey buildCacheBlockKey(long offset, BlockType blockType) {
+    if (path != null) {
+      return new BlockCacheKey(path, offset, true, blockType);
+    }
+    return new BlockCacheKey(name, offset, true, blockType);
+  }
+
+  private boolean shouldCacheBlock(BlockCache cache, BlockCacheKey key) {
+    Optional<Boolean> result = cache.shouldCacheBlock(key, timeRangeTracker, conf);
+    return result.orElse(true);
+  }
+
   /**
    * Ready a new block for writing.
    */
@@ -767,6 +794,8 @@ public class HFileWriterImpl implements HFile.Writer {
     if (tagsLength > this.maxTagsLength) {
       this.maxTagsLength = tagsLength;
     }
+
+    trackTimestamps(cell);
   }
 
   @Override
@@ -859,4 +888,25 @@ public class HFileWriterImpl implements HFile.Writer {
       outputStream = null;
     }
   }
+
+  /**
+   * Add TimestampRange and earliest put timestamp to Metadata
+   */
+  public void appendTrackedTimestampsToMetadata() throws IOException {
+    // TODO: The StoreFileReader always converts the byte[] to TimeRange
+    // via TimeRangeTracker, so we should write the serialization data of TimeRange directly.
+    appendFileInfo(TIMERANGE_KEY, TimeRangeTracker.toByteArray(timeRangeTracker));
+    appendFileInfo(EARLIEST_PUT_TS, Bytes.toBytes(earliestPutTs));
+  }
+
+  /**
+   * Record the earliest Put timestamp. If the timeRangeTracker is not set, update TimeRangeTracker
+   * to include the timestamp of this key
+   */
+  private void trackTimestamps(final Cell cell) {
+    if (KeyValue.Type.Put.getCode() == cell.getTypeByte()) {
+      earliestPutTs = Math.min(earliestPutTs, cell.getTimestamp());
+    }
+    timeRangeTracker.includeTimestamp(cell);
+  }
 }
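
The trackTimestamps bookkeeping above moved here from StoreFileWriter: every
appended cell widens the tracked time range, while only Put cells advance
earliestPutTs. A standalone sketch of the same logic (row, family, qualifier and
timestamp values are made up):

    TimeRangeTracker tracker = TimeRangeTracker.create(TimeRangeTracker.Type.NON_SYNC);
    long earliestPutTs = HConstants.LATEST_TIMESTAMP;
    Cell cell = new KeyValue(Bytes.toBytes("row1"), Bytes.toBytes("cf"),
      Bytes.toBytes("q"), 1716400000000L, KeyValue.Type.Put);
    if (KeyValue.Type.Put.getCode() == cell.getTypeByte()) {
      earliestPutTs = Math.min(earliestPutTs, cell.getTimestamp());
    }
    tracker.includeTimestamp(cell); // widens [min, max] to cover the cell's timestamp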
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/BucketCache.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/BucketCache.java
index 7242e40bf05..f3903284067 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/BucketCache.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/io/hfile/bucket/BucketCache.java
@@ -86,6 +86,7 @@ import org.apache.hadoop.hbase.protobuf.ProtobufMagic;
 import org.apache.hadoop.hbase.regionserver.DataTieringManager;
 import org.apache.hadoop.hbase.regionserver.HRegion;
 import org.apache.hadoop.hbase.regionserver.StoreFileInfo;
+import org.apache.hadoop.hbase.regionserver.TimeRangeTracker;
 import org.apache.hadoop.hbase.util.Bytes;
 import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
 import org.apache.hadoop.hbase.util.IdReadWriteLock;
@@ -2363,6 +2364,18 @@ public class BucketCache implements BlockCache, HeapSize {
     return Optional.of(!fullyCachedFiles.containsKey(fileName));
   }
 
+  @Override
+  public Optional<Boolean> shouldCacheBlock(BlockCacheKey key, TimeRangeTracker timeRangeTracker,
+    Configuration conf) {
+    DataTieringManager dataTieringManager = DataTieringManager.getInstance();
+    if (dataTieringManager != null && !dataTieringManager.isHotData(timeRangeTracker, conf)) {
+      LOG.debug("Data tiering is enabled for file: '{}' and it is not hot data",
+        key.getHfileName());
+      return Optional.of(false);
+    }
+    return Optional.of(true);
+  }
+
   @Override
   public Optional<Boolean> isAlreadyCached(BlockCacheKey key) {
     boolean foundKey = backingMap.containsKey(key);
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DataTieringManager.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DataTieringManager.java
index d3bdbf330cc..f71bc5e43aa 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DataTieringManager.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/DataTieringManager.java
@@ -131,6 +131,27 @@ public class DataTieringManager {
     return isHotData(hFilePath);
   }
 
+  /**
+   * Determines whether the data associated with the given time range tracker is considered hot. If
+   * the data tiering type is set to {@link DataTieringType#TIME_RANGE}, it uses the maximum
+   * timestamp from the time range tracker to determine if the data is hot. Otherwise, it considers
+   * the data as hot by default.
+   * @param timeRangeTracker the time range tracker containing the timestamps
+   * @param conf             The configuration object to use for determining hot data criteria.
+   * @return {@code true} if the data is hot, {@code false} otherwise
+   */
+  public boolean isHotData(TimeRangeTracker timeRangeTracker, Configuration conf) {
+    DataTieringType dataTieringType = getDataTieringType(conf);
+    if (
+      dataTieringType.equals(DataTieringType.TIME_RANGE)
+        && timeRangeTracker.getMax() != TimeRangeTracker.INITIAL_MAX_TIMESTAMP
+    ) {
+      return hotDataValidator(timeRangeTracker.getMax(), getDataTieringHotDataAge(conf));
+    }
+    // DataTieringType.NONE or other types are considered hot by default
+    return true;
+  }
+
   /**
    * Determines whether the data in the HFile at the given path is considered hot based on the
    * configured data tiering type and hot data age. If the data tiering type is set to
@@ -151,6 +172,27 @@ public class DataTieringManager {
     return true;
   }
 
+  /**
+   * Determines whether the data in the HFile at the given path is considered hot based on the
+   * configured data tiering type and hot data age. If the data tiering type is set to
+   * {@link DataTieringType#TIME_RANGE}, it validates the data against the provided maximum
+   * timestamp.
+   * @param hFilePath    the path to the HFile
+   * @param maxTimestamp the maximum timestamp to validate against
+   * @return {@code true} if the data is hot, {@code false} otherwise
+   * @throws DataTieringException if there is an error retrieving data tiering information
+   */
+  public boolean isHotData(Path hFilePath, long maxTimestamp) throws DataTieringException {
+    Configuration configuration = getConfiguration(hFilePath);
+    DataTieringType dataTieringType = getDataTieringType(configuration);
+
+    if (dataTieringType.equals(DataTieringType.TIME_RANGE)) {
+      return hotDataValidator(maxTimestamp, getDataTieringHotDataAge(configuration));
+    }
+    // DataTieringType.NONE or other types are considered hot by default
+    return true;
+  }
+
   /**
    * Determines whether the data in the HFile being read is considered hot based on the configured
    * data tiering type and hot data age. If the data tiering type is set to
@@ -231,10 +273,12 @@ public class DataTieringManager {
   }
 
   private HRegion getHRegion(Path hFilePath) throws DataTieringException {
-    if (hFilePath.getParent() == null || hFilePath.getParent().getParent() == null) {
-      throw new DataTieringException("Incorrect HFile Path: " + hFilePath);
+    String regionId;
+    try {
+      regionId = HRegionFileSystem.getRegionId(hFilePath);
+    } catch (IOException e) {
+      throw new DataTieringException(e.getMessage());
     }
-    String regionId = hFilePath.getParent().getParent().getName();
     HRegion hRegion = this.onlineRegions.get(regionId);
     if (hRegion == null) {
       throw new DataTieringException("HRegion corresponding to " + hFilePath + 
" doesn't exist");
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionFileSystem.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionFileSystem.java
index f7144c7fa9d..c77f4d4aefd 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionFileSystem.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionFileSystem.java
@@ -1067,6 +1067,31 @@ public class HRegionFileSystem {
     }
   }
 
+  /**
+   * Retrieves the Region ID from the given HFile path.
+   * @param hFilePath The path of the HFile.
+   * @return The Region ID extracted from the HFile path.
+   * @throws IOException If an I/O error occurs or if the HFile path is incorrect.
+   */
+  public static String getRegionId(Path hFilePath) throws IOException {
+    if (hFilePath.getParent() == null || hFilePath.getParent().getParent() == null) {
+      throw new IOException("Incorrect HFile Path: " + hFilePath);
+    }
+    Path dir = hFilePath.getParent().getParent();
+    if (isTemporaryDirectoryName(dir.getName())) {
+      if (dir.getParent() == null) {
+        throw new IOException("Incorrect HFile Path: " + hFilePath);
+      }
+      return dir.getParent().getName();
+    }
+    return dir.getName();
+  }
+
+  private static boolean isTemporaryDirectoryName(String dirName) {
+    return REGION_MERGES_DIR.equals(dirName) || REGION_SPLITS_DIR.equals(dirName)
+      || REGION_TEMP_DIR.equals(dirName);
+  }
+
   /**
    * Creates a directory. Assumes the user has already checked for this directory existence.
    * @return the result of fs.mkdirs(). In case underlying fs throws an IOException, it checks
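
An illustrative use of the new helper (the paths are made up, and this assumes
REGION_MERGES_DIR, REGION_SPLITS_DIR and REGION_TEMP_DIR resolve to ".merges",
".splits" and ".tmp" respectively):

    Path regular = new Path("/hbase/data/default/t1/1588230740/cf/f1");
    String id1 = HRegionFileSystem.getRegionId(regular);    // "1588230740"
    Path underTmp = new Path("/hbase/data/default/t1/1588230740/.tmp/cf/f1");
    String id2 = HRegionFileSystem.getRegionId(underTmp);   // also "1588230740"
    // A path with fewer than two parent directories raises IOException.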
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFileWriter.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFileWriter.java
index a2a641df6d8..5f5fcf2001a 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFileWriter.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/StoreFileWriter.java
@@ -22,13 +22,11 @@ import static org.apache.hadoop.hbase.regionserver.HStoreFile.BLOOM_FILTER_PARAM
 import static org.apache.hadoop.hbase.regionserver.HStoreFile.BLOOM_FILTER_TYPE_KEY;
 import static org.apache.hadoop.hbase.regionserver.HStoreFile.COMPACTION_EVENT_KEY;
 import static org.apache.hadoop.hbase.regionserver.HStoreFile.DELETE_FAMILY_COUNT;
-import static org.apache.hadoop.hbase.regionserver.HStoreFile.EARLIEST_PUT_TS;
 import static org.apache.hadoop.hbase.regionserver.HStoreFile.HISTORICAL_KEY;
 import static org.apache.hadoop.hbase.regionserver.HStoreFile.MAJOR_COMPACTION_KEY;
 import static org.apache.hadoop.hbase.regionserver.HStoreFile.MAX_SEQ_ID_KEY;
 import static org.apache.hadoop.hbase.regionserver.HStoreFile.MOB_CELLS_COUNT;
 import static org.apache.hadoop.hbase.regionserver.HStoreFile.MOB_FILE_REFS;
-import static org.apache.hadoop.hbase.regionserver.HStoreFile.TIMERANGE_KEY;
 import static org.apache.hadoop.hbase.regionserver.StoreEngine.STORE_ENGINE_CLASS_KEY;
 
 import java.io.IOException;
@@ -52,7 +50,6 @@ import org.apache.hadoop.hbase.CellComparator;
 import org.apache.hadoop.hbase.CellUtil;
 import org.apache.hadoop.hbase.HBaseInterfaceAudience;
 import org.apache.hadoop.hbase.HConstants;
-import org.apache.hadoop.hbase.KeyValue;
 import org.apache.hadoop.hbase.PrivateCellUtil;
 import org.apache.hadoop.hbase.TableName;
 import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder;
@@ -497,11 +494,9 @@ public class StoreFileWriter implements CellSink, ShipperListener {
     private final BloomFilterWriter deleteFamilyBloomFilterWriter;
     private final BloomType bloomType;
     private byte[] bloomParam = null;
-    private long earliestPutTs = HConstants.LATEST_TIMESTAMP;
     private long deleteFamilyCnt = 0;
     private BloomContext bloomContext = null;
     private BloomContext deleteFamilyBloomContext = null;
-    private final TimeRangeTracker timeRangeTracker;
     private final Supplier<Collection<HStoreFile>> compactedFilesSupplier;
 
     private HFile.Writer writer;
@@ -525,7 +520,6 @@ public class StoreFileWriter implements CellSink, ShipperListener {
       HFileContext fileContext, boolean shouldDropCacheBehind,
       Supplier<Collection<HStoreFile>> compactedFilesSupplier) throws IOException {
       this.compactedFilesSupplier = compactedFilesSupplier;
-      this.timeRangeTracker = TimeRangeTracker.create(TimeRangeTracker.Type.NON_SYNC);
       // TODO : Change all writers to be specifically created for compaction context
       writer =
         HFile.getWriterFactory(conf, cacheConf).withPath(fs, path).withFavoredNodes(favoredNodes)
@@ -667,21 +661,7 @@ public class StoreFileWriter implements CellSink, ShipperListener {
      * Add TimestampRange and earliest put timestamp to Metadata
      */
     private void appendTrackedTimestampsToMetadata() throws IOException {
-      // TODO: The StoreFileReader always converts the byte[] to TimeRange
-      // via TimeRangeTracker, so we should write the serialization data of TimeRange directly.
-      appendFileInfo(TIMERANGE_KEY, TimeRangeTracker.toByteArray(timeRangeTracker));
-      appendFileInfo(EARLIEST_PUT_TS, Bytes.toBytes(earliestPutTs));
-    }
-
-    /**
-     * Record the earlest Put timestamp. If the timeRangeTracker is not set, update TimeRangeTracker
-     * to include the timestamp of this key
-     */
-    private void trackTimestamps(final Cell cell) {
-      if (KeyValue.Type.Put.getCode() == cell.getTypeByte()) {
-        earliestPutTs = Math.min(earliestPutTs, cell.getTimestamp());
-      }
-      timeRangeTracker.includeTimestamp(cell);
+      writer.appendTrackedTimestampsToMetadata();
     }
 
     private void appendGeneralBloomfilter(final Cell cell) throws IOException {
@@ -712,7 +692,6 @@ public class StoreFileWriter implements CellSink, ShipperListener {
       appendGeneralBloomfilter(cell);
       appendDeleteFamilyBloomFilter(cell);
       writer.append(cell);
-      trackTimestamps(cell);
     }
 
     private void beforeShipped() throws IOException {
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/TimeRangeTracker.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/TimeRangeTracker.java
index 7fc79642d91..53deb7e9cea 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/TimeRangeTracker.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/TimeRangeTracker.java
@@ -53,8 +53,8 @@ public abstract class TimeRangeTracker {
     SYNC
   }
 
-  static final long INITIAL_MIN_TIMESTAMP = Long.MAX_VALUE;
-  static final long INITIAL_MAX_TIMESTAMP = -1L;
+  public static final long INITIAL_MIN_TIMESTAMP = Long.MAX_VALUE;
+  public static final long INITIAL_MAX_TIMESTAMP = -1L;
 
   public static TimeRangeTracker create(Type type) {
     switch (type) {
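
Widening the sentinels to public lets callers outside the package tell an
untouched tracker apart from a real range, which is exactly the check the new
DataTieringManager.isHotData(TimeRangeTracker, Configuration) overload performs:

    TimeRangeTracker t = TimeRangeTracker.create(TimeRangeTracker.Type.NON_SYNC);
    // Before any cell is tracked, getMax() still holds the sentinel:
    boolean empty = (t.getMax() == TimeRangeTracker.INITIAL_MAX_TIMESTAMP); // true
    // isHotData() treats such an empty tracker as hot by default.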
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestDataTieringManager.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestDataTieringManager.java
index b74937bf94d..315d88d3836 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestDataTieringManager.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestDataTieringManager.java
@@ -30,8 +30,8 @@ import java.util.HashMap;
 import java.util.HashSet;
 import java.util.List;
 import java.util.Map;
-import java.util.Random;
 import java.util.Optional;
+import java.util.Random;
 import java.util.Set;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileSystem;
@@ -57,8 +57,6 @@ import org.apache.hadoop.hbase.io.hfile.CacheConfig;
 import org.apache.hadoop.hbase.io.hfile.CacheTestUtils;
 import org.apache.hadoop.hbase.io.hfile.HFileContextBuilder;
 import org.apache.hadoop.hbase.io.hfile.bucket.BucketCache;
-import org.apache.hadoop.hbase.regionserver.storefiletracker.StoreFileTracker;
-import org.apache.hadoop.hbase.regionserver.storefiletracker.StoreFileTrackerFactory;
 import org.apache.hadoop.hbase.testclassification.RegionServerTests;
 import org.apache.hadoop.hbase.testclassification.SmallTests;
 import org.apache.hadoop.hbase.util.Bytes;
@@ -99,6 +97,7 @@ public class TestDataTieringManager {
 
   private static final HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
   private static final Logger LOG = LoggerFactory.getLogger(TestDataTieringManager.class);
+  private static final long DAY = 24 * 60 * 60 * 1000;
   private static Configuration defaultConf;
   private static FileSystem fs;
   private static BlockCache blockCache;
@@ -120,16 +119,16 @@ public class TestDataTieringManager {
   public static void setupBeforeClass() throws Exception {
     testDir = TEST_UTIL.getDataTestDir(TestDataTieringManager.class.getSimpleName());
     defaultConf = TEST_UTIL.getConfiguration();
-    defaultConf.setBoolean(CacheConfig.PREFETCH_BLOCKS_ON_OPEN_KEY, true);
-    defaultConf.setStrings(HConstants.BUCKET_CACHE_IOENGINE_KEY, "offheap");
-    defaultConf.setLong(BUCKET_CACHE_SIZE_KEY, 32);
-    defaultConf.setBoolean(DataTieringManager.GLOBAL_DATA_TIERING_ENABLED_KEY, true);
-    fs = HFileSystem.get(defaultConf);
-    blockCache = BlockCacheFactory.createBlockCache(defaultConf);
-    cacheConf = new CacheConfig(defaultConf, blockCache);
+    updateCommonConfigurations();
     assertTrue(DataTieringManager.instantiate(defaultConf, testOnlineRegions));
-    setupOnlineRegions();
     dataTieringManager = DataTieringManager.getInstance();
+    rowKeyString = "";
+  }
+
+  private static void updateCommonConfigurations() {
+    defaultConf.setBoolean(DataTieringManager.GLOBAL_DATA_TIERING_ENABLED_KEY, true);
+    defaultConf.setStrings(HConstants.BUCKET_CACHE_IOENGINE_KEY, "offheap");
+    defaultConf.setLong(BUCKET_CACHE_SIZE_KEY, 32);
   }
 
   @FunctionalInterface
@@ -143,7 +142,8 @@ public class TestDataTieringManager {
   }
 
   @Test
-  public void testDataTieringEnabledWithKey() {
+  public void testDataTieringEnabledWithKey() throws IOException {
+    initializeTestEnvironment();
     DataTieringMethodCallerWithKey methodCallerWithKey = DataTieringManager::isDataTieringEnabled;
 
     // Test with valid key
@@ -161,7 +161,8 @@ public class TestDataTieringManager {
   }
 
   @Test
-  public void testDataTieringEnabledWithPath() {
+  public void testDataTieringEnabledWithPath() throws IOException {
+    initializeTestEnvironment();
     DataTieringMethodCallerWithPath methodCallerWithPath = DataTieringManager::isDataTieringEnabled;
 
     // Test with valid path
@@ -191,7 +192,8 @@ public class TestDataTieringManager {
   }
 
   @Test
-  public void testHotDataWithKey() {
+  public void testHotDataWithKey() throws IOException {
+    initializeTestEnvironment();
     DataTieringMethodCallerWithKey methodCallerWithKey = DataTieringManager::isHotData;
 
     // Test with valid key
@@ -204,7 +206,8 @@ public class TestDataTieringManager {
   }
 
   @Test
-  public void testHotDataWithPath() {
+  public void testHotDataWithPath() throws IOException {
+    initializeTestEnvironment();
     DataTieringMethodCallerWithPath methodCallerWithPath = DataTieringManager::isHotData;
 
     // Test with valid path
@@ -244,7 +247,8 @@ public class TestDataTieringManager {
   }
 
   @Test
-  public void testColdDataFiles() {
+  public void testColdDataFiles() throws IOException {
+    initializeTestEnvironment();
     Set<BlockCacheKey> allCachedBlocks = new HashSet<>();
     for (HStoreFile file : hStoreFiles) {
       allCachedBlocks.add(new BlockCacheKey(file.getPath(), 0, true, BlockType.DATA));
@@ -270,7 +274,74 @@ public class TestDataTieringManager {
   }
 
   @Test
-  public void testPickColdDataFiles() {
+  public void testCacheCompactedBlocksOnWriteDataTieringDisabled() throws IOException {
+    setCacheCompactBlocksOnWrite();
+    initializeTestEnvironment();
+
+    HRegion region = createHRegion("table3");
+    testCacheCompactedBlocksOnWrite(region, true);
+  }
+
+  @Test
+  public void testCacheCompactedBlocksOnWriteWithHotData() throws IOException {
+    setCacheCompactBlocksOnWrite();
+    initializeTestEnvironment();
+
+    HRegion region = createHRegion("table3", 
getConfWithTimeRangeDataTieringEnabled(5 * DAY));
+    testCacheCompactedBlocksOnWrite(region, true);
+  }
+
+  @Test
+  public void testCacheCompactedBlocksOnWriteWithColdData() throws IOException {
+    setCacheCompactBlocksOnWrite();
+    initializeTestEnvironment();
+
+    HRegion region = createHRegion("table3", 
getConfWithTimeRangeDataTieringEnabled(DAY));
+    testCacheCompactedBlocksOnWrite(region, false);
+  }
+
+  private void setCacheCompactBlocksOnWrite() {
+    defaultConf.setBoolean(CacheConfig.CACHE_COMPACTED_BLOCKS_ON_WRITE_KEY, true);
+  }
+
+  private void testCacheCompactedBlocksOnWrite(HRegion region, boolean expectDataBlocksCached)
+    throws IOException {
+    HStore hStore = createHStore(region, "cf1");
+    createTestFilesForCompaction(hStore);
+    hStore.refreshStoreFiles();
+
+    region.stores.put(Bytes.toBytes("cf1"), hStore);
+    testOnlineRegions.put(region.getRegionInfo().getEncodedName(), region);
+
+    long initialStoreFilesCount = hStore.getStorefilesCount();
+    long initialCacheDataBlockCount = blockCache.getDataBlockCount();
+    assertEquals(3, initialStoreFilesCount);
+    assertEquals(0, initialCacheDataBlockCount);
+
+    region.compact(true);
+
+    long compactedStoreFilesCount = hStore.getStorefilesCount();
+    long compactedCacheDataBlockCount = blockCache.getDataBlockCount();
+    assertEquals(1, compactedStoreFilesCount);
+    assertEquals(expectDataBlocksCached, compactedCacheDataBlockCount > 0);
+  }
+
+  private void createTestFilesForCompaction(HStore hStore) throws IOException {
+    long currentTime = System.currentTimeMillis();
+    Path storeDir = hStore.getStoreContext().getFamilyStoreDirectoryPath();
+    Configuration configuration = hStore.getReadOnlyConfiguration();
+
+    createHStoreFile(storeDir, configuration, currentTime - 2 * DAY,
+      hStore.getHRegion().getRegionFileSystem());
+    createHStoreFile(storeDir, configuration, currentTime - 3 * DAY,
+      hStore.getHRegion().getRegionFileSystem());
+    createHStoreFile(storeDir, configuration, currentTime - 4 * DAY,
+      hStore.getHRegion().getRegionFileSystem());
+  }
+
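
For orientation, the hot/cold boundary these three compaction tests exercise,
assuming hotDataValidator compares the age of a file's maximum timestamp against
the configured hot-data window:

    long now = System.currentTimeMillis();
    long newestFileTs = now - 2 * DAY;  // newest file from createTestFilesForCompaction
    boolean hotAtFiveDays = (now - newestFileTs) < 5 * DAY; // true  -> blocks cached
    boolean hotAtOneDay = (now - newestFileTs) < DAY;       // false -> blocks skipped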
+  @Test
+  public void testPickColdDataFiles() throws IOException {
+    initializeTestEnvironment();
     Map<String, String> coldDataFiles = dataTieringManager.getColdFilesList();
     assertEquals(1, coldDataFiles.size());
     // hStoreFiles[3] is the cold file.
@@ -283,6 +354,7 @@ public class TestDataTieringManager {
    */
   @Test
   public void testBlockEvictions() throws Exception {
+    initializeTestEnvironment();
     long capacitySize = 40 * 1024;
     int writeThreads = 3;
     int writerQLen = 64;
@@ -333,6 +405,7 @@ public class TestDataTieringManager {
    */
   @Test
   public void testBlockEvictionsAllColdBlocks() throws Exception {
+    initializeTestEnvironment();
     long capacitySize = 40 * 1024;
     int writeThreads = 3;
     int writerQLen = 64;
@@ -380,6 +453,7 @@ public class TestDataTieringManager {
    */
   @Test
   public void testBlockEvictionsHotBlocks() throws Exception {
+    initializeTestEnvironment();
     long capacitySize = 40 * 1024;
     int writeThreads = 3;
     int writerQLen = 64;
@@ -427,6 +501,8 @@ public class TestDataTieringManager {
   public void testFeatureKeyDisabled() throws Exception {
     DataTieringManager.resetForTestingOnly();
     defaultConf.setBoolean(DataTieringManager.GLOBAL_DATA_TIERING_ENABLED_KEY, false);
+    initializeTestEnvironment();
+
     try {
       assertFalse(DataTieringManager.instantiate(defaultConf, testOnlineRegions));
       // Verify that the DataaTieringManager instance is not instantiated in the
@@ -632,7 +708,12 @@ public class TestDataTieringManager {
     HRegionFileSystem regionFs = HRegionFileSystem.createRegionOnFileSystem(testConf, fs,
       CommonFSUtils.getTableDir(testDir, hri.getTable()), hri);
 
-    return new HRegion(regionFs, null, conf, htd, null);
+    HRegion region = new HRegion(regionFs, null, conf, htd, null);
+    // Manually sets the BlockCache for the HRegion instance.
+    // This is necessary because the region server is not started within this method,
+    // and therefore the BlockCache needs to be explicitly configured.
+    region.setBlockCache(blockCache);
+    return region;
   }
 
   private static HStore createHStore(HRegion region, String columnFamily) throws IOException {
@@ -668,13 +749,14 @@ public class TestDataTieringManager {
 
     writeStoreFileRandomData(storeFileWriter, Bytes.toBytes(columnFamily), timestamp);
 
-    StoreContext storeContext =
-      StoreContext.getBuilder().withRegionFileSystem(regionFs).build();
-
-    StoreFileTracker sft = StoreFileTrackerFactory.create(conf, true, storeContext);
-    return new HStoreFile(fs, storeFileWriter.getPath(), conf, cacheConf, BloomType.NONE, true, sft);
+    return new HStoreFile(fs, storeFileWriter.getPath(), conf, cacheConf, BloomType.NONE, true);
   }
 
+  /**
+   * Writes random data to a store file with rows arranged in lexicographically increasing order.
+   * Each row is generated using the {@link #nextString()} method, ensuring that each subsequent row
+   * is lexicographically larger than the previous one.
+   */
   private static void writeStoreFileRandomData(final StoreFileWriter writer, byte[] columnFamily,
     long timestamp) throws IOException {
     int cellsPerFile = 10;
@@ -691,7 +773,6 @@ public class TestDataTieringManager {
     }
   }
 
-
   private static byte[] generateRandomBytes(int sizeInBytes) {
     Random random = new Random();
     byte[] randomBytes = new byte[sizeInBytes];
@@ -714,5 +795,4 @@ public class TestDataTieringManager {
     }
     return rowKeyString;
   }
-
 }

