kamalcph commented on code in PR #14511:
URL: https://github.com/apache/kafka/pull/14511#discussion_r1349882603


##########
core/src/test/scala/unit/kafka/log/remote/RemoteIndexCacheTest.scala:
##########
@@ -554,6 +554,115 @@ class RemoteIndexCacheTest {
     assertTrue(cache.internalCache().estimatedSize() == 0)
   }
 
+  @Test
+  def testCorrectnessForCacheAndIndexFilesWhenResizeCache(): Unit = {
+
+    def getIndexFileFromRemoteCacheDir(suffix: String) = {
+      Files.walk(cache.cacheDir())
+        .filter(Files.isRegularFile(_))
+        .filter(path => path.getFileName.toString.endsWith(suffix))
+        .findAny()
+    }
+
+    // The test process for resizing is: put 1 entry -> evict to empty -> put 
3 entrys with limited capacity of 2 entrys -> evict to 1 entry
+    val estimateEntryBytesSize = estimateOneEntryBytesSize()
+    val tpId = new TopicIdPartition(Uuid.randomUuid(), new 
TopicPartition("foo", 0))
+    val metadataList = generateRemoteLogSegmentMetadata(size = 3, tpId)
+
+    assertCacheSize(0)
+    // getIndex for first time will call rsm#fetchIndex
+    val cacheEntry = cache.getIndexEntry(metadataList.head)
+    assertCacheSize(1)
+    
assertTrue(getIndexFileFromRemoteCacheDir(LogFileUtils.INDEX_FILE_SUFFIX).isPresent)
+    
assertTrue(getIndexFileFromRemoteCacheDir(LogFileUtils.TXN_INDEX_FILE_SUFFIX).isPresent)
+    
assertTrue(getIndexFileFromRemoteCacheDir(LogFileUtils.TIME_INDEX_FILE_SUFFIX).isPresent)
+
+    // Reduce the cache limit to 1 to ensure that all are evicted.
+    cache.resizeCacheSize(1L)
+
+    // wait until entry is marked for deletion
+    TestUtils.waitUntilTrue(() => cacheEntry.isMarkedForCleanup,
+      "Failed to mark cache entry for cleanup after resizing cache.")
+    TestUtils.waitUntilTrue(() => cacheEntry.isCleanStarted,
+      "Failed to cleanup cache entry after resizing cache.")
+
+    // verify no index files on remote cache dir
+    TestUtils.waitUntilTrue(() => 
!getIndexFileFromRemoteCacheDir(LogFileUtils.INDEX_FILE_SUFFIX).isPresent,
+      s"Offset index file should not be present on disk at 
${cache.cacheDir()}")
+    TestUtils.waitUntilTrue(() => 
!getIndexFileFromRemoteCacheDir(LogFileUtils.TXN_INDEX_FILE_SUFFIX).isPresent,
+      s"Txn index file should not be present on disk at ${cache.cacheDir()}")
+    TestUtils.waitUntilTrue(() => 
!getIndexFileFromRemoteCacheDir(LogFileUtils.TIME_INDEX_FILE_SUFFIX).isPresent,
+      s"Time index file should not be present on disk at ${cache.cacheDir()}")
+    TestUtils.waitUntilTrue(() => 
!getIndexFileFromRemoteCacheDir(LogFileUtils.DELETED_FILE_SUFFIX).isPresent,
+      s"Index file marked for deletion should not be present on disk at 
${cache.cacheDir()}")
+
+    assertTrue(cache.internalCache().estimatedSize() == 0)
+
+    // Increase cache capacity to only store 2 entries
+    cache.resizeCacheSize(2 * estimateEntryBytesSize)
+    assertCacheSize(0)
+
+    val cacheEntrys = new mutable.HashMap[Uuid, RemoteIndexCache.Entry]()
+    cacheEntrys.put(metadataList(0).remoteLogSegmentId().id(), 
cache.getIndexEntry(metadataList(0)))
+    cacheEntrys.put(metadataList(1).remoteLogSegmentId().id(), 
cache.getIndexEntry(metadataList(1)))
+    cacheEntrys.put(metadataList(2).remoteLogSegmentId().id(), 
cache.getIndexEntry(metadataList(2)))
+
+    assertCacheSize(2)
+    val missingMetadataOpt = {
+      metadataList.find(segmentMetadata => {
+        val segmentId = segmentMetadata.remoteLogSegmentId().id()
+        !cache.internalCache.asMap().containsKey(segmentId)
+      })
+    }
+    assertFalse(missingMetadataOpt.isEmpty)
+    val missingEntry = cacheEntrys.find(entry => 
entry._1.equals(missingMetadataOpt.get.remoteLogSegmentId().id())).get._2

Review Comment:
   Since this is an LRU cache, can we directly check whether the first entry 
metadataList(0).remoteLogSegmentId().id() is evicted? 



##########
core/src/test/scala/unit/kafka/log/remote/RemoteIndexCacheTest.scala:
##########
@@ -554,6 +554,115 @@ class RemoteIndexCacheTest {
     assertTrue(cache.internalCache().estimatedSize() == 0)
   }
 
+  @Test
+  def testCorrectnessForCacheAndIndexFilesWhenResizeCache(): Unit = {
+
+    def getIndexFileFromRemoteCacheDir(suffix: String) = {
+      Files.walk(cache.cacheDir())
+        .filter(Files.isRegularFile(_))
+        .filter(path => path.getFileName.toString.endsWith(suffix))
+        .findAny()
+    }
+
+    // The test process for resizing is: put 1 entry -> evict to empty -> put 
3 entrys with limited capacity of 2 entrys -> evict to 1 entry
+    val estimateEntryBytesSize = estimateOneEntryBytesSize()
+    val tpId = new TopicIdPartition(Uuid.randomUuid(), new 
TopicPartition("foo", 0))
+    val metadataList = generateRemoteLogSegmentMetadata(size = 3, tpId)
+
+    assertCacheSize(0)
+    // getIndex for first time will call rsm#fetchIndex
+    val cacheEntry = cache.getIndexEntry(metadataList.head)
+    assertCacheSize(1)
+    
assertTrue(getIndexFileFromRemoteCacheDir(LogFileUtils.INDEX_FILE_SUFFIX).isPresent)
+    
assertTrue(getIndexFileFromRemoteCacheDir(LogFileUtils.TXN_INDEX_FILE_SUFFIX).isPresent)
+    
assertTrue(getIndexFileFromRemoteCacheDir(LogFileUtils.TIME_INDEX_FILE_SUFFIX).isPresent)
+
+    // Reduce the cache limit to 1 to ensure that all are evicted.

Review Comment:
   Can we update the comment?
   
   ```
   Reduce the cache size to 1 byte to ensure that all the entries are evicted 
from it.
   ```
   
   Without this change, it's not clear whether the `1` refers to the number 
of entries in the cache or to the cache size in bytes.



##########
core/src/test/scala/unit/kafka/log/remote/RemoteIndexCacheTest.scala:
##########
@@ -554,6 +554,115 @@ class RemoteIndexCacheTest {
     assertTrue(cache.internalCache().estimatedSize() == 0)
   }
 
+  @Test
+  def testCorrectnessForCacheAndIndexFilesWhenResizeCache(): Unit = {
+
+    def getIndexFileFromRemoteCacheDir(suffix: String) = {
+      Files.walk(cache.cacheDir())
+        .filter(Files.isRegularFile(_))
+        .filter(path => path.getFileName.toString.endsWith(suffix))
+        .findAny()
+    }
+
+    // The test process for resizing is: put 1 entry -> evict to empty -> put 
3 entrys with limited capacity of 2 entrys -> evict to 1 entry
+    val estimateEntryBytesSize = estimateOneEntryBytesSize()
+    val tpId = new TopicIdPartition(Uuid.randomUuid(), new 
TopicPartition("foo", 0))
+    val metadataList = generateRemoteLogSegmentMetadata(size = 3, tpId)
+
+    assertCacheSize(0)
+    // getIndex for first time will call rsm#fetchIndex
+    val cacheEntry = cache.getIndexEntry(metadataList.head)
+    assertCacheSize(1)
+    
assertTrue(getIndexFileFromRemoteCacheDir(LogFileUtils.INDEX_FILE_SUFFIX).isPresent)
+    
assertTrue(getIndexFileFromRemoteCacheDir(LogFileUtils.TXN_INDEX_FILE_SUFFIX).isPresent)
+    
assertTrue(getIndexFileFromRemoteCacheDir(LogFileUtils.TIME_INDEX_FILE_SUFFIX).isPresent)
+
+    // Reduce the cache limit to 1 to ensure that all are evicted.
+    cache.resizeCacheSize(1L)
+
+    // wait until entry is marked for deletion
+    TestUtils.waitUntilTrue(() => cacheEntry.isMarkedForCleanup,
+      "Failed to mark cache entry for cleanup after resizing cache.")
+    TestUtils.waitUntilTrue(() => cacheEntry.isCleanStarted,
+      "Failed to cleanup cache entry after resizing cache.")
+
+    // verify no index files on remote cache dir
+    TestUtils.waitUntilTrue(() => 
!getIndexFileFromRemoteCacheDir(LogFileUtils.INDEX_FILE_SUFFIX).isPresent,
+      s"Offset index file should not be present on disk at 
${cache.cacheDir()}")
+    TestUtils.waitUntilTrue(() => 
!getIndexFileFromRemoteCacheDir(LogFileUtils.TXN_INDEX_FILE_SUFFIX).isPresent,
+      s"Txn index file should not be present on disk at ${cache.cacheDir()}")
+    TestUtils.waitUntilTrue(() => 
!getIndexFileFromRemoteCacheDir(LogFileUtils.TIME_INDEX_FILE_SUFFIX).isPresent,
+      s"Time index file should not be present on disk at ${cache.cacheDir()}")
+    TestUtils.waitUntilTrue(() => 
!getIndexFileFromRemoteCacheDir(LogFileUtils.DELETED_FILE_SUFFIX).isPresent,
+      s"Index file marked for deletion should not be present on disk at 
${cache.cacheDir()}")
+
+    assertTrue(cache.internalCache().estimatedSize() == 0)
+
+    // Increase cache capacity to only store 2 entries
+    cache.resizeCacheSize(2 * estimateEntryBytesSize)
+    assertCacheSize(0)
+
+    val cacheEntrys = new mutable.HashMap[Uuid, RemoteIndexCache.Entry]()
+    cacheEntrys.put(metadataList(0).remoteLogSegmentId().id(), 
cache.getIndexEntry(metadataList(0)))
+    cacheEntrys.put(metadataList(1).remoteLogSegmentId().id(), 
cache.getIndexEntry(metadataList(1)))
+    cacheEntrys.put(metadataList(2).remoteLogSegmentId().id(), 
cache.getIndexEntry(metadataList(2)))
+
+    assertCacheSize(2)
+    val missingMetadataOpt = {
+      metadataList.find(segmentMetadata => {
+        val segmentId = segmentMetadata.remoteLogSegmentId().id()
+        !cache.internalCache.asMap().containsKey(segmentId)
+      })
+    }
+    assertFalse(missingMetadataOpt.isEmpty)
+    val missingEntry = cacheEntrys.find(entry => 
entry._1.equals(missingMetadataOpt.get.remoteLogSegmentId().id())).get._2
+    // wait until evicted entry is marked for deletion
+    TestUtils.waitUntilTrue(() => missingEntry.isMarkedForCleanup,
+      "Failed to mark evicted cache entry for cleanup after resizing cache.")
+    TestUtils.waitUntilTrue(() => missingEntry.isCleanStarted,
+      "Failed to cleanup evicted cache entry after resizing cache.")
+    // verify no index files for `missingEntry` on remote cache dir
+    TestUtils.waitUntilTrue(() => 
!getIndexFileFromRemoteCacheDir(remoteOffsetIndexFileName(missingMetadataOpt.get)).isPresent,
+      s"Offset index file for evicted entry should not be present on disk at 
${cache.cacheDir()}")
+    TestUtils.waitUntilTrue(() => 
!getIndexFileFromRemoteCacheDir(remoteTimeIndexFileName(missingMetadataOpt.get)).isPresent,
+      s"Time index file for evicted entry should not be present on disk at 
${cache.cacheDir()}")
+    TestUtils.waitUntilTrue(() => 
!getIndexFileFromRemoteCacheDir(remoteTransactionIndexFileName(missingMetadataOpt.get)).isPresent,
+      s"Txn index file for evicted entry should not be present on disk at 
${cache.cacheDir()}")
+    TestUtils.waitUntilTrue(() => 
!getIndexFileFromRemoteCacheDir(remoteDeletedSuffixIndexFileName(missingMetadataOpt.get)).isPresent,
+      s"Index file marked for deletion for evicted entry should not be present 
on disk at ${cache.cacheDir()}")
+
+    val cacheMetadataList = metadataList.filter(segmentMetadata => 
segmentMetadata.remoteLogSegmentId().id() != 
missingMetadataOpt.get.remoteLogSegmentId().id())
+
+    // Reduce cache capacity to only store 1 entries
+    cache.resizeCacheSize(1 * estimateEntryBytesSize)
+    assertCacheSize(1)
+
+    val nextMissingMetadataOpt = {

Review Comment:
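   Same as above: since this is an LRU cache, can we directly check whether 
the expected entry is evicted, instead of searching for the missing one?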



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: jira-unsubscr...@kafka.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org

Reply via email to