satishd commented on code in PR #13850:
URL: https://github.com/apache/kafka/pull/13850#discussion_r1231867641


##########
core/src/main/scala/kafka/log/remote/RemoteIndexCache.scala:
##########
@@ -37,88 +40,125 @@ object RemoteIndexCache {
   val TmpFileSuffix = ".tmp"
 }
 
-class Entry(val offsetIndex: OffsetIndex, val timeIndex: TimeIndex, val txnIndex: TransactionIndex) {
+class Entry(val offsetIndex: OffsetIndex, val timeIndex: TimeIndex, val txnIndex: TransactionIndex) extends AutoCloseable {
   private var markedForCleanup: Boolean = false
-  private val lock: ReentrantReadWriteLock = new ReentrantReadWriteLock()
+  private val entryLock: ReentrantReadWriteLock = new ReentrantReadWriteLock()
 
   def lookupOffset(targetOffset: Long): OffsetPosition = {
-    CoreUtils.inLock(lock.readLock()) {
+    inReadLock(entryLock) {
       if (markedForCleanup) throw new IllegalStateException("This entry is marked for cleanup")
       else offsetIndex.lookup(targetOffset)
     }
   }
 
   def lookupTimestamp(timestamp: Long, startingOffset: Long): OffsetPosition = {
-    CoreUtils.inLock(lock.readLock()) {
+    inReadLock(entryLock) {
       if (markedForCleanup) throw new IllegalStateException("This entry is marked for cleanup")
-
       val timestampOffset = timeIndex.lookup(timestamp)
       offsetIndex.lookup(math.max(startingOffset, timestampOffset.offset))
     }
   }
 
   def markForCleanup(): Unit = {
-    CoreUtils.inLock(lock.writeLock()) {
+    inWriteLock(entryLock) {
       if (!markedForCleanup) {
         markedForCleanup = true
         Array(offsetIndex, timeIndex).foreach(index =>
           index.renameTo(new File(Utils.replaceSuffix(index.file.getPath, "", LogFileUtils.DELETED_FILE_SUFFIX))))
+        // txn index needs to be renamed separately since it's not of type AbstractIndex
         txnIndex.renameTo(new File(Utils.replaceSuffix(txnIndex.file.getPath, "",
           LogFileUtils.DELETED_FILE_SUFFIX)))
       }
     }
   }
 
+  /**
+   * Deletes the index files from the disk. Invoking #close is not required prior to this function.
+   */
   def cleanup(): Unit = {
     markForCleanup()
     CoreUtils.tryAll(Seq(() => offsetIndex.deleteIfExists(), () => timeIndex.deleteIfExists(), () => txnIndex.deleteIfExists()))
   }
 
+  /**
+   * Calls the underlying close method for each index which may lead to releasing resources such as mmap.
+   * This function does not delete the index files.
+   */
   def close(): Unit = {
-    Array(offsetIndex, timeIndex).foreach(index => try {
-      index.close()
-    } catch {
-      case _: Exception => // ignore error.
-    })
+    Utils.closeQuietly(offsetIndex, "Closing the offset index.")
+    Utils.closeQuietly(timeIndex, "Closing the time index.")
     Utils.closeQuietly(txnIndex, "Closing the transaction index.")
   }
 }
 
 /**
  * This is a LRU cache of remote index files stored in `$logdir/remote-log-index-cache`. This is helpful to avoid
- * re-fetching the index files like offset, time indexes from the remote storage for every fetch call.
+ * re-fetching the index files like offset, time indexes from the remote storage for every fetch call. The cache is
+ * re-initialized from the index files on disk on startup, if the index files are available.
+ *
+ * The cache contains a garbage collection thread which will delete the files for entries that have been removed from
+ * the cache.
+ *
+ * Note that closing this cache does not delete the index files on disk.
+ * Note that this cache is not strictly based on a LRU policy. It is based on the default implementation of Caffeine i.e.
+ * <a href="https://github.com/ben-manes/caffeine/wiki/Efficiency">Window TinyLfu</a>. TinyLfu relies on a frequency
+ * sketch to probabilistically estimate the historic usage of an entry.
  *
  * @param maxSize              maximum number of segment index entries to be cached.
  * @param remoteStorageManager RemoteStorageManager instance, to be used in fetching indexes.
  * @param logDir               log directory
  */
+@threadsafe
 class RemoteIndexCache(maxSize: Int = 1024, remoteStorageManager: RemoteStorageManager, logDir: String)
-  extends Logging with Closeable {
-
-  val cacheDir = new File(logDir, DirName)
-  @volatile var closed = false
-
-  val expiredIndexes = new LinkedBlockingQueue[Entry]()
-  val lock = new Object()
-
-  val entries: util.Map[Uuid, Entry] = new java.util.LinkedHashMap[Uuid, Entry](maxSize / 2,
-    0.75f, true) {
-    override def removeEldestEntry(eldest: util.Map.Entry[Uuid, Entry]): Boolean = {
-      if (this.size() > maxSize) {
-        val entry = eldest.getValue
-        // Mark the entries for cleanup, background thread will clean them later.
-        entry.markForCleanup()
-        expiredIndexes.add(entry)
-        true
-      } else {
-        false
-      }
-    }
-  }
+  extends Logging with AutoCloseable {
+  /**
+   * Directory where the index files will be stored on disk.
+   */
+  private val cacheDir = new File(logDir, DirName)
+  /**
+   * Represents if the cache is closed or not. Closing the cache is an irreversible operation.
+   */
+  private val closed: AtomicBoolean = new AtomicBoolean(false)
+  /**
+   * Unbounded queue containing the removed entries from the cache which are waiting to be garbage collected.
+   */
+  private val expiredIndexes = new LinkedBlockingQueue[Entry]()
+  /**
+   * Actual cache implementation that this file wraps around.
+   *
+   * The requirements for this internal cache is as follows:
+   * 1. Multiple threads should be able to read concurrently.
+   * 2. Fetch for missing keys should not block read for available keys.
+   * 3. Only one thread should fetch for a specific key.
+   * 4. Should support LRU policy.
+   *
+   * We use [[Caffeine]] cache instead of implementing a thread safe LRU cache on our own.
+   *
+   * Visible for testing.
+   */
+  private[remote] var internalCache: Cache[Uuid, Entry] = Caffeine.newBuilder()
+    .maximumSize(maxSize)
+    // removeListener is invoked when either the entry is invalidated (means manual removal by the caller) or
+    // evicted (means removal due to the policy)
+    .removalListener((_: Uuid, entry: Entry, _: RemovalCause) => {
+      // Mark the entries for cleanup and add them to the queue to be garbage collected later by the background thread.
+      entry.markForCleanup()
+      expiredIndexes.add(entry)
+    })
+    .build[Uuid, Entry]()
+
 
   private def init(): Unit = {
-    if (cacheDir.mkdir())
+    try {
+      Files.createDirectory(cacheDir.toPath)
       info(s"Created $cacheDir successfully")
+    } catch {
+      case _: FileAlreadyExistsException =>
+        info(s"RemoteIndexCache directory $cacheDir already exists. Re-using 
the same directory.")
+      case e: Exception =>
+        error(s"Unable to create directory $cacheDir for RemoteIndexCache.", e)
+        throw new IllegalArgumentException(e)

Review Comment:
   Why is it wrapped as `IllegalArgumentException`?
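   
   For context, a minimal self-contained sketch of one alternative (purely illustrative, my assumption rather than the PR author's intent) would surface the failure as a Kafka-level error instead of an argument-validation error:
   
   ```scala
   import java.io.File
   import java.nio.file.{FileAlreadyExistsException, Files}
   import org.apache.kafka.common.KafkaException
   
   object RemoteIndexCacheDirSketch {
     // Hypothetical helper, not the PR code: directory-creation failures are environmental,
     // so rethrowing as KafkaException (or letting the underlying IOException propagate) may
     // describe the problem better than IllegalArgumentException, which implies a bad caller argument.
     def createCacheDir(cacheDir: File): Unit = {
       try {
         Files.createDirectory(cacheDir.toPath)
       } catch {
         case _: FileAlreadyExistsException => // re-use the existing directory
         case e: Exception => throw new KafkaException(e)
       }
     }
   }
   ```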



##########
core/src/test/scala/unit/kafka/log/remote/RemoteIndexCacheTest.scala:
##########
@@ -170,44 +190,173 @@ class RemoteIndexCacheTest {
     assertThrows(classOf[IllegalStateException], () => cache.getIndexEntry(metadataList.head))
   }
 
+  @Test
+  def testCloseIsIdempotent(): Unit = {
+    val spyCleanerThread = spy(cache.cleanerThread)
+    cache.cleanerThread = spyCleanerThread
+    cache.close()
+    cache.close()
+    // verify that cleanup is only called once
+    verify(spyCleanerThread).initiateShutdown()
+  }
+
+  @Test
+  def testClose(): Unit = {
+    val spyInternalCache = spy(cache.internalCache)
+    val spyCleanerThread = spy(cache.cleanerThread)
+
+    // replace with new spy cache
+    cache.internalCache = spyInternalCache
+    cache.cleanerThread = spyCleanerThread
+
+    // use the cache
+    val tpId = new TopicIdPartition(Uuid.randomUuid(), new TopicPartition("foo", 0))
+    val metadataList = generateRemoteLogSegmentMetadata(size = 1, tpId)
+    val entry = cache.getIndexEntry(metadataList.head)
+
+    val spyTxnIndex = spy(entry.txnIndex)
+    val spyOffsetIndex = spy(entry.offsetIndex)
+    val spyTimeIndex = spy(entry.timeIndex)
+    // remove this entry and replace with spied entry
+    cache.internalCache.invalidateAll()
+    cache.internalCache.put(metadataList.head.remoteLogSegmentId().id(), new Entry(spyOffsetIndex, spyTimeIndex, spyTxnIndex))
+
+    // close the cache
+    cache.close()
+
+    // cleaner thread should be closed properly
+    verify(spyCleanerThread).initiateShutdown()
+    verify(spyCleanerThread).awaitShutdown()
+
+    // close for all index entries must be invoked
+    verify(spyTxnIndex).close()
+    verify(spyOffsetIndex).close()
+    verify(spyTimeIndex).close()
+
+    // index files must not be deleted
+    verify(spyTxnIndex, times(0)).deleteIfExists()
+    verify(spyOffsetIndex, times(0)).deleteIfExists()
+    verify(spyTimeIndex, times(0)).deleteIfExists()
+  }
+
+  @Test
+  def testConcurrentReadWriteAccessForCache(): Unit = {
+    val tpId = new TopicIdPartition(Uuid.randomUuid(), new TopicPartition("foo", 0))
+    val metadataList = generateRemoteLogSegmentMetadata(size = 3, tpId)
+
+    assertCacheSize(0)
+    // getIndex for first time will call rsm#fetchIndex
+    cache.getIndexEntry(metadataList.head)
+    assertCacheSize(1)
+    verifyFetchIndexInvocation(count = 1, Seq(IndexType.OFFSET, IndexType.TIMESTAMP))
+    reset(rsm)
+
+    // Simulate a concurrency situation where one thread is reading the entry already present in the cache (cache hit)
+    // and the other thread is reading an entry which is not available in the cache (cache miss). The expected behaviour
+    // is for the former thread to succeed while the latter is fetching from rsm.
+    // In this test we simulate the situation using latches. We perform the following operations:
+    // 1. Start the CacheMiss thread and wait until it starts executing the rsm.fetchIndex
+    // 2. Block the CacheMiss thread inside the call to rsm.fetchIndex.
+    // 3. Start the CacheHit thread. Assert that it performs a successful read.
+    // 4. On completion of successful read by CacheHit thread, signal the CacheMiss thread to release its block.
+    // 5. Validate that the test passes. If the CacheMiss thread was blocking the CacheHit thread, the test will fail.
+    //
+    val latchForCacheHit = new CountDownLatch(1)
+    val latchForCacheMiss = new CountDownLatch(1)
+
+    val readerCacheHit = (() => {
+      // Wait for signal to start executing the read
+      logger.debug(s"Waiting for signal to begin read from 
${Thread.currentThread()}")
+      latchForCacheHit.await()
+      val entry = cache.getIndexEntry(metadataList.head)
+      assertNotNull(entry)
+      // Signal the CacheMiss to unblock itself
+      logger.debug(s"Signaling CacheMiss to unblock from 
${Thread.currentThread()}")
+      latchForCacheMiss.countDown()
+    }): Runnable
+
+    when(rsm.fetchIndex(any(classOf[RemoteLogSegmentMetadata]), any(classOf[IndexType])))
+      .thenAnswer(_ => {
+        logger.debug(s"Signaling CacheHit to begin read from 
${Thread.currentThread()}")
+        latchForCacheHit.countDown()
+        logger.debug("Waiting for signal to complete rsm fetch from" + 
Thread.currentThread())
+        latchForCacheMiss.await()
+      })
+
+    val readerCacheMiss = (() => {
+      val entry = cache.getIndexEntry(metadataList.last)
+      assertNotNull(entry)
+    }): Runnable
+
+    val executor = Executors.newFixedThreadPool(2)
+    try {
+      executor.submit(readerCacheMiss: Runnable)
+      executor.submit(readerCacheHit: Runnable)
+      assertTrue(latchForCacheMiss.await(30, TimeUnit.SECONDS))
+    } finally {
+      executor.shutdownNow()
+    }
+  }
+
   @Test
   def testReloadCacheAfterClose(): Unit = {
-    val cache = new RemoteIndexCache(maxSize = 2, rsm, logDir = logDir.toString)
+    // close exiting cache created in test setup before creating a new one

Review Comment:
   nit typo:  `exiting` -> `existing`



##########
core/src/main/scala/kafka/log/remote/RemoteIndexCache.scala:
##########
@@ -167,14 +210,14 @@ class RemoteIndexCache(maxSize: Int = 1024, remoteStorageManager: RemoteStorageM
   init()
 
   // Start cleaner thread that will clean the expired entries
-  val cleanerThread: ShutdownableThread = new ShutdownableThread("remote-log-index-cleaner") {
+  private[remote] var cleanerThread: ShutdownableThread = new ShutdownableThread("remote-log-index-cleaner") {

Review Comment:
   Can you leave it as `val` instead of setting it as null in close?
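   
   A rough, self-contained sketch of the shape this suggests (my assumption of the intent; `ShutdownableThread` is replaced by a plain `Thread` here for brevity): keep the field a `val` and let the `closed` flag make `close()` idempotent, rather than re-assigning or nulling the reference.
   
   ```scala
   import java.util.concurrent.atomic.AtomicBoolean
   
   class CleanerOwnerSketch extends AutoCloseable {
     private val closed = new AtomicBoolean(false)
   
     // immutable reference; never nulled out on close
     private val cleanerThread: Thread = new Thread(() => {
       try {
         while (!closed.get()) Thread.sleep(10) // stands in for draining expired entries
       } catch {
         case _: InterruptedException => // shutdown requested
       }
     }, "remote-log-index-cleaner")
     cleanerThread.setDaemon(true)
     cleanerThread.start()
   
     override def close(): Unit = {
       // only the first call performs shutdown; later calls are no-ops
       if (closed.compareAndSet(false, true)) {
         cleanerThread.interrupt()
         cleanerThread.join()
       }
     }
   }
   ```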



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: jira-unsubscr...@kafka.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org
