divijvaidya commented on code in PR #13947: URL: https://github.com/apache/kafka/pull/13947#discussion_r1294768292
########## core/src/main/java/kafka/log/remote/RemoteLogManager.java: ########## @@ -343,21 +345,80 @@ public void onLeadershipChange(Set<Partition> partitionsBecomeLeader, /** * Deletes the internal topic partition info if delete flag is set as true. * - * @param topicPartition topic partition to be stopped. + * @param topicPartitions topic partitions that needs to be stopped. * @param delete flag to indicate whether the given topic partitions to be deleted or not. + * @param errorHandler callback to handle any errors while stopping the partitions. */ - public void stopPartitions(TopicPartition topicPartition, boolean delete) { + public void stopPartitions(Set<TopicPartition> topicPartitions, + boolean delete, + BiConsumer<TopicPartition, Throwable> errorHandler) { + LOGGER.debug("Stopping {} partitions, delete: {}", topicPartitions.size(), delete); + Set<TopicIdPartition> topicIdPartitions = topicPartitions.stream() + .filter(topicIdByPartitionMap::containsKey) + .map(tp -> new TopicIdPartition(topicIdByPartitionMap.get(tp), tp)) + .collect(Collectors.toSet()); + + topicIdPartitions.forEach(tpId -> { + try { + RLMTaskWithFuture task = leaderOrFollowerTasks.remove(tpId); + if (task != null) { + LOGGER.info("Cancelling the RLM task for tpId: {}", tpId); + task.cancel(); + } + if (delete) { + LOGGER.info("Deleting the remote log segments task for partition: {}", tpId); + deleteRemoteLogPartition(tpId); + } + } catch (Exception ex) { + errorHandler.accept(tpId.topicPartition(), ex); + LOGGER.error("Error while stopping the partition: {}, delete: {}", tpId.topicPartition(), delete, ex); + } + }); + if (delete) { - // Delete from internal datastructures only if it is to be deleted. 
- Uuid topicIdPartition = topicPartitionIds.remove(topicPartition); - LOGGER.debug("Removed partition: {} from topicPartitionIds", topicIdPartition); + // NOTE: this#stopPartitions method is called when Replica state changes to Offline and ReplicaDeletionStarted Review Comment: In the future this will be called when TS on a topic gets "disabled". Do we want to only invoke it when the `delete` param is true? Shouldn't RLMM.stopPartitions be invoked always when RLM.stopPartition is called? ########## storage/src/main/java/org/apache/kafka/storage/internals/log/RemoteIndexCache.java: ########## @@ -166,6 +166,10 @@ public Cache<Uuid, Entry> internalCache() { return internalCache; } + public void remove(Uuid key) { Review Comment: Please acquire a read lock here so that it doesn't conflict with cache closing. We use a lock here to ensure that no other thread is accessing the cache when it is closing. ########## core/src/main/java/kafka/log/remote/RemoteLogManager.java: ########## @@ -343,21 +345,80 @@ public void onLeadershipChange(Set<Partition> partitionsBecomeLeader, /** * Deletes the internal topic partition info if delete flag is set as true. * - * @param topicPartition topic partition to be stopped. + * @param topicPartitions topic partitions that needs to be stopped. * @param delete flag to indicate whether the given topic partitions to be deleted or not. + * @param errorHandler callback to handle any errors while stopping the partitions. 
*/ - public void stopPartitions(TopicPartition topicPartition, boolean delete) { + public void stopPartitions(Set<TopicPartition> topicPartitions, + boolean delete, + BiConsumer<TopicPartition, Throwable> errorHandler) { + LOGGER.debug("Stopping {} partitions, delete: {}", topicPartitions.size(), delete); + Set<TopicIdPartition> topicIdPartitions = topicPartitions.stream() + .filter(topicIdByPartitionMap::containsKey) + .map(tp -> new TopicIdPartition(topicIdByPartitionMap.get(tp), tp)) + .collect(Collectors.toSet()); + + topicIdPartitions.forEach(tpId -> { + try { + RLMTaskWithFuture task = leaderOrFollowerTasks.remove(tpId); + if (task != null) { + LOGGER.info("Cancelling the RLM task for tpId: {}", tpId); + task.cancel(); + } + if (delete) { + LOGGER.info("Deleting the remote log segments task for partition: {}", tpId); + deleteRemoteLogPartition(tpId); + } + } catch (Exception ex) { + errorHandler.accept(tpId.topicPartition(), ex); + LOGGER.error("Error while stopping the partition: {}, delete: {}", tpId.topicPartition(), delete, ex); + } + }); + if (delete) { - // Delete from internal datastructures only if it is to be deleted. 
- Uuid topicIdPartition = topicPartitionIds.remove(topicPartition); - LOGGER.debug("Removed partition: {} from topicPartitionIds", topicIdPartition); + // NOTE: this#stopPartitions method is called when Replica state changes to Offline and ReplicaDeletionStarted + remoteLogMetadataManager.onStopPartitions(topicIdPartitions); + topicPartitions.forEach(topicIdByPartitionMap::remove); + } + } + + private void deleteRemoteLogPartition(TopicIdPartition partition) throws RemoteStorageException, ExecutionException, InterruptedException { + List<RemoteLogSegmentMetadata> metadataList = new ArrayList<>(); + remoteLogMetadataManager.listRemoteLogSegments(partition).forEachRemaining(metadataList::add); + + List<RemoteLogSegmentMetadataUpdate> deleteSegmentStartedEvents = metadataList.stream() + .map(metadata -> + new RemoteLogSegmentMetadataUpdate(metadata.remoteLogSegmentId(), time.milliseconds(), + metadata.customMetadata(), RemoteLogSegmentState.DELETE_SEGMENT_STARTED, brokerId)) + .collect(Collectors.toList()); + publishEvents(deleteSegmentStartedEvents).get(); + + // KAFKA-15313: Delete remote log segments partition asynchronously when a partition is deleted. + for (RemoteLogSegmentMetadata metadata: metadataList) { + indexCache.remove(metadata.remoteLogSegmentId().id()); + remoteLogStorageManager.deleteLogSegmentData(metadata); Review Comment: is this documented to be thread safe in the interface? ########## core/src/main/java/kafka/log/remote/RemoteLogManager.java: ########## @@ -343,21 +345,80 @@ public void onLeadershipChange(Set<Partition> partitionsBecomeLeader, /** * Deletes the internal topic partition info if delete flag is set as true. * - * @param topicPartition topic partition to be stopped. + * @param topicPartitions topic partitions that needs to be stopped. * @param delete flag to indicate whether the given topic partitions to be deleted or not. + * @param errorHandler callback to handle any errors while stopping the partitions. 
*/ - public void stopPartitions(TopicPartition topicPartition, boolean delete) { + public void stopPartitions(Set<TopicPartition> topicPartitions, Review Comment: In its current form, it seems that this function is designed to be executed concurrently by multiple threads. It could be called by a RequestHandler thread in ReplicaManager, and in the future it could be called by TopicConfigHandler when we want to disable (not delete) TS for a topic. Is the entire function thread safe? Let's check: 1. Do we document in the interface that `RLMM.onStopPartitions` should be thread safe? 2. Consider two threads trying to delete the same partition. They both enter deleteRemoteLogPartition(). One of them sends delete_started and delete_finished to RLMM but then the second one sends delete_started again. If RLMM is maintaining a state machine, it "will" have to accommodate the fact that delete_started can arrive after delete_finished, which is a bit awkward. 3. `errorHandler` could be called twice for the same partition and hence it should be thread safe. Is it? May I suggest that we execute this function in an exclusive lock. We can optimize with a per-partition-level lock later, but given that deletes are not frequent, we can start with a coarse-grained exclusive lock such that only one thread should be executing `stopPartitions` at one time. ########## storage/src/main/java/org/apache/kafka/storage/internals/log/RemoteIndexCache.java: ########## @@ -421,6 +425,7 @@ public void close() { // Initiate shutdown for cleaning thread boolean shutdownRequired = cleanerThread.initiateShutdown(); // Close all the opened indexes to force unload mmap memory. This does not delete the index files from disk. + internalCache.cleanUp(); Review Comment: See: https://github.com/apache/kafka/pull/13850#discussion_r1229087123 Think of this cache as a hashmap. When we are closing the cache, the garbage collector will automatically reclaim memory. We do not have to explicitly release any pinned resources. 
That is why calling cleanUp is not required. Also, cleanUp actually performs maintenance activity such as cache invalidation etc. which we don't really want. Let me take a look at tests to see why they are failing. ########## core/src/main/java/kafka/log/remote/RemoteLogManager.java: ########## @@ -343,21 +345,80 @@ public void onLeadershipChange(Set<Partition> partitionsBecomeLeader, /** * Deletes the internal topic partition info if delete flag is set as true. * - * @param topicPartition topic partition to be stopped. + * @param topicPartitions topic partitions that needs to be stopped. * @param delete flag to indicate whether the given topic partitions to be deleted or not. + * @param errorHandler callback to handle any errors while stopping the partitions. */ - public void stopPartitions(TopicPartition topicPartition, boolean delete) { + public void stopPartitions(Set<TopicPartition> topicPartitions, + boolean delete, + BiConsumer<TopicPartition, Throwable> errorHandler) { + LOGGER.debug("Stopping {} partitions, delete: {}", topicPartitions.size(), delete); + Set<TopicIdPartition> topicIdPartitions = topicPartitions.stream() + .filter(topicIdByPartitionMap::containsKey) + .map(tp -> new TopicIdPartition(topicIdByPartitionMap.get(tp), tp)) + .collect(Collectors.toSet()); + + topicIdPartitions.forEach(tpId -> { + try { + RLMTaskWithFuture task = leaderOrFollowerTasks.remove(tpId); + if (task != null) { + LOGGER.info("Cancelling the RLM task for tpId: {}", tpId); + task.cancel(); + } + if (delete) { + LOGGER.info("Deleting the remote log segments task for partition: {}", tpId); + deleteRemoteLogPartition(tpId); + } + } catch (Exception ex) { + errorHandler.accept(tpId.topicPartition(), ex); + LOGGER.error("Error while stopping the partition: {}, delete: {}", tpId.topicPartition(), delete, ex); + } + }); + if (delete) { - // Delete from internal datastructures only if it is to be deleted. 
- Uuid topicIdPartition = topicPartitionIds.remove(topicPartition); - LOGGER.debug("Removed partition: {} from topicPartitionIds", topicIdPartition); + // NOTE: this#stopPartitions method is called when Replica state changes to Offline and ReplicaDeletionStarted + remoteLogMetadataManager.onStopPartitions(topicIdPartitions); + topicPartitions.forEach(topicIdByPartitionMap::remove); + } + } + + private void deleteRemoteLogPartition(TopicIdPartition partition) throws RemoteStorageException, ExecutionException, InterruptedException { + List<RemoteLogSegmentMetadata> metadataList = new ArrayList<>(); + remoteLogMetadataManager.listRemoteLogSegments(partition).forEachRemaining(metadataList::add); + + List<RemoteLogSegmentMetadataUpdate> deleteSegmentStartedEvents = metadataList.stream() + .map(metadata -> + new RemoteLogSegmentMetadataUpdate(metadata.remoteLogSegmentId(), time.milliseconds(), + metadata.customMetadata(), RemoteLogSegmentState.DELETE_SEGMENT_STARTED, brokerId)) + .collect(Collectors.toList()); + publishEvents(deleteSegmentStartedEvents).get(); Review Comment: Since multiple threads could be executing this together for the same partition, this could end up being called twice for the same partition. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: jira-unsubscr...@kafka.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org