showuon commented on code in PR #15616:
URL: https://github.com/apache/kafka/pull/15616#discussion_r1562184559
##########
storage/src/main/java/org/apache/kafka/storage/internals/log/LogSegment.java:
##########
@@ -800,8 +802,23 @@ private Void deleteTypeIfExists(StorageAction<Boolean,
IOException> delete, Stri
try {
if (delete.execute())
LOGGER.info("Deleted {} {}.", fileType,
file.getAbsolutePath());
- else if (logIfMissing)
- LOGGER.info("Failed to delete {} {} because it does not
exist.", fileType, file.getAbsolutePath());
+ else {
+ if (logIfMissing) {
+ LOGGER.info("Failed to delete {} {} because it does not
exist.", fileType, file.getAbsolutePath());
+ }
+
+ // During alter log dir, the log segment may be moved to a new
directory, so async delete may fail.
+ // Fallback to delete the file in the new directory to avoid
orphan file.
+ Pattern dirPattern =
Pattern.compile("^(\\S+)-(\\S+)\\.(\\S+)-(delete|future)");
+ Matcher dirMatcher = dirPattern.matcher(file.getParent());
+ if (dirMatcher.matches()) {
+ String topicPartitionAbsolutePath = dirMatcher.group(1) +
"-" + dirMatcher.group(2);
+ File fallbackFile = new File(topicPartitionAbsolutePath,
file.getName());
+ if (fallbackFile.exists() && fallbackFile.delete()) {
Review Comment:
Does the file name always end with `.delete`? Should we check it before
deletion?
##########
storage/src/main/java/org/apache/kafka/storage/internals/log/LogSegment.java:
##########
@@ -800,8 +802,23 @@ private Void deleteTypeIfExists(StorageAction<Boolean,
IOException> delete, Stri
try {
if (delete.execute())
LOGGER.info("Deleted {} {}.", fileType,
file.getAbsolutePath());
- else if (logIfMissing)
- LOGGER.info("Failed to delete {} {} because it does not
exist.", fileType, file.getAbsolutePath());
+ else {
+ if (logIfMissing) {
+ LOGGER.info("Failed to delete {} {} because it does not
exist.", fileType, file.getAbsolutePath());
+ }
+
+ // During alter log dir, the log segment may be moved to a new
directory, so async delete may fail.
+ // Fallback to delete the file in the new directory to avoid
orphan file.
+ Pattern dirPattern =
Pattern.compile("^(\\S+)-(\\S+)\\.(\\S+)-(delete|future)");
Review Comment:
1. Why does it contain `delete` at the end?
2. Unfortunately, the topic name could contain `-` or `.`, so it's unsafe to
use a regex like this.
I'm thinking we can pass `topicPartition` as a parameter into
`deleteTypeIfExists` so that we don't need any further regex matching. Then we can
just verify that fileName.endsWith("future"), because a normal folder name
should always end with a number (the partition number) instead of "future". WDYT?
##########
storage/src/main/java/org/apache/kafka/storage/internals/log/LogSegment.java:
##########
@@ -800,8 +802,23 @@ private Void deleteTypeIfExists(StorageAction<Boolean,
IOException> delete, Stri
try {
if (delete.execute())
LOGGER.info("Deleted {} {}.", fileType,
file.getAbsolutePath());
- else if (logIfMissing)
- LOGGER.info("Failed to delete {} {} because it does not
exist.", fileType, file.getAbsolutePath());
+ else {
+ if (logIfMissing) {
+ LOGGER.info("Failed to delete {} {} because it does not
exist.", fileType, file.getAbsolutePath());
+ }
+
+ // During alter log dir, the log segment may be moved to a new
directory, so async delete may fail.
+ // Fallback to delete the file in the new directory to avoid
orphan file.
+ Pattern dirPattern =
Pattern.compile("^(\\S+)-(\\S+)\\.(\\S+)-(delete|future)");
+ Matcher dirMatcher = dirPattern.matcher(file.getParent());
+ if (dirMatcher.matches()) {
+ String topicPartitionAbsolutePath = dirMatcher.group(1) +
"-" + dirMatcher.group(2);
+ File fallbackFile = new File(topicPartitionAbsolutePath,
file.getName());
+ if (fallbackFile.exists() && fallbackFile.delete()) {
+ LOGGER.warn("Fallback to delete {} {}.", fileType,
fallbackFile.getAbsolutePath());
Review Comment:
Why did we use `warn` here? I think we can use `info` since it's expected
behavior. WDYT?
##########
core/src/test/scala/unit/kafka/server/AlterReplicaLogDirsRequestTest.scala:
##########
@@ -116,6 +118,57 @@ class AlterReplicaLogDirsRequestTest extends
BaseRequestTest {
assertEquals(Errors.KAFKA_STORAGE_ERROR,
findErrorForPartition(alterReplicaDirResponse3, new TopicPartition(topic, 2)))
}
+ @Test
+ def testAlterReplicaLogDirsRequestWithRetention(): Unit = {
+ val partitionNum = 1
+
+ // Alter replica dir before topic creation
+ val logDir1 = new File(servers.head.config.logDirs(1)).getAbsolutePath
+ val partitionDirs1 = (0 until partitionNum).map(partition => new
TopicPartition(topic, partition) -> logDir1).toMap
+ val alterReplicaLogDirsResponse1 =
sendAlterReplicaLogDirsRequest(partitionDirs1)
+
+ // The response should show error UNKNOWN_TOPIC_OR_PARTITION for all
partitions
+ val tp = new TopicPartition(topic, 0)
+ assertEquals(Errors.UNKNOWN_TOPIC_OR_PARTITION,
findErrorForPartition(alterReplicaLogDirsResponse1, tp))
+ assertTrue(servers.head.logManager.getLog(tp).isEmpty)
+
+ val topicProperties = new Properties()
+ topicProperties.put(TopicConfig.RETENTION_BYTES_CONFIG, "1024")
+ topicProperties.put(TopicConfig.FILE_DELETE_DELAY_MS_CONFIG, "10000")
+ topicProperties.put(TopicConfig.SEGMENT_BYTES_CONFIG, "1024")
+
+ createTopic(topic, partitionNum, 1, topicProperties)
+ assertEquals(logDir1, servers.head.logManager.getLog(tp).get.dir.getParent)
+
+ // send enough records to trigger log rolling
+ (0 until 20).foreach { _ =>
+ TestUtils.generateAndProduceMessages(servers, topic, 10, 1)
+ }
+ TestUtils.waitUntilTrue(() => servers.head.logManager.getLog(new
TopicPartition(topic, 0)).get.numberOfSegments > 1,
+ "timed out waiting for log segment to roll")
+
+ // Wait for log segment retention. LogManager#InitialTaskDelayMs is 30
seconds.
+ // The first retention task is executed after 30 seconds, so waiting for
35 seconds should be enough.
+ TestUtils.waitUntilTrue(() => {
+ new File(logDir1,
tp.toString).listFiles().count(_.getName.endsWith(LogFileUtils.DELETED_FILE_SUFFIX))
> 0
+ }, "timed out waiting for log segment to retention", 35000)
Review Comment:
We should override the retention interval config (i.e.
`log.retention.check.interval.ms`) to maybe 500 ms to speed it up, so that we
don't need to increase the wait time.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]