dhruvilshah3 commented on a change in pull request #10763: URL: https://github.com/apache/kafka/pull/10763#discussion_r652142906
########## File path: core/src/main/scala/kafka/log/LogLoader.scala ########## @@ -368,12 +368,50 @@ object LogLoader extends Logging { for (swapFile <- swapFiles) { val logFile = new File(CoreUtils.replaceSuffix(swapFile.getPath, Log.SwapFileSuffix, "")) val baseOffset = Log.offsetFromFile(logFile) + // Check whether swap index files exist: if not, the cleaned files must exist due to the + // existence of swap log file. Therefore, we rename the cleaned files to swap files and continue. + var recoverable = true + val swapOffsetIndexFile = Log.offsetIndexFile(swapFile.getParentFile, baseOffset, Log.SwapFileSuffix) + if (!swapOffsetIndexFile.exists()) { + val cleanedOffsetIndexFile = Log.offsetIndexFile(swapFile.getParentFile, baseOffset, Log.CleanedFileSuffix) + if (cleanedOffsetIndexFile.exists()) + cleanedOffsetIndexFile.renameTo(swapOffsetIndexFile) + else + recoverable = false + } + val swapTimeIndexFile = Log.timeIndexFile(swapFile.getParentFile, baseOffset, Log.SwapFileSuffix) + if (!swapTimeIndexFile.exists()) { + val cleanedTimeIndexFile = Log.timeIndexFile(swapFile.getParentFile, baseOffset, Log.CleanedFileSuffix) + if (cleanedTimeIndexFile.exists()) + cleanedTimeIndexFile.renameTo(swapTimeIndexFile) + else + recoverable = false + } + val swapTxnIndexFile = Log.transactionIndexFile(swapFile.getParentFile, baseOffset, Log.SwapFileSuffix) + if (!swapTxnIndexFile.exists()) { + val cleanedTxnIndexFile = Log.transactionIndexFile(swapFile.getParentFile, baseOffset, Log.CleanedFileSuffix) + if (cleanedTxnIndexFile.exists()) + cleanedTxnIndexFile.renameTo(swapTxnIndexFile) + else + recoverable = false + } val swapSegment = LogSegment.open(swapFile.getParentFile, baseOffset = baseOffset, params.config, time = params.time, fileSuffix = Log.SwapFileSuffix) - info(s"${params.logIdentifier}Found log file ${swapFile.getPath} from interrupted swap operation, repairing.") + if (recoverable) { + try { + swapSegment.sanityCheck(true) + info(s"Found log file ${swapFile.getPath} from interrupted swap operation, which is recoverable from ${Log.CleanedFileSuffix} files.") + swapSegment.changeFileSuffixes(Log.SwapFileSuffix, "") + return + } catch { + case _: NoSuchFileException => {} + // do nothing and fall back to the recover index logic + } + } + info(s"${params.logIdentifier}Found log file ${swapFile.getPath} from interrupted swap operation, which is not recoverable from ${Log.CleanedFileSuffix} files, repairing.") recoverSegment(swapSegment, params) Review comment: The main thing we want to avoid is running this recovery logic for scenarios where the `rename` operation was interrupted, as it rebuilds the producer state from scratch. Could we make this recovery conditional on whether we have all the relevant log files and indices? ########## File path: core/src/main/scala/kafka/log/LogLoader.scala ########## @@ -368,12 +368,50 @@ object LogLoader extends Logging { for (swapFile <- swapFiles) { val logFile = new File(CoreUtils.replaceSuffix(swapFile.getPath, Log.SwapFileSuffix, "")) val baseOffset = Log.offsetFromFile(logFile) + // Check whether swap index files exist: if not, the cleaned files must exist due to the + // existence of swap log file. Therefore, we rename the cleaned files to swap files and continue. + var recoverable = true + val swapOffsetIndexFile = Log.offsetIndexFile(swapFile.getParentFile, baseOffset, Log.SwapFileSuffix) + if (!swapOffsetIndexFile.exists()) { + val cleanedOffsetIndexFile = Log.offsetIndexFile(swapFile.getParentFile, baseOffset, Log.CleanedFileSuffix) + if (cleanedOffsetIndexFile.exists()) + cleanedOffsetIndexFile.renameTo(swapOffsetIndexFile) + else + recoverable = false + } + val swapTimeIndexFile = Log.timeIndexFile(swapFile.getParentFile, baseOffset, Log.SwapFileSuffix) + if (!swapTimeIndexFile.exists()) { + val cleanedTimeIndexFile = Log.timeIndexFile(swapFile.getParentFile, baseOffset, Log.CleanedFileSuffix) + if (cleanedTimeIndexFile.exists()) + cleanedTimeIndexFile.renameTo(swapTimeIndexFile) + else + recoverable = false + } + val swapTxnIndexFile = Log.transactionIndexFile(swapFile.getParentFile, baseOffset, Log.SwapFileSuffix) + if (!swapTxnIndexFile.exists()) { + val cleanedTxnIndexFile = Log.transactionIndexFile(swapFile.getParentFile, baseOffset, Log.CleanedFileSuffix) + if (cleanedTxnIndexFile.exists()) + cleanedTxnIndexFile.renameTo(swapTxnIndexFile) + else + recoverable = false + } val swapSegment = LogSegment.open(swapFile.getParentFile, baseOffset = baseOffset, params.config, time = params.time, fileSuffix = Log.SwapFileSuffix) - info(s"${params.logIdentifier}Found log file ${swapFile.getPath} from interrupted swap operation, repairing.") + if (recoverable) { Review comment: Could you elaborate a bit on what this block of code is doing? -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org