hachikuji commented on a change in pull request #8850:
URL: https://github.com/apache/kafka/pull/8850#discussion_r438559177
##########
File path: core/src/main/scala/kafka/log/Log.scala
##########
@@ -1784,8 +1784,18 @@ class Log(@volatile private var _dir: File,
private def deleteRetentionMsBreachedSegments(): Int = {
if (config.retentionMs < 0) return 0
val startMs = time.milliseconds
- deleteOldSegments((segment, _) => startMs - segment.largestTimestamp >
config.retentionMs,
- reason = s"retention time ${config.retentionMs}ms breach")
+
+ def shouldDelete(segment: LogSegment, nextSegmentOpt: Option[LogSegment])
= {
+ if (startMs - segment.largestTimestamp > config.retentionMs) {
+ info(s"Segment with base offset ${segment.baseOffset} will be deleted
due to" +
Review comment:
`LogSegment.largestTimestamp` may refer to either the largest record
timestamp for newer formats or the last modified time of the segment for older
formats. I think it would be helpful if the log message indicated which case it
is. Perhaps we could add a method like this to `LogSegment`?
```scala
def largestRecordTimestamp: Option[Long]
```
##########
File path: core/src/main/scala/kafka/log/Log.scala
##########
@@ -1804,8 +1816,15 @@ class Log(@volatile private var _dir: File,
}
private def deleteLogStartOffsetBreachedSegments(): Int = {
- def shouldDelete(segment: LogSegment, nextSegmentOpt: Option[LogSegment]) =
- nextSegmentOpt.exists(_.baseOffset <= logStartOffset)
+ def shouldDelete(segment: LogSegment, nextSegmentOpt: Option[LogSegment])
= {
+ if (nextSegmentOpt.exists(_.baseOffset <= logStartOffset)) {
+ info (s"Segment with base offset ${segment.baseOffset} will be deleted
due to" +
Review comment:
nit: remove the extra space after `info`
##########
File path: core/src/main/scala/kafka/log/Log.scala
##########
@@ -1784,8 +1784,18 @@ class Log(@volatile private var _dir: File,
private def deleteRetentionMsBreachedSegments(): Int = {
if (config.retentionMs < 0) return 0
val startMs = time.milliseconds
- deleteOldSegments((segment, _) => startMs - segment.largestTimestamp >
config.retentionMs,
- reason = s"retention time ${config.retentionMs}ms breach")
+
+ def shouldDelete(segment: LogSegment, nextSegmentOpt: Option[LogSegment])
= {
+ if (startMs - segment.largestTimestamp > config.retentionMs) {
+ info(s"Segment with base offset ${segment.baseOffset} will be deleted
due to" +
+ s" retentionMs breach. Largest timestamp of segment is
${segment.largestTimestamp}")
+ true
+ } else {
+ false
+ }
+ }
+
+ deleteOldSegments(shouldDelete, reason = s"retention time
${config.retentionMs}ms breach")
Review comment:
With the logging we have above, do you think we still need the message
in `deleteOldSegments`? Perhaps we could at least make it more concise. Maybe
just mention the number of segments to be deleted, for example.
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
users@infra.apache.org