[GitHub] [kafka] dajac commented on a change in pull request #10960: KAFKA-12981 Ensure LogSegment.maxTimestampSoFar and LogSegment.offsetOfMaxTimestampSoFar are read/updated in sync

GitBox Mon, 05 Jul 2021 05:16:52 -0700


dajac commented on a change in pull request #10960:
URL: https://github.com/apache/kafka/pull/10960#discussion_r663877163




##########
File path: core/src/main/scala/kafka/log/LogSegment.scala
##########
@@ -99,21 +99,21 @@ class LogSegment private[log] (val log: FileRecords,
   // volatile for LogCleaner to see the update
   @volatile private var rollingBasedTimestamp: Option[Long] = None
 
+  /* The maximum timestamp and offset we see so far */
+  @volatile private var _maxTimestampAndOffsetSoFar: TimestampOffset = 
TimestampOffset.Unknown
+  def maxTimestampAndOffsetSoFar: TimestampOffset = {
+    if (_maxTimestampAndOffsetSoFar == TimestampOffset.Unknown)
+      _maxTimestampAndOffsetSoFar = 
TimestampOffset(timeIndex.lastEntry.timestamp,timeIndex.lastEntry.offset)

Review comment:
       It seems that we could directly set `_maxTimestampAndOffsetSoFar` to 
`timeIndex.lastEntry`.

##########
File path: core/src/main/scala/kafka/log/LogSegment.scala
##########
@@ -338,22 +337,21 @@ class LogSegment private[log] (val log: FileRecords,
     txnIndex.reset()
     var validBytes = 0
     var lastIndexEntry = 0
-    maxTimestampSoFar = RecordBatch.NO_TIMESTAMP
+    _maxTimestampAndOffsetSoFar = TimestampOffset.Unknown
     try {
       for (batch <- log.batches.asScala) {
         batch.ensureValid()
         ensureOffsetInRange(batch.lastOffset)
 
         // The max timestamp is exposed at the batch level, so no need to 
iterate the records
         if (batch.maxTimestamp > maxTimestampSoFar) {
-          maxTimestampSoFar = batch.maxTimestamp
-          offsetOfMaxTimestampSoFar = batch.lastOffset
+          _maxTimestampAndOffsetSoFar = TimestampOffset(batch.maxTimestamp, 
batch.lastOffset)
         }
 
         // Build offset index
         if (validBytes - lastIndexEntry > indexIntervalBytes) {
           offsetIndex.append(batch.lastOffset, validBytes)
-          timeIndex.maybeAppend(maxTimestampSoFar, offsetOfMaxTimestampSoFar)
+          timeIndex.maybeAppend(maxTimestampAndOffsetSoFar.timestamp, 
maxTimestampAndOffsetSoFar.offset)

Review comment:
       ditto.

##########
File path: core/src/main/scala/kafka/log/LogSegment.scala
##########
@@ -158,13 +158,12 @@ class LogSegment private[log] (val log: FileRecords,
       trace(s"Appended $appendedBytes to ${log.file} at end offset 
$largestOffset")
       // Update the in memory max timestamp and corresponding offset.
       if (largestTimestamp > maxTimestampSoFar) {
-        maxTimestampSoFar = largestTimestamp
-        offsetOfMaxTimestampSoFar = shallowOffsetOfMaxTimestamp
+        _maxTimestampAndOffsetSoFar = TimestampOffset(largestTimestamp, 
shallowOffsetOfMaxTimestamp)

Review comment:
       I would rather keep using a setter like we had before.

##########
File path: core/src/main/scala/kafka/log/LogSegment.scala
##########
@@ -680,4 +676,4 @@ object LogSegment {
 
 object LogFlushStats extends KafkaMetricsGroup {
   val logFlushTimer = new KafkaTimer(newTimer("LogFlushRateAndTimeMs", 
TimeUnit.MILLISECONDS, TimeUnit.SECONDS))
-}
+}

Review comment:
       Could we revert this change?

##########
File path: core/src/main/scala/kafka/log/LogSegment.scala
##########
@@ -378,23 +376,21 @@ class LogSegment private[log] (val log: FileRecords,
     log.truncateTo(validBytes)
     offsetIndex.trimToValidSize()
     // A normally closed segment always appends the biggest timestamp ever 
seen into log segment, we do this as well.
-    timeIndex.maybeAppend(maxTimestampSoFar, offsetOfMaxTimestampSoFar, 
skipFullCheck = true)
+    timeIndex.maybeAppend(maxTimestampAndOffsetSoFar.timestamp, 
maxTimestampAndOffsetSoFar.offset, skipFullCheck = true)

Review comment:
       ditto.

##########
File path: core/src/main/scala/kafka/log/LogSegment.scala
##########
@@ -503,7 +499,7 @@ class LogSegment private[log] (val log: FileRecords,
    * The time index entry appended will be used to decide when to delete the 
segment.
    */
   def onBecomeInactiveSegment(): Unit = {
-    timeIndex.maybeAppend(maxTimestampSoFar, offsetOfMaxTimestampSoFar, 
skipFullCheck = true)
+    timeIndex.maybeAppend(maxTimestampAndOffsetSoFar.timestamp, 
maxTimestampAndOffsetSoFar.offset, skipFullCheck = true)

Review comment:
       ditto.

##########
File path: core/src/main/scala/kafka/log/LogSegment.scala
##########
@@ -158,13 +158,12 @@ class LogSegment private[log] (val log: FileRecords,
       trace(s"Appended $appendedBytes to ${log.file} at end offset 
$largestOffset")
       // Update the in memory max timestamp and corresponding offset.
       if (largestTimestamp > maxTimestampSoFar) {
-        maxTimestampSoFar = largestTimestamp
-        offsetOfMaxTimestampSoFar = shallowOffsetOfMaxTimestamp
+        _maxTimestampAndOffsetSoFar = TimestampOffset(largestTimestamp, 
shallowOffsetOfMaxTimestamp)
       }
       // append an entry to the index (if needed)
       if (bytesSinceLastIndexEntry > indexIntervalBytes) {
         offsetIndex.append(largestOffset, physicalPosition)
-        timeIndex.maybeAppend(maxTimestampSoFar, offsetOfMaxTimestampSoFar)
+        timeIndex.maybeAppend(maxTimestampAndOffsetSoFar.timestamp, 
maxTimestampAndOffsetSoFar.offset)

Review comment:
       We could keep the previous version of this line.

##########
File path: core/src/main/scala/kafka/log/LogSegment.scala
##########
@@ -584,8 +580,8 @@ class LogSegment private[log] (val log: FileRecords,
    * Close this log segment
    */
   def close(): Unit = {
-    if (_maxTimestampSoFar.nonEmpty || _offsetOfMaxTimestampSoFar.nonEmpty)
-      CoreUtils.swallow(timeIndex.maybeAppend(maxTimestampSoFar, 
offsetOfMaxTimestampSoFar,
+    if (_maxTimestampAndOffsetSoFar != TimestampOffset.Unknown)
+      
CoreUtils.swallow(timeIndex.maybeAppend(maxTimestampAndOffsetSoFar.timestamp, 
maxTimestampAndOffsetSoFar.offset,

Review comment:
       ditto.




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: jira-unsubscr...@kafka.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org

[GitHub] [kafka] dajac commented on a change in pull request #10960: KAFKA-12981 Ensure LogSegment.maxTimestampSoFar and LogSegment.offsetOfMaxTimestampSoFar are read/updated in sync

Reply via email to