hachikuji commented on a change in pull request #9713:
URL: https://github.com/apache/kafka/pull/9713#discussion_r539640590



##########
File path: core/src/main/scala/kafka/server/ZkIsrManager.scala
##########
@@ -0,0 +1,116 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package kafka.server
+
+import kafka.utils.{Logging, ReplicationUtils, Scheduler}
+import kafka.zk.KafkaZkClient
+import org.apache.kafka.common.TopicPartition
+import org.apache.kafka.common.protocol.Errors
+import org.apache.kafka.common.utils.Time
+
+import java.util.concurrent.TimeUnit
+import java.util.concurrent.atomic.AtomicLong
+import scala.collection.mutable
+
+/**
+ * @param checkIntervalMs How often to check for ISR
+ * @param maxDelayMs  Maximum time that an ISR change may be delayed before 
sending the notification
+ * @param lingerMs  Maximum time to await additional changes before sending 
the notification
+ */
+case class IsrChangePropagationConfig(checkIntervalMs: Long, maxDelayMs: Long, 
lingerMs: Long)
+
+object ZkIsrManager {
+  // This field is mutable to allow overriding change notification behavior in 
test cases
+  @volatile var DefaultIsrPropagationConfig: IsrChangePropagationConfig = 
IsrChangePropagationConfig(
+    checkIntervalMs = 2500,
+    lingerMs = 5000,
+    maxDelayMs = 60000,
+  )
+}
+
+class ZkIsrManager(scheduler: Scheduler, time: Time, zkClient: KafkaZkClient) 
extends AlterIsrManager with Logging {
+
+  private val isrChangeNotificationConfig = 
ZkIsrManager.DefaultIsrPropagationConfig
+  // Visible for testing
+  private[server] val isrChangeSet: mutable.Set[TopicPartition] = new 
mutable.HashSet[TopicPartition]()
+  private val lastIsrChangeMs = new AtomicLong(time.milliseconds())
+  private val lastIsrPropagationMs = new AtomicLong(time.milliseconds())
+
+  override def start(): Unit = {
+    scheduler.schedule("isr-change-propagation", maybePropagateIsrChanges _,
+      period = isrChangeNotificationConfig.checkIntervalMs, unit = 
TimeUnit.MILLISECONDS)
+  }
+
+  override def clearPending(topicPartition: TopicPartition): Unit = {
+    // Since we always immediately process ZK updates and never actually 
enqueue anything, there is nothing to
+    // clear here so this is a no-op. Even if there are changes that have not 
been propagated, the write to ZK
+    // has already happened, so we may as well send the notification to the 
controller.
+  }
+
+  override def enqueue(alterIsrItem: AlterIsrItem): Boolean = {

Review comment:
       The name "enqueue" suggests an asynchronous change. I wonder if there is 
another name we could use. Perhaps "submit" suggests less about the 
implementation?

##########
File path: core/src/main/scala/kafka/cluster/Partition.scala
##########
@@ -51,40 +51,8 @@ trait IsrChangeListener {
   def markFailed(): Unit
 }
 
-trait PartitionStateStore {
-  def fetchTopicConfig(): Properties
-  def shrinkIsr(controllerEpoch: Int, leaderAndIsr: LeaderAndIsr): Option[Int]
-  def expandIsr(controllerEpoch: Int, leaderAndIsr: LeaderAndIsr): Option[Int]
-}
-
-class ZkPartitionStateStore(topicPartition: TopicPartition,
-                            zkClient: KafkaZkClient) extends 
PartitionStateStore {
-
-  override def fetchTopicConfig(): Properties = {
-    val adminZkClient = new AdminZkClient(zkClient)
-    adminZkClient.fetchEntityConfig(ConfigType.Topic, topicPartition.topic)
-  }
-
-  override def shrinkIsr(controllerEpoch: Int, leaderAndIsr: LeaderAndIsr): 
Option[Int] = {
-    val newVersionOpt = updateIsr(controllerEpoch, leaderAndIsr)
-    newVersionOpt
-  }
-
-  override def expandIsr(controllerEpoch: Int, leaderAndIsr: LeaderAndIsr): 
Option[Int] = {
-    val newVersionOpt = updateIsr(controllerEpoch, leaderAndIsr)
-    newVersionOpt
-  }
-
-  private def updateIsr(controllerEpoch: Int, leaderAndIsr: LeaderAndIsr): 
Option[Int] = {
-    val (updateSucceeded, newVersion) = 
ReplicationUtils.updateLeaderAndIsr(zkClient, topicPartition,
-      leaderAndIsr, controllerEpoch)
-
-    if (updateSucceeded) {
-      Some(newVersion)
-    } else {
-      None
-    }
-  }
+trait TopicConfigProvider {
+  def get(): Properties

Review comment:
       nit: I wonder if it might be better to use a verb like `fetch`, which 
suggests that some overhead is involved.

##########
File path: core/src/main/scala/kafka/server/AlterIsrManager.scala
##########
@@ -46,13 +51,36 @@ trait AlterIsrManager {
   def clearPending(topicPartition: TopicPartition): Unit
 }
 
-case class AlterIsrItem(topicPartition: TopicPartition, leaderAndIsr: 
LeaderAndIsr, callback: Either[Errors, LeaderAndIsr] => Unit)
+case class AlterIsrItem(topicPartition: TopicPartition,
+                        leaderAndIsr: LeaderAndIsr,
+                        callback: Either[Errors, LeaderAndIsr] => Unit,
+                        controllerEpoch: Int = -1) // controllerEpoch needed 
for Zk impl

Review comment:
       We don't have to do it here, but I think the controller epoch is no 
longer needed. This was previously used as a sort of poor man's controller 
fencing, but now the controller has a stronger mechanism relying on conditional 
zk updates.
   
   By the way, do we need the default value? Especially the fact that it is a 
sentinel seems dangerous.

##########
File path: core/src/main/scala/kafka/server/ZkIsrManager.scala
##########
@@ -0,0 +1,116 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package kafka.server
+
+import kafka.utils.{Logging, ReplicationUtils, Scheduler}
+import kafka.zk.KafkaZkClient
+import org.apache.kafka.common.TopicPartition
+import org.apache.kafka.common.protocol.Errors
+import org.apache.kafka.common.utils.Time
+
+import java.util.concurrent.TimeUnit
+import java.util.concurrent.atomic.AtomicLong
+import scala.collection.mutable
+
+/**
+ * @param checkIntervalMs How often to check for ISR
+ * @param maxDelayMs  Maximum time that an ISR change may be delayed before 
sending the notification
+ * @param lingerMs  Maximum time to await additional changes before sending 
the notification
+ */
+case class IsrChangePropagationConfig(checkIntervalMs: Long, maxDelayMs: Long, 
lingerMs: Long)
+
+object ZkIsrManager {
+  // This field is mutable to allow overriding change notification behavior in 
test cases
+  @volatile var DefaultIsrPropagationConfig: IsrChangePropagationConfig = 
IsrChangePropagationConfig(
+    checkIntervalMs = 2500,
+    lingerMs = 5000,
+    maxDelayMs = 60000,
+  )
+}
+
+class ZkIsrManager(scheduler: Scheduler, time: Time, zkClient: KafkaZkClient) 
extends AlterIsrManager with Logging {
+
+  private val isrChangeNotificationConfig = 
ZkIsrManager.DefaultIsrPropagationConfig
+  // Visible for testing
+  private[server] val isrChangeSet: mutable.Set[TopicPartition] = new 
mutable.HashSet[TopicPartition]()
+  private val lastIsrChangeMs = new AtomicLong(time.milliseconds())
+  private val lastIsrPropagationMs = new AtomicLong(time.milliseconds())
+
+  override def start(): Unit = {
+    scheduler.schedule("isr-change-propagation", maybePropagateIsrChanges _,
+      period = isrChangeNotificationConfig.checkIntervalMs, unit = 
TimeUnit.MILLISECONDS)
+  }
+
+  override def clearPending(topicPartition: TopicPartition): Unit = {
+    // Since we always immediately process ZK updates and never actually 
enqueue anything, there is nothing to
+    // clear here so this is a no-op. Even if there are changes that have not 
been propagated, the write to ZK
+    // has already happened, so we may as well send the notification to the 
controller.
+  }
+
+  override def enqueue(alterIsrItem: AlterIsrItem): Boolean = {
+    debug(s"Writing new ISR " + alterIsrItem.leaderAndIsr.isr + " to ZooKeeper 
with version " +
+      alterIsrItem.leaderAndIsr.zkVersion + " for partition " + 
alterIsrItem.topicPartition)
+
+    val (updateSucceeded, newVersion) = 
ReplicationUtils.updateLeaderAndIsr(zkClient, alterIsrItem.topicPartition,
+      alterIsrItem.leaderAndIsr, alterIsrItem.controllerEpoch)
+
+    if (updateSucceeded) {
+      // Track which partitions need to be propagated to the controller
+      isrChangeSet synchronized {
+        isrChangeSet += alterIsrItem.topicPartition
+        lastIsrChangeMs.set(time.milliseconds())
+      }
+
+      // We actually need to apply the callback in another thread since 
Partition#sendAlterIsrRequest will write

Review comment:
       Hmm.. It feels a tad brittle to rely on locking assumptions like this. 
Perhaps we could let `enqueue` return a different response in the case that the 
change was applied immediately? For example, we could return `Option[Int]` to 
indicate the new version if the change was applied. Note that we can probably 
discard the boolean return type and let `enqueue` raise an illegal state 
exception directly since that is what `Partition` is already doing.




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


Reply via email to