ningyougang commented on a change in pull request #5102: URL: https://github.com/apache/openwhisk/pull/5102#discussion_r622880300
########## File path: core/invoker/src/main/scala/org/apache/openwhisk/core/containerpool/v2/FunctionPullingContainerPool.scala ########## @@ -0,0 +1,857 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.openwhisk.core.containerpool.v2 + +import java.util.concurrent.atomic.AtomicInteger + +import akka.actor.{Actor, ActorRef, ActorRefFactory, Cancellable, Props} +import org.apache.kafka.clients.producer.RecordMetadata +import org.apache.openwhisk.common._ +import org.apache.openwhisk.core.connector.ContainerCreationError._ +import org.apache.openwhisk.core.connector.{ + ContainerCreationAckMessage, + ContainerCreationMessage, + ContainerDeletionMessage +} +import org.apache.openwhisk.core.containerpool.{ + AdjustPrewarmedContainer, + BlackboxStartupError, + ColdStartKey, + ContainerPool, + ContainerPoolConfig, + ContainerRemoved, + PrewarmingConfig, + WhiskContainerStartupError +} +import org.apache.openwhisk.core.entity._ +import org.apache.openwhisk.core.entity.size._ +import org.apache.openwhisk.http.Messages + +import scala.annotation.tailrec +import scala.collection.concurrent.TrieMap +import scala.collection.immutable +import scala.concurrent.Future +import scala.concurrent.duration._ 
+import scala.util.{Random, Try} +import scala.collection.immutable.Queue + +case class CreationContainer(creationMessage: ContainerCreationMessage, action: WhiskAction) +case class DeletionContainer(deletionMessage: ContainerDeletionMessage) +case object Remove +case class Keep(timeout: FiniteDuration) +case class PrewarmContainer(maxConcurrent: Int) + +/** + * A pool managing containers to run actions on. + * + * This pool fulfills the other half of the ContainerProxy contract. Only + * one job (either Start or Run) is sent to a child-actor at any given + * time. The pool then waits for a response of that container, indicating + * the container is done with the job. Only then will the pool send another + * request to that container. + * + * Upon actor creation, the pool will start to prewarm containers according + * to the provided prewarmConfig, iff set. Those containers will **not** be + * part of the poolsize calculation, which is capped by the poolSize parameter. + * Prewarm containers are only used, if they have matching arguments + * (kind, memory) and there is space in the pool. 
+ * + * @param childFactory method to create new container proxy actor + * @param prewarmConfig optional settings for container prewarming + * @param poolConfig config for the ContainerPool + */ +class FunctionPullingContainerPool( + childFactory: ActorRefFactory => ActorRef, + invokerHealthService: ActorRef, + poolConfig: ContainerPoolConfig, + instance: InvokerInstanceId, + prewarmConfig: List[PrewarmingConfig] = List.empty, + sendAckToScheduler: (SchedulerInstanceId, ContainerCreationAckMessage) => Future[RecordMetadata])( + implicit val logging: Logging) + extends Actor { + import ContainerPoolV2.memoryConsumptionOf + + implicit val ec = context.system.dispatcher + + private var busyPool = immutable.Map.empty[ActorRef, Data] + private var inProgressPool = immutable.Map.empty[ActorRef, Data] + private var warmedPool = immutable.Map.empty[ActorRef, WarmData] + private var prewarmedPool = immutable.Map.empty[ActorRef, PreWarmData] + private var prewarmStartingPool = immutable.Map.empty[ActorRef, (String, ByteSize)] + + private var shuttingDown = false + + private val creationMessages = TrieMap[ActorRef, ContainerCreationMessage]() + + private var preWarmScheduler: Option[Cancellable] = None + private var prewarmConfigQueue = Queue.empty[(CodeExec[_], ByteSize, Option[FiniteDuration])] + private val prewarmCreateFailedCount = new AtomicInteger(0) + + val logScheduler = context.system.scheduler.schedule(0.seconds, 1.seconds) { + MetricEmitter.emitHistogramMetric( + LoggingMarkers.INVOKER_CONTAINERPOOL_MEMORY("inprogress"), + memoryConsumptionOf(inProgressPool)) + MetricEmitter.emitHistogramMetric( + LoggingMarkers.INVOKER_CONTAINERPOOL_MEMORY("busy"), + memoryConsumptionOf(busyPool)) + MetricEmitter.emitHistogramMetric( + LoggingMarkers.INVOKER_CONTAINERPOOL_MEMORY("prewarmed"), + memoryConsumptionOf(prewarmedPool)) + MetricEmitter.emitHistogramMetric(LoggingMarkers.INVOKER_CONTAINERPOOL_MEMORY("max"), poolConfig.userMemory.toMB) + } + + // Key is ColdStartKey, 
value is the number of cold Start in minute + var coldStartCount = immutable.Map.empty[ColdStartKey, Int] + + adjustPrewarmedContainer(true, false) + + // check periodically, adjust prewarmed container(delete if unused for some time and create some increment containers) + // add some random amount to this schedule to avoid a herd of container removal + creation + val interval = poolConfig.prewarmExpirationCheckInterval + poolConfig.prewarmExpirationCheckIntervalVariance + .map(v => + Random + .nextInt(v.toSeconds.toInt)) + .getOrElse(0) + .seconds + + if (prewarmConfig.exists(!_.reactive.isEmpty)) { + context.system.scheduler.schedule( + poolConfig.prewarmExpirationCheckInitDelay, + interval, + self, + AdjustPrewarmedContainer) + } + + val resourceSubmitter = context.system.scheduler.schedule(0.seconds, poolConfig.memorySyncInterval) { + syncMemoryInfo + } + + private def logContainerStart(c: ContainerCreationMessage, action: WhiskAction, containerState: String): Unit = { + val FQN = c.action + if (FQN.namespace.name == "whisk.system" && FQN.fullPath.segments > 2) { + MetricEmitter.emitCounterMetric(LoggingMarkers.INVOKER_SHAREDPACKAGE(FQN.fullPath.asString)) + } + + MetricEmitter.emitCounterMetric( + LoggingMarkers.INVOKER_CONTAINER_START( + containerState, + c.invocationNamespace, + c.action.namespace.toString, + c.action.name.toString)) + } + + def receive: Receive = { + case PrewarmContainer(maxConcurrent) => + if (prewarmConfigQueue.isEmpty) { + preWarmScheduler.map(_.cancel()) + preWarmScheduler = None + } else { + for (_ <- 1 to maxConcurrent if !prewarmConfigQueue.isEmpty) { + val ((codeExec, byteSize, ttl), newQueue) = prewarmConfigQueue.dequeue + prewarmConfigQueue = newQueue + prewarmContainer(codeExec, byteSize, ttl) + } + } + + case CreationContainer(create: ContainerCreationMessage, action: WhiskAction) => + if (shuttingDown) { + val message = + s"creationId: ${create.creationId}, invoker is shutting down, reschedule 
${action.fullyQualifiedName(false)}" + val ack = ContainerCreationAckMessage( + create.transid, + create.creationId, + create.invocationNamespace, + create.action, + create.revision, + create.whiskActionMetaData, + instance, + create.schedulerHost, + create.rpcPort, + create.retryCount, + Some(ShuttingDownError), + Some(message)) + logging.warn(this, message) + sendAckToScheduler(create.rootSchedulerIndex, ack) + } else { + logging.info(this, s"received a container creation message: ${create.creationId}") + action.toExecutableWhiskAction match { + case Some(executable) => + val createdContainer = + takeWarmedContainer(executable, create.invocationNamespace, create.revision) + .map(container => (container, "warmed")) + .orElse { + takeContainer(executable) + } + handleChosenContainer(create, executable, createdContainer) + case None => + val message = + s"creationId: ${create.creationId}, non-executable action reached the container pool ${action.fullyQualifiedName(false)}" + logging.error(this, message) + val ack = ContainerCreationAckMessage( + create.transid, + create.creationId, + create.invocationNamespace, + create.action, + create.revision, + create.whiskActionMetaData, + instance, + create.schedulerHost, + create.rpcPort, + create.retryCount, + Some(NonExecutableActionError), + Some(message)) + sendAckToScheduler(create.rootSchedulerIndex, ack) + } + } + + case DeletionContainer(deletionMessage: ContainerDeletionMessage) => + val oldRevision = deletionMessage.revision + val invocationNamespace = deletionMessage.invocationNamespace + val fqn = deletionMessage.action.copy(version = None) + + warmedPool.foreach(warmed => { + val proxy = warmed._1 + val data = warmed._2 + + if (data.invocationNamespace == invocationNamespace + && data.action.fullyQualifiedName(withVersion = false) == fqn.copy(version = None) + && data.revision <= oldRevision) { + proxy ! 
GracefulShutdown + } + }) + + busyPool.foreach(f = busy => { + val proxy = busy._1 + busy._2 match { + case warmData: WarmData + if warmData.invocationNamespace == invocationNamespace + && warmData.action.fullyQualifiedName(withVersion = false) == fqn.copy(version = None) + && warmData.revision <= oldRevision => + proxy ! GracefulShutdown + case initializedData: InitializedData + if initializedData.invocationNamespace == invocationNamespace + && initializedData.action.fullyQualifiedName(withVersion = false) == fqn.copy(version = None) => + proxy ! GracefulShutdown + case _ => // Other actions are ignored. + } + }) + + case ReadyToWork(data) => + prewarmStartingPool = prewarmStartingPool - sender() + prewarmedPool = prewarmedPool + (sender() -> data) + // after create prewarm successfully, reset the value to 0 + if (prewarmCreateFailedCount.get() > 0) { + prewarmCreateFailedCount.set(0) + } + + // Container is initialized + case Initialized(data) => + busyPool = busyPool + (sender() -> data) + inProgressPool = inProgressPool - sender() + // container init completed, send creationAck(success) to scheduler + creationMessages.remove(sender()).foreach { msg => + val ack = ContainerCreationAckMessage( + msg.transid, + msg.creationId, + msg.invocationNamespace, + msg.action, + msg.revision, + msg.whiskActionMetaData, + instance, + msg.schedulerHost, + msg.rpcPort, + msg.retryCount) + sendAckToScheduler(msg.rootSchedulerIndex, ack) + } + + case Resumed(data) => + busyPool = busyPool + (sender() -> data) + inProgressPool = inProgressPool - sender() + // container init completed, send creationAck(success) to scheduler + creationMessages.remove(sender()).foreach { msg => + val ack = ContainerCreationAckMessage( + msg.transid, + msg.creationId, + msg.invocationNamespace, + msg.action, + msg.revision, + msg.whiskActionMetaData, + instance, + msg.schedulerHost, + msg.rpcPort, + msg.retryCount) + sendAckToScheduler(msg.rootSchedulerIndex, ack) + } + + // if warmed containers is 
failed to resume, we should try to use other container or create a new one + case ResumeFailed(data) => + inProgressPool = inProgressPool - sender() + creationMessages.remove(sender()).foreach { msg => + val container = takeWarmedContainer(data.action, data.invocationNamespace, data.revision) + .map(container => (container, "warmed")) + .orElse { + takeContainer(data.action) + } + handleChosenContainer(msg, data.action, container) + } + + case ContainerCreationFailed(t) => + val (error, message) = t match { + case WhiskContainerStartupError(msg) => (WhiskError, msg) + case BlackboxStartupError(msg) => (BlackBoxError, msg) + case _ => (WhiskError, Messages.resourceProvisionError) + } + creationMessages.remove(sender()).foreach { msg => + val ack = ContainerCreationAckMessage( + msg.transid, + msg.creationId, + msg.invocationNamespace, + msg.action, + msg.revision, + msg.whiskActionMetaData, + instance, + msg.schedulerHost, + msg.rpcPort, + msg.retryCount, + Some(error), + Some(message)) + sendAckToScheduler(msg.rootSchedulerIndex, ack) + } + + case ContainerIsPaused(data) => + warmedPool = warmedPool + (sender() -> data) + busyPool = busyPool - sender() // remove container from busy pool + + // Container got removed + case ContainerRemoved(replacePrewarm) => + inProgressPool.get(sender()).foreach { _ => + inProgressPool = inProgressPool - sender() + } + + warmedPool.get(sender()).foreach { _ => + warmedPool = warmedPool - sender() + } + + // container was busy (busy indicates at full capacity), so there is capacity to accept another job request + busyPool.get(sender()).foreach { _ => + busyPool = busyPool - sender() + } + + //in case this was a prewarm + prewarmedPool.get(sender()).foreach { data => + prewarmedPool = prewarmedPool - sender() + logging.info( + this, + s"${if (replacePrewarm) "failed" else "expired"} prewarm [kind: ${data.kind}, memory: ${data.memoryLimit.toString}] removed") + } + + //in case this was a starting prewarm + 
prewarmStartingPool.get(sender()).foreach { data => + logging.info(this, s"failed starting prewarm [kind: ${data._1}, memory: ${data._2.toString}] removed") + prewarmStartingPool = prewarmStartingPool - sender() + prewarmCreateFailedCount.incrementAndGet() + } + + //backfill prewarms on every ContainerRemoved, just in case + if (replacePrewarm) { + adjustPrewarmedContainer(false, false) //in case a prewarm is removed due to health failure or crash + } + + // there maybe a chance that container create failed or init grpc client failed, + // send creationAck(reschedule) to scheduler + creationMessages.remove(sender()).foreach { msg => + val ack = ContainerCreationAckMessage( + msg.transid, + msg.creationId, + msg.invocationNamespace, + msg.action, + msg.revision, + msg.whiskActionMetaData, + instance, + msg.schedulerHost, + msg.rpcPort, + msg.retryCount, + Some(UnknownError), + Some("ContainerProxy init failed.")) + sendAckToScheduler(msg.rootSchedulerIndex, ack) + } + + case GracefulShutdown => + shuttingDown = true + waitForPoolToClear() + + case Enable => + shuttingDown = false + + case AdjustPrewarmedContainer => + // Reset the prewarmCreateCount value when do expiration check and backfill prewarm if possible + prewarmCreateFailedCount.set(0) + adjustPrewarmedContainer(false, true) + } + + /** Install prewarm containers up to the configured requirements for each kind/memory combination or specified kind/memory */ + private def adjustPrewarmedContainer(init: Boolean, scheduled: Boolean): Unit = { + if (!shuttingDown) { Review comment: If this invoker is disabled, there is no need to backfill the prewarm. ########## File path: core/invoker/src/main/scala/org/apache/openwhisk/core/containerpool/v2/FunctionPullingContainerPool.scala ########## @@ -0,0 +1,857 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements.
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.openwhisk.core.containerpool.v2 + +import java.util.concurrent.atomic.AtomicInteger + +import akka.actor.{Actor, ActorRef, ActorRefFactory, Cancellable, Props} +import org.apache.kafka.clients.producer.RecordMetadata +import org.apache.openwhisk.common._ +import org.apache.openwhisk.core.connector.ContainerCreationError._ +import org.apache.openwhisk.core.connector.{ + ContainerCreationAckMessage, + ContainerCreationMessage, + ContainerDeletionMessage +} +import org.apache.openwhisk.core.containerpool.{ + AdjustPrewarmedContainer, + BlackboxStartupError, + ColdStartKey, + ContainerPool, + ContainerPoolConfig, + ContainerRemoved, + PrewarmingConfig, + WhiskContainerStartupError +} +import org.apache.openwhisk.core.entity._ +import org.apache.openwhisk.core.entity.size._ +import org.apache.openwhisk.http.Messages + +import scala.annotation.tailrec +import scala.collection.concurrent.TrieMap +import scala.collection.immutable +import scala.concurrent.Future +import scala.concurrent.duration._ +import scala.util.{Random, Try} +import scala.collection.immutable.Queue + +case class CreationContainer(creationMessage: ContainerCreationMessage, action: WhiskAction) +case class DeletionContainer(deletionMessage: ContainerDeletionMessage) +case object Remove +case 
class Keep(timeout: FiniteDuration) +case class PrewarmContainer(maxConcurrent: Int) + +/** + * A pool managing containers to run actions on. + * + * This pool fulfills the other half of the ContainerProxy contract. Only + * one job (either Start or Run) is sent to a child-actor at any given + * time. The pool then waits for a response of that container, indicating + * the container is done with the job. Only then will the pool send another + * request to that container. + * + * Upon actor creation, the pool will start to prewarm containers according + * to the provided prewarmConfig, iff set. Those containers will **not** be + * part of the poolsize calculation, which is capped by the poolSize parameter. + * Prewarm containers are only used, if they have matching arguments + * (kind, memory) and there is space in the pool. + * + * @param childFactory method to create new container proxy actor + * @param prewarmConfig optional settings for container prewarming + * @param poolConfig config for the ContainerPool + */ +class FunctionPullingContainerPool( + childFactory: ActorRefFactory => ActorRef, + invokerHealthService: ActorRef, + poolConfig: ContainerPoolConfig, + instance: InvokerInstanceId, + prewarmConfig: List[PrewarmingConfig] = List.empty, + sendAckToScheduler: (SchedulerInstanceId, ContainerCreationAckMessage) => Future[RecordMetadata])( + implicit val logging: Logging) + extends Actor { + import ContainerPoolV2.memoryConsumptionOf + + implicit val ec = context.system.dispatcher + + private var busyPool = immutable.Map.empty[ActorRef, Data] + private var inProgressPool = immutable.Map.empty[ActorRef, Data] + private var warmedPool = immutable.Map.empty[ActorRef, WarmData] + private var prewarmedPool = immutable.Map.empty[ActorRef, PreWarmData] + private var prewarmStartingPool = immutable.Map.empty[ActorRef, (String, ByteSize)] + + private var shuttingDown = false + + private val creationMessages = TrieMap[ActorRef, ContainerCreationMessage]() + + private 
var preWarmScheduler: Option[Cancellable] = None + private var prewarmConfigQueue = Queue.empty[(CodeExec[_], ByteSize, Option[FiniteDuration])] + private val prewarmCreateFailedCount = new AtomicInteger(0) + + val logScheduler = context.system.scheduler.schedule(0.seconds, 1.seconds) { + MetricEmitter.emitHistogramMetric( + LoggingMarkers.INVOKER_CONTAINERPOOL_MEMORY("inprogress"), + memoryConsumptionOf(inProgressPool)) + MetricEmitter.emitHistogramMetric( + LoggingMarkers.INVOKER_CONTAINERPOOL_MEMORY("busy"), + memoryConsumptionOf(busyPool)) + MetricEmitter.emitHistogramMetric( + LoggingMarkers.INVOKER_CONTAINERPOOL_MEMORY("prewarmed"), + memoryConsumptionOf(prewarmedPool)) + MetricEmitter.emitHistogramMetric(LoggingMarkers.INVOKER_CONTAINERPOOL_MEMORY("max"), poolConfig.userMemory.toMB) + } + + // Key is ColdStartKey, value is the number of cold Start in minute + var coldStartCount = immutable.Map.empty[ColdStartKey, Int] + + adjustPrewarmedContainer(true, false) + + // check periodically, adjust prewarmed container(delete if unused for some time and create some increment containers) + // add some random amount to this schedule to avoid a herd of container removal + creation + val interval = poolConfig.prewarmExpirationCheckInterval + poolConfig.prewarmExpirationCheckIntervalVariance + .map(v => + Random + .nextInt(v.toSeconds.toInt)) + .getOrElse(0) + .seconds + + if (prewarmConfig.exists(!_.reactive.isEmpty)) { + context.system.scheduler.schedule( + poolConfig.prewarmExpirationCheckInitDelay, + interval, + self, + AdjustPrewarmedContainer) + } + + val resourceSubmitter = context.system.scheduler.schedule(0.seconds, poolConfig.memorySyncInterval) { + syncMemoryInfo + } + + private def logContainerStart(c: ContainerCreationMessage, action: WhiskAction, containerState: String): Unit = { + val FQN = c.action + if (FQN.namespace.name == "whisk.system" && FQN.fullPath.segments > 2) { + 
MetricEmitter.emitCounterMetric(LoggingMarkers.INVOKER_SHAREDPACKAGE(FQN.fullPath.asString)) + } + + MetricEmitter.emitCounterMetric( + LoggingMarkers.INVOKER_CONTAINER_START( + containerState, + c.invocationNamespace, + c.action.namespace.toString, + c.action.name.toString)) + } + + def receive: Receive = { + case PrewarmContainer(maxConcurrent) => + if (prewarmConfigQueue.isEmpty) { + preWarmScheduler.map(_.cancel()) + preWarmScheduler = None + } else { + for (_ <- 1 to maxConcurrent if !prewarmConfigQueue.isEmpty) { + val ((codeExec, byteSize, ttl), newQueue) = prewarmConfigQueue.dequeue + prewarmConfigQueue = newQueue + prewarmContainer(codeExec, byteSize, ttl) + } + } + + case CreationContainer(create: ContainerCreationMessage, action: WhiskAction) => + if (shuttingDown) { + val message = + s"creationId: ${create.creationId}, invoker is shutting down, reschedule ${action.fullyQualifiedName(false)}" + val ack = ContainerCreationAckMessage( + create.transid, + create.creationId, + create.invocationNamespace, + create.action, + create.revision, + create.whiskActionMetaData, + instance, + create.schedulerHost, + create.rpcPort, + create.retryCount, + Some(ShuttingDownError), + Some(message)) + logging.warn(this, message) + sendAckToScheduler(create.rootSchedulerIndex, ack) + } else { + logging.info(this, s"received a container creation message: ${create.creationId}") + action.toExecutableWhiskAction match { + case Some(executable) => + val createdContainer = + takeWarmedContainer(executable, create.invocationNamespace, create.revision) + .map(container => (container, "warmed")) + .orElse { + takeContainer(executable) + } + handleChosenContainer(create, executable, createdContainer) + case None => + val message = + s"creationId: ${create.creationId}, non-executable action reached the container pool ${action.fullyQualifiedName(false)}" + logging.error(this, message) + val ack = ContainerCreationAckMessage( + create.transid, + create.creationId, + 
create.invocationNamespace, + create.action, + create.revision, + create.whiskActionMetaData, + instance, + create.schedulerHost, + create.rpcPort, + create.retryCount, + Some(NonExecutableActionError), + Some(message)) + sendAckToScheduler(create.rootSchedulerIndex, ack) + } + } + + case DeletionContainer(deletionMessage: ContainerDeletionMessage) => + val oldRevision = deletionMessage.revision + val invocationNamespace = deletionMessage.invocationNamespace + val fqn = deletionMessage.action.copy(version = None) + + warmedPool.foreach(warmed => { + val proxy = warmed._1 + val data = warmed._2 + + if (data.invocationNamespace == invocationNamespace + && data.action.fullyQualifiedName(withVersion = false) == fqn.copy(version = None) + && data.revision <= oldRevision) { + proxy ! GracefulShutdown + } + }) + + busyPool.foreach(f = busy => { + val proxy = busy._1 + busy._2 match { + case warmData: WarmData + if warmData.invocationNamespace == invocationNamespace + && warmData.action.fullyQualifiedName(withVersion = false) == fqn.copy(version = None) + && warmData.revision <= oldRevision => + proxy ! GracefulShutdown + case initializedData: InitializedData + if initializedData.invocationNamespace == invocationNamespace + && initializedData.action.fullyQualifiedName(withVersion = false) == fqn.copy(version = None) => + proxy ! GracefulShutdown + case _ => // Other actions are ignored. 
+ } + }) + + case ReadyToWork(data) => + prewarmStartingPool = prewarmStartingPool - sender() + prewarmedPool = prewarmedPool + (sender() -> data) + // after create prewarm successfully, reset the value to 0 + if (prewarmCreateFailedCount.get() > 0) { + prewarmCreateFailedCount.set(0) + } + + // Container is initialized + case Initialized(data) => + busyPool = busyPool + (sender() -> data) + inProgressPool = inProgressPool - sender() + // container init completed, send creationAck(success) to scheduler + creationMessages.remove(sender()).foreach { msg => + val ack = ContainerCreationAckMessage( + msg.transid, + msg.creationId, + msg.invocationNamespace, + msg.action, + msg.revision, + msg.whiskActionMetaData, + instance, + msg.schedulerHost, + msg.rpcPort, + msg.retryCount) + sendAckToScheduler(msg.rootSchedulerIndex, ack) + } + + case Resumed(data) => + busyPool = busyPool + (sender() -> data) + inProgressPool = inProgressPool - sender() + // container init completed, send creationAck(success) to scheduler + creationMessages.remove(sender()).foreach { msg => + val ack = ContainerCreationAckMessage( + msg.transid, + msg.creationId, + msg.invocationNamespace, + msg.action, + msg.revision, + msg.whiskActionMetaData, + instance, + msg.schedulerHost, + msg.rpcPort, + msg.retryCount) + sendAckToScheduler(msg.rootSchedulerIndex, ack) + } + + // if warmed containers is failed to resume, we should try to use other container or create a new one + case ResumeFailed(data) => + inProgressPool = inProgressPool - sender() + creationMessages.remove(sender()).foreach { msg => + val container = takeWarmedContainer(data.action, data.invocationNamespace, data.revision) + .map(container => (container, "warmed")) + .orElse { + takeContainer(data.action) + } + handleChosenContainer(msg, data.action, container) + } + + case ContainerCreationFailed(t) => + val (error, message) = t match { + case WhiskContainerStartupError(msg) => (WhiskError, msg) + case BlackboxStartupError(msg) => 
(BlackBoxError, msg) + case _ => (WhiskError, Messages.resourceProvisionError) + } + creationMessages.remove(sender()).foreach { msg => + val ack = ContainerCreationAckMessage( + msg.transid, + msg.creationId, + msg.invocationNamespace, + msg.action, + msg.revision, + msg.whiskActionMetaData, + instance, + msg.schedulerHost, + msg.rpcPort, + msg.retryCount, + Some(error), + Some(message)) + sendAckToScheduler(msg.rootSchedulerIndex, ack) + } + + case ContainerIsPaused(data) => + warmedPool = warmedPool + (sender() -> data) + busyPool = busyPool - sender() // remove container from busy pool + + // Container got removed + case ContainerRemoved(replacePrewarm) => + inProgressPool.get(sender()).foreach { _ => + inProgressPool = inProgressPool - sender() + } + + warmedPool.get(sender()).foreach { _ => + warmedPool = warmedPool - sender() + } + + // container was busy (busy indicates at full capacity), so there is capacity to accept another job request + busyPool.get(sender()).foreach { _ => + busyPool = busyPool - sender() + } + + //in case this was a prewarm + prewarmedPool.get(sender()).foreach { data => + prewarmedPool = prewarmedPool - sender() + logging.info( + this, + s"${if (replacePrewarm) "failed" else "expired"} prewarm [kind: ${data.kind}, memory: ${data.memoryLimit.toString}] removed") + } + + //in case this was a starting prewarm + prewarmStartingPool.get(sender()).foreach { data => + logging.info(this, s"failed starting prewarm [kind: ${data._1}, memory: ${data._2.toString}] removed") + prewarmStartingPool = prewarmStartingPool - sender() + prewarmCreateFailedCount.incrementAndGet() + } + + //backfill prewarms on every ContainerRemoved, just in case + if (replacePrewarm) { + adjustPrewarmedContainer(false, false) //in case a prewarm is removed due to health failure or crash + } + + // there maybe a chance that container create failed or init grpc client failed, + // send creationAck(reschedule) to scheduler + creationMessages.remove(sender()).foreach { msg 
=> + val ack = ContainerCreationAckMessage( + msg.transid, + msg.creationId, + msg.invocationNamespace, + msg.action, + msg.revision, + msg.whiskActionMetaData, + instance, + msg.schedulerHost, + msg.rpcPort, + msg.retryCount, + Some(UnknownError), + Some("ContainerProxy init failed.")) + sendAckToScheduler(msg.rootSchedulerIndex, ack) + } + + case GracefulShutdown => + shuttingDown = true + waitForPoolToClear() + + case Enable => + shuttingDown = false + + case AdjustPrewarmedContainer => + // Reset the prewarmCreateCount value when do expiration check and backfill prewarm if possible + prewarmCreateFailedCount.set(0) + adjustPrewarmedContainer(false, true) + } + + /** Install prewarm containers up to the configured requirements for each kind/memory combination or specified kind/memory */ + private def adjustPrewarmedContainer(init: Boolean, scheduled: Boolean): Unit = { + if (!shuttingDown) { Review comment: If the invoker is disabled, there is no need to backfill the prewarm. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org