This is an automated email from the ASF dual-hosted git repository. markusthoemmes pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/incubator-openwhisk.git
The following commit(s) were added to refs/heads/master by this push: new 6027dca Throttle log messages when rescheduling containers. (#3165) 6027dca is described below commit 6027dca8f4814d2ee37a20b4fb190194fa07fe3d Author: Martin Henke <martin.he...@web.de> AuthorDate: Thu Jan 11 10:07:14 2018 +0100 Throttle log messages when rescheduling containers. (#3165) This piece of code catches an error case and tries to recover from it by rescheduling the message. In very bad cases, that can result in a tight loop that generates millions of log messages. Throttling those messages to once per 10 seconds should be enough to debug the scenario while not crashing the Invoker due to the log volume. --- .../whisk/core/containerpool/ContainerPool.scala | 19 +++++++++++++++---- .../whisk/core/containerpool/ContainerProxy.scala | 2 +- 2 files changed, 16 insertions(+), 5 deletions(-) diff --git a/core/invoker/src/main/scala/whisk/core/containerpool/ContainerPool.scala b/core/invoker/src/main/scala/whisk/core/containerpool/ContainerPool.scala index 61172dd..3e83fca 100644 --- a/core/invoker/src/main/scala/whisk/core/containerpool/ContainerPool.scala +++ b/core/invoker/src/main/scala/whisk/core/containerpool/ContainerPool.scala @@ -18,13 +18,11 @@ package whisk.core.containerpool import scala.collection.immutable - import akka.actor.Actor import akka.actor.ActorRef import akka.actor.ActorRefFactory import akka.actor.Props import whisk.common.AkkaLogging - import whisk.common.TransactionId import whisk.core.entity.ByteSize import whisk.core.entity.CodeExec @@ -33,6 +31,8 @@ import whisk.core.entity.ExecutableWhiskAction import whisk.core.entity.size._ import whisk.core.connector.MessageFeed +import scala.concurrent.duration._ + sealed trait WorkerState case object Busy extends WorkerState case object Free extends WorkerState @@ -71,6 +71,7 @@ class ContainerPool(childFactory: ActorRefFactory => ActorRef, var freePool = immutable.Map.empty[ActorRef, ContainerData] var busyPool = 
immutable.Map.empty[ActorRef, ContainerData] var prewarmedPool = immutable.Map.empty[ActorRef, ContainerData] + val logMessageInterval = 10.seconds prewarmConfig.foreach { config => logging.info(this, s"pre-warming ${config.count} ${config.exec.kind} containers")(TransactionId.invokerWarmup) @@ -117,8 +118,18 @@ class ContainerPool(childFactory: ActorRefFactory => ActorRef, // this can also happen if createContainer fails to start a new container, or // if a job is rescheduled but the container it was allocated to has not yet destroyed itself // (and a new container would over commit the pool) - logging.error(this, "Rescheduling Run message, too many message in the pool")(r.msg.transid) - self ! r + val isErrorLogged = r.retryLogDeadline.map(_.isOverdue).getOrElse(true) + val retryLogDeadline = if (isErrorLogged) { + logging.error( + this, + s"Rescheduling Run message, too many message in the pool, freePoolSize: ${freePool.size}, " + + s"busyPoolSize: ${busyPool.size}, maxActiveContainers $maxActiveContainers, " + + s"userNamespace: ${r.msg.user.namespace}, action: ${r.action}")(r.msg.transid) + Some(logMessageInterval.fromNow) + } else { + r.retryLogDeadline + } + self ! 
Run(r.action, r.msg, retryLogDeadline) } // Container is free to take more work diff --git a/core/invoker/src/main/scala/whisk/core/containerpool/ContainerProxy.scala b/core/invoker/src/main/scala/whisk/core/containerpool/ContainerProxy.scala index c3f89c5..93d4798 100644 --- a/core/invoker/src/main/scala/whisk/core/containerpool/ContainerProxy.scala +++ b/core/invoker/src/main/scala/whisk/core/containerpool/ContainerProxy.scala @@ -61,7 +61,7 @@ case class WarmedData(container: Container, // Events received by the actor case class Start(exec: CodeExec[_], memoryLimit: ByteSize) -case class Run(action: ExecutableWhiskAction, msg: ActivationMessage) +case class Run(action: ExecutableWhiskAction, msg: ActivationMessage, retryLogDeadline: Option[Deadline] = None) case object Remove // Events sent by the actor -- To stop receiving notification emails like this one, please contact "commits@openwhisk.apache.org" <commits@openwhisk.apache.org>.