This is an automated email from the ASF dual-hosted git repository.

markusthoemmes pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-openwhisk.git


The following commit(s) were added to refs/heads/master by this push:
     new 6027dca  Throttle log messages when rescheduling containers. (#3165)
6027dca is described below

commit 6027dca8f4814d2ee37a20b4fb190194fa07fe3d
Author: Martin Henke <martin.he...@web.de>
AuthorDate: Thu Jan 11 10:07:14 2018 +0100

    Throttle log messages when rescheduling containers. (#3165)
    
    This piece of code catches an error case and tries to recover from it by 
rescheduling the message. In very bad cases that can result in a tight loop 
that generates millions of log messages. Throttling those log messages to once 
per 10 seconds (per rescheduled Run message) should be enough to debug the 
scenario while not crashing the Invoker due to the log volume.
---
 .../whisk/core/containerpool/ContainerPool.scala      | 19 +++++++++++++++----
 .../whisk/core/containerpool/ContainerProxy.scala     |  2 +-
 2 files changed, 16 insertions(+), 5 deletions(-)

diff --git 
a/core/invoker/src/main/scala/whisk/core/containerpool/ContainerPool.scala 
b/core/invoker/src/main/scala/whisk/core/containerpool/ContainerPool.scala
index 61172dd..3e83fca 100644
--- a/core/invoker/src/main/scala/whisk/core/containerpool/ContainerPool.scala
+++ b/core/invoker/src/main/scala/whisk/core/containerpool/ContainerPool.scala
@@ -18,13 +18,11 @@
 package whisk.core.containerpool
 
 import scala.collection.immutable
-
 import akka.actor.Actor
 import akka.actor.ActorRef
 import akka.actor.ActorRefFactory
 import akka.actor.Props
 import whisk.common.AkkaLogging
-
 import whisk.common.TransactionId
 import whisk.core.entity.ByteSize
 import whisk.core.entity.CodeExec
@@ -33,6 +31,8 @@ import whisk.core.entity.ExecutableWhiskAction
 import whisk.core.entity.size._
 import whisk.core.connector.MessageFeed
 
+import scala.concurrent.duration._
+
 sealed trait WorkerState
 case object Busy extends WorkerState
 case object Free extends WorkerState
@@ -71,6 +71,7 @@ class ContainerPool(childFactory: ActorRefFactory => ActorRef,
   var freePool = immutable.Map.empty[ActorRef, ContainerData]
   var busyPool = immutable.Map.empty[ActorRef, ContainerData]
   var prewarmedPool = immutable.Map.empty[ActorRef, ContainerData]
+  val logMessageInterval = 10.seconds
 
   prewarmConfig.foreach { config =>
     logging.info(this, s"pre-warming ${config.count} ${config.exec.kind} 
containers")(TransactionId.invokerWarmup)
@@ -117,8 +118,18 @@ class ContainerPool(childFactory: ActorRefFactory => 
ActorRef,
           // this can also happen if createContainer fails to start a new 
container, or
           // if a job is rescheduled but the container it was allocated to has 
not yet destroyed itself
           // (and a new container would over commit the pool)
-          logging.error(this, "Rescheduling Run message, too many message in 
the pool")(r.msg.transid)
-          self ! r
+          val isErrorLogged = 
r.retryLogDeadline.map(_.isOverdue).getOrElse(true)
+          val retryLogDeadline = if (isErrorLogged) {
+            logging.error(
+              this,
+              s"Rescheduling Run message, too many message in the pool, 
freePoolSize: ${freePool.size}, " +
+                s"busyPoolSize: ${busyPool.size}, maxActiveContainers 
$maxActiveContainers, " +
+                s"userNamespace: ${r.msg.user.namespace}, action: 
${r.action}")(r.msg.transid)
+            Some(logMessageInterval.fromNow)
+          } else {
+            r.retryLogDeadline
+          }
+          self ! Run(r.action, r.msg, retryLogDeadline)
       }
 
     // Container is free to take more work
diff --git 
a/core/invoker/src/main/scala/whisk/core/containerpool/ContainerProxy.scala 
b/core/invoker/src/main/scala/whisk/core/containerpool/ContainerProxy.scala
index c3f89c5..93d4798 100644
--- a/core/invoker/src/main/scala/whisk/core/containerpool/ContainerProxy.scala
+++ b/core/invoker/src/main/scala/whisk/core/containerpool/ContainerProxy.scala
@@ -61,7 +61,7 @@ case class WarmedData(container: Container,
 
 // Events received by the actor
 case class Start(exec: CodeExec[_], memoryLimit: ByteSize)
-case class Run(action: ExecutableWhiskAction, msg: ActivationMessage)
+case class Run(action: ExecutableWhiskAction, msg: ActivationMessage, 
retryLogDeadline: Option[Deadline] = None)
 case object Remove
 
 // Events sent by the actor

-- 
To stop receiving notification emails like this one, please contact
"commits@openwhisk.apache.org" <commits@openwhisk.apache.org>.

Reply via email to