style95 commented on code in PR #5326:
URL: https://github.com/apache/openwhisk/pull/5326#discussion_r976038644


##########
tests/src/test/scala/org/apache/openwhisk/core/containerpool/v2/test/FunctionPullingContainerProxyTests.scala:
##########
@@ -1036,6 +1048,84 @@ class FunctionPullingContainerProxyTests
     }
   }
 
+  it should "destroy container proxy when stopping due to timeout and getting 
live count fails" in within(timeout) {
+    val authStore = mock[ArtifactWhiskAuthStore]
+    val namespaceBlacklist: NamespaceBlacklist = new NamespaceBlacklist(authStore)
+    val get = getWhiskAction(Future(action.toWhiskAction))
+    val dataManagementService = TestProbe()
+    val container = new TestContainer
+    val factory = createFactory(Future.successful(container))
+    val acker = createAcker()
+    val store = createStore
+    val collector = createCollector()
+    val counter = getLiveContainerCountFailFirstCall(2)

Review Comment:
   👍 



##########
tests/src/test/scala/org/apache/openwhisk/core/containerpool/v2/test/FunctionPullingContainerProxyTests.scala:
##########
@@ -1036,6 +1048,84 @@ class FunctionPullingContainerProxyTests
     }
   }
 
+  it should "destroy container proxy when stopping due to timeout and getting 
live count fails" in within(timeout) {
+    val authStore = mock[ArtifactWhiskAuthStore]
+    val namespaceBlacklist: NamespaceBlacklist = new NamespaceBlacklist(authStore)
+    val get = getWhiskAction(Future(action.toWhiskAction))
+    val dataManagementService = TestProbe()
+    val container = new TestContainer
+    val factory = createFactory(Future.successful(container))
+    val acker = createAcker()
+    val store = createStore
+    val collector = createCollector()
+    val counter = getLiveContainerCountFailFirstCall(2)
+    val limit = getWarmedContainerLimit(Future.successful((1, 10.seconds)))
+    val (client, clientFactory) = testClient
+
+    val probe = TestProbe()
+    val machine =
+      probe.childActorOf(
+        FunctionPullingContainerProxy
+          .props(
+            factory,
+            entityStore,
+            namespaceBlacklist,
+            get,
+            dataManagementService.ref,
+            clientFactory,
+            acker,
+            store,
+            collector,
+            counter,
+            limit,
+            InvokerInstanceId(0, userMemory = defaultUserMemory),
+            invokerHealthManager.ref,
+            poolConfig,
+            timeoutConfig))
+
+    registerCallback(machine, probe)
+    probe watch machine
+
+    machine ! Initialize(invocationNamespace.asString, fqn, action, schedulerHost, rpcPort, messageTransId)
+    probe.expectMsg(Transition(machine, Uninitialized, CreatingClient))
+    client.expectMsg(StartClient)
+    client.send(machine, ClientCreationCompleted())
+
+    probe.expectMsg(Transition(machine, CreatingClient, ClientCreated))
+    expectInitialized(probe)
+    client.expectMsg(RequestActivation())
+    client.send(machine, message)
+
+    probe.expectMsg(Transition(machine, ClientCreated, Running))
+    client.expectMsg(ContainerWarmed)
+    client.expectMsgPF() {
+      case RequestActivation(Some(_), None) => true
+    }
+
+    machine ! StateTimeout
+    client.send(machine, RetryRequestActivation)
+    probe.expectMsg(Transition(machine, Running, Pausing))
+    probe.expectMsgType[ContainerIsPaused]
+    probe.expectMsg(Transition(machine, Pausing, Paused))
+
+    machine ! StateTimeout
+    client.expectMsg(StopClientProxy)
+    probe.expectMsgAllOf(ContainerRemoved(true), Transition(machine, Paused, Removing))

Review Comment:
   Can we add a check to see if `DataManagementService` properly receives the clean-up messages?
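   For example, something along these lines could assert the clean-up (a minimal sketch: `UnregisterData` is the message the proxy sends to `DataManagementService`, but the exact key format and the `testContainerId` name here are assumptions to adapt to the test fixture):
   
   ```scala
   // Sketch: once the proxy transitions toward Removing, it should ask
   // DataManagementService to delete the ETCD entries for this container.
   // The key predicate below is illustrative; match on the real key the
   // proxy registered for the container under test.
   dataManagementService.expectMsgPF() {
     case UnregisterData(key) if key.contains(testContainerId.asString) => true
   }
   ```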



##########
core/invoker/src/main/scala/org/apache/openwhisk/core/containerpool/v2/FunctionPullingContainerProxy.scala:
##########
@@ -732,7 +735,12 @@ class FunctionPullingContainerProxy(
         data.action.fullyQualifiedName(false),
         data.action.rev,
         Some(data.clientProxy))
-
+    case Event(t: FailureMessage, data: WarmData) =>
+      logging.error(
+        this,
+        s"Failed to determine whether to keep or remove container on pause 
timeout for ${data.container.containerId}, retrying. Caused by: $t")
+      startSingleTimer(DetermineKeepContainer.toString, DetermineKeepContainer, 1.second)

Review Comment:
   Is there a reason to start the timer with a 1-second delay?
   Since it delays the deletion of the ETCD key for the problematic container, another request could still be routed to this container during that window.
   The request would be rescheduled, but that also delays container creation.
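   If an immediate retry is safe here, one alternative (a sketch only, assuming the handler stays in the current state; it trades the 1-second back-off for faster key deletion and could loop hot if the failure persists) is to re-send the message directly:
   
   ```scala
   case Event(t: FailureMessage, data: WarmData) =>
     logging.error(
       this,
       s"Failed to determine whether to keep or remove container on pause timeout for ${data.container.containerId}, retrying. Caused by: $t")
     // Retry immediately instead of after 1 second, so the ETCD key for the
     // problematic container is cleaned up sooner.
     self ! DetermineKeepContainer
     stay
   ```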
   



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: issues-unsubscr...@openwhisk.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org
