[GitHub] dgrove-oss commented on a change in pull request #3338: implement suspend/resume for KubernetesContainer
dgrove-oss commented on a change in pull request #3338: implement suspend/resume for KubernetesContainer URL: https://github.com/apache/incubator-openwhisk/pull/3338#discussion_r171450061 ## File path: core/invoker/src/main/scala/whisk/core/containerpool/kubernetes/KubernetesClient.scala ## @@ -99,43 +117,144 @@ class KubernetesClient( } protected val kubectlCmd = Seq(findKubectlCmd) - def run(name: String, image: String, args: Seq[String] = Seq.empty[String])( -implicit transid: TransactionId): Future[ContainerId] = { -runCmd(Seq("run", name, s"--image=$image") ++ args, timeouts.run) - .map(_ => ContainerId(name)) - } + def run(name: String, + image: String, + memory: ByteSize = 256.MB, + environment: Map[String, String] = Map.empty, + labels: Map[String, String] = Map.empty)(implicit transid: TransactionId): Future[KubernetesContainer] = { + +val envVars = environment.map { + case (key, value) => new EnvVarBuilder().withName(key).withValue(value).build() +}.toSeq + +val pod = new PodBuilder() + .withNewMetadata() + .withName(name) + .addToLabels("name", name) + .addToLabels(labels.asJava) + .endMetadata() + .withNewSpec() + .withRestartPolicy("Always") + .addNewContainer() + .withNewResources() + .withLimits(Map("memory" -> new Quantity(memory.toMB + "Mi")).asJava) + .endResources() + .withName("user-action") + .withImage(image) + .withEnv(envVars.asJava) + .addNewPort() + .withContainerPort(8080) + .withName("action") + .endPort() + .endContainer() + .endSpec() + .build() + +kubeRestClient.pods.inNamespace(config.namespace).create(pod) - def inspectIPAddress(id: ContainerId)(implicit transid: TransactionId): Future[ContainerAddress] = { Future { blocking { -val pod = - kubeRestClient.pods().withName(id.asString).waitUntilReady(timeouts.inspect.length, timeouts.inspect.unit) -ContainerAddress(pod.getStatus().getPodIP()) +val createdPod = kubeRestClient.pods + .inNamespace(config.namespace) + .withName(name) + .waitUntilReady(config.timeouts.run.length, 
config.timeouts.run.unit) +toContainer(createdPod) } }.recoverWith { case e => -log.error(this, s"Failed to get IP of Pod '${id.asString}' within timeout: ${e.getClass} - ${e.getMessage}") -Future.failed(new Exception(s"Failed to get IP of Pod '${id.asString}'")) +log.error(this, s"Failed create pod for '$name': ${e.getClass} - ${e.getMessage}") +Future.failed(new Exception(s"Failed to create pod '$name'")) +} + } + + def rm(container: KubernetesContainer)(implicit transid: TransactionId): Future[Unit] = { +runCmd(Seq("delete", "--now", "pod", container.id.asString), config.timeouts.rm).map(_ => ()) + } + + def rm(key: String, value: String, ensureUnpaused: Boolean = false)(implicit transid: TransactionId): Future[Unit] = { +if (ensureUnpaused && config.invokerAgent.enabled) { + // The caller can't guarantee that every container with the label key=value is already unpaused. + // Therefore we must enumerate them and ensure they are unpaused before we attempt to delete them. + Future { +blocking { + kubeRestClient +.inNamespace(config.namespace) +.pods() +.withLabel(key, value) +.list() +.getItems +.asScala +.map { pod => + val container = toContainer(pod) + container +.resume() +.recover { case _ => () } // Ignore errors; it is possible the container was not actually suspended. 
+.map(_ => rm(container)) +} +} + }.flatMap(futures => +Future + .sequence(futures) + .map(_ => ())) +} else { + runCmd(Seq("delete", "--now", "pod", "-l", s"$key=$value"), config.timeouts.rm).map(_ => ()) } } - def rm(id: ContainerId)(implicit transid: TransactionId): Future[Unit] = -runCmd(Seq("delete", "--now", "pod", id.asString), timeouts.rm).map(_ => ()) + def suspend(container: KubernetesContainer)(implicit transid: TransactionId): Future[Unit] = { +if (config.invokerAgent.enabled) { + agentCommand("suspend", container) +.map { response => + response.discardEntityBytes() +} +} else { + Future.successful({}) +} + } - def rm(key: String, value: String)(implicit transid: TransactionId): Future[Unit] = -runCmd(Seq("delete", "--now", "pod", "-l", s"$key=$value"), timeouts.rm).map(_ => ()) + def resume(container: KubernetesContainer)(implicit transid: TransactionId): Future[Unit] = { +if (config.invokerAgent.enabled) { + agentCommand("resume", container) +.map { response =>
[GitHub] dgrove-oss commented on a change in pull request #3338: implement suspend/resume for KubernetesContainer
dgrove-oss commented on a change in pull request #3338: implement suspend/resume for KubernetesContainer URL: https://github.com/apache/incubator-openwhisk/pull/3338#discussion_r170916241 ## File path: core/invoker/src/main/scala/whisk/core/containerpool/kubernetes/KubernetesClient.scala ## @@ -99,43 +116,139 @@ class KubernetesClient( } protected val kubectlCmd = Seq(findKubectlCmd) - def run(name: String, image: String, args: Seq[String] = Seq.empty[String])( -implicit transid: TransactionId): Future[ContainerId] = { -runCmd(Seq("run", name, s"--image=$image") ++ args, timeouts.run) - .map(_ => ContainerId(name)) - } + def run(name: String, + image: String, + memory: ByteSize = 256.MB, + environment: Map[String, String] = Map(), + labels: Map[String, String] = Map())(implicit transid: TransactionId): Future[KubernetesContainer] = { + +val envVars = environment.map { + case (key, value) => new EnvVarBuilder().withName(key).withValue(value).build() +}.toSeq + +val pod = new PodBuilder() + .withNewMetadata() + .withName(name) + .addToLabels("name", name) + .addToLabels(labels.asJava) + .endMetadata() + .withNewSpec() + .withRestartPolicy("Always") + .addNewContainer() + .withNewResources() + .withLimits(Map("memory" -> new Quantity(memory.toMB + "Mi")).asJava) + .endResources() + .withName("user-action") + .withImage(image) + .withEnv(envVars.asJava) + .addNewPort() + .withContainerPort(8080) + .withName("action") + .endPort() + .endContainer() + .endSpec() + .build() + +kubeRestClient.pods.inNamespace("openwhisk").create(pod) - def inspectIPAddress(id: ContainerId)(implicit transid: TransactionId): Future[ContainerAddress] = { Future { blocking { -val pod = - kubeRestClient.pods().withName(id.asString).waitUntilReady(timeouts.inspect.length, timeouts.inspect.unit) -ContainerAddress(pod.getStatus().getPodIP()) +val createdPod = kubeRestClient.pods + .inNamespace("openwhisk") + .withName(name) + .waitUntilReady(config.timeouts.run.length, config.timeouts.run.unit) 
+toContainer(createdPod) } }.recoverWith { case e => -log.error(this, s"Failed to get IP of Pod '${id.asString}' within timeout: ${e.getClass} - ${e.getMessage}") -Future.failed(new Exception(s"Failed to get IP of Pod '${id.asString}'")) +log.error(this, s"Failed create pod for '$name': ${e.getClass} - ${e.getMessage}") +Future.failed(new Exception(s"Failed to create pod '$name'")) +} + } + + def rm(container: KubernetesContainer)(implicit transid: TransactionId): Future[Unit] = { +runCmd(Seq("delete", "--now", "pod", container.id.asString), config.timeouts.rm).map(_ => ()) + } + + def rm(key: String, value: String)(implicit transid: TransactionId): Future[Unit] = { +if (config.invokerAgent.enabled) { + Future { +blocking { + kubeRestClient +.inNamespace("openwhisk") Review comment: good point. Moved the namespace value into application.conf. This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services
[GitHub] dgrove-oss commented on a change in pull request #3338: implement suspend/resume for KubernetesContainer
dgrove-oss commented on a change in pull request #3338: implement suspend/resume for KubernetesContainer URL: https://github.com/apache/incubator-openwhisk/pull/3338#discussion_r170916241 ## File path: core/invoker/src/main/scala/whisk/core/containerpool/kubernetes/KubernetesClient.scala ## @@ -99,43 +116,139 @@ class KubernetesClient( } protected val kubectlCmd = Seq(findKubectlCmd) - def run(name: String, image: String, args: Seq[String] = Seq.empty[String])( -implicit transid: TransactionId): Future[ContainerId] = { -runCmd(Seq("run", name, s"--image=$image") ++ args, timeouts.run) - .map(_ => ContainerId(name)) - } + def run(name: String, + image: String, + memory: ByteSize = 256.MB, + environment: Map[String, String] = Map(), + labels: Map[String, String] = Map())(implicit transid: TransactionId): Future[KubernetesContainer] = { + +val envVars = environment.map { + case (key, value) => new EnvVarBuilder().withName(key).withValue(value).build() +}.toSeq + +val pod = new PodBuilder() + .withNewMetadata() + .withName(name) + .addToLabels("name", name) + .addToLabels(labels.asJava) + .endMetadata() + .withNewSpec() + .withRestartPolicy("Always") + .addNewContainer() + .withNewResources() + .withLimits(Map("memory" -> new Quantity(memory.toMB + "Mi")).asJava) + .endResources() + .withName("user-action") + .withImage(image) + .withEnv(envVars.asJava) + .addNewPort() + .withContainerPort(8080) + .withName("action") + .endPort() + .endContainer() + .endSpec() + .build() + +kubeRestClient.pods.inNamespace("openwhisk").create(pod) - def inspectIPAddress(id: ContainerId)(implicit transid: TransactionId): Future[ContainerAddress] = { Future { blocking { -val pod = - kubeRestClient.pods().withName(id.asString).waitUntilReady(timeouts.inspect.length, timeouts.inspect.unit) -ContainerAddress(pod.getStatus().getPodIP()) +val createdPod = kubeRestClient.pods + .inNamespace("openwhisk") + .withName(name) + .waitUntilReady(config.timeouts.run.length, config.timeouts.run.unit) 
+toContainer(createdPod) } }.recoverWith { case e => -log.error(this, s"Failed to get IP of Pod '${id.asString}' within timeout: ${e.getClass} - ${e.getMessage}") -Future.failed(new Exception(s"Failed to get IP of Pod '${id.asString}'")) +log.error(this, s"Failed create pod for '$name': ${e.getClass} - ${e.getMessage}") +Future.failed(new Exception(s"Failed to create pod '$name'")) +} + } + + def rm(container: KubernetesContainer)(implicit transid: TransactionId): Future[Unit] = { +runCmd(Seq("delete", "--now", "pod", container.id.asString), config.timeouts.rm).map(_ => ()) + } + + def rm(key: String, value: String)(implicit transid: TransactionId): Future[Unit] = { +if (config.invokerAgent.enabled) { + Future { +blocking { + kubeRestClient +.inNamespace("openwhisk") Review comment: good point. Moved the namespace into application.conf. This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services
[GitHub] dgrove-oss commented on a change in pull request #3338: implement suspend/resume for KubernetesContainer
dgrove-oss commented on a change in pull request #3338: implement suspend/resume for KubernetesContainer URL: https://github.com/apache/incubator-openwhisk/pull/3338#discussion_r170387260 ## File path: core/invoker/src/main/scala/whisk/core/containerpool/kubernetes/KubernetesClient.scala ## @@ -99,43 +111,133 @@ class KubernetesClient( } protected val kubectlCmd = Seq(findKubectlCmd) - def run(name: String, image: String, args: Seq[String] = Seq.empty[String])( -implicit transid: TransactionId): Future[ContainerId] = { -runCmd(Seq("run", name, s"--image=$image") ++ args, timeouts.run) - .map(_ => ContainerId(name)) - } + def run(name: String, + image: String, + memory: ByteSize = 256.MB, + environment: Map[String, String] = Map(), + labels: Map[String, String] = Map())(implicit transid: TransactionId): Future[KubernetesContainer] = { + +val envVars = environment.map { + case (key, value) => new EnvVarBuilder().withName(key).withValue(value).build() +}.toSeq + +val pod = new PodBuilder() + .withNewMetadata() + .withName(name) + .addToLabels("name", name) + .addToLabels(mapAsJavaMap(labels)) + .endMetadata() + .withNewSpec() + .withRestartPolicy("Always") + .addNewContainer() + .withNewResources() + .withLimits(mapAsJavaMap(Map("memory" -> new Quantity(memory.toMB + "Mi" + .endResources() + .withName("user-action") + .withImage(image) + .withEnv(envVars) + .addNewPort() + .withContainerPort(8080) + .withName("action") + .endPort() + .endContainer() + .endSpec() + .build() + +kubeRestClient.pods.inNamespace("openwhisk").create(pod) - def inspectIPAddress(id: ContainerId)(implicit transid: TransactionId): Future[ContainerAddress] = { Future { blocking { -val pod = - kubeRestClient.pods().withName(id.asString).waitUntilReady(timeouts.inspect.length, timeouts.inspect.unit) -ContainerAddress(pod.getStatus().getPodIP()) +val createdPod = kubeRestClient.pods + .inNamespace("openwhisk") + .withName(name) + .waitUntilReady(config.timeouts.run.length, config.timeouts.run.unit) 
+toContainer(createdPod) } }.recoverWith { case e => -log.error(this, s"Failed to get IP of Pod '${id.asString}' within timeout: ${e.getClass} - ${e.getMessage}") -Future.failed(new Exception(s"Failed to get IP of Pod '${id.asString}'")) +log.error(this, s"Failed create pod for '$name': ${e.getClass} - ${e.getMessage}") +Future.failed(new Exception(s"Failed to create pod '$name'")) } } - def rm(id: ContainerId)(implicit transid: TransactionId): Future[Unit] = -runCmd(Seq("delete", "--now", "pod", id.asString), timeouts.rm).map(_ => ()) + def rm(container: KubernetesContainer)(implicit transid: TransactionId): Future[Unit] = { +// Pod deletion will never complete if the pod contains a paused container. +// Therefore issue a resume before the delete (resuming a non-suspended container is harmless). +resume(container).map { _ => + runCmd(Seq("delete", "--now", "pod", container.id.asString), config.timeouts.rm).map(_ => ()) +} + } - def rm(key: String, value: String)(implicit transid: TransactionId): Future[Unit] = -runCmd(Seq("delete", "--now", "pod", "-l", s"$key=$value"), timeouts.rm).map(_ => ()) + def rm(key: String, value: String)(implicit transid: TransactionId): Future[Unit] = { +if (config.invokerAgent) { + Future { +blocking { + kubeRestClient +.inNamespace("openwhisk") +.pods() +.withLabel(key, value) +.list() +.getItems +.map { pod => + rm(toContainer(pod)) +} +.reduce((a, b) => a.flatMap(_ => b)) +} + } +} else { + runCmd(Seq("delete", "--now", "pod", "-l", s"$key=$value"), config.timeouts.rm).map(_ => ()) +} + } - def logs(id: ContainerId, sinceTime: Option[Instant], waitForSentinel: Boolean = false)( + def suspend(container: KubernetesContainer)(implicit transid: TransactionId): Future[Unit] = { Review comment: I think the implementations belong in the KubernetesClient, since they need access to some of its state too (its config object). I agree passing the entire KubernetesContainer object as a parameter looks a bit backwards. 
I decided to do that (instead of passing a few of its fields individually) as a hedge against future churn and because it felt like a nice logical unit (vs. passing containerId, workerIP, nativeContainerId). But I don't feel that strongly about this. This is an automated message from the Apache Git Service. To respond to the message, please log on GitHu
[GitHub] dgrove-oss commented on a change in pull request #3338: implement suspend/resume for KubernetesContainer
dgrove-oss commented on a change in pull request #3338: implement suspend/resume for KubernetesContainer URL: https://github.com/apache/incubator-openwhisk/pull/3338#discussion_r170385887 ## File path: core/invoker/src/main/scala/whisk/core/containerpool/kubernetes/KubernetesClient.scala ## @@ -99,43 +111,133 @@ class KubernetesClient( } protected val kubectlCmd = Seq(findKubectlCmd) - def run(name: String, image: String, args: Seq[String] = Seq.empty[String])( -implicit transid: TransactionId): Future[ContainerId] = { -runCmd(Seq("run", name, s"--image=$image") ++ args, timeouts.run) - .map(_ => ContainerId(name)) - } + def run(name: String, + image: String, + memory: ByteSize = 256.MB, + environment: Map[String, String] = Map(), + labels: Map[String, String] = Map())(implicit transid: TransactionId): Future[KubernetesContainer] = { + +val envVars = environment.map { + case (key, value) => new EnvVarBuilder().withName(key).withValue(value).build() +}.toSeq + +val pod = new PodBuilder() + .withNewMetadata() + .withName(name) + .addToLabels("name", name) + .addToLabels(mapAsJavaMap(labels)) + .endMetadata() + .withNewSpec() + .withRestartPolicy("Always") + .addNewContainer() + .withNewResources() + .withLimits(mapAsJavaMap(Map("memory" -> new Quantity(memory.toMB + "Mi" + .endResources() + .withName("user-action") + .withImage(image) + .withEnv(envVars) + .addNewPort() + .withContainerPort(8080) + .withName("action") + .endPort() + .endContainer() + .endSpec() + .build() + +kubeRestClient.pods.inNamespace("openwhisk").create(pod) - def inspectIPAddress(id: ContainerId)(implicit transid: TransactionId): Future[ContainerAddress] = { Future { blocking { -val pod = - kubeRestClient.pods().withName(id.asString).waitUntilReady(timeouts.inspect.length, timeouts.inspect.unit) -ContainerAddress(pod.getStatus().getPodIP()) +val createdPod = kubeRestClient.pods + .inNamespace("openwhisk") + .withName(name) + .waitUntilReady(config.timeouts.run.length, config.timeouts.run.unit) 
+toContainer(createdPod) } }.recoverWith { case e => -log.error(this, s"Failed to get IP of Pod '${id.asString}' within timeout: ${e.getClass} - ${e.getMessage}") -Future.failed(new Exception(s"Failed to get IP of Pod '${id.asString}'")) +log.error(this, s"Failed create pod for '$name': ${e.getClass} - ${e.getMessage}") +Future.failed(new Exception(s"Failed to create pod '$name'")) } } - def rm(id: ContainerId)(implicit transid: TransactionId): Future[Unit] = -runCmd(Seq("delete", "--now", "pod", id.asString), timeouts.rm).map(_ => ()) + def rm(container: KubernetesContainer)(implicit transid: TransactionId): Future[Unit] = { +// Pod deletion will never complete if the pod contains a paused container. +// Therefore issue a resume before the delete (resuming a non-suspended container is harmless). +resume(container).map { _ => Review comment: Makes sense; I restructured. This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services
[GitHub] dgrove-oss commented on a change in pull request #3338: implement suspend/resume for KubernetesContainer
dgrove-oss commented on a change in pull request #3338: implement suspend/resume for KubernetesContainer URL: https://github.com/apache/incubator-openwhisk/pull/3338#discussion_r170385786 ## File path: core/invoker/src/main/scala/whisk/core/containerpool/kubernetes/KubernetesClient.scala ## @@ -99,43 +111,133 @@ class KubernetesClient( } protected val kubectlCmd = Seq(findKubectlCmd) - def run(name: String, image: String, args: Seq[String] = Seq.empty[String])( -implicit transid: TransactionId): Future[ContainerId] = { -runCmd(Seq("run", name, s"--image=$image") ++ args, timeouts.run) - .map(_ => ContainerId(name)) - } + def run(name: String, + image: String, + memory: ByteSize = 256.MB, + environment: Map[String, String] = Map(), + labels: Map[String, String] = Map())(implicit transid: TransactionId): Future[KubernetesContainer] = { + +val envVars = environment.map { + case (key, value) => new EnvVarBuilder().withName(key).withValue(value).build() +}.toSeq + +val pod = new PodBuilder() + .withNewMetadata() + .withName(name) + .addToLabels("name", name) + .addToLabels(mapAsJavaMap(labels)) + .endMetadata() + .withNewSpec() + .withRestartPolicy("Always") Review comment: Preserving the behavior that was there. I agree we don't actually want them to be restarted but @bbrowning said in #3219 that on OpenShift they need to set the restart policy to Always as a hack to prevent the containers from being arbitrarily killed by the system. This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services