[GitHub] dgrove-oss commented on a change in pull request #3338: implement suspend/resume for KubernetesContainer

2018-02-28 Thread GitBox
dgrove-oss commented on a change in pull request #3338: implement 
suspend/resume for KubernetesContainer
URL: 
https://github.com/apache/incubator-openwhisk/pull/3338#discussion_r171450061
 
 

 ##
 File path: 
core/invoker/src/main/scala/whisk/core/containerpool/kubernetes/KubernetesClient.scala
 ##
 @@ -99,43 +117,144 @@ class KubernetesClient(
   }
   protected val kubectlCmd = Seq(findKubectlCmd)
 
-  def run(name: String, image: String, args: Seq[String] = Seq.empty[String])(
-implicit transid: TransactionId): Future[ContainerId] = {
-runCmd(Seq("run", name, s"--image=$image") ++ args, timeouts.run)
-  .map(_ => ContainerId(name))
-  }
+  def run(name: String,
+  image: String,
+  memory: ByteSize = 256.MB,
+  environment: Map[String, String] = Map.empty,
+  labels: Map[String, String] = Map.empty)(implicit transid: 
TransactionId): Future[KubernetesContainer] = {
+
+val envVars = environment.map {
+  case (key, value) => new 
EnvVarBuilder().withName(key).withValue(value).build()
+}.toSeq
+
+val pod = new PodBuilder()
+  .withNewMetadata()
+  .withName(name)
+  .addToLabels("name", name)
+  .addToLabels(labels.asJava)
+  .endMetadata()
+  .withNewSpec()
+  .withRestartPolicy("Always")
+  .addNewContainer()
+  .withNewResources()
+  .withLimits(Map("memory" -> new Quantity(memory.toMB + "Mi")).asJava)
+  .endResources()
+  .withName("user-action")
+  .withImage(image)
+  .withEnv(envVars.asJava)
+  .addNewPort()
+  .withContainerPort(8080)
+  .withName("action")
+  .endPort()
+  .endContainer()
+  .endSpec()
+  .build()
+
+kubeRestClient.pods.inNamespace(config.namespace).create(pod)
 
-  def inspectIPAddress(id: ContainerId)(implicit transid: TransactionId): 
Future[ContainerAddress] = {
 Future {
   blocking {
-val pod =
-  
kubeRestClient.pods().withName(id.asString).waitUntilReady(timeouts.inspect.length,
 timeouts.inspect.unit)
-ContainerAddress(pod.getStatus().getPodIP())
+val createdPod = kubeRestClient.pods
+  .inNamespace(config.namespace)
+  .withName(name)
+  .waitUntilReady(config.timeouts.run.length, config.timeouts.run.unit)
+toContainer(createdPod)
   }
 }.recoverWith {
   case e =>
-log.error(this, s"Failed to get IP of Pod '${id.asString}' within 
timeout: ${e.getClass} - ${e.getMessage}")
-Future.failed(new Exception(s"Failed to get IP of Pod 
'${id.asString}'"))
+log.error(this, s"Failed create pod for '$name': ${e.getClass} - 
${e.getMessage}")
+Future.failed(new Exception(s"Failed to create pod '$name'"))
+}
+  }
+
+  def rm(container: KubernetesContainer)(implicit transid: TransactionId): 
Future[Unit] = {
+runCmd(Seq("delete", "--now", "pod", container.id.asString), 
config.timeouts.rm).map(_ => ())
+  }
+
+  def rm(key: String, value: String, ensureUnpaused: Boolean = false)(implicit 
transid: TransactionId): Future[Unit] = {
+if (ensureUnpaused && config.invokerAgent.enabled) {
+  // The caller can't guarantee that every container with the label 
key=value is already unpaused.
+  // Therefore we must enumerate them and ensure they are unpaused before 
we attempt to delete them.
+  Future {
+blocking {
+  kubeRestClient
+.inNamespace(config.namespace)
+.pods()
+.withLabel(key, value)
+.list()
+.getItems
+.asScala
+.map { pod =>
+  val container = toContainer(pod)
+  container
+.resume()
+.recover { case _ => () } // Ignore errors; it is possible the 
container was not actually suspended.
+.map(_ => rm(container))
+}
+}
+  }.flatMap(futures =>
+Future
+  .sequence(futures)
+  .map(_ => ()))
+} else {
+  runCmd(Seq("delete", "--now", "pod", "-l", s"$key=$value"), 
config.timeouts.rm).map(_ => ())
 }
   }
 
-  def rm(id: ContainerId)(implicit transid: TransactionId): Future[Unit] =
-runCmd(Seq("delete", "--now", "pod", id.asString), timeouts.rm).map(_ => 
())
+  def suspend(container: KubernetesContainer)(implicit transid: 
TransactionId): Future[Unit] = {
+if (config.invokerAgent.enabled) {
+  agentCommand("suspend", container)
+.map { response =>
+  response.discardEntityBytes()
+}
+} else {
+  Future.successful({})
+}
+  }
 
-  def rm(key: String, value: String)(implicit transid: TransactionId): 
Future[Unit] =
-runCmd(Seq("delete", "--now", "pod", "-l", s"$key=$value"), 
timeouts.rm).map(_ => ())
+  def resume(container: KubernetesContainer)(implicit transid: TransactionId): 
Future[Unit] = {
+if (config.invokerAgent.enabled) {
+  agentCommand("resume", container)
+.map { response =>

[GitHub] dgrove-oss commented on a change in pull request #3338: implement suspend/resume for KubernetesContainer

2018-02-27 Thread GitBox
dgrove-oss commented on a change in pull request #3338: implement 
suspend/resume for KubernetesContainer
URL: 
https://github.com/apache/incubator-openwhisk/pull/3338#discussion_r170916241
 
 

 ##
 File path: 
core/invoker/src/main/scala/whisk/core/containerpool/kubernetes/KubernetesClient.scala
 ##
 @@ -99,43 +116,139 @@ class KubernetesClient(
   }
   protected val kubectlCmd = Seq(findKubectlCmd)
 
-  def run(name: String, image: String, args: Seq[String] = Seq.empty[String])(
-implicit transid: TransactionId): Future[ContainerId] = {
-runCmd(Seq("run", name, s"--image=$image") ++ args, timeouts.run)
-  .map(_ => ContainerId(name))
-  }
+  def run(name: String,
+  image: String,
+  memory: ByteSize = 256.MB,
+  environment: Map[String, String] = Map(),
+  labels: Map[String, String] = Map())(implicit transid: 
TransactionId): Future[KubernetesContainer] = {
+
+val envVars = environment.map {
+  case (key, value) => new 
EnvVarBuilder().withName(key).withValue(value).build()
+}.toSeq
+
+val pod = new PodBuilder()
+  .withNewMetadata()
+  .withName(name)
+  .addToLabels("name", name)
+  .addToLabels(labels.asJava)
+  .endMetadata()
+  .withNewSpec()
+  .withRestartPolicy("Always")
+  .addNewContainer()
+  .withNewResources()
+  .withLimits(Map("memory" -> new Quantity(memory.toMB + "Mi")).asJava)
+  .endResources()
+  .withName("user-action")
+  .withImage(image)
+  .withEnv(envVars.asJava)
+  .addNewPort()
+  .withContainerPort(8080)
+  .withName("action")
+  .endPort()
+  .endContainer()
+  .endSpec()
+  .build()
+
+kubeRestClient.pods.inNamespace("openwhisk").create(pod)
 
-  def inspectIPAddress(id: ContainerId)(implicit transid: TransactionId): 
Future[ContainerAddress] = {
 Future {
   blocking {
-val pod =
-  
kubeRestClient.pods().withName(id.asString).waitUntilReady(timeouts.inspect.length,
 timeouts.inspect.unit)
-ContainerAddress(pod.getStatus().getPodIP())
+val createdPod = kubeRestClient.pods
+  .inNamespace("openwhisk")
+  .withName(name)
+  .waitUntilReady(config.timeouts.run.length, config.timeouts.run.unit)
+toContainer(createdPod)
   }
 }.recoverWith {
   case e =>
-log.error(this, s"Failed to get IP of Pod '${id.asString}' within 
timeout: ${e.getClass} - ${e.getMessage}")
-Future.failed(new Exception(s"Failed to get IP of Pod 
'${id.asString}'"))
+log.error(this, s"Failed create pod for '$name': ${e.getClass} - 
${e.getMessage}")
+Future.failed(new Exception(s"Failed to create pod '$name'"))
+}
+  }
+
+  def rm(container: KubernetesContainer)(implicit transid: TransactionId): 
Future[Unit] = {
+runCmd(Seq("delete", "--now", "pod", container.id.asString), 
config.timeouts.rm).map(_ => ())
+  }
+
+  def rm(key: String, value: String)(implicit transid: TransactionId): 
Future[Unit] = {
+if (config.invokerAgent.enabled) {
+  Future {
+blocking {
+  kubeRestClient
+.inNamespace("openwhisk")
 
 Review comment:
   good point.  Moved the namespace value into application.conf.


This is an automated message from the Apache Git Service.
To respond to the message, please log on GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


With regards,
Apache Git Services


[GitHub] dgrove-oss commented on a change in pull request #3338: implement suspend/resume for KubernetesContainer

2018-02-27 Thread GitBox
dgrove-oss commented on a change in pull request #3338: implement 
suspend/resume for KubernetesContainer
URL: 
https://github.com/apache/incubator-openwhisk/pull/3338#discussion_r170916241
 
 

 ##
 File path: 
core/invoker/src/main/scala/whisk/core/containerpool/kubernetes/KubernetesClient.scala
 ##
 @@ -99,43 +116,139 @@ class KubernetesClient(
   }
   protected val kubectlCmd = Seq(findKubectlCmd)
 
-  def run(name: String, image: String, args: Seq[String] = Seq.empty[String])(
-implicit transid: TransactionId): Future[ContainerId] = {
-runCmd(Seq("run", name, s"--image=$image") ++ args, timeouts.run)
-  .map(_ => ContainerId(name))
-  }
+  def run(name: String,
+  image: String,
+  memory: ByteSize = 256.MB,
+  environment: Map[String, String] = Map(),
+  labels: Map[String, String] = Map())(implicit transid: 
TransactionId): Future[KubernetesContainer] = {
+
+val envVars = environment.map {
+  case (key, value) => new 
EnvVarBuilder().withName(key).withValue(value).build()
+}.toSeq
+
+val pod = new PodBuilder()
+  .withNewMetadata()
+  .withName(name)
+  .addToLabels("name", name)
+  .addToLabels(labels.asJava)
+  .endMetadata()
+  .withNewSpec()
+  .withRestartPolicy("Always")
+  .addNewContainer()
+  .withNewResources()
+  .withLimits(Map("memory" -> new Quantity(memory.toMB + "Mi")).asJava)
+  .endResources()
+  .withName("user-action")
+  .withImage(image)
+  .withEnv(envVars.asJava)
+  .addNewPort()
+  .withContainerPort(8080)
+  .withName("action")
+  .endPort()
+  .endContainer()
+  .endSpec()
+  .build()
+
+kubeRestClient.pods.inNamespace("openwhisk").create(pod)
 
-  def inspectIPAddress(id: ContainerId)(implicit transid: TransactionId): 
Future[ContainerAddress] = {
 Future {
   blocking {
-val pod =
-  
kubeRestClient.pods().withName(id.asString).waitUntilReady(timeouts.inspect.length,
 timeouts.inspect.unit)
-ContainerAddress(pod.getStatus().getPodIP())
+val createdPod = kubeRestClient.pods
+  .inNamespace("openwhisk")
+  .withName(name)
+  .waitUntilReady(config.timeouts.run.length, config.timeouts.run.unit)
+toContainer(createdPod)
   }
 }.recoverWith {
   case e =>
-log.error(this, s"Failed to get IP of Pod '${id.asString}' within 
timeout: ${e.getClass} - ${e.getMessage}")
-Future.failed(new Exception(s"Failed to get IP of Pod 
'${id.asString}'"))
+log.error(this, s"Failed create pod for '$name': ${e.getClass} - 
${e.getMessage}")
+Future.failed(new Exception(s"Failed to create pod '$name'"))
+}
+  }
+
+  def rm(container: KubernetesContainer)(implicit transid: TransactionId): 
Future[Unit] = {
+runCmd(Seq("delete", "--now", "pod", container.id.asString), 
config.timeouts.rm).map(_ => ())
+  }
+
+  def rm(key: String, value: String)(implicit transid: TransactionId): 
Future[Unit] = {
+if (config.invokerAgent.enabled) {
+  Future {
+blocking {
+  kubeRestClient
+.inNamespace("openwhisk")
 
 Review comment:
   good point.  Moved the namespace into application.conf.


This is an automated message from the Apache Git Service.
To respond to the message, please log on GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


With regards,
Apache Git Services


[GitHub] dgrove-oss commented on a change in pull request #3338: implement suspend/resume for KubernetesContainer

2018-02-23 Thread GitBox
dgrove-oss commented on a change in pull request #3338: implement 
suspend/resume for KubernetesContainer
URL: 
https://github.com/apache/incubator-openwhisk/pull/3338#discussion_r170387260
 
 

 ##
 File path: 
core/invoker/src/main/scala/whisk/core/containerpool/kubernetes/KubernetesClient.scala
 ##
 @@ -99,43 +111,133 @@ class KubernetesClient(
   }
   protected val kubectlCmd = Seq(findKubectlCmd)
 
-  def run(name: String, image: String, args: Seq[String] = Seq.empty[String])(
-implicit transid: TransactionId): Future[ContainerId] = {
-runCmd(Seq("run", name, s"--image=$image") ++ args, timeouts.run)
-  .map(_ => ContainerId(name))
-  }
+  def run(name: String,
+  image: String,
+  memory: ByteSize = 256.MB,
+  environment: Map[String, String] = Map(),
+  labels: Map[String, String] = Map())(implicit transid: 
TransactionId): Future[KubernetesContainer] = {
+
+val envVars = environment.map {
+  case (key, value) => new 
EnvVarBuilder().withName(key).withValue(value).build()
+}.toSeq
+
+val pod = new PodBuilder()
+  .withNewMetadata()
+  .withName(name)
+  .addToLabels("name", name)
+  .addToLabels(mapAsJavaMap(labels))
+  .endMetadata()
+  .withNewSpec()
+  .withRestartPolicy("Always")
+  .addNewContainer()
+  .withNewResources()
+  .withLimits(mapAsJavaMap(Map("memory" -> new Quantity(memory.toMB + 
"Mi"
+  .endResources()
+  .withName("user-action")
+  .withImage(image)
+  .withEnv(envVars)
+  .addNewPort()
+  .withContainerPort(8080)
+  .withName("action")
+  .endPort()
+  .endContainer()
+  .endSpec()
+  .build()
+
+kubeRestClient.pods.inNamespace("openwhisk").create(pod)
 
-  def inspectIPAddress(id: ContainerId)(implicit transid: TransactionId): 
Future[ContainerAddress] = {
 Future {
   blocking {
-val pod =
-  
kubeRestClient.pods().withName(id.asString).waitUntilReady(timeouts.inspect.length,
 timeouts.inspect.unit)
-ContainerAddress(pod.getStatus().getPodIP())
+val createdPod = kubeRestClient.pods
+  .inNamespace("openwhisk")
+  .withName(name)
+  .waitUntilReady(config.timeouts.run.length, config.timeouts.run.unit)
+toContainer(createdPod)
   }
 }.recoverWith {
   case e =>
-log.error(this, s"Failed to get IP of Pod '${id.asString}' within 
timeout: ${e.getClass} - ${e.getMessage}")
-Future.failed(new Exception(s"Failed to get IP of Pod 
'${id.asString}'"))
+log.error(this, s"Failed create pod for '$name': ${e.getClass} - 
${e.getMessage}")
+Future.failed(new Exception(s"Failed to create pod '$name'"))
 }
   }
 
-  def rm(id: ContainerId)(implicit transid: TransactionId): Future[Unit] =
-runCmd(Seq("delete", "--now", "pod", id.asString), timeouts.rm).map(_ => 
())
+  def rm(container: KubernetesContainer)(implicit transid: TransactionId): 
Future[Unit] = {
+// Pod deletion will never complete if the pod contains a paused container.
+// Therefore issue a resume before the delete (resuming a non-suspended 
container is harmless).
+resume(container).map { _ =>
+  runCmd(Seq("delete", "--now", "pod", container.id.asString), 
config.timeouts.rm).map(_ => ())
+}
+  }
 
-  def rm(key: String, value: String)(implicit transid: TransactionId): 
Future[Unit] =
-runCmd(Seq("delete", "--now", "pod", "-l", s"$key=$value"), 
timeouts.rm).map(_ => ())
+  def rm(key: String, value: String)(implicit transid: TransactionId): 
Future[Unit] = {
+if (config.invokerAgent) {
+  Future {
+blocking {
+  kubeRestClient
+.inNamespace("openwhisk")
+.pods()
+.withLabel(key, value)
+.list()
+.getItems
+.map { pod =>
+  rm(toContainer(pod))
+}
+.reduce((a, b) => a.flatMap(_ => b))
+}
+  }
+} else {
+  runCmd(Seq("delete", "--now", "pod", "-l", s"$key=$value"), 
config.timeouts.rm).map(_ => ())
+}
+  }
 
-  def logs(id: ContainerId, sinceTime: Option[Instant], waitForSentinel: 
Boolean = false)(
+  def suspend(container: KubernetesContainer)(implicit transid: 
TransactionId): Future[Unit] = {
 
 Review comment:
   I think the implementations belong in the KubernetesClient, since they need 
access to some of its state too (its config object). 
   
   I agree passing the entire KubernetesContainer object as a parameter looks a 
bit backwards.  I decided to do that (instead of passing a few of its fields 
individually) as a hedge against future churn and because it felt like a nice 
logical unit (vs. passing containerId, workerIP, nativeContainerId).   But I 
don't feel that strongly about this.


This is an automated message from the Apache Git Service.
To respond to the message, please log on GitHub and use the
URL above to go to the specific comment.

[GitHub] dgrove-oss commented on a change in pull request #3338: implement suspend/resume for KubernetesContainer

2018-02-23 Thread GitBox
dgrove-oss commented on a change in pull request #3338: implement 
suspend/resume for KubernetesContainer
URL: 
https://github.com/apache/incubator-openwhisk/pull/3338#discussion_r170385887
 
 

 ##
 File path: 
core/invoker/src/main/scala/whisk/core/containerpool/kubernetes/KubernetesClient.scala
 ##
 @@ -99,43 +111,133 @@ class KubernetesClient(
   }
   protected val kubectlCmd = Seq(findKubectlCmd)
 
-  def run(name: String, image: String, args: Seq[String] = Seq.empty[String])(
-implicit transid: TransactionId): Future[ContainerId] = {
-runCmd(Seq("run", name, s"--image=$image") ++ args, timeouts.run)
-  .map(_ => ContainerId(name))
-  }
+  def run(name: String,
+  image: String,
+  memory: ByteSize = 256.MB,
+  environment: Map[String, String] = Map(),
+  labels: Map[String, String] = Map())(implicit transid: 
TransactionId): Future[KubernetesContainer] = {
+
+val envVars = environment.map {
+  case (key, value) => new 
EnvVarBuilder().withName(key).withValue(value).build()
+}.toSeq
+
+val pod = new PodBuilder()
+  .withNewMetadata()
+  .withName(name)
+  .addToLabels("name", name)
+  .addToLabels(mapAsJavaMap(labels))
+  .endMetadata()
+  .withNewSpec()
+  .withRestartPolicy("Always")
+  .addNewContainer()
+  .withNewResources()
+  .withLimits(mapAsJavaMap(Map("memory" -> new Quantity(memory.toMB + 
"Mi"
+  .endResources()
+  .withName("user-action")
+  .withImage(image)
+  .withEnv(envVars)
+  .addNewPort()
+  .withContainerPort(8080)
+  .withName("action")
+  .endPort()
+  .endContainer()
+  .endSpec()
+  .build()
+
+kubeRestClient.pods.inNamespace("openwhisk").create(pod)
 
-  def inspectIPAddress(id: ContainerId)(implicit transid: TransactionId): 
Future[ContainerAddress] = {
 Future {
   blocking {
-val pod =
-  
kubeRestClient.pods().withName(id.asString).waitUntilReady(timeouts.inspect.length,
 timeouts.inspect.unit)
-ContainerAddress(pod.getStatus().getPodIP())
+val createdPod = kubeRestClient.pods
+  .inNamespace("openwhisk")
+  .withName(name)
+  .waitUntilReady(config.timeouts.run.length, config.timeouts.run.unit)
+toContainer(createdPod)
   }
 }.recoverWith {
   case e =>
-log.error(this, s"Failed to get IP of Pod '${id.asString}' within 
timeout: ${e.getClass} - ${e.getMessage}")
-Future.failed(new Exception(s"Failed to get IP of Pod 
'${id.asString}'"))
+log.error(this, s"Failed create pod for '$name': ${e.getClass} - 
${e.getMessage}")
+Future.failed(new Exception(s"Failed to create pod '$name'"))
 }
   }
 
-  def rm(id: ContainerId)(implicit transid: TransactionId): Future[Unit] =
-runCmd(Seq("delete", "--now", "pod", id.asString), timeouts.rm).map(_ => 
())
+  def rm(container: KubernetesContainer)(implicit transid: TransactionId): 
Future[Unit] = {
+// Pod deletion will never complete if the pod contains a paused container.
+// Therefore issue a resume before the delete (resuming a non-suspended 
container is harmless).
+resume(container).map { _ =>
 
 Review comment:
   Makes sense; I restructured.


This is an automated message from the Apache Git Service.
To respond to the message, please log on GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


With regards,
Apache Git Services


[GitHub] dgrove-oss commented on a change in pull request #3338: implement suspend/resume for KubernetesContainer

2018-02-23 Thread GitBox
dgrove-oss commented on a change in pull request #3338: implement 
suspend/resume for KubernetesContainer
URL: 
https://github.com/apache/incubator-openwhisk/pull/3338#discussion_r170385786
 
 

 ##
 File path: 
core/invoker/src/main/scala/whisk/core/containerpool/kubernetes/KubernetesClient.scala
 ##
 @@ -99,43 +111,133 @@ class KubernetesClient(
   }
   protected val kubectlCmd = Seq(findKubectlCmd)
 
-  def run(name: String, image: String, args: Seq[String] = Seq.empty[String])(
-implicit transid: TransactionId): Future[ContainerId] = {
-runCmd(Seq("run", name, s"--image=$image") ++ args, timeouts.run)
-  .map(_ => ContainerId(name))
-  }
+  def run(name: String,
+  image: String,
+  memory: ByteSize = 256.MB,
+  environment: Map[String, String] = Map(),
+  labels: Map[String, String] = Map())(implicit transid: 
TransactionId): Future[KubernetesContainer] = {
+
+val envVars = environment.map {
+  case (key, value) => new 
EnvVarBuilder().withName(key).withValue(value).build()
+}.toSeq
+
+val pod = new PodBuilder()
+  .withNewMetadata()
+  .withName(name)
+  .addToLabels("name", name)
+  .addToLabels(mapAsJavaMap(labels))
+  .endMetadata()
+  .withNewSpec()
+  .withRestartPolicy("Always")
 
 Review comment:
   Preserving the behavior that was there.  I agree we don't actually want them 
to be restarted, but @bbrowning said in #3219 that on OpenShift they need to set 
the restart policy to Always as a hack to prevent the containers from being 
arbitrarily killed by the system.


This is an automated message from the Apache Git Service.
To respond to the message, please log on GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


With regards,
Apache Git Services