[spark] branch master updated: [SPARK-26603][K8S] Update minikube backend

2019-02-03 Thread felixcheung
This is an automated email from the ASF dual-hosted git repository.

felixcheung pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
 new 196ca0c  [SPARK-26603][K8S] Update minikube backend
196ca0c is described below

commit 196ca0c8f5f5caa2ae588ff4a63ce314aa42aecc
Author: Stavros Kontopoulos 
AuthorDate: Sun Feb 3 17:15:20 2019 -0800

[SPARK-26603][K8S] Update minikube backend

## What changes were proposed in this pull request?

- Covers latest minikube versions.
- keeps the older version support

Note: While I was facing disk pressure issues locally on my machine, I noticed 
the minikube status command would report that everything was working fine even 
if some kube-system pods were not up. I don't think the output is 100% 
reliable, but it is good enough for most cases.

## How was this patch tested?

Run it against latest version of minikube (v0.32.0).

Author: Stavros Kontopoulos 

Closes #23520 from skonto/update-mini-backend.
---
 .../backend/minikube/Minikube.scala| 64 ++
 1 file changed, 54 insertions(+), 10 deletions(-)

diff --git 
a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/backend/minikube/Minikube.scala
 
b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/backend/minikube/Minikube.scala
index 6494cbc..58aa177 100644
--- 
a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/backend/minikube/Minikube.scala
+++ 
b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/k8s/integrationtest/backend/minikube/Minikube.scala
@@ -16,7 +16,6 @@
  */
 package org.apache.spark.deploy.k8s.integrationtest.backend.minikube
 
-import java.io.File
 import java.nio.file.Paths
 
 import io.fabric8.kubernetes.client.{ConfigBuilder, DefaultKubernetesClient}
@@ -26,8 +25,14 @@ import org.apache.spark.internal.Logging
 
 // TODO support windows
 private[spark] object Minikube extends Logging {
-
   private val MINIKUBE_STARTUP_TIMEOUT_SECONDS = 60
+  private val HOST_PREFIX = "host:"
+  private val KUBELET_PREFIX = "kubelet:"
+  private val APISERVER_PREFIX = "apiserver:"
+  private val KUBECTL_PREFIX = "kubectl:"
+  private val MINIKUBE_VM_PREFIX = "minikubeVM: "
+  private val MINIKUBE_PREFIX = "minikube: "
+  private val MINIKUBE_PATH = ".minikube"
 
   def getMinikubeIp: String = {
 val outputs = executeMinikube("ip")
@@ -38,12 +43,21 @@ private[spark] object Minikube extends Logging {
 
   def getMinikubeStatus: MinikubeStatus.Value = {
 val statusString = executeMinikube("status")
-  .filter(line => line.contains("minikubeVM: ") || 
line.contains("minikube:"))
-  .head
-  .replaceFirst("minikubeVM: ", "")
-  .replaceFirst("minikube: ", "")
-MinikubeStatus.unapply(statusString)
+logInfo(s"Minikube status command output:\n$statusString")
+// up to minikube version v0.30.0 use this to check for minikube status
+val oldMinikube = statusString
+  .filter(line => line.contains(MINIKUBE_VM_PREFIX) || 
line.contains(MINIKUBE_PREFIX))
+
+if (oldMinikube.isEmpty) {
+  getIfNewMinikubeStatus(statusString)
+} else {
+  val finalStatusString = oldMinikube
+.head
+.replaceFirst(MINIKUBE_VM_PREFIX, "")
+.replaceFirst(MINIKUBE_PREFIX, "")
+  MinikubeStatus.unapply(finalStatusString)
 .getOrElse(throw new IllegalStateException(s"Unknown status 
$statusString"))
+}
   }
 
   def getKubernetesClient: DefaultKubernetesClient = {
@@ -52,13 +66,43 @@ private[spark] object Minikube extends Logging {
 val kubernetesConf = new ConfigBuilder()
   .withApiVersion("v1")
   .withMasterUrl(kubernetesMaster)
-  .withCaCertFile(Paths.get(userHome, ".minikube", 
"ca.crt").toFile.getAbsolutePath)
-  .withClientCertFile(Paths.get(userHome, ".minikube", 
"apiserver.crt").toFile.getAbsolutePath)
-  .withClientKeyFile(Paths.get(userHome, ".minikube", 
"apiserver.key").toFile.getAbsolutePath)
+  .withCaCertFile(Paths.get(userHome, MINIKUBE_PATH, 
"ca.crt").toFile.getAbsolutePath)
+  .withClientCertFile(Paths.get(userHome, MINIKUBE_PATH, 
"apiserver.crt").toFile.getAbsolutePath)
+  .withClientKeyFile(Paths.get(userHome, MINIKUBE_PATH, 
"apiserver.key").toFile.getAbsolutePath)
   .build()
 new DefaultKubernetesClient(kubernetesConf)
   }
 
+  // Covers minikube status output after Minikube V0.30.
+  private def getIfNewMinikubeStatus(statusString: Seq[String]): 
MinikubeStatus.Value = {
+val hostString = statusString.find(_.contains(s"$HOST_PREFIX "))
+val kubeletString = statusString.find(_.contains(s"$KUBELET_PREFIX "))
+val apiserverString = 

[spark] branch branch-2.3 updated: [SPARK-26751][SQL] Fix memory leak when statement run in background and throw exception which is not HiveSQLException

2019-02-03 Thread srowen
This is an automated email from the ASF dual-hosted git repository.

srowen pushed a commit to branch branch-2.3
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/branch-2.3 by this push:
 new 4d6ea2c  [SPARK-26751][SQL] Fix memory leak when statement run in 
background and throw exception which is not HiveSQLException
4d6ea2c is described below

commit 4d6ea2cbb83d365434d0650a9e5ecaefc939b97d
Author: zhoukang 
AuthorDate: Sun Feb 3 08:45:57 2019 -0600

[SPARK-26751][SQL] Fix memory leak when statement run in background and 
throw exception which is not HiveSQLException

## What changes were proposed in this pull request?
When we run in the background and get an exception which is not 
HiveSQLException,
we may encounter a memory leak since handleToOperation will not be removed 
correctly.
The reason is below:
1. When calling operation.run() in HiveSessionImpl#executeStatementInternal 
we throw an exception which is not HiveSQLException
2. Then the opHandle generated by SparkSQLOperationManager will not be 
added into opHandleSet of HiveSessionImpl , and 
operationManager.closeOperation(opHandle) will not be called
3. When we close the session we will also call 
operationManager.closeOperation(opHandle); however, since we did not add this 
opHandle into the opHandleSet, it will never be closed there.

For the reasons above, the opHandle will always remain in 
SparkSQLOperationManager#handleToOperation, which will cause a memory leak.
More details and a case has attached on 
https://issues.apache.org/jira/browse/SPARK-26751
This patch will always throw HiveSQLException when running in background

## How was this patch tested?
Existing unit tests.

Closes #23673 from caneGuy/zhoukang/fix-hivesessionimpl-leak.

Authored-by: zhoukang 
Signed-off-by: Sean Owen 
(cherry picked from commit 255faaf3436e1f41838062ed460f801bb0be40ec)
Signed-off-by: Sean Owen 
---
 .../spark/sql/hive/thriftserver/SparkExecuteStatementOperation.scala| 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git 
a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkExecuteStatementOperation.scala
 
b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkExecuteStatementOperation.scala
index 3cfc81b..fd17f50 100644
--- 
a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkExecuteStatementOperation.scala
+++ 
b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkExecuteStatementOperation.scala
@@ -204,7 +204,7 @@ private[hive] class SparkExecuteStatementOperation(
 case NonFatal(e) =>
   logError(s"Error executing query in background", e)
   setState(OperationState.ERROR)
-  throw e
+  throw new HiveSQLException(e)
   }
 }
   }


-
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org



[spark] branch branch-2.4 updated: [SPARK-26751][SQL] Fix memory leak when statement run in background and throw exception which is not HiveSQLException

2019-02-03 Thread srowen
This is an automated email from the ASF dual-hosted git repository.

srowen pushed a commit to branch branch-2.4
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/branch-2.4 by this push:
 new 3d4aa5b  [SPARK-26751][SQL] Fix memory leak when statement run in 
background and throw exception which is not HiveSQLException
3d4aa5b is described below

commit 3d4aa5bd798a793a10c5d769a31f97242e78ce15
Author: zhoukang 
AuthorDate: Sun Feb 3 08:45:57 2019 -0600

[SPARK-26751][SQL] Fix memory leak when statement run in background and 
throw exception which is not HiveSQLException

## What changes were proposed in this pull request?
When we run in the background and get an exception which is not 
HiveSQLException,
we may encounter a memory leak since handleToOperation will not be removed 
correctly.
The reason is below:
1. When calling operation.run() in HiveSessionImpl#executeStatementInternal 
we throw an exception which is not HiveSQLException
2. Then the opHandle generated by SparkSQLOperationManager will not be 
added into opHandleSet of HiveSessionImpl , and 
operationManager.closeOperation(opHandle) will not be called
3. When we close the session we will also call 
operationManager.closeOperation(opHandle); however, since we did not add this 
opHandle into the opHandleSet, it will never be closed there.

For the reasons above, the opHandle will always remain in 
SparkSQLOperationManager#handleToOperation, which will cause a memory leak.
More details and a case has attached on 
https://issues.apache.org/jira/browse/SPARK-26751
This patch will always throw HiveSQLException when running in background

## How was this patch tested?
Existing unit tests.

Closes #23673 from caneGuy/zhoukang/fix-hivesessionimpl-leak.

Authored-by: zhoukang 
Signed-off-by: Sean Owen 
(cherry picked from commit 255faaf3436e1f41838062ed460f801bb0be40ec)
Signed-off-by: Sean Owen 
---
 .../spark/sql/hive/thriftserver/SparkExecuteStatementOperation.scala| 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git 
a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkExecuteStatementOperation.scala
 
b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkExecuteStatementOperation.scala
index 3cfc81b..fd17f50 100644
--- 
a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkExecuteStatementOperation.scala
+++ 
b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkExecuteStatementOperation.scala
@@ -204,7 +204,7 @@ private[hive] class SparkExecuteStatementOperation(
 case NonFatal(e) =>
   logError(s"Error executing query in background", e)
   setState(OperationState.ERROR)
-  throw e
+  throw new HiveSQLException(e)
   }
 }
   }


-
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org



[spark] branch master updated: [SPARK-26751][SQL] Fix memory leak when statement run in background and throw exception which is not HiveSQLException

2019-02-03 Thread srowen
This is an automated email from the ASF dual-hosted git repository.

srowen pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
 new 255faaf  [SPARK-26751][SQL] Fix memory leak when statement run in 
background and throw exception which is not HiveSQLException
255faaf is described below

commit 255faaf3436e1f41838062ed460f801bb0be40ec
Author: zhoukang 
AuthorDate: Sun Feb 3 08:45:57 2019 -0600

[SPARK-26751][SQL] Fix memory leak when statement run in background and 
throw exception which is not HiveSQLException

## What changes were proposed in this pull request?
When we run in the background and get an exception which is not 
HiveSQLException,
we may encounter a memory leak since handleToOperation will not be removed 
correctly.
The reason is below:
1. When calling operation.run() in HiveSessionImpl#executeStatementInternal 
we throw an exception which is not HiveSQLException
2. Then the opHandle generated by SparkSQLOperationManager will not be 
added into opHandleSet of HiveSessionImpl , and 
operationManager.closeOperation(opHandle) will not be called
3. When we close the session we will also call 
operationManager.closeOperation(opHandle); however, since we did not add this 
opHandle into the opHandleSet, it will never be closed there.

For the reasons above, the opHandle will always remain in 
SparkSQLOperationManager#handleToOperation, which will cause a memory leak.
More details and a case has attached on 
https://issues.apache.org/jira/browse/SPARK-26751
This patch will always throw HiveSQLException when running in background

## How was this patch tested?
Existing unit tests.

Closes #23673 from caneGuy/zhoukang/fix-hivesessionimpl-leak.

Authored-by: zhoukang 
Signed-off-by: Sean Owen 
---
 .../spark/sql/hive/thriftserver/SparkExecuteStatementOperation.scala| 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git 
a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkExecuteStatementOperation.scala
 
b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkExecuteStatementOperation.scala
index 83dfa74..1772fe6 100644
--- 
a/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkExecuteStatementOperation.scala
+++ 
b/sql/hive-thriftserver/src/main/scala/org/apache/spark/sql/hive/thriftserver/SparkExecuteStatementOperation.scala
@@ -205,7 +205,7 @@ private[hive] class SparkExecuteStatementOperation(
 case NonFatal(e) =>
   logError(s"Error executing query in background", e)
   setState(OperationState.ERROR)
-  throw e
+  throw new HiveSQLException(e)
   }
 }
   }


-
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org



[spark] branch master updated: [SPARK-26818][ML] Make MLEvents JSON ser/de safe

2019-02-03 Thread gurwls223
This is an automated email from the ASF dual-hosted git repository.

gurwls223 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
 new dfb8809  [SPARK-26818][ML] Make MLEvents JSON ser/de safe
dfb8809 is described below

commit dfb880951a8de55c587c1bf8b696df50eae6e68a
Author: Hyukjin Kwon 
AuthorDate: Sun Feb 3 21:19:35 2019 +0800

[SPARK-26818][ML] Make MLEvents JSON ser/de safe

## What changes were proposed in this pull request?

Currently, it looks like it's not going to cause any practically effective 
problem (if I didn't misread the code).

I see one place that JSON formatted events are being used.


https://github.com/apache/spark/blob/ec506bd30c2ca324c12c9ec811764081c2eb8c42/core/src/main/scala/org/apache/spark/scheduler/EventLoggingListener.scala#L148

It's okay because it just logs when the exception is ignorable


https://github.com/apache/spark/blob/9690eba16efe6d25261934d8b73a221972b684f3/core/src/main/scala/org/apache/spark/util/ListenerBus.scala#L111

I guess it would be best to stay safe - I don't want this unstable 
experimental feature to break anything in any case. It also disables `logEvent` 
in `SparkListenerEvent` for the same reason.

This is also to match SQL execution events side:


https://github.com/apache/spark/blob/ca545f79410a464ef24e3986fac225f53bb2ef02/sql/core/src/main/scala/org/apache/spark/sql/execution/ui/SQLListener.scala#L41-L57

to make ML events JSON ser/de safe.

## How was this patch tested?

Manually tested, and unit tests were added.

Closes #23728 from HyukjinKwon/SPARK-26818.

Authored-by: Hyukjin Kwon 
Signed-off-by: Hyukjin Kwon 
---
 .../main/scala/org/apache/spark/ml/events.scala|  81 +++
 .../scala/org/apache/spark/ml/MLEventsSuite.scala  | 112 +
 2 files changed, 155 insertions(+), 38 deletions(-)

diff --git a/mllib/src/main/scala/org/apache/spark/ml/events.scala 
b/mllib/src/main/scala/org/apache/spark/ml/events.scala
index c51600f..dc4be4d 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/events.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/events.scala
@@ -17,6 +17,8 @@
 
 package org.apache.spark.ml
 
+import com.fasterxml.jackson.annotation.JsonIgnore
+
 import org.apache.spark.SparkContext
 import org.apache.spark.annotation.Unstable
 import org.apache.spark.internal.Logging
@@ -29,53 +31,84 @@ import org.apache.spark.sql.{DataFrame, Dataset}
  * after each operation (the event should document this).
  *
  * @note This is supported via [[Pipeline]] and [[PipelineModel]].
+ * @note This is experimental and unstable. Do not use this unless you fully
+ *   understand what `Unstable` means.
  */
 @Unstable
-sealed trait MLEvent extends SparkListenerEvent
+sealed trait MLEvent extends SparkListenerEvent {
+  // Do not log ML events in event log. It should be revisited to see
+  // how it works with history server.
+  protected[spark] override def logEvent: Boolean = false
+}
 
 /**
  * Event fired before `Transformer.transform`.
  */
 @Unstable
-case class TransformStart(transformer: Transformer, input: Dataset[_]) extends 
MLEvent
+case class TransformStart() extends MLEvent {
+  @JsonIgnore var transformer: Transformer = _
+  @JsonIgnore var input: Dataset[_] = _
+}
+
 /**
  * Event fired after `Transformer.transform`.
  */
 @Unstable
-case class TransformEnd(transformer: Transformer, output: Dataset[_]) extends 
MLEvent
+case class TransformEnd() extends MLEvent {
+  @JsonIgnore var transformer: Transformer = _
+  @JsonIgnore var output: Dataset[_] = _
+}
 
 /**
  * Event fired before `Estimator.fit`.
  */
 @Unstable
-case class FitStart[M <: Model[M]](estimator: Estimator[M], dataset: 
Dataset[_]) extends MLEvent
+case class FitStart[M <: Model[M]]() extends MLEvent {
+  @JsonIgnore var estimator: Estimator[M] = _
+  @JsonIgnore var dataset: Dataset[_] = _
+}
+
 /**
  * Event fired after `Estimator.fit`.
  */
 @Unstable
-case class FitEnd[M <: Model[M]](estimator: Estimator[M], model: M) extends 
MLEvent
+case class FitEnd[M <: Model[M]]() extends MLEvent {
+  @JsonIgnore var estimator: Estimator[M] = _
+  @JsonIgnore var model: M = _
+}
 
 /**
  * Event fired before `MLReader.load`.
  */
 @Unstable
-case class LoadInstanceStart[T](reader: MLReader[T], path: String) extends 
MLEvent
+case class LoadInstanceStart[T](path: String) extends MLEvent {
+  @JsonIgnore var reader: MLReader[T] = _
+}
+
 /**
  * Event fired after `MLReader.load`.
  */
 @Unstable
-case class LoadInstanceEnd[T](reader: MLReader[T], instance: T) extends MLEvent
+case class LoadInstanceEnd[T]() extends MLEvent {
+  @JsonIgnore var reader: MLReader[T] = _
+  @JsonIgnore var instance: T = _
+}
 
 /**
  * Event fired before `MLWriter.save`.
  */
 @Unstable
-case class SaveInstanceStart(writer: