spark git commit: [SPARK-22862] Docs on lazy elimination of columns missing from an encoder
Repository: spark
Updated Branches:
  refs/heads/branch-2.1 0f6862106 -> 1df8020e1

[SPARK-22862] Docs on lazy elimination of columns missing from an encoder

This behavior has confused some users, so let's clarify it.

Author: Michael Armbrust

Closes #20048 from marmbrus/datasetAsDocs.

(cherry picked from commit 8df1da396f64bb7fe76d73cd01498fdf3b8ed964)
Signed-off-by: gatorsmile

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/1df8020e
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/1df8020e
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/1df8020e

Branch: refs/heads/branch-2.1
Commit: 1df8020e1e1bd136b2374471584e110c4bc0d948
Parents: 0f68621
Author: Michael Armbrust
Authored: Thu Dec 21 21:38:16 2017 -0800
Committer: gatorsmile
Committed: Thu Dec 21 21:38:56 2017 -0800

--
 sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala | 4 ++++
 1 file changed, 4 insertions(+)
--

http://git-wip-us.apache.org/repos/asf/spark/blob/1df8020e/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala
--
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala
index ac34e0b..ecced7c 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala
@@ -368,6 +368,10 @@ class Dataset[T] private[sql](
    * If the schema of the Dataset does not match the desired `U` type, you can use `select`
    * along with `alias` or `as` to rearrange or rename as required.
    *
+   * Note that `as[]` only changes the view of the data that is passed into typed operations,
+   * such as `map()`, and does not eagerly project away any columns that are not present in
+   * the specified class.
+   *
    * @group basic
    * @since 1.6.0
    */
spark git commit: [SPARK-22862] Docs on lazy elimination of columns missing from an encoder
Repository: spark
Updated Branches:
  refs/heads/branch-2.2 1e4cca02f -> 1cf3e3a26

[SPARK-22862] Docs on lazy elimination of columns missing from an encoder

This behavior has confused some users, so let's clarify it.

Author: Michael Armbrust

Closes #20048 from marmbrus/datasetAsDocs.

(cherry picked from commit 8df1da396f64bb7fe76d73cd01498fdf3b8ed964)
Signed-off-by: gatorsmile

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/1cf3e3a2
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/1cf3e3a2
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/1cf3e3a2

Branch: refs/heads/branch-2.2
Commit: 1cf3e3a26961d306eb17b7629d8742a4df45f339
Parents: 1e4cca0
Author: Michael Armbrust
Authored: Thu Dec 21 21:38:16 2017 -0800
Committer: gatorsmile
Committed: Thu Dec 21 21:38:34 2017 -0800

--
 sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala | 4 ++++
 1 file changed, 4 insertions(+)
--

http://git-wip-us.apache.org/repos/asf/spark/blob/1cf3e3a2/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala
--
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala
index 1acbad9..fb700a4 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala
@@ -368,6 +368,10 @@ class Dataset[T] private[sql](
    * If the schema of the Dataset does not match the desired `U` type, you can use `select`
    * along with `alias` or `as` to rearrange or rename as required.
    *
+   * Note that `as[]` only changes the view of the data that is passed into typed operations,
+   * such as `map()`, and does not eagerly project away any columns that are not present in
+   * the specified class.
+   *
    * @group basic
    * @since 1.6.0
    */
spark git commit: [SPARK-22862] Docs on lazy elimination of columns missing from an encoder
Repository: spark
Updated Branches:
  refs/heads/master 22e1849bc -> 8df1da396

[SPARK-22862] Docs on lazy elimination of columns missing from an encoder

This behavior has confused some users, so let's clarify it.

Author: Michael Armbrust

Closes #20048 from marmbrus/datasetAsDocs.

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/8df1da39
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/8df1da39
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/8df1da39

Branch: refs/heads/master
Commit: 8df1da396f64bb7fe76d73cd01498fdf3b8ed964
Parents: 22e1849
Author: Michael Armbrust
Authored: Thu Dec 21 21:38:16 2017 -0800
Committer: gatorsmile
Committed: Thu Dec 21 21:38:16 2017 -0800

--
 sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala | 4 ++++
 1 file changed, 4 insertions(+)
--

http://git-wip-us.apache.org/repos/asf/spark/blob/8df1da39/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala
--
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala
index ef00562..209b800 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala
@@ -401,6 +401,10 @@ class Dataset[T] private[sql](
    * If the schema of the Dataset does not match the desired `U` type, you can use `select`
    * along with `alias` or `as` to rearrange or rename as required.
    *
+   * Note that `as[]` only changes the view of the data that is passed into typed operations,
+   * such as `map()`, and does not eagerly project away any columns that are not present in
+   * the specified class.
+   *
    * @group basic
    * @since 1.6.0
    */
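A minimal sketch of what the new doc note means in practice (run in spark-shell; the case class and column names are illustrative, not from the patch):

```scala
case class NameOnly(name: String)

val df = Seq(("alice", 30), ("bob", 25)).toDF("name", "age")
val ds = df.as[NameOnly]

ds.printSchema()       // still shows both `name` and `age`: as[] drops nothing eagerly
ds.map(_.name).show()  // typed operations only see the fields of NameOnly

// If you actually want the extra columns gone, project explicitly:
val projected = df.select("name").as[NameOnly]
```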
spark git commit: [SPARK-22866][K8S] Fix path issue in Kubernetes dockerfile
Repository: spark Updated Branches: refs/heads/master a36b78b0e -> 22e1849bc [SPARK-22866][K8S] Fix path issue in Kubernetes dockerfile ## What changes were proposed in this pull request? The path was recently changed in https://github.com/apache/spark/pull/19946, but the dockerfile was not updated. This is a trivial 1 line fix. ## How was this patch tested? `./sbin/build-push-docker-images.sh -r spark-repo -t latest build` cc/ vanzin mridulm rxin jiangxb1987 liyinan926 Author: Anirudh RamanathanAuthor: foxish Closes #20051 from foxish/patch-1. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/22e1849b Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/22e1849b Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/22e1849b Branch: refs/heads/master Commit: 22e1849bcfb3ef988f4f9a5c2783bfc7ec001694 Parents: a36b78b Author: Anirudh Ramanathan Authored: Thu Dec 21 21:03:10 2017 -0800 Committer: Mridul Muralidharan Committed: Thu Dec 21 21:03:10 2017 -0800 -- .../kubernetes/docker/src/main/dockerfiles/driver/Dockerfile | 2 +- .../kubernetes/docker/src/main/dockerfiles/executor/Dockerfile | 2 +- .../kubernetes/docker/src/main/dockerfiles/spark-base/Dockerfile | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/22e1849b/resource-managers/kubernetes/docker/src/main/dockerfiles/driver/Dockerfile -- diff --git a/resource-managers/kubernetes/docker/src/main/dockerfiles/driver/Dockerfile b/resource-managers/kubernetes/docker/src/main/dockerfiles/driver/Dockerfile index d163495..9b682f8 100644 --- a/resource-managers/kubernetes/docker/src/main/dockerfiles/driver/Dockerfile +++ b/resource-managers/kubernetes/docker/src/main/dockerfiles/driver/Dockerfile @@ -22,7 +22,7 @@ FROM spark-base # If this docker file is being used in the context of building your images from a Spark # distribution, the docker build command should be invoked from the top level directory # of the Spark distribution. E.g.: -# docker build -t spark-driver:latest -f dockerfiles/spark-base/Dockerfile . +# docker build -t spark-driver:latest -f kubernetes/dockerfiles/spark-base/Dockerfile . COPY examples /opt/spark/examples http://git-wip-us.apache.org/repos/asf/spark/blob/22e1849b/resource-managers/kubernetes/docker/src/main/dockerfiles/executor/Dockerfile -- diff --git a/resource-managers/kubernetes/docker/src/main/dockerfiles/executor/Dockerfile b/resource-managers/kubernetes/docker/src/main/dockerfiles/executor/Dockerfile index 0e38169..168cd4c 100644 --- a/resource-managers/kubernetes/docker/src/main/dockerfiles/executor/Dockerfile +++ b/resource-managers/kubernetes/docker/src/main/dockerfiles/executor/Dockerfile @@ -22,7 +22,7 @@ FROM spark-base # If this docker file is being used in the context of building your images from a Spark # distribution, the docker build command should be invoked from the top level directory # of the Spark distribution. E.g.: -# docker build -t spark-executor:latest -f dockerfiles/spark-base/Dockerfile . +# docker build -t spark-executor:latest -f kubernetes/dockerfiles/spark-base/Dockerfile . 
COPY examples /opt/spark/examples http://git-wip-us.apache.org/repos/asf/spark/blob/22e1849b/resource-managers/kubernetes/docker/src/main/dockerfiles/spark-base/Dockerfile -- diff --git a/resource-managers/kubernetes/docker/src/main/dockerfiles/spark-base/Dockerfile b/resource-managers/kubernetes/docker/src/main/dockerfiles/spark-base/Dockerfile index 20316c9..222e777 100644 --- a/resource-managers/kubernetes/docker/src/main/dockerfiles/spark-base/Dockerfile +++ b/resource-managers/kubernetes/docker/src/main/dockerfiles/spark-base/Dockerfile @@ -22,7 +22,7 @@ FROM openjdk:8-alpine # If this docker file is being used in the context of building your images from a Spark # distribution, the docker build command should be invoked from the top level directory # of the Spark distribution. E.g.: -# docker build -t spark-base:latest -f dockerfiles/spark-base/Dockerfile . +# docker build -t spark-base:latest -f kubernetes/dockerfiles/spark-base/Dockerfile . RUN set -ex && \ apk upgrade --no-cache && \ @@ -38,7 +38,7 @@ COPY jars /opt/spark/jars COPY bin /opt/spark/bin COPY sbin /opt/spark/sbin COPY conf /opt/spark/conf -COPY dockerfiles/spark-base/entrypoint.sh /opt/ +COPY kubernetes/dockerfiles/spark-base/entrypoint.sh /opt/ ENV SPARK_HOME /opt/spark
svn commit: r23862 - in /dev/spark/2.3.0-SNAPSHOT-2017_12_21_20_01-a36b78b-docs: ./ _site/ _site/api/ _site/api/R/ _site/api/java/ _site/api/java/lib/ _site/api/java/org/ _site/api/java/org/apache/ _s
Author: pwendell Date: Fri Dec 22 04:14:43 2017 New Revision: 23862 Log: Apache Spark 2.3.0-SNAPSHOT-2017_12_21_20_01-a36b78b docs [This commit notification would consist of 1415 parts, which exceeds the limit of 50 ones, so it was shortened to the summary.]
spark git commit: [SPARK-22450][CORE][MLLIB][FOLLOWUP] safely register class for mllib - LabeledPoint/VectorWithNorm/TreePoint
Repository: spark Updated Branches: refs/heads/master c6f01cade -> a36b78b0e [SPARK-22450][CORE][MLLIB][FOLLOWUP] safely register class for mllib - LabeledPoint/VectorWithNorm/TreePoint ## What changes were proposed in this pull request? register following classes in Kryo: `org.apache.spark.mllib.regression.LabeledPoint` `org.apache.spark.mllib.clustering.VectorWithNorm` `org.apache.spark.ml.feature.LabeledPoint` `org.apache.spark.ml.tree.impl.TreePoint` `org.apache.spark.ml.tree.impl.BaggedPoint` seems also need to be registered, but I don't know how to do it in this safe way. WeichenXu123 cloud-fan ## How was this patch tested? added tests Author: Zheng RuiFengCloses #19950 from zhengruifeng/labeled_kryo. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/a36b78b0 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/a36b78b0 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/a36b78b0 Branch: refs/heads/master Commit: a36b78b0e420b909bde0cec4349cdc2103853b91 Parents: c6f01ca Author: Zheng RuiFeng Authored: Thu Dec 21 20:20:04 2017 -0600 Committer: Sean Owen Committed: Thu Dec 21 20:20:04 2017 -0600 -- .../spark/serializer/KryoSerializer.scala | 27 ++- .../apache/spark/ml/feature/InstanceSuit.scala | 47 .../apache/spark/ml/feature/InstanceSuite.scala | 45 +++ .../spark/ml/feature/LabeledPointSuite.scala| 39 .../spark/ml/tree/impl/TreePointSuite.scala | 35 +++ .../spark/mllib/clustering/KMeansSuite.scala| 18 +++- .../mllib/regression/LabeledPointSuite.scala| 18 +++- 7 files changed, 169 insertions(+), 60 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/a36b78b0/core/src/main/scala/org/apache/spark/serializer/KryoSerializer.scala -- diff --git a/core/src/main/scala/org/apache/spark/serializer/KryoSerializer.scala b/core/src/main/scala/org/apache/spark/serializer/KryoSerializer.scala index 2259d1a..538ae05 100644 --- a/core/src/main/scala/org/apache/spark/serializer/KryoSerializer.scala +++ b/core/src/main/scala/org/apache/spark/serializer/KryoSerializer.scala @@ -181,20 +181,25 @@ class KryoSerializer(conf: SparkConf) // We can't load those class directly in order to avoid unnecessary jar dependencies. // We load them safely, ignore it if the class not found. 
-Seq("org.apache.spark.mllib.linalg.Vector", - "org.apache.spark.mllib.linalg.DenseVector", - "org.apache.spark.mllib.linalg.SparseVector", - "org.apache.spark.mllib.linalg.Matrix", - "org.apache.spark.mllib.linalg.DenseMatrix", - "org.apache.spark.mllib.linalg.SparseMatrix", - "org.apache.spark.ml.linalg.Vector", +Seq( + "org.apache.spark.ml.feature.Instance", + "org.apache.spark.ml.feature.LabeledPoint", + "org.apache.spark.ml.feature.OffsetInstance", + "org.apache.spark.ml.linalg.DenseMatrix", "org.apache.spark.ml.linalg.DenseVector", - "org.apache.spark.ml.linalg.SparseVector", "org.apache.spark.ml.linalg.Matrix", - "org.apache.spark.ml.linalg.DenseMatrix", "org.apache.spark.ml.linalg.SparseMatrix", - "org.apache.spark.ml.feature.Instance", - "org.apache.spark.ml.feature.OffsetInstance" + "org.apache.spark.ml.linalg.SparseVector", + "org.apache.spark.ml.linalg.Vector", + "org.apache.spark.ml.tree.impl.TreePoint", + "org.apache.spark.mllib.clustering.VectorWithNorm", + "org.apache.spark.mllib.linalg.DenseMatrix", + "org.apache.spark.mllib.linalg.DenseVector", + "org.apache.spark.mllib.linalg.Matrix", + "org.apache.spark.mllib.linalg.SparseMatrix", + "org.apache.spark.mllib.linalg.SparseVector", + "org.apache.spark.mllib.linalg.Vector", + "org.apache.spark.mllib.regression.LabeledPoint" ).foreach { name => try { val clazz = Utils.classForName(name) http://git-wip-us.apache.org/repos/asf/spark/blob/a36b78b0/mllib/src/test/scala/org/apache/spark/ml/feature/InstanceSuit.scala -- diff --git a/mllib/src/test/scala/org/apache/spark/ml/feature/InstanceSuit.scala b/mllib/src/test/scala/org/apache/spark/ml/feature/InstanceSuit.scala deleted file mode 100644 index 88c85a9..000 --- a/mllib/src/test/scala/org/apache/spark/ml/feature/InstanceSuit.scala +++ /dev/null @@ -1,47 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF
spark git commit: [SPARK-22750][SQL] Reuse mutable states when possible
Repository: spark Updated Branches: refs/heads/master c0abb1d99 -> c6f01cade [SPARK-22750][SQL] Reuse mutable states when possible ## What changes were proposed in this pull request? The PR introduces a new method `addImmutableStateIfNotExists ` to `CodeGenerator` to allow reusing and sharing the same global variable between different Expressions. This helps reducing the number of global variables needed, which is important to limit the impact on the constant pool. ## How was this patch tested? added UTs Author: Marco GaidoAuthor: Marco Gaido Closes #19940 from mgaido91/SPARK-22750. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/c6f01cad Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/c6f01cad Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/c6f01cad Branch: refs/heads/master Commit: c6f01cadede490bde987d067becef14442f1e4a1 Parents: c0abb1d Author: Marco Gaido Authored: Fri Dec 22 10:13:26 2017 +0800 Committer: Wenchen Fan Committed: Fri Dec 22 10:13:26 2017 +0800 -- .../expressions/MonotonicallyIncreasingID.scala | 3 +- .../catalyst/expressions/SparkPartitionID.scala | 3 +- .../expressions/codegen/CodeGenerator.scala | 40 .../expressions/datetimeExpressions.scala | 12 -- .../catalyst/expressions/objects/objects.scala | 24 .../expressions/CodeGenerationSuite.scala | 12 ++ 6 files changed, 80 insertions(+), 14 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/c6f01cad/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/MonotonicallyIncreasingID.scala -- diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/MonotonicallyIncreasingID.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/MonotonicallyIncreasingID.scala index 784eaf8..11fb579 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/MonotonicallyIncreasingID.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/MonotonicallyIncreasingID.scala @@ -66,7 +66,8 @@ case class MonotonicallyIncreasingID() extends LeafExpression with Nondeterminis override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { val countTerm = ctx.addMutableState(ctx.JAVA_LONG, "count") -val partitionMaskTerm = ctx.addMutableState(ctx.JAVA_LONG, "partitionMask") +val partitionMaskTerm = "partitionMask" +ctx.addImmutableStateIfNotExists(ctx.JAVA_LONG, partitionMaskTerm) ctx.addPartitionInitializationStatement(s"$countTerm = 0L;") ctx.addPartitionInitializationStatement(s"$partitionMaskTerm = ((long) partitionIndex) << 33;") http://git-wip-us.apache.org/repos/asf/spark/blob/c6f01cad/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/SparkPartitionID.scala -- diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/SparkPartitionID.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/SparkPartitionID.scala index 736ca37..a160b9b 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/SparkPartitionID.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/SparkPartitionID.scala @@ -43,7 +43,8 @@ case class SparkPartitionID() extends LeafExpression with Nondeterministic { override protected def evalInternal(input: InternalRow): Int = partitionId override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { -val idTerm = ctx.addMutableState(ctx.JAVA_INT, "partitionId") +val idTerm = "partitionId" 
+ctx.addImmutableStateIfNotExists(ctx.JAVA_INT, idTerm) ctx.addPartitionInitializationStatement(s"$idTerm = partitionIndex;") ev.copy(code = s"final ${ctx.javaType(dataType)} ${ev.value} = $idTerm;", isNull = "false") } http://git-wip-us.apache.org/repos/asf/spark/blob/c6f01cad/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala -- diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala index 9adf632..d6eccad 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala
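A toy sketch of the contract `addImmutableStateIfNotExists` introduces (this is not Spark's implementation, only the dedup-by-name idea: callers agree on a fixed variable name and share one global instead of minting a fresh one per expression):

```scala
import scala.collection.mutable

class ToyCodegenContext {
  private val declared = mutable.Set[String]()
  val declarations = mutable.ArrayBuffer[String]()

  // Unlike addMutableState, the caller picks the name; a second call with the
  // same name is a no-op, so many expressions can share one global variable.
  def addImmutableStateIfNotExists(javaType: String, name: String): Unit = {
    if (declared.add(name)) {
      declarations += s"private $javaType $name;"
    }
  }
}

val ctx = new ToyCodegenContext
ctx.addImmutableStateIfNotExists("int", "partitionId")
ctx.addImmutableStateIfNotExists("int", "partitionId") // no-op: already declared once
assert(ctx.declarations.size == 1) // fewer globals -> less constant-pool pressure
```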
spark git commit: [SPARK-22854][UI] Read Spark version from event logs.
Repository: spark
Updated Branches:
  refs/heads/master 7ab165b70 -> c0abb1d99

[SPARK-22854][UI] Read Spark version from event logs.

The code was ignoring SparkListenerLogStart, which was added somewhat recently to record the Spark version used to generate an event log.

Author: Marcelo Vanzin

Closes #20049 from vanzin/SPARK-22854.

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/c0abb1d9
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/c0abb1d9
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/c0abb1d9

Branch: refs/heads/master
Commit: c0abb1d994bda50d964c555163cdfca5a7e56f64
Parents: 7ab165b
Author: Marcelo Vanzin
Authored: Fri Dec 22 09:25:39 2017 +0800
Committer: Wenchen Fan
Committed: Fri Dec 22 09:25:39 2017 +0800

--
 .../scala/org/apache/spark/status/AppStatusListener.scala | 7 ++-
 .../org/apache/spark/status/AppStatusListenerSuite.scala  | 3 +++
 2 files changed, 9 insertions(+), 1 deletion(-)
--

http://git-wip-us.apache.org/repos/asf/spark/blob/c0abb1d9/core/src/main/scala/org/apache/spark/status/AppStatusListener.scala
--
diff --git a/core/src/main/scala/org/apache/spark/status/AppStatusListener.scala b/core/src/main/scala/org/apache/spark/status/AppStatusListener.scala
index 4db797e..5253297 100644
--- a/core/src/main/scala/org/apache/spark/status/AppStatusListener.scala
+++ b/core/src/main/scala/org/apache/spark/status/AppStatusListener.scala
@@ -48,7 +48,7 @@ private[spark] class AppStatusListener(
 
   import config._
 
-  private val sparkVersion = SPARK_VERSION
+  private var sparkVersion = SPARK_VERSION
   private var appInfo: v1.ApplicationInfo = null
   private var appSummary = new AppSummary(0, 0)
   private var coresPerTask: Int = 1
@@ -90,6 +90,11 @@ private[spark] class AppStatusListener(
     }
   }
 
+  override def onOtherEvent(event: SparkListenerEvent): Unit = event match {
+    case SparkListenerLogStart(version) => sparkVersion = version
+    case _ =>
+  }
+
   override def onApplicationStart(event: SparkListenerApplicationStart): Unit = {
     assert(event.appId.isDefined, "Application without IDs are not supported.")

http://git-wip-us.apache.org/repos/asf/spark/blob/c0abb1d9/core/src/test/scala/org/apache/spark/status/AppStatusListenerSuite.scala
--
diff --git a/core/src/test/scala/org/apache/spark/status/AppStatusListenerSuite.scala b/core/src/test/scala/org/apache/spark/status/AppStatusListenerSuite.scala
index 9cf4f7e..c0b3a79 100644
--- a/core/src/test/scala/org/apache/spark/status/AppStatusListenerSuite.scala
+++ b/core/src/test/scala/org/apache/spark/status/AppStatusListenerSuite.scala
@@ -103,6 +103,8 @@ class AppStatusListenerSuite extends SparkFunSuite with BeforeAndAfter {
 
   test("scheduler events") {
     val listener = new AppStatusListener(store, conf, true)
+    listener.onOtherEvent(SparkListenerLogStart("TestSparkVersion"))
+
     // Start the application.
     time += 1
     listener.onApplicationStart(SparkListenerApplicationStart(
@@ -125,6 +127,7 @@
       assert(attempt.endTime.getTime() === -1L)
       assert(attempt.sparkUser === "user")
       assert(!attempt.completed)
+      assert(attempt.appSparkVersion === "TestSparkVersion")
     }
 
     // Start a couple of executors.
spark git commit: [SPARK-22648][K8S] Spark on Kubernetes - Documentation
Repository: spark Updated Branches: refs/heads/master 7beb375bf -> 7ab165b70 [SPARK-22648][K8S] Spark on Kubernetes - Documentation What changes were proposed in this pull request? This PR contains documentation on the usage of Kubernetes scheduler in Spark 2.3, and a shell script to make it easier to build docker images required to use the integration. The changes detailed here are covered by https://github.com/apache/spark/pull/19717 and https://github.com/apache/spark/pull/19468 which have merged already. How was this patch tested? The script has been in use for releases on our fork. Rest is documentation. cc rxin mateiz (shepherd) k8s-big-data SIG members & contributors: foxish ash211 mccheah liyinan926 erikerlandson ssuchter varunkatta kimoonkim tnachen ifilonenko reviewers: vanzin felixcheung jiangxb1987 mridulm TODO: - [x] Add dockerfiles directory to built distribution. (https://github.com/apache/spark/pull/20007) - [x] Change references to docker to instead say "container" (https://github.com/apache/spark/pull/19995) - [x] Update configuration table. - [x] Modify spark.kubernetes.allocation.batch.delay to take time instead of int (#20032) Author: foxishCloses #19946 from foxish/update-k8s-docs. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/7ab165b7 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/7ab165b7 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/7ab165b7 Branch: refs/heads/master Commit: 7ab165b7061d9acc26523227076056e94354d204 Parents: 7beb375 Author: foxish Authored: Thu Dec 21 17:21:11 2017 -0800 Committer: Reynold Xin Committed: Thu Dec 21 17:21:11 2017 -0800 -- docs/_layouts/global.html| 1 + docs/building-spark.md | 6 +- docs/cluster-overview.md | 7 +- docs/configuration.md| 2 + docs/img/k8s-cluster-mode.png| Bin 0 -> 55538 bytes docs/index.md| 3 +- docs/running-on-kubernetes.md| 578 ++ docs/running-on-yarn.md | 4 +- docs/submitting-applications.md | 16 + sbin/build-push-docker-images.sh | 68 10 files changed, 677 insertions(+), 8 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/7ab165b7/docs/_layouts/global.html -- diff --git a/docs/_layouts/global.html b/docs/_layouts/global.html index 67b05ec..e5af5ae 100755 --- a/docs/_layouts/global.html +++ b/docs/_layouts/global.html @@ -99,6 +99,7 @@ Spark Standalone Mesos YARN +Kubernetes http://git-wip-us.apache.org/repos/asf/spark/blob/7ab165b7/docs/building-spark.md -- diff --git a/docs/building-spark.md b/docs/building-spark.md index 98f7df1..c391255 100644 --- a/docs/building-spark.md +++ b/docs/building-spark.md @@ -49,7 +49,7 @@ To create a Spark distribution like those distributed by the to be runnable, use `./dev/make-distribution.sh` in the project root directory. It can be configured with Maven profile settings and so on like the direct Maven build. Example: -./dev/make-distribution.sh --name custom-spark --pip --r --tgz -Psparkr -Phadoop-2.7 -Phive -Phive-thriftserver -Pmesos -Pyarn +./dev/make-distribution.sh --name custom-spark --pip --r --tgz -Psparkr -Phadoop-2.7 -Phive -Phive-thriftserver -Pmesos -Pyarn -Pkubernetes This will build Spark distribution along with Python pip and R packages. For more information on usage, run `./dev/make-distribution.sh --help` @@ -90,6 +90,10 @@ like ZooKeeper and Hadoop itself. 
## Building with Mesos support ./build/mvn -Pmesos -DskipTests clean package + +## Building with Kubernetes support + +./build/mvn -Pkubernetes -DskipTests clean package ## Building with Kafka 0.8 support http://git-wip-us.apache.org/repos/asf/spark/blob/7ab165b7/docs/cluster-overview.md -- diff --git a/docs/cluster-overview.md b/docs/cluster-overview.md index c42bb4b..658e67f 100644 --- a/docs/cluster-overview.md +++ b/docs/cluster-overview.md @@ -52,11 +52,8 @@ The system currently supports three cluster managers: * [Apache Mesos](running-on-mesos.html) -- a general cluster manager that can also run Hadoop MapReduce and service applications. * [Hadoop YARN](running-on-yarn.html) -- the resource manager in Hadoop 2. -* [Kubernetes (experimental)](https://github.com/apache-spark-on-k8s/spark) -- In addition to the
svn commit: r23861 - in /dev/spark/2.3.0-SNAPSHOT-2017_12_21_16_01-7beb375-docs: ./ _site/ _site/api/ _site/api/R/ _site/api/java/ _site/api/java/lib/ _site/api/java/org/ _site/api/java/org/apache/ _s
Author: pwendell Date: Fri Dec 22 00:14:35 2017 New Revision: 23861 Log: Apache Spark 2.3.0-SNAPSHOT-2017_12_21_16_01-7beb375 docs [This commit notification would consist of 1415 parts, which exceeds the limit of 50 ones, so it was shortened to the summary.]
spark git commit: [SPARK-22861][SQL] SQLAppStatusListener handles multi-job executions.
Repository: spark
Updated Branches:
  refs/heads/master fe65361b0 -> 7beb375bf

[SPARK-22861][SQL] SQLAppStatusListener handles multi-job executions.

When one execution has multiple jobs, we need to append to the set of stages, not replace them on every job.

Added unit test and ran existing tests on jenkins.

Author: Imran Rashid

Closes #20047 from squito/SPARK-22861.

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/7beb375b
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/7beb375b
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/7beb375b

Branch: refs/heads/master
Commit: 7beb375bf4e8400f830a7fc7ff414634dd6efc78
Parents: fe65361
Author: Imran Rashid
Authored: Thu Dec 21 15:37:55 2017 -0800
Committer: Marcelo Vanzin
Committed: Thu Dec 21 15:37:55 2017 -0800

--
 .../sql/execution/ui/SQLAppStatusListener.scala |  2 +-
 .../ui/SQLAppStatusListenerSuite.scala          | 43
 2 files changed, 44 insertions(+), 1 deletion(-)
--

http://git-wip-us.apache.org/repos/asf/spark/blob/7beb375b/sql/core/src/main/scala/org/apache/spark/sql/execution/ui/SQLAppStatusListener.scala
--
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/ui/SQLAppStatusListener.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/ui/SQLAppStatusListener.scala
index aa78fa0..2295b8d 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/ui/SQLAppStatusListener.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/ui/SQLAppStatusListener.scala
@@ -87,7 +87,7 @@ class SQLAppStatusListener(
     }
 
     exec.jobs = exec.jobs + (jobId -> JobExecutionStatus.RUNNING)
-    exec.stages = event.stageIds.toSet
+    exec.stages ++= event.stageIds.toSet
     update(exec)
   }

http://git-wip-us.apache.org/repos/asf/spark/blob/7beb375b/sql/core/src/test/scala/org/apache/spark/sql/execution/ui/SQLAppStatusListenerSuite.scala
--
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/ui/SQLAppStatusListenerSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/ui/SQLAppStatusListenerSuite.scala
index 5ebbeb4..7d84f45 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/ui/SQLAppStatusListenerSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/ui/SQLAppStatusListenerSuite.scala
@@ -383,6 +383,49 @@ class SQLAppStatusListenerSuite extends SparkFunSuite with SharedSQLContext with
     assertJobs(statusStore.execution(executionId), failed = Seq(0))
   }
 
+  test("handle one execution with multiple jobs") {
+    val statusStore = createStatusStore()
+    val listener = statusStore.listener.get
+
+    val executionId = 0
+    val df = createTestDataFrame
+    listener.onOtherEvent(SparkListenerSQLExecutionStart(
+      executionId,
+      "test",
+      "test",
+      df.queryExecution.toString,
+      SparkPlanInfo.fromSparkPlan(df.queryExecution.executedPlan),
+      System.currentTimeMillis()))
+
+    var stageId = 0
+    def twoStageJob(jobId: Int): Unit = {
+      val stages = Seq(stageId, stageId + 1).map { id => createStageInfo(id, 0)}
+      stageId += 2
+      listener.onJobStart(SparkListenerJobStart(
+        jobId = jobId,
+        time = System.currentTimeMillis(),
+        stageInfos = stages,
+        createProperties(executionId)))
+      stages.foreach { s =>
+        listener.onStageSubmitted(SparkListenerStageSubmitted(s))
+        listener.onStageCompleted(SparkListenerStageCompleted(s))
+      }
+      listener.onJobEnd(SparkListenerJobEnd(
+        jobId = jobId,
+        time = System.currentTimeMillis(),
+        JobSucceeded
+      ))
+    }
+    // submit two jobs with the same executionId
+    twoStageJob(0)
+    twoStageJob(1)
+
+    listener.onOtherEvent(SparkListenerSQLExecutionEnd(
+      executionId, System.currentTimeMillis()))
+
+    assertJobs(statusStore.execution(0), completed = 0 to 1)
+    assert(statusStore.execution(0).get.stages === (0 to 3).toSet)
+  }
+
   test("SPARK-11126: no memory leak when running non SQL jobs") {
     val listener = spark.sharedState.statusStore.listener.get
     // At the beginning of this test case, there should be no live data in the listener.
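The one-character fix (`=` becomes `++=`) is easy to gloss over; a standalone illustration of the difference when two jobs belong to one execution (stage ids are made up):

```scala
// Old behavior: each SparkListenerJobStart replaced the execution's stage set.
var stages = Set.empty[Int]
stages = Set(0, 1) // job 0
stages = Set(2, 3) // job 1 -- stages 0 and 1 are forgotten

// Fixed behavior: later jobs append their stages.
var accumulated = Set.empty[Int]
accumulated ++= Set(0, 1) // job 0
accumulated ++= Set(2, 3) // job 1
assert(accumulated == Set(0, 1, 2, 3))
```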
svn commit: r23856 - in /dev/spark/2.3.0-SNAPSHOT-2017_12_21_12_01-fe65361-docs: ./ _site/ _site/api/ _site/api/R/ _site/api/java/ _site/api/java/lib/ _site/api/java/org/ _site/api/java/org/apache/ _s
Author: pwendell Date: Thu Dec 21 20:14:34 2017 New Revision: 23856 Log: Apache Spark 2.3.0-SNAPSHOT-2017_12_21_12_01-fe65361 docs [This commit notification would consist of 1415 parts, which exceeds the limit of 50 ones, so it was shortened to the summary.]
spark git commit: [SPARK-22042][FOLLOW-UP][SQL] ReorderJoinPredicates can break when child's partitioning is not decided
Repository: spark Updated Branches: refs/heads/master 4e107fdb7 -> fe65361b0 [SPARK-22042][FOLLOW-UP][SQL] ReorderJoinPredicates can break when child's partitioning is not decided ## What changes were proposed in this pull request? This is a followup PR of https://github.com/apache/spark/pull/19257 where gatorsmile had left couple comments wrt code style. ## How was this patch tested? Doesn't change any functionality. Will depend on build to see if no checkstyle rules are violated. Author: Tejas PatilCloses #20041 from tejasapatil/followup_19257. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/fe65361b Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/fe65361b Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/fe65361b Branch: refs/heads/master Commit: fe65361b0579777c360dee1d7f633f28df0c6aeb Parents: 4e107fd Author: Tejas Patil Authored: Thu Dec 21 09:22:08 2017 -0800 Committer: gatorsmile Committed: Thu Dec 21 09:22:08 2017 -0800 -- .../execution/exchange/EnsureRequirements.scala | 82 ++-- .../spark/sql/sources/BucketedReadSuite.scala | 4 +- 2 files changed, 44 insertions(+), 42 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/fe65361b/sql/core/src/main/scala/org/apache/spark/sql/execution/exchange/EnsureRequirements.scala -- diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/exchange/EnsureRequirements.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/exchange/EnsureRequirements.scala index 82f0b9f..c8e236b 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/exchange/EnsureRequirements.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/exchange/EnsureRequirements.scala @@ -252,54 +252,56 @@ case class EnsureRequirements(conf: SQLConf) extends Rule[SparkPlan] { operator.withNewChildren(children) } - /** - * When the physical operators are created for JOIN, the ordering of join keys is based on order - * in which the join keys appear in the user query. That might not match with the output - * partitioning of the join node's children (thus leading to extra sort / shuffle being - * introduced). This rule will change the ordering of the join keys to match with the - * partitioning of the join nodes' children. 
- */ - def reorderJoinPredicates(plan: SparkPlan): SparkPlan = { -def reorderJoinKeys( -leftKeys: Seq[Expression], -rightKeys: Seq[Expression], -leftPartitioning: Partitioning, -rightPartitioning: Partitioning): (Seq[Expression], Seq[Expression]) = { - - def reorder(expectedOrderOfKeys: Seq[Expression], - currentOrderOfKeys: Seq[Expression]): (Seq[Expression], Seq[Expression]) = { -val leftKeysBuffer = ArrayBuffer[Expression]() -val rightKeysBuffer = ArrayBuffer[Expression]() + private def reorder( + leftKeys: Seq[Expression], + rightKeys: Seq[Expression], + expectedOrderOfKeys: Seq[Expression], + currentOrderOfKeys: Seq[Expression]): (Seq[Expression], Seq[Expression]) = { +val leftKeysBuffer = ArrayBuffer[Expression]() +val rightKeysBuffer = ArrayBuffer[Expression]() -expectedOrderOfKeys.foreach(expression => { - val index = currentOrderOfKeys.indexWhere(e => e.semanticEquals(expression)) - leftKeysBuffer.append(leftKeys(index)) - rightKeysBuffer.append(rightKeys(index)) -}) -(leftKeysBuffer, rightKeysBuffer) - } +expectedOrderOfKeys.foreach(expression => { + val index = currentOrderOfKeys.indexWhere(e => e.semanticEquals(expression)) + leftKeysBuffer.append(leftKeys(index)) + rightKeysBuffer.append(rightKeys(index)) +}) +(leftKeysBuffer, rightKeysBuffer) + } - if (leftKeys.forall(_.deterministic) && rightKeys.forall(_.deterministic)) { -leftPartitioning match { - case HashPartitioning(leftExpressions, _) -if leftExpressions.length == leftKeys.length && - leftKeys.forall(x => leftExpressions.exists(_.semanticEquals(x))) => -reorder(leftExpressions, leftKeys) + private def reorderJoinKeys( + leftKeys: Seq[Expression], + rightKeys: Seq[Expression], + leftPartitioning: Partitioning, + rightPartitioning: Partitioning): (Seq[Expression], Seq[Expression]) = { +if (leftKeys.forall(_.deterministic) && rightKeys.forall(_.deterministic)) { + leftPartitioning match { +case HashPartitioning(leftExpressions, _) + if leftExpressions.length == leftKeys.length && +leftKeys.forall(x =>
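The extracted `reorder` helper, restated over plain strings so it runs standalone (Spark matches expressions with `semanticEquals`; plain `indexOf` stands in for that here):

```scala
// Rearrange the join keys so they follow expectedOrderOfKeys (e.g. the order of
// a child's HashPartitioning expressions), avoiding an extra shuffle or sort.
def reorder(
    leftKeys: Seq[String],
    rightKeys: Seq[String],
    expectedOrderOfKeys: Seq[String],
    currentOrderOfKeys: Seq[String]): (Seq[String], Seq[String]) = {
  val reordered = expectedOrderOfKeys.map { key =>
    val i = currentOrderOfKeys.indexOf(key)
    (leftKeys(i), rightKeys(i))
  }
  reordered.unzip
}

// Join written as "b = y AND a = x", but the left child is partitioned on (a, b):
reorder(Seq("b", "a"), Seq("y", "x"), Seq("a", "b"), Seq("b", "a"))
// -> (Seq("a", "b"), Seq("x", "y")): keys now line up with the child partitioning
```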
[3/3] spark git commit: [SPARK-22822][TEST] Basic tests for WindowFrameCoercion and DecimalPrecision
[SPARK-22822][TEST] Basic tests for WindowFrameCoercion and DecimalPrecision ## What changes were proposed in this pull request? Test Coverage for `WindowFrameCoercion` and `DecimalPrecision`, this is a Sub-tasks for [SPARK-22722](https://issues.apache.org/jira/browse/SPARK-22722). ## How was this patch tested? N/A Author: Yuming WangCloses #20008 from wangyum/SPARK-22822. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/4e107fdb Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/4e107fdb Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/4e107fdb Branch: refs/heads/master Commit: 4e107fdb7463a67d9c77c4a3434dfe70c72982f4 Parents: d3a1d95 Author: Yuming Wang Authored: Thu Dec 21 09:18:27 2017 -0800 Committer: gatorsmile Committed: Thu Dec 21 09:18:27 2017 -0800 -- .../expressions/windowExpressions.scala |4 +- .../typeCoercion/native/decimalPrecision.sql| 1448 +++ .../typeCoercion/native/windowFrameCoercion.sql | 44 + .../native/decimalPrecision.sql.out | 9514 ++ .../native/windowFrameCoercion.sql.out | 206 + .../resources/sql-tests/results/window.sql.out |2 +- 6 files changed, 11215 insertions(+), 3 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/4e107fdb/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/windowExpressions.scala -- diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/windowExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/windowExpressions.scala index e11e3a1..220cc4f 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/windowExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/windowExpressions.scala @@ -251,8 +251,8 @@ case class SpecifiedWindowFrame( TypeCheckFailure(s"Window frame $location bound '$e' is not a literal.") case e: Expression if !frameType.inputType.acceptsType(e.dataType) => TypeCheckFailure( -s"The data type of the $location bound '${e.dataType} does not match " + - s"the expected data type '${frameType.inputType}'.") +s"The data type of the $location bound '${e.dataType}' does not match " + + s"the expected data type '${frameType.inputType.simpleString}'.") case _ => TypeCheckSuccess } http://git-wip-us.apache.org/repos/asf/spark/blob/4e107fdb/sql/core/src/test/resources/sql-tests/inputs/typeCoercion/native/decimalPrecision.sql -- diff --git a/sql/core/src/test/resources/sql-tests/inputs/typeCoercion/native/decimalPrecision.sql b/sql/core/src/test/resources/sql-tests/inputs/typeCoercion/native/decimalPrecision.sql new file mode 100644 index 000..8b04864 --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/inputs/typeCoercion/native/decimalPrecision.sql @@ -0,0 +1,1448 @@ +-- +-- Licensed to the Apache Software Foundation (ASF) under one or more +-- contributor license agreements. See the NOTICE file distributed with +-- this work for additional information regarding copyright ownership. +-- The ASF licenses this file to You under the Apache License, Version 2.0 +-- (the "License"); you may not use this file except in compliance with +-- the License. You may obtain a copy of the License at +-- +-- http://www.apache.org/licenses/LICENSE-2.0 +-- +-- Unless required by applicable law or agreed to in writing, software +-- distributed under the License is distributed on an "AS IS" BASIS, +-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+-- See the License for the specific language governing permissions and +-- limitations under the License. +-- + +CREATE TEMPORARY VIEW t AS SELECT 1; + +SELECT cast(1 as tinyint) + cast(1 as decimal(3, 0)) FROM t; +SELECT cast(1 as tinyint) + cast(1 as decimal(5, 0)) FROM t; +SELECT cast(1 as tinyint) + cast(1 as decimal(10, 0)) FROM t; +SELECT cast(1 as tinyint) + cast(1 as decimal(20, 0)) FROM t; + +SELECT cast(1 as smallint) + cast(1 as decimal(3, 0)) FROM t; +SELECT cast(1 as smallint) + cast(1 as decimal(5, 0)) FROM t; +SELECT cast(1 as smallint) + cast(1 as decimal(10, 0)) FROM t; +SELECT cast(1 as smallint) + cast(1 as decimal(20, 0)) FROM t; + +SELECT cast(1 as int) + cast(1 as decimal(3, 0)) FROM t; +SELECT cast(1 as int) + cast(1 as decimal(5, 0)) FROM t; +SELECT cast(1 as int) + cast(1 as decimal(10, 0)) FROM t; +SELECT cast(1 as int) + cast(1 as decimal(20, 0)) FROM t; + +SELECT cast(1 as bigint) + cast(1 as decimal(3, 0)) FROM t;
[2/3] spark git commit: [SPARK-22822][TEST] Basic tests for WindowFrameCoercion and DecimalPrecision
http://git-wip-us.apache.org/repos/asf/spark/blob/4e107fdb/sql/core/src/test/resources/sql-tests/results/typeCoercion/native/decimalPrecision.sql.out -- diff --git a/sql/core/src/test/resources/sql-tests/results/typeCoercion/native/decimalPrecision.sql.out b/sql/core/src/test/resources/sql-tests/results/typeCoercion/native/decimalPrecision.sql.out new file mode 100644 index 000..ebc8201 --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/results/typeCoercion/native/decimalPrecision.sql.out @@ -0,0 +1,9514 @@ +-- Automatically generated by SQLQueryTestSuite +-- Number of queries: 1145 + + +-- !query 0 +CREATE TEMPORARY VIEW t AS SELECT 1 +-- !query 0 schema +struct<> +-- !query 0 output + + + +-- !query 1 +SELECT cast(1 as tinyint) + cast(1 as decimal(3, 0)) FROM t +-- !query 1 schema +struct<(CAST(CAST(1 AS TINYINT) AS DECIMAL(3,0)) + CAST(1 AS DECIMAL(3,0))):decimal(4,0)> +-- !query 1 output +2 + + +-- !query 2 +SELECT cast(1 as tinyint) + cast(1 as decimal(5, 0)) FROM t +-- !query 2 schema +struct<(CAST(CAST(CAST(1 AS TINYINT) AS DECIMAL(3,0)) AS DECIMAL(6,0)) + CAST(CAST(1 AS DECIMAL(5,0)) AS DECIMAL(6,0))):decimal(6,0)> +-- !query 2 output +2 + + +-- !query 3 +SELECT cast(1 as tinyint) + cast(1 as decimal(10, 0)) FROM t +-- !query 3 schema +struct<(CAST(CAST(CAST(1 AS TINYINT) AS DECIMAL(3,0)) AS DECIMAL(11,0)) + CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(11,0))):decimal(11,0)> +-- !query 3 output +2 + + +-- !query 4 +SELECT cast(1 as tinyint) + cast(1 as decimal(20, 0)) FROM t +-- !query 4 schema +struct<(CAST(CAST(CAST(1 AS TINYINT) AS DECIMAL(3,0)) AS DECIMAL(21,0)) + CAST(CAST(1 AS DECIMAL(20,0)) AS DECIMAL(21,0))):decimal(21,0)> +-- !query 4 output +2 + + +-- !query 5 +SELECT cast(1 as smallint) + cast(1 as decimal(3, 0)) FROM t +-- !query 5 schema +struct<(CAST(CAST(CAST(1 AS SMALLINT) AS DECIMAL(5,0)) AS DECIMAL(6,0)) + CAST(CAST(1 AS DECIMAL(3,0)) AS DECIMAL(6,0))):decimal(6,0)> +-- !query 5 output +2 + + +-- !query 6 +SELECT cast(1 as smallint) + cast(1 as decimal(5, 0)) FROM t +-- !query 6 schema +struct<(CAST(CAST(1 AS SMALLINT) AS DECIMAL(5,0)) + CAST(1 AS DECIMAL(5,0))):decimal(6,0)> +-- !query 6 output +2 + + +-- !query 7 +SELECT cast(1 as smallint) + cast(1 as decimal(10, 0)) FROM t +-- !query 7 schema +struct<(CAST(CAST(CAST(1 AS SMALLINT) AS DECIMAL(5,0)) AS DECIMAL(11,0)) + CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(11,0))):decimal(11,0)> +-- !query 7 output +2 + + +-- !query 8 +SELECT cast(1 as smallint) + cast(1 as decimal(20, 0)) FROM t +-- !query 8 schema +struct<(CAST(CAST(CAST(1 AS SMALLINT) AS DECIMAL(5,0)) AS DECIMAL(21,0)) + CAST(CAST(1 AS DECIMAL(20,0)) AS DECIMAL(21,0))):decimal(21,0)> +-- !query 8 output +2 + + +-- !query 9 +SELECT cast(1 as int) + cast(1 as decimal(3, 0)) FROM t +-- !query 9 schema +struct<(CAST(CAST(CAST(1 AS INT) AS DECIMAL(10,0)) AS DECIMAL(11,0)) + CAST(CAST(1 AS DECIMAL(3,0)) AS DECIMAL(11,0))):decimal(11,0)> +-- !query 9 output +2 + + +-- !query 10 +SELECT cast(1 as int) + cast(1 as decimal(5, 0)) FROM t +-- !query 10 schema +struct<(CAST(CAST(CAST(1 AS INT) AS DECIMAL(10,0)) AS DECIMAL(11,0)) + CAST(CAST(1 AS DECIMAL(5,0)) AS DECIMAL(11,0))):decimal(11,0)> +-- !query 10 output +2 + + +-- !query 11 +SELECT cast(1 as int) + cast(1 as decimal(10, 0)) FROM t +-- !query 11 schema +struct<(CAST(CAST(1 AS INT) AS DECIMAL(10,0)) + CAST(1 AS DECIMAL(10,0))):decimal(11,0)> +-- !query 11 output +2 + + +-- !query 12 +SELECT cast(1 as int) + cast(1 as decimal(20, 0)) FROM t +-- !query 12 schema +struct<(CAST(CAST(CAST(1 AS INT) AS 
DECIMAL(10,0)) AS DECIMAL(21,0)) + CAST(CAST(1 AS DECIMAL(20,0)) AS DECIMAL(21,0))):decimal(21,0)> +-- !query 12 output +2 + + +-- !query 13 +SELECT cast(1 as bigint) + cast(1 as decimal(3, 0)) FROM t +-- !query 13 schema +struct<(CAST(CAST(CAST(1 AS BIGINT) AS DECIMAL(20,0)) AS DECIMAL(21,0)) + CAST(CAST(1 AS DECIMAL(3,0)) AS DECIMAL(21,0))):decimal(21,0)> +-- !query 13 output +2 + + +-- !query 14 +SELECT cast(1 as bigint) + cast(1 as decimal(5, 0)) FROM t +-- !query 14 schema +struct<(CAST(CAST(CAST(1 AS BIGINT) AS DECIMAL(20,0)) AS DECIMAL(21,0)) + CAST(CAST(1 AS DECIMAL(5,0)) AS DECIMAL(21,0))):decimal(21,0)> +-- !query 14 output +2 + + +-- !query 15 +SELECT cast(1 as bigint) + cast(1 as decimal(10, 0)) FROM t +-- !query 15 schema +struct<(CAST(CAST(CAST(1 AS BIGINT) AS DECIMAL(20,0)) AS DECIMAL(21,0)) + CAST(CAST(1 AS DECIMAL(10,0)) AS DECIMAL(21,0))):decimal(21,0)> +-- !query 15 output +2 + + +-- !query 16 +SELECT cast(1 as bigint) + cast(1 as decimal(20, 0)) FROM t +-- !query 16 schema +struct<(CAST(CAST(1 AS BIGINT) AS DECIMAL(20,0)) + CAST(1 AS DECIMAL(20,0))):decimal(21,0)> +-- !query 16 output +2 + + +-- !query 17 +SELECT cast(1 as float) + cast(1 as decimal(3, 0)) FROM t +-- !query 17 schema +struct<(CAST(CAST(1 AS FLOAT) AS DOUBLE) + CAST(CAST(1 AS DECIMAL(3,0)) AS DOUBLE)):double> +-- !query 17 output +2.0 + + +-- !query 18
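The expected schemas above follow the usual decimal-addition typing rule; here is a small sketch that reproduces the result types of queries 1 and 2 (the formula is the common Hive/SQL Server-style rule, stated as an assumption rather than quoted from Spark's DecimalPrecision source):

```scala
// Combine two decimal input types (p1, s1) and (p2, s2) for addition/subtraction.
def addResultType(p1: Int, s1: Int, p2: Int, s2: Int): (Int, Int) = {
  val scale = math.max(s1, s2)
  val precision = math.max(p1 - s1, p2 - s2) + scale + 1 // +1 integer digit for a carry
  (precision, scale)
}

assert(addResultType(3, 0, 3, 0) == (4, 0)) // query 1: tinyint + decimal(3,0) -> decimal(4,0)
assert(addResultType(3, 0, 5, 0) == (6, 0)) // query 2: tinyint + decimal(5,0) -> decimal(6,0)
```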
[1/3] spark git commit: [SPARK-22822][TEST] Basic tests for WindowFrameCoercion and DecimalPrecision
Repository: spark Updated Branches: refs/heads/master d3a1d9527 -> 4e107fdb7 http://git-wip-us.apache.org/repos/asf/spark/blob/4e107fdb/sql/core/src/test/resources/sql-tests/results/typeCoercion/native/windowFrameCoercion.sql.out -- diff --git a/sql/core/src/test/resources/sql-tests/results/typeCoercion/native/windowFrameCoercion.sql.out b/sql/core/src/test/resources/sql-tests/results/typeCoercion/native/windowFrameCoercion.sql.out new file mode 100644 index 000..5dd257b --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/results/typeCoercion/native/windowFrameCoercion.sql.out @@ -0,0 +1,206 @@ +-- Automatically generated by SQLQueryTestSuite +-- Number of queries: 25 + + +-- !query 0 +CREATE TEMPORARY VIEW t AS SELECT 1 +-- !query 0 schema +struct<> +-- !query 0 output + + + +-- !query 1 +SELECT COUNT(*) OVER (PARTITION BY 1 ORDER BY cast(1 as tinyint)) FROM t +-- !query 1 schema +struct+-- !query 1 output +1 + + +-- !query 2 +SELECT COUNT(*) OVER (PARTITION BY 1 ORDER BY cast(1 as smallint)) FROM t +-- !query 2 schema +struct +-- !query 2 output +1 + + +-- !query 3 +SELECT COUNT(*) OVER (PARTITION BY 1 ORDER BY cast(1 as int)) FROM t +-- !query 3 schema +struct +-- !query 3 output +1 + + +-- !query 4 +SELECT COUNT(*) OVER (PARTITION BY 1 ORDER BY cast(1 as bigint)) FROM t +-- !query 4 schema +struct +-- !query 4 output +1 + + +-- !query 5 +SELECT COUNT(*) OVER (PARTITION BY 1 ORDER BY cast(1 as float)) FROM t +-- !query 5 schema +struct +-- !query 5 output +1 + + +-- !query 6 +SELECT COUNT(*) OVER (PARTITION BY 1 ORDER BY cast(1 as double)) FROM t +-- !query 6 schema +struct +-- !query 6 output +1 + + +-- !query 7 +SELECT COUNT(*) OVER (PARTITION BY 1 ORDER BY cast(1 as decimal(10, 0))) FROM t +-- !query 7 schema +struct +-- !query 7 output +1 + + +-- !query 8 +SELECT COUNT(*) OVER (PARTITION BY 1 ORDER BY cast(1 as string)) FROM t +-- !query 8 schema +struct +-- !query 8 output +1 + + +-- !query 9 +SELECT COUNT(*) OVER (PARTITION BY 1 ORDER BY cast('1' as binary)) FROM t +-- !query 9 schema +struct +-- !query 9 output +1 + + +-- !query 10 +SELECT COUNT(*) OVER (PARTITION BY 1 ORDER BY cast(1 as boolean)) FROM t +-- !query 10 schema +struct +-- !query 10 output +1 + + +-- !query 11 +SELECT COUNT(*) OVER (PARTITION BY 1 ORDER BY cast('2017-12-11 09:30:00.0' as timestamp)) FROM t +-- !query 11 schema +struct +-- !query 11 output +1 + + +-- !query 12 +SELECT COUNT(*) OVER (PARTITION BY 1 ORDER BY cast('2017-12-11 09:30:00' as date)) FROM t +-- !query 12 schema +struct +-- !query 12 output +1 + + +-- !query 13 +SELECT COUNT(*) OVER (PARTITION BY 1 ORDER BY cast(1 as tinyint) DESC RANGE BETWEEN CURRENT ROW AND 1 FOLLOWING) FROM t +-- !query 13 schema +struct +-- !query 13 output +1 + + +-- !query 14 +SELECT COUNT(*) OVER (PARTITION BY 1 ORDER BY cast(1 as smallint) DESC RANGE BETWEEN CURRENT ROW AND 1 FOLLOWING) FROM t +-- !query 14 schema +struct +-- !query 14 output +1 + + +-- !query 15 +SELECT COUNT(*) OVER (PARTITION BY 1
spark git commit: [SPARK-22786][SQL] only use AppStatusPlugin in history server
Repository: spark Updated Branches: refs/heads/master 8a0ed5a5e -> d3a1d9527 [SPARK-22786][SQL] only use AppStatusPlugin in history server ## What changes were proposed in this pull request? In https://github.com/apache/spark/pull/19681 we introduced a new interface called `AppStatusPlugin`, to register listeners and set up the UI for both live and history UI. However I think it's an overkill for live UI. For example, we should not register `SQLListener` if users are not using SQL functions. Previously we register the `SQLListener` and set up SQL tab when `SparkSession` is firstly created, which indicates users are going to use SQL functions. But in #19681 , we register the SQL functions during `SparkContext` creation. The same thing should apply to streaming too. I think we should keep the previous behavior, and only use this new interface for history server. To reflect this change, I also rename the new interface to `SparkHistoryUIPlugin` This PR also refines the tests for sql listener. ## How was this patch tested? existing tests Author: Wenchen FanCloses #19981 from cloud-fan/listener. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/d3a1d952 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/d3a1d952 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/d3a1d952 Branch: refs/heads/master Commit: d3a1d9527bcd6675cc45773f01d4558cf4b46b3d Parents: 8a0ed5a Author: Wenchen Fan Authored: Fri Dec 22 01:08:13 2017 +0800 Committer: Wenchen Fan Committed: Fri Dec 22 01:08:13 2017 +0800 -- .../scala/org/apache/spark/SparkContext.scala | 16 +- .../deploy/history/FsHistoryProvider.scala | 14 +- .../spark/status/AppHistoryServerPlugin.scala | 38 ++ .../apache/spark/status/AppStatusListener.scala | 2 +- .../apache/spark/status/AppStatusPlugin.scala | 71 --- .../apache/spark/status/AppStatusStore.scala| 17 +- .../org/apache/spark/ui/StagePageSuite.scala| 20 +- ...g.apache.spark.status.AppHistoryServerPlugin | 1 + .../org.apache.spark.status.AppStatusPlugin | 1 - .../sql/execution/ui/SQLAppStatusListener.scala | 23 +- .../sql/execution/ui/SQLAppStatusStore.scala| 62 +-- .../execution/ui/SQLHistoryServerPlugin.scala | 36 ++ .../apache/spark/sql/internal/SharedState.scala | 18 +- .../sql/execution/metric/SQLMetricsSuite.scala | 8 - .../execution/metric/SQLMetricsTestUtils.scala | 10 +- .../ui/SQLAppStatusListenerSuite.scala | 531 +++ .../sql/execution/ui/SQLListenerSuite.scala | 531 --- 17 files changed, 668 insertions(+), 731 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/d3a1d952/core/src/main/scala/org/apache/spark/SparkContext.scala -- diff --git a/core/src/main/scala/org/apache/spark/SparkContext.scala b/core/src/main/scala/org/apache/spark/SparkContext.scala index 92e13ce..fcbeddd 100644 --- a/core/src/main/scala/org/apache/spark/SparkContext.scala +++ b/core/src/main/scala/org/apache/spark/SparkContext.scala @@ -53,7 +53,7 @@ import org.apache.spark.rpc.RpcEndpointRef import org.apache.spark.scheduler._ import org.apache.spark.scheduler.cluster.{CoarseGrainedSchedulerBackend, StandaloneSchedulerBackend} import org.apache.spark.scheduler.local.LocalSchedulerBackend -import org.apache.spark.status.{AppStatusPlugin, AppStatusStore} +import org.apache.spark.status.AppStatusStore import org.apache.spark.storage._ import org.apache.spark.storage.BlockManagerMessages.TriggerThreadDump import org.apache.spark.ui.{ConsoleProgressBar, SparkUI} @@ -416,7 +416,8 @@ class SparkContext(config: SparkConf) extends 
Logging { // Initialize the app status store and listener before SparkEnv is created so that it gets // all events. -_statusStore = AppStatusStore.createLiveStore(conf, l => listenerBus.addToStatusQueue(l)) +_statusStore = AppStatusStore.createLiveStore(conf) +listenerBus.addToStatusQueue(_statusStore.listener.get) // Create the Spark execution environment (cache, map output tracker, etc) _env = createSparkEnv(_conf, isLocal, listenerBus) @@ -445,14 +446,9 @@ class SparkContext(config: SparkConf) extends Logging { // For tests, do not enable the UI None } -_ui.foreach { ui => - // Load any plugins that might want to modify the UI. - AppStatusPlugin.loadPlugins().foreach(_.setupUI(ui)) - - // Bind the UI before starting the task scheduler to communicate - // the bound port to the cluster manager properly - ui.bind() -} +// Bind the UI before starting the task scheduler to communicate +// the bound
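The new `META-INF/services/org.apache.spark.status.AppHistoryServerPlugin` resource file implies the history server discovers plugins through Java's standard `ServiceLoader`; a hedged sketch of that discovery step (the trait is Spark-internal, so code like this only compiles inside the `org.apache.spark` source tree):

```scala
import java.util.ServiceLoader
import scala.collection.JavaConverters._

import org.apache.spark.status.AppHistoryServerPlugin

// Finds every implementation listed in a
// META-INF/services/org.apache.spark.status.AppHistoryServerPlugin file on the
// classpath, e.g. SQLHistoryServerPlugin contributed by the sql module.
val plugins = ServiceLoader.load(classOf[AppHistoryServerPlugin]).asScala
plugins.foreach(p => println(p.getClass.getName))
```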
spark git commit: [SPARK-22668][SQL] Ensure no global variables in arguments of method split by CodegenContext.splitExpressions()
Repository: spark Updated Branches: refs/heads/master 4c2efde93 -> 8a0ed5a5e [SPARK-22668][SQL] Ensure no global variables in arguments of method split by CodegenContext.splitExpressions() ## What changes were proposed in this pull request? Passing global variables to the split method is dangerous, as any mutating to it is ignored and may lead to unexpected behavior. To prevent this, one approach is to make sure no expression would output global variables: Localizing lifetime of mutable states in expressions. Another approach is, when calling `ctx.splitExpression`, make sure we don't use children's output as parameter names. Approach 1 is actually hard to do, as we need to check all expressions and operators that support whole-stage codegen. Approach 2 is easier as the callers of `ctx.splitExpressions` are not too many. Besides, approach 2 is more flexible, as children's output may be other stuff that can't be parameter name: literal, inlined statement(a + 1), etc. close https://github.com/apache/spark/pull/19865 close https://github.com/apache/spark/pull/19938 ## How was this patch tested? existing tests Author: Wenchen FanCloses #20021 from cloud-fan/codegen. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/8a0ed5a5 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/8a0ed5a5 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/8a0ed5a5 Branch: refs/heads/master Commit: 8a0ed5a5ee64a6e854c516f80df5a9729435479b Parents: 4c2efde Author: Wenchen Fan Authored: Fri Dec 22 00:21:27 2017 +0800 Committer: Wenchen Fan Committed: Fri Dec 22 00:21:27 2017 +0800 -- .../sql/catalyst/expressions/arithmetic.scala | 18 +-- .../expressions/codegen/CodeGenerator.scala | 32 +--- .../expressions/conditionalExpressions.scala| 8 ++--- .../catalyst/expressions/nullExpressions.scala | 9 +++--- .../sql/catalyst/expressions/predicates.scala | 2 +- 5 files changed, 43 insertions(+), 26 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/8a0ed5a5/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/arithmetic.scala -- diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/arithmetic.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/arithmetic.scala index d3a8cb5..8bb1459 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/arithmetic.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/arithmetic.scala @@ -602,13 +602,13 @@ case class Least(children: Seq[Expression]) extends Expression { override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { val evalChildren = children.map(_.genCode(ctx)) -val tmpIsNull = ctx.addMutableState(ctx.JAVA_BOOLEAN, "leastTmpIsNull") +ev.isNull = ctx.addMutableState(ctx.JAVA_BOOLEAN, ev.isNull) val evals = evalChildren.map(eval => s""" |${eval.code} - |if (!${eval.isNull} && ($tmpIsNull || + |if (!${eval.isNull} && (${ev.isNull} || | ${ctx.genGreater(dataType, ev.value, eval.value)})) { - | $tmpIsNull = false; + | ${ev.isNull} = false; | ${ev.value} = ${eval.value}; |} """.stripMargin @@ -628,10 +628,9 @@ case class Least(children: Seq[Expression]) extends Expression { foldFunctions = _.map(funcCall => s"${ev.value} = $funcCall;").mkString("\n")) ev.copy(code = s""" - |$tmpIsNull = true; + |${ev.isNull} = true; |${ctx.javaType(dataType)} ${ev.value} = ${ctx.defaultValue(dataType)}; |$codes - |final boolean ${ev.isNull} = $tmpIsNull; """.stripMargin) } } 
@@ -682,13 +681,13 @@ case class Greatest(children: Seq[Expression]) extends Expression { override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { val evalChildren = children.map(_.genCode(ctx)) -val tmpIsNull = ctx.addMutableState(ctx.JAVA_BOOLEAN, "greatestTmpIsNull") +ev.isNull = ctx.addMutableState(ctx.JAVA_BOOLEAN, ev.isNull) val evals = evalChildren.map(eval => s""" |${eval.code} - |if (!${eval.isNull} && ($tmpIsNull || + |if (!${eval.isNull} && (${ev.isNull} || | ${ctx.genGreater(dataType, eval.value, ev.value)})) { - | $tmpIsNull = false; + | ${ev.isNull} = false; | ${ev.value} = ${eval.value}; |} """.stripMargin @@ -708,10 +707,9 @@ case class Greatest(children: Seq[Expression]) extends Expression { foldFunctions = _.map(funcCall =>
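To make the failure mode concrete, here is a minimal, hypothetical sketch (the names are invented for illustration and do not come from the actual generated code) of the Java that results when a global mutable state is passed as a split-method parameter, written as a codegen-style Scala string:

object SplitParamShadowingSketch {
  // The parameter shadows the instance field, and Java passes primitives by
  // value, so the assignment inside split_0 never reaches the global state.
  val generatedJava: String =
    """
      |private boolean isNull_0 = true;          // global mutable state
      |
      |private void split_0(boolean isNull_0) {  // parameter shadows the field
      |  isNull_0 = false;                       // updates only the local copy
      |}
      |
      |public void eval() {
      |  split_0(isNull_0);
      |  // isNull_0 is still true here: the update was silently dropped
      |}
    """.stripMargin
}

In the Least/Greatest hunks above, the fix makes ev.isNull itself the global mutable state and assigns to it directly in the generated code, so no temporary has to survive across a split-method boundary.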
svn commit: r23848 - in /dev/spark/2.3.0-SNAPSHOT-2017_12_21_08_01-0abaf31-docs: ./ _site/ _site/api/ _site/api/R/ _site/api/java/ _site/api/java/lib/ _site/api/java/org/ _site/api/java/org/apache/ _s
Author: pwendell Date: Thu Dec 21 16:17:25 2017 New Revision: 23848 Log: Apache Spark 2.3.0-SNAPSHOT-2017_12_21_08_01-0abaf31 docs [This commit notification would consist of 1415 parts, which exceeds the limit of 50, so it was shortened to this summary.]
spark git commit: [SPARK-22855][BUILD] Add -no-java-comments to sbt docs/scalacOptions
Repository: spark Updated Branches: refs/heads/master 0abaf31be -> 4c2efde93 [SPARK-22855][BUILD] Add -no-java-comments to sbt docs/scalacOptions Prevents the Scala 2.12 scaladoc from blowing up when attempting to parse Java comments. ## What changes were proposed in this pull request? Adds -no-java-comments to docs/scalacOptions under Scala 2.12. Also moves the scaladoc configs out of TestSettings and into the standard sharedSettings section in SparkBuild.scala. ## How was this patch tested? SBT_OPTS=-Dscala-2.12 sbt ++2.12.4 tags/publishLocal Author: Erik LaBianca. Closes #20042 from easel/scaladoc-212. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/4c2efde9 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/4c2efde9 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/4c2efde9 Branch: refs/heads/master Commit: 4c2efde9314a5f67052ac87bfa1472ebb9aca74a Parents: 0abaf31 Author: Erik LaBianca Authored: Thu Dec 21 10:08:38 2017 -0600 Committer: Sean Owen Committed: Thu Dec 21 10:08:38 2017 -0600 -- project/SparkBuild.scala | 28 1 file changed, 16 insertions(+), 12 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/4c2efde9/project/SparkBuild.scala -- diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala index 8305494..7469f11 100644 --- a/project/SparkBuild.scala +++ b/project/SparkBuild.scala @@ -254,6 +254,21 @@ object SparkBuild extends PomBuild { "-sourcepath", (baseDirectory in ThisBuild).value.getAbsolutePath // Required for relative source links in scaladoc ), +// Remove certain packages from Scaladoc +scalacOptions in (Compile, doc) := Seq( + "-groups", + "-skip-packages", Seq( +"org.apache.spark.api.python", +"org.apache.spark.network", +"org.apache.spark.deploy", +"org.apache.spark.util.collection" + ).mkString(":"), + "-doc-title", "Spark " + version.value.replaceAll("-SNAPSHOT", "") + " ScalaDoc" +) ++ { + // Do not attempt to scaladoc javadoc comments under 2.12 since it can't handle inner classes + if (scalaBinaryVersion.value == "2.12") Seq("-no-java-comments") else Seq.empty +}, + // Implements -Xfatal-warnings, ignoring deprecation warnings. // Code snippet taken from https://issues.scala-lang.org/browse/SI-8410. compile in Compile := { @@ -828,18 +843,7 @@ object TestSettings { } Seq.empty[File] }).value, -concurrentRestrictions in Global += Tags.limit(Tags.Test, 1), -// Remove certain packages from Scaladoc -scalacOptions in (Compile, doc) := Seq( - "-groups", - "-skip-packages", Seq( +"org.apache.spark.api.python", +"org.apache.spark.network", +"org.apache.spark.deploy", +"org.apache.spark.util.collection" - ).mkString(":"), - "-doc-title", "Spark " + version.value.replaceAll("-SNAPSHOT", "") + " ScalaDoc" -) +concurrentRestrictions in Global += Tags.limit(Tags.Test, 1) )
}
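Outside of Spark's own build, the same guard is a few lines of sbt; a minimal sketch, assuming a standard sbt project (scalaBinaryVersion is provided by sbt itself):

// build.sbt sketch: scaladoc under Scala 2.12 cannot handle Javadoc comments
// that use inner classes, so pass -no-java-comments only on that version.
scalacOptions in (Compile, doc) ++= {
  if (scalaBinaryVersion.value == "2.12") Seq("-no-java-comments")
  else Seq.empty
}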
spark git commit: [SPARK-22852][BUILD] Exclude -Xlint:unchecked from sbt javadoc flags
Repository: spark Updated Branches: refs/heads/master 59d52631e -> 0abaf31be [SPARK-22852][BUILD] Exclude -Xlint:unchecked from sbt javadoc flags ## What changes were proposed in this pull request? Moves the -Xlint:unchecked flag in the sbt build configuration from the Compile scope to the (Compile, compile) scope, allowing the publish and publishLocal commands to work. ## How was this patch tested? Successfully published the spark-launcher subproject from within sbt; publishing fails without this patch. Author: Erik LaBianca. Closes #20040 from easel/javadoc-xlint. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/0abaf31b Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/0abaf31b Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/0abaf31b Branch: refs/heads/master Commit: 0abaf31be7ab9e030ea9433938b9123596954814 Parents: 59d5263 Author: Erik LaBianca Authored: Thu Dec 21 09:38:21 2017 -0600 Committer: Sean Owen Committed: Thu Dec 21 09:38:21 2017 -0600 -- project/SparkBuild.scala | 12 ++-- 1 file changed, 6 insertions(+), 6 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/0abaf31b/project/SparkBuild.scala -- diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala index 7570338..8305494 100644 --- a/project/SparkBuild.scala +++ b/project/SparkBuild.scala @@ -239,14 +239,14 @@ object SparkBuild extends PomBuild { javacOptions in Compile ++= Seq( "-encoding", "UTF-8", - "-source", javacJVMVersion.value, - "-Xlint:unchecked" + "-source", javacJVMVersion.value ), -// This -target option cannot be set in the Compile configuration scope since `javadoc` doesn't -// play nicely with it; see https://github.com/sbt/sbt/issues/355#issuecomment-3817629 for -// additional discussion and explanation. +// This -target and Xlint:unchecked options cannot be set in the Compile configuration scope since +// `javadoc` doesn't play nicely with them; see https://github.com/sbt/sbt/issues/355#issuecomment-3817629 +// for additional discussion and explanation. javacOptions in (Compile, compile) ++= Seq( - "-target", javacJVMVersion.value + "-target", javacJVMVersion.value, + "-Xlint:unchecked" ), scalacOptions in Compile ++= Seq(
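The underlying pattern: flags that `javadoc` rejects must be kept out of the configuration-wide scope, because the `doc` task inherits `javacOptions in Compile`. A minimal sketch with illustrative values (Spark derives the actual versions from javacJVMVersion instead):

// Options javadoc also understands can stay configuration-wide...
javacOptions in Compile ++= Seq("-encoding", "UTF-8", "-source", "1.8")
// ...while -target and -Xlint:unchecked are confined to the compile task,
// so doc, publish, and publishLocal never hand them to javadoc.
javacOptions in (Compile, compile) ++= Seq("-target", "1.8", "-Xlint:unchecked")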
svn commit: r23847 - in /dev/spark/2.3.0-SNAPSHOT-2017_12_21_04_01-59d5263-docs: ./ _site/ _site/api/ _site/api/R/ _site/api/java/ _site/api/java/lib/ _site/api/java/org/ _site/api/java/org/apache/ _s
Author: pwendell Date: Thu Dec 21 12:16:56 2017 New Revision: 23847 Log: Apache Spark 2.3.0-SNAPSHOT-2017_12_21_04_01-59d5263 docs [This commit notification would consist of 1415 parts, which exceeds the limit of 50, so it was shortened to this summary.]
[1/2] spark git commit: [SPARK-22324][SQL][PYTHON] Upgrade Arrow to 0.8.0
Repository: spark Updated Branches: refs/heads/master cb9fc8d9b -> 59d52631e http://git-wip-us.apache.org/repos/asf/spark/blob/59d52631/sql/core/src/test/scala/org/apache/spark/sql/execution/arrow/ArrowConvertersSuite.scala -- diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/arrow/ArrowConvertersSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/arrow/ArrowConvertersSuite.scala index 57958f7..fd5a3df 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/arrow/ArrowConvertersSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/arrow/ArrowConvertersSuite.scala @@ -25,7 +25,7 @@ import java.util.Locale import com.google.common.io.Files import org.apache.arrow.memory.RootAllocator import org.apache.arrow.vector.{VectorLoader, VectorSchemaRoot} -import org.apache.arrow.vector.file.json.JsonFileReader +import org.apache.arrow.vector.ipc.JsonFileReader import org.apache.arrow.vector.util.Validator import org.scalatest.BeforeAndAfterAll @@ -76,16 +76,7 @@ class ArrowConvertersSuite extends SharedSQLContext with BeforeAndAfterAll { |"bitWidth" : 16 | }, | "nullable" : false, - | "children" : [ ], - | "typeLayout" : { - |"vectors" : [ { - | "type" : "VALIDITY", - | "typeBitWidth" : 1 - |}, { - | "type" : "DATA", - | "typeBitWidth" : 16 - |} ] - | } + | "children" : [ ] |}, { | "name" : "b_s", | "type" : { @@ -94,16 +85,7 @@ class ArrowConvertersSuite extends SharedSQLContext with BeforeAndAfterAll { |"bitWidth" : 16 | }, | "nullable" : true, - | "children" : [ ], - | "typeLayout" : { - |"vectors" : [ { - | "type" : "VALIDITY", - | "typeBitWidth" : 1 - |}, { - | "type" : "DATA", - | "typeBitWidth" : 16 - |} ] - | } + | "children" : [ ] |} ] | }, | "batches" : [ { @@ -143,16 +125,7 @@ class ArrowConvertersSuite extends SharedSQLContext with BeforeAndAfterAll { |"bitWidth" : 32 | }, | "nullable" : false, - | "children" : [ ], - | "typeLayout" : { - |"vectors" : [ { - | "type" : "VALIDITY", - | "typeBitWidth" : 1 - |}, { - | "type" : "DATA", - | "typeBitWidth" : 32 - |} ] - | } + | "children" : [ ] |}, { | "name" : "b_i", | "type" : { @@ -161,16 +134,7 @@ class ArrowConvertersSuite extends SharedSQLContext with BeforeAndAfterAll { |"bitWidth" : 32 | }, | "nullable" : true, - | "children" : [ ], - | "typeLayout" : { - |"vectors" : [ { - | "type" : "VALIDITY", - | "typeBitWidth" : 1 - |}, { - | "type" : "DATA", - | "typeBitWidth" : 32 - |} ] - | } + | "children" : [ ] |} ] | }, | "batches" : [ { @@ -210,16 +174,7 @@ class ArrowConvertersSuite extends SharedSQLContext with BeforeAndAfterAll { |"bitWidth" : 64 | }, | "nullable" : false, - | "children" : [ ], - | "typeLayout" : { - |"vectors" : [ { - | "type" : "VALIDITY", - | "typeBitWidth" : 1 - |}, { - | "type" : "DATA", - | "typeBitWidth" : 64 - |} ] - | } + | "children" : [ ] |}, { | "name" : "b_l", | "type" : { @@ -228,16 +183,7 @@ class ArrowConvertersSuite extends SharedSQLContext with BeforeAndAfterAll { |"bitWidth" : 64 | }, | "nullable" : true, - | "children" : [ ], - | "typeLayout" : { - |"vectors" : [ { - | "type" : "VALIDITY", - | "typeBitWidth" : 1 - |}, { - | "type" : "DATA", - | "typeBitWidth" : 64 - |} ] - | } + | "children" : [ ] |} ] | }, | "batches" : [ { @@ -276,16 +222,7 @@ class ArrowConvertersSuite extends SharedSQLContext with BeforeAndAfterAll { |"precision" :
[2/2] spark git commit: [SPARK-22324][SQL][PYTHON] Upgrade Arrow to 0.8.0
[SPARK-22324][SQL][PYTHON] Upgrade Arrow to 0.8.0 ## What changes were proposed in this pull request? Upgrade Spark to Arrow 0.8.0 for Java and Python. Also includes an upgrade of Netty to 4.1.17 to resolve dependency requirements. The highlights that pertain to Spark for the update from Arrow version 0.4.1 to 0.8.0 include: * Java refactoring for a simpler API * Reduced Java heap usage and streamlined hot code paths * Type support for DecimalType, ArrayType * Improved type casting support in Python * Simplified type checking in Python ## How was this patch tested? Existing tests. Author: Bryan Cutler. Author: Shixiong Zhu. Closes #19884 from BryanCutler/arrow-upgrade-080-SPARK-22324. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/59d52631 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/59d52631 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/59d52631 Branch: refs/heads/master Commit: 59d52631eb86394f1d981419cb744c20bd4e0b87 Parents: cb9fc8d Author: Bryan Cutler Authored: Thu Dec 21 20:43:56 2017 +0900 Committer: hyukjinkwon Committed: Thu Dec 21 20:43:56 2017 +0900 -- .../spark/network/crypto/TransportCipher.java | 41 +- .../network/protocol/MessageWithHeader.java | 39 +- .../spark/network/sasl/SaslEncryption.java | 41 +- .../spark/network/util/AbstractFileRegion.java | 53 ++ .../org/apache/spark/network/ProtocolSuite.java | 4 +- .../protocol/MessageWithHeaderSuite.java| 7 +- .../org/apache/spark/storage/DiskStore.scala| 9 +- dev/deps/spark-deps-hadoop-2.6 | 10 +- dev/deps/spark-deps-hadoop-2.7 | 10 +- pom.xml | 12 +- python/pyspark/serializers.py | 27 +- python/pyspark/sql/dataframe.py | 2 + python/pyspark/sql/functions.py | 13 +- python/pyspark/sql/group.py | 2 +- python/pyspark/sql/session.py | 3 + python/pyspark/sql/tests.py | 12 +- python/pyspark/sql/types.py | 25 +- python/pyspark/sql/udf.py | 16 +- python/pyspark/sql/utils.py | 9 + .../execution/vectorized/ArrowColumnVector.java | 136 +++-- .../sql/execution/arrow/ArrowConverters.scala | 13 +- .../spark/sql/execution/arrow/ArrowWriter.scala | 132 ++--- .../execution/python/ArrowPythonRunner.scala| 27 +- .../execution/arrow/ArrowConvertersSuite.scala | 571 ++- .../vectorized/ArrowColumnVectorSuite.scala | 150 +++-- .../vectorized/ColumnarBatchSuite.scala | 20 +- 26 files changed, 515 insertions(+), 869 deletions(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/59d52631/common/network-common/src/main/java/org/apache/spark/network/crypto/TransportCipher.java -- diff --git a/common/network-common/src/main/java/org/apache/spark/network/crypto/TransportCipher.java b/common/network-common/src/main/java/org/apache/spark/network/crypto/TransportCipher.java index 7376d1d..e04524d 100644 --- a/common/network-common/src/main/java/org/apache/spark/network/crypto/TransportCipher.java +++ b/common/network-common/src/main/java/org/apache/spark/network/crypto/TransportCipher.java @@ -30,10 +30,10 @@ import com.google.common.base.Preconditions; import io.netty.buffer.ByteBuf; import io.netty.buffer.Unpooled; import io.netty.channel.*; -import io.netty.util.AbstractReferenceCounted; import org.apache.commons.crypto.stream.CryptoInputStream; import org.apache.commons.crypto.stream.CryptoOutputStream; +import org.apache.spark.network.util.AbstractFileRegion; import org.apache.spark.network.util.ByteArrayReadableChannel; import org.apache.spark.network.util.ByteArrayWritableChannel; @@ -161,7 +161,7 @@ public class TransportCipher { } } - private static class EncryptedMessage extends AbstractReferenceCounted implements FileRegion { + private static class EncryptedMessage extends AbstractFileRegion { private final boolean isByteBuf; private final ByteBuf buf; private final FileRegion region; @@ -199,11 +199,46 @@ public class TransportCipher { } @Override -public long transfered() { +public long transferred() { return transferred; } @Override +public EncryptedMessage touch(Object o) { + super.touch(o); + if (region != null) { +region.touch(o); + } + if (buf != null) { +buf.touch(o); + } + return this; +} + +@Override +public EncryptedMessage retain(int increment) { +
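One user-visible consequence of the upgrade, per the ArrowConvertersSuite diff in part 1/2 above: JsonFileReader moved from org.apache.arrow.vector.file.json to org.apache.arrow.vector.ipc, and the per-field "typeLayout" blocks disappeared from the JSON integration format. A hedged sketch of reading such a file under Arrow 0.8.0 (the file name is a placeholder, and the start()/close() calls are assumed from Arrow's Java API of that era, which the truncated diff does not show):

import java.io.File
import org.apache.arrow.memory.RootAllocator
// Arrow 0.8.0 location; under 0.4.1 this was org.apache.arrow.vector.file.json.JsonFileReader
import org.apache.arrow.vector.ipc.JsonFileReader

object ArrowJsonSketch {
  def main(args: Array[String]): Unit = {
    val allocator = new RootAllocator(Long.MaxValue)
    val reader = new JsonFileReader(new File("arrow-data.json"), allocator)
    try {
      val schema = reader.start() // reads the schema header; no "typeLayout" entries in 0.8.0
      println(schema)
    } finally {
      reader.close()
      allocator.close()
    }
  }
}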
svn commit: r23839 - in /dev/spark/2.3.0-SNAPSHOT-2017_12_21_00_01-cb9fc8d-docs: ./ _site/ _site/api/ _site/api/R/ _site/api/java/ _site/api/java/lib/ _site/api/java/org/ _site/api/java/org/apache/ _s
Author: pwendell Date: Thu Dec 21 08:14:49 2017 New Revision: 23839 Log: Apache Spark 2.3.0-SNAPSHOT-2017_12_21_00_01-cb9fc8d docs [This commit notification would consist of 1415 parts, which exceeds the limit of 50, so it was shortened to this summary.]