spark git commit: [SPARK-22604][SQL] remove the get address methods from ColumnVector
Repository: spark
Updated Branches: refs/heads/master 70221903f -> e3fd93f14

[SPARK-22604][SQL] remove the get address methods from ColumnVector

## What changes were proposed in this pull request?

`nullsNativeAddress` and `valuesNativeAddress` are only used in tests and benchmarks, so they do not need to be top-level `ColumnVector` API.

## How was this patch tested?

existing tests

Author: Wenchen Fan

Closes #19818 from cloud-fan/minor.

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/e3fd93f1
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/e3fd93f1
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/e3fd93f1
Branch: refs/heads/master
Commit: e3fd93f149ff0ff1caff28a5191215e2a29749a9
Parents: 7022190
Author: Wenchen Fan
Authored: Fri Nov 24 22:43:47 2017 -0800
Committer: gatorsmile
Committed: Fri Nov 24 22:43:47 2017 -0800

--
 .../execution/vectorized/ArrowColumnVector.java | 10 ---
 .../sql/execution/vectorized/ColumnVector.java  |  7 --
 .../vectorized/OffHeapColumnVector.java         |  6 +-
 .../vectorized/OnHeapColumnVector.java          |  9 ---
 .../vectorized/ColumnarBatchBenchmark.scala     | 32
 .../vectorized/ColumnarBatchSuite.scala         | 82 +++-
 6 files changed, 47 insertions(+), 99 deletions(-)
--

http://git-wip-us.apache.org/repos/asf/spark/blob/e3fd93f1/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/ArrowColumnVector.java
--
diff --git a/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/ArrowColumnVector.java b/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/ArrowColumnVector.java
index 949035b..3a10e98 100644
--- a/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/ArrowColumnVector.java
+++ b/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/ArrowColumnVector.java
@@ -60,16 +60,6 @@ public final class ArrowColumnVector extends ColumnVector { } @Override - public long nullsNativeAddress() { -throw new RuntimeException("Cannot get native address for arrow column"); - } - - @Override - public long valuesNativeAddress() { -throw new RuntimeException("Cannot get native address for arrow column"); - } - - @Override public void close() { if (childColumns != null) { for (int i = 0; i < childColumns.length; i++) {

http://git-wip-us.apache.org/repos/asf/spark/blob/e3fd93f1/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/ColumnVector.java
--
diff --git a/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/ColumnVector.java b/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/ColumnVector.java
index 666fd63..360ed83e 100644
--- a/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/ColumnVector.java
+++ b/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/ColumnVector.java
@@ -63,13 +63,6 @@ public abstract class ColumnVector implements AutoCloseable { public abstract boolean anyNullsSet(); /** - * Returns the off heap ptr for the arrays backing the NULLs and values buffer. Only valid - * to call for off heap columns. - */ - public abstract long nullsNativeAddress(); - public abstract long valuesNativeAddress(); - - /** * Returns whether the value at rowId is NULL.
*/ public abstract boolean isNullAt(int rowId); http://git-wip-us.apache.org/repos/asf/spark/blob/e3fd93f1/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/OffHeapColumnVector.java -- diff --git a/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/OffHeapColumnVector.java b/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/OffHeapColumnVector.java index 2bf523b..6b5c783 100644 --- a/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/OffHeapColumnVector.java +++ b/sql/core/src/main/java/org/apache/spark/sql/execution/vectorized/OffHeapColumnVector.java @@ -19,6 +19,8 @@ package org.apache.spark.sql.execution.vectorized; import java.nio.ByteBuffer; import java.nio.ByteOrder; +import com.google.common.annotations.VisibleForTesting; + import org.apache.spark.sql.types.*; import org.apache.spark.unsafe.Platform; @@ -73,12 +75,12 @@ public final class OffHeapColumnVector extends WritableColumnVector { reset(); } - @Override + @VisibleForTesting public long valuesNativeAddress() { return data; } - @Override + @VisibleForTesting public long nullsNativeAddress() { return nulls; }
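A practical consequence of the change above: the native-address getters are no longer on the abstract `ColumnVector`, so tests and benchmarks must hold the concrete `OffHeapColumnVector` type. Below is a minimal, self-contained Scala sketch of that narrowed surface; the two classes are simplified stand-ins for illustration, not Spark's actual implementations.

```scala
// Simplified stand-ins for the Spark classes, to illustrate the narrowed API.
abstract class ColumnVector extends AutoCloseable {
  def isNullAt(rowId: Int): Boolean
  // nullsNativeAddress / valuesNativeAddress are intentionally absent here now.
}

final class OffHeapColumnVector(data: Long, nulls: Long) extends ColumnVector {
  override def isNullAt(rowId: Int): Boolean = false // stub for the sketch
  override def close(): Unit = ()
  // Kept only on the concrete class, analogous to the @VisibleForTesting methods.
  def valuesNativeAddress: Long = data
  def nullsNativeAddress: Long = nulls
}

// A test or benchmark now needs the concrete type to read the addresses:
val col: ColumnVector = new OffHeapColumnVector(0x1000L, 0x2000L)
val offHeap = col.asInstanceOf[OffHeapColumnVector]
assert(offHeap.valuesNativeAddress == 0x1000L && offHeap.nullsNativeAddress == 0x2000L)
```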
spark git commit: [SPARK-22596][SQL] set ctx.currentVars in CodegenSupport.consume
Repository: spark
Updated Branches: refs/heads/master a1877f45c -> 70221903f

[SPARK-22596][SQL] set ctx.currentVars in CodegenSupport.consume

## What changes were proposed in this pull request?

`ctx.currentVars` holds the input variables for the current operator. Since these are already decided in `CodegenSupport`, we can set it there instead of in `doConsume`. This PR also adds more comments to help people understand the codegen framework.

After this PR, we have a principle for setting `ctx.currentVars` and `ctx.INPUT_ROW`:

1. For the non-whole-stage-codegen path, never set them (with some special cases permitted, like generating ordering).
2. For the whole-stage-codegen `produce` path, we mostly don't need to set them, but blocking operators may need to set them for expressions that produce data from the data source, sort buffer, aggregate buffer, etc.
3. For the whole-stage-codegen `consume` path, we mostly don't need to set them, because `currentVars` is automatically set to the child's input variables and `INPUT_ROW` is mostly unused. A few plans need to tweak them, as they may have different inputs or use the input row.

## How was this patch tested?

existing tests.

Author: Wenchen Fan

Closes #19803 from cloud-fan/codegen.

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/70221903
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/70221903
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/70221903
Branch: refs/heads/master
Commit: 70221903f54eaa0514d5d189dfb6f175a62228a8
Parents: a1877f4
Author: Wenchen Fan
Authored: Fri Nov 24 21:50:30 2017 -0800
Committer: gatorsmile
Committed: Fri Nov 24 21:50:30 2017 -0800

--
 .../catalyst/expressions/BoundAttribute.scala   | 23 +
 .../expressions/codegen/CodeGenerator.scala     | 14 +++---
 .../sql/execution/DataSourceScanExec.scala      | 14 +-
 .../apache/spark/sql/execution/ExpandExec.scala |  3 ---
 .../spark/sql/execution/GenerateExec.scala      |  2 --
 .../sql/execution/WholeStageCodegenExec.scala   | 27 +++-
 .../sql/execution/basicPhysicalOperators.scala  |  6 +
 .../apache/spark/sql/execution/objects.scala    | 20 +--
 8 files changed, 59 insertions(+), 50 deletions(-)
--

http://git-wip-us.apache.org/repos/asf/spark/blob/70221903/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/BoundAttribute.scala
--
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/BoundAttribute.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/BoundAttribute.scala
index 7d16118..6a17a39 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/BoundAttribute.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/BoundAttribute.scala
@@ -59,21 +59,24 @@ case class BoundReference(ordinal: Int, dataType: DataType, nullable: Boolean) } override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { -val javaType = ctx.javaType(dataType) -val value = ctx.getValue(ctx.INPUT_ROW, dataType, ordinal.toString) if (ctx.currentVars != null && ctx.currentVars(ordinal) != null) { val oev = ctx.currentVars(ordinal) ev.isNull = oev.isNull ev.value = oev.value - val code = oev.code - oev.code = "" - ev.copy(code = code) -} else if (nullable) { - ev.copy(code = s""" -boolean ${ev.isNull} = ${ctx.INPUT_ROW}.isNullAt($ordinal); -$javaType ${ev.value} = ${ev.isNull} ?
${ctx.defaultValue(dataType)} : ($value);""") + ev.copy(code = oev.code) } else { - ev.copy(code = s"""$javaType ${ev.value} = $value;""", isNull = "false") + assert(ctx.INPUT_ROW != null, "INPUT_ROW and currentVars cannot both be null.") + val javaType = ctx.javaType(dataType) + val value = ctx.getValue(ctx.INPUT_ROW, dataType, ordinal.toString) + if (nullable) { +ev.copy(code = + s""" + |boolean ${ev.isNull} = ${ctx.INPUT_ROW}.isNullAt($ordinal); + |$javaType ${ev.value} = ${ev.isNull} ? ${ctx.defaultValue(dataType)} : ($value); + """.stripMargin) + } else { +ev.copy(code = s"$javaType ${ev.value} = $value;", isNull = "false") + } } } } http://git-wip-us.apache.org/repos/asf/spark/blob/70221903/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala -- diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala
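The rule this commit codifies in `BoundReference.doGenCode` can be read in isolation. A minimal sketch, assuming toy stand-ins for `CodegenContext` and `ExprCode` (Spark's real classes carry much more state) and int-typed columns for brevity:

```scala
// Simplified stand-ins; not Spark's actual codegen classes.
case class ExprCode(code: String, isNull: String, value: String)

class CodegenContext {
  var INPUT_ROW: String = null          // name of the current input row variable
  var currentVars: Seq[ExprCode] = null // input column variables, if any
}

// The consume-path rule: prefer the child's column variables (currentVars);
// otherwise fall back to reading from INPUT_ROW, which must then be set.
def genBoundReference(ctx: CodegenContext, ordinal: Int): ExprCode =
  if (ctx.currentVars != null && ctx.currentVars(ordinal) != null) {
    ctx.currentVars(ordinal)
  } else {
    assert(ctx.INPUT_ROW != null, "INPUT_ROW and currentVars cannot both be null.")
    val v = s"value$ordinal"
    // Assuming int columns for brevity; the real code dispatches on dataType.
    ExprCode(s"int $v = ${ctx.INPUT_ROW}.getInt($ordinal);", "false", v)
  }
```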
svn commit: r23279 - in /dev/spark/spark-2.2.1-rc2-docs: ./ _site/ _site/api/ _site/api/R/ _site/api/java/ _site/api/java/lib/ _site/api/java/org/ _site/api/java/org/apache/ _site/api/java/org/apache/
Author: felixcheung
Date: Sat Nov 25 03:01:04 2017
New Revision: 23279

Log:
Apache Spark spark-2.2.1-rc2 docs

[This commit notification would consist of 1388 parts, which exceeds the limit of 50, so it was shortened to this summary.]

- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org
svn commit: r23278 - /dev/spark/spark-2.2.1-rc2-bin/
Author: felixcheung Date: Sat Nov 25 02:44:26 2017 New Revision: 23278 Log: Apache Spark spark-2.2.1-rc2 Added: dev/spark/spark-2.2.1-rc2-bin/ dev/spark/spark-2.2.1-rc2-bin/SparkR_2.2.1.tar.gz (with props) dev/spark/spark-2.2.1-rc2-bin/SparkR_2.2.1.tar.gz.asc dev/spark/spark-2.2.1-rc2-bin/SparkR_2.2.1.tar.gz.md5 dev/spark/spark-2.2.1-rc2-bin/SparkR_2.2.1.tar.gz.sha512 dev/spark/spark-2.2.1-rc2-bin/pyspark-2.2.1.tar.gz (with props) dev/spark/spark-2.2.1-rc2-bin/pyspark-2.2.1.tar.gz.asc dev/spark/spark-2.2.1-rc2-bin/pyspark-2.2.1.tar.gz.md5 dev/spark/spark-2.2.1-rc2-bin/pyspark-2.2.1.tar.gz.sha512 dev/spark/spark-2.2.1-rc2-bin/spark-2.2.1-bin-hadoop2.6.tgz (with props) dev/spark/spark-2.2.1-rc2-bin/spark-2.2.1-bin-hadoop2.6.tgz.asc dev/spark/spark-2.2.1-rc2-bin/spark-2.2.1-bin-hadoop2.6.tgz.md5 dev/spark/spark-2.2.1-rc2-bin/spark-2.2.1-bin-hadoop2.6.tgz.sha512 dev/spark/spark-2.2.1-rc2-bin/spark-2.2.1-bin-hadoop2.7.tgz (with props) dev/spark/spark-2.2.1-rc2-bin/spark-2.2.1-bin-hadoop2.7.tgz.asc dev/spark/spark-2.2.1-rc2-bin/spark-2.2.1-bin-hadoop2.7.tgz.md5 dev/spark/spark-2.2.1-rc2-bin/spark-2.2.1-bin-hadoop2.7.tgz.sha512 dev/spark/spark-2.2.1-rc2-bin/spark-2.2.1-bin-without-hadoop.tgz (with props) dev/spark/spark-2.2.1-rc2-bin/spark-2.2.1-bin-without-hadoop.tgz.asc dev/spark/spark-2.2.1-rc2-bin/spark-2.2.1-bin-without-hadoop.tgz.md5 dev/spark/spark-2.2.1-rc2-bin/spark-2.2.1-bin-without-hadoop.tgz.sha512 dev/spark/spark-2.2.1-rc2-bin/spark-2.2.1.tgz (with props) dev/spark/spark-2.2.1-rc2-bin/spark-2.2.1.tgz.asc dev/spark/spark-2.2.1-rc2-bin/spark-2.2.1.tgz.md5 dev/spark/spark-2.2.1-rc2-bin/spark-2.2.1.tgz.sha512 Added: dev/spark/spark-2.2.1-rc2-bin/SparkR_2.2.1.tar.gz == Binary file - no diff available. Propchange: dev/spark/spark-2.2.1-rc2-bin/SparkR_2.2.1.tar.gz -- svn:mime-type = application/x-gzip Added: dev/spark/spark-2.2.1-rc2-bin/SparkR_2.2.1.tar.gz.asc == --- dev/spark/spark-2.2.1-rc2-bin/SparkR_2.2.1.tar.gz.asc (added) +++ dev/spark/spark-2.2.1-rc2-bin/SparkR_2.2.1.tar.gz.asc Sat Nov 25 02:44:26 2017 @@ -0,0 +1,16 @@ +-BEGIN PGP SIGNATURE- + +iQIcBAABCAAGBQJaGM5LAAoJELbItmCFBAEYrMIP/jDhITlIT+Kvv0bWtsrB4NkJ +KjlnneO6ZSpSOPVM/ZgA1T7zxMSG8dJvmkD+45flPSm/xoG6TksnZ+jUMUV5/1Nv +KIHWUtJezRTomAxtYpFpfcQGRylFlG8cxeasZ29pEk7zZfUqDrYH8Ki7+qYhXPx4 +XTfODzjOcUMlb0K9fMsh4cmbL2Yg78ft4dAUsmk0gGQBtxr6Tx5y16ejnmBoYoeF +io07Hb+eYTV6Jba6hpT6yuFT5MqPdZQfkQ0pL40zyHDYfP/9IDDFunoqHhPqd/aS +eZPedS0t5tstjis3lD2uUrxddAqeV7WztUeyap5+D48LL1dFWUsqespnLjMQq8fK +vbyHR2hljRs7hmXzNZkkFBStd4UaHcUQPjx3xzkHPxO9ncFfmRreJIgpr8cGT9Pc +tlDIX3sqpGqZ0aAMkuvkHRVr6PqoqdPj25NQCz6QC04JQCFbGsVGvHnrdw3/mrgA +gJTh0wHOCOVHxZbq+F4a7iPEF72z5agoaiAHwGy5pUzJi1N8BRQv3J94hw1fx6uP +vrRcg3Nbqt9x1KWwUaMFC/ie+dviA8aLxtdn/IEUbRkzMJKIE+MeWbA1hfRE5CD4 +SH4BhNR02gc5Hhaflo0i54t9Xk9VCX6zZ5N8sG4ExBcZBCRqDrPtQvvqZxaZDuDb +9Y7r79M3TV51VahLE2Mr +=6NPY +-END PGP SIGNATURE- Added: dev/spark/spark-2.2.1-rc2-bin/SparkR_2.2.1.tar.gz.md5 == --- dev/spark/spark-2.2.1-rc2-bin/SparkR_2.2.1.tar.gz.md5 (added) +++ dev/spark/spark-2.2.1-rc2-bin/SparkR_2.2.1.tar.gz.md5 Sat Nov 25 02:44:26 2017 @@ -0,0 +1 @@ +SparkR_2.2.1.tar.gz: 55 90 42 2F A9 55 66 0F E1 22 CF 22 09 7C 23 F2 Added: dev/spark/spark-2.2.1-rc2-bin/SparkR_2.2.1.tar.gz.sha512 == --- dev/spark/spark-2.2.1-rc2-bin/SparkR_2.2.1.tar.gz.sha512 (added) +++ dev/spark/spark-2.2.1-rc2-bin/SparkR_2.2.1.tar.gz.sha512 Sat Nov 25 02:44:26 2017 @@ -0,0 +1,3 @@ +SparkR_2.2.1.tar.gz: 02C7BDC2 B8D19FFD B844676E 17369237 C139B155 0BF21E9F + DB7B1127 4EFDDC3B 77E93548 48AC8508 934888E8 10AADAF1 + 6C6DDEC7 F0068C13 
2A7D578F 47E803E1 Added: dev/spark/spark-2.2.1-rc2-bin/pyspark-2.2.1.tar.gz == Binary file - no diff available. Propchange: dev/spark/spark-2.2.1-rc2-bin/pyspark-2.2.1.tar.gz -- svn:mime-type = application/x-gzip Added: dev/spark/spark-2.2.1-rc2-bin/pyspark-2.2.1.tar.gz.asc == --- dev/spark/spark-2.2.1-rc2-bin/pyspark-2.2.1.tar.gz.asc (added) +++ dev/spark/spark-2.2.1-rc2-bin/pyspark-2.2.1.tar.gz.asc Sat Nov 25 02:44:26 2017 @@ -0,0 +1,16 @@ +-BEGIN PGP SIGNATURE- + +iQIcBAABCAAGBQJaGM5eAAoJELbItmCFBAEYgSAP+gNwIYoECSMA9OhR3T0MvR6Y +zuyysHXFh1eVd2Pl3qCNIESyBSSMxwc58is+GNFRg+SBJFPV28mZtn1r5+IF7ln3
[1/2] spark git commit: Preparing Spark release v2.2.1-rc2
Repository: spark
Updated Branches: refs/heads/branch-2.2 c3b5df22a -> 455cea622

Preparing Spark release v2.2.1-rc2

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/e30e2698
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/e30e2698
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/e30e2698
Branch: refs/heads/branch-2.2
Commit: e30e2698a2193f0bbdcd4edb884710819ab6397c
Parents: c3b5df2
Author: Felix Cheung
Authored: Fri Nov 24 21:11:35 2017 +
Committer: Felix Cheung
Committed: Fri Nov 24 21:11:35 2017 +

--
 R/pkg/DESCRIPTION                          | 2 +-
 assembly/pom.xml                           | 2 +-
 common/network-common/pom.xml              | 2 +-
 common/network-shuffle/pom.xml             | 2 +-
 common/network-yarn/pom.xml                | 2 +-
 common/sketch/pom.xml                      | 2 +-
 common/tags/pom.xml                        | 2 +-
 common/unsafe/pom.xml                      | 2 +-
 core/pom.xml                               | 2 +-
 docs/_config.yml                           | 4 ++--
 examples/pom.xml                           | 2 +-
 external/docker-integration-tests/pom.xml  | 2 +-
 external/flume-assembly/pom.xml            | 2 +-
 external/flume-sink/pom.xml                | 2 +-
 external/flume/pom.xml                     | 2 +-
 external/kafka-0-10-assembly/pom.xml       | 2 +-
 external/kafka-0-10-sql/pom.xml            | 2 +-
 external/kafka-0-10/pom.xml                | 2 +-
 external/kafka-0-8-assembly/pom.xml        | 2 +-
 external/kafka-0-8/pom.xml                 | 2 +-
 external/kinesis-asl-assembly/pom.xml      | 2 +-
 external/kinesis-asl/pom.xml               | 2 +-
 external/spark-ganglia-lgpl/pom.xml        | 2 +-
 graphx/pom.xml                             | 2 +-
 launcher/pom.xml                           | 2 +-
 mllib-local/pom.xml                        | 2 +-
 mllib/pom.xml                              | 2 +-
 pom.xml                                    | 2 +-
 python/pyspark/version.py                  | 2 +-
 repl/pom.xml                               | 2 +-
 resource-managers/mesos/pom.xml            | 2 +-
 resource-managers/yarn/pom.xml             | 2 +-
 sql/catalyst/pom.xml                       | 2 +-
 sql/core/pom.xml                           | 2 +-
 sql/hive-thriftserver/pom.xml              | 2 +-
 sql/hive/pom.xml                           | 2 +-
 streaming/pom.xml                          | 2 +-
 tools/pom.xml                              | 2 +-
 38 files changed, 39 insertions(+), 39 deletions(-)
--

http://git-wip-us.apache.org/repos/asf/spark/blob/e30e2698/R/pkg/DESCRIPTION
--
diff --git a/R/pkg/DESCRIPTION b/R/pkg/DESCRIPTION
index 380b3ef..4ac45fc 100644
--- a/R/pkg/DESCRIPTION
+++ b/R/pkg/DESCRIPTION
@@ -1,6 +1,6 @@ Package: SparkR Type: Package -Version: 2.2.2 +Version: 2.2.1 Title: R Frontend for Apache Spark Description: Provides an R Frontend for Apache Spark.
Authors@R: c(person("Shivaram", "Venkataraman", role = c("aut", "cre"), http://git-wip-us.apache.org/repos/asf/spark/blob/e30e2698/assembly/pom.xml -- diff --git a/assembly/pom.xml b/assembly/pom.xml index eeb75e9..ded172d 100644 --- a/assembly/pom.xml +++ b/assembly/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.11 -2.2.2-SNAPSHOT +2.2.1 ../pom.xml http://git-wip-us.apache.org/repos/asf/spark/blob/e30e2698/common/network-common/pom.xml -- diff --git a/common/network-common/pom.xml b/common/network-common/pom.xml index 9d83ad8..1a976a5 100644 --- a/common/network-common/pom.xml +++ b/common/network-common/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.11 -2.2.2-SNAPSHOT +2.2.1 ../../pom.xml http://git-wip-us.apache.org/repos/asf/spark/blob/e30e2698/common/network-shuffle/pom.xml -- diff --git a/common/network-shuffle/pom.xml b/common/network-shuffle/pom.xml index f841f93..1159953 100644 --- a/common/network-shuffle/pom.xml +++ b/common/network-shuffle/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.11 -2.2.2-SNAPSHOT +2.2.1 ../../pom.xml http://git-wip-us.apache.org/repos/asf/spark/blob/e30e2698/common/network-yarn/pom.xml -- diff --git a/common/network-yarn/pom.xml b/common/network-yarn/pom.xml index c1c2ebb..1ad4afe 100644 --- a/common/network-yarn/pom.xml +++ b/common/network-yarn/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.11 -
[2/2] spark git commit: Preparing development version 2.2.2-SNAPSHOT
Preparing development version 2.2.2-SNAPSHOT

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/455cea62
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/455cea62
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/455cea62
Branch: refs/heads/branch-2.2
Commit: 455cea622f14e27eba43c496eeb928e7514e44f6
Parents: e30e269
Author: Felix Cheung
Authored: Fri Nov 24 21:11:41 2017 +
Committer: Felix Cheung
Committed: Fri Nov 24 21:11:41 2017 +

--
 R/pkg/DESCRIPTION                          | 2 +-
 assembly/pom.xml                           | 2 +-
 common/network-common/pom.xml              | 2 +-
 common/network-shuffle/pom.xml             | 2 +-
 common/network-yarn/pom.xml                | 2 +-
 common/sketch/pom.xml                      | 2 +-
 common/tags/pom.xml                        | 2 +-
 common/unsafe/pom.xml                      | 2 +-
 core/pom.xml                               | 2 +-
 docs/_config.yml                           | 4 ++--
 examples/pom.xml                           | 2 +-
 external/docker-integration-tests/pom.xml  | 2 +-
 external/flume-assembly/pom.xml            | 2 +-
 external/flume-sink/pom.xml                | 2 +-
 external/flume/pom.xml                     | 2 +-
 external/kafka-0-10-assembly/pom.xml       | 2 +-
 external/kafka-0-10-sql/pom.xml            | 2 +-
 external/kafka-0-10/pom.xml                | 2 +-
 external/kafka-0-8-assembly/pom.xml        | 2 +-
 external/kafka-0-8/pom.xml                 | 2 +-
 external/kinesis-asl-assembly/pom.xml      | 2 +-
 external/kinesis-asl/pom.xml               | 2 +-
 external/spark-ganglia-lgpl/pom.xml        | 2 +-
 graphx/pom.xml                             | 2 +-
 launcher/pom.xml                           | 2 +-
 mllib-local/pom.xml                        | 2 +-
 mllib/pom.xml                              | 2 +-
 pom.xml                                    | 2 +-
 python/pyspark/version.py                  | 2 +-
 repl/pom.xml                               | 2 +-
 resource-managers/mesos/pom.xml            | 2 +-
 resource-managers/yarn/pom.xml             | 2 +-
 sql/catalyst/pom.xml                       | 2 +-
 sql/core/pom.xml                           | 2 +-
 sql/hive-thriftserver/pom.xml              | 2 +-
 sql/hive/pom.xml                           | 2 +-
 streaming/pom.xml                          | 2 +-
 tools/pom.xml                              | 2 +-
 38 files changed, 39 insertions(+), 39 deletions(-)
--

http://git-wip-us.apache.org/repos/asf/spark/blob/455cea62/R/pkg/DESCRIPTION
--
diff --git a/R/pkg/DESCRIPTION b/R/pkg/DESCRIPTION
index 4ac45fc..380b3ef 100644
--- a/R/pkg/DESCRIPTION
+++ b/R/pkg/DESCRIPTION
@@ -1,6 +1,6 @@ Package: SparkR Type: Package -Version: 2.2.1 +Version: 2.2.2 Title: R Frontend for Apache Spark Description: Provides an R Frontend for Apache Spark.
Authors@R: c(person("Shivaram", "Venkataraman", role = c("aut", "cre"), http://git-wip-us.apache.org/repos/asf/spark/blob/455cea62/assembly/pom.xml -- diff --git a/assembly/pom.xml b/assembly/pom.xml index ded172d..eeb75e9 100644 --- a/assembly/pom.xml +++ b/assembly/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.11 -2.2.1 +2.2.2-SNAPSHOT ../pom.xml http://git-wip-us.apache.org/repos/asf/spark/blob/455cea62/common/network-common/pom.xml -- diff --git a/common/network-common/pom.xml b/common/network-common/pom.xml index 1a976a5..9d83ad8 100644 --- a/common/network-common/pom.xml +++ b/common/network-common/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.11 -2.2.1 +2.2.2-SNAPSHOT ../../pom.xml http://git-wip-us.apache.org/repos/asf/spark/blob/455cea62/common/network-shuffle/pom.xml -- diff --git a/common/network-shuffle/pom.xml b/common/network-shuffle/pom.xml index 1159953..f841f93 100644 --- a/common/network-shuffle/pom.xml +++ b/common/network-shuffle/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.11 -2.2.1 +2.2.2-SNAPSHOT ../../pom.xml http://git-wip-us.apache.org/repos/asf/spark/blob/455cea62/common/network-yarn/pom.xml -- diff --git a/common/network-yarn/pom.xml b/common/network-yarn/pom.xml index 1ad4afe..c1c2ebb 100644 --- a/common/network-yarn/pom.xml +++ b/common/network-yarn/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.11 -2.2.1 +2.2.2-SNAPSHOT ../../pom.xml
[spark] Git Push Summary
Repository: spark
Updated Tags: refs/tags/v2.2.1-rc2 [created] e30e2698a

- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org
[2/2] spark git commit: fix typo
fix typo

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/c3b5df22
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/c3b5df22
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/c3b5df22
Branch: refs/heads/branch-2.2
Commit: c3b5df22aa34001d8412456ee346042b6218c664
Parents: b606cc2
Author: Felix Cheung
Authored: Fri Nov 24 12:06:57 2017 -0800
Committer: Felix Cheung
Committed: Fri Nov 24 12:06:57 2017 -0800

--
 bin/find-spark-home.cmd | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
--

http://git-wip-us.apache.org/repos/asf/spark/blob/c3b5df22/bin/find-spark-home.cmd
--
diff --git a/bin/find-spark-home.cmd b/bin/find-spark-home.cmd
index c75e7ee..6025f67 100644
--- a/bin/find-spark-home.cmd
+++ b/bin/find-spark-home.cmd
@@ -32,7 +32,7 @@ if not "x%PYSPARK_PYTHON%"=="x" ( ) rem If there is python installed, trying to use the root dir as SPARK_HOME -where %PYTHON_RUNNER% > nul 2>$1 +where %PYTHON_RUNNER% > nul 2>&1 if %ERRORLEVEL% neq 0 ( if not exist %PYTHON_RUNNER% ( if "x%SPARK_HOME%"=="x" (

- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org
[1/2] spark git commit: [SPARK-22495] Fix setup of SPARK_HOME variable on Windows
Repository: spark
Updated Branches: refs/heads/branch-2.2 ad57141f9 -> c3b5df22a

[SPARK-22495] Fix setup of SPARK_HOME variable on Windows

## What changes were proposed in this pull request?

This is a cherry pick of the original PR 19370 onto branch-2.2 as suggested in https://github.com/apache/spark/pull/19370#issuecomment-346526920.

This fixes how `SPARK_HOME` is resolved on Windows. While the previous version worked with the built release download, the set of directories changed slightly for the PySpark `pip` or `conda` install. This had been reflected in the Linux files in `bin`, but not in the Windows `cmd` files.

The first fix improves how the `jars` directory is found, as this was stopping the Windows version of the `pip/conda` install from working: JARs were not found on Session/Context setup.

The second fix adds a `find-spark-home.cmd` script, which uses the `find_spark_home.py` script, like the Linux version, to resolve `SPARK_HOME`. It is based on the `find-spark-home` bash script, though some operations are done in a different order due to `cmd` script language limitations. If the environment variable is already set, the Python script `find_spark_home.py` will not be run. The process can fail if Python is not installed, but this path is mostly taken when PySpark is installed via `pip/conda`, in which case some Python is present on the system.

## How was this patch tested?

Tested on local installation.

Author: Jakub Nowacki

Closes #19807 from jsnowacki/fix_spark_cmds_2.

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/b606cc2b
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/b606cc2b
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/b606cc2b
Branch: refs/heads/branch-2.2
Commit: b606cc2bdcfb103b819dac65cc610283cfa0ded5
Parents: ad57141
Author: Jakub Nowacki
Authored: Fri Nov 24 12:05:57 2017 -0800
Committer: Felix Cheung
Committed: Fri Nov 24 12:05:57 2017 -0800

--
 appveyor.yml            |  2 ++
 bin/find-spark-home.cmd | 60
 bin/pyspark2.cmd        |  2 +-
 bin/run-example.cmd     |  4 ++-
 bin/spark-class2.cmd    |  2 +-
 bin/spark-shell2.cmd    |  4 ++-
 bin/sparkR2.cmd         |  2 +-
 7 files changed, 71 insertions(+), 5 deletions(-)
--

http://git-wip-us.apache.org/repos/asf/spark/blob/b606cc2b/appveyor.yml
--
diff --git a/appveyor.yml b/appveyor.yml
index 49e09ea..bc527e8 100644
--- a/appveyor.yml
+++ b/appveyor.yml
@@ -32,6 +32,8 @@ only_commits: - sql/core/src/main/scala/org/apache/spark/sql/api/r/ - core/src/main/scala/org/apache/spark/api/r/ - mllib/src/main/scala/org/apache/spark/ml/r/ +- core/src/test/scala/org/apache/spark/deploy/SparkSubmitSuite.scala +- bin/*.cmd cache: - C:\Users\appveyor\.m2

http://git-wip-us.apache.org/repos/asf/spark/blob/b606cc2b/bin/find-spark-home.cmd
--
diff --git a/bin/find-spark-home.cmd b/bin/find-spark-home.cmd
new file mode 100644
index 000..c75e7ee
--- /dev/null
+++ b/bin/find-spark-home.cmd
@@ -0,0 +1,60 @@ +@echo off + +rem +rem Licensed to the Apache Software Foundation (ASF) under one or more +rem contributor license agreements. See the NOTICE file distributed with +rem this work for additional information regarding copyright ownership. +rem The ASF licenses this file to You under the Apache License, Version 2.0 +rem (the "License"); you may not use this file except in compliance with +rem the License.
You may obtain a copy of the License at +rem +remhttp://www.apache.org/licenses/LICENSE-2.0 +rem +rem Unless required by applicable law or agreed to in writing, software +rem distributed under the License is distributed on an "AS IS" BASIS, +rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +rem See the License for the specific language governing permissions and +rem limitations under the License. +rem + +rem Path to Python script finding SPARK_HOME +set FIND_SPARK_HOME_PYTHON_SCRIPT=%~dp0find_spark_home.py + +rem Default to standard python interpreter unless told otherwise +set PYTHON_RUNNER=python +rem If PYSPARK_DRIVER_PYTHON is set, it overwrites the python version +if not "x%PYSPARK_DRIVER_PYTHON%"=="x" ( + set PYTHON_RUNNER=%PYSPARK_DRIVER_PYTHON% +) +rem If PYSPARK_PYTHON is set, it overwrites the python version +if not "x%PYSPARK_PYTHON%"=="x" ( + set PYTHON_RUNNER=%PYSPARK_PYTHON% +) + +rem If there is python installed, trying to use the root dir as SPARK_HOME +where %PYTHON_RUNNER% > nul 2>$1 +if %ERRORLEVEL% neq 0 ( + if not exist %PYTHON_RUNNER% ( +if "x%SPARK_HOME%"=="x" ( + echo Missing
spark git commit: [SPARK-22597][SQL] Add spark-sql cmd script for Windows users
Repository: spark
Updated Branches: refs/heads/master efd0036ec -> a1877f45c

[SPARK-22597][SQL] Add spark-sql cmd script for Windows users

## What changes were proposed in this pull request?

This PR proposes to add cmd scripts so that Windows users can also run the `spark-sql` script.

## How was this patch tested?

Manually tested on Windows.

**Before**

```cmd
C:\...\spark>.\bin\spark-sql
'.\bin\spark-sql' is not recognized as an internal or external command, operable program or batch file.

C:\...\spark>.\bin\spark-sql.cmd
'.\bin\spark-sql.cmd' is not recognized as an internal or external command, operable program or batch file.
```

**After**

```cmd
C:\...\spark>.\bin\spark-sql
...
spark-sql> SELECT 'Hello World !!';
...
Hello World !!
```

Author: hyukjinkwon

Closes #19808 from HyukjinKwon/spark-sql-cmd.

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/a1877f45
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/a1877f45
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/a1877f45
Branch: refs/heads/master
Commit: a1877f45c3451d18879083ed9b71dd9d5f583f1c
Parents: efd0036
Author: hyukjinkwon
Authored: Fri Nov 24 19:55:26 2017 +0100
Committer: Wenchen Fan
Committed: Fri Nov 24 19:55:26 2017 +0100

--
 bin/find-spark-home.cmd | 2 +-
 bin/run-example.cmd     | 2 +-
 bin/spark-sql.cmd       | 25 +
 bin/spark-sql2.cmd      | 25 +
 bin/sparkR2.cmd         | 3 +--
 5 files changed, 53 insertions(+), 4 deletions(-)
--

http://git-wip-us.apache.org/repos/asf/spark/blob/a1877f45/bin/find-spark-home.cmd
--
diff --git a/bin/find-spark-home.cmd b/bin/find-spark-home.cmd
index c75e7ee..6025f67 100644
--- a/bin/find-spark-home.cmd
+++ b/bin/find-spark-home.cmd
@@ -32,7 +32,7 @@ if not "x%PYSPARK_PYTHON%"=="x" ( ) rem If there is python installed, trying to use the root dir as SPARK_HOME -where %PYTHON_RUNNER% > nul 2>$1 +where %PYTHON_RUNNER% > nul 2>&1 if %ERRORLEVEL% neq 0 ( if not exist %PYTHON_RUNNER% ( if "x%SPARK_HOME%"=="x" (

http://git-wip-us.apache.org/repos/asf/spark/blob/a1877f45/bin/run-example.cmd
--
diff --git a/bin/run-example.cmd b/bin/run-example.cmd
index cc6b234..2dd396e 100644
--- a/bin/run-example.cmd
+++ b/bin/run-example.cmd
@@ -20,7 +20,7 @@ rem rem Figure out where the Spark framework is installed call "%~dp0find-spark-home.cmd" -set _SPARK_CMD_USAGE=Usage: ./bin/run-example [options] example-class [example args] +set _SPARK_CMD_USAGE=Usage: .\bin\run-example [options] example-class [example args] rem The outermost quotes are used to prevent Windows command line parse error rem when there are some quotes in parameters, see SPARK-21877.

http://git-wip-us.apache.org/repos/asf/spark/blob/a1877f45/bin/spark-sql.cmd
--
diff --git a/bin/spark-sql.cmd b/bin/spark-sql.cmd
new file mode 100644
index 000..919e321
--- /dev/null
+++ b/bin/spark-sql.cmd
@@ -0,0 +1,25 @@ +@echo off + +rem +rem Licensed to the Apache Software Foundation (ASF) under one or more +rem contributor license agreements. See the NOTICE file distributed with +rem this work for additional information regarding copyright ownership. +rem The ASF licenses this file to You under the Apache License, Version 2.0 +rem (the "License"); you may not use this file except in compliance with +rem the License.
You may obtain a copy of the License at +rem +remhttp://www.apache.org/licenses/LICENSE-2.0 +rem +rem Unless required by applicable law or agreed to in writing, software +rem distributed under the License is distributed on an "AS IS" BASIS, +rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +rem See the License for the specific language governing permissions and +rem limitations under the License. +rem + +rem This is the entry point for running SparkSQL. To avoid polluting the +rem environment, it just launches a new cmd to do the real work. + +rem The outermost quotes are used to prevent Windows command line parse error +rem when there are some quotes in parameters, see SPARK-21877. +cmd /V /E /C ""%~dp0spark-sql2.cmd" %*" http://git-wip-us.apache.org/repos/asf/spark/blob/a1877f45/bin/spark-sql2.cmd -- diff --git a/bin/spark-sql2.cmd b/bin/spark-sql2.cmd new file mode 100644 index 000..c34a3c5 --- /dev/null +++ b/bin/spark-sql2.cmd @@ -0,0 +1,25 @@ +@echo off + +rem +rem Licensed to the Apache Software Foundation (ASF) under one or more +rem contributor license
spark git commit: [SPARK-22537][CORE] Aggregation of map output statistics on driver faces single point bottleneck
Repository: spark
Updated Branches: refs/heads/master 449e26ecd -> efd0036ec

[SPARK-22537][CORE] Aggregation of map output statistics on driver faces single point bottleneck

## What changes were proposed in this pull request?

In adaptive execution, the map output statistics of all mappers are aggregated after the previous stage has successfully executed. The driver performs this aggregation, and it becomes slow when `mappers * shuffle partitions` is large, since it computes with only a single thread. This PR uses multiple threads to remove this single-point bottleneck.

## How was this patch tested?

Test cases are in `MapOutputTrackerSuite.scala`

Author: GuoChenzhao
Author: gczsjdy

Closes #19763 from gczsjdy/single_point_mapstatistics.

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/efd0036e
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/efd0036e
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/efd0036e
Branch: refs/heads/master
Commit: efd0036ec88bdc385f5a9ea568d2e2bbfcda2912
Parents: 449e26e
Author: GuoChenzhao
Authored: Fri Nov 24 15:09:43 2017 +0100
Committer: Wenchen Fan
Committed: Fri Nov 24 15:09:43 2017 +0100

--
 .../org/apache/spark/MapOutputTracker.scala     | 60 +++-
 .../apache/spark/internal/config/package.scala  | 11
 .../apache/spark/MapOutputTrackerSuite.scala    | 23
 3 files changed, 91 insertions(+), 3 deletions(-)
--

http://git-wip-us.apache.org/repos/asf/spark/blob/efd0036e/core/src/main/scala/org/apache/spark/MapOutputTracker.scala
--
diff --git a/core/src/main/scala/org/apache/spark/MapOutputTracker.scala b/core/src/main/scala/org/apache/spark/MapOutputTracker.scala
index 7f760a5..195fd4f 100644
--- a/core/src/main/scala/org/apache/spark/MapOutputTracker.scala
+++ b/core/src/main/scala/org/apache/spark/MapOutputTracker.scala
@@ -23,11 +23,14 @@ import java.util.zip.{GZIPInputStream, GZIPOutputStream} import scala.collection.JavaConverters._ import scala.collection.mutable.{ArrayBuffer, HashMap, HashSet, Map} +import scala.concurrent.{ExecutionContext, Future} +import scala.concurrent.duration.Duration import scala.reflect.ClassTag import scala.util.control.NonFatal import org.apache.spark.broadcast.{Broadcast, BroadcastManager} import org.apache.spark.internal.Logging +import org.apache.spark.internal.config._ import org.apache.spark.rpc.{RpcCallContext, RpcEndpoint, RpcEndpointRef, RpcEnv} import org.apache.spark.scheduler.MapStatus import org.apache.spark.shuffle.MetadataFetchFailedException @@ -473,14 +476,65 @@ private[spark] class MapOutputTrackerMaster( } /** + * Grouped function of Range, this is to avoid traverse of all elements of Range using + * IterableLike's grouped function. + */ + def rangeGrouped(range: Range, size: Int): Seq[Range] = { +val start = range.start +val step = range.step +val end = range.end +for (i <- start.until(end, size * step)) yield { + i.until(i + size * step, step) +} + } + + /** + * To equally divide n elements into m buckets, basically each bucket should have n/m elements, + * for the remaining n%m elements, add one more element to the first n%m buckets each.
+ */ + def equallyDivide(numElements: Int, numBuckets: Int): Seq[Seq[Int]] = { +val elementsPerBucket = numElements / numBuckets +val remaining = numElements % numBuckets +val splitPoint = (elementsPerBucket + 1) * remaining +if (elementsPerBucket == 0) { + rangeGrouped(0.until(splitPoint), elementsPerBucket + 1) +} else { + rangeGrouped(0.until(splitPoint), elementsPerBucket + 1) ++ +rangeGrouped(splitPoint.until(numElements), elementsPerBucket) +} + } + + /** * Return statistics about all of the outputs for a given shuffle. */ def getStatistics(dep: ShuffleDependency[_, _, _]): MapOutputStatistics = { shuffleStatuses(dep.shuffleId).withMapStatuses { statuses => val totalSizes = new Array[Long](dep.partitioner.numPartitions) - for (s <- statuses) { -for (i <- 0 until totalSizes.length) { - totalSizes(i) += s.getSizeForBlock(i) + val parallelAggThreshold = conf.get( +SHUFFLE_MAP_OUTPUT_PARALLEL_AGGREGATION_THRESHOLD) + val parallelism = math.min( +Runtime.getRuntime.availableProcessors(), +statuses.length.toLong * totalSizes.length / parallelAggThreshold + 1).toInt + if (parallelism <= 1) { +for (s <- statuses) { + for (i <- 0 until totalSizes.length) { +totalSizes(i) += s.getSizeForBlock(i) + } +} + } else
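The two helpers above are self-contained enough to sanity-check outside Spark. A sketch reproducing them, with a usage example assuming 10 map-output partitions split across 3 aggregation threads (the first 10 % 3 = 1 bucket gets one extra element):

```scala
// Extracted from the patch: group a Range without traversing every element.
def rangeGrouped(range: Range, size: Int): Seq[Range] = {
  val start = range.start
  val step = range.step
  val end = range.end
  for (i <- start.until(end, size * step)) yield {
    i.until(i + size * step, step)
  }
}

// Extracted from the patch: split numElements indices into numBuckets buckets,
// giving one extra element to each of the first numElements % numBuckets buckets.
def equallyDivide(numElements: Int, numBuckets: Int): Seq[Seq[Int]] = {
  val elementsPerBucket = numElements / numBuckets
  val remaining = numElements % numBuckets
  val splitPoint = (elementsPerBucket + 1) * remaining
  if (elementsPerBucket == 0) {
    rangeGrouped(0.until(splitPoint), elementsPerBucket + 1)
  } else {
    rangeGrouped(0.until(splitPoint), elementsPerBucket + 1) ++
      rangeGrouped(splitPoint.until(numElements), elementsPerBucket)
  }
}

// 10 partitions over 3 threads: bucket sizes are 4, 3, 3.
assert(equallyDivide(10, 3).map(_.toList) ==
  Seq(List(0, 1, 2, 3), List(4, 5, 6), List(7, 8, 9)))
```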
spark git commit: [SPARK-22559][CORE] history server: handle exception on opening corrupted listing.ldb
Repository: spark
Updated Branches: refs/heads/master 554adc77d -> 449e26ecd

[SPARK-22559][CORE] history server: handle exception on opening corrupted listing.ldb

## What changes were proposed in this pull request?

Currently, history server v2 fails to start if `listing.ldb` is corrupted. This patch gets rid of the corrupted `listing.ldb` and re-creates it. The exception handling follows [opening disk store for app](https://github.com/apache/spark/blob/0ffa7c488fa8156e2a1aa282e60b7c36b86d8af8/core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala#L307).

## How was this patch tested?

manual test

Author: Wang Gengliang

Closes #19786 from gengliangwang/listingException.

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/449e26ec
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/449e26ec
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/449e26ec
Branch: refs/heads/master
Commit: 449e26ecdc891039198c26ece99454a2e76d5455
Parents: 554adc7
Author: Wang Gengliang
Authored: Fri Nov 24 15:07:43 2017 +0100
Committer: Wenchen Fan
Committed: Fri Nov 24 15:07:43 2017 +0100

--
 .../apache/spark/deploy/history/FsHistoryProvider.scala | 12
 1 file changed, 8 insertions(+), 4 deletions(-)
--

http://git-wip-us.apache.org/repos/asf/spark/blob/449e26ec/core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala
--
diff --git a/core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala b/core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala
index 25f82b5..69ccde3 100644
--- a/core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala
+++ b/core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala
@@ -34,10 +34,10 @@ import org.apache.hadoop.fs.permission.FsAction import org.apache.hadoop.hdfs.DistributedFileSystem import org.apache.hadoop.hdfs.protocol.HdfsConstants import org.apache.hadoop.security.AccessControlException +import org.fusesource.leveldbjni.internal.NativeDB import org.apache.spark.{SecurityManager, SparkConf, SparkException} import org.apache.spark.deploy.SparkHadoopUtil -import org.apache.spark.deploy.history.config._ import org.apache.spark.internal.Logging import org.apache.spark.scheduler._ import org.apache.spark.scheduler.ReplayListenerBus._ @@ -132,7 +132,7 @@ private[history] class FsHistoryProvider(conf: SparkConf, clock: Clock) AppStatusStore.CURRENT_VERSION, logDir.toString()) try { - open(new File(path, "listing.ldb"), metadata) + open(dbPath, metadata) } catch { // If there's an error, remove the listing database and any existing UI database // from the store directory, since it's extremely likely that they'll all contain @@ -140,7 +140,12 @@ private[history] class FsHistoryProvider(conf: SparkConf, clock: Clock) case _: UnsupportedStoreVersionException | _: MetadataMismatchException => logInfo("Detected incompatible DB versions, deleting...") path.listFiles().foreach(Utils.deleteRecursively) -open(new File(path, "listing.ldb"), metadata) +open(dbPath, metadata) + case dbExc: NativeDB.DBException => +// Get rid of the corrupted listing.ldb and re-create it.
+logWarning(s"Failed to load disk store $dbPath :", dbExc) +Utils.deleteRecursively(dbPath) +open(dbPath, metadata) } }.getOrElse(new InMemoryStore()) @@ -568,7 +573,6 @@ private[history] class FsHistoryProvider(conf: SparkConf, clock: Clock) } val logPath = fileStatus.getPath() -logInfo(s"Replaying log path: $logPath") val bus = new ReplayListenerBus() val listener = new AppListingListener(fileStatus, clock) - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARK-22595][SQL] fix flaky test: CastSuite.SPARK-22500: cast for struct should not generate codes beyond 64KB
Repository: spark
Updated Branches: refs/heads/branch-2.2 f4c457a30 -> ad57141f9

[SPARK-22595][SQL] fix flaky test: CastSuite.SPARK-22500: cast for struct should not generate codes beyond 64KB

This PR reduces the number of fields in the test case of `CastSuite` to fix an issue that is pointed out [here](https://github.com/apache/spark/pull/19800#issuecomment-346634950).

```
java.lang.OutOfMemoryError: GC overhead limit exceeded
java.lang.OutOfMemoryError: GC overhead limit exceeded
  at org.codehaus.janino.UnitCompiler.findClass(UnitCompiler.java:10971)
  at org.codehaus.janino.UnitCompiler.findTypeByName(UnitCompiler.java:7607)
  at org.codehaus.janino.UnitCompiler.getReferenceType(UnitCompiler.java:5758)
  at org.codehaus.janino.UnitCompiler.getType2(UnitCompiler.java:5732)
  at org.codehaus.janino.UnitCompiler.access$13200(UnitCompiler.java:206)
  at org.codehaus.janino.UnitCompiler$18.visitReferenceType(UnitCompiler.java:5668)
  at org.codehaus.janino.UnitCompiler$18.visitReferenceType(UnitCompiler.java:5660)
  at org.codehaus.janino.Java$ReferenceType.accept(Java.java:3356)
  at org.codehaus.janino.UnitCompiler.getType(UnitCompiler.java:5660)
  at org.codehaus.janino.UnitCompiler.buildLocalVariableMap(UnitCompiler.java:2892)
  at org.codehaus.janino.UnitCompiler.compile(UnitCompiler.java:2764)
  at org.codehaus.janino.UnitCompiler.compileDeclaredMethods(UnitCompiler.java:1262)
  at org.codehaus.janino.UnitCompiler.compileDeclaredMethods(UnitCompiler.java:1234)
  at org.codehaus.janino.UnitCompiler.compile2(UnitCompiler.java:538)
  at org.codehaus.janino.UnitCompiler.compile2(UnitCompiler.java:890)
  at org.codehaus.janino.UnitCompiler.compile2(UnitCompiler.java:894)
  at org.codehaus.janino.UnitCompiler.access$600(UnitCompiler.java:206)
  at org.codehaus.janino.UnitCompiler$2.visitMemberClassDeclaration(UnitCompiler.java:377)
  at org.codehaus.janino.UnitCompiler$2.visitMemberClassDeclaration(UnitCompiler.java:369)
  at org.codehaus.janino.Java$MemberClassDeclaration.accept(Java.java:1128)
  at org.codehaus.janino.UnitCompiler.compile(UnitCompiler.java:369)
  at org.codehaus.janino.UnitCompiler.compileDeclaredMemberTypes(UnitCompiler.java:1209)
  at org.codehaus.janino.UnitCompiler.compile2(UnitCompiler.java:564)
  at org.codehaus.janino.UnitCompiler.compile2(UnitCompiler.java:890)
  at org.codehaus.janino.UnitCompiler.compile2(UnitCompiler.java:894)
  at org.codehaus.janino.UnitCompiler.access$600(UnitCompiler.java:206)
  at org.codehaus.janino.UnitCompiler$2.visitMemberClassDeclaration(UnitCompiler.java:377)
  at org.codehaus.janino.UnitCompiler$2.visitMemberClassDeclaration(UnitCompiler.java:369)
  at org.codehaus.janino.Java$MemberClassDeclaration.accept(Java.java:1128)
  at org.codehaus.janino.UnitCompiler.compile(UnitCompiler.java:369)
  at org.codehaus.janino.UnitCompiler.compileDeclaredMemberTypes(UnitCompiler.java:1209)
  at org.codehaus.janino.UnitCompiler.compile2(UnitCompiler.java:564)
  ...
```

Used existing test case

Author: Kazuaki Ishizaki

Closes #19806 from kiszk/SPARK-22595.
(cherry picked from commit 554adc77d24c411a6df6d38c596aa33cdf68f3c1) Signed-off-by: Wenchen Fan Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/ad57141f Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/ad57141f Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/ad57141f Branch: refs/heads/branch-2.2 Commit: ad57141f9499a4017c39fe336394c7084356df39 Parents: f4c457a Author: Kazuaki Ishizaki Authored: Fri Nov 24 12:08:49 2017 +0100 Committer: Wenchen Fan Committed: Fri Nov 24 12:10:32 2017 +0100 -- .../org/apache/spark/sql/catalyst/expressions/CastSuite.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/ad57141f/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala -- diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala index 7014994..7837d65 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala @@ -829,7 +829,7 @@ class CastSuite extends SparkFunSuite with ExpressionEvalHelper { } test("SPARK-22500: cast for struct should not generate codes
spark git commit: [SPARK-22595][SQL] fix flaky test: CastSuite.SPARK-22500: cast for struct should not generate codes beyond 64KB
Repository: spark
Updated Branches: refs/heads/master 62a826f17 -> 554adc77d

[SPARK-22595][SQL] fix flaky test: CastSuite.SPARK-22500: cast for struct should not generate codes beyond 64KB

## What changes were proposed in this pull request?

This PR reduces the number of fields in the test case of `CastSuite` to fix an issue that is pointed out [here](https://github.com/apache/spark/pull/19800#issuecomment-346634950).

```
java.lang.OutOfMemoryError: GC overhead limit exceeded
java.lang.OutOfMemoryError: GC overhead limit exceeded
  at org.codehaus.janino.UnitCompiler.findClass(UnitCompiler.java:10971)
  at org.codehaus.janino.UnitCompiler.findTypeByName(UnitCompiler.java:7607)
  at org.codehaus.janino.UnitCompiler.getReferenceType(UnitCompiler.java:5758)
  at org.codehaus.janino.UnitCompiler.getType2(UnitCompiler.java:5732)
  at org.codehaus.janino.UnitCompiler.access$13200(UnitCompiler.java:206)
  at org.codehaus.janino.UnitCompiler$18.visitReferenceType(UnitCompiler.java:5668)
  at org.codehaus.janino.UnitCompiler$18.visitReferenceType(UnitCompiler.java:5660)
  at org.codehaus.janino.Java$ReferenceType.accept(Java.java:3356)
  at org.codehaus.janino.UnitCompiler.getType(UnitCompiler.java:5660)
  at org.codehaus.janino.UnitCompiler.buildLocalVariableMap(UnitCompiler.java:2892)
  at org.codehaus.janino.UnitCompiler.compile(UnitCompiler.java:2764)
  at org.codehaus.janino.UnitCompiler.compileDeclaredMethods(UnitCompiler.java:1262)
  at org.codehaus.janino.UnitCompiler.compileDeclaredMethods(UnitCompiler.java:1234)
  at org.codehaus.janino.UnitCompiler.compile2(UnitCompiler.java:538)
  at org.codehaus.janino.UnitCompiler.compile2(UnitCompiler.java:890)
  at org.codehaus.janino.UnitCompiler.compile2(UnitCompiler.java:894)
  at org.codehaus.janino.UnitCompiler.access$600(UnitCompiler.java:206)
  at org.codehaus.janino.UnitCompiler$2.visitMemberClassDeclaration(UnitCompiler.java:377)
  at org.codehaus.janino.UnitCompiler$2.visitMemberClassDeclaration(UnitCompiler.java:369)
  at org.codehaus.janino.Java$MemberClassDeclaration.accept(Java.java:1128)
  at org.codehaus.janino.UnitCompiler.compile(UnitCompiler.java:369)
  at org.codehaus.janino.UnitCompiler.compileDeclaredMemberTypes(UnitCompiler.java:1209)
  at org.codehaus.janino.UnitCompiler.compile2(UnitCompiler.java:564)
  at org.codehaus.janino.UnitCompiler.compile2(UnitCompiler.java:890)
  at org.codehaus.janino.UnitCompiler.compile2(UnitCompiler.java:894)
  at org.codehaus.janino.UnitCompiler.access$600(UnitCompiler.java:206)
  at org.codehaus.janino.UnitCompiler$2.visitMemberClassDeclaration(UnitCompiler.java:377)
  at org.codehaus.janino.UnitCompiler$2.visitMemberClassDeclaration(UnitCompiler.java:369)
  at org.codehaus.janino.Java$MemberClassDeclaration.accept(Java.java:1128)
  at org.codehaus.janino.UnitCompiler.compile(UnitCompiler.java:369)
  at org.codehaus.janino.UnitCompiler.compileDeclaredMemberTypes(UnitCompiler.java:1209)
  at org.codehaus.janino.UnitCompiler.compile2(UnitCompiler.java:564)
  ...
```

## How was this patch tested?

Used existing test case

Author: Kazuaki Ishizaki

Closes #19806 from kiszk/SPARK-22595.
Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/554adc77 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/554adc77 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/554adc77 Branch: refs/heads/master Commit: 554adc77d24c411a6df6d38c596aa33cdf68f3c1 Parents: 62a826f Author: Kazuaki Ishizaki Authored: Fri Nov 24 12:08:49 2017 +0100 Committer: Wenchen Fan Committed: Fri Nov 24 12:08:49 2017 +0100 -- .../org/apache/spark/sql/catalyst/expressions/CastSuite.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/554adc77/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala -- diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala index 84bd8b2..7837d65 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CastSuite.scala @@ -829,7 +829,7 @@ class CastSuite extends SparkFunSuite with ExpressionEvalHelper { } test("SPARK-22500: cast for struct should not generate codes beyond 64KB") { -val N = 250 +val N = 25
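For context on why shrinking `N` fixes the flakiness: the generated cast code grows with the number of struct fields, so a large `N` can push a generated method past the JVM's 64KB bytecode limit, and merely compiling the huge source can exhaust the test JVM's heap (the GC-overhead error above). A hedged sketch of building such a wide struct type; exactly how `CastSuite` constructs its input is an assumption here, and the snippet needs spark-catalyst on the classpath:

```scala
import org.apache.spark.sql.types._

// Build a struct with `n` fields; each field contributes its own null-check
// and conversion snippet to the generated cast, so code size scales with n.
def wideStruct(n: Int, fieldType: DataType): StructType =
  StructType((1 to n).map(i => StructField(s"col$i", fieldType)))

val from = wideStruct(25, IntegerType) // N = 25 after the fix; 250 before
val to   = wideStruct(25, LongType)    // casting from -> to touches every field
```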
spark git commit: [SPARK-22591][SQL] GenerateOrdering shouldn't change CodegenContext.INPUT_ROW
Repository: spark
Updated Branches: refs/heads/branch-2.2 f8e73d029 -> f4c457a30

[SPARK-22591][SQL] GenerateOrdering shouldn't change CodegenContext.INPUT_ROW

## What changes were proposed in this pull request?

When I played with codegen while developing another PR, I found the value of `CodegenContext.INPUT_ROW` is not reliable. Under whole-stage codegen, it is assigned to null first and then suddenly changed to `i`. The reason is that `GenerateOrdering` changes `CodegenContext.INPUT_ROW` but doesn't restore it.

## How was this patch tested?

Added test.

Author: Liang-Chi Hsieh

Closes #19800 from viirya/SPARK-22591.

(cherry picked from commit 62a826f17c549ed93300bdce562db56bddd5d959)
Signed-off-by: Wenchen Fan

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/f4c457a3
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/f4c457a3
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/f4c457a3
Branch: refs/heads/branch-2.2
Commit: f4c457a308b0226aa0e7a1714c19046376e03409
Parents: f8e73d0
Author: Liang-Chi Hsieh
Authored: Fri Nov 24 11:46:58 2017 +0100
Committer: Wenchen Fan
Committed: Fri Nov 24 11:47:10 2017 +0100

--
 .../expressions/codegen/GenerateOrdering.scala   | 16 ++--
 .../sql/catalyst/expressions/OrderingSuite.scala | 11 ++-
 2 files changed, 20 insertions(+), 7 deletions(-)
--

http://git-wip-us.apache.org/repos/asf/spark/blob/f4c457a3/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateOrdering.scala
--
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateOrdering.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateOrdering.scala
index f7fc2d5..a2fe55b 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateOrdering.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateOrdering.scala
@@ -72,13 +72,15 @@ object GenerateOrdering extends CodeGenerator[Seq[SortOrder], Ordering[InternalR * Generates the code for ordering based on the given order.
*/ def genComparisons(ctx: CodegenContext, ordering: Seq[SortOrder]): String = { +val oldInputRow = ctx.INPUT_ROW +val oldCurrentVars = ctx.currentVars +val inputRow = "i" +ctx.INPUT_ROW = inputRow +// to use INPUT_ROW we must make sure currentVars is null +ctx.currentVars = null + val comparisons = ordering.map { order => - val oldCurrentVars = ctx.currentVars - ctx.INPUT_ROW = "i" - // to use INPUT_ROW we must make sure currentVars is null - ctx.currentVars = null val eval = order.child.genCode(ctx) - ctx.currentVars = oldCurrentVars val asc = order.isAscending val isNullA = ctx.freshName("isNullA") val primitiveA = ctx.freshName("primitiveA") @@ -147,10 +149,12 @@ object GenerateOrdering extends CodeGenerator[Seq[SortOrder], Ordering[InternalR """ }.mkString }) +ctx.currentVars = oldCurrentVars +ctx.INPUT_ROW = oldInputRow // make sure INPUT_ROW is declared even if splitExpressions // returns an inlined block s""" - |InternalRow ${ctx.INPUT_ROW} = null; + |InternalRow $inputRow = null; |$code """.stripMargin } http://git-wip-us.apache.org/repos/asf/spark/blob/f4c457a3/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/OrderingSuite.scala -- diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/OrderingSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/OrderingSuite.scala index aa61ba2..d0604b8 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/OrderingSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/OrderingSuite.scala @@ -24,7 +24,7 @@ import org.apache.spark.serializer.KryoSerializer import org.apache.spark.sql.{RandomDataGenerator, Row} import org.apache.spark.sql.catalyst.{CatalystTypeConverters, InternalRow} import org.apache.spark.sql.catalyst.dsl.expressions._ -import org.apache.spark.sql.catalyst.expressions.codegen.{GenerateOrdering, LazilyGeneratedOrdering} +import org.apache.spark.sql.catalyst.expressions.codegen.{CodegenContext, GenerateOrdering, LazilyGeneratedOrdering} import org.apache.spark.sql.types._ class OrderingSuite extends SparkFunSuite with ExpressionEvalHelper { @@ -156,4 +156,13 @@ class OrderingSuite extends
spark git commit: [SPARK-22591][SQL] GenerateOrdering shouldn't change CodegenContext.INPUT_ROW
Repository: spark
Updated Branches: refs/heads/master c1217565e -> 62a826f17

[SPARK-22591][SQL] GenerateOrdering shouldn't change CodegenContext.INPUT_ROW

## What changes were proposed in this pull request?

When I played with codegen while developing another PR, I found the value of `CodegenContext.INPUT_ROW` is not reliable. Under whole-stage codegen, it is assigned to null first and then suddenly changed to `i`. The reason is that `GenerateOrdering` changes `CodegenContext.INPUT_ROW` but doesn't restore it.

## How was this patch tested?

Added test.

Author: Liang-Chi Hsieh

Closes #19800 from viirya/SPARK-22591.

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/62a826f1
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/62a826f1
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/62a826f1
Branch: refs/heads/master
Commit: 62a826f17c549ed93300bdce562db56bddd5d959
Parents: c121756
Author: Liang-Chi Hsieh
Authored: Fri Nov 24 11:46:58 2017 +0100
Committer: Wenchen Fan
Committed: Fri Nov 24 11:46:58 2017 +0100

--
 .../expressions/codegen/GenerateOrdering.scala   | 16 ++--
 .../sql/catalyst/expressions/OrderingSuite.scala | 11 ++-
 2 files changed, 20 insertions(+), 7 deletions(-)
--

http://git-wip-us.apache.org/repos/asf/spark/blob/62a826f1/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateOrdering.scala
--
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateOrdering.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateOrdering.scala
index 1639d1b..4a45957 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateOrdering.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateOrdering.scala
@@ -72,13 +72,15 @@ object GenerateOrdering extends CodeGenerator[Seq[SortOrder], Ordering[InternalR * Generates the code for ordering based on the given order.
*/ def genComparisons(ctx: CodegenContext, ordering: Seq[SortOrder]): String = { +val oldInputRow = ctx.INPUT_ROW +val oldCurrentVars = ctx.currentVars +val inputRow = "i" +ctx.INPUT_ROW = inputRow +// to use INPUT_ROW we must make sure currentVars is null +ctx.currentVars = null + val comparisons = ordering.map { order => - val oldCurrentVars = ctx.currentVars - ctx.INPUT_ROW = "i" - // to use INPUT_ROW we must make sure currentVars is null - ctx.currentVars = null val eval = order.child.genCode(ctx) - ctx.currentVars = oldCurrentVars val asc = order.isAscending val isNullA = ctx.freshName("isNullA") val primitiveA = ctx.freshName("primitiveA") @@ -147,10 +149,12 @@ object GenerateOrdering extends CodeGenerator[Seq[SortOrder], Ordering[InternalR """ }.mkString }) +ctx.currentVars = oldCurrentVars +ctx.INPUT_ROW = oldInputRow // make sure INPUT_ROW is declared even if splitExpressions // returns an inlined block s""" - |InternalRow ${ctx.INPUT_ROW} = null; + |InternalRow $inputRow = null; |$code """.stripMargin } http://git-wip-us.apache.org/repos/asf/spark/blob/62a826f1/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/OrderingSuite.scala -- diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/OrderingSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/OrderingSuite.scala index aa61ba2..d0604b8 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/OrderingSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/OrderingSuite.scala @@ -24,7 +24,7 @@ import org.apache.spark.serializer.KryoSerializer import org.apache.spark.sql.{RandomDataGenerator, Row} import org.apache.spark.sql.catalyst.{CatalystTypeConverters, InternalRow} import org.apache.spark.sql.catalyst.dsl.expressions._ -import org.apache.spark.sql.catalyst.expressions.codegen.{GenerateOrdering, LazilyGeneratedOrdering} +import org.apache.spark.sql.catalyst.expressions.codegen.{CodegenContext, GenerateOrdering, LazilyGeneratedOrdering} import org.apache.spark.sql.types._ class OrderingSuite extends SparkFunSuite with ExpressionEvalHelper { @@ -156,4 +156,13 @@ class OrderingSuite extends SparkFunSuite with ExpressionEvalHelper { assert(genOrdering.compare(rowB1, rowB2) < 0) } } + + test("SPARK-22591:
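The fix is an instance of a general save/restore discipline around mutable codegen state. A simplified sketch of that discipline with a toy context class (not Spark's `CodegenContext`; the patch itself restores the fields manually rather than via `try`/`finally`):

```scala
// Toy stand-in for the mutable codegen state involved.
class Ctx {
  var INPUT_ROW: String = null
  var currentVars: Seq[String] = null
}

// Run `body` with INPUT_ROW bound to `row`, then restore whatever the caller
// had, so a nested generator (like GenerateOrdering) can't leak its bindings.
def withInputRow[T](ctx: Ctx, row: String)(body: => T): T = {
  val oldRow = ctx.INPUT_ROW
  val oldVars = ctx.currentVars
  ctx.INPUT_ROW = row
  ctx.currentVars = null // to use INPUT_ROW, currentVars must be null
  try body
  finally {
    ctx.INPUT_ROW = oldRow
    ctx.currentVars = oldVars
  }
}
```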
spark git commit: [SPARK-17920][SQL] [FOLLOWUP] Backport PR 19779 to branch-2.2
Repository: spark
Updated Branches: refs/heads/branch-2.2 b17f4063c -> f8e73d029

[SPARK-17920][SQL] [FOLLOWUP] Backport PR 19779 to branch-2.2

## What changes were proposed in this pull request?

A followup of https://github.com/apache/spark/pull/19795, to simplify the file creation.

## How was this patch tested?

Only a test case is updated.

Author: vinodkc

Closes #19809 from vinodkc/br_FollowupSPARK-17920_branch-2.2.

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/f8e73d02
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/f8e73d02
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/f8e73d02
Branch: refs/heads/branch-2.2
Commit: f8e73d029d247d594793d832acd43041ac65b136
Parents: b17f406
Author: vinodkc
Authored: Fri Nov 24 11:42:47 2017 +0100
Committer: Wenchen Fan
Committed: Fri Nov 24 11:42:47 2017 +0100

--
 .../org/apache/spark/sql/hive/client/VersionsSuite.scala | 11 +++
 1 file changed, 3 insertions(+), 8 deletions(-)
--

http://git-wip-us.apache.org/repos/asf/spark/blob/f8e73d02/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/VersionsSuite.scala
--
diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/VersionsSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/VersionsSuite.scala
index d48a23f..8376fc7 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/VersionsSuite.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/client/VersionsSuite.scala
@@ -700,12 +700,7 @@ class VersionsSuite extends SparkFunSuite with Logging { test(s"$version: SPARK-17920: Insert into/overwrite avro table") { withTempDir { dir => -val path = dir.getAbsolutePath -val schemaPath = s"""$path${File.separator}avroschemadir""" val destTableName = "tab1" - -new File(schemaPath).mkdir() - val avroSchema = """{ |"type": "record", |"name": "test_record", |"fields": [ | {"name": "f0", "type": "string"} ] |} """.stripMargin withTable(destTableName) { - val schemaUrl = s"""$schemaPath${File.separator}avroSchema.avsc""" - val schemaFile = new File(schemaPath, "avroSchema.avsc") + val schemaFile = new File(dir, "avroSchema.avsc") val writer = new PrintWriter(schemaFile) writer.write(avroSchema) writer.close() + val schemaPath = schemaFile.getCanonicalPath versionSpark.sql( s"""CREATE TABLE $destTableName |STORED AS | INPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat' | OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat' - |TBLPROPERTIES ('avro.schema.url' = '$schemaUrl') + |TBLPROPERTIES ('avro.schema.url' = '$schemaPath') """.stripMargin ) val insertStmt = s"INSERT OVERWRITE TABLE $destTableName SELECT 'ABC', 'DEF'"

- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org
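The simplification amounts to letting `java.io.File` derive the schema path instead of string-concatenating one. A standalone Scala sketch of the resulting pattern; `withTempDir` below is a throwaway stand-in for the suite's helper of the same name:

```scala
import java.io.{File, PrintWriter}

// Throwaway stand-in for the suite's withTempDir helper.
def withTempDir(f: File => Unit): Unit = {
  val dir = java.nio.file.Files.createTempDirectory("avro").toFile
  try f(dir) finally { dir.listFiles().foreach(_.delete()); dir.delete() }
}

withTempDir { dir =>
  // Write the schema straight into the temp dir: no intermediate directory,
  // no File.separator string assembly.
  val schemaFile = new File(dir, "avroSchema.avsc")
  val writer = new PrintWriter(schemaFile)
  writer.write("""{"type": "record", "name": "test_record", "fields": []}""")
  writer.close()
  // One canonical path, usable directly in TBLPROPERTIES ('avro.schema.url' = ...).
  val schemaPath = schemaFile.getCanonicalPath
  println(schemaPath)
}
```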