spark git commit: [SPARK-23912][SQL] add array_distinct
Repository: spark
Updated Branches:
  refs/heads/master 15747cfd3 -> 9de11d3f9

[SPARK-23912][SQL] add array_distinct

## What changes were proposed in this pull request?

Add array_distinct to remove duplicate values from an array.

## How was this patch tested?

Added unit tests.

Author: Huaxin Gao

Closes #21050 from huaxingao/spark-23912.

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/9de11d3f
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/9de11d3f
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/9de11d3f

Branch: refs/heads/master
Commit: 9de11d3f901bc206a33b9da3e7499bcd43e0142a
Parents: 15747cf
Author: Huaxin Gao
Authored: Thu Jun 21 12:24:53 2018 +0900
Committer: Takuya UESHIN
Committed: Thu Jun 21 12:24:53 2018 +0900

 python/pyspark/sql/functions.py                 |  14 +
 .../catalyst/analysis/FunctionRegistry.scala    |   1 +
 .../expressions/collectionOperations.scala      | 279 +++
 .../CollectionExpressionsSuite.scala            |  45 +++
 .../scala/org/apache/spark/sql/functions.scala  |   7 +
 .../spark/sql/DataFrameFunctionsSuite.scala     |  22 ++
 6 files changed, 368 insertions(+)

http://git-wip-us.apache.org/repos/asf/spark/blob/9de11d3f/python/pyspark/sql/functions.py

diff --git a/python/pyspark/sql/functions.py b/python/pyspark/sql/functions.py
index e634669..11b179f 100644
--- a/python/pyspark/sql/functions.py
+++ b/python/pyspark/sql/functions.py
@@ -1999,6 +1999,20 @@ def array_remove(col, element):
     return Column(sc._jvm.functions.array_remove(_to_java_column(col), element))


+@since(2.4)
+def array_distinct(col):
+    """
+    Collection function: removes duplicate values from the array.
+    :param col: name of column or expression
+
+    >>> df = spark.createDataFrame([([1, 2, 3, 2],), ([4, 5, 5, 4],)], ['data'])
+    >>> df.select(array_distinct(df.data)).collect()
+    [Row(array_distinct(data)=[1, 2, 3]), Row(array_distinct(data)=[4, 5])]
+    """
+    sc = SparkContext._active_spark_context
+    return Column(sc._jvm.functions.array_distinct(_to_java_column(col)))
+
+
 @since(1.4)
 def explode(col):
     """Returns a new row for each element in the given array or map.
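The function is also exposed to SQL and to the Scala/Java DataFrame API (the `functions.scala` change in the diffstat above adds the Scala binding). A minimal usage sketch from Scala — the session setup is illustrative, and the output mirrors the Python doctest:

```scala
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.functions.array_distinct

val spark = SparkSession.builder()
  .master("local[*]")
  .appName("array-distinct-demo")
  .getOrCreate()
import spark.implicits._

val df = Seq(Seq(1, 2, 3, 2), Seq(4, 5, 5, 4)).toDF("data")
df.select(array_distinct($"data")).show()
// +--------------------+
// |array_distinct(data)|
// +--------------------+
// |           [1, 2, 3]|
// |              [4, 5]|
// +--------------------+
```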
http://git-wip-us.apache.org/repos/asf/spark/blob/9de11d3f/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala
index 3700c63..4b09b9a 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala
@@ -433,6 +433,7 @@ object FunctionRegistry {
     expression[Flatten]("flatten"),
     expression[ArrayRepeat]("array_repeat"),
     expression[ArrayRemove]("array_remove"),
+    expression[ArrayDistinct]("array_distinct"),
     CreateStruct.registryEntry,

     // mask functions

http://git-wip-us.apache.org/repos/asf/spark/blob/9de11d3f/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala
index d76f301..7c064a1 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala
@@ -31,6 +31,7 @@ import org.apache.spark.sql.types._
 import org.apache.spark.unsafe.Platform
 import org.apache.spark.unsafe.array.ByteArrayMethods
 import org.apache.spark.unsafe.types.{ByteArray, UTF8String}
+import org.apache.spark.util.collection.OpenHashSet

 /**
  * Base trait for [[BinaryExpression]]s with two arrays of the same element type and implicit
@@ -2355,3 +2356,281 @@ case class ArrayRemove(left: Expression, right: Expression)
   override def prettyName: String = "array_remove"
 }
+
+/**
+ * Removes duplicate values from the array.
+ */
+@ExpressionDescription(
+  usage = "_FUNC_(array) - Removes duplicate values from the array.",
+  examples = """
+    Examples:
+      > SELECT _FUNC_(array(1, 2, 3, null, 3));
+       [1,2,3,null]
+  """, since = "2.4.0")
+case class ArrayDistinct(child: Expression)
+  extends UnaryExpression with ExpectsInputTypes {
+
+  override def inputTypes:
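The notification truncates the body of the new `ArrayDistinct` expression; only the SQL example survives above. As a semantics-only sketch — not the commit's code, which also has a codegen path and, per the new `OpenHashSet` import, a hash-set fast path for primitive element types — order-preserving deduplication looks like this:

```scala
// Keep the first occurrence of each value, preserving encounter order.
// Null handling is omitted here; the SQL example above shows that the
// real expression keeps at most one null as a distinct value.
def arrayDistinct[T](input: Seq[T]): Seq[T] = {
  val seen = scala.collection.mutable.HashSet.empty[T]
  input.filter(seen.add)  // add returns false for values already seen
}

arrayDistinct(Seq(1, 2, 3, 2))  // List(1, 2, 3)
```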
svn commit: r27610 - in /dev/spark/2.4.0-SNAPSHOT-2018_06_20_20_01-15747cf-docs: ./ _site/ _site/api/ _site/api/R/ _site/api/java/ _site/api/java/lib/ _site/api/java/org/ _site/api/java/org/apache/ _s
Author: pwendell
Date: Thu Jun 21 03:15:55 2018
New Revision: 27610

Log:
Apache Spark 2.4.0-SNAPSHOT-2018_06_20_20_01-15747cf docs

[This commit notification would consist of 1468 parts, which exceeds the limit of 50 ones, so it was shortened to the summary.]
svn commit: r27609 - in /dev/spark/2.3.2-SNAPSHOT-2018_06_20_18_03-8928de3-docs: ./ _site/ _site/api/ _site/api/R/ _site/api/java/ _site/api/java/lib/ _site/api/java/org/ _site/api/java/org/apache/ _s
Author: pwendell
Date: Thu Jun 21 01:17:24 2018
New Revision: 27609

Log:
Apache Spark 2.3.2-SNAPSHOT-2018_06_20_18_03-8928de3 docs

[This commit notification would consist of 1443 parts, which exceeds the limit of 50 ones, so it was shortened to the summary.]
spark git commit: [SPARK-24547][K8S] Allow for building spark on k8s docker images without cache and don't forget to push spark-py container.
Repository: spark
Updated Branches:
  refs/heads/master 3f4bda728 -> 15747cfd3

[SPARK-24547][K8S] Allow for building spark on k8s docker images without cache and don't forget to push spark-py container.

## What changes were proposed in this pull request?

https://issues.apache.org/jira/browse/SPARK-24547

TL;DR from the JIRA issue:

- The first time I generated images for 2.4.0, Docker used its cache, so old jars were still in the Docker image when jobs ran. This produces errors like the following in the executors: `java.io.InvalidClassException: org.apache.spark.storage.BlockManagerId; local class incompatible: stream classdesc serialVersionUID = 6155820641931972169, local class serialVersionUID = -3720498261147521051`
- The second problem was that the spark container was pushed but the spark-py container was not; it was simply forgotten in the initial PR.
- A third problem, which I ran into because I had an older Docker, is addressed by https://github.com/apache/spark/pull/21551, so I have not included a fix for it in this ticket.

## How was this patch tested?

I've tested it on my own Spark on k8s deployment.

Author: Ray Burgemeestre

Closes #21555 from rayburgemeestre/SPARK-24547.

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/15747cfd
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/15747cfd
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/15747cfd

Branch: refs/heads/master
Commit: 15747cfd3246385ffb23e19e28d2e4effa710bf6
Parents: 3f4bda7
Author: Ray Burgemeestre
Authored: Wed Jun 20 17:09:37 2018 -0700
Committer: Anirudh Ramanathan
Committed: Wed Jun 20 17:09:37 2018 -0700

 bin/docker-image-tool.sh | 10 +++---
 1 file changed, 7 insertions(+), 3 deletions(-)

http://git-wip-us.apache.org/repos/asf/spark/blob/15747cfd/bin/docker-image-tool.sh

diff --git a/bin/docker-image-tool.sh b/bin/docker-image-tool.sh
index a871ab5..a3f1bcf 100755
--- a/bin/docker-image-tool.sh
+++ b/bin/docker-image-tool.sh
@@ -70,17 +70,18 @@ function build {
   local BASEDOCKERFILE=${BASEDOCKERFILE:-"$IMG_PATH/spark/Dockerfile"}
   local PYDOCKERFILE=${PYDOCKERFILE:-"$IMG_PATH/spark/bindings/python/Dockerfile"}

-  docker build "${BUILD_ARGS[@]}" \
+  docker build $NOCACHEARG "${BUILD_ARGS[@]}" \
     -t $(image_ref spark) \
     -f "$BASEDOCKERFILE" .

-  docker build "${BINDING_BUILD_ARGS[@]}" \
+  docker build $NOCACHEARG "${BINDING_BUILD_ARGS[@]}" \
     -t $(image_ref spark-py) \
     -f "$PYDOCKERFILE" .
 }

 function push {
   docker push "$(image_ref spark)"
+  docker push "$(image_ref spark-py)"
 }

 function usage {
@@ -99,6 +100,7 @@ Options:
   -r repo    Repository address.
   -t tag     Tag to apply to the built image, or to identify the image to be pushed.
   -m         Use minikube's Docker daemon.
+  -n         Build docker image with --no-cache

 Using minikube when building images will do so directly into minikube's Docker daemon.
 There is no need to push the images into minikube in that case, they'll be automatically
@@ -127,7 +129,8 @@ REPO=
 TAG=
 BASEDOCKERFILE=
 PYDOCKERFILE=
-while getopts f:mr:t: option
+NOCACHEARG=
+while getopts f:mr:t:n option
 do
  case "${option}"
  in
@@ -135,6 +138,7 @@ do
  p) PYDOCKERFILE=${OPTARG};;
  r) REPO=${OPTARG};;
  t) TAG=${OPTARG};;
+ n) NOCACHEARG="--no-cache";;
  m)
    if ! which minikube 1>/dev/null; then
      error "Cannot find minikube."
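With the new `-n` flag, a cache-less build-and-push cycle looks roughly like this — a sketch only; the repository name and tag are placeholders, and it assumes the tool's usual `build` and `push` subcommands:

```bash
# Build the spark and spark-py images, bypassing Docker's layer cache so
# stale jars from a previous build can't leak into the new image.
./bin/docker-image-tool.sh -r docker.io/myrepo -t v2.4.0-SNAPSHOT -n build

# Push both images; spark-py is now included by the fixed push function.
./bin/docker-image-tool.sh -r docker.io/myrepo -t v2.4.0-SNAPSHOT push
```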
svn commit: r27606 - in /dev/spark/2.4.0-SNAPSHOT-2018_06_20_16_01-3f4bda7-docs: ./ _site/ _site/api/ _site/api/R/ _site/api/java/ _site/api/java/lib/ _site/api/java/org/ _site/api/java/org/apache/ _s
Author: pwendell
Date: Wed Jun 20 23:15:27 2018
New Revision: 27606

Log:
Apache Spark 2.4.0-SNAPSHOT-2018_06_20_16_01-3f4bda7 docs

[This commit notification would consist of 1468 parts, which exceeds the limit of 50 ones, so it was shortened to the summary.]
spark git commit: [SPARK-24578][CORE] Cap sub-region's size of returned nio buffer
Repository: spark
Updated Branches:
  refs/heads/branch-2.3 d687d97b1 -> 8928de3cd

[SPARK-24578][CORE] Cap sub-region's size of returned nio buffer

## What changes were proposed in this pull request?

This PR tries to fix the performance regression introduced by SPARK-21517.

In our production job we perform many parallel computations and, with high probability, some task gets scheduled to a host-2 that has to read cached block data from host-1. Often this big transfer makes the cluster suffer timeouts (the read retries 3 times, each with a 120s timeout, and the cached block is then recomputed into the local MemoryStore).

The root cause is that we no longer do `consolidateIfNeeded`, because ChunkedByteBuffer now uses

```
Unpooled.wrappedBuffer(chunks.length, getChunks(): _*)
```

If the buffer has many small chunks, `buf.nioBuffer(...)` can perform very badly when `copyByteBuf(...)` has to be called many times.

## How was this patch tested?

Existing unit tests; also tested in production.

Author: Wenbo Zhao

Closes #21593 from WenboZhao/spark-24578.

(cherry picked from commit 3f4bda7289f1bfbbe8b9bc4b516007f569c44d2e)
Signed-off-by: Shixiong Zhu

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/8928de3c
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/8928de3c
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/8928de3c

Branch: refs/heads/branch-2.3
Commit: 8928de3cd448922d43f9ae80cea7138ecbea0d24
Parents: d687d97
Author: Wenbo Zhao
Authored: Wed Jun 20 14:26:04 2018 -0700
Committer: Shixiong Zhu
Committed: Wed Jun 20 14:26:32 2018 -0700

 .../network/protocol/MessageWithHeader.java | 25
 1 file changed, 5 insertions(+), 20 deletions(-)

http://git-wip-us.apache.org/repos/asf/spark/blob/8928de3c/common/network-common/src/main/java/org/apache/spark/network/protocol/MessageWithHeader.java

diff --git a/common/network-common/src/main/java/org/apache/spark/network/protocol/MessageWithHeader.java b/common/network-common/src/main/java/org/apache/spark/network/protocol/MessageWithHeader.java
index a533765..e7b66a6 100644
--- a/common/network-common/src/main/java/org/apache/spark/network/protocol/MessageWithHeader.java
+++ b/common/network-common/src/main/java/org/apache/spark/network/protocol/MessageWithHeader.java
@@ -137,30 +137,15 @@ class MessageWithHeader extends AbstractFileRegion {
   }

   private int copyByteBuf(ByteBuf buf, WritableByteChannel target) throws IOException {
-    ByteBuffer buffer = buf.nioBuffer();
-    int written = (buffer.remaining() <= NIO_BUFFER_LIMIT) ?
-      target.write(buffer) : writeNioBuffer(target, buffer);
+    // SPARK-24578: cap the sub-region's size of returned nio buffer to improve the performance
+    // for the case that the passed-in buffer has too many components.
+    int length = Math.min(buf.readableBytes(), NIO_BUFFER_LIMIT);
+    ByteBuffer buffer = buf.nioBuffer(buf.readerIndex(), length);
+    int written = target.write(buffer);
     buf.skipBytes(written);
     return written;
   }

-  private int writeNioBuffer(
-      WritableByteChannel writeCh,
-      ByteBuffer buf) throws IOException {
-    int originalLimit = buf.limit();
-    int ret = 0;
-
-    try {
-      int ioSize = Math.min(buf.remaining(), NIO_BUFFER_LIMIT);
-      buf.limit(buf.position() + ioSize);
-      ret = writeCh.write(buf);
-    } finally {
-      buf.limit(originalLimit);
-    }
-
-    return ret;
-  }
-
   @Override
   public MessageWithHeader touch(Object o) {
     super.touch(o);
spark git commit: [SPARK-24578][CORE] Cap sub-region's size of returned nio buffer
Repository: spark
Updated Branches:
  refs/heads/master c5a0d1132 -> 3f4bda728

[SPARK-24578][CORE] Cap sub-region's size of returned nio buffer

## What changes were proposed in this pull request?

This PR tries to fix the performance regression introduced by SPARK-21517.

In our production job we perform many parallel computations and, with high probability, some task gets scheduled to a host-2 that has to read cached block data from host-1. Often this big transfer makes the cluster suffer timeouts (the read retries 3 times, each with a 120s timeout, and the cached block is then recomputed into the local MemoryStore).

The root cause is that we no longer do `consolidateIfNeeded`, because ChunkedByteBuffer now uses

```
Unpooled.wrappedBuffer(chunks.length, getChunks(): _*)
```

If the buffer has many small chunks, `buf.nioBuffer(...)` can perform very badly when `copyByteBuf(...)` has to be called many times.

## How was this patch tested?

Existing unit tests; also tested in production.

Author: Wenbo Zhao

Closes #21593 from WenboZhao/spark-24578.

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/3f4bda72
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/3f4bda72
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/3f4bda72

Branch: refs/heads/master
Commit: 3f4bda7289f1bfbbe8b9bc4b516007f569c44d2e
Parents: c5a0d11
Author: Wenbo Zhao
Authored: Wed Jun 20 14:26:04 2018 -0700
Committer: Shixiong Zhu
Committed: Wed Jun 20 14:26:04 2018 -0700

 .../network/protocol/MessageWithHeader.java | 25
 1 file changed, 5 insertions(+), 20 deletions(-)

http://git-wip-us.apache.org/repos/asf/spark/blob/3f4bda72/common/network-common/src/main/java/org/apache/spark/network/protocol/MessageWithHeader.java

diff --git a/common/network-common/src/main/java/org/apache/spark/network/protocol/MessageWithHeader.java b/common/network-common/src/main/java/org/apache/spark/network/protocol/MessageWithHeader.java
index a533765..e7b66a6 100644
--- a/common/network-common/src/main/java/org/apache/spark/network/protocol/MessageWithHeader.java
+++ b/common/network-common/src/main/java/org/apache/spark/network/protocol/MessageWithHeader.java
@@ -137,30 +137,15 @@ class MessageWithHeader extends AbstractFileRegion {
   }

   private int copyByteBuf(ByteBuf buf, WritableByteChannel target) throws IOException {
-    ByteBuffer buffer = buf.nioBuffer();
-    int written = (buffer.remaining() <= NIO_BUFFER_LIMIT) ?
-      target.write(buffer) : writeNioBuffer(target, buffer);
+    // SPARK-24578: cap the sub-region's size of returned nio buffer to improve the performance
+    // for the case that the passed-in buffer has too many components.
+    int length = Math.min(buf.readableBytes(), NIO_BUFFER_LIMIT);
+    ByteBuffer buffer = buf.nioBuffer(buf.readerIndex(), length);
+    int written = target.write(buffer);
     buf.skipBytes(written);
     return written;
   }

-  private int writeNioBuffer(
-      WritableByteChannel writeCh,
-      ByteBuffer buf) throws IOException {
-    int originalLimit = buf.limit();
-    int ret = 0;
-
-    try {
-      int ioSize = Math.min(buf.remaining(), NIO_BUFFER_LIMIT);
-      buf.limit(buf.position() + ioSize);
-      ret = writeCh.write(buf);
-    } finally {
-      buf.limit(originalLimit);
-    }
-
-    return ret;
-  }
-
   @Override
   public MessageWithHeader touch(Object o) {
     super.touch(o);
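Why capping per call is safe: `copyByteBuf` is invoked repeatedly until the whole message has been transferred, so bounding a single write only bounds the work each `nioBuffer(index, length)` call does. A commented restatement of the new method — the concrete value of `NIO_BUFFER_LIMIT` is internal to `MessageWithHeader`, so the number below is illustrative:

```java
import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.channels.WritableByteChannel;

import io.netty.buffer.ByteBuf;

class CappedCopySketch {
  // Stand-in for MessageWithHeader.NIO_BUFFER_LIMIT; treat the value as illustrative.
  private static final int NIO_BUFFER_LIMIT = 256 * 1024;

  // Exposing at most NIO_BUFFER_LIMIT readable bytes per call means
  // nioBuffer(index, length) only merges the components of a CompositeByteBuf
  // that overlap the requested range, instead of flattening thousands of
  // small chunks on every write.
  static int copyByteBuf(ByteBuf buf, WritableByteChannel target) throws IOException {
    int length = Math.min(buf.readableBytes(), NIO_BUFFER_LIMIT);
    ByteBuffer buffer = buf.nioBuffer(buf.readerIndex(), length);
    int written = target.write(buffer);
    buf.skipBytes(written);  // the caller loops back here until fully drained
    return written;
  }
}
```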
svn commit: r27593 - in /dev/spark/2.4.0-SNAPSHOT-2018_06_20_12_01-c5a0d11-docs: ./ _site/ _site/api/ _site/api/R/ _site/api/java/ _site/api/java/lib/ _site/api/java/org/ _site/api/java/org/apache/ _s
Author: pwendell
Date: Wed Jun 20 19:16:36 2018
New Revision: 27593

Log:
Apache Spark 2.4.0-SNAPSHOT-2018_06_20_12_01-c5a0d11 docs

[This commit notification would consist of 1468 parts, which exceeds the limit of 50 ones, so it was shortened to the summary.]
spark git commit: [SPARK-24575][SQL] Prohibit window expressions inside WHERE and HAVING clauses
Repository: spark
Updated Branches:
  refs/heads/master c8ef9232c -> c5a0d1132

[SPARK-24575][SQL] Prohibit window expressions inside WHERE and HAVING clauses

## What changes were proposed in this pull request?

As discussed [before](https://github.com/apache/spark/pull/19193#issuecomment-393726964), this PR prohibits window expressions inside WHERE and HAVING clauses.

## How was this patch tested?

This PR comes with a dedicated unit test.

Author: aokolnychyi

Closes #21580 from aokolnychyi/spark-24575.

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/c5a0d113
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/c5a0d113
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/c5a0d113

Branch: refs/heads/master
Commit: c5a0d1132a5608f2110781763f4c2229c6cd7175
Parents: c8ef923
Author: aokolnychyi
Authored: Wed Jun 20 18:57:13 2018 +0200
Committer: Herman van Hovell
Committed: Wed Jun 20 18:57:13 2018 +0200

 .../spark/sql/catalyst/analysis/Analyzer.scala  |  3 ++
 .../sql/DataFrameWindowFunctionsSuite.scala     | 42 ++--
 2 files changed, 41 insertions(+), 4 deletions(-)

http://git-wip-us.apache.org/repos/asf/spark/blob/c5a0d113/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
index 6e3107f..e187133 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
@@ -1923,6 +1923,9 @@ class Analyzer(
     // "Aggregate with Having clause" will be triggered.
     def apply(plan: LogicalPlan): LogicalPlan = plan transformDown {

+      case Filter(condition, _) if hasWindowFunction(condition) =>
+        failAnalysis("It is not allowed to use window functions inside WHERE and HAVING clauses")
+
       // Aggregate with Having clause. This rule works with an unresolved Aggregate because
       // a resolved Aggregate will not have Window Functions.
       case f @ Filter(condition, a @ Aggregate(groupingExprs, aggregateExprs, child))

http://git-wip-us.apache.org/repos/asf/spark/blob/c5a0d113/sql/core/src/test/scala/org/apache/spark/sql/DataFrameWindowFunctionsSuite.scala

diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameWindowFunctionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameWindowFunctionsSuite.scala
index 3ea398a..97a8439 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameWindowFunctionsSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameWindowFunctionsSuite.scala
@@ -17,9 +17,7 @@

 package org.apache.spark.sql

-import java.sql.{Date, Timestamp}
-
-import scala.collection.mutable
+import org.scalatest.Matchers.the

 import org.apache.spark.TestUtils.{assertNotSpilled, assertSpilled}
 import org.apache.spark.sql.expressions.{MutableAggregationBuffer, UserDefinedAggregateFunction, Window}
@@ -27,7 +25,6 @@ import org.apache.spark.sql.functions._
 import org.apache.spark.sql.internal.SQLConf
 import org.apache.spark.sql.test.SharedSQLContext
 import org.apache.spark.sql.types._
-import org.apache.spark.unsafe.types.CalendarInterval

 /**
  * Window function testing for DataFrame API.
@@ -624,4 +621,41 @@ class DataFrameWindowFunctionsSuite extends QueryTest with SharedSQLContext {
       }
     }
   }
+
+  test("SPARK-24575: Window functions inside WHERE and HAVING clauses") {
+    def checkAnalysisError(df: => DataFrame): Unit = {
+      val thrownException = the [AnalysisException] thrownBy {
+        df.queryExecution.analyzed
+      }
+      assert(thrownException.message.contains("window functions inside WHERE and HAVING clauses"))
+    }
+
+    checkAnalysisError(testData2.select('a).where(rank().over(Window.orderBy('b)) === 1))
+    checkAnalysisError(testData2.where('b === 2 && rank().over(Window.orderBy('b)) === 1))
+    checkAnalysisError(
+      testData2.groupBy('a)
+        .agg(avg('b).as("avgb"))
+        .where('a > 'avgb && rank().over(Window.orderBy('a)) === 1))
+    checkAnalysisError(
+      testData2.groupBy('a)
+        .agg(max('b).as("maxb"), sum('b).as("sumb"))
+        .where(rank().over(Window.orderBy('a)) === 1))
+    checkAnalysisError(
+      testData2.groupBy('a)
+        .agg(max('b).as("maxb"), sum('b).as("sumb"))
+        .where('sumb === 5 && rank().over(Window.orderBy('a)) === 1))
+
+    checkAnalysisError(sql("SELECT a FROM testData2 WHERE RANK() OVER(ORDER BY b) =
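Queries that previously put a window function directly in WHERE or HAVING now fail analysis. The standard rewrite — a sketch only, reusing the test suite's `testData2` name for illustration and assuming an active SparkSession `spark` — is to evaluate the window function in a subquery and filter on its result:

```scala
// Rejected after this change:
//   SELECT a FROM testData2 WHERE RANK() OVER (ORDER BY b) = 1
// Accepted equivalent: materialize the rank in a subquery, then filter.
spark.sql("""
  SELECT a
  FROM (
    SELECT a, RANK() OVER (ORDER BY b) AS rnk
    FROM testData2
  ) t
  WHERE t.rnk = 1
""")
```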
spark git commit: [MINOR][SQL] Remove invalid comment from SparkStrategies
Repository: spark
Updated Branches:
  refs/heads/master bc111463a -> c8ef9232c

[MINOR][SQL] Remove invalid comment from SparkStrategies

## What changes were proposed in this pull request?

This patch removes an invalid comment from SparkStrategies, given that a TODO-like comment is no longer the preferred way to track such work: https://github.com/apache/spark/pull/21388#issuecomment-396856235

Removing the invalid comment will keep contributors from spending time on changes that are not going to be merged.

## How was this patch tested?

N/A

Author: Jungtaek Lim

Closes #21595 from HeartSaVioR/MINOR-remove-invalid-comment-on-spark-strategies.

Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/c8ef9232
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/c8ef9232
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/c8ef9232

Branch: refs/heads/master
Commit: c8ef9232cf8b8ef262404b105cea83c1f393d8c3
Parents: bc11146
Author: Jungtaek Lim
Authored: Wed Jun 20 18:38:42 2018 +0200
Committer: Herman van Hovell
Committed: Wed Jun 20 18:38:42 2018 +0200

 .../main/scala/org/apache/spark/sql/execution/SparkStrategies.scala | 1 -
 1 file changed, 1 deletion(-)

http://git-wip-us.apache.org/repos/asf/spark/blob/c8ef9232/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala

diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala
index d6951ad..07a6fca 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkStrategies.scala
@@ -494,7 +494,6 @@ abstract class SparkStrategies extends QueryPlanner[SparkPlan] {
     }
   }

-  // Can we automate these 'pass through' operations?
   object BasicOperators extends Strategy {
     def apply(plan: LogicalPlan): Seq[SparkPlan] = plan match {
       case d: DataWritingCommand => DataWritingCommandExec(d, planLater(d.query)) :: Nil
svn commit: r27577 - in /dev/spark/2.4.0-SNAPSHOT-2018_06_20_00_01-bc11146-docs: ./ _site/ _site/api/ _site/api/R/ _site/api/java/ _site/api/java/lib/ _site/api/java/org/ _site/api/java/org/apache/ _s
Author: pwendell
Date: Wed Jun 20 07:17:51 2018
New Revision: 27577

Log:
Apache Spark 2.4.0-SNAPSHOT-2018_06_20_00_01-bc11146 docs

[This commit notification would consist of 1468 parts, which exceeds the limit of 50 ones, so it was shortened to the summary.]