[GitHub] [spark] maropu commented on a change in pull request #29085: [SPARK-32106][SQL]Implement SparkScriptTransformationExec in sql/core
maropu commented on a change in pull request #29085: URL: https://github.com/apache/spark/pull/29085#discussion_r456758817 ## File path: sql/core/src/test/scala/org/apache/spark/sql/execution/BaseScriptTransformationSuite.scala ## @@ -0,0 +1,296 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + *http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.execution + +import java.sql.{Date, Timestamp} + +import org.scalatest.Assertions._ +import org.scalatest.BeforeAndAfterEach +import org.scalatest.exceptions.TestFailedException + +import org.apache.spark.{SparkException, TaskContext, TestUtils} +import org.apache.spark.rdd.RDD +import org.apache.spark.sql.Column +import org.apache.spark.sql.catalyst.InternalRow +import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference, Expression} +import org.apache.spark.sql.catalyst.plans.physical.Partitioning +import org.apache.spark.sql.functions._ +import org.apache.spark.sql.test.SQLTestUtils +import org.apache.spark.sql.types._ +import org.apache.spark.unsafe.types.CalendarInterval + +abstract class BaseScriptTransformationSuite extends SparkPlanTest with SQLTestUtils + with BeforeAndAfterEach { + import testImplicits._ + import ScriptTransformationIOSchema._ + + protected val uncaughtExceptionHandler = new TestUncaughtExceptionHandler + + private var defaultUncaughtExceptionHandler: Thread.UncaughtExceptionHandler = _ + + protected override def beforeAll(): Unit = { +super.beforeAll() +defaultUncaughtExceptionHandler = Thread.getDefaultUncaughtExceptionHandler +Thread.setDefaultUncaughtExceptionHandler(uncaughtExceptionHandler) + } + + protected override def afterAll(): Unit = { +super.afterAll() +Thread.setDefaultUncaughtExceptionHandler(defaultUncaughtExceptionHandler) + } + + override protected def afterEach(): Unit = { +super.afterEach() +uncaughtExceptionHandler.cleanStatus() + } + + def isHive23OrSpark: Boolean + + def createScriptTransformationExec( + input: Seq[Expression], + script: String, + output: Seq[Attribute], + child: SparkPlan, + ioschema: ScriptTransformationIOSchema): BaseScriptTransformationExec + + test("cat without SerDe") { +assume(TestUtils.testCommandAvailable("/bin/bash")) + +val rowsDf = Seq("a", "b", "c").map(Tuple1.apply).toDF("a") +checkAnswer( + rowsDf, + (child: SparkPlan) => 
createScriptTransformationExec( +input = Seq(rowsDf.col("a").expr), +script = "cat", +output = Seq(AttributeReference("a", StringType)()), +child = child, +ioschema = defaultIOSchema + ), + rowsDf.collect()) +assert(uncaughtExceptionHandler.exception.isEmpty) + } + + test("script transformation should not swallow errors from upstream operators (no serde)") { +assume(TestUtils.testCommandAvailable("/bin/bash")) + +val rowsDf = Seq("a", "b", "c").map(Tuple1.apply).toDF("a") +val e = intercept[TestFailedException] { + checkAnswer( +rowsDf, +(child: SparkPlan) => createScriptTransformationExec( + input = Seq(rowsDf.col("a").expr), + script = "cat", + output = Seq(AttributeReference("a", StringType)()), + child = ExceptionInjectingOperator(child), + ioschema = defaultIOSchema +), +rowsDf.collect()) +} +assert(e.getMessage().contains("intentional exception")) +// Before SPARK-25158, uncaughtExceptionHandler will catch IllegalArgumentException +assert(uncaughtExceptionHandler.exception.isEmpty) + } + + test("SPARK-25990: TRANSFORM should handle different data types correctly") { +assume(TestUtils.testCommandAvailable("python")) +val scriptFilePath = getTestResourcePath("test_script.py") + +withTempView("v") { + val df = Seq( +(1, "1", 1.0, BigDecimal(1.0), new Timestamp(1)), +(2, "2", 2.0, BigDecimal(2.0), new Timestamp(2)), +(3, "3", 3.0, BigDecimal(3.0), new Timestamp(3)) + ).toDF("a", "b", "c", "d", "e") // Note column d's data type is Decimal(38, 18) + df.createTempView("v") + + val query = sql( +s""" + |SELECT + |TRANSFORM(a, b, c, d, e) + |U
[GitHub] [spark] SparkQA commented on pull request #29085: [SPARK-32106][SQL]Implement SparkScriptTransformationExec in sql/core
SparkQA commented on pull request #29085: URL: https://github.com/apache/spark/pull/29085#issuecomment-660440466 This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] [spark] SparkQA commented on pull request #29079: [SPARK-32286][SQL] Coalesce bucketed table for shuffled hash join if applicable
SparkQA commented on pull request #29079: URL: https://github.com/apache/spark/pull/29079#issuecomment-660440472 **[Test build #126095 has finished](https://amplab.cs.berkeley.edu/jenkins/job/SparkPullRequestBuilder/126095/testReport)** for PR 29079 at commit [`d620940`](https://github.com/apache/spark/commit/d6209407731bbed2602c1d6a05c7c50982561faf). * This patch **fails due to an unknown error code, -9**. * This patch **does not merge cleanly**. * This patch adds no public classes. This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] [spark] SparkQA commented on pull request #28676: [SPARK-31869][SQL] BroadcastHashJoinExec can utilize the build side for its output partitioning
SparkQA commented on pull request #28676: URL: https://github.com/apache/spark/pull/28676#issuecomment-660440468 **[Test build #126091 has finished](https://amplab.cs.berkeley.edu/jenkins/job/SparkPullRequestBuilder/126091/testReport)** for PR 28676 at commit [`9caeecd`](https://github.com/apache/spark/commit/9caeecddaa07ef825b73835a3666502df468f881). * This patch **fails due to an unknown error code, -9**. * This patch merges cleanly. * This patch adds no public classes. This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] [spark] SparkQA commented on pull request #29143: [SPARK-32344][SQL] Unevaluable expr is set to FIRST/LAST ignoreNullsExpr in distinct aggregates
SparkQA commented on pull request #29143: URL: https://github.com/apache/spark/pull/29143#issuecomment-660440463 **[Test build #126086 has finished](https://amplab.cs.berkeley.edu/jenkins/job/SparkPullRequestBuilder/126086/testReport)** for PR 29143 at commit [`66bf522`](https://github.com/apache/spark/commit/66bf5224a81a26e59f1a2dad497d1db8e84f6788). * This patch **fails due to an unknown error code, -9**. * This patch merges cleanly. * This patch adds no public classes. This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] [spark] SparkQA commented on pull request #29107: [SPARK-32308][SQL] Move by-name resolution logic of unionByName from API code to analysis phase
SparkQA commented on pull request #29107: URL: https://github.com/apache/spark/pull/29107#issuecomment-660440464 **[Test build #126093 has finished](https://amplab.cs.berkeley.edu/jenkins/job/SparkPullRequestBuilder/126093/testReport)** for PR 29107 at commit [`c23898e`](https://github.com/apache/spark/commit/c23898ef1b120ea9e5d1659cdb76502214dce97b). * This patch **fails due to an unknown error code, -9**. * This patch merges cleanly. * This patch adds no public classes. This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] [spark] SparkQA commented on pull request #29014: [SPARK-32199][SPARK-32198] Reduce job failures during decommissioning
SparkQA commented on pull request #29014: URL: https://github.com/apache/spark/pull/29014#issuecomment-660440471 **[Test build #126096 has finished](https://amplab.cs.berkeley.edu/jenkins/job/SparkPullRequestBuilder/126096/testReport)** for PR 29014 at commit [`4a18813`](https://github.com/apache/spark/commit/4a188134a09b0ca9f6d8ee4c758ecdde7b237651). * This patch **fails due to an unknown error code, -9**. * This patch merges cleanly. * This patch adds no public classes. This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] [spark] AmplabJenkins commented on pull request #28676: [SPARK-31869][SQL] BroadcastHashJoinExec can utilize the build side for its output partitioning
AmplabJenkins commented on pull request #28676: URL: https://github.com/apache/spark/pull/28676#issuecomment-660440543 This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] [spark] SparkQA commented on pull request #29135: [SPARK-30276][SQL] Support Filter expression allows simultaneous use of DISTINCT
SparkQA commented on pull request #29135: URL: https://github.com/apache/spark/pull/29135#issuecomment-660440469 This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] [spark] AmplabJenkins commented on pull request #29107: [SPARK-32308][SQL] Move by-name resolution logic of unionByName from API code to analysis phase
AmplabJenkins commented on pull request #29107: URL: https://github.com/apache/spark/pull/29107#issuecomment-660440513 This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] [spark] AmplabJenkins commented on pull request #29079: [SPARK-32286][SQL] Coalesce bucketed table for shuffled hash join if applicable
AmplabJenkins commented on pull request #29079: URL: https://github.com/apache/spark/pull/29079#issuecomment-660440566 This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] [spark] AmplabJenkins commented on pull request #29135: [SPARK-30276][SQL] Support Filter expression allows simultaneous use of DISTINCT
AmplabJenkins commented on pull request #29135: URL: https://github.com/apache/spark/pull/29135#issuecomment-660440561 This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] [spark] AmplabJenkins commented on pull request #29014: [SPARK-32199][SPARK-32198] Reduce job failures during decommissioning
AmplabJenkins commented on pull request #29014: URL: https://github.com/apache/spark/pull/29014#issuecomment-660440523 This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] [spark] SparkQA removed a comment on pull request #29085: [SPARK-32106][SQL]Implement SparkScriptTransformationExec in sql/core
SparkQA removed a comment on pull request #29085: URL: https://github.com/apache/spark/pull/29085#issuecomment-660428628 This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] [spark] AmplabJenkins commented on pull request #29143: [SPARK-32344][SQL] Unevaluable expr is set to FIRST/LAST ignoreNullsExpr in distinct aggregates
AmplabJenkins commented on pull request #29143: URL: https://github.com/apache/spark/pull/29143#issuecomment-660440592 This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] [spark] AmplabJenkins removed a comment on pull request #28676: [SPARK-31869][SQL] BroadcastHashJoinExec can utilize the build side for its output partitioning
AmplabJenkins removed a comment on pull request #28676: URL: https://github.com/apache/spark/pull/28676#issuecomment-660440543 Merged build finished. Test FAILed. This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] [spark] SparkQA removed a comment on pull request #29143: [SPARK-32344][SQL] Unevaluable expr is set to FIRST/LAST ignoreNullsExpr in distinct aggregates
SparkQA removed a comment on pull request #29143: URL: https://github.com/apache/spark/pull/29143#issuecomment-660413170 **[Test build #126086 has started](https://amplab.cs.berkeley.edu/jenkins/job/SparkPullRequestBuilder/126086/testReport)** for PR 29143 at commit [`66bf522`](https://github.com/apache/spark/commit/66bf5224a81a26e59f1a2dad497d1db8e84f6788). This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] [spark] SparkQA removed a comment on pull request #29014: [SPARK-32199][SPARK-32198] Reduce job failures during decommissioning
SparkQA removed a comment on pull request #29014: URL: https://github.com/apache/spark/pull/29014#issuecomment-660433006 **[Test build #126096 has started](https://amplab.cs.berkeley.edu/jenkins/job/SparkPullRequestBuilder/126096/testReport)** for PR 29014 at commit [`4a18813`](https://github.com/apache/spark/commit/4a188134a09b0ca9f6d8ee4c758ecdde7b237651). This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] [spark] AmplabJenkins commented on pull request #29085: [SPARK-32106][SQL]Implement SparkScriptTransformationExec in sql/core
AmplabJenkins commented on pull request #29085: URL: https://github.com/apache/spark/pull/29085#issuecomment-660440529 This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] [spark] AmplabJenkins removed a comment on pull request #29135: [SPARK-30276][SQL] Support Filter expression allows simultaneous use of DISTINCT
AmplabJenkins removed a comment on pull request #29135: URL: https://github.com/apache/spark/pull/29135#issuecomment-660440561 This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] [spark] SparkQA removed a comment on pull request #28676: [SPARK-31869][SQL] BroadcastHashJoinExec can utilize the build side for its output partitioning
SparkQA removed a comment on pull request #28676: URL: https://github.com/apache/spark/pull/28676#issuecomment-660422979 **[Test build #126091 has started](https://amplab.cs.berkeley.edu/jenkins/job/SparkPullRequestBuilder/126091/testReport)** for PR 28676 at commit [`9caeecd`](https://github.com/apache/spark/commit/9caeecddaa07ef825b73835a3666502df468f881). This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] [spark] AmplabJenkins removed a comment on pull request #29079: [SPARK-32286][SQL] Coalesce bucketed table for shuffled hash join if applicable
AmplabJenkins removed a comment on pull request #29079: URL: https://github.com/apache/spark/pull/29079#issuecomment-660440566 Build finished. Test FAILed. This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] [spark] AmplabJenkins removed a comment on pull request #29014: [SPARK-32199][SPARK-32198] Reduce job failures during decommissioning
AmplabJenkins removed a comment on pull request #29014: URL: https://github.com/apache/spark/pull/29014#issuecomment-660440523 Merged build finished. Test FAILed. This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] [spark] AmplabJenkins removed a comment on pull request #29107: [SPARK-32308][SQL] Move by-name resolution logic of unionByName from API code to analysis phase
AmplabJenkins removed a comment on pull request #29107: URL: https://github.com/apache/spark/pull/29107#issuecomment-660440513 Merged build finished. Test FAILed. This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] [spark] SparkQA removed a comment on pull request #29107: [SPARK-32308][SQL] Move by-name resolution logic of unionByName from API code to analysis phase
SparkQA removed a comment on pull request #29107: URL: https://github.com/apache/spark/pull/29107#issuecomment-660427670 **[Test build #126093 has started](https://amplab.cs.berkeley.edu/jenkins/job/SparkPullRequestBuilder/126093/testReport)** for PR 29107 at commit [`c23898e`](https://github.com/apache/spark/commit/c23898ef1b120ea9e5d1659cdb76502214dce97b). This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] [spark] SparkQA removed a comment on pull request #29079: [SPARK-32286][SQL] Coalesce bucketed table for shuffled hash join if applicable
SparkQA removed a comment on pull request #29079: URL: https://github.com/apache/spark/pull/29079#issuecomment-660429520 **[Test build #126095 has started](https://amplab.cs.berkeley.edu/jenkins/job/SparkPullRequestBuilder/126095/testReport)** for PR 29079 at commit [`d620940`](https://github.com/apache/spark/commit/d6209407731bbed2602c1d6a05c7c50982561faf). This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] [spark] AmplabJenkins removed a comment on pull request #29143: [SPARK-32344][SQL] Unevaluable expr is set to FIRST/LAST ignoreNullsExpr in distinct aggregates
AmplabJenkins removed a comment on pull request #29143: URL: https://github.com/apache/spark/pull/29143#issuecomment-660440592 Merged build finished. Test FAILed. This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] [spark] AmplabJenkins removed a comment on pull request #29085: [SPARK-32106][SQL]Implement SparkScriptTransformationExec in sql/core
AmplabJenkins removed a comment on pull request #29085: URL: https://github.com/apache/spark/pull/29085#issuecomment-660440529 This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] [spark] SparkQA removed a comment on pull request #29135: [SPARK-30276][SQL] Support Filter expression allows simultaneous use of DISTINCT
SparkQA removed a comment on pull request #29135: URL: https://github.com/apache/spark/pull/29135#issuecomment-660418864 This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] [spark] AmplabJenkins removed a comment on pull request #29107: [SPARK-32308][SQL] Move by-name resolution logic of unionByName from API code to analysis phase
AmplabJenkins removed a comment on pull request #29107: URL: https://github.com/apache/spark/pull/29107#issuecomment-660440516 Test FAILed. Refer to this link for build results (access rights to CI server needed): https://amplab.cs.berkeley.edu/jenkins//job/SparkPullRequestBuilder/126093/ Test FAILed. This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] [spark] AmplabJenkins removed a comment on pull request #29135: [SPARK-30276][SQL] Support Filter expression allows simultaneous use of DISTINCT
AmplabJenkins removed a comment on pull request #29135: URL: https://github.com/apache/spark/pull/29135#issuecomment-660440576 Test FAILed. Refer to this link for build results (access rights to CI server needed): https://amplab.cs.berkeley.edu/jenkins//job/SparkPullRequestBuilder/126089/ Test FAILed. This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] [spark] AmplabJenkins removed a comment on pull request #29085: [SPARK-32106][SQL]Implement SparkScriptTransformationExec in sql/core
AmplabJenkins removed a comment on pull request #29085: URL: https://github.com/apache/spark/pull/29085#issuecomment-660440549 Test FAILed. Refer to this link for build results (access rights to CI server needed): https://amplab.cs.berkeley.edu/jenkins//job/SparkPullRequestBuilder/126092/ Test FAILed. This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] [spark] AmplabJenkins removed a comment on pull request #29014: [SPARK-32199][SPARK-32198] Reduce job failures during decommissioning
AmplabJenkins removed a comment on pull request #29014: URL: https://github.com/apache/spark/pull/29014#issuecomment-660440526 Test FAILed. Refer to this link for build results (access rights to CI server needed): https://amplab.cs.berkeley.edu/jenkins//job/SparkPullRequestBuilder/126096/ Test FAILed. This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] [spark] AmplabJenkins removed a comment on pull request #29143: [SPARK-32344][SQL] Unevaluable expr is set to FIRST/LAST ignoreNullsExpr in distinct aggregates
AmplabJenkins removed a comment on pull request #29143: URL: https://github.com/apache/spark/pull/29143#issuecomment-660440593 Test FAILed. Refer to this link for build results (access rights to CI server needed): https://amplab.cs.berkeley.edu/jenkins//job/SparkPullRequestBuilder/126086/ Test FAILed. This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] [spark] AmplabJenkins removed a comment on pull request #29079: [SPARK-32286][SQL] Coalesce bucketed table for shuffled hash join if applicable
AmplabJenkins removed a comment on pull request #29079: URL: https://github.com/apache/spark/pull/29079#issuecomment-660440568 Test FAILed. Refer to this link for build results (access rights to CI server needed): https://amplab.cs.berkeley.edu/jenkins//job/SparkPullRequestBuilder/126095/ Test FAILed. This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] [spark] AmplabJenkins removed a comment on pull request #28676: [SPARK-31869][SQL] BroadcastHashJoinExec can utilize the build side for its output partitioning
AmplabJenkins removed a comment on pull request #28676: URL: https://github.com/apache/spark/pull/28676#issuecomment-660440550 Test FAILed. Refer to this link for build results (access rights to CI server needed): https://amplab.cs.berkeley.edu/jenkins//job/SparkPullRequestBuilder/126091/ Test FAILed. This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] [spark] imback82 commented on pull request #28676: [SPARK-31869][SQL] BroadcastHashJoinExec can utilize the build side for its output partitioning
imback82 commented on pull request #28676: URL: https://github.com/apache/spark/pull/28676#issuecomment-660441720 retest this please This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] [spark] maropu commented on a change in pull request #29085: [SPARK-32106][SQL]Implement SparkScriptTransformationExec in sql/core
maropu commented on a change in pull request #29085: URL: https://github.com/apache/spark/pull/29085#discussion_r456760533 ## File path: sql/core/src/test/scala/org/apache/spark/sql/execution/BaseScriptTransformationSuite.scala ## @@ -0,0 +1,296 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + *http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.execution + +import java.sql.{Date, Timestamp} + +import org.scalatest.Assertions._ +import org.scalatest.BeforeAndAfterEach +import org.scalatest.exceptions.TestFailedException + +import org.apache.spark.{SparkException, TaskContext, TestUtils} +import org.apache.spark.rdd.RDD +import org.apache.spark.sql.Column +import org.apache.spark.sql.catalyst.InternalRow +import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference, Expression} +import org.apache.spark.sql.catalyst.plans.physical.Partitioning +import org.apache.spark.sql.functions._ +import org.apache.spark.sql.test.SQLTestUtils +import org.apache.spark.sql.types._ +import org.apache.spark.unsafe.types.CalendarInterval + +abstract class BaseScriptTransformationSuite extends SparkPlanTest with SQLTestUtils + with BeforeAndAfterEach { + import testImplicits._ + import ScriptTransformationIOSchema._ + + protected val uncaughtExceptionHandler = new TestUncaughtExceptionHandler + + private var defaultUncaughtExceptionHandler: Thread.UncaughtExceptionHandler = _ + + protected override def beforeAll(): Unit = { +super.beforeAll() +defaultUncaughtExceptionHandler = Thread.getDefaultUncaughtExceptionHandler +Thread.setDefaultUncaughtExceptionHandler(uncaughtExceptionHandler) + } + + protected override def afterAll(): Unit = { +super.afterAll() +Thread.setDefaultUncaughtExceptionHandler(defaultUncaughtExceptionHandler) + } + + override protected def afterEach(): Unit = { +super.afterEach() +uncaughtExceptionHandler.cleanStatus() + } + + def isHive23OrSpark: Boolean + + def createScriptTransformationExec( + input: Seq[Expression], + script: String, + output: Seq[Attribute], + child: SparkPlan, + ioschema: ScriptTransformationIOSchema): BaseScriptTransformationExec + + test("cat without SerDe") { +assume(TestUtils.testCommandAvailable("/bin/bash")) + +val rowsDf = Seq("a", "b", "c").map(Tuple1.apply).toDF("a") +checkAnswer( + rowsDf, + (child: SparkPlan) => 
createScriptTransformationExec( +input = Seq(rowsDf.col("a").expr), +script = "cat", +output = Seq(AttributeReference("a", StringType)()), +child = child, +ioschema = defaultIOSchema + ), + rowsDf.collect()) +assert(uncaughtExceptionHandler.exception.isEmpty) + } + + test("script transformation should not swallow errors from upstream operators (no serde)") { +assume(TestUtils.testCommandAvailable("/bin/bash")) + +val rowsDf = Seq("a", "b", "c").map(Tuple1.apply).toDF("a") +val e = intercept[TestFailedException] { + checkAnswer( +rowsDf, +(child: SparkPlan) => createScriptTransformationExec( + input = Seq(rowsDf.col("a").expr), + script = "cat", + output = Seq(AttributeReference("a", StringType)()), + child = ExceptionInjectingOperator(child), + ioschema = defaultIOSchema +), +rowsDf.collect()) +} +assert(e.getMessage().contains("intentional exception")) +// Before SPARK-25158, uncaughtExceptionHandler will catch IllegalArgumentException +assert(uncaughtExceptionHandler.exception.isEmpty) + } + + test("SPARK-25990: TRANSFORM should handle different data types correctly") { +assume(TestUtils.testCommandAvailable("python")) +val scriptFilePath = getTestResourcePath("test_script.py") + +withTempView("v") { + val df = Seq( +(1, "1", 1.0, BigDecimal(1.0), new Timestamp(1)), +(2, "2", 2.0, BigDecimal(2.0), new Timestamp(2)), +(3, "3", 3.0, BigDecimal(3.0), new Timestamp(3)) + ).toDF("a", "b", "c", "d", "e") // Note column d's data type is Decimal(38, 18) + df.createTempView("v") + + val query = sql( +s""" + |SELECT + |TRANSFORM(a, b, c, d, e) + |U
[GitHub] [spark] SparkQA commented on pull request #28676: [SPARK-31869][SQL] BroadcastHashJoinExec can utilize the build side for its output partitioning
SparkQA commented on pull request #28676: URL: https://github.com/apache/spark/pull/28676#issuecomment-660442362 **[Test build #126097 has started](https://amplab.cs.berkeley.edu/jenkins/job/SparkPullRequestBuilder/126097/testReport)** for PR 28676 at commit [`9caeecd`](https://github.com/apache/spark/commit/9caeecddaa07ef825b73835a3666502df468f881). This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] [spark] maropu commented on a change in pull request #29085: [SPARK-32106][SQL]Implement SparkScriptTransformationExec in sql/core
maropu commented on a change in pull request #29085: URL: https://github.com/apache/spark/pull/29085#discussion_r456760674 ## File path: sql/core/src/test/scala/org/apache/spark/sql/execution/BaseScriptTransformationSuite.scala ## @@ -0,0 +1,296 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + *http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.execution + +import java.sql.{Date, Timestamp} + +import org.scalatest.Assertions._ +import org.scalatest.BeforeAndAfterEach +import org.scalatest.exceptions.TestFailedException + +import org.apache.spark.{SparkException, TaskContext, TestUtils} +import org.apache.spark.rdd.RDD +import org.apache.spark.sql.Column +import org.apache.spark.sql.catalyst.InternalRow +import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference, Expression} +import org.apache.spark.sql.catalyst.plans.physical.Partitioning +import org.apache.spark.sql.functions._ +import org.apache.spark.sql.test.SQLTestUtils +import org.apache.spark.sql.types._ +import org.apache.spark.unsafe.types.CalendarInterval + +abstract class BaseScriptTransformationSuite extends SparkPlanTest with SQLTestUtils + with BeforeAndAfterEach { + import testImplicits._ + import ScriptTransformationIOSchema._ + + protected val uncaughtExceptionHandler = new TestUncaughtExceptionHandler + + private var defaultUncaughtExceptionHandler: Thread.UncaughtExceptionHandler = _ + + protected override def beforeAll(): Unit = { +super.beforeAll() +defaultUncaughtExceptionHandler = Thread.getDefaultUncaughtExceptionHandler +Thread.setDefaultUncaughtExceptionHandler(uncaughtExceptionHandler) + } + + protected override def afterAll(): Unit = { +super.afterAll() +Thread.setDefaultUncaughtExceptionHandler(defaultUncaughtExceptionHandler) + } + + override protected def afterEach(): Unit = { +super.afterEach() +uncaughtExceptionHandler.cleanStatus() + } + + def isHive23OrSpark: Boolean + + def createScriptTransformationExec( + input: Seq[Expression], + script: String, + output: Seq[Attribute], + child: SparkPlan, + ioschema: ScriptTransformationIOSchema): BaseScriptTransformationExec + + test("cat without SerDe") { +assume(TestUtils.testCommandAvailable("/bin/bash")) + +val rowsDf = Seq("a", "b", "c").map(Tuple1.apply).toDF("a") +checkAnswer( + rowsDf, + (child: SparkPlan) => 
createScriptTransformationExec( +input = Seq(rowsDf.col("a").expr), +script = "cat", +output = Seq(AttributeReference("a", StringType)()), +child = child, +ioschema = defaultIOSchema + ), + rowsDf.collect()) +assert(uncaughtExceptionHandler.exception.isEmpty) + } + + test("script transformation should not swallow errors from upstream operators (no serde)") { +assume(TestUtils.testCommandAvailable("/bin/bash")) + +val rowsDf = Seq("a", "b", "c").map(Tuple1.apply).toDF("a") +val e = intercept[TestFailedException] { + checkAnswer( +rowsDf, +(child: SparkPlan) => createScriptTransformationExec( + input = Seq(rowsDf.col("a").expr), + script = "cat", + output = Seq(AttributeReference("a", StringType)()), + child = ExceptionInjectingOperator(child), + ioschema = defaultIOSchema +), +rowsDf.collect()) +} +assert(e.getMessage().contains("intentional exception")) +// Before SPARK-25158, uncaughtExceptionHandler will catch IllegalArgumentException +assert(uncaughtExceptionHandler.exception.isEmpty) + } + + test("SPARK-25990: TRANSFORM should handle different data types correctly") { +assume(TestUtils.testCommandAvailable("python")) +val scriptFilePath = getTestResourcePath("test_script.py") + +withTempView("v") { + val df = Seq( +(1, "1", 1.0, BigDecimal(1.0), new Timestamp(1)), +(2, "2", 2.0, BigDecimal(2.0), new Timestamp(2)), +(3, "3", 3.0, BigDecimal(3.0), new Timestamp(3)) + ).toDF("a", "b", "c", "d", "e") // Note column d's data type is Decimal(38, 18) + df.createTempView("v") + + val query = sql( +s""" + |SELECT + |TRANSFORM(a, b, c, d, e) + |U
[GitHub] [spark] AmplabJenkins commented on pull request #28676: [SPARK-31869][SQL] BroadcastHashJoinExec can utilize the build side for its output partitioning
AmplabJenkins commented on pull request #28676: URL: https://github.com/apache/spark/pull/28676#issuecomment-660442519 This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] [spark] AmplabJenkins removed a comment on pull request #28676: [SPARK-31869][SQL] BroadcastHashJoinExec can utilize the build side for its output partitioning
AmplabJenkins removed a comment on pull request #28676: URL: https://github.com/apache/spark/pull/28676#issuecomment-660442519 This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] [spark] maropu commented on pull request #29143: [SPARK-32344][SQL] Unevaluable expr is set to FIRST/LAST ignoreNullsExpr in distinct aggregates
maropu commented on pull request #29143: URL: https://github.com/apache/spark/pull/29143#issuecomment-660442750 retest this please This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] [spark] SparkQA commented on pull request #29143: [SPARK-32344][SQL] Unevaluable expr is set to FIRST/LAST ignoreNullsExpr in distinct aggregates
SparkQA commented on pull request #29143: URL: https://github.com/apache/spark/pull/29143#issuecomment-660443260 **[Test build #126098 has started](https://amplab.cs.berkeley.edu/jenkins/job/SparkPullRequestBuilder/126098/testReport)** for PR 29143 at commit [`66bf522`](https://github.com/apache/spark/commit/66bf5224a81a26e59f1a2dad497d1db8e84f6788). This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] [spark] AmplabJenkins removed a comment on pull request #29143: [SPARK-32344][SQL] Unevaluable expr is set to FIRST/LAST ignoreNullsExpr in distinct aggregates
AmplabJenkins removed a comment on pull request #29143: URL: https://github.com/apache/spark/pull/29143#issuecomment-660443374 This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] [spark] AmplabJenkins commented on pull request #29143: [SPARK-32344][SQL] Unevaluable expr is set to FIRST/LAST ignoreNullsExpr in distinct aggregates
AmplabJenkins commented on pull request #29143: URL: https://github.com/apache/spark/pull/29143#issuecomment-660443374 This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] [spark] SparkQA commented on pull request #29146: [WIP][SPARK-32257][SQL] Reports explicit errors for invalid usage of SET command
SparkQA commented on pull request #29146: URL: https://github.com/apache/spark/pull/29146#issuecomment-660446426 **[Test build #126099 has started](https://amplab.cs.berkeley.edu/jenkins/job/SparkPullRequestBuilder/126099/testReport)** for PR 29146 at commit [`7e0d97a`](https://github.com/apache/spark/commit/7e0d97ac6612d37ede14e92f14673cf8de5c3c56). This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] [spark] AmplabJenkins removed a comment on pull request #29146: [WIP][SPARK-32257][SQL] Reports explicit errors for invalid usage of SET command
AmplabJenkins removed a comment on pull request #29146: URL: https://github.com/apache/spark/pull/29146#issuecomment-660446524 This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] [spark] AmplabJenkins commented on pull request #29146: [WIP][SPARK-32257][SQL] Reports explicit errors for invalid usage of SET command
AmplabJenkins commented on pull request #29146: URL: https://github.com/apache/spark/pull/29146#issuecomment-660446524 This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] [spark] beliefer commented on pull request #29135: [SPARK-30276][SQL] Support Filter expression allows simultaneous use of DISTINCT
beliefer commented on pull request #29135: URL: https://github.com/apache/spark/pull/29135#issuecomment-660447813 retest this please This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] [spark] SparkQA commented on pull request #29135: [SPARK-30276][SQL] Support Filter expression allows simultaneous use of DISTINCT
SparkQA commented on pull request #29135: URL: https://github.com/apache/spark/pull/29135#issuecomment-660447944 **[Test build #126100 has started](https://amplab.cs.berkeley.edu/jenkins/job/SparkPullRequestBuilder/126100/testReport)** for PR 29135 at commit [`ba7c3a4`](https://github.com/apache/spark/commit/ba7c3a4d3ad5827f03c09356f93079732284e29d). This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] [spark] AmplabJenkins commented on pull request #29135: [SPARK-30276][SQL] Support Filter expression allows simultaneous use of DISTINCT
AmplabJenkins commented on pull request #29135: URL: https://github.com/apache/spark/pull/29135#issuecomment-660448042 This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] [spark] AmplabJenkins removed a comment on pull request #29135: [SPARK-30276][SQL] Support Filter expression allows simultaneous use of DISTINCT
AmplabJenkins removed a comment on pull request #29135: URL: https://github.com/apache/spark/pull/29135#issuecomment-660448042 This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] [spark] AngersZhuuuu commented on a change in pull request #29085: [SPARK-32106][SQL]Implement SparkScriptTransformationExec in sql/core
AngersZhuuuu commented on a change in pull request #29085: URL: https://github.com/apache/spark/pull/29085#discussion_r456766188 ## File path: sql/core/src/test/scala/org/apache/spark/sql/execution/BaseScriptTransformationSuite.scala ## @@ -0,0 +1,296 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + *http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.execution + +import java.sql.{Date, Timestamp} + +import org.scalatest.Assertions._ +import org.scalatest.BeforeAndAfterEach +import org.scalatest.exceptions.TestFailedException + +import org.apache.spark.{SparkException, TaskContext, TestUtils} +import org.apache.spark.rdd.RDD +import org.apache.spark.sql.Column +import org.apache.spark.sql.catalyst.InternalRow +import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference, Expression} +import org.apache.spark.sql.catalyst.plans.physical.Partitioning +import org.apache.spark.sql.functions._ +import org.apache.spark.sql.test.SQLTestUtils +import org.apache.spark.sql.types._ +import org.apache.spark.unsafe.types.CalendarInterval + +abstract class BaseScriptTransformationSuite extends SparkPlanTest with SQLTestUtils + with BeforeAndAfterEach { + import testImplicits._ + import ScriptTransformationIOSchema._ + + protected val uncaughtExceptionHandler = new TestUncaughtExceptionHandler + + private var defaultUncaughtExceptionHandler: Thread.UncaughtExceptionHandler = _ + + protected override def beforeAll(): Unit = { +super.beforeAll() +defaultUncaughtExceptionHandler = Thread.getDefaultUncaughtExceptionHandler +Thread.setDefaultUncaughtExceptionHandler(uncaughtExceptionHandler) + } + + protected override def afterAll(): Unit = { +super.afterAll() +Thread.setDefaultUncaughtExceptionHandler(defaultUncaughtExceptionHandler) + } + + override protected def afterEach(): Unit = { +super.afterEach() +uncaughtExceptionHandler.cleanStatus() + } + + def isHive23OrSpark: Boolean + + def createScriptTransformationExec( + input: Seq[Expression], + script: String, + output: Seq[Attribute], + child: SparkPlan, + ioschema: ScriptTransformationIOSchema): BaseScriptTransformationExec + + test("cat without SerDe") { +assume(TestUtils.testCommandAvailable("/bin/bash")) + +val rowsDf = Seq("a", "b", "c").map(Tuple1.apply).toDF("a") +checkAnswer( + rowsDf, + (child: SparkPlan) => 
createScriptTransformationExec( +input = Seq(rowsDf.col("a").expr), +script = "cat", +output = Seq(AttributeReference("a", StringType)()), +child = child, +ioschema = defaultIOSchema + ), + rowsDf.collect()) +assert(uncaughtExceptionHandler.exception.isEmpty) + } + + test("script transformation should not swallow errors from upstream operators (no serde)") { +assume(TestUtils.testCommandAvailable("/bin/bash")) + +val rowsDf = Seq("a", "b", "c").map(Tuple1.apply).toDF("a") +val e = intercept[TestFailedException] { + checkAnswer( +rowsDf, +(child: SparkPlan) => createScriptTransformationExec( + input = Seq(rowsDf.col("a").expr), + script = "cat", + output = Seq(AttributeReference("a", StringType)()), + child = ExceptionInjectingOperator(child), + ioschema = defaultIOSchema +), +rowsDf.collect()) +} +assert(e.getMessage().contains("intentional exception")) +// Before SPARK-25158, uncaughtExceptionHandler will catch IllegalArgumentException +assert(uncaughtExceptionHandler.exception.isEmpty) + } + + test("SPARK-25990: TRANSFORM should handle different data types correctly") { +assume(TestUtils.testCommandAvailable("python")) +val scriptFilePath = getTestResourcePath("test_script.py") + +withTempView("v") { + val df = Seq( +(1, "1", 1.0, BigDecimal(1.0), new Timestamp(1)), +(2, "2", 2.0, BigDecimal(2.0), new Timestamp(2)), +(3, "3", 3.0, BigDecimal(3.0), new Timestamp(3)) + ).toDF("a", "b", "c", "d", "e") // Note column d's data type is Decimal(38, 18) + df.createTempView("v") + + val query = sql( +s""" + |SELECT + |TRANSFORM(a, b, c, d, e) +
[GitHub] [spark] AngersZhuuuu commented on a change in pull request #29085: [SPARK-32106][SQL]Implement SparkScriptTransformationExec in sql/core
AngersZhuuuu commented on a change in pull request #29085: URL: https://github.com/apache/spark/pull/29085#discussion_r456769354 ## File path: sql/core/src/test/scala/org/apache/spark/sql/execution/BaseScriptTransformationSuite.scala ## @@ -0,0 +1,296 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + *http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.execution + +import java.sql.{Date, Timestamp} + +import org.scalatest.Assertions._ +import org.scalatest.BeforeAndAfterEach +import org.scalatest.exceptions.TestFailedException + +import org.apache.spark.{SparkException, TaskContext, TestUtils} +import org.apache.spark.rdd.RDD +import org.apache.spark.sql.Column +import org.apache.spark.sql.catalyst.InternalRow +import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference, Expression} +import org.apache.spark.sql.catalyst.plans.physical.Partitioning +import org.apache.spark.sql.functions._ +import org.apache.spark.sql.test.SQLTestUtils +import org.apache.spark.sql.types._ +import org.apache.spark.unsafe.types.CalendarInterval + +abstract class BaseScriptTransformationSuite extends SparkPlanTest with SQLTestUtils + with BeforeAndAfterEach { + import testImplicits._ + import ScriptTransformationIOSchema._ + + protected val uncaughtExceptionHandler = new TestUncaughtExceptionHandler + + private var defaultUncaughtExceptionHandler: Thread.UncaughtExceptionHandler = _ + + protected override def beforeAll(): Unit = { +super.beforeAll() +defaultUncaughtExceptionHandler = Thread.getDefaultUncaughtExceptionHandler +Thread.setDefaultUncaughtExceptionHandler(uncaughtExceptionHandler) + } + + protected override def afterAll(): Unit = { +super.afterAll() +Thread.setDefaultUncaughtExceptionHandler(defaultUncaughtExceptionHandler) + } + + override protected def afterEach(): Unit = { +super.afterEach() +uncaughtExceptionHandler.cleanStatus() + } + + def isHive23OrSpark: Boolean + + def createScriptTransformationExec( + input: Seq[Expression], + script: String, + output: Seq[Attribute], + child: SparkPlan, + ioschema: ScriptTransformationIOSchema): BaseScriptTransformationExec + + test("cat without SerDe") { +assume(TestUtils.testCommandAvailable("/bin/bash")) + +val rowsDf = Seq("a", "b", "c").map(Tuple1.apply).toDF("a") +checkAnswer( + rowsDf, + (child: SparkPlan) => 
createScriptTransformationExec( +input = Seq(rowsDf.col("a").expr), +script = "cat", +output = Seq(AttributeReference("a", StringType)()), +child = child, +ioschema = defaultIOSchema + ), + rowsDf.collect()) +assert(uncaughtExceptionHandler.exception.isEmpty) + } + + test("script transformation should not swallow errors from upstream operators (no serde)") { +assume(TestUtils.testCommandAvailable("/bin/bash")) + +val rowsDf = Seq("a", "b", "c").map(Tuple1.apply).toDF("a") +val e = intercept[TestFailedException] { + checkAnswer( +rowsDf, +(child: SparkPlan) => createScriptTransformationExec( + input = Seq(rowsDf.col("a").expr), + script = "cat", + output = Seq(AttributeReference("a", StringType)()), + child = ExceptionInjectingOperator(child), + ioschema = defaultIOSchema +), +rowsDf.collect()) +} +assert(e.getMessage().contains("intentional exception")) +// Before SPARK-25158, uncaughtExceptionHandler will catch IllegalArgumentException +assert(uncaughtExceptionHandler.exception.isEmpty) + } + + test("SPARK-25990: TRANSFORM should handle different data types correctly") { +assume(TestUtils.testCommandAvailable("python")) +val scriptFilePath = getTestResourcePath("test_script.py") + +withTempView("v") { + val df = Seq( +(1, "1", 1.0, BigDecimal(1.0), new Timestamp(1)), +(2, "2", 2.0, BigDecimal(2.0), new Timestamp(2)), +(3, "3", 3.0, BigDecimal(3.0), new Timestamp(3)) + ).toDF("a", "b", "c", "d", "e") // Note column d's data type is Decimal(38, 18) + df.createTempView("v") + + val query = sql( +s""" + |SELECT + |TRANSFORM(a, b, c, d, e) +
[GitHub] [spark] AngersZhuuuu commented on a change in pull request #29085: [SPARK-32106][SQL]Implement SparkScriptTransformationExec in sql/core
AngersZhuuuu commented on a change in pull request #29085: URL: https://github.com/apache/spark/pull/29085#discussion_r456770157 ## File path: sql/core/src/test/scala/org/apache/spark/sql/execution/BaseScriptTransformationSuite.scala ## @@ -0,0 +1,296 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + *http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.execution + +import java.sql.{Date, Timestamp} + +import org.scalatest.Assertions._ +import org.scalatest.BeforeAndAfterEach +import org.scalatest.exceptions.TestFailedException + +import org.apache.spark.{SparkException, TaskContext, TestUtils} +import org.apache.spark.rdd.RDD +import org.apache.spark.sql.Column +import org.apache.spark.sql.catalyst.InternalRow +import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference, Expression} +import org.apache.spark.sql.catalyst.plans.physical.Partitioning +import org.apache.spark.sql.functions._ +import org.apache.spark.sql.test.SQLTestUtils +import org.apache.spark.sql.types._ +import org.apache.spark.unsafe.types.CalendarInterval + +abstract class BaseScriptTransformationSuite extends SparkPlanTest with SQLTestUtils + with BeforeAndAfterEach { + import testImplicits._ + import ScriptTransformationIOSchema._ + + protected val uncaughtExceptionHandler = new TestUncaughtExceptionHandler + + private var defaultUncaughtExceptionHandler: Thread.UncaughtExceptionHandler = _ + + protected override def beforeAll(): Unit = { +super.beforeAll() +defaultUncaughtExceptionHandler = Thread.getDefaultUncaughtExceptionHandler +Thread.setDefaultUncaughtExceptionHandler(uncaughtExceptionHandler) + } + + protected override def afterAll(): Unit = { +super.afterAll() +Thread.setDefaultUncaughtExceptionHandler(defaultUncaughtExceptionHandler) + } + + override protected def afterEach(): Unit = { +super.afterEach() +uncaughtExceptionHandler.cleanStatus() + } + + def isHive23OrSpark: Boolean + + def createScriptTransformationExec( + input: Seq[Expression], + script: String, + output: Seq[Attribute], + child: SparkPlan, + ioschema: ScriptTransformationIOSchema): BaseScriptTransformationExec + + test("cat without SerDe") { +assume(TestUtils.testCommandAvailable("/bin/bash")) + +val rowsDf = Seq("a", "b", "c").map(Tuple1.apply).toDF("a") +checkAnswer( + rowsDf, + (child: SparkPlan) => 
createScriptTransformationExec( +input = Seq(rowsDf.col("a").expr), +script = "cat", +output = Seq(AttributeReference("a", StringType)()), +child = child, +ioschema = defaultIOSchema + ), + rowsDf.collect()) +assert(uncaughtExceptionHandler.exception.isEmpty) + } + + test("script transformation should not swallow errors from upstream operators (no serde)") { +assume(TestUtils.testCommandAvailable("/bin/bash")) + +val rowsDf = Seq("a", "b", "c").map(Tuple1.apply).toDF("a") +val e = intercept[TestFailedException] { + checkAnswer( +rowsDf, +(child: SparkPlan) => createScriptTransformationExec( + input = Seq(rowsDf.col("a").expr), + script = "cat", + output = Seq(AttributeReference("a", StringType)()), + child = ExceptionInjectingOperator(child), + ioschema = defaultIOSchema +), +rowsDf.collect()) +} +assert(e.getMessage().contains("intentional exception")) +// Before SPARK-25158, uncaughtExceptionHandler will catch IllegalArgumentException +assert(uncaughtExceptionHandler.exception.isEmpty) + } + + test("SPARK-25990: TRANSFORM should handle different data types correctly") { +assume(TestUtils.testCommandAvailable("python")) +val scriptFilePath = getTestResourcePath("test_script.py") + +withTempView("v") { + val df = Seq( +(1, "1", 1.0, BigDecimal(1.0), new Timestamp(1)), +(2, "2", 2.0, BigDecimal(2.0), new Timestamp(2)), +(3, "3", 3.0, BigDecimal(3.0), new Timestamp(3)) + ).toDF("a", "b", "c", "d", "e") // Note column d's data type is Decimal(38, 18) + df.createTempView("v") + + val query = sql( +s""" + |SELECT + |TRANSFORM(a, b, c, d, e) +
[GitHub] [spark] AngersZhuuuu commented on a change in pull request #29085: [SPARK-32106][SQL]Implement SparkScriptTransformationExec in sql/core
AngersZh commented on a change in pull request #29085: URL: https://github.com/apache/spark/pull/29085#discussion_r456770213 ## File path: sql/core/src/test/scala/org/apache/spark/sql/execution/BaseScriptTransformationSuite.scala ## @@ -0,0 +1,296 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + *http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.execution + +import java.sql.{Date, Timestamp} + +import org.scalatest.Assertions._ +import org.scalatest.BeforeAndAfterEach +import org.scalatest.exceptions.TestFailedException + +import org.apache.spark.{SparkException, TaskContext, TestUtils} +import org.apache.spark.rdd.RDD +import org.apache.spark.sql.Column +import org.apache.spark.sql.catalyst.InternalRow +import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference, Expression} +import org.apache.spark.sql.catalyst.plans.physical.Partitioning +import org.apache.spark.sql.functions._ +import org.apache.spark.sql.test.SQLTestUtils +import org.apache.spark.sql.types._ +import org.apache.spark.unsafe.types.CalendarInterval + +abstract class BaseScriptTransformationSuite extends SparkPlanTest with SQLTestUtils + with BeforeAndAfterEach { + import testImplicits._ + import ScriptTransformationIOSchema._ + + protected val uncaughtExceptionHandler = new TestUncaughtExceptionHandler + + private var defaultUncaughtExceptionHandler: Thread.UncaughtExceptionHandler = _ + + protected override def beforeAll(): Unit = { +super.beforeAll() +defaultUncaughtExceptionHandler = Thread.getDefaultUncaughtExceptionHandler +Thread.setDefaultUncaughtExceptionHandler(uncaughtExceptionHandler) + } + + protected override def afterAll(): Unit = { +super.afterAll() +Thread.setDefaultUncaughtExceptionHandler(defaultUncaughtExceptionHandler) + } + + override protected def afterEach(): Unit = { +super.afterEach() +uncaughtExceptionHandler.cleanStatus() + } + + def isHive23OrSpark: Boolean + + def createScriptTransformationExec( + input: Seq[Expression], + script: String, + output: Seq[Attribute], + child: SparkPlan, + ioschema: ScriptTransformationIOSchema): BaseScriptTransformationExec + + test("cat without SerDe") { +assume(TestUtils.testCommandAvailable("/bin/bash")) + +val rowsDf = Seq("a", "b", "c").map(Tuple1.apply).toDF("a") +checkAnswer( + rowsDf, + (child: SparkPlan) => 
createScriptTransformationExec( +input = Seq(rowsDf.col("a").expr), +script = "cat", +output = Seq(AttributeReference("a", StringType)()), +child = child, +ioschema = defaultIOSchema + ), + rowsDf.collect()) +assert(uncaughtExceptionHandler.exception.isEmpty) + } + + test("script transformation should not swallow errors from upstream operators (no serde)") { +assume(TestUtils.testCommandAvailable("/bin/bash")) + +val rowsDf = Seq("a", "b", "c").map(Tuple1.apply).toDF("a") +val e = intercept[TestFailedException] { + checkAnswer( +rowsDf, +(child: SparkPlan) => createScriptTransformationExec( + input = Seq(rowsDf.col("a").expr), + script = "cat", + output = Seq(AttributeReference("a", StringType)()), + child = ExceptionInjectingOperator(child), + ioschema = defaultIOSchema +), +rowsDf.collect()) +} +assert(e.getMessage().contains("intentional exception")) +// Before SPARK-25158, uncaughtExceptionHandler will catch IllegalArgumentException +assert(uncaughtExceptionHandler.exception.isEmpty) + } + + test("SPARK-25990: TRANSFORM should handle different data types correctly") { +assume(TestUtils.testCommandAvailable("python")) +val scriptFilePath = getTestResourcePath("test_script.py") + +withTempView("v") { + val df = Seq( +(1, "1", 1.0, BigDecimal(1.0), new Timestamp(1)), +(2, "2", 2.0, BigDecimal(2.0), new Timestamp(2)), +(3, "3", 3.0, BigDecimal(3.0), new Timestamp(3)) + ).toDF("a", "b", "c", "d", "e") // Note column d's data type is Decimal(38, 18) + df.createTempView("v") + + val query = sql( +s""" + |SELECT + |TRANSFORM(a, b, c, d, e) +
[GitHub] [spark] SparkQA commented on pull request #29142: [SPARK-29343][SQL][FOLLOW-UP] Add more aggregate function to support eliminate sorts.
SparkQA commented on pull request #29142: URL: https://github.com/apache/spark/pull/29142#issuecomment-660455259 **[Test build #126101 has started](https://amplab.cs.berkeley.edu/jenkins/job/SparkPullRequestBuilder/126101/testReport)** for PR 29142 at commit [`bc454b6`](https://github.com/apache/spark/commit/bc454b67f8879a596b90c7d91e0ac3c70a8d0a04). This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] [spark] AmplabJenkins commented on pull request #29085: [SPARK-32106][SQL]Implement SparkScriptTransformationExec in sql/core
AmplabJenkins commented on pull request #29085: URL: https://github.com/apache/spark/pull/29085#issuecomment-660455437 This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] [spark] AmplabJenkins commented on pull request #29142: [SPARK-29343][SQL][FOLLOW-UP] Add more aggregate function to support eliminate sorts.
AmplabJenkins commented on pull request #29142: URL: https://github.com/apache/spark/pull/29142#issuecomment-660455421 This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] [spark] ulysses-you commented on pull request #29142: [SPARK-29343][SQL][FOLLOW-UP] Add more aggregate function to support eliminate sorts.
ulysses-you commented on pull request #29142: URL: https://github.com/apache/spark/pull/29142#issuecomment-660455410 cc @maropu This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] [spark] AmplabJenkins removed a comment on pull request #29142: [SPARK-29343][SQL][FOLLOW-UP] Add more aggregate function to support eliminate sorts.
AmplabJenkins removed a comment on pull request #29142: URL: https://github.com/apache/spark/pull/29142#issuecomment-660455421 This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] [spark] AmplabJenkins removed a comment on pull request #29085: [SPARK-32106][SQL]Implement SparkScriptTransformationExec in sql/core
AmplabJenkins removed a comment on pull request #29085: URL: https://github.com/apache/spark/pull/29085#issuecomment-660455437 This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] [spark] ulysses-you commented on a change in pull request #29142: [SPARK-29343][SQL][FOLLOW-UP] Add more aggregate function to support eliminate sorts.
ulysses-you commented on a change in pull request #29142: URL: https://github.com/apache/spark/pull/29142#discussion_r456770501 ## File path: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala ## @@ -1004,7 +1004,7 @@ object EliminateSorts extends Rule[LogicalPlan] { private def isOrderIrrelevantAggs(aggs: Seq[NamedExpression]): Boolean = { def isOrderIrrelevantAggFunction(func: AggregateFunction): Boolean = func match { - case _: Min | _: Max | _: Count => true + case _: Min | _: Max | _: Count | _: CountIf | _: MaxMinBy => true Review comment: count_if has already been replaced with count and if, so there is no need to add it. This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] [spark] ulysses-you commented on pull request #29034: [SPARK-32219][SQL] Add SHOW CACHED TABLES Command
ulysses-you commented on pull request #29034: URL: https://github.com/apache/spark/pull/29034#issuecomment-660455974 cc @maropu @cloud-fan thanks for review. This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] [spark] AngersZhuuuu commented on a change in pull request #29085: [SPARK-32106][SQL]Implement SparkScriptTransformationExec in sql/core
AngersZh commented on a change in pull request #29085: URL: https://github.com/apache/spark/pull/29085#discussion_r456770897 ## File path: sql/core/src/main/scala/org/apache/spark/sql/execution/BaseScriptTransformationExec.scala ## @@ -87,17 +171,55 @@ trait BaseScriptTransformationExec extends UnaryExecNode { } } } + + private lazy val fieldWriters: Seq[String => Any] = output.map { attr => +val converter = CatalystTypeConverters.createToCatalystConverter(attr.dataType) +attr.dataType match { + case StringType => (data: String) => converter(data) + case ByteType => (data: String) => converter(data.toByte) + case IntegerType => (data: String) => converter(data.toInt) + case ShortType => (data: String) => converter(data.toShort) + case LongType => (data: String) => converter(data.toLong) + case FloatType => (data: String) => converter(data.toFloat) + case DoubleType => (data: String) => converter(data.toDouble) + case decimal: DecimalType => (data: String) => converter(BigDecimal(data)) + case DateType if conf.datetimeJava8ApiEnabled => (data: String) => +converter(DateTimeUtils.stringToDate( + UTF8String.fromString(data), + DateTimeUtils.getZoneId(conf.sessionLocalTimeZone)) + .map(DateTimeUtils.daysToLocalDate).orNull) Review comment: > Need this? Seems like not. Tested; it seems this is not needed, so I removed it. This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] [spark] AngersZhuuuu commented on a change in pull request #29085: [SPARK-32106][SQL]Implement SparkScriptTransformationExec in sql/core
AngersZh commented on a change in pull request #29085: URL: https://github.com/apache/spark/pull/29085#discussion_r456770946 ## File path: sql/core/src/test/scala/org/apache/spark/sql/execution/BaseScriptTransformationSuite.scala ## @@ -0,0 +1,296 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + *http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.execution + +import java.sql.{Date, Timestamp} + +import org.scalatest.Assertions._ +import org.scalatest.BeforeAndAfterEach +import org.scalatest.exceptions.TestFailedException + +import org.apache.spark.{SparkException, TaskContext, TestUtils} +import org.apache.spark.rdd.RDD +import org.apache.spark.sql.Column +import org.apache.spark.sql.catalyst.InternalRow +import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference, Expression} +import org.apache.spark.sql.catalyst.plans.physical.Partitioning +import org.apache.spark.sql.functions._ +import org.apache.spark.sql.test.SQLTestUtils +import org.apache.spark.sql.types._ +import org.apache.spark.unsafe.types.CalendarInterval + +abstract class BaseScriptTransformationSuite extends SparkPlanTest with SQLTestUtils + with BeforeAndAfterEach { + import testImplicits._ + import ScriptTransformationIOSchema._ + + protected val uncaughtExceptionHandler = new TestUncaughtExceptionHandler + + private var defaultUncaughtExceptionHandler: Thread.UncaughtExceptionHandler = _ + + protected override def beforeAll(): Unit = { +super.beforeAll() +defaultUncaughtExceptionHandler = Thread.getDefaultUncaughtExceptionHandler +Thread.setDefaultUncaughtExceptionHandler(uncaughtExceptionHandler) + } + + protected override def afterAll(): Unit = { +super.afterAll() +Thread.setDefaultUncaughtExceptionHandler(defaultUncaughtExceptionHandler) + } + + override protected def afterEach(): Unit = { +super.afterEach() +uncaughtExceptionHandler.cleanStatus() + } + + def isHive23OrSpark: Boolean + + def createScriptTransformationExec( + input: Seq[Expression], + script: String, + output: Seq[Attribute], + child: SparkPlan, + ioschema: ScriptTransformationIOSchema): BaseScriptTransformationExec + + test("cat without SerDe") { +assume(TestUtils.testCommandAvailable("/bin/bash")) + +val rowsDf = Seq("a", "b", "c").map(Tuple1.apply).toDF("a") +checkAnswer( + rowsDf, + (child: SparkPlan) => 
createScriptTransformationExec( +input = Seq(rowsDf.col("a").expr), +script = "cat", +output = Seq(AttributeReference("a", StringType)()), +child = child, +ioschema = defaultIOSchema + ), + rowsDf.collect()) +assert(uncaughtExceptionHandler.exception.isEmpty) + } + + test("script transformation should not swallow errors from upstream operators (no serde)") { +assume(TestUtils.testCommandAvailable("/bin/bash")) + +val rowsDf = Seq("a", "b", "c").map(Tuple1.apply).toDF("a") +val e = intercept[TestFailedException] { + checkAnswer( +rowsDf, +(child: SparkPlan) => createScriptTransformationExec( + input = Seq(rowsDf.col("a").expr), + script = "cat", + output = Seq(AttributeReference("a", StringType)()), + child = ExceptionInjectingOperator(child), + ioschema = defaultIOSchema +), +rowsDf.collect()) +} +assert(e.getMessage().contains("intentional exception")) +// Before SPARK-25158, uncaughtExceptionHandler will catch IllegalArgumentException +assert(uncaughtExceptionHandler.exception.isEmpty) + } + + test("SPARK-25990: TRANSFORM should handle different data types correctly") { +assume(TestUtils.testCommandAvailable("python")) +val scriptFilePath = getTestResourcePath("test_script.py") + +withTempView("v") { + val df = Seq( +(1, "1", 1.0, BigDecimal(1.0), new Timestamp(1)), +(2, "2", 2.0, BigDecimal(2.0), new Timestamp(2)), +(3, "3", 3.0, BigDecimal(3.0), new Timestamp(3)) + ).toDF("a", "b", "c", "d", "e") // Note column d's data type is Decimal(38, 18) + df.createTempView("v") + + val query = sql( +s""" + |SELECT + |TRANSFORM(a, b, c, d, e) +
[GitHub] [spark] SparkQA commented on pull request #29085: [SPARK-32106][SQL]Implement SparkScriptTransformationExec in sql/core
SparkQA commented on pull request #29085: URL: https://github.com/apache/spark/pull/29085#issuecomment-660456148 **[Test build #126102 has started](https://amplab.cs.berkeley.edu/jenkins/job/SparkPullRequestBuilder/126102/testReport)** for PR 29085 at commit [`fc96e1f`](https://github.com/apache/spark/commit/fc96e1fa2d224616b31813e7ef827f08ef4b9967). This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] [spark] AmplabJenkins removed a comment on pull request #29085: [SPARK-32106][SQL]Implement SparkScriptTransformationExec in sql/core
AmplabJenkins removed a comment on pull request #29085: URL: https://github.com/apache/spark/pull/29085#issuecomment-660456264 This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] [spark] AmplabJenkins commented on pull request #29085: [SPARK-32106][SQL]Implement SparkScriptTransformationExec in sql/core
AmplabJenkins commented on pull request #29085: URL: https://github.com/apache/spark/pull/29085#issuecomment-660456264 This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] [spark] SparkQA commented on pull request #29146: [WIP][SPARK-32257][SQL] Reports explicit errors for invalid usage of SET command
SparkQA commented on pull request #29146: URL: https://github.com/apache/spark/pull/29146#issuecomment-660456505 **[Test build #126099 has finished](https://amplab.cs.berkeley.edu/jenkins/job/SparkPullRequestBuilder/126099/testReport)** for PR 29146 at commit [`7e0d97a`](https://github.com/apache/spark/commit/7e0d97ac6612d37ede14e92f14673cf8de5c3c56). * This patch **fails Spark unit tests**. * This patch merges cleanly. * This patch adds no public classes. This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] [spark] AmplabJenkins commented on pull request #29146: [WIP][SPARK-32257][SQL] Reports explicit errors for invalid usage of SET command
AmplabJenkins commented on pull request #29146: URL: https://github.com/apache/spark/pull/29146#issuecomment-660456576 This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] [spark] SparkQA removed a comment on pull request #29146: [WIP][SPARK-32257][SQL] Reports explicit errors for invalid usage of SET command
SparkQA removed a comment on pull request #29146: URL: https://github.com/apache/spark/pull/29146#issuecomment-660446426 **[Test build #126099 has started](https://amplab.cs.berkeley.edu/jenkins/job/SparkPullRequestBuilder/126099/testReport)** for PR 29146 at commit [`7e0d97a`](https://github.com/apache/spark/commit/7e0d97ac6612d37ede14e92f14673cf8de5c3c56). This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] [spark] AmplabJenkins removed a comment on pull request #29146: [WIP][SPARK-32257][SQL] Reports explicit errors for invalid usage of SET command
AmplabJenkins removed a comment on pull request #29146: URL: https://github.com/apache/spark/pull/29146#issuecomment-660456576 Merged build finished. Test FAILed. This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] [spark] AmplabJenkins removed a comment on pull request #29146: [WIP][SPARK-32257][SQL] Reports explicit errors for invalid usage of SET command
AmplabJenkins removed a comment on pull request #29146: URL: https://github.com/apache/spark/pull/29146#issuecomment-660456580 Test FAILed. Refer to this link for build results (access rights to CI server needed): https://amplab.cs.berkeley.edu/jenkins//job/SparkPullRequestBuilder/126099/ Test FAILed. This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] [spark] SparkQA commented on pull request #29085: [SPARK-32106][SQL]Implement SparkScriptTransformationExec in sql/core
SparkQA commented on pull request #29085: URL: https://github.com/apache/spark/pull/29085#issuecomment-660456965 **[Test build #126103 has started](https://amplab.cs.berkeley.edu/jenkins/job/SparkPullRequestBuilder/126103/testReport)** for PR 29085 at commit [`ed901af`](https://github.com/apache/spark/commit/ed901afc3ef115af06fef6047a21b472b7a867e5). This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] [spark] SparkQA commented on pull request #29146: [WIP][SPARK-32257][SQL] Reports explicit errors for invalid usage of SET command
SparkQA commented on pull request #29146: URL: https://github.com/apache/spark/pull/29146#issuecomment-660461071 **[Test build #126104 has started](https://amplab.cs.berkeley.edu/jenkins/job/SparkPullRequestBuilder/126104/testReport)** for PR 29146 at commit [`0d357ae`](https://github.com/apache/spark/commit/0d357ae6539822f59aa4ae5ff34f5215f27666e4). This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] [spark] AmplabJenkins commented on pull request #29146: [WIP][SPARK-32257][SQL] Reports explicit errors for invalid usage of SET command
AmplabJenkins commented on pull request #29146: URL: https://github.com/apache/spark/pull/29146#issuecomment-660461263 This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] [spark] AmplabJenkins removed a comment on pull request #29146: [WIP][SPARK-32257][SQL] Reports explicit errors for invalid usage of SET command
AmplabJenkins removed a comment on pull request #29146: URL: https://github.com/apache/spark/pull/29146#issuecomment-660461263 This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] [spark] maropu commented on a change in pull request #29085: [SPARK-32106][SQL]Implement SparkScriptTransformationExec in sql/core
maropu commented on a change in pull request #29085: URL: https://github.com/apache/spark/pull/29085#discussion_r456775139 ## File path: sql/core/src/test/scala/org/apache/spark/sql/execution/BaseScriptTransformationSuite.scala ## @@ -0,0 +1,296 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + *http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.execution + +import java.sql.{Date, Timestamp} + +import org.scalatest.Assertions._ +import org.scalatest.BeforeAndAfterEach +import org.scalatest.exceptions.TestFailedException + +import org.apache.spark.{SparkException, TaskContext, TestUtils} +import org.apache.spark.rdd.RDD +import org.apache.spark.sql.Column +import org.apache.spark.sql.catalyst.InternalRow +import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference, Expression} +import org.apache.spark.sql.catalyst.plans.physical.Partitioning +import org.apache.spark.sql.functions._ +import org.apache.spark.sql.test.SQLTestUtils +import org.apache.spark.sql.types._ +import org.apache.spark.unsafe.types.CalendarInterval + +abstract class BaseScriptTransformationSuite extends SparkPlanTest with SQLTestUtils + with BeforeAndAfterEach { + import testImplicits._ + import ScriptTransformationIOSchema._ + + protected val uncaughtExceptionHandler = new TestUncaughtExceptionHandler + + private var defaultUncaughtExceptionHandler: Thread.UncaughtExceptionHandler = _ + + protected override def beforeAll(): Unit = { +super.beforeAll() +defaultUncaughtExceptionHandler = Thread.getDefaultUncaughtExceptionHandler +Thread.setDefaultUncaughtExceptionHandler(uncaughtExceptionHandler) + } + + protected override def afterAll(): Unit = { +super.afterAll() +Thread.setDefaultUncaughtExceptionHandler(defaultUncaughtExceptionHandler) + } + + override protected def afterEach(): Unit = { +super.afterEach() +uncaughtExceptionHandler.cleanStatus() + } + + def isHive23OrSpark: Boolean + + def createScriptTransformationExec( + input: Seq[Expression], + script: String, + output: Seq[Attribute], + child: SparkPlan, + ioschema: ScriptTransformationIOSchema): BaseScriptTransformationExec + + test("cat without SerDe") { +assume(TestUtils.testCommandAvailable("/bin/bash")) + +val rowsDf = Seq("a", "b", "c").map(Tuple1.apply).toDF("a") +checkAnswer( + rowsDf, + (child: SparkPlan) => 
createScriptTransformationExec( +input = Seq(rowsDf.col("a").expr), +script = "cat", +output = Seq(AttributeReference("a", StringType)()), +child = child, +ioschema = defaultIOSchema + ), + rowsDf.collect()) +assert(uncaughtExceptionHandler.exception.isEmpty) + } + + test("script transformation should not swallow errors from upstream operators (no serde)") { +assume(TestUtils.testCommandAvailable("/bin/bash")) + +val rowsDf = Seq("a", "b", "c").map(Tuple1.apply).toDF("a") +val e = intercept[TestFailedException] { + checkAnswer( +rowsDf, +(child: SparkPlan) => createScriptTransformationExec( + input = Seq(rowsDf.col("a").expr), + script = "cat", + output = Seq(AttributeReference("a", StringType)()), + child = ExceptionInjectingOperator(child), + ioschema = defaultIOSchema +), +rowsDf.collect()) +} +assert(e.getMessage().contains("intentional exception")) +// Before SPARK-25158, uncaughtExceptionHandler will catch IllegalArgumentException +assert(uncaughtExceptionHandler.exception.isEmpty) + } + + test("SPARK-25990: TRANSFORM should handle different data types correctly") { +assume(TestUtils.testCommandAvailable("python")) +val scriptFilePath = getTestResourcePath("test_script.py") + +withTempView("v") { + val df = Seq( +(1, "1", 1.0, BigDecimal(1.0), new Timestamp(1)), +(2, "2", 2.0, BigDecimal(2.0), new Timestamp(2)), +(3, "3", 3.0, BigDecimal(3.0), new Timestamp(3)) + ).toDF("a", "b", "c", "d", "e") // Note column d's data type is Decimal(38, 18) + df.createTempView("v") + + val query = sql( +s""" + |SELECT + |TRANSFORM(a, b, c, d, e) + |U
[GitHub] [spark] maropu commented on a change in pull request #29085: [SPARK-32106][SQL]Implement SparkScriptTransformationExec in sql/core
maropu commented on a change in pull request #29085: URL: https://github.com/apache/spark/pull/29085#discussion_r456775167 ## File path: sql/core/src/test/scala/org/apache/spark/sql/execution/BaseScriptTransformationSuite.scala ## @@ -0,0 +1,296 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + *http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.execution + +import java.sql.{Date, Timestamp} + +import org.scalatest.Assertions._ +import org.scalatest.BeforeAndAfterEach +import org.scalatest.exceptions.TestFailedException + +import org.apache.spark.{SparkException, TaskContext, TestUtils} +import org.apache.spark.rdd.RDD +import org.apache.spark.sql.Column +import org.apache.spark.sql.catalyst.InternalRow +import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference, Expression} +import org.apache.spark.sql.catalyst.plans.physical.Partitioning +import org.apache.spark.sql.functions._ +import org.apache.spark.sql.test.SQLTestUtils +import org.apache.spark.sql.types._ +import org.apache.spark.unsafe.types.CalendarInterval + +abstract class BaseScriptTransformationSuite extends SparkPlanTest with SQLTestUtils + with BeforeAndAfterEach { + import testImplicits._ + import ScriptTransformationIOSchema._ + + protected val uncaughtExceptionHandler = new TestUncaughtExceptionHandler + + private var defaultUncaughtExceptionHandler: Thread.UncaughtExceptionHandler = _ + + protected override def beforeAll(): Unit = { +super.beforeAll() +defaultUncaughtExceptionHandler = Thread.getDefaultUncaughtExceptionHandler +Thread.setDefaultUncaughtExceptionHandler(uncaughtExceptionHandler) + } + + protected override def afterAll(): Unit = { +super.afterAll() +Thread.setDefaultUncaughtExceptionHandler(defaultUncaughtExceptionHandler) + } + + override protected def afterEach(): Unit = { +super.afterEach() +uncaughtExceptionHandler.cleanStatus() + } + + def isHive23OrSpark: Boolean + + def createScriptTransformationExec( + input: Seq[Expression], + script: String, + output: Seq[Attribute], + child: SparkPlan, + ioschema: ScriptTransformationIOSchema): BaseScriptTransformationExec + + test("cat without SerDe") { +assume(TestUtils.testCommandAvailable("/bin/bash")) + +val rowsDf = Seq("a", "b", "c").map(Tuple1.apply).toDF("a") +checkAnswer( + rowsDf, + (child: SparkPlan) => 
createScriptTransformationExec( +input = Seq(rowsDf.col("a").expr), +script = "cat", +output = Seq(AttributeReference("a", StringType)()), +child = child, +ioschema = defaultIOSchema + ), + rowsDf.collect()) +assert(uncaughtExceptionHandler.exception.isEmpty) + } + + test("script transformation should not swallow errors from upstream operators (no serde)") { +assume(TestUtils.testCommandAvailable("/bin/bash")) + +val rowsDf = Seq("a", "b", "c").map(Tuple1.apply).toDF("a") +val e = intercept[TestFailedException] { + checkAnswer( +rowsDf, +(child: SparkPlan) => createScriptTransformationExec( + input = Seq(rowsDf.col("a").expr), + script = "cat", + output = Seq(AttributeReference("a", StringType)()), + child = ExceptionInjectingOperator(child), + ioschema = defaultIOSchema +), +rowsDf.collect()) +} +assert(e.getMessage().contains("intentional exception")) +// Before SPARK-25158, uncaughtExceptionHandler will catch IllegalArgumentException +assert(uncaughtExceptionHandler.exception.isEmpty) + } + + test("SPARK-25990: TRANSFORM should handle different data types correctly") { +assume(TestUtils.testCommandAvailable("python")) +val scriptFilePath = getTestResourcePath("test_script.py") + +withTempView("v") { + val df = Seq( +(1, "1", 1.0, BigDecimal(1.0), new Timestamp(1)), +(2, "2", 2.0, BigDecimal(2.0), new Timestamp(2)), +(3, "3", 3.0, BigDecimal(3.0), new Timestamp(3)) + ).toDF("a", "b", "c", "d", "e") // Note column d's data type is Decimal(38, 18) + df.createTempView("v") + + val query = sql( +s""" + |SELECT + |TRANSFORM(a, b, c, d, e) + |U
[GitHub] [spark] sarutak commented on pull request #29002: [SPARK-32175][CORE] Fix the order between initialization for ExecutorPlugin and starting heartbeat thread
sarutak commented on pull request #29002: URL: https://github.com/apache/spark/pull/29002#issuecomment-660464123 retest this please. This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] [spark] SparkQA commented on pull request #29002: [SPARK-32175][CORE] Fix the order between initialization for ExecutorPlugin and starting heartbeat thread
SparkQA commented on pull request #29002: URL: https://github.com/apache/spark/pull/29002#issuecomment-660464379 **[Test build #126105 has started](https://amplab.cs.berkeley.edu/jenkins/job/SparkPullRequestBuilder/126105/testReport)** for PR 29002 at commit [`d768385`](https://github.com/apache/spark/commit/d768385caac9c79c456de87a4afd72298dda46db). This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] [spark] AmplabJenkins commented on pull request #29002: [SPARK-32175][CORE] Fix the order between initialization for ExecutorPlugin and starting heartbeat thread
AmplabJenkins commented on pull request #29002: URL: https://github.com/apache/spark/pull/29002#issuecomment-660464419 This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] [spark] ulysses-you opened a new pull request #29152: [SPARK-32356][SQL] Forbid create view with null type
ulysses-you opened a new pull request #29152: URL: https://github.com/apache/spark/pull/29152 ### What changes were proposed in this pull request? After [#28833](https://github.com/apache/spark/pull/28833), we forbid creating a table with null type, but that change did not cover views. This PR aims to forbid creating a view with null type. ### Why are the changes needed? Currently, `create view v as select null` is supported with `in-memory` but fails in `hive`. It's better for both to have the same behavior. This includes 2 commands: * create view v as select null * alter view v as select null ### Does this PR introduce _any_ user-facing change? Yes, users will get an exception when creating a view with null type. ### How was this patch tested? Added UT. This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] [spark] AmplabJenkins removed a comment on pull request #29002: [SPARK-32175][CORE] Fix the order between initialization for ExecutorPlugin and starting heartbeat thread
AmplabJenkins removed a comment on pull request #29002: URL: https://github.com/apache/spark/pull/29002#issuecomment-660464419 This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] [spark] ulysses-you commented on pull request #28833: [SPARK-20680][SQL] Spark-sql do not support for creating table with void column datatype
ulysses-you commented on pull request #28833: URL: https://github.com/apache/spark/pull/28833#issuecomment-660464702 I have created a [SPARK-32356/#29152](https://github.com/apache/spark/pull/29152) to forbid this. This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] [spark] SparkQA commented on pull request #29152: [SPARK-32356][SQL] Forbid create view with null type
SparkQA commented on pull request #29152: URL: https://github.com/apache/spark/pull/29152#issuecomment-660465114 **[Test build #126106 has started](https://amplab.cs.berkeley.edu/jenkins/job/SparkPullRequestBuilder/126106/testReport)** for PR 29152 at commit [`d5a5575`](https://github.com/apache/spark/commit/d5a5575fd81c70f62fe54a807bf80c6d81f41d5f). This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] [spark] AmplabJenkins commented on pull request #29152: [SPARK-32356][SQL] Forbid create view with null type
AmplabJenkins commented on pull request #29152: URL: https://github.com/apache/spark/pull/29152#issuecomment-660465260 This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] [spark] AmplabJenkins removed a comment on pull request #29152: [SPARK-32356][SQL] Forbid create view with null type
AmplabJenkins removed a comment on pull request #29152: URL: https://github.com/apache/spark/pull/29152#issuecomment-660465260 This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] [spark] maropu commented on pull request #29085: [SPARK-32106][SQL]Implement SparkScriptTransformationExec in sql/core
maropu commented on pull request #29085: URL: https://github.com/apache/spark/pull/29085#issuecomment-660466175 Could you add tests in `SQLQueryTestSuite`, too? Probably, we can add `sql-tests/inputs/transform.sql`? This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] [spark] maropu commented on a change in pull request #29085: [SPARK-32106][SQL]Implement SparkScriptTransformationExec in sql/core
maropu commented on a change in pull request #29085: URL: https://github.com/apache/spark/pull/29085#discussion_r456775139 ## File path: sql/core/src/test/scala/org/apache/spark/sql/execution/BaseScriptTransformationSuite.scala ## @@ -0,0 +1,296 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + *http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.execution + +import java.sql.{Date, Timestamp} + +import org.scalatest.Assertions._ +import org.scalatest.BeforeAndAfterEach +import org.scalatest.exceptions.TestFailedException + +import org.apache.spark.{SparkException, TaskContext, TestUtils} +import org.apache.spark.rdd.RDD +import org.apache.spark.sql.Column +import org.apache.spark.sql.catalyst.InternalRow +import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference, Expression} +import org.apache.spark.sql.catalyst.plans.physical.Partitioning +import org.apache.spark.sql.functions._ +import org.apache.spark.sql.test.SQLTestUtils +import org.apache.spark.sql.types._ +import org.apache.spark.unsafe.types.CalendarInterval + +abstract class BaseScriptTransformationSuite extends SparkPlanTest with SQLTestUtils + with BeforeAndAfterEach { + import testImplicits._ + import ScriptTransformationIOSchema._ + + protected val uncaughtExceptionHandler = new TestUncaughtExceptionHandler + + private var defaultUncaughtExceptionHandler: Thread.UncaughtExceptionHandler = _ + + protected override def beforeAll(): Unit = { +super.beforeAll() +defaultUncaughtExceptionHandler = Thread.getDefaultUncaughtExceptionHandler +Thread.setDefaultUncaughtExceptionHandler(uncaughtExceptionHandler) + } + + protected override def afterAll(): Unit = { +super.afterAll() +Thread.setDefaultUncaughtExceptionHandler(defaultUncaughtExceptionHandler) + } + + override protected def afterEach(): Unit = { +super.afterEach() +uncaughtExceptionHandler.cleanStatus() + } + + def isHive23OrSpark: Boolean + + def createScriptTransformationExec( + input: Seq[Expression], + script: String, + output: Seq[Attribute], + child: SparkPlan, + ioschema: ScriptTransformationIOSchema): BaseScriptTransformationExec + + test("cat without SerDe") { +assume(TestUtils.testCommandAvailable("/bin/bash")) + +val rowsDf = Seq("a", "b", "c").map(Tuple1.apply).toDF("a") +checkAnswer( + rowsDf, + (child: SparkPlan) => 
createScriptTransformationExec( +input = Seq(rowsDf.col("a").expr), +script = "cat", +output = Seq(AttributeReference("a", StringType)()), +child = child, +ioschema = defaultIOSchema + ), + rowsDf.collect()) +assert(uncaughtExceptionHandler.exception.isEmpty) + } + + test("script transformation should not swallow errors from upstream operators (no serde)") { +assume(TestUtils.testCommandAvailable("/bin/bash")) + +val rowsDf = Seq("a", "b", "c").map(Tuple1.apply).toDF("a") +val e = intercept[TestFailedException] { + checkAnswer( +rowsDf, +(child: SparkPlan) => createScriptTransformationExec( + input = Seq(rowsDf.col("a").expr), + script = "cat", + output = Seq(AttributeReference("a", StringType)()), + child = ExceptionInjectingOperator(child), + ioschema = defaultIOSchema +), +rowsDf.collect()) +} +assert(e.getMessage().contains("intentional exception")) +// Before SPARK-25158, uncaughtExceptionHandler will catch IllegalArgumentException +assert(uncaughtExceptionHandler.exception.isEmpty) + } + + test("SPARK-25990: TRANSFORM should handle different data types correctly") { +assume(TestUtils.testCommandAvailable("python")) +val scriptFilePath = getTestResourcePath("test_script.py") + +withTempView("v") { + val df = Seq( +(1, "1", 1.0, BigDecimal(1.0), new Timestamp(1)), +(2, "2", 2.0, BigDecimal(2.0), new Timestamp(2)), +(3, "3", 3.0, BigDecimal(3.0), new Timestamp(3)) + ).toDF("a", "b", "c", "d", "e") // Note column d's data type is Decimal(38, 18) + df.createTempView("v") + + val query = sql( +s""" + |SELECT + |TRANSFORM(a, b, c, d, e) + |U
[GitHub] [spark] maropu commented on a change in pull request #29085: [SPARK-32106][SQL]Implement SparkScriptTransformationExec in sql/core
maropu commented on a change in pull request #29085: URL: https://github.com/apache/spark/pull/29085#discussion_r456775139 ## File path: sql/core/src/test/scala/org/apache/spark/sql/execution/BaseScriptTransformationSuite.scala ## @@ -0,0 +1,296 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + *http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.execution + +import java.sql.{Date, Timestamp} + +import org.scalatest.Assertions._ +import org.scalatest.BeforeAndAfterEach +import org.scalatest.exceptions.TestFailedException + +import org.apache.spark.{SparkException, TaskContext, TestUtils} +import org.apache.spark.rdd.RDD +import org.apache.spark.sql.Column +import org.apache.spark.sql.catalyst.InternalRow +import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference, Expression} +import org.apache.spark.sql.catalyst.plans.physical.Partitioning +import org.apache.spark.sql.functions._ +import org.apache.spark.sql.test.SQLTestUtils +import org.apache.spark.sql.types._ +import org.apache.spark.unsafe.types.CalendarInterval + +abstract class BaseScriptTransformationSuite extends SparkPlanTest with SQLTestUtils + with BeforeAndAfterEach { + import testImplicits._ + import ScriptTransformationIOSchema._ + + protected val uncaughtExceptionHandler = new TestUncaughtExceptionHandler + + private var defaultUncaughtExceptionHandler: Thread.UncaughtExceptionHandler = _ + + protected override def beforeAll(): Unit = { +super.beforeAll() +defaultUncaughtExceptionHandler = Thread.getDefaultUncaughtExceptionHandler +Thread.setDefaultUncaughtExceptionHandler(uncaughtExceptionHandler) + } + + protected override def afterAll(): Unit = { +super.afterAll() +Thread.setDefaultUncaughtExceptionHandler(defaultUncaughtExceptionHandler) + } + + override protected def afterEach(): Unit = { +super.afterEach() +uncaughtExceptionHandler.cleanStatus() + } + + def isHive23OrSpark: Boolean + + def createScriptTransformationExec( + input: Seq[Expression], + script: String, + output: Seq[Attribute], + child: SparkPlan, + ioschema: ScriptTransformationIOSchema): BaseScriptTransformationExec + + test("cat without SerDe") { +assume(TestUtils.testCommandAvailable("/bin/bash")) + +val rowsDf = Seq("a", "b", "c").map(Tuple1.apply).toDF("a") +checkAnswer( + rowsDf, + (child: SparkPlan) => 
createScriptTransformationExec( +input = Seq(rowsDf.col("a").expr), +script = "cat", +output = Seq(AttributeReference("a", StringType)()), +child = child, +ioschema = defaultIOSchema + ), + rowsDf.collect()) +assert(uncaughtExceptionHandler.exception.isEmpty) + } + + test("script transformation should not swallow errors from upstream operators (no serde)") { +assume(TestUtils.testCommandAvailable("/bin/bash")) + +val rowsDf = Seq("a", "b", "c").map(Tuple1.apply).toDF("a") +val e = intercept[TestFailedException] { + checkAnswer( +rowsDf, +(child: SparkPlan) => createScriptTransformationExec( + input = Seq(rowsDf.col("a").expr), + script = "cat", + output = Seq(AttributeReference("a", StringType)()), + child = ExceptionInjectingOperator(child), + ioschema = defaultIOSchema +), +rowsDf.collect()) +} +assert(e.getMessage().contains("intentional exception")) +// Before SPARK-25158, uncaughtExceptionHandler will catch IllegalArgumentException +assert(uncaughtExceptionHandler.exception.isEmpty) + } + + test("SPARK-25990: TRANSFORM should handle different data types correctly") { +assume(TestUtils.testCommandAvailable("python")) +val scriptFilePath = getTestResourcePath("test_script.py") + +withTempView("v") { + val df = Seq( +(1, "1", 1.0, BigDecimal(1.0), new Timestamp(1)), +(2, "2", 2.0, BigDecimal(2.0), new Timestamp(2)), +(3, "3", 3.0, BigDecimal(3.0), new Timestamp(3)) + ).toDF("a", "b", "c", "d", "e") // Note column d's data type is Decimal(38, 18) + df.createTempView("v") + + val query = sql( +s""" + |SELECT + |TRANSFORM(a, b, c, d, e) + |U
[GitHub] [spark] maropu commented on a change in pull request #29085: [SPARK-32106][SQL]Implement SparkScriptTransformationExec in sql/core
maropu commented on a change in pull request #29085: URL: https://github.com/apache/spark/pull/29085#discussion_r456775139 ## File path: sql/core/src/test/scala/org/apache/spark/sql/execution/BaseScriptTransformationSuite.scala ## @@ -0,0 +1,296 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + *http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.execution + +import java.sql.{Date, Timestamp} + +import org.scalatest.Assertions._ +import org.scalatest.BeforeAndAfterEach +import org.scalatest.exceptions.TestFailedException + +import org.apache.spark.{SparkException, TaskContext, TestUtils} +import org.apache.spark.rdd.RDD +import org.apache.spark.sql.Column +import org.apache.spark.sql.catalyst.InternalRow +import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference, Expression} +import org.apache.spark.sql.catalyst.plans.physical.Partitioning +import org.apache.spark.sql.functions._ +import org.apache.spark.sql.test.SQLTestUtils +import org.apache.spark.sql.types._ +import org.apache.spark.unsafe.types.CalendarInterval + +abstract class BaseScriptTransformationSuite extends SparkPlanTest with SQLTestUtils + with BeforeAndAfterEach { + import testImplicits._ + import ScriptTransformationIOSchema._ + + protected val uncaughtExceptionHandler = new TestUncaughtExceptionHandler + + private var defaultUncaughtExceptionHandler: Thread.UncaughtExceptionHandler = _ + + protected override def beforeAll(): Unit = { +super.beforeAll() +defaultUncaughtExceptionHandler = Thread.getDefaultUncaughtExceptionHandler +Thread.setDefaultUncaughtExceptionHandler(uncaughtExceptionHandler) + } + + protected override def afterAll(): Unit = { +super.afterAll() +Thread.setDefaultUncaughtExceptionHandler(defaultUncaughtExceptionHandler) + } + + override protected def afterEach(): Unit = { +super.afterEach() +uncaughtExceptionHandler.cleanStatus() + } + + def isHive23OrSpark: Boolean + + def createScriptTransformationExec( + input: Seq[Expression], + script: String, + output: Seq[Attribute], + child: SparkPlan, + ioschema: ScriptTransformationIOSchema): BaseScriptTransformationExec + + test("cat without SerDe") { +assume(TestUtils.testCommandAvailable("/bin/bash")) + +val rowsDf = Seq("a", "b", "c").map(Tuple1.apply).toDF("a") +checkAnswer( + rowsDf, + (child: SparkPlan) => 
createScriptTransformationExec( +input = Seq(rowsDf.col("a").expr), +script = "cat", +output = Seq(AttributeReference("a", StringType)()), +child = child, +ioschema = defaultIOSchema + ), + rowsDf.collect()) +assert(uncaughtExceptionHandler.exception.isEmpty) + } + + test("script transformation should not swallow errors from upstream operators (no serde)") { +assume(TestUtils.testCommandAvailable("/bin/bash")) + +val rowsDf = Seq("a", "b", "c").map(Tuple1.apply).toDF("a") +val e = intercept[TestFailedException] { + checkAnswer( +rowsDf, +(child: SparkPlan) => createScriptTransformationExec( + input = Seq(rowsDf.col("a").expr), + script = "cat", + output = Seq(AttributeReference("a", StringType)()), + child = ExceptionInjectingOperator(child), + ioschema = defaultIOSchema +), +rowsDf.collect()) +} +assert(e.getMessage().contains("intentional exception")) +// Before SPARK-25158, uncaughtExceptionHandler will catch IllegalArgumentException +assert(uncaughtExceptionHandler.exception.isEmpty) + } + + test("SPARK-25990: TRANSFORM should handle different data types correctly") { +assume(TestUtils.testCommandAvailable("python")) +val scriptFilePath = getTestResourcePath("test_script.py") + +withTempView("v") { + val df = Seq( +(1, "1", 1.0, BigDecimal(1.0), new Timestamp(1)), +(2, "2", 2.0, BigDecimal(2.0), new Timestamp(2)), +(3, "3", 3.0, BigDecimal(3.0), new Timestamp(3)) + ).toDF("a", "b", "c", "d", "e") // Note column d's data type is Decimal(38, 18) + df.createTempView("v") + + val query = sql( +s""" + |SELECT + |TRANSFORM(a, b, c, d, e) + |U
[GitHub] [spark] AngersZhuuuu commented on pull request #29085: [SPARK-32106][SQL]Implement SparkScriptTransformationExec in sql/core
AngersZhuuuu commented on pull request #29085: URL: https://github.com/apache/spark/pull/29085#issuecomment-660469205 > Could you add tests in `SQLQueryTestSuite`, too? Probably, we can add `sql-tests/inputs/transform.sql`? Sure, I will update it later. This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] [spark] AngersZhuuuu commented on a change in pull request #29085: [SPARK-32106][SQL]Implement SparkScriptTransformationExec in sql/core
AngersZh commented on a change in pull request #29085: URL: https://github.com/apache/spark/pull/29085#discussion_r456780423 ## File path: sql/core/src/test/scala/org/apache/spark/sql/execution/BaseScriptTransformationSuite.scala ## @@ -0,0 +1,296 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + *http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.execution + +import java.sql.{Date, Timestamp} + +import org.scalatest.Assertions._ +import org.scalatest.BeforeAndAfterEach +import org.scalatest.exceptions.TestFailedException + +import org.apache.spark.{SparkException, TaskContext, TestUtils} +import org.apache.spark.rdd.RDD +import org.apache.spark.sql.Column +import org.apache.spark.sql.catalyst.InternalRow +import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference, Expression} +import org.apache.spark.sql.catalyst.plans.physical.Partitioning +import org.apache.spark.sql.functions._ +import org.apache.spark.sql.test.SQLTestUtils +import org.apache.spark.sql.types._ +import org.apache.spark.unsafe.types.CalendarInterval + +abstract class BaseScriptTransformationSuite extends SparkPlanTest with SQLTestUtils + with BeforeAndAfterEach { + import testImplicits._ + import ScriptTransformationIOSchema._ + + protected val uncaughtExceptionHandler = new TestUncaughtExceptionHandler + + private var defaultUncaughtExceptionHandler: Thread.UncaughtExceptionHandler = _ + + protected override def beforeAll(): Unit = { +super.beforeAll() +defaultUncaughtExceptionHandler = Thread.getDefaultUncaughtExceptionHandler +Thread.setDefaultUncaughtExceptionHandler(uncaughtExceptionHandler) + } + + protected override def afterAll(): Unit = { +super.afterAll() +Thread.setDefaultUncaughtExceptionHandler(defaultUncaughtExceptionHandler) + } + + override protected def afterEach(): Unit = { +super.afterEach() +uncaughtExceptionHandler.cleanStatus() + } + + def isHive23OrSpark: Boolean + + def createScriptTransformationExec( + input: Seq[Expression], + script: String, + output: Seq[Attribute], + child: SparkPlan, + ioschema: ScriptTransformationIOSchema): BaseScriptTransformationExec + + test("cat without SerDe") { +assume(TestUtils.testCommandAvailable("/bin/bash")) + +val rowsDf = Seq("a", "b", "c").map(Tuple1.apply).toDF("a") +checkAnswer( + rowsDf, + (child: SparkPlan) => 
createScriptTransformationExec( +input = Seq(rowsDf.col("a").expr), +script = "cat", +output = Seq(AttributeReference("a", StringType)()), +child = child, +ioschema = defaultIOSchema + ), + rowsDf.collect()) +assert(uncaughtExceptionHandler.exception.isEmpty) + } + + test("script transformation should not swallow errors from upstream operators (no serde)") { +assume(TestUtils.testCommandAvailable("/bin/bash")) + +val rowsDf = Seq("a", "b", "c").map(Tuple1.apply).toDF("a") +val e = intercept[TestFailedException] { + checkAnswer( +rowsDf, +(child: SparkPlan) => createScriptTransformationExec( + input = Seq(rowsDf.col("a").expr), + script = "cat", + output = Seq(AttributeReference("a", StringType)()), + child = ExceptionInjectingOperator(child), + ioschema = defaultIOSchema +), +rowsDf.collect()) +} +assert(e.getMessage().contains("intentional exception")) +// Before SPARK-25158, uncaughtExceptionHandler will catch IllegalArgumentException +assert(uncaughtExceptionHandler.exception.isEmpty) + } + + test("SPARK-25990: TRANSFORM should handle different data types correctly") { +assume(TestUtils.testCommandAvailable("python")) +val scriptFilePath = getTestResourcePath("test_script.py") + +withTempView("v") { + val df = Seq( +(1, "1", 1.0, BigDecimal(1.0), new Timestamp(1)), +(2, "2", 2.0, BigDecimal(2.0), new Timestamp(2)), +(3, "3", 3.0, BigDecimal(3.0), new Timestamp(3)) + ).toDF("a", "b", "c", "d", "e") // Note column d's data type is Decimal(38, 18) + df.createTempView("v") + + val query = sql( +s""" + |SELECT + |TRANSFORM(a, b, c, d, e) +
[GitHub] [spark] SparkQA commented on pull request #29085: [SPARK-32106][SQL]Implement SparkScriptTransformationExec in sql/core
SparkQA commented on pull request #29085: URL: https://github.com/apache/spark/pull/29085#issuecomment-660469367 **[Test build #126107 has started](https://amplab.cs.berkeley.edu/jenkins/job/SparkPullRequestBuilder/126107/testReport)** for PR 29085 at commit [`a6f1e7d`](https://github.com/apache/spark/commit/a6f1e7d88de983031aa5d3235a7ac497c70946a1). This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] [spark] AmplabJenkins commented on pull request #29085: [SPARK-32106][SQL]Implement SparkScriptTransformationExec in sql/core
AmplabJenkins commented on pull request #29085: URL: https://github.com/apache/spark/pull/29085#issuecomment-660469503 This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] [spark] AmplabJenkins removed a comment on pull request #29085: [SPARK-32106][SQL]Implement SparkScriptTransformationExec in sql/core
AmplabJenkins removed a comment on pull request #29085: URL: https://github.com/apache/spark/pull/29085#issuecomment-660469503 This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org
[GitHub] [spark] SparkQA commented on pull request #29146: [WIP][SPARK-32257][SQL] Reports explicit errors for invalid usage of SET command
SparkQA commented on pull request #29146: URL: https://github.com/apache/spark/pull/29146#issuecomment-660471752 **[Test build #126104 has finished](https://amplab.cs.berkeley.edu/jenkins/job/SparkPullRequestBuilder/126104/testReport)** for PR 29146 at commit [`0d357ae`](https://github.com/apache/spark/commit/0d357ae6539822f59aa4ae5ff34f5215f27666e4). * This patch **fails Spark unit tests**. * This patch merges cleanly. * This patch adds no public classes. This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org - To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org