This is an automated email from the ASF dual-hosted git repository. gurwls223 pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push: new 42f496f [SPARK-31526][SQL][TESTS] Add a new test suite for ExpressionInfo 42f496f is described below commit 42f496f6ac82e51b5ce2463a375f78cb263f1e32 Author: Takeshi Yamamuro <yamam...@apache.org> AuthorDate: Fri Apr 24 11:19:20 2020 +0900 [SPARK-31526][SQL][TESTS] Add a new test suite for ExpressionInfo ### What changes were proposed in this pull request? This PR intends to add a new test suite for `ExpressionInfo`. Major changes are as follows: - Added a new test suite named `ExpressionInfoSuite` - To improve test coverage, added a test for error handling in `ExpressionInfoSuite` - Moved the `ExpressionInfo`-related tests from `UDFSuite` to `ExpressionInfoSuite` - Moved the related tests from `SQLQuerySuite` to `ExpressionInfoSuite` - Added a comment in `ExpressionDescription` (followup of https://github.com/apache/spark/pull/28224) ### Why are the changes needed? To improve test suites/coverage. ### Does this PR introduce any user-facing change? No. ### How was this patch tested? Added tests. Closes #28308 from maropu/SPARK-31526. 
Authored-by: Takeshi Yamamuro <yamam...@apache.org> Signed-off-by: HyukjinKwon <gurwls...@apache.org> --- .../expressions/ExpressionDescription.java | 6 + .../scala/org/apache/spark/sql/SQLQuerySuite.scala | 80 ----------- .../test/scala/org/apache/spark/sql/UDFSuite.scala | 31 ---- .../sql/expressions/ExpressionInfoSuite.scala | 156 +++++++++++++++++++++ 4 files changed, 162 insertions(+), 111 deletions(-) diff --git a/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/ExpressionDescription.java b/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/ExpressionDescription.java index 089fbe5..579f4b3 100644 --- a/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/ExpressionDescription.java +++ b/sql/catalyst/src/main/java/org/apache/spark/sql/catalyst/expressions/ExpressionDescription.java @@ -103,6 +103,12 @@ public @interface ExpressionDescription { String arguments() default ""; String examples() default ""; String note() default ""; + /** + * Valid group names are almost the same with one defined as `groupname` in + * `sql/functions.scala`. But, `collection_funcs` is split into fine-grained three groups: + * `array_funcs`, `map_funcs`, and `json_funcs`. See `ExpressionInfo` for the + * detailed group names. 
+ */ String group() default ""; String since() default ""; String deprecated() default ""; diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala index e199dcc..a958ab8 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala @@ -22,8 +22,6 @@ import java.net.{MalformedURLException, URL} import java.sql.{Date, Timestamp} import java.util.concurrent.atomic.AtomicBoolean -import scala.collection.parallel.immutable.ParVector - import org.apache.spark.{AccumulatorSuite, SparkException} import org.apache.spark.scheduler.{SparkListener, SparkListenerJobStart} import org.apache.spark.sql.catalyst.expressions.GenericRow @@ -31,7 +29,6 @@ import org.apache.spark.sql.catalyst.expressions.aggregate.{Complete, Partial} import org.apache.spark.sql.catalyst.optimizer.{ConvertToLocalRelation, NestedColumnAliasingSuite} import org.apache.spark.sql.catalyst.plans.logical.Project import org.apache.spark.sql.catalyst.util.StringUtils -import org.apache.spark.sql.execution.HiveResult.hiveResultString import org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanHelper import org.apache.spark.sql.execution.aggregate.{HashAggregateExec, ObjectHashAggregateExec, SortAggregateExec} import org.apache.spark.sql.execution.columnar.InMemoryTableScanExec @@ -126,83 +123,6 @@ class SQLQuerySuite extends QueryTest with SharedSparkSession with AdaptiveSpark } } - test("using _FUNC_ instead of function names in examples") { - val exampleRe = "(>.*;)".r - val setStmtRe = "(?i)^(>\\s+set\\s+).+".r - val ignoreSet = Set( - // Examples for CaseWhen show simpler syntax: - // `CASE WHEN ... THEN ... WHEN ... THEN ... END` - "org.apache.spark.sql.catalyst.expressions.CaseWhen", - // _FUNC_ is replaced by `locate` but `locate(... 
IN ...)` is not supported - "org.apache.spark.sql.catalyst.expressions.StringLocate", - // _FUNC_ is replaced by `%` which causes a parsing error on `SELECT %(2, 1.8)` - "org.apache.spark.sql.catalyst.expressions.Remainder", - // Examples demonstrate alternative names, see SPARK-20749 - "org.apache.spark.sql.catalyst.expressions.Length") - spark.sessionState.functionRegistry.listFunction().foreach { funcId => - val info = spark.sessionState.catalog.lookupFunctionInfo(funcId) - val className = info.getClassName - withClue(s"Expression class '$className'") { - val exprExamples = info.getOriginalExamples - if (!exprExamples.isEmpty && !ignoreSet.contains(className)) { - assert(exampleRe.findAllIn(exprExamples).toIterable - .filter(setStmtRe.findFirstIn(_).isEmpty) // Ignore SET commands - .forall(_.contains("_FUNC_"))) - } - } - } - } - - test("check outputs of expression examples") { - def unindentAndTrim(s: String): String = { - s.replaceAll("\n\\s+", "\n").trim - } - val beginSqlStmtRe = " > ".r - val endSqlStmtRe = ";\n".r - def checkExampleSyntax(example: String): Unit = { - val beginStmtNum = beginSqlStmtRe.findAllIn(example).length - val endStmtNum = endSqlStmtRe.findAllIn(example).length - assert(beginStmtNum === endStmtNum, - "The number of ` > ` does not match to the number of `;`") - } - val exampleRe = """^(.+);\n(?s)(.+)$""".r - val ignoreSet = Set( - // One of examples shows getting the current timestamp - "org.apache.spark.sql.catalyst.expressions.UnixTimestamp", - // Random output without a seed - "org.apache.spark.sql.catalyst.expressions.Rand", - "org.apache.spark.sql.catalyst.expressions.Randn", - "org.apache.spark.sql.catalyst.expressions.Shuffle", - "org.apache.spark.sql.catalyst.expressions.Uuid", - // The example calls methods that return unstable results. 
- "org.apache.spark.sql.catalyst.expressions.CallMethodViaReflection") - - val parFuncs = new ParVector(spark.sessionState.functionRegistry.listFunction().toVector) - parFuncs.foreach { funcId => - // Examples can change settings. We clone the session to prevent tests clashing. - val clonedSpark = spark.cloneSession() - // Coalescing partitions can change result order, so disable it. - clonedSpark.sessionState.conf.setConf(SQLConf.COALESCE_PARTITIONS_ENABLED, false) - val info = clonedSpark.sessionState.catalog.lookupFunctionInfo(funcId) - val className = info.getClassName - if (!ignoreSet.contains(className)) { - withClue(s"Function '${info.getName}', Expression class '$className'") { - val example = info.getExamples - checkExampleSyntax(example) - example.split(" > ").toList.foreach(_ match { - case exampleRe(sql, output) => - val df = clonedSpark.sql(sql) - val actual = unindentAndTrim( - hiveResultString(df.queryExecution.executedPlan).mkString("\n")) - val expected = unindentAndTrim(output) - assert(actual === expected) - case _ => - }) - } - } - } - } - test("SPARK-6743: no columns from cache") { Seq( (83, 0, 38), diff --git a/sql/core/src/test/scala/org/apache/spark/sql/UDFSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/UDFSuite.scala index 92ea0ce..e2747d7 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/UDFSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/UDFSuite.scala @@ -20,8 +20,6 @@ package org.apache.spark.sql import java.math.BigDecimal import org.apache.spark.sql.api.java._ -import org.apache.spark.sql.catalyst.FunctionIdentifier -import org.apache.spark.sql.catalyst.expressions.ExpressionInfo import org.apache.spark.sql.catalyst.plans.logical.Project import org.apache.spark.sql.execution.{QueryExecution, SimpleMode} import org.apache.spark.sql.execution.columnar.InMemoryRelation @@ -534,35 +532,6 @@ class UDFSuite extends QueryTest with SharedSparkSession { 
assert(spark.range(2).select(nonDeterministicJavaUDF()).distinct().count() == 2) } - test("Replace _FUNC_ in UDF ExpressionInfo") { - val info = spark.sessionState.catalog.lookupFunctionInfo(FunctionIdentifier("upper")) - assert(info.getName === "upper") - assert(info.getClassName === "org.apache.spark.sql.catalyst.expressions.Upper") - assert(info.getUsage === "upper(str) - Returns `str` with all characters changed to uppercase.") - assert(info.getExamples.contains("> SELECT upper('SparkSql');")) - assert(info.getSince === "1.0.1") - assert(info.getNote === "") - assert(info.getExtended.contains("> SELECT upper('SparkSql');")) - } - - test("group info in ExpressionInfo") { - val info = spark.sessionState.catalog.lookupFunctionInfo(FunctionIdentifier("sum")) - assert(info.getGroup === "agg_funcs") - - Seq("agg_funcs", "array_funcs", "datetime_funcs", "json_funcs", "map_funcs", "window_funcs") - .foreach { groupName => - val info = new ExpressionInfo( - "testClass", null, "testName", null, "", "", "", groupName, "", "") - assert(info.getGroup === groupName) - } - - val errMsg = intercept[IllegalArgumentException] { - val invalidGroupName = "invalid_group_funcs" - new ExpressionInfo("testClass", null, "testName", null, "", "", "", invalidGroupName, "", "") - }.getMessage - assert(errMsg.contains("'group' is malformed in the expression [testName].")) - } - test("SPARK-28521 error message for CAST(parameter types contains DataType)") { val e = intercept[AnalysisException] { spark.sql("SELECT CAST(1)") diff --git a/sql/core/src/test/scala/org/apache/spark/sql/expressions/ExpressionInfoSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/expressions/ExpressionInfoSuite.scala new file mode 100644 index 0000000..9a6fe46 --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/sql/expressions/ExpressionInfoSuite.scala @@ -0,0 +1,156 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.expressions + +import scala.collection.parallel.immutable.ParVector + +import org.apache.spark.SparkFunSuite +import org.apache.spark.sql.catalyst.FunctionIdentifier +import org.apache.spark.sql.catalyst.expressions.ExpressionInfo +import org.apache.spark.sql.execution.HiveResult.hiveResultString +import org.apache.spark.sql.internal.SQLConf +import org.apache.spark.sql.test.SharedSparkSession + +class ExpressionInfoSuite extends SparkFunSuite with SharedSparkSession { + + test("Replace _FUNC_ in ExpressionInfo") { + val info = spark.sessionState.catalog.lookupFunctionInfo(FunctionIdentifier("upper")) + assert(info.getName === "upper") + assert(info.getClassName === "org.apache.spark.sql.catalyst.expressions.Upper") + assert(info.getUsage === "upper(str) - Returns `str` with all characters changed to uppercase.") + assert(info.getExamples.contains("> SELECT upper('SparkSql');")) + assert(info.getSince === "1.0.1") + assert(info.getNote === "") + assert(info.getExtended.contains("> SELECT upper('SparkSql');")) + } + + test("group info in ExpressionInfo") { + val info = spark.sessionState.catalog.lookupFunctionInfo(FunctionIdentifier("sum")) + assert(info.getGroup === "agg_funcs") + + Seq("agg_funcs", "array_funcs", "datetime_funcs", "json_funcs", 
"map_funcs", "window_funcs") + .foreach { groupName => + val info = new ExpressionInfo( + "testClass", null, "testName", null, "", "", "", groupName, "", "") + assert(info.getGroup === groupName) + } + + val errMsg = intercept[IllegalArgumentException] { + val invalidGroupName = "invalid_group_funcs" + new ExpressionInfo("testClass", null, "testName", null, "", "", "", invalidGroupName, "", "") + }.getMessage + assert(errMsg.contains("'group' is malformed in the expression [testName].")) + } + + test("error handling in ExpressionInfo") { + val errMsg1 = intercept[IllegalArgumentException] { + val invalidNote = " invalid note" + new ExpressionInfo("testClass", null, "testName", null, "", "", invalidNote, "", "", "") + }.getMessage + assert(errMsg1.contains("'note' is malformed in the expression [testName].")) + + val errMsg2 = intercept[IllegalArgumentException] { + val invalidSince = "-3.0.0" + new ExpressionInfo("testClass", null, "testName", null, "", "", "", "", invalidSince, "") + }.getMessage + assert(errMsg2.contains("'since' is malformed in the expression [testName].")) + + val errMsg3 = intercept[IllegalArgumentException] { + val invalidDeprecated = " invalid deprecated" + new ExpressionInfo("testClass", null, "testName", null, "", "", "", "", "", invalidDeprecated) + }.getMessage + assert(errMsg3.contains("'deprecated' is malformed in the expression [testName].")) + } + + test("using _FUNC_ instead of function names in examples") { + val exampleRe = "(>.*;)".r + val setStmtRe = "(?i)^(>\\s+set\\s+).+".r + val ignoreSet = Set( + // Examples for CaseWhen show simpler syntax: + // `CASE WHEN ... THEN ... WHEN ... THEN ... END` + "org.apache.spark.sql.catalyst.expressions.CaseWhen", + // _FUNC_ is replaced by `locate` but `locate(... 
IN ...)` is not supported + "org.apache.spark.sql.catalyst.expressions.StringLocate", + // _FUNC_ is replaced by `%` which causes a parsing error on `SELECT %(2, 1.8)` + "org.apache.spark.sql.catalyst.expressions.Remainder", + // Examples demonstrate alternative names, see SPARK-20749 + "org.apache.spark.sql.catalyst.expressions.Length") + spark.sessionState.functionRegistry.listFunction().foreach { funcId => + val info = spark.sessionState.catalog.lookupFunctionInfo(funcId) + val className = info.getClassName + withClue(s"Expression class '$className'") { + val exprExamples = info.getOriginalExamples + if (!exprExamples.isEmpty && !ignoreSet.contains(className)) { + assert(exampleRe.findAllIn(exprExamples).toIterable + .filter(setStmtRe.findFirstIn(_).isEmpty) // Ignore SET commands + .forall(_.contains("_FUNC_"))) + } + } + } + } + + test("check outputs of expression examples") { + def unindentAndTrim(s: String): String = { + s.replaceAll("\n\\s+", "\n").trim + } + val beginSqlStmtRe = " > ".r + val endSqlStmtRe = ";\n".r + def checkExampleSyntax(example: String): Unit = { + val beginStmtNum = beginSqlStmtRe.findAllIn(example).length + val endStmtNum = endSqlStmtRe.findAllIn(example).length + assert(beginStmtNum === endStmtNum, + "The number of ` > ` does not match to the number of `;`") + } + val exampleRe = """^(.+);\n(?s)(.+)$""".r + val ignoreSet = Set( + // One of examples shows getting the current timestamp + "org.apache.spark.sql.catalyst.expressions.UnixTimestamp", + // Random output without a seed + "org.apache.spark.sql.catalyst.expressions.Rand", + "org.apache.spark.sql.catalyst.expressions.Randn", + "org.apache.spark.sql.catalyst.expressions.Shuffle", + "org.apache.spark.sql.catalyst.expressions.Uuid", + // The example calls methods that return unstable results. 
+ "org.apache.spark.sql.catalyst.expressions.CallMethodViaReflection") + + val parFuncs = new ParVector(spark.sessionState.functionRegistry.listFunction().toVector) + parFuncs.foreach { funcId => + // Examples can change settings. We clone the session to prevent tests clashing. + val clonedSpark = spark.cloneSession() + // Coalescing partitions can change result order, so disable it. + clonedSpark.sessionState.conf.setConf(SQLConf.COALESCE_PARTITIONS_ENABLED, false) + val info = clonedSpark.sessionState.catalog.lookupFunctionInfo(funcId) + val className = info.getClassName + if (!ignoreSet.contains(className)) { + withClue(s"Function '${info.getName}', Expression class '$className'") { + val example = info.getExamples + checkExampleSyntax(example) + example.split(" > ").toList.foreach { + case exampleRe(sql, output) => + val df = clonedSpark.sql(sql) + val actual = unindentAndTrim( + hiveResultString(df.queryExecution.executedPlan).mkString("\n")) + val expected = unindentAndTrim(output) + assert(actual === expected) + case _ => + } + } + } + } + } +} --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org