This is an automated email from the ASF dual-hosted git repository. gurwls223 pushed a commit to branch branch-3.0 in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-3.0 by this push: new cb253b1 [SPARK-31393][SQL] Show the correct alias in schema for expression cb253b1 is described below commit cb253b1c5f9b3e6c71adb5d43d8e82514aba00ef Author: beliefer <belie...@163.com> AuthorDate: Tue May 12 10:25:04 2020 +0900 [SPARK-31393][SQL] Show the correct alias in schema for expression ### What changes were proposed in this pull request? Some aliases of expressions cannot be displayed correctly in the schema. This PR will fix them. - `TimeWindow` - `MaxBy` - `MinBy` - `UnaryMinus` - `BitwiseCount` This PR also fixes a typo issue; please look at https://github.com/apache/spark/blob/b7cde42b04b21c9bfee6535199cf385855c15853/sql/core/src/test/scala/org/apache/spark/sql/ExpressionsSchemaSuite.scala#L142 Note: 1. `MaxBy` and `MinBy` extend `MaxMinBy`, and the latter adds a method `funcName` that is not needed. We can reuse `prettyName` to replace `funcName`. 2. Spark SQL has some functions with inelegant implementations. For example: `BitwiseCount` overrides the sql method as shown below: `override def sql: String = s"bit_count(${child.sql})"` I don't think it's elegant enough, because `Expression` gives the following definition. ``` def sql: String = { val childrenSQL = children.map(_.sql).mkString(", ") s"$prettyName($childrenSQL)" } ``` By this definition, `BitwiseCount` should override the `prettyName` method. ### Why are the changes needed? Improve the implementation of some expressions. ### Does this PR introduce any user-facing change? 'Yes'. This PR will let users see the correct alias in the schema. ### How was this patch tested? Jenkins test. Closes #28164 from beliefer/elegant-pretty-name-for-function. 
Lead-authored-by: beliefer <belie...@163.com> Co-authored-by: gengjiaan <gengji...@360.cn> Signed-off-by: HyukjinKwon <gurwls...@apache.org> (cherry picked from commit a89006aba03a623960e5c4c6864ca8c899c81db9) Signed-off-by: HyukjinKwon <gurwls...@apache.org> --- .../apache/spark/sql/catalyst/analysis/FunctionRegistry.scala | 2 +- .../org/apache/spark/sql/catalyst/expressions/TimeWindow.scala | 1 + .../spark/sql/catalyst/expressions/aggregate/MaxByAndMinBy.scala | 9 +++++---- .../org/apache/spark/sql/catalyst/expressions/arithmetic.scala | 7 ++++++- .../spark/sql/catalyst/expressions/bitwiseExpressions.scala | 4 ++-- .../src/test/resources/sql-functions/sql-expression-schema.md | 8 ++++---- sql/core/src/test/resources/sql-tests/results/operators.sql.out | 2 +- .../test/scala/org/apache/spark/sql/ExpressionsSchemaSuite.scala | 2 +- 8 files changed, 21 insertions(+), 14 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala index d3b0731..c2c0df5 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala @@ -253,7 +253,7 @@ object FunctionRegistry { expression[Log2]("log2"), expression[Log]("ln"), expression[Remainder]("mod", true), - expression[UnaryMinus]("negative"), + expression[UnaryMinus]("negative", true), expression[Pi]("pi"), expression[Pmod]("pmod"), expression[UnaryPositive]("positive"), diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/TimeWindow.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/TimeWindow.scala index caacb71..82d6894 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/TimeWindow.scala +++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/TimeWindow.scala @@ -63,6 +63,7 @@ case class TimeWindow( override def dataType: DataType = new StructType() .add(StructField("start", TimestampType)) .add(StructField("end", TimestampType)) + override def prettyName: String = "window" // This expression is replaced in the analyzer. override lazy val resolved = false diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/MaxByAndMinBy.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/MaxByAndMinBy.scala index 2e20224..6d3d3da 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/MaxByAndMinBy.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/aggregate/MaxByAndMinBy.scala @@ -31,7 +31,6 @@ abstract class MaxMinBy extends DeclarativeAggregate { def valueExpr: Expression def orderingExpr: Expression - protected def funcName: String // The predicate compares two ordering values. protected def predicate(oldExpr: Expression, newExpr: Expression): Expression // The arithmetic expression returns greatest/least value of all parameters. @@ -46,7 +45,7 @@ abstract class MaxMinBy extends DeclarativeAggregate { override def dataType: DataType = valueExpr.dataType override def checkInputDataTypes(): TypeCheckResult = - TypeUtils.checkForOrderingExpr(orderingExpr.dataType, s"function $funcName") + TypeUtils.checkForOrderingExpr(orderingExpr.dataType, s"function $prettyName") // The attributes used to keep extremum (max or min) and associated aggregated values. 
private lazy val extremumOrdering = @@ -101,7 +100,8 @@ abstract class MaxMinBy extends DeclarativeAggregate { group = "agg_funcs", since = "3.0.0") case class MaxBy(valueExpr: Expression, orderingExpr: Expression) extends MaxMinBy { - override protected def funcName: String = "max_by" + + override def prettyName: String = "max_by" override protected def predicate(oldExpr: Expression, newExpr: Expression): Expression = oldExpr > newExpr @@ -120,7 +120,8 @@ case class MaxBy(valueExpr: Expression, orderingExpr: Expression) extends MaxMin group = "agg_funcs", since = "3.0.0") case class MinBy(valueExpr: Expression, orderingExpr: Expression) extends MaxMinBy { - override protected def funcName: String = "min_by" + + override def prettyName: String = "min_by" override protected def predicate(oldExpr: Expression, newExpr: Expression): Expression = oldExpr < newExpr diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/arithmetic.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/arithmetic.scala index 6a64819..354845d 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/arithmetic.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/arithmetic.scala @@ -86,7 +86,12 @@ case class UnaryMinus(child: Expression) extends UnaryExpression case _ => numeric.negate(input) } - override def sql: String = s"(- ${child.sql})" + override def sql: String = { + getTagValue(FunctionRegistry.FUNC_ALIAS).getOrElse("-") match { + case "-" => s"(- ${child.sql})" + case funcName => s"$funcName(${child.sql})" + } + } } @ExpressionDescription( diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/bitwiseExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/bitwiseExpressions.scala index 72a8f7e..7b819db 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/bitwiseExpressions.scala +++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/bitwiseExpressions.scala @@ -172,6 +172,8 @@ case class BitwiseCount(child: Expression) extends UnaryExpression with ExpectsI override def toString: String = s"bit_count($child)" + override def prettyName: String = "bit_count" + override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = child.dataType match { case BooleanType => defineCodeGen(ctx, ev, c => s"if ($c) 1 else 0") case _ => defineCodeGen(ctx, ev, c => s"java.lang.Long.bitCount($c)") @@ -184,6 +186,4 @@ case class BitwiseCount(child: Expression) extends UnaryExpression with ExpectsI case IntegerType => java.lang.Long.bitCount(input.asInstanceOf[Int]) case LongType => java.lang.Long.bitCount(input.asInstanceOf[Long]) } - - override def sql: String = s"bit_count(${child.sql})" } diff --git a/sql/core/src/test/resources/sql-functions/sql-expression-schema.md b/sql/core/src/test/resources/sql-functions/sql-expression-schema.md index 2091de2..c3ae2a7 100644 --- a/sql/core/src/test/resources/sql-functions/sql-expression-schema.md +++ b/sql/core/src/test/resources/sql-functions/sql-expression-schema.md @@ -1,4 +1,4 @@ -<!-- Automatically generated byExpressionsSchemaSuite --> +<!-- Automatically generated by ExpressionsSchemaSuite --> ## Summary - Number of queries: 328 - Number of expressions that missing example: 34 @@ -273,7 +273,7 @@ | org.apache.spark.sql.catalyst.expressions.TruncTimestamp | date_trunc | SELECT date_trunc('YEAR', '2015-03-05T09:32:05.359') | struct<date_trunc(YEAR, CAST(2015-03-05T09:32:05.359 AS TIMESTAMP)):timestamp> | | org.apache.spark.sql.catalyst.expressions.TypeOf | typeof | SELECT typeof(1) | struct<typeof(1):string> | | org.apache.spark.sql.catalyst.expressions.UnBase64 | unbase64 | SELECT unbase64('U3BhcmsgU1FM') | struct<unbase64(U3BhcmsgU1FM):binary> | -| org.apache.spark.sql.catalyst.expressions.UnaryMinus | negative | SELECT negative(1) | struct<(- 1):int> | +| 
org.apache.spark.sql.catalyst.expressions.UnaryMinus | negative | SELECT negative(1) | struct<negative(1):int> | | org.apache.spark.sql.catalyst.expressions.UnaryPositive | positive | N/A | N/A | | org.apache.spark.sql.catalyst.expressions.Unhex | unhex | SELECT decode(unhex('537061726B2053514C'), 'UTF-8') | struct<decode(unhex(537061726B2053514C), UTF-8):string> | | org.apache.spark.sql.catalyst.expressions.UnixTimestamp | unix_timestamp | SELECT unix_timestamp() | struct<unix_timestamp(current_timestamp(), yyyy-MM-dd HH:mm:ss):bigint> | @@ -312,9 +312,9 @@ | org.apache.spark.sql.catalyst.expressions.aggregate.Last | last_value | SELECT last_value(col) FROM VALUES (10), (5), (20) AS tab(col) | struct<last_value(col, false):int> | | org.apache.spark.sql.catalyst.expressions.aggregate.Last | last | SELECT last(col) FROM VALUES (10), (5), (20) AS tab(col) | struct<last(col, false):int> | | org.apache.spark.sql.catalyst.expressions.aggregate.Max | max | SELECT max(col) FROM VALUES (10), (50), (20) AS tab(col) | struct<max(col):int> | -| org.apache.spark.sql.catalyst.expressions.aggregate.MaxBy | max_by | SELECT max_by(x, y) FROM VALUES (('a', 10)), (('b', 50)), (('c', 20)) AS tab(x, y) | struct<maxby(x, y):string> | +| org.apache.spark.sql.catalyst.expressions.aggregate.MaxBy | max_by | SELECT max_by(x, y) FROM VALUES (('a', 10)), (('b', 50)), (('c', 20)) AS tab(x, y) | struct<max_by(x, y):string> | | org.apache.spark.sql.catalyst.expressions.aggregate.Min | min | SELECT min(col) FROM VALUES (10), (-1), (20) AS tab(col) | struct<min(col):int> | -| org.apache.spark.sql.catalyst.expressions.aggregate.MinBy | min_by | SELECT min_by(x, y) FROM VALUES (('a', 10)), (('b', 50)), (('c', 20)) AS tab(x, y) | struct<minby(x, y):string> | +| org.apache.spark.sql.catalyst.expressions.aggregate.MinBy | min_by | SELECT min_by(x, y) FROM VALUES (('a', 10)), (('b', 50)), (('c', 20)) AS tab(x, y) | struct<min_by(x, y):string> | | 
org.apache.spark.sql.catalyst.expressions.aggregate.Percentile | percentile | SELECT percentile(col, 0.3) FROM VALUES (0), (10) AS tab(col) | struct<percentile(col, CAST(0.3 AS DOUBLE), 1):double> | | org.apache.spark.sql.catalyst.expressions.aggregate.Skewness | skewness | SELECT skewness(col) FROM VALUES (-10), (-20), (100), (1000) AS tab(col) | struct<skewness(CAST(col AS DOUBLE)):double> | | org.apache.spark.sql.catalyst.expressions.aggregate.StddevPop | stddev_pop | SELECT stddev_pop(col) FROM VALUES (1), (2), (3) AS tab(col) | struct<stddev_pop(CAST(col AS DOUBLE)):double> | diff --git a/sql/core/src/test/resources/sql-tests/results/operators.sql.out b/sql/core/src/test/resources/sql-tests/results/operators.sql.out index cf857cf..a94a123 100644 --- a/sql/core/src/test/resources/sql-tests/results/operators.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/operators.sql.out @@ -437,7 +437,7 @@ struct<abs(-3.13):decimal(3,2),abs(CAST(-2.19 AS DOUBLE)):double> -- !query select positive('-1.11'), positive(-1.11), negative('-1.11'), negative(-1.11) -- !query schema -struct<(+ CAST(-1.11 AS DOUBLE)):double,(+ -1.11):decimal(3,2),(- CAST(-1.11 AS DOUBLE)):double,(- -1.11):decimal(3,2)> +struct<(+ CAST(-1.11 AS DOUBLE)):double,(+ -1.11):decimal(3,2),negative(CAST(-1.11 AS DOUBLE)):double,negative(-1.11):decimal(3,2)> -- !query output -1.11 -1.11 1.11 1.11 diff --git a/sql/core/src/test/scala/org/apache/spark/sql/ExpressionsSchemaSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/ExpressionsSchemaSuite.scala index d83aa4f..d69ecd7 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/ExpressionsSchemaSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/ExpressionsSchemaSuite.scala @@ -142,7 +142,7 @@ class ExpressionsSchemaSuite extends QueryTest with SharedSparkSession { } val header = Seq( - s"<!-- Automatically generated by${getClass.getSimpleName} -->", + s"<!-- Automatically generated by ${getClass.getSimpleName} -->", "## Summary", 
s" - Number of queries: ${outputs.size}", s" - Number of expressions that missing example: ${missingExamples.size}", --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org