Repository: spark
Updated Branches:
  refs/heads/master b14d7b5cf -> 11d427c92


[SPARK-16730][SQL] Implement function aliases for type casts

## What changes were proposed in this pull request?
Spark 1.x supports using Hive type names as function names to perform casts, e.g.
```sql
SELECT int(1.0);
SELECT string(2.0);
```

The above queries work in Spark 1.x because Spark 1.x falls back to Hive for 
unimplemented functions, but they break in Spark 2.0 because that fallback was 
removed.

This patch implements function aliases for the following cast functions by 
registering them in the function registry (a quick usage sketch follows the list):
- boolean
- tinyint
- smallint
- int
- bigint
- float
- double
- decimal
- date
- timestamp
- binary
- string
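
As a quick sanity check, each alias should behave exactly like the corresponding explicit cast. A minimal sketch (assuming a SparkSession named `spark` built from this branch, e.g. in spark-shell):

```scala
// int(x) and string(x) are expected to resolve to cast(x as int) and
// cast(x as string), so aliased and explicit forms should return identical rows.
val aliased  = spark.sql("SELECT int(1.0), string(2.0)").head()
val explicit = spark.sql("SELECT CAST(1.0 AS INT), CAST(2.0 AS STRING)").head()
assert(aliased == explicit)  // both should yield Row(1, "2.0")
```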

## How was this patch tested?
Added end-to-end tests in SQLCompatibilityFunctionSuite.

Author: petermaxlee <petermax...@gmail.com>

Closes #14364 from petermaxlee/SPARK-16730-2.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/11d427c9
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/11d427c9
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/11d427c9

Branch: refs/heads/master
Commit: 11d427c924d303e20af90c0179a105f6ff4d89e2
Parents: b14d7b5
Author: petermaxlee <petermax...@gmail.com>
Authored: Thu Jul 28 13:13:17 2016 +0800
Committer: Wenchen Fan <wenc...@databricks.com>
Committed: Thu Jul 28 13:13:17 2016 +0800

----------------------------------------------------------------------
 .../catalyst/analysis/FunctionRegistry.scala    | 51 +++++++++++++++++---
 .../spark/sql/catalyst/expressions/Cast.scala   |  3 ++
 .../sql/SQLCompatibilityFunctionSuite.scala     | 26 ++++++++++
 3 files changed, 73 insertions(+), 7 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/11d427c9/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala
index 6516899..c5f91c1 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/FunctionRegistry.scala
@@ -27,6 +27,7 @@ import org.apache.spark.sql.catalyst.expressions._
 import org.apache.spark.sql.catalyst.expressions.aggregate._
 import org.apache.spark.sql.catalyst.expressions.xml._
 import org.apache.spark.sql.catalyst.util.StringKeyHashMap
+import org.apache.spark.sql.types._
 
 
 /**
@@ -408,8 +409,21 @@ object FunctionRegistry {
     expression[BitwiseAnd]("&"),
     expression[BitwiseNot]("~"),
     expression[BitwiseOr]("|"),
-    expression[BitwiseXor]("^")
-
+    expression[BitwiseXor]("^"),
+
+    // Cast aliases (SPARK-16730)
+    castAlias("boolean", BooleanType),
+    castAlias("tinyint", ByteType),
+    castAlias("smallint", ShortType),
+    castAlias("int", IntegerType),
+    castAlias("bigint", LongType),
+    castAlias("float", FloatType),
+    castAlias("double", DoubleType),
+    castAlias("decimal", DecimalType.USER_DEFAULT),
+    castAlias("date", DateType),
+    castAlias("timestamp", TimestampType),
+    castAlias("binary", BinaryType),
+    castAlias("string", StringType)
   )
 
   val builtin: SimpleFunctionRegistry = {
@@ -452,14 +466,37 @@ object FunctionRegistry {
       }
     }
 
-    val clazz = tag.runtimeClass
+    (name, (expressionInfo[T](name), builder))
+  }
+
+  /**
+   * Creates a function registry lookup entry for cast aliases (SPARK-16730).
+   * For example, if name is "int", and dataType is IntegerType, this means int(x) would become
+   * an alias for cast(x as IntegerType).
+   * See usage above.
+   */
+  private def castAlias(
+      name: String,
+      dataType: DataType): (String, (ExpressionInfo, FunctionBuilder)) = {
+    val builder = (args: Seq[Expression]) => {
+      if (args.size != 1) {
+        throw new AnalysisException(s"Function $name accepts only one argument")
+      }
+      Cast(args.head, dataType)
+    }
+    (name, (expressionInfo[Cast](name), builder))
+  }
+
+  /**
+   * Creates an [[ExpressionInfo]] for the function as defined by expression T using the given name.
+   */
+  private def expressionInfo[T <: Expression : ClassTag](name: String): ExpressionInfo = {
+    val clazz = scala.reflect.classTag[T].runtimeClass
     val df = clazz.getAnnotation(classOf[ExpressionDescription])
     if (df != null) {
-      (name,
-        (new ExpressionInfo(clazz.getCanonicalName, name, df.usage(), df.extended()),
-        builder))
+      new ExpressionInfo(clazz.getCanonicalName, name, df.usage(), df.extended())
     } else {
-      (name, (new ExpressionInfo(clazz.getCanonicalName, name), builder))
+      new ExpressionInfo(clazz.getCanonicalName, name)
     }
   }
 }
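
For readers skimming the hunk above, a stripped-down, self-contained sketch of the registration pattern may help. All names below (`CastAliasSketch`, `Expr`, `CastTo`, `Builder`) are illustrative stand-ins, not Spark's actual classes:

```scala
// Simplified model of castAlias: a registry maps a function name to a builder
// that checks arity and wraps its single argument in a cast node.
object CastAliasSketch {
  sealed trait Expr
  final case class Lit(value: Any) extends Expr
  final case class CastTo(child: Expr, typeName: String) extends Expr

  type Builder = Seq[Expr] => Expr

  def castAlias(name: String, typeName: String): (String, Builder) = {
    val builder: Builder = { args =>
      require(args.size == 1, s"Function $name accepts only one argument")
      CastTo(args.head, typeName)
    }
    name -> builder
  }

  val registry: Map[String, Builder] =
    Map(castAlias("int", "int"), castAlias("string", "string"))

  def main(args: Array[String]): Unit = {
    // Looking up "int" and applying it to a literal yields a cast node,
    // mirroring how int(1.0) is resolved to cast(1.0 as int).
    println(registry("int")(Seq(Lit(1.0))))  // prints CastTo(Lit(1.0),int)
  }
}
```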

http://git-wip-us.apache.org/repos/asf/spark/blob/11d427c9/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala
index a12fba0..c452765 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala
@@ -113,6 +113,9 @@ object Cast {
 }
 
 /** Cast the child expression to the target data type. */
+@ExpressionDescription(
+  usage = " - Cast value v to the target data type.",
+  extended = "> SELECT _FUNC_('10');\n 10")
 case class Cast(child: Expression, dataType: DataType) extends UnaryExpression with NullIntolerant {
 
   override def toString: String = s"cast($child as ${dataType.simpleString})"

http://git-wip-us.apache.org/repos/asf/spark/blob/11d427c9/sql/core/src/test/scala/org/apache/spark/sql/SQLCompatibilityFunctionSuite.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLCompatibilityFunctionSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLCompatibilityFunctionSuite.scala
index 1e32395..27b60e0 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/SQLCompatibilityFunctionSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLCompatibilityFunctionSuite.scala
@@ -17,10 +17,14 @@
 
 package org.apache.spark.sql
 
+import java.math.BigDecimal
+import java.sql.Timestamp
+
 import org.apache.spark.sql.test.SharedSQLContext
 
 /**
  * A test suite for functions added for compatibility with other databases such as Oracle, MSSQL.
+ *
  * These functions are typically implemented using the trait
  * [[org.apache.spark.sql.catalyst.expressions.RuntimeReplaceable]].
  */
@@ -69,4 +73,26 @@ class SQLCompatibilityFunctionSuite extends QueryTest with SharedSQLContext {
       sql("SELECT nvl2(null, 1, 2.1d), nvl2('n', 1, 2.1d)"),
       Row(2.1, 1.0))
   }
+
+  test("SPARK-16730 cast alias functions for Hive compatibility") {
+    checkAnswer(
+      sql("SELECT boolean(1), tinyint(1), smallint(1), int(1), bigint(1)"),
+      Row(true, 1.toByte, 1.toShort, 1, 1L))
+
+    checkAnswer(
+      sql("SELECT float(1), double(1), decimal(1)"),
+      Row(1.toFloat, 1.0, new BigDecimal(1)))
+
+    checkAnswer(
+      sql("SELECT date(\"2014-04-04\"), timestamp(date(\"2014-04-04\"))"),
+      Row(new java.util.Date(114, 3, 4), new Timestamp(114, 3, 4, 0, 0, 0, 0)))
+
+    checkAnswer(
+      sql("SELECT string(1)"),
+      Row("1"))
+
+    // Error handling: only one argument
+    val errorMsg = intercept[AnalysisException](sql("SELECT string(1, 2)")).getMessage
+    assert(errorMsg.contains("Function string accepts only one argument"))
+  }
 }

