Repository: spark
Updated Branches:
  refs/heads/master 66bc0f2d6 -> 116016b48
[SPARK-3582][SQL] not limit argument type for hive simple udf

Since we have moved to `ConversionHelper`, it is quite easy to avoid calling `javaClassToDataType` in the Hive simple UDF path.

This will solve SPARK-3582.

Author: Daoyuan Wang <daoyuan.w...@intel.com>

Closes #2506 from adrian-wang/spark3582 and squashes the following commits:

450c28e [Daoyuan Wang] not limit argument type for hive simple udf


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/116016b4
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/116016b4
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/116016b4

Branch: refs/heads/master
Commit: 116016b481cecbd8ad6e9717d92f977a164a6653
Parents: 66bc0f2
Author: Daoyuan Wang <daoyuan.w...@intel.com>
Authored: Tue Sep 23 11:47:53 2014 -0700
Committer: Michael Armbrust <mich...@databricks.com>
Committed: Tue Sep 23 11:47:53 2014 -0700

----------------------------------------------------------------------
 .../apache/spark/sql/hive/HiveInspectors.scala |  4 ++--
 .../org/apache/spark/sql/hive/hiveUdfs.scala   | 22 ++------------------
 2 files changed, 4 insertions(+), 22 deletions(-)
----------------------------------------------------------------------
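For context before the diff: the change routes simple-UDF arguments through the existing wrap() helper and lets Hive's ConversionHelper do any remaining coercion at invocation time, instead of planning per-argument Cast expressions derived from javaClassToDataType. Below is a minimal, self-contained Scala sketch of that boxing pattern, assuming nothing beyond the standard library; the names WrapSketch and toHiveJava are illustrative only and not part of the commit.

import scala.collection.JavaConverters._

object WrapSketch {
  // Box Catalyst-side Scala values into the java.lang wrapper types that a
  // Hive simple UDF's reflectively resolved eval method can accept.
  def toHiveJava(a: Any): AnyRef = a match {
    case s: String          => s: java.lang.String
    case i: Int             => i: java.lang.Integer
    case b: Boolean         => b: java.lang.Boolean
    case f: Float           => f: java.lang.Float
    case d: Double          => d: java.lang.Double
    case l: Long            => l: java.lang.Long
    case bytes: Array[Byte] => bytes
    case seq: Seq[_]        => seq.map(toHiveJava).asJava // lists become java.util.List
    case null               => null
    case other              => other.asInstanceOf[AnyRef] // Short, Byte, Timestamp, ...
  }

  def main(args: Array[String]): Unit = {
    // Arguments for a hypothetical two-argument UDF, ready for Method.invoke.
    val udfArgs: Array[AnyRef] = Array(toHiveJava("spark"), toHiveJava(3))
    println(udfArgs.map(_.getClass.getSimpleName).mkString(", ")) // prints: String, Integer
  }
}

In the commit itself this boxing is done by the wrap() shown in the first hunk of HiveInspectors.scala, and conversionHelper.convertIfNecessary handles any remaining coercion inside HiveSimpleUdf.eval.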
http://git-wip-us.apache.org/repos/asf/spark/blob/116016b4/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveInspectors.scala
----------------------------------------------------------------------
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveInspectors.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveInspectors.scala
index 943bbaa..fa889ec 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveInspectors.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveInspectors.scala
@@ -137,7 +137,7 @@ private[hive] trait HiveInspectors {
 
   /** Converts native catalyst types to the types expected by Hive */
   def wrap(a: Any): AnyRef = a match {
-    case s: String => new hadoopIo.Text(s) // TODO why should be Text?
+    case s: String => s: java.lang.String
     case i: Int => i: java.lang.Integer
     case b: Boolean => b: java.lang.Boolean
     case f: Float => f: java.lang.Float
@@ -145,7 +145,7 @@ private[hive] trait HiveInspectors {
     case l: Long => l: java.lang.Long
     case l: Short => l: java.lang.Short
     case l: Byte => l: java.lang.Byte
-    case b: BigDecimal => b.bigDecimal
+    case b: BigDecimal => new HiveDecimal(b.underlying())
    case b: Array[Byte] => b
     case t: java.sql.Timestamp => t
     case s: Seq[_] => seqAsJavaList(s.map(wrap))

http://git-wip-us.apache.org/repos/asf/spark/blob/116016b4/sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveUdfs.scala
----------------------------------------------------------------------
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveUdfs.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveUdfs.scala
index 19ff3b6..68944ed 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveUdfs.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/hiveUdfs.scala
@@ -51,19 +51,7 @@ private[hive] abstract class HiveFunctionRegistry
     val functionClassName = functionInfo.getFunctionClass.getName
 
     if (classOf[UDF].isAssignableFrom(functionInfo.getFunctionClass)) {
-      val function = functionInfo.getFunctionClass.newInstance().asInstanceOf[UDF]
-      val method = function.getResolver.getEvalMethod(children.map(_.dataType.toTypeInfo))
-
-      val expectedDataTypes = method.getParameterTypes.map(javaClassToDataType)
-
-      HiveSimpleUdf(
-        functionClassName,
-        children.zip(expectedDataTypes).map {
-          case (e, NullType) => e
-          case (e, t) if (e.dataType == t) => e
-          case (e, t) => Cast(e, t)
-        }
-      )
+      HiveSimpleUdf(functionClassName, children)
     } else if (classOf[GenericUDF].isAssignableFrom(functionInfo.getFunctionClass)) {
       HiveGenericUdf(functionClassName, children)
     } else if (
@@ -117,15 +105,9 @@ private[hive] case class HiveSimpleUdf(functionClassName: String, children: Seq[
   @transient
   lazy val dataType = javaClassToDataType(method.getReturnType)
 
-  def catalystToHive(value: Any): Object = value match {
-    // TODO need more types here? or can we use wrap()
-    case bd: BigDecimal => new HiveDecimal(bd.underlying())
-    case d => d.asInstanceOf[Object]
-  }
-
   // TODO: Finish input output types.
   override def eval(input: Row): Any = {
-    val evaluatedChildren = children.map(c => catalystToHive(c.eval(input)))
+    val evaluatedChildren = children.map(c => wrap(c.eval(input)))
 
     unwrap(FunctionRegistry.invoke(method, function, conversionHelper
       .convertIfNecessary(evaluatedChildren: _*): _*))


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org