Repository: spark Updated Branches: refs/heads/master 929dfa24b -> 1821fc165
[SPARK-6747] [SQL] Throw an AnalysisException when unsupported Java list types used in Hive UDF The current implementation can't handle List<> as a return type in a Hive UDF and throws a meaningless scala.MatchError. Consider the UDF below: public class UDFToListString extends UDF { public List<String> evaluate(Object o) { return Arrays.asList("xxx", "yyy", "zzz"); } } A scala.MatchError is thrown as follows when the UDF is used: scala.MatchError: interface java.util.List (of class java.lang.Class) at org.apache.spark.sql.hive.HiveInspectors$class.javaClassToDataType(HiveInspectors.scala:174) at org.apache.spark.sql.hive.HiveSimpleUdf.javaClassToDataType(hiveUdfs.scala:76) at org.apache.spark.sql.hive.HiveSimpleUdf.dataType$lzycompute(hiveUdfs.scala:106) at org.apache.spark.sql.hive.HiveSimpleUdf.dataType(hiveUdfs.scala:106) at org.apache.spark.sql.catalyst.expressions.Alias.toAttribute(namedExpressions.scala:131) at org.apache.spark.sql.catalyst.planning.PhysicalOperation$$anonfun$collectAliases$1.applyOrElse(patterns.scala:95) at org.apache.spark.sql.catalyst.planning.PhysicalOperation$$anonfun$collectAliases$1.applyOrElse(patterns.scala:94) at scala.runtime.AbstractPartialFunction.apply(AbstractPartialFunction.scala:33) at scala.collection.TraversableLike$$anonfun$collect$1.apply(TraversableLike.scala:278) ... To make this failure easier for UDF developers to understand, we need to throw a more suitable exception. 
Author: Takeshi YAMAMURO <linguin....@gmail.com> Closes #7248 from maropu/FixBugInHiveInspectors and squashes the following commits: 1c3df2a [Takeshi YAMAMURO] Fix comments 56305de [Takeshi YAMAMURO] Fix conflicts 92ed7a6 [Takeshi YAMAMURO] Throw an exception when java list type used 2844a8e [Takeshi YAMAMURO] Apply comments 7114a47 [Takeshi YAMAMURO] Add TODO comments in UDFToListString of HiveUdfSuite fdb2ae4 [Takeshi YAMAMURO] Add StringToUtf8 to comvert String into UTF8String af61f2e [Takeshi YAMAMURO] Remove a new type 7f812fd [Takeshi YAMAMURO] Fix code-style errors 6984bf4 [Takeshi YAMAMURO] Apply review comments 93e3d4e [Takeshi YAMAMURO] Add a blank line at the end of UDFToListString ee232db [Takeshi YAMAMURO] Support List as a return type in Hive UDF 1e82316 [Takeshi YAMAMURO] Apply comments 21e8763 [Takeshi YAMAMURO] Add TODO comments in UDFToListString of HiveUdfSuite a488712 [Takeshi YAMAMURO] Add StringToUtf8 to comvert String into UTF8String 1c7b9d1 [Takeshi YAMAMURO] Remove a new type f965c34 [Takeshi YAMAMURO] Fix code-style errors 9406416 [Takeshi YAMAMURO] Apply review comments e21ce7e [Takeshi YAMAMURO] Add a blank line at the end of UDFToListString e553f10 [Takeshi YAMAMURO] Support List as a return type in Hive UDF Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/1821fc16 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/1821fc16 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/1821fc16 Branch: refs/heads/master Commit: 1821fc165808143e98b3d9626141b1a55bde90ac Parents: 929dfa2 Author: Takeshi YAMAMURO <linguin....@gmail.com> Authored: Mon Jul 6 19:44:31 2015 -0700 Committer: Michael Armbrust <mich...@databricks.com> Committed: Mon Jul 6 19:44:31 2015 -0700 ---------------------------------------------------------------------- .../apache/spark/sql/hive/HiveInspectors.scala | 10 +++++- .../spark/sql/hive/execution/UDFToListInt.java | 29 ++++++++++++++++++ 
.../sql/hive/execution/UDFToListString.java | 29 ++++++++++++++++++ .../spark/sql/hive/execution/HiveUDFSuite.scala | 32 +++++++++++++++++++- 4 files changed, 98 insertions(+), 2 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/spark/blob/1821fc16/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveInspectors.scala ---------------------------------------------------------------------- diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveInspectors.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveInspectors.scala index a6b8ead..7423d80 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveInspectors.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveInspectors.scala @@ -26,8 +26,8 @@ import org.apache.hadoop.{io => hadoopIo} import org.apache.spark.sql.catalyst.expressions._ import org.apache.spark.sql.catalyst.util.DateTimeUtils -import org.apache.spark.sql.types import org.apache.spark.sql.types._ +import org.apache.spark.sql.{AnalysisException, types} import org.apache.spark.unsafe.types.UTF8String /* Implicit conversions */ @@ -218,6 +218,14 @@ private[hive] trait HiveInspectors { // Hive seems to return this for struct types? 
case c: Class[_] if c == classOf[java.lang.Object] => NullType + + // java list type unsupported + case c: Class[_] if c == classOf[java.util.List[_]] => + throw new AnalysisException( + "List type in java is unsupported because " + + "JVM type erasure makes spark fail to catch a component type in List<>") + + case c => throw new AnalysisException(s"Unsupported java type $c") } /** http://git-wip-us.apache.org/repos/asf/spark/blob/1821fc16/sql/hive/src/test/java/org/apache/spark/sql/hive/execution/UDFToListInt.java ---------------------------------------------------------------------- diff --git a/sql/hive/src/test/java/org/apache/spark/sql/hive/execution/UDFToListInt.java b/sql/hive/src/test/java/org/apache/spark/sql/hive/execution/UDFToListInt.java new file mode 100644 index 0000000..67576a7 --- /dev/null +++ b/sql/hive/src/test/java/org/apache/spark/sql/hive/execution/UDFToListInt.java @@ -0,0 +1,29 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.hive.execution; + +import org.apache.hadoop.hive.ql.exec.UDF; + +import java.util.Arrays; +import java.util.List; + +public class UDFToListInt extends UDF { + public List<Integer> evaluate(Object o) { + return Arrays.asList(1, 2, 3); + } +} http://git-wip-us.apache.org/repos/asf/spark/blob/1821fc16/sql/hive/src/test/java/org/apache/spark/sql/hive/execution/UDFToListString.java ---------------------------------------------------------------------- diff --git a/sql/hive/src/test/java/org/apache/spark/sql/hive/execution/UDFToListString.java b/sql/hive/src/test/java/org/apache/spark/sql/hive/execution/UDFToListString.java new file mode 100644 index 0000000..f02395c --- /dev/null +++ b/sql/hive/src/test/java/org/apache/spark/sql/hive/execution/UDFToListString.java @@ -0,0 +1,29 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.sql.hive.execution; + +import org.apache.hadoop.hive.ql.exec.UDF; + +import java.util.Arrays; +import java.util.List; + +public class UDFToListString extends UDF { + public List<String> evaluate(Object o) { + return Arrays.asList("data1", "data2", "data3"); + } +} http://git-wip-us.apache.org/repos/asf/spark/blob/1821fc16/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveUDFSuite.scala ---------------------------------------------------------------------- diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveUDFSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveUDFSuite.scala index 56b0bef..4468620 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveUDFSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveUDFSuite.scala @@ -28,7 +28,7 @@ import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectIn import org.apache.hadoop.hive.serde2.objectinspector.{ObjectInspector, ObjectInspectorFactory} import org.apache.hadoop.hive.serde2.{AbstractSerDe, SerDeStats} import org.apache.hadoop.io.Writable -import org.apache.spark.sql.{QueryTest, Row} +import org.apache.spark.sql.{AnalysisException, QueryTest, Row} import org.apache.spark.sql.hive.test.TestHive import org.apache.spark.util.Utils @@ -133,6 +133,36 @@ class HiveUDFSuite extends QueryTest { TestHive.reset() } + test("UDFToListString") { + val testData = TestHive.sparkContext.parallelize(StringCaseClass("") :: Nil).toDF() + testData.registerTempTable("inputTable") + + sql(s"CREATE TEMPORARY FUNCTION testUDFToListString AS '${classOf[UDFToListString].getName}'") + val errMsg = intercept[AnalysisException] { + sql("SELECT testUDFToListString(s) FROM inputTable") + } + assert(errMsg.getMessage === "List type in java is unsupported because " + + "JVM type erasure makes spark fail to catch a component type in List<>;") + + sql("DROP TEMPORARY FUNCTION IF 
EXISTS testUDFToListString") + TestHive.reset() + } + + test("UDFToListInt") { + val testData = TestHive.sparkContext.parallelize(StringCaseClass("") :: Nil).toDF() + testData.registerTempTable("inputTable") + + sql(s"CREATE TEMPORARY FUNCTION testUDFToListInt AS '${classOf[UDFToListInt].getName}'") + val errMsg = intercept[AnalysisException] { + sql("SELECT testUDFToListInt(s) FROM inputTable") + } + assert(errMsg.getMessage === "List type in java is unsupported because " + + "JVM type erasure makes spark fail to catch a component type in List<>;") + + sql("DROP TEMPORARY FUNCTION IF EXISTS testUDFToListInt") + TestHive.reset() + } + test("UDFListListInt") { val testData = TestHive.sparkContext.parallelize( ListListIntCaseClass(Nil) :: --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org