[ https://issues.apache.org/jira/browse/SPARK-14171?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
Sean Owen resolved SPARK-14171. ------------------------------- Resolution: Duplicate Fix Version/s: (was: 2.1.0) > UDAF aggregates argument object inspector not parsed correctly > -------------------------------------------------------------- > > Key: SPARK-14171 > URL: https://issues.apache.org/jira/browse/SPARK-14171 > Project: Spark > Issue Type: Bug > Components: SQL > Affects Versions: 1.6.1 > Reporter: Jianfeng Hu > Priority: Critical > > For example, when using percentile_approx and count distinct together, it > raises an error complaining that the argument is not a constant. We have a test case > to reproduce it. Could you help look into a fix for this? This was working in > the previous version (Spark 1.4 + Hive 0.13). Thanks! > {code}--- > a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveUDFSuite.scala > +++ > b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveUDFSuite.scala > @@ -148,6 +148,9 @@ class HiveUDFSuite extends QueryTest with > TestHiveSingleton with SQLTestUtils { > checkAnswer(sql("SELECT percentile_approx(100.0, array(0.9, 0.9)) FROM > src LIMIT 1"), > sql("SELECT array(100, 100) FROM src LIMIT 1").collect().toSeq) > + > + checkAnswer(sql("SELECT percentile_approx(key, 0.99999), count(distinct > key) FROM src LIMIT 1"), > + sql("SELECT max(key), 1 FROM src LIMIT 1").collect().toSeq) > } > test("UDFIntegerToString") { > {code} > When running the test suite, we can see this error: > {code} > - Generic UDAF aggregates *** FAILED *** > org.apache.spark.sql.catalyst.errors.package$TreeNodeException: makeCopy, > tree: > hiveudaffunction(HiveFunctionWrapper(org.apache.hadoop.hive.ql.udf.generic.GenericUDAFPercentileApprox,org.apache.hadoop.hive.ql.udf.generic.GenericUDAFPercentileApprox@6e1dc6a7),key#51176,0.99999,false,0,0) > at > org.apache.spark.sql.catalyst.errors.package$.attachTree(package.scala:49) > at org.apache.spark.sql.catalyst.trees.TreeNode.makeCopy(TreeNode.scala:357) > at > 
org.apache.spark.sql.catalyst.trees.TreeNode.withNewChildren(TreeNode.scala:238) > at > org.apache.spark.sql.catalyst.analysis.DistinctAggregationRewriter.org$apache$spark$sql$catalyst$analysis$DistinctAggregationRewriter$$patchAggregateFunctionChildren$1(DistinctAggregationRewriter.scala:148) > at > org.apache.spark.sql.catalyst.analysis.DistinctAggregationRewriter$$anonfun$15.apply(DistinctAggregationRewriter.scala:192) > at > org.apache.spark.sql.catalyst.analysis.DistinctAggregationRewriter$$anonfun$15.apply(DistinctAggregationRewriter.scala:190) > at > scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:244) > at > scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:244) > at > scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59) > at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:47) > ... > Cause: java.lang.reflect.InvocationTargetException: > at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method) > at > sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:57) > at > sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45) > at java.lang.reflect.Constructor.newInstance(Constructor.java:526) > at > org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$makeCopy$1$$anonfun$apply$12.apply(TreeNode.scala:368) > at > org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$makeCopy$1$$anonfun$apply$12.apply(TreeNode.scala:367) > at > org.apache.spark.sql.catalyst.trees.CurrentOrigin$.withOrigin(TreeNode.scala:69) > at > org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$makeCopy$1.apply(TreeNode.scala:365) > at > org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$makeCopy$1.apply(TreeNode.scala:357) > at > org.apache.spark.sql.catalyst.errors.package$.attachTree(package.scala:48) > ... 
> Cause: org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException: The second > argument must be a constant, but double was passed instead. > at > org.apache.hadoop.hive.ql.udf.generic.GenericUDAFPercentileApprox.getEvaluator(GenericUDAFPercentileApprox.java:147) > at > org.apache.spark.sql.hive.HiveUDAFFunction.functionAndInspector$lzycompute(hiveUDFs.scala:598) > at > org.apache.spark.sql.hive.HiveUDAFFunction.functionAndInspector(hiveUDFs.scala:596) > at > org.apache.spark.sql.hive.HiveUDAFFunction.returnInspector$lzycompute(hiveUDFs.scala:606) > at > org.apache.spark.sql.hive.HiveUDAFFunction.returnInspector(hiveUDFs.scala:606) > at org.apache.spark.sql.hive.HiveUDAFFunction.<init>(hiveUDFs.scala:654) > at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method) > at > sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:57) > at > sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45) > at java.lang.reflect.Constructor.newInstance(Constructor.java:526) > ... > {code} -- This message was sent by Atlassian JIRA (v6.3.4#6332) --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@spark.apache.org For additional commands, e-mail: issues-h...@spark.apache.org