[ https://issues.apache.org/jira/browse/SPARK-18137?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15648291#comment-15648291 ]
Herman van Hovell commented on SPARK-18137: ------------------------------------------- [~srowen] How did you set the Assignee? I spend a while on this, but could not figure out how to do it. > RewriteDistinctAggregates UnresolvedException when a UDAF has a foldable > TypeCheck > ---------------------------------------------------------------------------------- > > Key: SPARK-18137 > URL: https://issues.apache.org/jira/browse/SPARK-18137 > Project: Spark > Issue Type: Bug > Components: SQL > Affects Versions: 2.1.0 > Reporter: Song Jun > Assignee: Song Jun > Fix For: 2.0.3, 2.1.0 > > > when run a sql with distinct(on spark github master branch), it throw > UnresolvedException. > For example: > run a test case on spark(branch master) with sql: > {noformat} > SELECT percentile_approx(key, 0.99999), count(distinct key),sum(distinct key) > FROM src LIMIT 1 > {noformat} > and it throw exception: > {noformat} > org.apache.spark.sql.catalyst.analysis.UnresolvedException: Invalid call to > dataType on unresolved object, tree: 'percentile_approx(CAST(src.`key` AS > DOUBLE), CAST(0.99999BD AS DOUBLE), 10000) > at > org.apache.spark.sql.catalyst.analysis.UnresolvedAttribute.dataType(unresolved.scala:92) > at > org.apache.spark.sql.catalyst.optimizer.RewriteDistinctAggregates$.org$apache$spark$sql$catalyst$optimizer$RewriteDistinctAggregates$$nullify(RewriteDistinctAggregates.scala:261) > at > org.apache.spark.sql.catalyst.optimizer.RewriteDistinctAggregates$.org$apache$spark$sql$catalyst$optimizer$RewriteDistinctAggregates$$evalWithinGroup$1(RewriteDistinctAggregates.scala:136) > at > org.apache.spark.sql.catalyst.optimizer.RewriteDistinctAggregates$$anonfun$16.apply(RewriteDistinctAggregates.scala:187) > at > org.apache.spark.sql.catalyst.optimizer.RewriteDistinctAggregates$$anonfun$16.apply(RewriteDistinctAggregates.scala:180) > at > scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:234) > at > scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:234) > at scala.collection.immutable.List.foreach(List.scala:381) > at scala.collection.TraversableLike$class.map(TraversableLike.scala:234) > at scala.collection.immutable.List.map(List.scala:285) > at > org.apache.spark.sql.catalyst.optimizer.RewriteDistinctAggregates$.rewrite(RewriteDistinctAggregates.scala:180) > at > org.apache.spark.sql.catalyst.optimizer.RewriteDistinctAggregates$$anonfun$apply$1.applyOrElse(RewriteDistinctAggregates.scala:105) > at > org.apache.spark.sql.catalyst.optimizer.RewriteDistinctAggregates$$anonfun$apply$1.applyOrElse(RewriteDistinctAggregates.scala:104) > at > org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$transformUp$1.apply(TreeNode.scala:301) > at > org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$transformUp$1.apply(TreeNode.scala:301) > at > org.apache.spark.sql.catalyst.trees.CurrentOrigin$.withOrigin(TreeNode.scala:69) > at > org.apache.spark.sql.catalyst.trees.TreeNode.transformUp(TreeNode.scala:300) > at > org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$4.apply(TreeNode.scala:298) > at > org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$4.apply(TreeNode.scala:298) > at > org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$5.apply(TreeNode.scala:321) > at > org.apache.spark.sql.catalyst.trees.TreeNode.mapProductIterator(TreeNode.scala:179) > at > org.apache.spark.sql.catalyst.trees.TreeNode.transformChildren(TreeNode.scala:319) > at > org.apache.spark.sql.catalyst.trees.TreeNode.transformUp(TreeNode.scala:298) > at > org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$4.apply(TreeNode.scala:298) > at > org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$4.apply(TreeNode.scala:298) > at > org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$5.apply(TreeNode.scala:321) > at > org.apache.spark.sql.catalyst.trees.TreeNode.mapProductIterator(TreeNode.scala:179) > at > org.apache.spark.sql.catalyst.trees.TreeNode.transformChildren(TreeNode.scala:319) > at > org.apache.spark.sql.catalyst.trees.TreeNode.transformUp(TreeNode.scala:298) > at > org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$4.apply(TreeNode.scala:298) > at > org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$4.apply(TreeNode.scala:298) > at > org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$5.apply(TreeNode.scala:321) > at > org.apache.spark.sql.catalyst.trees.TreeNode.mapProductIterator(TreeNode.scala:179) > at > org.apache.spark.sql.catalyst.trees.TreeNode.transformChildren(TreeNode.scala:319) > at > org.apache.spark.sql.catalyst.trees.TreeNode.transformUp(TreeNode.scala:298) > at > org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$4.apply(TreeNode.scala:298) > at > org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$4.apply(TreeNode.scala:298) > at > org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$5.apply(TreeNode.scala:321) > at > org.apache.spark.sql.catalyst.trees.TreeNode.mapProductIterator(TreeNode.scala:179) > at > org.apache.spark.sql.catalyst.trees.TreeNode.transformChildren(TreeNode.scala:319) > at > org.apache.spark.sql.catalyst.trees.TreeNode.transformUp(TreeNode.scala:298) > at > org.apache.spark.sql.catalyst.optimizer.RewriteDistinctAggregates$.apply(RewriteDistinctAggregates.scala:104) > at > org.apache.spark.sql.catalyst.optimizer.RewriteDistinctAggregates$.apply(RewriteDistinctAggregates.scala:102) > at > org.apache.spark.sql.catalyst.rules.RuleExecutor$$anonfun$execute$1$$anonfun$apply$1.apply(RuleExecutor.scala:85) > at > org.apache.spark.sql.catalyst.rules.RuleExecutor$$anonfun$execute$1$$anonfun$apply$1.apply(RuleExecutor.scala:82) > at > scala.collection.IndexedSeqOptimized$class.foldl(IndexedSeqOptimized.scala:57) > at > scala.collection.IndexedSeqOptimized$class.foldLeft(IndexedSeqOptimized.scala:66) > at scala.collection.mutable.WrappedArray.foldLeft(WrappedArray.scala:35) > at > org.apache.spark.sql.catalyst.rules.RuleExecutor$$anonfun$execute$1.apply(RuleExecutor.scala:82) > at > org.apache.spark.sql.catalyst.rules.RuleExecutor$$anonfun$execute$1.apply(RuleExecutor.scala:74) > at scala.collection.immutable.List.foreach(List.scala:381) > at > org.apache.spark.sql.catalyst.rules.RuleExecutor.execute(RuleExecutor.scala:74) > at > org.apache.spark.sql.execution.QueryExecution.optimizedPlan$lzycompute(QueryExecution.scala:74) > at > org.apache.spark.sql.execution.QueryExecution.optimizedPlan(QueryExecution.scala:74) > at > org.apache.spark.sql.execution.QueryExecution.sparkPlan$lzycompute(QueryExecution.scala:78) > at > org.apache.spark.sql.execution.QueryExecution.sparkPlan(QueryExecution.scala:76) > at > org.apache.spark.sql.execution.QueryExecution.executedPlan$lzycompute(QueryExecution.scala:83) > at > org.apache.spark.sql.execution.QueryExecution.executedPlan(QueryExecution.scala:83) > at org.apache.spark.sql.Dataset.withTypedCallback(Dataset.scala:2572) > at org.apache.spark.sql.Dataset.head(Dataset.scala:1934) > at org.apache.spark.sql.Dataset.take(Dataset.scala:2149) > at > com.databricks.backend.daemon.driver.OutputAggregator$.withOutputAggregation0(OutputAggregator.scala:76) > at > com.databricks.backend.daemon.driver.OutputAggregator$.withOutputAggregation(OutputAggregator.scala:42) > at > com.databricks.backend.daemon.driver.SQLDriverLocal.executeSql(SQLDriverLocal.scala:100) > at > com.databricks.backend.daemon.driver.SQLDriverLocal.repl(SQLDriverLocal.scala:128) > at > com.databricks.backend.daemon.driver.DriverLocal$$anonfun$execute$1.apply(DriverLocal.scala:202) > at > com.databricks.backend.daemon.driver.DriverLocal$$anonfun$execute$1.apply(DriverLocal.scala:191) > at > com.databricks.logging.UsageLogging$$anonfun$withAttributionContext$1.apply(UsageLogging.scala:145) > at scala.util.DynamicVariable.withValue(DynamicVariable.scala:58) > at > com.databricks.logging.UsageLogging$class.withAttributionContext(UsageLogging.scala:140) > at > com.databricks.backend.daemon.driver.DriverLocal.withAttributionContext(DriverLocal.scala:34) > at > com.databricks.logging.UsageLogging$class.withAttributionTags(UsageLogging.scala:178) > at > com.databricks.backend.daemon.driver.DriverLocal.withAttributionTags(DriverLocal.scala:34) > at > com.databricks.backend.daemon.driver.DriverLocal.execute(DriverLocal.scala:191) > at > com.databricks.backend.daemon.driver.DriverWrapper$$anonfun$tryExecutingCommand$2.apply(DriverWrapper.scala:584) > at > com.databricks.backend.daemon.driver.DriverWrapper$$anonfun$tryExecutingCommand$2.apply(DriverWrapper.scala:584) > at scala.util.Try$.apply(Try.scala:192) > at > com.databricks.backend.daemon.driver.DriverWrapper.tryExecutingCommand(DriverWrapper.scala:579) > at > com.databricks.backend.daemon.driver.DriverWrapper.executeCommand(DriverWrapper.scala:491) > at > com.databricks.backend.daemon.driver.DriverWrapper.runInnerLoop(DriverWrapper.scala:394) > at > com.databricks.backend.daemon.driver.DriverWrapper.runInner(DriverWrapper.scala:351) > at > com.databricks.backend.daemon.driver.DriverWrapper.run(DriverWrapper.scala:217) > at java.lang.Thread.run(Thread.java:745) > {noformat} -- This message was sent by Atlassian JIRA (v6.3.4#6332) --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@spark.apache.org For additional commands, e-mail: issues-h...@spark.apache.org