[ https://issues.apache.org/jira/browse/SPARK-5371?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=14387847#comment-14387847 ]

Apache Spark commented on SPARK-5371:
-------------------------------------

User 'marmbrus' has created a pull request for this issue:
https://github.com/apache/spark/pull/5278

> Failure to analyze query with UNION ALL and double aggregation
> --------------------------------------------------------------
>
>                 Key: SPARK-5371
>                 URL: https://issues.apache.org/jira/browse/SPARK-5371
>             Project: Spark
>          Issue Type: Bug
>          Components: SQL
>    Affects Versions: 1.2.0, 1.3.0
>            Reporter: David Ross
>            Assignee: Michael Armbrust
>            Priority: Critical
>
> This SQL session:
> {code}
> DROP TABLE IF EXISTS
>     test1;
> DROP TABLE IF EXISTS
>     test2;
> CREATE TABLE
>     test1
>     (
>         c11 INT,
>         c12 INT,
>         c13 INT,
>         c14 INT
>     );
> CREATE TABLE
>     test2
>     (
>         c21 INT,
>         c22 INT,
>         c23 INT,
>         c24 INT
>     );
> SELECT
>     MIN(t3.c_1),
>     MIN(t3.c_2),
>     MIN(t3.c_3),
>     MIN(t3.c_4)
> FROM
>     (
>         SELECT
>             SUM(t1.c11) c_1,
>             NULL        c_2,
>             NULL        c_3,
>             NULL        c_4
>         FROM
>             test1 t1
>         UNION ALL
>         SELECT
>             NULL        c_1,
>             SUM(t2.c22) c_2,
>             SUM(t2.c23) c_3,
>             SUM(t2.c24) c_4
>         FROM
>             test2 t2 ) t3; 
> {code}
> Produces this error:
> {code}
> 15/01/23 00:25:21 INFO thriftserver.SparkExecuteStatementOperation: Running query 'SELECT
>     MIN(t3.c_1),
>     MIN(t3.c_2),
>     MIN(t3.c_3),
>     MIN(t3.c_4)
> FROM
>     (
>         SELECT
>             SUM(t1.c11) c_1,
>             NULL        c_2,
>             NULL        c_3,
>             NULL        c_4
>         FROM
>             test1 t1
>         UNION ALL
>         SELECT
>             NULL        c_1,
>             SUM(t2.c22) c_2,
>             SUM(t2.c23) c_3,
>             SUM(t2.c24) c_4
>         FROM
>             test2 t2 ) t3'
> 15/01/23 00:25:21 INFO parse.ParseDriver: Parsing command: SELECT
>     MIN(t3.c_1),
>     MIN(t3.c_2),
>     MIN(t3.c_3),
>     MIN(t3.c_4)
> FROM
>     (
>         SELECT
>             SUM(t1.c11) c_1,
>             NULL        c_2,
>             NULL        c_3,
>             NULL        c_4
>         FROM
>             test1 t1
>         UNION ALL
>         SELECT
>             NULL        c_1,
>             SUM(t2.c22) c_2,
>             SUM(t2.c23) c_3,
>             SUM(t2.c24) c_4
>         FROM
>             test2 t2 ) t3
> 15/01/23 00:25:21 INFO parse.ParseDriver: Parse Completed
> 15/01/23 00:25:21 ERROR thriftserver.SparkExecuteStatementOperation: Error executing query:
> java.util.NoSuchElementException: key not found: c_2#23488
>       at scala.collection.MapLike$class.default(MapLike.scala:228)
>       at org.apache.spark.sql.catalyst.expressions.AttributeMap.default(AttributeMap.scala:29)
>       at scala.collection.MapLike$class.apply(MapLike.scala:141)
>       at org.apache.spark.sql.catalyst.expressions.AttributeMap.apply(AttributeMap.scala:29)
>       at org.apache.spark.sql.catalyst.optimizer.UnionPushdown$$anonfun$1.applyOrElse(Optimizer.scala:77)
>       at org.apache.spark.sql.catalyst.optimizer.UnionPushdown$$anonfun$1.applyOrElse(Optimizer.scala:76)
>       at org.apache.spark.sql.catalyst.trees.TreeNode.transformDown(TreeNode.scala:144)
>       at org.apache.spark.sql.catalyst.trees.TreeNode.transform(TreeNode.scala:135)
>       at org.apache.spark.sql.catalyst.optimizer.UnionPushdown$.pushToRight(Optimizer.scala:76)
>       at org.apache.spark.sql.catalyst.optimizer.UnionPushdown$$anonfun$apply$1$$anonfun$applyOrElse$6.apply(Optimizer.scala:98)
>       at org.apache.spark.sql.catalyst.optimizer.UnionPushdown$$anonfun$apply$1$$anonfun$applyOrElse$6.apply(Optimizer.scala:98)
>       at scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:244)
>       at scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:244)
>       at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
>       at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:47)
>       at scala.collection.TraversableLike$class.map(TraversableLike.scala:244)
>       at scala.collection.AbstractTraversable.map(Traversable.scala:105)
>       at org.apache.spark.sql.catalyst.optimizer.UnionPushdown$$anonfun$apply$1.applyOrElse(Optimizer.scala:98)
>       at org.apache.spark.sql.catalyst.optimizer.UnionPushdown$$anonfun$apply$1.applyOrElse(Optimizer.scala:85)
>       at org.apache.spark.sql.catalyst.trees.TreeNode.transformDown(TreeNode.scala:144)
>       at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$4.apply(TreeNode.scala:162)
>       at scala.collection.Iterator$$anon$11.next(Iterator.scala:328)
>       at scala.collection.Iterator$class.foreach(Iterator.scala:727)
>       at scala.collection.AbstractIterator.foreach(Iterator.scala:1157)
>       at scala.collection.generic.Growable$class.$plus$plus$eq(Growable.scala:48)
>       at scala.collection.mutable.ArrayBuffer.$plus$plus$eq(ArrayBuffer.scala:103)
>       at scala.collection.mutable.ArrayBuffer.$plus$plus$eq(ArrayBuffer.scala:47)
>       at scala.collection.TraversableOnce$class.to(TraversableOnce.scala:273)
>       at scala.collection.AbstractIterator.to(Iterator.scala:1157)
>       at scala.collection.TraversableOnce$class.toBuffer(TraversableOnce.scala:265)
>       at scala.collection.AbstractIterator.toBuffer(Iterator.scala:1157)
>       at scala.collection.TraversableOnce$class.toArray(TraversableOnce.scala:252)
>       at scala.collection.AbstractIterator.toArray(Iterator.scala:1157)
>       at org.apache.spark.sql.catalyst.trees.TreeNode.transformChildrenDown(TreeNode.scala:191)
>       at org.apache.spark.sql.catalyst.trees.TreeNode.transformDown(TreeNode.scala:147)
>       at org.apache.spark.sql.catalyst.trees.TreeNode.transform(TreeNode.scala:135)
>       at org.apache.spark.sql.catalyst.optimizer.UnionPushdown$.apply(Optimizer.scala:85)
>       at org.apache.spark.sql.catalyst.optimizer.UnionPushdown$.apply(Optimizer.scala:59)
>       at org.apache.spark.sql.catalyst.rules.RuleExecutor$$anonfun$apply$1$$anonfun$apply$2.apply(RuleExecutor.scala:61)
>       at org.apache.spark.sql.catalyst.rules.RuleExecutor$$anonfun$apply$1$$anonfun$apply$2.apply(RuleExecutor.scala:59)
>       at scala.collection.IndexedSeqOptimized$class.foldl(IndexedSeqOptimized.scala:51)
>       at scala.collection.IndexedSeqOptimized$class.foldLeft(IndexedSeqOptimized.scala:60)
>       at scala.collection.mutable.WrappedArray.foldLeft(WrappedArray.scala:34)
>       at org.apache.spark.sql.catalyst.rules.RuleExecutor$$anonfun$apply$1.apply(RuleExecutor.scala:59)
>       at org.apache.spark.sql.catalyst.rules.RuleExecutor$$anonfun$apply$1.apply(RuleExecutor.scala:51)
>       at scala.collection.immutable.List.foreach(List.scala:318)
>       at org.apache.spark.sql.catalyst.rules.RuleExecutor.apply(RuleExecutor.scala:51)
>       at org.apache.spark.sql.SQLContext$QueryExecution.optimizedPlan$lzycompute(SQLContext.scala:462)
>       at org.apache.spark.sql.SQLContext$QueryExecution.optimizedPlan(SQLContext.scala:462)
>       at org.apache.spark.sql.SQLContext$QueryExecution.sparkPlan$lzycompute(SQLContext.scala:467)
>       at org.apache.spark.sql.SQLContext$QueryExecution.sparkPlan(SQLContext.scala:465)
>       at org.apache.spark.sql.SQLContext$QueryExecution.executedPlan$lzycompute(SQLContext.scala:471)
>       at org.apache.spark.sql.SQLContext$QueryExecution.executedPlan(SQLContext.scala:471)
>       at org.apache.spark.sql.SchemaRDD.collect(SchemaRDD.scala:463)
>       at org.apache.spark.sql.hive.thriftserver.SparkExecuteStatementOperation.run(Shim13.scala:178)
>       at org.apache.hive.service.cli.session.HiveSessionImpl.executeStatementInternal(HiveSessionImpl.java:231)
>       at org.apache.hive.service.cli.session.HiveSessionImpl.executeStatement(HiveSessionImpl.java:212)
>       at sun.reflect.GeneratedMethodAccessor61.invoke(Unknown Source)
>       at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
>       at java.lang.reflect.Method.invoke(Method.java:483)
>       at org.apache.hive.service.cli.session.HiveSessionProxy.invoke(HiveSessionProxy.java:79)
>       at org.apache.hive.service.cli.session.HiveSessionProxy.access$000(HiveSessionProxy.java:37)
>       at org.apache.hive.service.cli.session.HiveSessionProxy$1.run(HiveSessionProxy.java:64)
>       at java.security.AccessController.doPrivileged(Native Method)
>       at javax.security.auth.Subject.doAs(Subject.java:422)
>       at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1548)
>       at org.apache.hadoop.hive.shims.HadoopShimsSecure.doAs(HadoopShimsSecure.java:493)
>       at org.apache.hive.service.cli.session.HiveSessionProxy.invoke(HiveSessionProxy.java:60)
>       at com.sun.proxy.$Proxy18.executeStatement(Unknown Source)
>       at org.apache.hive.service.cli.CLIService.executeStatement(CLIService.java:220)
>       at org.apache.hive.service.cli.thrift.ThriftCLIService.ExecuteStatement(ThriftCLIService.java:344)
>       at org.apache.hive.service.cli.thrift.TCLIService$Processor$ExecuteStatement.getResult(TCLIService.java:1313)
>       at org.apache.hive.service.cli.thrift.TCLIService$Processor$ExecuteStatement.getResult(TCLIService.java:1298)
>       at org.apache.thrift.ProcessFunction.process(ProcessFunction.java:39)
>       at org.apache.thrift.TBaseProcessor.process(TBaseProcessor.java:39)
>       at org.apache.hive.service.auth.TSetIpAddressProcessor.process(TSetIpAddressProcessor.java:55)
>       at org.apache.thrift.server.TThreadPoolServer$WorkerProcess.run(TThreadPoolServer.java:206)
>       at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
>       at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
>       at java.lang.Thread.run(Thread.java:745)
> 15/01/23 00:25:22 WARN thrift.ThriftCLIService: Error executing statement:
> org.apache.hive.service.cli.HiveSQLException: java.util.NoSuchElementException: key not found: c_2#23488
>       at org.apache.spark.sql.hive.thriftserver.SparkExecuteStatementOperation.run(Shim13.scala:189)
>       at org.apache.hive.service.cli.session.HiveSessionImpl.executeStatementInternal(HiveSessionImpl.java:231)
>       at org.apache.hive.service.cli.session.HiveSessionImpl.executeStatement(HiveSessionImpl.java:212)
>       at sun.reflect.GeneratedMethodAccessor61.invoke(Unknown Source)
>       at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
>       at java.lang.reflect.Method.invoke(Method.java:483)
>       at org.apache.hive.service.cli.session.HiveSessionProxy.invoke(HiveSessionProxy.java:79)
>       at org.apache.hive.service.cli.session.HiveSessionProxy.access$000(HiveSessionProxy.java:37)
>       at org.apache.hive.service.cli.session.HiveSessionProxy$1.run(HiveSessionProxy.java:64)
>       at java.security.AccessController.doPrivileged(Native Method)
>       at javax.security.auth.Subject.doAs(Subject.java:422)
>       at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1548)
>       at org.apache.hadoop.hive.shims.HadoopShimsSecure.doAs(HadoopShimsSecure.java:493)
>       at org.apache.hive.service.cli.session.HiveSessionProxy.invoke(HiveSessionProxy.java:60)
>       at com.sun.proxy.$Proxy18.executeStatement(Unknown Source)
>       at org.apache.hive.service.cli.CLIService.executeStatement(CLIService.java:220)
>       at org.apache.hive.service.cli.thrift.ThriftCLIService.ExecuteStatement(ThriftCLIService.java:344)
>       at org.apache.hive.service.cli.thrift.TCLIService$Processor$ExecuteStatement.getResult(TCLIService.java:1313)
>       at org.apache.hive.service.cli.thrift.TCLIService$Processor$ExecuteStatement.getResult(TCLIService.java:1298)
>       at org.apache.thrift.ProcessFunction.process(ProcessFunction.java:39)
>       at org.apache.thrift.TBaseProcessor.process(TBaseProcessor.java:39)
>       at org.apache.hive.service.auth.TSetIpAddressProcessor.process(TSetIpAddressProcessor.java:55)
>       at org.apache.thrift.server.TThreadPoolServer$WorkerProcess.run(TThreadPoolServer.java:206)
>       at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
>       at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
>       at java.lang.Thread.run(Thread.java:745)
> {code}
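> For reference, the failure should be reproducible outside the Thrift server as well. A minimal spark-shell sketch (untested here; assumes a Hive-enabled 1.2.x/1.3.0 build, with {{sc}} being the shell's SparkContext):
> {code}
> // Hypothetical repro from spark-shell; table names match the session above.
> import org.apache.spark.sql.hive.HiveContext
>
> val hiveContext = new HiveContext(sc)
> hiveContext.sql("CREATE TABLE IF NOT EXISTS test1 (c11 INT, c12 INT, c13 INT, c14 INT)")
> hiveContext.sql("CREATE TABLE IF NOT EXISTS test2 (c21 INT, c22 INT, c23 INT, c24 INT)")
>
> // Fails during optimization with: java.util.NoSuchElementException: key not found: c_2#...
> hiveContext.sql("""
>   SELECT MIN(t3.c_1), MIN(t3.c_2), MIN(t3.c_3), MIN(t3.c_4)
>   FROM (SELECT SUM(t1.c11) c_1, NULL c_2, NULL c_3, NULL c_4 FROM test1 t1
>         UNION ALL
>         SELECT NULL c_1, SUM(t2.c22) c_2, SUM(t2.c23) c_3, SUM(t2.c24) c_4
>         FROM test2 t2) t3
> """).collect()
> {code}
> The {{IF NOT EXISTS}} guards are only there to keep the sketch idempotent.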
> Some similar queries work fine. For example:
> {code}
> SELECT
>     MIN(t3.c_1),
>     MIN(t3.c_2),
>     MIN(t3.c_3),
>     MIN(t3.c_4)
> FROM
>     (
>         SELECT
>             SUM(t1.c11) c_1,
>             SUM(t1.c12) c_2,
>             SUM(t1.c13) c_3,
>             SUM(t1.c14) c_4
>         FROM
>             test1 t1
>         UNION ALL
>         SELECT
>             SUM(t2.c21) c_1,
>             SUM(t2.c22) c_2,
>             SUM(t2.c23) c_3,
>             SUM(t2.c24) c_4
>         FROM
>             test2 t2 ) t3; 
> {code}
> Notice that the only difference from the failing query is the bare {{NULL}} literals.
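> The failing frames in the trace above point at Catalyst's {{UnionPushdown}} rule ({{Optimizer.scala}}): it builds an {{AttributeMap}} from the union's left-side output attributes to the right-side ones, then rewrites each expression being pushed past the union by looking its attributes up in that map. A simplified, standalone sketch of the mechanics (an illustration only, not the actual Spark source; a plain {{Map[String, String]}} stands in for {{AttributeMap}}):
> {code}
> // Hypothetical model of UnionPushdown.pushToRight, reduced to plain Scala.
> object UnionPushdownSketch {
>   // One entry per left-side output attribute of the Union; the attribute for
>   // the aliased NULL column c_2 ends up with no entry.
>   val rewrites: Map[String, String] = Map("c_1#23487" -> "c_1#23491")
>
>   // Rewrite an attribute so a pushed-down expression refers to the right side.
>   def pushToRight(attr: String): String =
>     rewrites(attr) // Map.apply falls through to default() and throws when absent
>
>   def main(args: Array[String]): Unit = {
>     // Throws java.util.NoSuchElementException: key not found: c_2#23488,
>     // matching the trace above.
>     pushToRight("c_2#23488")
>   }
> }
> {code}
> The sketch only shows where the exception originates ({{AttributeMap.apply}} at the top of the trace); why {{c_2#23488}} is missing from the map in the {{NULL}} case is the bug itself. (An unverified workaround idea along the same lines: give the {{NULL}} columns an explicit type, e.g. {{CAST(NULL AS BIGINT)}}, though whether that actually avoids the failure is unconfirmed.)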


