[jira] [Commented] (SPARK-14495) Distinct aggregation cannot be used in the having clause
[ https://issues.apache.org/jira/browse/SPARK-14495?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=15275100#comment-15275100 ] Apache Spark commented on SPARK-14495: -- User 'xwu0226' has created a pull request for this issue: https://github.com/apache/spark/pull/12974 > Distinct aggregation cannot be used in the having clause > > > Key: SPARK-14495 > URL: https://issues.apache.org/jira/browse/SPARK-14495 > Project: Spark > Issue Type: Bug > Components: SQL >Affects Versions: 1.6.1 >Reporter: Yin Huai > > {code} > select date, count(distinct id) > from (select '2010-01-01' as date, 1 as id) tmp > group by date > having count(distinct id) > 0; > org.apache.spark.sql.AnalysisException: resolved attribute(s) gid#558,id#559 > missing from date#554,id#555 in operator !Expand [List(date#554, null, 0, if > ((gid#558 = 1)) id#559 else null),List(date#554, id#555, 1, null)], > [date#554,id#561,gid#560,if ((gid = 1)) id else null#562]; > at > org.apache.spark.sql.catalyst.analysis.CheckAnalysis$class.failAnalysis(CheckAnalysis.scala:38) > at > org.apache.spark.sql.catalyst.analysis.Analyzer.failAnalysis(Analyzer.scala:44) > at > org.apache.spark.sql.catalyst.analysis.CheckAnalysis$$anonfun$checkAnalysis$1.apply(CheckAnalysis.scala:183) > at > org.apache.spark.sql.catalyst.analysis.CheckAnalysis$$anonfun$checkAnalysis$1.apply(CheckAnalysis.scala:50) > at > org.apache.spark.sql.catalyst.trees.TreeNode.foreachUp(TreeNode.scala:121) > at > org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$foreachUp$1.apply(TreeNode.scala:120) > at > org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$foreachUp$1.apply(TreeNode.scala:120) > at scala.collection.immutable.List.foreach(List.scala:318) > at > org.apache.spark.sql.catalyst.trees.TreeNode.foreachUp(TreeNode.scala:120) > at > org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$foreachUp$1.apply(TreeNode.scala:120) > at > org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$foreachUp$1.apply(TreeNode.scala:120) > at scala.collection.immutable.List.foreach(List.scala:318) > at > org.apache.spark.sql.catalyst.trees.TreeNode.foreachUp(TreeNode.scala:120) > at > org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$foreachUp$1.apply(TreeNode.scala:120) > at > org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$foreachUp$1.apply(TreeNode.scala:120) > at scala.collection.immutable.List.foreach(List.scala:318) > at > org.apache.spark.sql.catalyst.trees.TreeNode.foreachUp(TreeNode.scala:120) > at > org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$foreachUp$1.apply(TreeNode.scala:120) > at > org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$foreachUp$1.apply(TreeNode.scala:120) > at scala.collection.immutable.List.foreach(List.scala:318) > at > org.apache.spark.sql.catalyst.trees.TreeNode.foreachUp(TreeNode.scala:120) > at > org.apache.spark.sql.catalyst.analysis.CheckAnalysis$class.checkAnalysis(CheckAnalysis.scala:50) > at > org.apache.spark.sql.catalyst.analysis.Analyzer.checkAnalysis(Analyzer.scala:44) > at > org.apache.spark.sql.execution.QueryExecution.assertAnalyzed(QueryExecution.scala:34) > at org.apache.spark.sql.DataFrame.(DataFrame.scala:133) > at org.apache.spark.sql.DataFrame$.apply(DataFrame.scala:52) > at org.apache.spark.sql.SQLContext.sql(SQLContext.scala:816) > {code} -- This message was sent by Atlassian JIRA (v6.3.4#6332) - To unsubscribe, e-mail: issues-unsubscr...@spark.apache.org For additional commands, e-mail: issues-h...@spark.apache.org
[jira] [Commented] (SPARK-14495) Distinct aggregation cannot be used in the having clause
[ https://issues.apache.org/jira/browse/SPARK-14495?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=15271956#comment-15271956 ] Xin Wu commented on SPARK-14495: [~smilegator] I got the fix and running regtest now. Will submit the PR one it is done. > Distinct aggregation cannot be used in the having clause > > > Key: SPARK-14495 > URL: https://issues.apache.org/jira/browse/SPARK-14495 > Project: Spark > Issue Type: Bug > Components: SQL >Affects Versions: 1.6.1 >Reporter: Yin Huai > > {code} > select date, count(distinct id) > from (select '2010-01-01' as date, 1 as id) tmp > group by date > having count(distinct id) > 0; > org.apache.spark.sql.AnalysisException: resolved attribute(s) gid#558,id#559 > missing from date#554,id#555 in operator !Expand [List(date#554, null, 0, if > ((gid#558 = 1)) id#559 else null),List(date#554, id#555, 1, null)], > [date#554,id#561,gid#560,if ((gid = 1)) id else null#562]; > at > org.apache.spark.sql.catalyst.analysis.CheckAnalysis$class.failAnalysis(CheckAnalysis.scala:38) > at > org.apache.spark.sql.catalyst.analysis.Analyzer.failAnalysis(Analyzer.scala:44) > at > org.apache.spark.sql.catalyst.analysis.CheckAnalysis$$anonfun$checkAnalysis$1.apply(CheckAnalysis.scala:183) > at > org.apache.spark.sql.catalyst.analysis.CheckAnalysis$$anonfun$checkAnalysis$1.apply(CheckAnalysis.scala:50) > at > org.apache.spark.sql.catalyst.trees.TreeNode.foreachUp(TreeNode.scala:121) > at > org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$foreachUp$1.apply(TreeNode.scala:120) > at > org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$foreachUp$1.apply(TreeNode.scala:120) > at scala.collection.immutable.List.foreach(List.scala:318) > at > org.apache.spark.sql.catalyst.trees.TreeNode.foreachUp(TreeNode.scala:120) > at > org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$foreachUp$1.apply(TreeNode.scala:120) > at > org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$foreachUp$1.apply(TreeNode.scala:120) > at scala.collection.immutable.List.foreach(List.scala:318) > at > org.apache.spark.sql.catalyst.trees.TreeNode.foreachUp(TreeNode.scala:120) > at > org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$foreachUp$1.apply(TreeNode.scala:120) > at > org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$foreachUp$1.apply(TreeNode.scala:120) > at scala.collection.immutable.List.foreach(List.scala:318) > at > org.apache.spark.sql.catalyst.trees.TreeNode.foreachUp(TreeNode.scala:120) > at > org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$foreachUp$1.apply(TreeNode.scala:120) > at > org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$foreachUp$1.apply(TreeNode.scala:120) > at scala.collection.immutable.List.foreach(List.scala:318) > at > org.apache.spark.sql.catalyst.trees.TreeNode.foreachUp(TreeNode.scala:120) > at > org.apache.spark.sql.catalyst.analysis.CheckAnalysis$class.checkAnalysis(CheckAnalysis.scala:50) > at > org.apache.spark.sql.catalyst.analysis.Analyzer.checkAnalysis(Analyzer.scala:44) > at > org.apache.spark.sql.execution.QueryExecution.assertAnalyzed(QueryExecution.scala:34) > at org.apache.spark.sql.DataFrame.(DataFrame.scala:133) > at org.apache.spark.sql.DataFrame$.apply(DataFrame.scala:52) > at org.apache.spark.sql.SQLContext.sql(SQLContext.scala:816) > {code} -- This message was sent by Atlassian JIRA (v6.3.4#6332) - To unsubscribe, e-mail: issues-unsubscr...@spark.apache.org For additional commands, e-mail: issues-h...@spark.apache.org
[jira] [Commented] (SPARK-14495) Distinct aggregation cannot be used in the having clause
[ https://issues.apache.org/jira/browse/SPARK-14495?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=15266069#comment-15266069 ] Xin Wu commented on SPARK-14495: I can recreated it on branch-1.6. and another workaround is using alias for the aggregate expression {code} scala> sqlContext.sql("SELECT date, count(distinct id) as cnt from (select '2010-01-01' as date, 1 as id) tmp group by date having cnt > 0").show +--+---+ | date|cnt| +--+---+ |2010-01-01| 1| +--+---+ {code} > Distinct aggregation cannot be used in the having clause > > > Key: SPARK-14495 > URL: https://issues.apache.org/jira/browse/SPARK-14495 > Project: Spark > Issue Type: Bug > Components: SQL >Affects Versions: 1.6.1 >Reporter: Yin Huai > > {code} > select date, count(distinct id) > from (select '2010-01-01' as date, 1 as id) tmp > group by date > having count(distinct id) > 0; > org.apache.spark.sql.AnalysisException: resolved attribute(s) gid#558,id#559 > missing from date#554,id#555 in operator !Expand [List(date#554, null, 0, if > ((gid#558 = 1)) id#559 else null),List(date#554, id#555, 1, null)], > [date#554,id#561,gid#560,if ((gid = 1)) id else null#562]; > at > org.apache.spark.sql.catalyst.analysis.CheckAnalysis$class.failAnalysis(CheckAnalysis.scala:38) > at > org.apache.spark.sql.catalyst.analysis.Analyzer.failAnalysis(Analyzer.scala:44) > at > org.apache.spark.sql.catalyst.analysis.CheckAnalysis$$anonfun$checkAnalysis$1.apply(CheckAnalysis.scala:183) > at > org.apache.spark.sql.catalyst.analysis.CheckAnalysis$$anonfun$checkAnalysis$1.apply(CheckAnalysis.scala:50) > at > org.apache.spark.sql.catalyst.trees.TreeNode.foreachUp(TreeNode.scala:121) > at > org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$foreachUp$1.apply(TreeNode.scala:120) > at > org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$foreachUp$1.apply(TreeNode.scala:120) > at scala.collection.immutable.List.foreach(List.scala:318) > at > org.apache.spark.sql.catalyst.trees.TreeNode.foreachUp(TreeNode.scala:120) > at > org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$foreachUp$1.apply(TreeNode.scala:120) > at > org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$foreachUp$1.apply(TreeNode.scala:120) > at scala.collection.immutable.List.foreach(List.scala:318) > at > org.apache.spark.sql.catalyst.trees.TreeNode.foreachUp(TreeNode.scala:120) > at > org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$foreachUp$1.apply(TreeNode.scala:120) > at > org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$foreachUp$1.apply(TreeNode.scala:120) > at scala.collection.immutable.List.foreach(List.scala:318) > at > org.apache.spark.sql.catalyst.trees.TreeNode.foreachUp(TreeNode.scala:120) > at > org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$foreachUp$1.apply(TreeNode.scala:120) > at > org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$foreachUp$1.apply(TreeNode.scala:120) > at scala.collection.immutable.List.foreach(List.scala:318) > at > org.apache.spark.sql.catalyst.trees.TreeNode.foreachUp(TreeNode.scala:120) > at > org.apache.spark.sql.catalyst.analysis.CheckAnalysis$class.checkAnalysis(CheckAnalysis.scala:50) > at > org.apache.spark.sql.catalyst.analysis.Analyzer.checkAnalysis(Analyzer.scala:44) > at > org.apache.spark.sql.execution.QueryExecution.assertAnalyzed(QueryExecution.scala:34) > at org.apache.spark.sql.DataFrame.(DataFrame.scala:133) > at org.apache.spark.sql.DataFrame$.apply(DataFrame.scala:52) > at org.apache.spark.sql.SQLContext.sql(SQLContext.scala:816) > {code} -- This message was sent by Atlassian JIRA (v6.3.4#6332) - To unsubscribe, e-mail: issues-unsubscr...@spark.apache.org For additional commands, e-mail: issues-h...@spark.apache.org
[jira] [Commented] (SPARK-14495) Distinct aggregation cannot be used in the having clause
[ https://issues.apache.org/jira/browse/SPARK-14495?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=15265203#comment-15265203 ] Xiao Li commented on SPARK-14495: - Sure, will do it. Thanks! > Distinct aggregation cannot be used in the having clause > > > Key: SPARK-14495 > URL: https://issues.apache.org/jira/browse/SPARK-14495 > Project: Spark > Issue Type: Bug > Components: SQL >Affects Versions: 1.6.1 >Reporter: Yin Huai > > {code} > select date, count(distinct id) > from (select '2010-01-01' as date, 1 as id) tmp > group by date > having count(distinct id) > 0; > org.apache.spark.sql.AnalysisException: resolved attribute(s) gid#558,id#559 > missing from date#554,id#555 in operator !Expand [List(date#554, null, 0, if > ((gid#558 = 1)) id#559 else null),List(date#554, id#555, 1, null)], > [date#554,id#561,gid#560,if ((gid = 1)) id else null#562]; > at > org.apache.spark.sql.catalyst.analysis.CheckAnalysis$class.failAnalysis(CheckAnalysis.scala:38) > at > org.apache.spark.sql.catalyst.analysis.Analyzer.failAnalysis(Analyzer.scala:44) > at > org.apache.spark.sql.catalyst.analysis.CheckAnalysis$$anonfun$checkAnalysis$1.apply(CheckAnalysis.scala:183) > at > org.apache.spark.sql.catalyst.analysis.CheckAnalysis$$anonfun$checkAnalysis$1.apply(CheckAnalysis.scala:50) > at > org.apache.spark.sql.catalyst.trees.TreeNode.foreachUp(TreeNode.scala:121) > at > org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$foreachUp$1.apply(TreeNode.scala:120) > at > org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$foreachUp$1.apply(TreeNode.scala:120) > at scala.collection.immutable.List.foreach(List.scala:318) > at > org.apache.spark.sql.catalyst.trees.TreeNode.foreachUp(TreeNode.scala:120) > at > org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$foreachUp$1.apply(TreeNode.scala:120) > at > org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$foreachUp$1.apply(TreeNode.scala:120) > at scala.collection.immutable.List.foreach(List.scala:318) > at > org.apache.spark.sql.catalyst.trees.TreeNode.foreachUp(TreeNode.scala:120) > at > org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$foreachUp$1.apply(TreeNode.scala:120) > at > org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$foreachUp$1.apply(TreeNode.scala:120) > at scala.collection.immutable.List.foreach(List.scala:318) > at > org.apache.spark.sql.catalyst.trees.TreeNode.foreachUp(TreeNode.scala:120) > at > org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$foreachUp$1.apply(TreeNode.scala:120) > at > org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$foreachUp$1.apply(TreeNode.scala:120) > at scala.collection.immutable.List.foreach(List.scala:318) > at > org.apache.spark.sql.catalyst.trees.TreeNode.foreachUp(TreeNode.scala:120) > at > org.apache.spark.sql.catalyst.analysis.CheckAnalysis$class.checkAnalysis(CheckAnalysis.scala:50) > at > org.apache.spark.sql.catalyst.analysis.Analyzer.checkAnalysis(Analyzer.scala:44) > at > org.apache.spark.sql.execution.QueryExecution.assertAnalyzed(QueryExecution.scala:34) > at org.apache.spark.sql.DataFrame.(DataFrame.scala:133) > at org.apache.spark.sql.DataFrame$.apply(DataFrame.scala:52) > at org.apache.spark.sql.SQLContext.sql(SQLContext.scala:816) > {code} -- This message was sent by Atlassian JIRA (v6.3.4#6332) - To unsubscribe, e-mail: issues-unsubscr...@spark.apache.org For additional commands, e-mail: issues-h...@spark.apache.org
[jira] [Commented] (SPARK-14495) Distinct aggregation cannot be used in the having clause
[ https://issues.apache.org/jira/browse/SPARK-14495?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=15264838#comment-15264838 ] Reynold Xin commented on SPARK-14495: - cc [~smilegator] want to find somebody from your side to fix this for branch-1.6? > Distinct aggregation cannot be used in the having clause > > > Key: SPARK-14495 > URL: https://issues.apache.org/jira/browse/SPARK-14495 > Project: Spark > Issue Type: Bug > Components: SQL >Affects Versions: 1.6.1 >Reporter: Yin Huai > > {code} > select date, count(distinct id) > from (select '2010-01-01' as date, 1 as id) tmp > group by date > having count(distinct id) > 0; > org.apache.spark.sql.AnalysisException: resolved attribute(s) gid#558,id#559 > missing from date#554,id#555 in operator !Expand [List(date#554, null, 0, if > ((gid#558 = 1)) id#559 else null),List(date#554, id#555, 1, null)], > [date#554,id#561,gid#560,if ((gid = 1)) id else null#562]; > at > org.apache.spark.sql.catalyst.analysis.CheckAnalysis$class.failAnalysis(CheckAnalysis.scala:38) > at > org.apache.spark.sql.catalyst.analysis.Analyzer.failAnalysis(Analyzer.scala:44) > at > org.apache.spark.sql.catalyst.analysis.CheckAnalysis$$anonfun$checkAnalysis$1.apply(CheckAnalysis.scala:183) > at > org.apache.spark.sql.catalyst.analysis.CheckAnalysis$$anonfun$checkAnalysis$1.apply(CheckAnalysis.scala:50) > at > org.apache.spark.sql.catalyst.trees.TreeNode.foreachUp(TreeNode.scala:121) > at > org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$foreachUp$1.apply(TreeNode.scala:120) > at > org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$foreachUp$1.apply(TreeNode.scala:120) > at scala.collection.immutable.List.foreach(List.scala:318) > at > org.apache.spark.sql.catalyst.trees.TreeNode.foreachUp(TreeNode.scala:120) > at > org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$foreachUp$1.apply(TreeNode.scala:120) > at > org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$foreachUp$1.apply(TreeNode.scala:120) > at scala.collection.immutable.List.foreach(List.scala:318) > at > org.apache.spark.sql.catalyst.trees.TreeNode.foreachUp(TreeNode.scala:120) > at > org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$foreachUp$1.apply(TreeNode.scala:120) > at > org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$foreachUp$1.apply(TreeNode.scala:120) > at scala.collection.immutable.List.foreach(List.scala:318) > at > org.apache.spark.sql.catalyst.trees.TreeNode.foreachUp(TreeNode.scala:120) > at > org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$foreachUp$1.apply(TreeNode.scala:120) > at > org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$foreachUp$1.apply(TreeNode.scala:120) > at scala.collection.immutable.List.foreach(List.scala:318) > at > org.apache.spark.sql.catalyst.trees.TreeNode.foreachUp(TreeNode.scala:120) > at > org.apache.spark.sql.catalyst.analysis.CheckAnalysis$class.checkAnalysis(CheckAnalysis.scala:50) > at > org.apache.spark.sql.catalyst.analysis.Analyzer.checkAnalysis(Analyzer.scala:44) > at > org.apache.spark.sql.execution.QueryExecution.assertAnalyzed(QueryExecution.scala:34) > at org.apache.spark.sql.DataFrame.(DataFrame.scala:133) > at org.apache.spark.sql.DataFrame$.apply(DataFrame.scala:52) > at org.apache.spark.sql.SQLContext.sql(SQLContext.scala:816) > {code} -- This message was sent by Atlassian JIRA (v6.3.4#6332) - To unsubscribe, e-mail: issues-unsubscr...@spark.apache.org For additional commands, e-mail: issues-h...@spark.apache.org
[jira] [Commented] (SPARK-14495) Distinct aggregation cannot be used in the having clause
[ https://issues.apache.org/jira/browse/SPARK-14495?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=15239674#comment-15239674 ] Cheng Lian commented on SPARK-14495: This ticket is for branch-1.6. > Distinct aggregation cannot be used in the having clause > > > Key: SPARK-14495 > URL: https://issues.apache.org/jira/browse/SPARK-14495 > Project: Spark > Issue Type: Bug > Components: SQL >Affects Versions: 1.6.1 >Reporter: Yin Huai > > {code} > select date, count(distinct id) > from (select '2010-01-01' as date, 1 as id) tmp > group by date > having count(distinct id) > 0; > org.apache.spark.sql.AnalysisException: resolved attribute(s) gid#558,id#559 > missing from date#554,id#555 in operator !Expand [List(date#554, null, 0, if > ((gid#558 = 1)) id#559 else null),List(date#554, id#555, 1, null)], > [date#554,id#561,gid#560,if ((gid = 1)) id else null#562]; > at > org.apache.spark.sql.catalyst.analysis.CheckAnalysis$class.failAnalysis(CheckAnalysis.scala:38) > at > org.apache.spark.sql.catalyst.analysis.Analyzer.failAnalysis(Analyzer.scala:44) > at > org.apache.spark.sql.catalyst.analysis.CheckAnalysis$$anonfun$checkAnalysis$1.apply(CheckAnalysis.scala:183) > at > org.apache.spark.sql.catalyst.analysis.CheckAnalysis$$anonfun$checkAnalysis$1.apply(CheckAnalysis.scala:50) > at > org.apache.spark.sql.catalyst.trees.TreeNode.foreachUp(TreeNode.scala:121) > at > org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$foreachUp$1.apply(TreeNode.scala:120) > at > org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$foreachUp$1.apply(TreeNode.scala:120) > at scala.collection.immutable.List.foreach(List.scala:318) > at > org.apache.spark.sql.catalyst.trees.TreeNode.foreachUp(TreeNode.scala:120) > at > org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$foreachUp$1.apply(TreeNode.scala:120) > at > org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$foreachUp$1.apply(TreeNode.scala:120) > at scala.collection.immutable.List.foreach(List.scala:318) > at > org.apache.spark.sql.catalyst.trees.TreeNode.foreachUp(TreeNode.scala:120) > at > org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$foreachUp$1.apply(TreeNode.scala:120) > at > org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$foreachUp$1.apply(TreeNode.scala:120) > at scala.collection.immutable.List.foreach(List.scala:318) > at > org.apache.spark.sql.catalyst.trees.TreeNode.foreachUp(TreeNode.scala:120) > at > org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$foreachUp$1.apply(TreeNode.scala:120) > at > org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$foreachUp$1.apply(TreeNode.scala:120) > at scala.collection.immutable.List.foreach(List.scala:318) > at > org.apache.spark.sql.catalyst.trees.TreeNode.foreachUp(TreeNode.scala:120) > at > org.apache.spark.sql.catalyst.analysis.CheckAnalysis$class.checkAnalysis(CheckAnalysis.scala:50) > at > org.apache.spark.sql.catalyst.analysis.Analyzer.checkAnalysis(Analyzer.scala:44) > at > org.apache.spark.sql.execution.QueryExecution.assertAnalyzed(QueryExecution.scala:34) > at org.apache.spark.sql.DataFrame.(DataFrame.scala:133) > at org.apache.spark.sql.DataFrame$.apply(DataFrame.scala:52) > at org.apache.spark.sql.SQLContext.sql(SQLContext.scala:816) > {code} -- This message was sent by Atlassian JIRA (v6.3.4#6332) - To unsubscribe, e-mail: issues-unsubscr...@spark.apache.org For additional commands, e-mail: issues-h...@spark.apache.org
[jira] [Commented] (SPARK-14495) Distinct aggregation cannot be used in the having clause
[ https://issues.apache.org/jira/browse/SPARK-14495?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=15235188#comment-15235188 ] Liang-Chi Hsieh commented on SPARK-14495: - I can' reproduce this bug with current master branch. The query can work. {code} test("distinct aggregatiom in having clause") { checkAnswer( sql( """ |SELECT date, count(distinct id) FROM (SELECT '2010-01-01' AS date, 1 AS id) |tmp GROUP BY date HAVING count(distinct id) > 0 """.stripMargin), Row("2010-01-01", 1) :: Nil) } {code} > Distinct aggregation cannot be used in the having clause > > > Key: SPARK-14495 > URL: https://issues.apache.org/jira/browse/SPARK-14495 > Project: Spark > Issue Type: Bug > Components: SQL >Affects Versions: 1.6.1 >Reporter: Yin Huai > > {code} > select date, count(distinct id) > from (select '2010-01-01' as date, 1 as id) tmp > group by date > having count(distinct id) > 0; > org.apache.spark.sql.AnalysisException: resolved attribute(s) gid#558,id#559 > missing from date#554,id#555 in operator !Expand [List(date#554, null, 0, if > ((gid#558 = 1)) id#559 else null),List(date#554, id#555, 1, null)], > [date#554,id#561,gid#560,if ((gid = 1)) id else null#562]; > at > org.apache.spark.sql.catalyst.analysis.CheckAnalysis$class.failAnalysis(CheckAnalysis.scala:38) > at > org.apache.spark.sql.catalyst.analysis.Analyzer.failAnalysis(Analyzer.scala:44) > at > org.apache.spark.sql.catalyst.analysis.CheckAnalysis$$anonfun$checkAnalysis$1.apply(CheckAnalysis.scala:183) > at > org.apache.spark.sql.catalyst.analysis.CheckAnalysis$$anonfun$checkAnalysis$1.apply(CheckAnalysis.scala:50) > at > org.apache.spark.sql.catalyst.trees.TreeNode.foreachUp(TreeNode.scala:121) > at > org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$foreachUp$1.apply(TreeNode.scala:120) > at > org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$foreachUp$1.apply(TreeNode.scala:120) > at scala.collection.immutable.List.foreach(List.scala:318) > at > org.apache.spark.sql.catalyst.trees.TreeNode.foreachUp(TreeNode.scala:120) > at > org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$foreachUp$1.apply(TreeNode.scala:120) > at > org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$foreachUp$1.apply(TreeNode.scala:120) > at scala.collection.immutable.List.foreach(List.scala:318) > at > org.apache.spark.sql.catalyst.trees.TreeNode.foreachUp(TreeNode.scala:120) > at > org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$foreachUp$1.apply(TreeNode.scala:120) > at > org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$foreachUp$1.apply(TreeNode.scala:120) > at scala.collection.immutable.List.foreach(List.scala:318) > at > org.apache.spark.sql.catalyst.trees.TreeNode.foreachUp(TreeNode.scala:120) > at > org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$foreachUp$1.apply(TreeNode.scala:120) > at > org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$foreachUp$1.apply(TreeNode.scala:120) > at scala.collection.immutable.List.foreach(List.scala:318) > at > org.apache.spark.sql.catalyst.trees.TreeNode.foreachUp(TreeNode.scala:120) > at > org.apache.spark.sql.catalyst.analysis.CheckAnalysis$class.checkAnalysis(CheckAnalysis.scala:50) > at > org.apache.spark.sql.catalyst.analysis.Analyzer.checkAnalysis(Analyzer.scala:44) > at > org.apache.spark.sql.execution.QueryExecution.assertAnalyzed(QueryExecution.scala:34) > at org.apache.spark.sql.DataFrame.(DataFrame.scala:133) > at org.apache.spark.sql.DataFrame$.apply(DataFrame.scala:52) > at org.apache.spark.sql.SQLContext.sql(SQLContext.scala:816) > {code} -- This message was sent by Atlassian JIRA (v6.3.4#6332) - To unsubscribe, e-mail: issues-unsubscr...@spark.apache.org For additional commands, e-mail: issues-h...@spark.apache.org
[jira] [Commented] (SPARK-14495) Distinct aggregation cannot be used in the having clause
[ https://issues.apache.org/jira/browse/SPARK-14495?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=15234538#comment-15234538 ] Liang-Chi Hsieh commented on SPARK-14495: - I've tried this query in my PR (https://github.com/apache/spark/pull/12138) for Expand operator. It can solve this issue too. > Distinct aggregation cannot be used in the having clause > > > Key: SPARK-14495 > URL: https://issues.apache.org/jira/browse/SPARK-14495 > Project: Spark > Issue Type: Bug > Components: SQL >Affects Versions: 1.6.1 >Reporter: Yin Huai > > {code} > select date, count(distinct id) > from (select '2010-01-01' as date, 1 as id) tmp > group by date > having count(distinct id) > 0; > org.apache.spark.sql.AnalysisException: resolved attribute(s) gid#558,id#559 > missing from date#554,id#555 in operator !Expand [List(date#554, null, 0, if > ((gid#558 = 1)) id#559 else null),List(date#554, id#555, 1, null)], > [date#554,id#561,gid#560,if ((gid = 1)) id else null#562]; > at > org.apache.spark.sql.catalyst.analysis.CheckAnalysis$class.failAnalysis(CheckAnalysis.scala:38) > at > org.apache.spark.sql.catalyst.analysis.Analyzer.failAnalysis(Analyzer.scala:44) > at > org.apache.spark.sql.catalyst.analysis.CheckAnalysis$$anonfun$checkAnalysis$1.apply(CheckAnalysis.scala:183) > at > org.apache.spark.sql.catalyst.analysis.CheckAnalysis$$anonfun$checkAnalysis$1.apply(CheckAnalysis.scala:50) > at > org.apache.spark.sql.catalyst.trees.TreeNode.foreachUp(TreeNode.scala:121) > at > org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$foreachUp$1.apply(TreeNode.scala:120) > at > org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$foreachUp$1.apply(TreeNode.scala:120) > at scala.collection.immutable.List.foreach(List.scala:318) > at > org.apache.spark.sql.catalyst.trees.TreeNode.foreachUp(TreeNode.scala:120) > at > org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$foreachUp$1.apply(TreeNode.scala:120) > at > org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$foreachUp$1.apply(TreeNode.scala:120) > at scala.collection.immutable.List.foreach(List.scala:318) > at > org.apache.spark.sql.catalyst.trees.TreeNode.foreachUp(TreeNode.scala:120) > at > org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$foreachUp$1.apply(TreeNode.scala:120) > at > org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$foreachUp$1.apply(TreeNode.scala:120) > at scala.collection.immutable.List.foreach(List.scala:318) > at > org.apache.spark.sql.catalyst.trees.TreeNode.foreachUp(TreeNode.scala:120) > at > org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$foreachUp$1.apply(TreeNode.scala:120) > at > org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$foreachUp$1.apply(TreeNode.scala:120) > at scala.collection.immutable.List.foreach(List.scala:318) > at > org.apache.spark.sql.catalyst.trees.TreeNode.foreachUp(TreeNode.scala:120) > at > org.apache.spark.sql.catalyst.analysis.CheckAnalysis$class.checkAnalysis(CheckAnalysis.scala:50) > at > org.apache.spark.sql.catalyst.analysis.Analyzer.checkAnalysis(Analyzer.scala:44) > at > org.apache.spark.sql.execution.QueryExecution.assertAnalyzed(QueryExecution.scala:34) > at org.apache.spark.sql.DataFrame.(DataFrame.scala:133) > at org.apache.spark.sql.DataFrame$.apply(DataFrame.scala:52) > at org.apache.spark.sql.SQLContext.sql(SQLContext.scala:816) > {code} -- This message was sent by Atlassian JIRA (v6.3.4#6332) - To unsubscribe, e-mail: issues-unsubscr...@spark.apache.org For additional commands, e-mail: issues-h...@spark.apache.org
[jira] [Commented] (SPARK-14495) Distinct aggregation cannot be used in the having clause
[ https://issues.apache.org/jira/browse/SPARK-14495?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=15232717#comment-15232717 ] Yin Huai commented on SPARK-14495: -- The current workaround is to use a subquery and then use where. > Distinct aggregation cannot be used in the having clause > > > Key: SPARK-14495 > URL: https://issues.apache.org/jira/browse/SPARK-14495 > Project: Spark > Issue Type: Bug > Components: SQL >Affects Versions: 1.6.1 >Reporter: Yin Huai > > {code} > select date, count(distinct id) > from (select '2010-01-01' as date, 1 as id) tmp > group by date > having count(distinct id) > 0; > org.apache.spark.sql.AnalysisException: resolved attribute(s) gid#558,id#559 > missing from date#554,id#555 in operator !Expand [List(date#554, null, 0, if > ((gid#558 = 1)) id#559 else null),List(date#554, id#555, 1, null)], > [date#554,id#561,gid#560,if ((gid = 1)) id else null#562]; > at > org.apache.spark.sql.catalyst.analysis.CheckAnalysis$class.failAnalysis(CheckAnalysis.scala:38) > at > org.apache.spark.sql.catalyst.analysis.Analyzer.failAnalysis(Analyzer.scala:44) > at > org.apache.spark.sql.catalyst.analysis.CheckAnalysis$$anonfun$checkAnalysis$1.apply(CheckAnalysis.scala:183) > at > org.apache.spark.sql.catalyst.analysis.CheckAnalysis$$anonfun$checkAnalysis$1.apply(CheckAnalysis.scala:50) > at > org.apache.spark.sql.catalyst.trees.TreeNode.foreachUp(TreeNode.scala:121) > at > org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$foreachUp$1.apply(TreeNode.scala:120) > at > org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$foreachUp$1.apply(TreeNode.scala:120) > at scala.collection.immutable.List.foreach(List.scala:318) > at > org.apache.spark.sql.catalyst.trees.TreeNode.foreachUp(TreeNode.scala:120) > at > org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$foreachUp$1.apply(TreeNode.scala:120) > at > org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$foreachUp$1.apply(TreeNode.scala:120) > at scala.collection.immutable.List.foreach(List.scala:318) > at > org.apache.spark.sql.catalyst.trees.TreeNode.foreachUp(TreeNode.scala:120) > at > org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$foreachUp$1.apply(TreeNode.scala:120) > at > org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$foreachUp$1.apply(TreeNode.scala:120) > at scala.collection.immutable.List.foreach(List.scala:318) > at > org.apache.spark.sql.catalyst.trees.TreeNode.foreachUp(TreeNode.scala:120) > at > org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$foreachUp$1.apply(TreeNode.scala:120) > at > org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$foreachUp$1.apply(TreeNode.scala:120) > at scala.collection.immutable.List.foreach(List.scala:318) > at > org.apache.spark.sql.catalyst.trees.TreeNode.foreachUp(TreeNode.scala:120) > at > org.apache.spark.sql.catalyst.analysis.CheckAnalysis$class.checkAnalysis(CheckAnalysis.scala:50) > at > org.apache.spark.sql.catalyst.analysis.Analyzer.checkAnalysis(Analyzer.scala:44) > at > org.apache.spark.sql.execution.QueryExecution.assertAnalyzed(QueryExecution.scala:34) > at org.apache.spark.sql.DataFrame.(DataFrame.scala:133) > at org.apache.spark.sql.DataFrame$.apply(DataFrame.scala:52) > at org.apache.spark.sql.SQLContext.sql(SQLContext.scala:816) > {code} -- This message was sent by Atlassian JIRA (v6.3.4#6332) - To unsubscribe, e-mail: issues-unsubscr...@spark.apache.org For additional commands, e-mail: issues-h...@spark.apache.org