dongjoon-hyun commented on a change in pull request #24335: [SPARK-27425][SQL] Add count_if function URL: https://github.com/apache/spark/pull/24335#discussion_r291673195
########## File path: sql/core/src/test/scala/org/apache/spark/sql/DataFrameAggregateSuite.scala ########## @@ -894,4 +894,28 @@ class DataFrameAggregateSuite extends QueryTest with SharedSQLContext { error.message.contains("function min_by does not support ordering on type map<int,string>")) } } + + test("count_if") { + withTempView("tempView") { + Seq(("a", None), ("a", Some(1)), ("a", Some(2)), ("a", Some(3)), + ("b", None), ("b", Some(4)), ("b", Some(5)), ("b", Some(6))) + .toDF("x", "y") + .createOrReplaceTempView("tempView") + + checkAnswer( + sql("SELECT COUNT_IF(NULL), COUNT_IF(y % 2 = 0), COUNT_IF(y % 2 <> 0), " + + "COUNT_IF(y IS NULL) FROM tempView"), + Row(0L, 3L, 3L, 2L)) + + checkAnswer( + sql("SELECT x, COUNT_IF(NULL), COUNT_IF(y % 2 = 0), COUNT_IF(y % 2 <> 0), " + + "COUNT_IF(y IS NULL) FROM tempView GROUP BY x"), + Row("a", 0L, 1L, 2L, 1L) :: Row("b", 0L, 2L, 1L, 1L) :: Nil) Review comment: Since `COUNT_IF` can be used in `HAVING` clauses, could you add more tests like the following? ```scala scala> sql("SELECT x FROM tempView GROUP BY x HAVING COUNT_IF(y % 2 = 0) = 1").show +---+ | x| +---+ | a| +---+ scala> sql("SELECT x FROM tempView GROUP BY x HAVING COUNT_IF(y % 2 = 0) = 2").show +---+ | x| +---+ | b| +---+ ``` ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services --------------------------------------------------------------------- To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org