[ https://issues.apache.org/jira/browse/SPARK-34876?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
Tanel Kiis updated SPARK-34876: ------------------------------- Description: Test case in scalar-subquery-select.sql: {code:title=query} SELECT t1a, (SELECT count(t2d) FROM t2 WHERE t2a = t1a) count_t2, (SELECT count_if(t2d > 0) FROM t2 WHERE t2a = t1a) count_if_t2, (SELECT approx_count_distinct(t2d) FROM t2 WHERE t2a = t1a) approx_count_distinct_t2, (SELECT collect_list(t2d) FROM t2 WHERE t2a = t1a) collect_list_t2, (SELECT collect_set(t2d) FROM t2 WHERE t2a = t1a) collect_set_t2, (SELECT hex(count_min_sketch(t2d, 0.5d, 0.5d, 1)) FROM t2 WHERE t2a = t1a) collect_set_t2 FROM t1; {code} {code:title=Result} val1a 0 0 NULL NULL NULL NULL val1a 0 0 NULL NULL NULL NULL val1a 0 0 NULL NULL NULL NULL val1a 0 0 NULL NULL NULL NULL val1b 6 6 3 [19,119,319,19,19,19] [19,119,319] 0000000100000000000000060000000100000004000000005D8D6AB90000000000000000000000000000000400000000000000010000000000000001 val1c 2 2 2 [219,19] [219,19] 0000000100000000000000020000000100000004000000005D8D6AB90000000000000000000000000000000100000000000000000000000000000001 val1d 0 0 NULL NULL NULL NULL val1d 0 0 NULL NULL NULL NULL val1d 0 0 NULL NULL NULL NULL val1e 1 1 1 [19] [19] 0000000100000000000000010000000100000004000000005D8D6AB90000000000000000000000000000000100000000000000000000000000000000 val1e 1 1 1 [19] [19] 0000000100000000000000010000000100000004000000005D8D6AB90000000000000000000000000000000100000000000000000000000000000000 val1e 1 1 1 [19] [19] 0000000100000000000000010000000100000004000000005D8D6AB90000000000000000000000000000000100000000000000000000000000000000 {code} was: {code:title=query} SELECT t1a, (SELECT count(t2d) FROM t2 WHERE t2a = t1a) count_t2, (SELECT count_if(t2d > 0) FROM t2 WHERE t2a = t1a) count_if_t2, (SELECT approx_count_distinct(t2d) FROM t2 WHERE t2a = t1a) approx_count_distinct_t2, (SELECT collect_list(t2d) FROM t2 WHERE t2a = t1a) collect_list_t2, (SELECT collect_set(t2d) FROM t2 WHERE t2a = t1a) collect_set_t2, (SELECT hex(count_min_sketch(t2d, 0.5d, 0.5d, 1)) FROM t2 WHERE t2a = t1a) collect_set_t2 FROM t1; {code} {code:title=Result} val1a 0 0 NULL NULL NULL NULL val1a 0 0 NULL NULL NULL NULL val1a 0 0 NULL NULL NULL NULL val1a 0 0 NULL NULL NULL NULL val1b 6 6 3 [19,119,319,19,19,19] [19,119,319] 0000000100000000000000060000000100000004000000005D8D6AB90000000000000000000000000000000400000000000000010000000000000001 val1c 2 2 2 [219,19] [219,19] 0000000100000000000000020000000100000004000000005D8D6AB90000000000000000000000000000000100000000000000000000000000000001 val1d 0 0 NULL NULL NULL NULL val1d 0 0 NULL NULL NULL NULL val1d 0 0 NULL NULL NULL NULL val1e 1 1 1 [19] [19] 0000000100000000000000010000000100000004000000005D8D6AB90000000000000000000000000000000100000000000000000000000000000000 val1e 1 1 1 [19] [19] 0000000100000000000000010000000100000004000000005D8D6AB90000000000000000000000000000000100000000000000000000000000000000 val1e 1 1 1 [19] [19] 0000000100000000000000010000000100000004000000005D8D6AB90000000000000000000000000000000100000000000000000000000000000000 {code} > Non-nullable aggregates can return NULL in a correlated subquery > ---------------------------------------------------------------- > > Key: SPARK-34876 > URL: https://issues.apache.org/jira/browse/SPARK-34876 > Project: Spark > Issue Type: Bug > Components: SQL > Affects Versions: 3.2.0 > Reporter: Tanel Kiis > Priority: Major > > Test case in scalar-subquery-select.sql: > {code:title=query} > SELECT t1a, > (SELECT count(t2d) FROM t2 WHERE t2a = t1a) count_t2, > (SELECT count_if(t2d > 0) FROM t2 WHERE t2a = t1a) count_if_t2, > (SELECT approx_count_distinct(t2d) FROM t2 WHERE t2a = t1a) > approx_count_distinct_t2, > (SELECT collect_list(t2d) FROM t2 WHERE t2a = t1a) collect_list_t2, > (SELECT collect_set(t2d) FROM t2 WHERE t2a = t1a) collect_set_t2, > (SELECT hex(count_min_sketch(t2d, 0.5d, 0.5d, 1)) FROM t2 WHERE t2a = > t1a) collect_set_t2 > FROM t1; > {code} > {code:title=Result} > val1a 0 0 NULL NULL NULL NULL > val1a 0 0 NULL NULL NULL NULL > val1a 0 0 NULL NULL NULL NULL > val1a 0 0 NULL NULL NULL NULL > val1b 6 6 3 [19,119,319,19,19,19] [19,119,319] > 0000000100000000000000060000000100000004000000005D8D6AB90000000000000000000000000000000400000000000000010000000000000001 > val1c 2 2 2 [219,19] [219,19] > 0000000100000000000000020000000100000004000000005D8D6AB90000000000000000000000000000000100000000000000000000000000000001 > val1d 0 0 NULL NULL NULL NULL > val1d 0 0 NULL NULL NULL NULL > val1d 0 0 NULL NULL NULL NULL > val1e 1 1 1 [19] [19] > 0000000100000000000000010000000100000004000000005D8D6AB90000000000000000000000000000000100000000000000000000000000000000 > val1e 1 1 1 [19] [19] > 0000000100000000000000010000000100000004000000005D8D6AB90000000000000000000000000000000100000000000000000000000000000000 > val1e 1 1 1 [19] [19] > 0000000100000000000000010000000100000004000000005D8D6AB90000000000000000000000000000000100000000000000000000000000000000 > {code} -- This message was sent by Atlassian Jira (v8.3.4#803005) --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@spark.apache.org For additional commands, e-mail: issues-h...@spark.apache.org