[ 
https://issues.apache.org/jira/browse/SPARK-34876?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Tanel Kiis updated SPARK-34876:
-------------------------------
    Description: 

Test case in scalar-subquery-select.sql:

{code:title=query}
SELECT t1a,
    (SELECT count(t2d) FROM t2 WHERE t2a = t1a) count_t2,
    (SELECT count_if(t2d > 0) FROM t2 WHERE t2a = t1a) count_if_t2,
    (SELECT approx_count_distinct(t2d) FROM t2 WHERE t2a = t1a) 
approx_count_distinct_t2,
    (SELECT collect_list(t2d) FROM t2 WHERE t2a = t1a) collect_list_t2,
    (SELECT collect_set(t2d) FROM t2 WHERE t2a = t1a) collect_set_t2,
    (SELECT hex(count_min_sketch(t2d, 0.5d, 0.5d, 1)) FROM t2 WHERE t2a = t1a) 
collect_set_t2
FROM t1;
{code}

{code:title=Result}
val1a   0       0       NULL    NULL    NULL    NULL
val1a   0       0       NULL    NULL    NULL    NULL
val1a   0       0       NULL    NULL    NULL    NULL
val1a   0       0       NULL    NULL    NULL    NULL
val1b   6       6       3       [19,119,319,19,19,19]   [19,119,319]    
0000000100000000000000060000000100000004000000005D8D6AB90000000000000000000000000000000400000000000000010000000000000001
val1c   2       2       2       [219,19]        [219,19]        
0000000100000000000000020000000100000004000000005D8D6AB90000000000000000000000000000000100000000000000000000000000000001
val1d   0       0       NULL    NULL    NULL    NULL
val1d   0       0       NULL    NULL    NULL    NULL
val1d   0       0       NULL    NULL    NULL    NULL
val1e   1       1       1       [19]    [19]    
0000000100000000000000010000000100000004000000005D8D6AB90000000000000000000000000000000100000000000000000000000000000000
val1e   1       1       1       [19]    [19]    
0000000100000000000000010000000100000004000000005D8D6AB90000000000000000000000000000000100000000000000000000000000000000
val1e   1       1       1       [19]    [19]    
0000000100000000000000010000000100000004000000005D8D6AB90000000000000000000000000000000100000000000000000000000000000000
{code}

  was:
{code:title=query}
SELECT t1a,
    (SELECT count(t2d) FROM t2 WHERE t2a = t1a) count_t2,
    (SELECT count_if(t2d > 0) FROM t2 WHERE t2a = t1a) count_if_t2,
    (SELECT approx_count_distinct(t2d) FROM t2 WHERE t2a = t1a) 
approx_count_distinct_t2,
    (SELECT collect_list(t2d) FROM t2 WHERE t2a = t1a) collect_list_t2,
    (SELECT collect_set(t2d) FROM t2 WHERE t2a = t1a) collect_set_t2,
    (SELECT hex(count_min_sketch(t2d, 0.5d, 0.5d, 1)) FROM t2 WHERE t2a = t1a) 
collect_set_t2
FROM t1;
{code}

{code:title=Result}
val1a   0       0       NULL    NULL    NULL    NULL
val1a   0       0       NULL    NULL    NULL    NULL
val1a   0       0       NULL    NULL    NULL    NULL
val1a   0       0       NULL    NULL    NULL    NULL
val1b   6       6       3       [19,119,319,19,19,19]   [19,119,319]    
0000000100000000000000060000000100000004000000005D8D6AB90000000000000000000000000000000400000000000000010000000000000001
val1c   2       2       2       [219,19]        [219,19]        
0000000100000000000000020000000100000004000000005D8D6AB90000000000000000000000000000000100000000000000000000000000000001
val1d   0       0       NULL    NULL    NULL    NULL
val1d   0       0       NULL    NULL    NULL    NULL
val1d   0       0       NULL    NULL    NULL    NULL
val1e   1       1       1       [19]    [19]    
0000000100000000000000010000000100000004000000005D8D6AB90000000000000000000000000000000100000000000000000000000000000000
val1e   1       1       1       [19]    [19]    
0000000100000000000000010000000100000004000000005D8D6AB90000000000000000000000000000000100000000000000000000000000000000
val1e   1       1       1       [19]    [19]    
0000000100000000000000010000000100000004000000005D8D6AB90000000000000000000000000000000100000000000000000000000000000000
{code}


> Non-nullable aggregates can return NULL in a correlated subquery
> ----------------------------------------------------------------
>
>                 Key: SPARK-34876
>                 URL: https://issues.apache.org/jira/browse/SPARK-34876
>             Project: Spark
>          Issue Type: Bug
>          Components: SQL
>    Affects Versions: 3.2.0
>            Reporter: Tanel Kiis
>            Priority: Major
>
> Test case in scalar-subquery-select.sql:
> {code:title=query}
> SELECT t1a,
>     (SELECT count(t2d) FROM t2 WHERE t2a = t1a) count_t2,
>     (SELECT count_if(t2d > 0) FROM t2 WHERE t2a = t1a) count_if_t2,
>     (SELECT approx_count_distinct(t2d) FROM t2 WHERE t2a = t1a) 
> approx_count_distinct_t2,
>     (SELECT collect_list(t2d) FROM t2 WHERE t2a = t1a) collect_list_t2,
>     (SELECT collect_set(t2d) FROM t2 WHERE t2a = t1a) collect_set_t2,
>     (SELECT hex(count_min_sketch(t2d, 0.5d, 0.5d, 1)) FROM t2 WHERE t2a = 
> t1a) collect_set_t2
> FROM t1;
> {code}
> {code:title=Result}
> val1a 0       0       NULL    NULL    NULL    NULL
> val1a 0       0       NULL    NULL    NULL    NULL
> val1a 0       0       NULL    NULL    NULL    NULL
> val1a 0       0       NULL    NULL    NULL    NULL
> val1b 6       6       3       [19,119,319,19,19,19]   [19,119,319]    
> 0000000100000000000000060000000100000004000000005D8D6AB90000000000000000000000000000000400000000000000010000000000000001
> val1c 2       2       2       [219,19]        [219,19]        
> 0000000100000000000000020000000100000004000000005D8D6AB90000000000000000000000000000000100000000000000000000000000000001
> val1d 0       0       NULL    NULL    NULL    NULL
> val1d 0       0       NULL    NULL    NULL    NULL
> val1d 0       0       NULL    NULL    NULL    NULL
> val1e 1       1       1       [19]    [19]    
> 0000000100000000000000010000000100000004000000005D8D6AB90000000000000000000000000000000100000000000000000000000000000000
> val1e 1       1       1       [19]    [19]    
> 0000000100000000000000010000000100000004000000005D8D6AB90000000000000000000000000000000100000000000000000000000000000000
> val1e 1       1       1       [19]    [19]    
> 0000000100000000000000010000000100000004000000005D8D6AB90000000000000000000000000000000100000000000000000000000000000000
> {code}



--
This message was sent by Atlassian Jira
(v8.3.4#803005)

---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscr...@spark.apache.org
For additional commands, e-mail: issues-h...@spark.apache.org

Reply via email to