[ 
https://issues.apache.org/jira/browse/HIVE-25351?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Ashish Sharma updated HIVE-25351:
---------------------------------
    Description: 
*script used to repro*

create table cbo_test (key string, v1 double, v2 decimal(30,2), v3 
decimal(30,2));

insert into cbo_test values ("001400000000000000000006375905", 10230.72, 
10230.72, 10230.69), ("001400000000000000000006375905", 10230.72, 10230.72, 
10230.69), ("001400000000000000000006375905", 10230.72, 10230.72, 10230.69), 
("001400000000000000000006375905", 10230.72, 10230.72, 10230.69), 
("001400000000000000000006375905", 10230.72, 10230.72, 10230.69), 
("001400000000000000000006375905", 10230.72, 10230.72, 10230.69);

select stddev(v1), stddev(v2), stddev(v3) from cbo_test;


*Enable CBO*
+----------------------------------------------------+
|                      Explain                       |
+----------------------------------------------------+
| Plan optimized by CBO.                             |
|                                                    |
| Vertex dependency in root stage                    |
| Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE)            |
|                                                    |
| Stage-0                                            |
|   Fetch Operator                                   |
|     limit:-1                                       |
|     Stage-1                                        |
|       Reducer 2 vectorized                         |
|       File Output Operator [FS_13]                 |
|         Select Operator [SEL_12] (rows=1 width=24) |
|           Output:["_col0","_col1","_col2"]         |
|           Group By Operator [GBY_11] (rows=1 width=72) |
|             
Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)","count(VALUE._col2)","sum(VALUE._col3)","sum(VALUE._col4)","count(VALUE._col5)","sum(VALUE._col6)","sum(VALUE._col7)","count(VALUE._col8)"]
 |
|           <-Map 1 [CUSTOM_SIMPLE_EDGE] vectorized  |
|             PARTITION_ONLY_SHUFFLE [RS_10]         |
|               Group By Operator [GBY_9] (rows=1 width=72) |
|                 
Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"],aggregations:["sum(_col3)","sum(_col0)","count(_col0)","sum(_col5)","sum(_col4)","count(_col1)","sum(_col7)","sum(_col6)","count(_col2)"]
 |
|                 Select Operator [SEL_8] (rows=6 width=232) |
|                   
Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] |
|                   TableScan [TS_0] (rows=6 width=232) |
|                     default@cbo_test,cbo_test, ACID 
table,Tbl:COMPLETE,Col:COMPLETE,Output:["v1","v2","v3"] |
|                                                    |
+----------------------------------------------------+

result - 

_c0     _c1     _c2
0.0     NaN     NaN

*Disable CBO*
+----------------------------------------------------+
|                      Explain                       |
+----------------------------------------------------+
| Vertex dependency in root stage                    |
| Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE)            |
|                                                    |
| Stage-0                                            |
|   Fetch Operator                                   |
|     limit:-1                                       |
|     Stage-1                                        |
|       Reducer 2 vectorized                         |
|       File Output Operator [FS_11]                 |
|         Group By Operator [GBY_10] (rows=1 width=24) |
|           
Output:["_col0","_col1","_col2"],aggregations:["stddev(VALUE._col0)","stddev(VALUE._col1)","stddev(VALUE._col2)"]
 |
|         <-Map 1 [CUSTOM_SIMPLE_EDGE] vectorized    |
|           PARTITION_ONLY_SHUFFLE [RS_9]            |
|             Group By Operator [GBY_8] (rows=1 width=240) |
|               
Output:["_col0","_col1","_col2"],aggregations:["stddev(v1)","stddev(v2)","stddev(v3)"]
 |
|               Select Operator [SEL_7] (rows=6 width=232) |
|                 Output:["v1","v2","v3"]            |
|                 TableScan [TS_0] (rows=6 width=232) |
|                   default@cbo_test,cbo_test, ACID 
table,Tbl:COMPLETE,Col:COMPLETE,Output:["v1","v2","v3"] |
|                                                    |
+----------------------------------------------------+


result - 

_c0     _c1     _c2
5.42317860890711E-13    5.42317860890711E-13    5.42317860890711E-13

  was:
*script used to repro*

create table cbo_test (key string, v1 double, v2 decimal(30,2), v3 
decimal(30,2));

insert into cbo_test values ("001400000000000000000006375905", 10230.72, 
10230.72, 10230.69), ("001400000000000000000006375905", 10230.72, 10230.72, 
10230.69), ("001400000000000000000006375905", 10230.72, 10230.72, 10230.69), 
("001400000000000000000006375905", 10230.72, 10230.72, 10230.69), 
("001400000000000000000006375905", 10230.72, 10230.72, 10230.69), 
("001400000000000000000006375905", 10230.72, 10230.72, 10230.69);

select stddev(v1), stddev(v2), stddev(v3) from cbo_test;


*Enable CBO*
+----------------------------------------------------+
|                      Explain                       |
+----------------------------------------------------+
| Plan optimized by CBO.                             |
|                                                    |
| Vertex dependency in root stage                    |
| Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE)            |
|                                                    |
| Stage-0                                            |
|   Fetch Operator                                   |
|     limit:-1                                       |
|     Stage-1                                        |
|       Reducer 2 vectorized                         |
|       File Output Operator [FS_13]                 |
|         Select Operator [SEL_12] (rows=1 width=24) |
|           Output:["_col0","_col1","_col2"]         |
|           Group By Operator [GBY_11] (rows=1 width=72) |
|             
Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)","count(VALUE._col2)","sum(VALUE._col3)","sum(VALUE._col4)","count(VALUE._col5)","sum(VALUE._col6)","sum(VALUE._col7)","count(VALUE._col8)"]
 |
|           <-Map 1 [CUSTOM_SIMPLE_EDGE] vectorized  |
|             PARTITION_ONLY_SHUFFLE [RS_10]         |
|               Group By Operator [GBY_9] (rows=1 width=72) |
|                 
Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"],aggregations:["sum(_col3)","sum(_col0)","count(_col0)","sum(_col5)","sum(_col4)","count(_col1)","sum(_col7)","sum(_col6)","count(_col2)"]
 |
|                 Select Operator [SEL_8] (rows=6 width=232) |
|                   
Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] |
|                   TableScan [TS_0] (rows=6 width=232) |
|                     default@cbo_test,cbo_test, ACID 
table,Tbl:COMPLETE,Col:COMPLETE,Output:["v1","v2","v3"] |
|                                                    |
+----------------------------------------------------+



*Disable CBO*
+----------------------------------------------------+
|                      Explain                       |
+----------------------------------------------------+
| Vertex dependency in root stage                    |
| Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE)            |
|                                                    |
| Stage-0                                            |
|   Fetch Operator                                   |
|     limit:-1                                       |
|     Stage-1                                        |
|       Reducer 2 vectorized                         |
|       File Output Operator [FS_11]                 |
|         Group By Operator [GBY_10] (rows=1 width=24) |
|           
Output:["_col0","_col1","_col2"],aggregations:["stddev(VALUE._col0)","stddev(VALUE._col1)","stddev(VALUE._col2)"]
 |
|         <-Map 1 [CUSTOM_SIMPLE_EDGE] vectorized    |
|           PARTITION_ONLY_SHUFFLE [RS_9]            |
|             Group By Operator [GBY_8] (rows=1 width=240) |
|               
Output:["_col0","_col1","_col2"],aggregations:["stddev(v1)","stddev(v2)","stddev(v3)"]
 |
|               Select Operator [SEL_7] (rows=6 width=232) |
|                 Output:["v1","v2","v3"]            |
|                 TableScan [TS_0] (rows=6 width=232) |
|                   default@cbo_test,cbo_test, ACID 
table,Tbl:COMPLETE,Col:COMPLETE,Output:["v1","v2","v3"] |
|                                                    |
+----------------------------------------------------+



> stddev(), sstddev_pop() with CBO enable returning null
> ------------------------------------------------------
>
>                 Key: HIVE-25351
>                 URL: https://issues.apache.org/jira/browse/HIVE-25351
>             Project: Hive
>          Issue Type: Bug
>            Reporter: Ashish Sharma
>            Assignee: Ashish Sharma
>            Priority: Blocker
>
> *script used to repro*
> create table cbo_test (key string, v1 double, v2 decimal(30,2), v3 
> decimal(30,2));
> insert into cbo_test values ("001400000000000000000006375905", 10230.72, 
> 10230.72, 10230.69), ("001400000000000000000006375905", 10230.72, 10230.72, 
> 10230.69), ("001400000000000000000006375905", 10230.72, 10230.72, 10230.69), 
> ("001400000000000000000006375905", 10230.72, 10230.72, 10230.69), 
> ("001400000000000000000006375905", 10230.72, 10230.72, 10230.69), 
> ("001400000000000000000006375905", 10230.72, 10230.72, 10230.69);
> select stddev(v1), stddev(v2), stddev(v3) from cbo_test;
> *Enable CBO*
> +----------------------------------------------------+
> |                      Explain                       |
> +----------------------------------------------------+
> | Plan optimized by CBO.                             |
> |                                                    |
> | Vertex dependency in root stage                    |
> | Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE)            |
> |                                                    |
> | Stage-0                                            |
> |   Fetch Operator                                   |
> |     limit:-1                                       |
> |     Stage-1                                        |
> |       Reducer 2 vectorized                         |
> |       File Output Operator [FS_13]                 |
> |         Select Operator [SEL_12] (rows=1 width=24) |
> |           Output:["_col0","_col1","_col2"]         |
> |           Group By Operator [GBY_11] (rows=1 width=72) |
> |             
> Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)","count(VALUE._col2)","sum(VALUE._col3)","sum(VALUE._col4)","count(VALUE._col5)","sum(VALUE._col6)","sum(VALUE._col7)","count(VALUE._col8)"]
>  |
> |           <-Map 1 [CUSTOM_SIMPLE_EDGE] vectorized  |
> |             PARTITION_ONLY_SHUFFLE [RS_10]         |
> |               Group By Operator [GBY_9] (rows=1 width=72) |
> |                 
> Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"],aggregations:["sum(_col3)","sum(_col0)","count(_col0)","sum(_col5)","sum(_col4)","count(_col1)","sum(_col7)","sum(_col6)","count(_col2)"]
>  |
> |                 Select Operator [SEL_8] (rows=6 width=232) |
> |                   
> Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] |
> |                   TableScan [TS_0] (rows=6 width=232) |
> |                     default@cbo_test,cbo_test, ACID 
> table,Tbl:COMPLETE,Col:COMPLETE,Output:["v1","v2","v3"] |
> |                                                    |
> +----------------------------------------------------+
> result - 
> _c0   _c1     _c2
> 0.0   NaN     NaN
> *Disable CBO*
> +----------------------------------------------------+
> |                      Explain                       |
> +----------------------------------------------------+
> | Vertex dependency in root stage                    |
> | Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE)            |
> |                                                    |
> | Stage-0                                            |
> |   Fetch Operator                                   |
> |     limit:-1                                       |
> |     Stage-1                                        |
> |       Reducer 2 vectorized                         |
> |       File Output Operator [FS_11]                 |
> |         Group By Operator [GBY_10] (rows=1 width=24) |
> |           
> Output:["_col0","_col1","_col2"],aggregations:["stddev(VALUE._col0)","stddev(VALUE._col1)","stddev(VALUE._col2)"]
>  |
> |         <-Map 1 [CUSTOM_SIMPLE_EDGE] vectorized    |
> |           PARTITION_ONLY_SHUFFLE [RS_9]            |
> |             Group By Operator [GBY_8] (rows=1 width=240) |
> |               
> Output:["_col0","_col1","_col2"],aggregations:["stddev(v1)","stddev(v2)","stddev(v3)"]
>  |
> |               Select Operator [SEL_7] (rows=6 width=232) |
> |                 Output:["v1","v2","v3"]            |
> |                 TableScan [TS_0] (rows=6 width=232) |
> |                   default@cbo_test,cbo_test, ACID 
> table,Tbl:COMPLETE,Col:COMPLETE,Output:["v1","v2","v3"] |
> |                                                    |
> +----------------------------------------------------+
> result - 
> _c0   _c1     _c2
> 5.42317860890711E-13  5.42317860890711E-13    5.42317860890711E-13



--
This message was sent by Atlassian Jira
(v8.3.4#803005)

Reply via email to