Zoltan Haindrich created HIVE-23082:
---------------------------------------

             Summary: PK/FK stat rescale doesn't work in some cases
                 Key: HIVE-23082
                 URL: https://issues.apache.org/jira/browse/HIVE-23082
             Project: Hive
          Issue Type: Bug
            Reporter: Zoltan Haindrich
            Assignee: Zoltan Haindrich


As a result, joins may retain the original estimate; see MAPJOIN_33 in the
plan below, which retained the estimate of SEL_32 (rows=5040):
{code}
+----------------------------------------------------+
|                      Explain                       |
+----------------------------------------------------+
| Plan optimized by CBO.                             |
|                                                    |
| Vertex dependency in root stage                    |
| Map 1 <- Map 2 (BROADCAST_EDGE)                    |
|                                                    |
| Stage-0                                            |
|   Fetch Operator                                   |
|     limit:12                                       |
|     Stage-1                                        |
|       Map 1 vectorized                             |
|       File Output Operator [FS_36]                 |
|         Limit [LIM_35] (rows=12 width=4)           |
|           Number of rows:12                        |
|           Select Operator [SEL_34] (rows=5040 width=4) |
|             Output:["_col0"]                       |
|             Map Join Operator [MAPJOIN_33] (rows=5040 width=8) |
|               Conds:SEL_32._col0=RS_30._col0(Inner) |
|             <-Map 2 [BROADCAST_EDGE] vectorized    |
|               BROADCAST [RS_30]                    |
|                 PartitionCols:_col0                |
|                 Select Operator [SEL_29] (rows=1 width=8) |
|                   Output:["_col0"]                 |
|                   Filter Operator [FIL_28] (rows=1 width=108) |
|                     predicate:((r_reason_id = 'reason 66') and r_reason_sk is not null) |
|                     TableScan [TS_3] (rows=2 width=108) |
|                       default@rx0,reason,Tbl:COMPLETE,Col:COMPLETE,Output:["r_reason_id","r_reason_sk"] |
|             <-Select Operator [SEL_32] (rows=5040 width=7) |
|                 Output:["_col0"]                   |
|                 Filter Operator [FIL_31] (rows=5040 width=7) |
|                   predicate:sr_reason_sk is not null |
|                   TableScan [TS_0] (rows=5112 width=7) |
|                     default@sr0,store_returns,Tbl:COMPLETE,Col:COMPLETE,Output:["sr_reason_sk"] |
|                                                    |
+----------------------------------------------------+
{code}

repro:
{code}
-- Repro script for HIVE-23082: creates two small tables (rx0, sr0), loads them,
-- and requests the plan of an inner join between them on the reason key.

-- Session settings used for the repro: query results cache disabled and
-- user-level EXPLAIN output enabled (as the property names indicate).
set hive.query.results.cache.enabled=false;
set hive.explain.user=true;

-- Drop any leftovers from a previous run so the script is re-runnable.
drop table if exists default.rx0;
drop table if exists default.sr0;

-- NOTE(review): the CREATE/INSERT statements use unqualified table names while
-- the DROP/EXPLAIN statements use `default.`; presumably the current database
-- is expected to be `default` -- confirm before running elsewhere.
create table rx0 (r_reason_id string, r_reason_sk bigint);
create table sr0 (sr_reason_sk bigint);

-- Two rows with keys 1 and 70. Neither r_reason_id equals 'reason 66', the
-- literal the query below filters on.
insert into rx0 values ('AAAAAAAAAAAAAAAA',1),('AAAAAAAAGEAAAAAA',70);

-- Seed sr0 with 71 rows: one NULL key plus the keys 1..70.
insert into sr0 values (NULL),(1),(2),(3),(4),(5),(6),(7),(8),(9),(10),
(11),(12),(13),(14),(15),(16),(17),(18),(19),(20),(21),(22),(23),(24),(25),
(26),(27),(28),(29),(30),(31),(32),(33),(34),(35),(36),(37),(38),(39),(40),
(41),(42),(43),(44),(45),(46),(47),(48),(49),(50),(51),(52),(53),(54),(55),
(56),(57),(58),(59),(60),(61),(62),(63),(64),(65),(66),(67),(68),(69),(70);

-- Grow sr0 by appending its own cross product: 71 * 71 = 5041 more rows, each
-- carrying the key of the `a` side.
insert into sr0 select a.* from sr0 a,sr0 b;
-- |sr0| = 71 + 5041 = 5112 rows, of which 5040 have a non-null sr_reason_sk

-- Only the plan is requested (the query itself is not executed); the row
-- estimate reported on the join operator is what the issue is about.
explain select 1
from default.sr0  store_returns , default.rx0 reason
            where sr_reason_sk = r_reason_sk
              and r_reason_id = 'reason 66'
limit 12;

{code}





--
This message was sent by Atlassian Jira
(v8.3.4#803005)

Reply via email to