Sahil Takiar created HIVE-16521:
-----------------------------------

             Summary: HoS user level explain plan possibly incorrect for UNION clause
                 Key: HIVE-16521
                 URL: https://issues.apache.org/jira/browse/HIVE-16521
             Project: Hive
          Issue Type: Bug
          Components: Spark
    Affects Versions: 3.0.0
            Reporter: Sahil Takiar
            Assignee: Sahil Takiar


The user-level explain plan for queries with a UNION operator looks very 
different for HoS vs. Hive-on-Tez. Furthermore, the HoS plan looks incomplete:

Query: {{EXPLAIN select count(*) from srcpart where srcpart.ds in (select 
max(srcpart.ds) from srcpart union all select min(srcpart.ds) from srcpart)}}

Hive-on-Tez:

{code}
Plan optimized by CBO.

Vertex dependency in root stage
Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 7 (SIMPLE_EDGE)
Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE)
Reducer 5 <- Map 4 (CUSTOM_SIMPLE_EDGE), Union 6 (CONTAINS)
Reducer 7 <- Union 6 (SIMPLE_EDGE)
Reducer 9 <- Map 8 (CUSTOM_SIMPLE_EDGE), Union 6 (CONTAINS)

Stage-0
  Fetch Operator
    limit:-1
    Stage-1
      Reducer 3
      File Output Operator [FS_34]
        Group By Operator [GBY_32] (rows=1 width=8)
          Output:["_col0"],aggregations:["count(VALUE._col0)"]
        <-Reducer 2 [CUSTOM_SIMPLE_EDGE]
          PARTITION_ONLY_SHUFFLE [RS_31]
            Group By Operator [GBY_30] (rows=1 width=8)
              Output:["_col0"],aggregations:["count()"]
              Merge Join Operator [MERGEJOIN_44] (rows=1000 width=8)
                Conds:RS_26._col0=RS_27._col0(Inner)
              <-Map 1 [SIMPLE_EDGE]
                SHUFFLE [RS_26]
                  PartitionCols:_col0
                  Select Operator [SEL_2] (rows=2000 width=184)
                    Output:["_col0"]
                    TableScan [TS_0] (rows=2000 width=194)
                      default@srcpart,srcpart,Tbl:COMPLETE,Col:COMPLETE
              <-Reducer 7 [SIMPLE_EDGE]
                SHUFFLE [RS_27]
                  PartitionCols:_col0
                  Group By Operator [GBY_24] (rows=1 width=184)
                    Output:["_col0"],keys:KEY._col0
                  <-Union 6 [SIMPLE_EDGE]
                    <-Reducer 5 [CONTAINS]
                      Reduce Output Operator [RS_23]
                        PartitionCols:_col0
                        Group By Operator [GBY_22] (rows=1 width=184)
                          Output:["_col0"],keys:_col0
                          Filter Operator [FIL_9] (rows=1 width=184)
                            predicate:_col0 is not null
                            Group By Operator [GBY_7] (rows=1 width=184)
                              Output:["_col0"],aggregations:["max(VALUE._col0)"]
                            <-Map 4 [CUSTOM_SIMPLE_EDGE]
                              PARTITION_ONLY_SHUFFLE [RS_6]
                                Group By Operator [GBY_5] (rows=1 width=184)
                                  Output:["_col0"],aggregations:["max(ds)"]
                                  Select Operator [SEL_4] (rows=2000 width=194)
                                    Output:["ds"]
                                    TableScan [TS_3] (rows=2000 width=194)
                                      default@srcpart,srcpart,Tbl:COMPLETE,Col:COMPLETE
                    <-Reducer 9 [CONTAINS]
                      Reduce Output Operator [RS_23]
                        PartitionCols:_col0
                        Group By Operator [GBY_22] (rows=1 width=184)
                          Output:["_col0"],keys:_col0
                          Filter Operator [FIL_17] (rows=1 width=184)
                            predicate:_col0 is not null
                            Group By Operator [GBY_15] (rows=1 width=184)
                              Output:["_col0"],aggregations:["min(VALUE._col0)"]
                            <-Map 8 [CUSTOM_SIMPLE_EDGE]
                              PARTITION_ONLY_SHUFFLE [RS_14]
                                Group By Operator [GBY_13] (rows=1 width=184)
                                  Output:["_col0"],aggregations:["min(ds)"]
                                  Select Operator [SEL_12] (rows=2000 width=194)
                                    Output:["ds"]
                                    TableScan [TS_11] (rows=2000 width=194)
                                      default@srcpart,srcpart,Tbl:COMPLETE,Col:COMPLETE
                Dynamic Partitioning Event Operator [EVENT_43] (rows=1 width=184)
                  Group By Operator [GBY_42] (rows=1 width=184)
                    Output:["_col0"],keys:_col0
                    Select Operator [SEL_41] (rows=1 width=184)
                      Output:["_col0"]
                       Please refer to the previous Group By Operator [GBY_24]
{code}

HoS:

{code}
Plan optimized by CBO.

Vertex dependency in root stage
Reducer 10 <- Map 9 (GROUP)
Reducer 11 <- Reducer 10 (GROUP), Reducer 13 (GROUP)
Reducer 13 <- Map 12 (GROUP)

Vertex dependency in root stage
Reducer 2 <- Map 1 (PARTITION-LEVEL SORT), Reducer 6 (PARTITION-LEVEL SORT)
Reducer 3 <- Reducer 2 (GROUP)
Reducer 5 <- Map 4 (GROUP)
Reducer 6 <- Reducer 5 (GROUP), Reducer 8 (GROUP)
Reducer 8 <- Map 7 (GROUP)

Stage-0
  Fetch Operator
    limit:-1
    Stage-1
      Reducer 3
      File Output Operator [FS_34]
        Group By Operator [GBY_32] (rows=1 width=8)
          Output:["_col0"],aggregations:["count(VALUE._col0)"]
        <-Reducer 2 [GROUP]
          GROUP [RS_31]
            Group By Operator [GBY_30] (rows=1 width=8)
              Output:["_col0"],aggregations:["count()"]
              Join Operator [JOIN_28] (rows=2200 width=10)
                condition map:[{"":"{\"type\":\"Inner\",\"left\":0,\"right\":1}"}],keys:{"0":"_col0","1":"_col0"}
              <-Map 1 [PARTITION-LEVEL SORT]
                PARTITION-LEVEL SORT [RS_26]
                  PartitionCols:_col0
                  Select Operator [SEL_2] (rows=2000 width=10)
                    Output:["_col0"]
                    TableScan [TS_0] (rows=2000 width=10)
                      default@srcpart,srcpart,Tbl:COMPLETE,Col:NONE
              <-Reducer 6 [PARTITION-LEVEL SORT]
                PARTITION-LEVEL SORT [RS_27]
                  PartitionCols:_col0
                  Group By Operator [GBY_24] (rows=1 width=184)
                    Output:["_col0"],keys:KEY._col0
                  <-Reducer 5 [GROUP]
                    GROUP [RS_23]
                      PartitionCols:_col0
                      Group By Operator [GBY_22] (rows=2 width=184)
                        Output:["_col0"],keys:_col0
                        Filter Operator [FIL_9] (rows=1 width=184)
                          predicate:_col0 is not null
                          Group By Operator [GBY_7] (rows=1 width=184)
                            Output:["_col0"],aggregations:["max(VALUE._col0)"]
                          <-Map 4 [GROUP]
                            GROUP [RS_6]
                              Group By Operator [GBY_5] (rows=1 width=184)
                                Output:["_col0"],aggregations:["max(ds)"]
                                Select Operator [SEL_4] (rows=2000 width=10)
                                  Output:["ds"]
                                  TableScan [TS_3] (rows=2000 width=10)
                                    default@srcpart,srcpart,Tbl:COMPLETE,Col:NONE
                  <-Reducer 8 [GROUP]
                    GROUP [RS_23]
                      PartitionCols:_col0
                      Group By Operator [GBY_22] (rows=2 width=184)
                        Output:["_col0"],keys:_col0
                        Filter Operator [FIL_17] (rows=1 width=184)
                          predicate:_col0 is not null
                          Group By Operator [GBY_15] (rows=1 width=184)
                            Output:["_col0"],aggregations:["min(VALUE._col0)"]
                          <-Map 7 [GROUP]
                            GROUP [RS_14]
                              Group By Operator [GBY_13] (rows=1 width=184)
                                Output:["_col0"],aggregations:["min(ds)"]
                                Select Operator [SEL_12] (rows=2000 width=10)
                                  Output:["ds"]
                                  TableScan [TS_11] (rows=2000 width=10)
                                    default@srcpart,srcpart,Tbl:COMPLETE,Col:NONE
        Stage-2
          Reducer 11
{code}

The HoS plan looks incomplete: Stage-2 contains only a single, empty vertex.



--
This message was sent by Atlassian JIRA
(v6.3.15#6346)

Reply via email to