GitHub user stanzhai commented on the issue:

    https://github.com/apache/spark/pull/19301
  
    @cenyuhai This is an optimization of the physical plan, and your case can be optimized: duplicate aggregate functions are now computed only once.
    
    ```SQL
    select dt,
    geohash_of_latlng,
    sum(mt_cnt),
    sum(ele_cnt),
    round(sum(mt_cnt) * 1.0 * 100 / sum(mt_cnt_all), 2),
    round(sum(ele_cnt) * 1.0 * 100 / sum(ele_cnt_all), 2)
    from values(1, 2, 3, 4, 5, 6) as (dt, geohash_of_latlng, mt_cnt, ele_cnt, mt_cnt_all, ele_cnt_all)
    group by dt, geohash_of_latlng
    order by dt, geohash_of_latlng limit 10
    ```
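
    For reference, the plans below can be reproduced from `spark-shell` along these lines (a minimal sketch; it assumes the `spark` SparkSession that the shell provides):

    ```scala
    // Minimal sketch: print the physical plan for the query above.
    // Assumes `spark` is the SparkSession provided by spark-shell.
    val query = """
      select dt,
             geohash_of_latlng,
             sum(mt_cnt),
             sum(ele_cnt),
             round(sum(mt_cnt) * 1.0 * 100 / sum(mt_cnt_all), 2),
             round(sum(ele_cnt) * 1.0 * 100 / sum(ele_cnt_all), 2)
      from values(1, 2, 3, 4, 5, 6) as (dt, geohash_of_latlng, mt_cnt, ele_cnt, mt_cnt_all, ele_cnt_all)
      group by dt, geohash_of_latlng
      order by dt, geohash_of_latlng limit 10
    """
    spark.sql(query).explain()  // prints the "== Physical Plan ==" blocks shown below
    ```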
    
    Before (note that `sum(cast(mt_cnt#28 as bigint))` and `sum(cast(ele_cnt#29 as bigint))` each appear twice in the aggregate):
    
    ```
    == Physical Plan ==
    TakeOrderedAndProject(limit=10, orderBy=[dt#26 ASC NULLS FIRST,geohash_of_latlng#27 ASC NULLS FIRST], output=[dt#26,geohash_of_latlng#27,sum(mt_cnt)#38L,sum(ele_cnt)#39L,round((CAST((CAST((CAST(CAST(sum(CAST(mt_cnt AS BIGINT)) AS DECIMAL(20,0)) AS DECIMAL(21,1)) * CAST(1.0 AS DECIMAL(21,1))) AS DECIMAL(23,1)) * CAST(CAST(100 AS DECIMAL(23,1)) AS DECIMAL(23,1))) AS DECIMAL(38,2)) / CAST(CAST(sum(CAST(mt_cnt_all AS BIGINT)) AS DECIMAL(20,0)) AS DECIMAL(38,2))), 2)#40,round((CAST((CAST((CAST(CAST(sum(CAST(ele_cnt AS BIGINT)) AS DECIMAL(20,0)) AS DECIMAL(21,1)) * CAST(1.0 AS DECIMAL(21,1))) AS DECIMAL(23,1)) * CAST(CAST(100 AS DECIMAL(23,1)) AS DECIMAL(23,1))) AS DECIMAL(38,2)) / CAST(CAST(sum(CAST(ele_cnt_all AS BIGINT)) AS DECIMAL(20,0)) AS DECIMAL(38,2))), 2)#41])
    +- *HashAggregate(keys=[dt#26, geohash_of_latlng#27], functions=[sum(cast(mt_cnt#28 as bigint)), sum(cast(ele_cnt#29 as bigint)), sum(cast(mt_cnt#28 as bigint)), sum(cast(mt_cnt_all#30 as bigint)), sum(cast(ele_cnt#29 as bigint)), sum(cast(ele_cnt_all#31 as bigint))])
       +- Exchange hashpartitioning(dt#26, geohash_of_latlng#27, 200)
          +- *HashAggregate(keys=[dt#26, geohash_of_latlng#27], functions=[partial_sum(cast(mt_cnt#28 as bigint)), partial_sum(cast(ele_cnt#29 as bigint)), partial_sum(cast(mt_cnt#28 as bigint)), partial_sum(cast(mt_cnt_all#30 as bigint)), partial_sum(cast(ele_cnt#29 as bigint)), partial_sum(cast(ele_cnt_all#31 as bigint))])
             +- LocalTableScan [dt#26, geohash_of_latlng#27, mt_cnt#28, ele_cnt#29, mt_cnt_all#30, ele_cnt_all#31]
    ```
    
    After (each duplicated aggregate is computed only once):
    
    ```
    == Physical Plan ==
    TakeOrderedAndProject(limit=10, orderBy=[dt#28 ASC NULLS FIRST,geohash_of_latlng#29 ASC NULLS FIRST], output=[dt#28,geohash_of_latlng#29,sum(mt_cnt)#34L,sum(ele_cnt)#35L,round((CAST((CAST((CAST(CAST(sum(CAST(mt_cnt AS BIGINT)) AS DECIMAL(20,0)) AS DECIMAL(21,1)) * CAST(1.0 AS DECIMAL(21,1))) AS DECIMAL(23,1)) * CAST(CAST(100 AS DECIMAL(23,1)) AS DECIMAL(23,1))) AS DECIMAL(38,2)) / CAST(CAST(sum(CAST(mt_cnt_all AS BIGINT)) AS DECIMAL(20,0)) AS DECIMAL(38,2))), 2)#36,round((CAST((CAST((CAST(CAST(sum(CAST(ele_cnt AS BIGINT)) AS DECIMAL(20,0)) AS DECIMAL(21,1)) * CAST(1.0 AS DECIMAL(21,1))) AS DECIMAL(23,1)) * CAST(CAST(100 AS DECIMAL(23,1)) AS DECIMAL(23,1))) AS DECIMAL(38,2)) / CAST(CAST(sum(CAST(ele_cnt_all AS BIGINT)) AS DECIMAL(20,0)) AS DECIMAL(38,2))), 2)#37])
    +- *HashAggregate(keys=[dt#28, geohash_of_latlng#29], functions=[sum(cast(mt_cnt#30 as bigint)), sum(cast(ele_cnt#31 as bigint)), sum(cast(mt_cnt_all#32 as bigint)), sum(cast(ele_cnt_all#33 as bigint))])
       +- Exchange hashpartitioning(dt#28, geohash_of_latlng#29, 200)
          +- *HashAggregate(keys=[dt#28, geohash_of_latlng#29], functions=[partial_sum(cast(mt_cnt#30 as bigint)), partial_sum(cast(ele_cnt#31 as bigint)), partial_sum(cast(mt_cnt_all#32 as bigint)), partial_sum(cast(ele_cnt_all#33 as bigint))])
             +- LocalTableScan [dt#28, geohash_of_latlng#29, mt_cnt#30, ele_cnt#31, mt_cnt_all#32, ele_cnt_all#33]
    ```
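
    For readers comparing the plans: the duplicated `sum(cast(mt_cnt#28 as bigint))` and `sum(cast(ele_cnt#29 as bigint))` entries are gone in the second plan. The core idea, as an illustrative sketch only (not the actual PR code; `dedupAggregates` is a hypothetical name):

    ```scala
    import org.apache.spark.sql.catalyst.expressions.aggregate.AggregateExpression

    // Illustrative sketch: collapse identical aggregate expressions so each
    // distinct aggregate is planned and evaluated once; the final projection
    // then reuses the single result wherever it is referenced.
    def dedupAggregates(aggExprs: Seq[AggregateExpression]): Seq[AggregateExpression] =
      aggExprs.distinct
    ```

    With the duplicates removed, the plain `sum(mt_cnt)` column and the `round(...)` column share one computed sum instead of evaluating it twice, as the After plan shows.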

