This is an automated email from the ASF dual-hosted git repository.

kxiao pushed a commit to branch branch-2.0
in repository https://gitbox.apache.org/repos/asf/doris.git


The following commit(s) were added to refs/heads/branch-2.0 by this push:
     new 7c591b2151  [fix](nereids)fix avg-size (pick pr 22421 for 2.0 branch) (#22489)
7c591b2151 is described below

commit 7c591b2151b886059c7529f338cb5430cdce654c
Author: minghong <[email protected]>
AuthorDate: Wed Aug 2 14:16:44 2023 +0800

     [fix](nereids)fix avg-size (pick pr 22421 for 2.0 branch) (#22489)
---
 .../doris/nereids/stats/FilterEstimation.java      |   2 +-
 .../nereids_tpcds_shape_sf100_p0/shape/query65.out |  29 +++---
 .../nereids_tpcds_shape_sf100_p0/shape/query72.out | 109 +++++++++++----------
 .../nereids_tpch_shape_sf1000_p0/shape/q22.out     |  43 ++++----
 .../data/nereids_tpch_shape_sf500_p0/shape/q22.out |  43 ++++----
 5 files changed, 112 insertions(+), 114 deletions(-)

diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/FilterEstimation.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/FilterEstimation.java
index acf072fb82..c5ddbd285b 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/FilterEstimation.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/FilterEstimation.java
@@ -454,7 +454,7 @@ public class FilterEstimation extends ExpressionVisitor<Statistics, EstimationCo
         ColumnStatistic rightColumnStatistic = new ColumnStatisticBuilder(rightStats)
                 .setMinValue(Math.max(leftRange.getLow(), rightRange.getLow()))
                 .setMaxValue(rightRange.getHigh())
-                .setAvgSizeByte(rightStats.ndv * (rightAlwaysGreaterRangeFraction + rightOverlappingRangeFraction))
+                .setNdv(rightStats.ndv * (rightAlwaysGreaterRangeFraction + rightOverlappingRangeFraction))
                 .setNumNulls(0)
                 .build();
         double sel = leftAlwaysLessThanRightPercent
diff --git a/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query65.out b/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query65.out
index 0cf4b87d09..ba6a5ed504 100644
--- a/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query65.out
+++ b/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query65.out
@@ -1,13 +1,13 @@
 -- This file is automatically generated. You should know what you did if you 
want to edit this
 -- !ds_shape_65 --
-PhysicalTopN
---PhysicalDistribute
-----PhysicalTopN
-------PhysicalProject
---------hashJoin[INNER_JOIN](item.i_item_sk = sc.ss_item_sk)
-----------PhysicalDistribute
-------------PhysicalProject
---------------hashJoin[INNER_JOIN](store.s_store_sk = sc.ss_store_sk)
+PhysicalResultSink
+--PhysicalTopN
+----PhysicalDistribute
+------PhysicalTopN
+--------PhysicalProject
+----------hashJoin[INNER_JOIN](store.s_store_sk = sc.ss_store_sk)
+------------hashJoin[INNER_JOIN](item.i_item_sk = sc.ss_item_sk)
+--------------PhysicalDistribute
 ----------------hashJoin[INNER_JOIN](sb.ss_store_sk = 
sc.ss_store_sk)(cast(revenue as DOUBLE) <= cast((0.1 * ave) as DOUBLE))
 ------------------hashAgg[GLOBAL]
 --------------------PhysicalDistribute
@@ -36,10 +36,9 @@ PhysicalTopN
 ----------------------------------------PhysicalProject
 ------------------------------------------filter((date_dim.d_month_seq >= 
1221)(date_dim.d_month_seq <= 1232))
 --------------------------------------------PhysicalOlapScan[date_dim]
-----------------PhysicalDistribute
-------------------PhysicalProject
---------------------PhysicalOlapScan[store]
-----------PhysicalDistribute
-------------PhysicalProject
---------------PhysicalOlapScan[item]
-
+--------------PhysicalDistribute
+----------------PhysicalProject
+------------------PhysicalOlapScan[item]
+------------PhysicalDistribute
+--------------PhysicalProject
+----------------PhysicalOlapScan[store]
\ No newline at end of file
diff --git a/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query72.out b/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query72.out
index 0e04db4a1f..22010cbb89 100644
--- a/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query72.out
+++ b/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query72.out
@@ -1,61 +1,62 @@
 -- This file is automatically generated. You should know what you did if you 
want to edit this
 -- !ds_shape_72 --
-PhysicalTopN
---PhysicalDistribute
-----PhysicalTopN
-------hashAgg[GLOBAL]
---------PhysicalDistribute
-----------hashAgg[LOCAL]
-------------PhysicalProject
---------------hashJoin[RIGHT_OUTER_JOIN](catalog_returns.cr_item_sk = 
catalog_sales.cs_item_sk)(catalog_returns.cr_order_number = 
catalog_sales.cs_order_number)
-----------------PhysicalProject
-------------------PhysicalOlapScan[catalog_returns]
-----------------PhysicalDistribute
+PhysicalResultSink
+--PhysicalTopN
+----PhysicalDistribute
+------PhysicalTopN
+--------hashAgg[GLOBAL]
+----------PhysicalDistribute
+------------hashAgg[LOCAL]
+--------------PhysicalProject
+----------------hashJoin[RIGHT_OUTER_JOIN](catalog_returns.cr_item_sk = 
catalog_sales.cs_item_sk)(catalog_returns.cr_order_number = 
catalog_sales.cs_order_number)
 ------------------PhysicalProject
---------------------hashJoin[LEFT_OUTER_JOIN](catalog_sales.cs_promo_sk = 
promotion.p_promo_sk)
-----------------------PhysicalProject
-------------------------hashJoin[INNER_JOIN](warehouse.w_warehouse_sk = 
inventory.inv_warehouse_sk)
---------------------------PhysicalProject
-----------------------------hashJoin[INNER_JOIN](item.i_item_sk = 
catalog_sales.cs_item_sk)
-------------------------------PhysicalProject
---------------------------------hashJoin[INNER_JOIN](inventory.inv_date_sk = 
d2.d_date_sk)(d1.d_week_seq = d2.d_week_seq)
+--------------------PhysicalOlapScan[catalog_returns]
+------------------PhysicalDistribute
+--------------------PhysicalProject
+----------------------hashJoin[LEFT_OUTER_JOIN](catalog_sales.cs_promo_sk = 
promotion.p_promo_sk)
+------------------------PhysicalProject
+--------------------------hashJoin[INNER_JOIN](warehouse.w_warehouse_sk = 
inventory.inv_warehouse_sk)
+----------------------------PhysicalProject
+------------------------------hashJoin[INNER_JOIN](item.i_item_sk = 
catalog_sales.cs_item_sk)
+--------------------------------PhysicalProject
+----------------------------------PhysicalOlapScan[item]
+--------------------------------PhysicalDistribute
 ----------------------------------PhysicalProject
-------------------------------------hashJoin[INNER_JOIN](catalog_sales.cs_item_sk
 = inventory.inv_item_sk)(inventory.inv_quantity_on_hand < 
catalog_sales.cs_quantity)
---------------------------------------PhysicalDistribute
-----------------------------------------PhysicalOlapScan[inventory]
---------------------------------------PhysicalDistribute
-----------------------------------------PhysicalProject
-------------------------------------------hashJoin[INNER_JOIN](catalog_sales.cs_ship_date_sk
 = d3.d_date_sk)(d3.d_date > cast((cast(d_date as BIGINT) + 5) as DATEV2))
---------------------------------------------hashJoin[INNER_JOIN](catalog_sales.cs_sold_date_sk
 = d1.d_date_sk)
-----------------------------------------------hashJoin[INNER_JOIN](catalog_sales.cs_bill_cdemo_sk
 = customer_demographics.cd_demo_sk)
-------------------------------------------------hashJoin[INNER_JOIN](catalog_sales.cs_bill_hdemo_sk
 = household_demographics.hd_demo_sk)
---------------------------------------------------PhysicalProject
-----------------------------------------------------PhysicalOlapScan[catalog_sales]
---------------------------------------------------PhysicalDistribute
-----------------------------------------------------PhysicalProject
-------------------------------------------------------filter((cast(hd_buy_potential
 as VARCHAR(*)) = '501-1000'))
---------------------------------------------------------PhysicalOlapScan[household_demographics]
+------------------------------------hashJoin[INNER_JOIN](inventory.inv_date_sk 
= d2.d_date_sk)(d1.d_week_seq = d2.d_week_seq)
+--------------------------------------PhysicalProject
+----------------------------------------hashJoin[INNER_JOIN](catalog_sales.cs_item_sk
 = inventory.inv_item_sk)(inventory.inv_quantity_on_hand < 
catalog_sales.cs_quantity)
+------------------------------------------PhysicalOlapScan[inventory]
+------------------------------------------PhysicalDistribute
+--------------------------------------------PhysicalProject
+----------------------------------------------hashJoin[INNER_JOIN](catalog_sales.cs_ship_date_sk
 = d3.d_date_sk)(d3.d_date > cast((cast(d_date as BIGINT) + 5) as DATEV2))
+------------------------------------------------PhysicalDistribute
+--------------------------------------------------hashJoin[INNER_JOIN](catalog_sales.cs_bill_cdemo_sk
 = customer_demographics.cd_demo_sk)
+----------------------------------------------------PhysicalDistribute
+------------------------------------------------------hashJoin[INNER_JOIN](catalog_sales.cs_sold_date_sk
 = d1.d_date_sk)
+--------------------------------------------------------hashJoin[INNER_JOIN](catalog_sales.cs_bill_hdemo_sk
 = household_demographics.hd_demo_sk)
+----------------------------------------------------------PhysicalProject
+------------------------------------------------------------PhysicalOlapScan[catalog_sales]
+----------------------------------------------------------PhysicalDistribute
+------------------------------------------------------------PhysicalProject
+--------------------------------------------------------------filter((cast(hd_buy_potential
 as VARCHAR(*)) = '501-1000'))
+----------------------------------------------------------------PhysicalOlapScan[household_demographics]
+--------------------------------------------------------PhysicalDistribute
+----------------------------------------------------------PhysicalProject
+------------------------------------------------------------filter((d1.d_year 
= 2002))
+--------------------------------------------------------------PhysicalOlapScan[date_dim]
+----------------------------------------------------PhysicalDistribute
+------------------------------------------------------PhysicalProject
+--------------------------------------------------------filter((cast(cd_marital_status
 as VARCHAR(*)) = 'W'))
+----------------------------------------------------------PhysicalOlapScan[customer_demographics]
 ------------------------------------------------PhysicalDistribute
 --------------------------------------------------PhysicalProject
-----------------------------------------------------filter((cast(cd_marital_status
 as VARCHAR(*)) = 'W'))
-------------------------------------------------------PhysicalOlapScan[customer_demographics]
-----------------------------------------------PhysicalDistribute
-------------------------------------------------PhysicalProject
---------------------------------------------------filter((d1.d_year = 2002))
 ----------------------------------------------------PhysicalOlapScan[date_dim]
---------------------------------------------PhysicalDistribute
-----------------------------------------------PhysicalProject
-------------------------------------------------PhysicalOlapScan[date_dim]
-----------------------------------PhysicalDistribute
-------------------------------------PhysicalProject
---------------------------------------PhysicalOlapScan[date_dim]
-------------------------------PhysicalDistribute
---------------------------------PhysicalProject
-----------------------------------PhysicalOlapScan[item]
---------------------------PhysicalDistribute
-----------------------------PhysicalProject
-------------------------------PhysicalOlapScan[warehouse]
-----------------------PhysicalDistribute
-------------------------PhysicalProject
---------------------------PhysicalOlapScan[promotion]
-
+--------------------------------------PhysicalDistribute
+----------------------------------------PhysicalProject
+------------------------------------------PhysicalOlapScan[date_dim]
+----------------------------PhysicalDistribute
+------------------------------PhysicalProject
+--------------------------------PhysicalOlapScan[warehouse]
+------------------------PhysicalDistribute
+--------------------------PhysicalProject
+----------------------------PhysicalOlapScan[promotion]
\ No newline at end of file
diff --git a/regression-test/data/nereids_tpch_shape_sf1000_p0/shape/q22.out b/regression-test/data/nereids_tpch_shape_sf1000_p0/shape/q22.out
index b2399ebd13..d5088ac56b 100644
--- a/regression-test/data/nereids_tpch_shape_sf1000_p0/shape/q22.out
+++ b/regression-test/data/nereids_tpch_shape_sf1000_p0/shape/q22.out
@@ -1,28 +1,27 @@
 -- This file is automatically generated. You should know what you did if you 
want to edit this
 -- !select --
-PhysicalQuickSort
---PhysicalDistribute
-----PhysicalQuickSort
-------hashAgg[GLOBAL]
---------PhysicalDistribute
-----------hashAgg[LOCAL]
-------------PhysicalProject
---------------NestedLoopJoin[INNER_JOIN](cast(c_acctbal as DECIMALV3(38, 4)) > 
avg(c_acctbal))
-----------------PhysicalProject
-------------------hashJoin[RIGHT_ANTI_JOIN](orders.o_custkey = 
customer.c_custkey)
---------------------PhysicalDistribute
-----------------------PhysicalProject
-------------------------PhysicalOlapScan[orders]
---------------------PhysicalDistribute
+PhysicalResultSink
+--PhysicalQuickSort
+----PhysicalDistribute
+------PhysicalQuickSort
+--------hashAgg[GLOBAL]
+----------PhysicalDistribute
+------------hashAgg[LOCAL]
+--------------PhysicalProject
+----------------hashJoin[RIGHT_ANTI_JOIN](orders.o_custkey = 
customer.c_custkey)
+------------------PhysicalDistribute
+--------------------PhysicalProject
+----------------------PhysicalOlapScan[orders]
+------------------PhysicalDistribute
+--------------------NestedLoopJoin[INNER_JOIN](cast(c_acctbal as DECIMALV3(38, 
4)) > avg(c_acctbal))
 ----------------------PhysicalProject
 ------------------------filter(substring(c_phone, 1, 2) IN ('13', '31', '23', 
'29', '30', '18', '17'))
 --------------------------PhysicalOlapScan[customer]
-----------------PhysicalDistribute
-------------------PhysicalAssertNumRows
---------------------hashAgg[GLOBAL]
 ----------------------PhysicalDistribute
-------------------------hashAgg[LOCAL]
---------------------------PhysicalProject
-----------------------------filter((customer.c_acctbal > 
0.00)substring(c_phone, 1, 2) IN ('13', '31', '23', '29', '30', '18', '17'))
-------------------------------PhysicalOlapScan[customer]
-
+------------------------PhysicalAssertNumRows
+--------------------------hashAgg[GLOBAL]
+----------------------------PhysicalDistribute
+------------------------------hashAgg[LOCAL]
+--------------------------------PhysicalProject
+----------------------------------filter((customer.c_acctbal > 
0.00)substring(c_phone, 1, 2) IN ('13', '31', '23', '29', '30', '18', '17'))
+------------------------------------PhysicalOlapScan[customer]
\ No newline at end of file
diff --git a/regression-test/data/nereids_tpch_shape_sf500_p0/shape/q22.out b/regression-test/data/nereids_tpch_shape_sf500_p0/shape/q22.out
index b2399ebd13..d5088ac56b 100644
--- a/regression-test/data/nereids_tpch_shape_sf500_p0/shape/q22.out
+++ b/regression-test/data/nereids_tpch_shape_sf500_p0/shape/q22.out
@@ -1,28 +1,27 @@
 -- This file is automatically generated. You should know what you did if you 
want to edit this
 -- !select --
-PhysicalQuickSort
---PhysicalDistribute
-----PhysicalQuickSort
-------hashAgg[GLOBAL]
---------PhysicalDistribute
-----------hashAgg[LOCAL]
-------------PhysicalProject
---------------NestedLoopJoin[INNER_JOIN](cast(c_acctbal as DECIMALV3(38, 4)) > 
avg(c_acctbal))
-----------------PhysicalProject
-------------------hashJoin[RIGHT_ANTI_JOIN](orders.o_custkey = 
customer.c_custkey)
---------------------PhysicalDistribute
-----------------------PhysicalProject
-------------------------PhysicalOlapScan[orders]
---------------------PhysicalDistribute
+PhysicalResultSink
+--PhysicalQuickSort
+----PhysicalDistribute
+------PhysicalQuickSort
+--------hashAgg[GLOBAL]
+----------PhysicalDistribute
+------------hashAgg[LOCAL]
+--------------PhysicalProject
+----------------hashJoin[RIGHT_ANTI_JOIN](orders.o_custkey = 
customer.c_custkey)
+------------------PhysicalDistribute
+--------------------PhysicalProject
+----------------------PhysicalOlapScan[orders]
+------------------PhysicalDistribute
+--------------------NestedLoopJoin[INNER_JOIN](cast(c_acctbal as DECIMALV3(38, 
4)) > avg(c_acctbal))
 ----------------------PhysicalProject
 ------------------------filter(substring(c_phone, 1, 2) IN ('13', '31', '23', 
'29', '30', '18', '17'))
 --------------------------PhysicalOlapScan[customer]
-----------------PhysicalDistribute
-------------------PhysicalAssertNumRows
---------------------hashAgg[GLOBAL]
 ----------------------PhysicalDistribute
-------------------------hashAgg[LOCAL]
---------------------------PhysicalProject
-----------------------------filter((customer.c_acctbal > 
0.00)substring(c_phone, 1, 2) IN ('13', '31', '23', '29', '30', '18', '17'))
-------------------------------PhysicalOlapScan[customer]
-
+------------------------PhysicalAssertNumRows
+--------------------------hashAgg[GLOBAL]
+----------------------------PhysicalDistribute
+------------------------------hashAgg[LOCAL]
+--------------------------------PhysicalProject
+----------------------------------filter((customer.c_acctbal > 
0.00)substring(c_phone, 1, 2) IN ('13', '31', '23', '29', '30', '18', '17'))
+------------------------------------PhysicalOlapScan[customer]
\ No newline at end of file


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to