This is an automated email from the ASF dual-hosted git repository.
kxiao pushed a commit to branch branch-2.0
in repository https://gitbox.apache.org/repos/asf/doris.git
The following commit(s) were added to refs/heads/branch-2.0 by this push:
new 7c591b2151 [fix](nereids)fix avg-size (pick pr 22421 for 2.0 branch) (#22489)
7c591b2151 is described below
commit 7c591b2151b886059c7529f338cb5430cdce654c
Author: minghong <[email protected]>
AuthorDate: Wed Aug 2 14:16:44 2023 +0800
[fix](nereids)fix avg-size (pick pr 22421 for 2.0 branch) (#22489)
---
.../doris/nereids/stats/FilterEstimation.java | 2 +-
.../nereids_tpcds_shape_sf100_p0/shape/query65.out | 29 +++---
.../nereids_tpcds_shape_sf100_p0/shape/query72.out | 109 +++++++++++----------
.../nereids_tpch_shape_sf1000_p0/shape/q22.out | 43 ++++----
.../data/nereids_tpch_shape_sf500_p0/shape/q22.out | 43 ++++----
5 files changed, 112 insertions(+), 114 deletions(-)
diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/FilterEstimation.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/FilterEstimation.java
index acf072fb82..c5ddbd285b 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/FilterEstimation.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/FilterEstimation.java
@@ -454,7 +454,7 @@ public class FilterEstimation extends ExpressionVisitor<Statistics, EstimationCo
ColumnStatistic rightColumnStatistic = new ColumnStatisticBuilder(rightStats)
.setMinValue(Math.max(leftRange.getLow(), rightRange.getLow()))
.setMaxValue(rightRange.getHigh())
- .setAvgSizeByte(rightStats.ndv * (rightAlwaysGreaterRangeFraction + rightOverlappingRangeFraction))
+ .setNdv(rightStats.ndv * (rightAlwaysGreaterRangeFraction + rightOverlappingRangeFraction))
.setNumNulls(0)
.build();
double sel = leftAlwaysLessThanRightPercent
diff --git a/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query65.out b/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query65.out
index 0cf4b87d09..ba6a5ed504 100644
--- a/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query65.out
+++ b/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query65.out
@@ -1,13 +1,13 @@
-- This file is automatically generated. You should know what you did if you want to edit this
-- !ds_shape_65 --
-PhysicalTopN
---PhysicalDistribute
-----PhysicalTopN
-------PhysicalProject
---------hashJoin[INNER_JOIN](item.i_item_sk = sc.ss_item_sk)
-----------PhysicalDistribute
-------------PhysicalProject
---------------hashJoin[INNER_JOIN](store.s_store_sk = sc.ss_store_sk)
+PhysicalResultSink
+--PhysicalTopN
+----PhysicalDistribute
+------PhysicalTopN
+--------PhysicalProject
+----------hashJoin[INNER_JOIN](store.s_store_sk = sc.ss_store_sk)
+------------hashJoin[INNER_JOIN](item.i_item_sk = sc.ss_item_sk)
+--------------PhysicalDistribute
----------------hashJoin[INNER_JOIN](sb.ss_store_sk =
sc.ss_store_sk)(cast(revenue as DOUBLE) <= cast((0.1 * ave) as DOUBLE))
------------------hashAgg[GLOBAL]
--------------------PhysicalDistribute
@@ -36,10 +36,9 @@ PhysicalTopN
----------------------------------------PhysicalProject
------------------------------------------filter((date_dim.d_month_seq >=
1221)(date_dim.d_month_seq <= 1232))
--------------------------------------------PhysicalOlapScan[date_dim]
-----------------PhysicalDistribute
-------------------PhysicalProject
---------------------PhysicalOlapScan[store]
-----------PhysicalDistribute
-------------PhysicalProject
---------------PhysicalOlapScan[item]
-
+--------------PhysicalDistribute
+----------------PhysicalProject
+------------------PhysicalOlapScan[item]
+------------PhysicalDistribute
+--------------PhysicalProject
+----------------PhysicalOlapScan[store]
\ No newline at end of file
diff --git a/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query72.out b/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query72.out
index 0e04db4a1f..22010cbb89 100644
--- a/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query72.out
+++ b/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query72.out
@@ -1,61 +1,62 @@
-- This file is automatically generated. You should know what you did if you want to edit this
-- !ds_shape_72 --
-PhysicalTopN
---PhysicalDistribute
-----PhysicalTopN
-------hashAgg[GLOBAL]
---------PhysicalDistribute
-----------hashAgg[LOCAL]
-------------PhysicalProject
---------------hashJoin[RIGHT_OUTER_JOIN](catalog_returns.cr_item_sk =
catalog_sales.cs_item_sk)(catalog_returns.cr_order_number =
catalog_sales.cs_order_number)
-----------------PhysicalProject
-------------------PhysicalOlapScan[catalog_returns]
-----------------PhysicalDistribute
+PhysicalResultSink
+--PhysicalTopN
+----PhysicalDistribute
+------PhysicalTopN
+--------hashAgg[GLOBAL]
+----------PhysicalDistribute
+------------hashAgg[LOCAL]
+--------------PhysicalProject
+----------------hashJoin[RIGHT_OUTER_JOIN](catalog_returns.cr_item_sk =
catalog_sales.cs_item_sk)(catalog_returns.cr_order_number =
catalog_sales.cs_order_number)
------------------PhysicalProject
---------------------hashJoin[LEFT_OUTER_JOIN](catalog_sales.cs_promo_sk =
promotion.p_promo_sk)
-----------------------PhysicalProject
-------------------------hashJoin[INNER_JOIN](warehouse.w_warehouse_sk =
inventory.inv_warehouse_sk)
---------------------------PhysicalProject
-----------------------------hashJoin[INNER_JOIN](item.i_item_sk =
catalog_sales.cs_item_sk)
-------------------------------PhysicalProject
---------------------------------hashJoin[INNER_JOIN](inventory.inv_date_sk =
d2.d_date_sk)(d1.d_week_seq = d2.d_week_seq)
+--------------------PhysicalOlapScan[catalog_returns]
+------------------PhysicalDistribute
+--------------------PhysicalProject
+----------------------hashJoin[LEFT_OUTER_JOIN](catalog_sales.cs_promo_sk =
promotion.p_promo_sk)
+------------------------PhysicalProject
+--------------------------hashJoin[INNER_JOIN](warehouse.w_warehouse_sk =
inventory.inv_warehouse_sk)
+----------------------------PhysicalProject
+------------------------------hashJoin[INNER_JOIN](item.i_item_sk =
catalog_sales.cs_item_sk)
+--------------------------------PhysicalProject
+----------------------------------PhysicalOlapScan[item]
+--------------------------------PhysicalDistribute
----------------------------------PhysicalProject
-------------------------------------hashJoin[INNER_JOIN](catalog_sales.cs_item_sk
= inventory.inv_item_sk)(inventory.inv_quantity_on_hand <
catalog_sales.cs_quantity)
---------------------------------------PhysicalDistribute
-----------------------------------------PhysicalOlapScan[inventory]
---------------------------------------PhysicalDistribute
-----------------------------------------PhysicalProject
-------------------------------------------hashJoin[INNER_JOIN](catalog_sales.cs_ship_date_sk
= d3.d_date_sk)(d3.d_date > cast((cast(d_date as BIGINT) + 5) as DATEV2))
---------------------------------------------hashJoin[INNER_JOIN](catalog_sales.cs_sold_date_sk
= d1.d_date_sk)
-----------------------------------------------hashJoin[INNER_JOIN](catalog_sales.cs_bill_cdemo_sk
= customer_demographics.cd_demo_sk)
-------------------------------------------------hashJoin[INNER_JOIN](catalog_sales.cs_bill_hdemo_sk
= household_demographics.hd_demo_sk)
---------------------------------------------------PhysicalProject
-----------------------------------------------------PhysicalOlapScan[catalog_sales]
---------------------------------------------------PhysicalDistribute
-----------------------------------------------------PhysicalProject
-------------------------------------------------------filter((cast(hd_buy_potential
as VARCHAR(*)) = '501-1000'))
---------------------------------------------------------PhysicalOlapScan[household_demographics]
+------------------------------------hashJoin[INNER_JOIN](inventory.inv_date_sk
= d2.d_date_sk)(d1.d_week_seq = d2.d_week_seq)
+--------------------------------------PhysicalProject
+----------------------------------------hashJoin[INNER_JOIN](catalog_sales.cs_item_sk
= inventory.inv_item_sk)(inventory.inv_quantity_on_hand <
catalog_sales.cs_quantity)
+------------------------------------------PhysicalOlapScan[inventory]
+------------------------------------------PhysicalDistribute
+--------------------------------------------PhysicalProject
+----------------------------------------------hashJoin[INNER_JOIN](catalog_sales.cs_ship_date_sk
= d3.d_date_sk)(d3.d_date > cast((cast(d_date as BIGINT) + 5) as DATEV2))
+------------------------------------------------PhysicalDistribute
+--------------------------------------------------hashJoin[INNER_JOIN](catalog_sales.cs_bill_cdemo_sk
= customer_demographics.cd_demo_sk)
+----------------------------------------------------PhysicalDistribute
+------------------------------------------------------hashJoin[INNER_JOIN](catalog_sales.cs_sold_date_sk
= d1.d_date_sk)
+--------------------------------------------------------hashJoin[INNER_JOIN](catalog_sales.cs_bill_hdemo_sk
= household_demographics.hd_demo_sk)
+----------------------------------------------------------PhysicalProject
+------------------------------------------------------------PhysicalOlapScan[catalog_sales]
+----------------------------------------------------------PhysicalDistribute
+------------------------------------------------------------PhysicalProject
+--------------------------------------------------------------filter((cast(hd_buy_potential
as VARCHAR(*)) = '501-1000'))
+----------------------------------------------------------------PhysicalOlapScan[household_demographics]
+--------------------------------------------------------PhysicalDistribute
+----------------------------------------------------------PhysicalProject
+------------------------------------------------------------filter((d1.d_year
= 2002))
+--------------------------------------------------------------PhysicalOlapScan[date_dim]
+----------------------------------------------------PhysicalDistribute
+------------------------------------------------------PhysicalProject
+--------------------------------------------------------filter((cast(cd_marital_status
as VARCHAR(*)) = 'W'))
+----------------------------------------------------------PhysicalOlapScan[customer_demographics]
------------------------------------------------PhysicalDistribute
--------------------------------------------------PhysicalProject
-----------------------------------------------------filter((cast(cd_marital_status
as VARCHAR(*)) = 'W'))
-------------------------------------------------------PhysicalOlapScan[customer_demographics]
-----------------------------------------------PhysicalDistribute
-------------------------------------------------PhysicalProject
---------------------------------------------------filter((d1.d_year = 2002))
----------------------------------------------------PhysicalOlapScan[date_dim]
---------------------------------------------PhysicalDistribute
-----------------------------------------------PhysicalProject
-------------------------------------------------PhysicalOlapScan[date_dim]
-----------------------------------PhysicalDistribute
-------------------------------------PhysicalProject
---------------------------------------PhysicalOlapScan[date_dim]
-------------------------------PhysicalDistribute
---------------------------------PhysicalProject
-----------------------------------PhysicalOlapScan[item]
---------------------------PhysicalDistribute
-----------------------------PhysicalProject
-------------------------------PhysicalOlapScan[warehouse]
-----------------------PhysicalDistribute
-------------------------PhysicalProject
---------------------------PhysicalOlapScan[promotion]
-
+--------------------------------------PhysicalDistribute
+----------------------------------------PhysicalProject
+------------------------------------------PhysicalOlapScan[date_dim]
+----------------------------PhysicalDistribute
+------------------------------PhysicalProject
+--------------------------------PhysicalOlapScan[warehouse]
+------------------------PhysicalDistribute
+--------------------------PhysicalProject
+----------------------------PhysicalOlapScan[promotion]
\ No newline at end of file
diff --git a/regression-test/data/nereids_tpch_shape_sf1000_p0/shape/q22.out b/regression-test/data/nereids_tpch_shape_sf1000_p0/shape/q22.out
index b2399ebd13..d5088ac56b 100644
--- a/regression-test/data/nereids_tpch_shape_sf1000_p0/shape/q22.out
+++ b/regression-test/data/nereids_tpch_shape_sf1000_p0/shape/q22.out
@@ -1,28 +1,27 @@
-- This file is automatically generated. You should know what you did if you want to edit this
-- !select --
-PhysicalQuickSort
---PhysicalDistribute
-----PhysicalQuickSort
-------hashAgg[GLOBAL]
---------PhysicalDistribute
-----------hashAgg[LOCAL]
-------------PhysicalProject
---------------NestedLoopJoin[INNER_JOIN](cast(c_acctbal as DECIMALV3(38, 4)) >
avg(c_acctbal))
-----------------PhysicalProject
-------------------hashJoin[RIGHT_ANTI_JOIN](orders.o_custkey =
customer.c_custkey)
---------------------PhysicalDistribute
-----------------------PhysicalProject
-------------------------PhysicalOlapScan[orders]
---------------------PhysicalDistribute
+PhysicalResultSink
+--PhysicalQuickSort
+----PhysicalDistribute
+------PhysicalQuickSort
+--------hashAgg[GLOBAL]
+----------PhysicalDistribute
+------------hashAgg[LOCAL]
+--------------PhysicalProject
+----------------hashJoin[RIGHT_ANTI_JOIN](orders.o_custkey =
customer.c_custkey)
+------------------PhysicalDistribute
+--------------------PhysicalProject
+----------------------PhysicalOlapScan[orders]
+------------------PhysicalDistribute
+--------------------NestedLoopJoin[INNER_JOIN](cast(c_acctbal as DECIMALV3(38,
4)) > avg(c_acctbal))
----------------------PhysicalProject
------------------------filter(substring(c_phone, 1, 2) IN ('13', '31', '23',
'29', '30', '18', '17'))
--------------------------PhysicalOlapScan[customer]
-----------------PhysicalDistribute
-------------------PhysicalAssertNumRows
---------------------hashAgg[GLOBAL]
----------------------PhysicalDistribute
-------------------------hashAgg[LOCAL]
---------------------------PhysicalProject
-----------------------------filter((customer.c_acctbal >
0.00)substring(c_phone, 1, 2) IN ('13', '31', '23', '29', '30', '18', '17'))
-------------------------------PhysicalOlapScan[customer]
-
+------------------------PhysicalAssertNumRows
+--------------------------hashAgg[GLOBAL]
+----------------------------PhysicalDistribute
+------------------------------hashAgg[LOCAL]
+--------------------------------PhysicalProject
+----------------------------------filter((customer.c_acctbal >
0.00)substring(c_phone, 1, 2) IN ('13', '31', '23', '29', '30', '18', '17'))
+------------------------------------PhysicalOlapScan[customer]
\ No newline at end of file
diff --git a/regression-test/data/nereids_tpch_shape_sf500_p0/shape/q22.out b/regression-test/data/nereids_tpch_shape_sf500_p0/shape/q22.out
index b2399ebd13..d5088ac56b 100644
--- a/regression-test/data/nereids_tpch_shape_sf500_p0/shape/q22.out
+++ b/regression-test/data/nereids_tpch_shape_sf500_p0/shape/q22.out
@@ -1,28 +1,27 @@
-- This file is automatically generated. You should know what you did if you want to edit this
-- !select --
-PhysicalQuickSort
---PhysicalDistribute
-----PhysicalQuickSort
-------hashAgg[GLOBAL]
---------PhysicalDistribute
-----------hashAgg[LOCAL]
-------------PhysicalProject
---------------NestedLoopJoin[INNER_JOIN](cast(c_acctbal as DECIMALV3(38, 4)) >
avg(c_acctbal))
-----------------PhysicalProject
-------------------hashJoin[RIGHT_ANTI_JOIN](orders.o_custkey =
customer.c_custkey)
---------------------PhysicalDistribute
-----------------------PhysicalProject
-------------------------PhysicalOlapScan[orders]
---------------------PhysicalDistribute
+PhysicalResultSink
+--PhysicalQuickSort
+----PhysicalDistribute
+------PhysicalQuickSort
+--------hashAgg[GLOBAL]
+----------PhysicalDistribute
+------------hashAgg[LOCAL]
+--------------PhysicalProject
+----------------hashJoin[RIGHT_ANTI_JOIN](orders.o_custkey =
customer.c_custkey)
+------------------PhysicalDistribute
+--------------------PhysicalProject
+----------------------PhysicalOlapScan[orders]
+------------------PhysicalDistribute
+--------------------NestedLoopJoin[INNER_JOIN](cast(c_acctbal as DECIMALV3(38,
4)) > avg(c_acctbal))
----------------------PhysicalProject
------------------------filter(substring(c_phone, 1, 2) IN ('13', '31', '23',
'29', '30', '18', '17'))
--------------------------PhysicalOlapScan[customer]
-----------------PhysicalDistribute
-------------------PhysicalAssertNumRows
---------------------hashAgg[GLOBAL]
----------------------PhysicalDistribute
-------------------------hashAgg[LOCAL]
---------------------------PhysicalProject
-----------------------------filter((customer.c_acctbal >
0.00)substring(c_phone, 1, 2) IN ('13', '31', '23', '29', '30', '18', '17'))
-------------------------------PhysicalOlapScan[customer]
-
+------------------------PhysicalAssertNumRows
+--------------------------hashAgg[GLOBAL]
+----------------------------PhysicalDistribute
+------------------------------hashAgg[LOCAL]
+--------------------------------PhysicalProject
+----------------------------------filter((customer.c_acctbal >
0.00)substring(c_phone, 1, 2) IN ('13', '31', '23', '29', '30', '18', '17'))
+------------------------------------PhysicalOlapScan[customer]
\ No newline at end of file
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]