Yuming Wang created SPARK-36162:
-----------------------------------

             Summary: extractJoinKeysWithColStats support EqualNullSafe
                 Key: SPARK-36162
                 URL: https://issues.apache.org/jira/browse/SPARK-36162
             Project: Spark
          Issue Type: Improvement
          Components: SQL
    Affects Versions: 3.3.0
            Reporter: Yuming Wang


sql("select * from date_dim join item on d_date_sk = i_item_sk").explain("cost")
{noformat}
== Optimized Logical Plan ==
Join Inner, (d_date_sk#0 = i_item_sk#28), Statistics(sizeInBytes=9.2 TiB, rowCount=1.49E+10)
:- Relation 
default.date_dim[d_date_sk#0,d_date_id#1,d_date#2,d_month_seq#3,d_week_seq#4,d_quarter_seq#5,d_year#6,d_dow#7,d_moy#8,d_dom#9,d_qoy#10,d_fy_year#11,d_fy_quarter_seq#12,d_fy_week_seq#13,d_day_name#14,d_quarter_name#15,d_holiday#16,d_weekend#17,d_following_holiday#18,d_first_dom#19,d_last_dom#20,d_same_day_ly#21,d_same_day_lq#22,d_current_day#23,...
 4 more fields] parquet, Statistics(sizeInBytes=17.6 MiB, rowCount=7.30E+4)
+- Relation 
default.item[i_item_sk#28,i_item_id#29,i_rec_start_date#30,i_rec_end_date#31,i_item_desc#32,i_current_price#33,i_wholesale_cost#34,i_brand_id#35,i_brand#36,i_class_id#37,i_class#38,i_category_id#39,i_category#40,i_manufact_id#41,i_manufact#42,i_size#43,i_formulation#44,i_color#45,i_units#46,i_container#47,i_manager_id#48,i_product_name#49]
 parquet, Statistics(sizeInBytes=85.2 MiB, rowCount=2.04E+5)
{noformat}

sql("select * from date_dim join item on d_date_sk <=> i_item_sk").explain("cost")
{noformat}
== Optimized Logical Plan ==
Join Inner, (d_date_sk#0 <=> i_item_sk#28), Statistics(sizeInBytes=9.2 TiB, 
rowCount=1.49E+10)
:- Relation 
default.date_dim[d_date_sk#0,d_date_id#1,d_date#2,d_month_seq#3,d_week_seq#4,d_quarter_seq#5,d_year#6,d_dow#7,d_moy#8,d_dom#9,d_qoy#10,d_fy_year#11,d_fy_quarter_seq#12,d_fy_week_seq#13,d_day_name#14,d_quarter_name#15,d_holiday#16,d_weekend#17,d_following_holiday#18,d_first_dom#19,d_last_dom#20,d_same_day_ly#21,d_same_day_lq#22,d_current_day#23,...
 4 more fields] parquet, Statistics(sizeInBytes=17.6 MiB, rowCount=7.30E+4)
+- Relation 
default.item[i_item_sk#28,i_item_id#29,i_rec_start_date#30,i_rec_end_date#31,i_item_desc#32,i_current_price#33,i_wholesale_cost#34,i_brand_id#35,i_brand#36,i_class_id#37,i_class#38,i_category_id#39,i_category#40,i_manufact_id#41,i_manufact#42,i_size#43,i_formulation#44,i_color#45,i_units#46,i_container#47,i_manager_id#48,i_product_name#49]
 parquet, Statistics(sizeInBytes=85.2 MiB, rowCount=2.04E+5)
{noformat}

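extractJoinKeysWithColStats currently only extracts join keys from EqualTo (=) conditions; it should also handle EqualNullSafe (<=>) so that null-safe equi-joins can use column statistics for cardinality estimation:
https://github.com/apache/spark/blob/d6a68e0b67ff7de58073c176dd097070e88ac831/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/statsEstimation/JoinEstimation.scala#L329-L339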




--
This message was sent by Atlassian Jira
(v8.3.4#803005)

---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscr...@spark.apache.org
For additional commands, e-mail: issues-h...@spark.apache.org

Reply via email to