[ https://issues.apache.org/jira/browse/SPARK-37581?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
ocean updated SPARK-37581: -------------------------- Description: when exec a sql, this sql hang at planning stage. when disable DPP, sql can finish very quickly. we can reproduce this problem through example below: create table test.test_a ( day string, week int, weekday int) partitioned by ( dt varchar(8)) stored as orc; insert into test.test_a partition (dt=20211126) values('1',1,2); create table test.test_b ( session_id string, device_id string, brand string, model string, wx_version string, os string, net_work_type string, app_id string, app_name string, col_z string, page_url string, page_title string, olabel string, otitle string, source string, send_dt string, recv_dt string, request_time string, write_time string, client_ip string, col_a string, dt_hour varchar(12), product string, channelfrom string, customer_um string, kb_code string, col_b string, rectype string, errcode string, col_c string, pageid_merge string) partitioned by ( dt varchar(8)) stored as orc; insert into test.test_b partition(dt=20211126) values('2','2','2','2','2','2','2','2','2','2','2','2','2','2','2','2','2','2','2','2','2','2','2','2','2','2','2','2','2','2','2'); drop table if exists test.test_c;create table if not exists test.test_c stored as ORCFILE as select calendar.day,calendar.week,calendar.weekday, a_kbs, b_kbs, c_kbs,d_kbs,e_kbs,f_kbs,g_kbs,h_kbs,i_kbs,j_kbs,k_kbs from (select * from test.test_a where dt = '20211126') calendar left join (select dt,count(distinct kb_code) as a_kbs from test.test_b where dt = '20211126' group by dt) t1 on calendar.dt = t1.dt left join (select dt,count(distinct kb_code) as b_kbs from test.test_b where dt = '20211126' group by dt) t2 on calendar.dt = t2.dt left join (select dt,count(distinct kb_code) as c_kbs from test.test_b where dt = '20211126' group by dt) t3 on calendar.dt = t3.dt left join (select dt,count(distinct kb_code) as d_kbs from test.test_b where dt = '20211126' group by dt) t4 on calendar.dt = t4.dt left join (select dt,count(distinct kb_code) as e_kbs from test.test_b where dt = '20211126' group by dt) t5 on calendar.dt = t5.dt left join (select dt,count(distinct kb_code) as f_kbs from test.test_b where dt = '20211126' group by dt) t6 on calendar.dt = t6.dt left join (select dt,count(distinct kb_code) as g_kbs from test.test_b where dt = '20211126' group by dt) t7 on calendar.dt = t7.dt left join (select dt,count(distinct kb_code) as h_kbs from test.test_b where dt = '20211126' group by dt) t8 on calendar.dt = t8.dt left join (select dt,count(distinct kb_code) as i_kbs from test.test_b where dt = '20211126' group by dt) t9 on calendar.dt = t9.dt left join (select dt,count(distinct kb_code) as j_kbs from test.test_b where dt = '20211126' group by dt) t10 on calendar.dt = t10.dt left join (select dt,count(distinct kb_code) as k_kbs from test.test_b where dt = '20211126' group by dt) t11 on calendar.dt = t11.dt was: when exec a sql, this sql hang at planning stage. when disable DPP, sql can finish very quickly. we can reproduce this problem through example below: create table test.test_a ( day string, week int, weekday int) partitioned by ( dt varchar(8)) stored as orc; insert into test.test_a partition (dt=20211126) values('1',1,2); create table test.test_b ( session_id string, device_id string, brand string, model string, wx_version string, os string, net_work_type string, app_id string, app_name string, col_z string, page_url string, page_title string, olabel string, otitle string, source string, send_dt string, recv_dt string, request_time string, write_time string, client_ip string, col_a string, dt_hour varchar(12), product string, channelfrom string, customer_um string, kb_code string, col_b string, rectype string, errcode string, col_c string, pageid_merge string) partitioned by ( dt varchar(8)) stored as orc; insert into test.test_b partition(dt=20211126) values('2','2','2','2','2','2','2','2','2','2','2','2','2','2','2','2','2','2','2','2','2','2','2','2','2','2','2','2','2','2','2'); > sql hang at planning stage > -------------------------- > > Key: SPARK-37581 > URL: https://issues.apache.org/jira/browse/SPARK-37581 > Project: Spark > Issue Type: Bug > Components: SQL > Affects Versions: 3.1.1, 3.2.0 > Reporter: ocean > Priority: Major > > when exec a sql, this sql hang at planning stage. > when disable DPP, sql can finish very quickly. > we can reproduce this problem through example below: > create table test.test_a ( > day string, > week int, > weekday int) > partitioned by ( > dt varchar(8)) > stored as orc; > insert into test.test_a partition (dt=20211126) values('1',1,2); > create table test.test_b ( > session_id string, > device_id string, > brand string, > model string, > wx_version string, > os string, > net_work_type string, > app_id string, > app_name string, > col_z string, > page_url string, > page_title string, > olabel string, > otitle string, > source string, > send_dt string, > recv_dt string, > request_time string, > write_time string, > client_ip string, > col_a string, > dt_hour varchar(12), > product string, > channelfrom string, > customer_um string, > kb_code string, > col_b string, > rectype string, > errcode string, > col_c string, > pageid_merge string) > partitioned by ( > dt varchar(8)) > stored as orc; > insert into test.test_b partition(dt=20211126) > values('2','2','2','2','2','2','2','2','2','2','2','2','2','2','2','2','2','2','2','2','2','2','2','2','2','2','2','2','2','2','2'); > > > drop table if exists test.test_c;create table if not exists test.test_c > stored as ORCFILE as > select calendar.day,calendar.week,calendar.weekday, a_kbs, > b_kbs, c_kbs,d_kbs,e_kbs,f_kbs,g_kbs,h_kbs,i_kbs,j_kbs,k_kbs > from (select * from test.test_a where dt = '20211126') calendar > left join > (select dt,count(distinct kb_code) as a_kbs > from test.test_b > where dt = '20211126' > group by dt) t1 > on calendar.dt = t1.dt > left join > (select dt,count(distinct kb_code) as b_kbs > from test.test_b > where dt = '20211126' > group by dt) t2 > on calendar.dt = t2.dt > left join > (select dt,count(distinct kb_code) as c_kbs > from test.test_b > where dt = '20211126' > group by dt) t3 > on calendar.dt = t3.dt > left join > (select dt,count(distinct kb_code) as d_kbs > from test.test_b > where dt = '20211126' > group by dt) t4 > on calendar.dt = t4.dt > left join > (select dt,count(distinct kb_code) as e_kbs > from test.test_b > where dt = '20211126' > group by dt) t5 > on calendar.dt = t5.dt > left join > (select dt,count(distinct kb_code) as f_kbs > from test.test_b > where dt = '20211126' > group by dt) t6 > on calendar.dt = t6.dt > left join > (select dt,count(distinct kb_code) as g_kbs > from test.test_b > where dt = '20211126' > group by dt) t7 > on calendar.dt = t7.dt > left join > (select dt,count(distinct kb_code) as h_kbs > from test.test_b > where dt = '20211126' > group by dt) t8 > on calendar.dt = t8.dt > left join > (select dt,count(distinct kb_code) as i_kbs > from test.test_b > where dt = '20211126' > group by dt) t9 > on calendar.dt = t9.dt > left join > (select dt,count(distinct kb_code) as j_kbs > from test.test_b > where dt = '20211126' > group by dt) t10 > on calendar.dt = t10.dt > left join > (select dt,count(distinct kb_code) as k_kbs > from test.test_b > where dt = '20211126' > group by dt) t11 > on calendar.dt = t11.dt > -- This message was sent by Atlassian Jira (v8.20.1#820001) --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@spark.apache.org For additional commands, e-mail: issues-h...@spark.apache.org