[ 
https://issues.apache.org/jira/browse/SPARK-37581?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

ocean updated SPARK-37581:
--------------------------
    Description: 
when exec a sql, this sql hang at planning stage.

when disable DPP, sql can finish very quickly.

we can reproduce this  problem through example below:

create table test.test_a (
day string,
week int,
weekday int)
partitioned by (
dt varchar(8))
stored as orc;

insert into test.test_a partition (dt=20211126) values('1',1,2);

create table test.test_b (
session_id string,
device_id string,
brand string,
model string,
wx_version string,
os string,
net_work_type string,
app_id string,
app_name string,
col_z string,
page_url string,
page_title string,
olabel string,
otitle string,
source string,
send_dt string,
recv_dt string,
request_time string,
write_time string,
client_ip string,
col_a string,
dt_hour varchar(12),
product string,
channelfrom string,
customer_um string,
kb_code string,
col_b string,
rectype string,
errcode string,
col_c string,
pageid_merge string)
partitioned by (
dt varchar(8))
stored as orc;

insert into test.test_b partition(dt=20211126)
values('2','2','2','2','2','2','2','2','2','2','2','2','2','2','2','2','2','2','2','2','2','2','2','2','2','2','2','2','2','2','2');

 

 

drop table if exists test.test_c;create table if not exists test.test_c stored 
as ORCFILE as
select calendar.day,calendar.week,calendar.weekday, a_kbs,
b_kbs, c_kbs,d_kbs,e_kbs,f_kbs,g_kbs,h_kbs,i_kbs,j_kbs,k_kbs
from (select * from test.test_a where dt = '20211126') calendar
left join
(select dt,count(distinct kb_code) as a_kbs
from test.test_b
where dt = '20211126'
group by dt) t1
on calendar.dt = t1.dt

left join
(select dt,count(distinct kb_code) as b_kbs
from test.test_b
where dt = '20211126'
group by dt) t2
on calendar.dt = t2.dt

left join
(select dt,count(distinct kb_code) as c_kbs
from test.test_b
where dt = '20211126'
group by dt) t3
on calendar.dt = t3.dt

left join
(select dt,count(distinct kb_code) as d_kbs
from test.test_b
where dt = '20211126'
group by dt) t4
on calendar.dt = t4.dt

left join
(select dt,count(distinct kb_code) as e_kbs
from test.test_b
where dt = '20211126'
group by dt) t5
on calendar.dt = t5.dt

left join
(select dt,count(distinct kb_code) as f_kbs
from test.test_b
where dt = '20211126'
group by dt) t6
on calendar.dt = t6.dt

left join
(select dt,count(distinct kb_code) as g_kbs
from test.test_b
where dt = '20211126'
group by dt) t7
on calendar.dt = t7.dt

left join
(select dt,count(distinct kb_code) as h_kbs
from test.test_b
where dt = '20211126'
group by dt) t8
on calendar.dt = t8.dt

left join
(select dt,count(distinct kb_code) as i_kbs
from test.test_b
where dt = '20211126'
group by dt) t9
on calendar.dt = t9.dt

left join
(select dt,count(distinct kb_code) as j_kbs
from test.test_b
where dt = '20211126'
group by dt) t10
on calendar.dt = t10.dt

left join
(select dt,count(distinct kb_code) as k_kbs
from test.test_b
where dt = '20211126'
group by dt) t11
on calendar.dt = t11.dt

 

  was:
when exec a sql, this sql hang at planning stage.

when disable DPP, sql can finish very quickly.

we can reproduce this  problem through example below:

create table test.test_a (
day string,
week int,
weekday int)
partitioned by (
dt varchar(8))
stored as orc;

insert into test.test_a partition (dt=20211126) values('1',1,2);

create table test.test_b (
session_id string,
device_id string,
brand string,
model string,
wx_version string,
os string,
net_work_type string,
app_id string,
app_name string,
col_z string,
page_url string,
page_title string,
olabel string,
otitle string,
source string,
send_dt string,
recv_dt string,
request_time string,
write_time string,
client_ip string,
col_a string,
dt_hour varchar(12),
product string,
channelfrom string,
customer_um string,
kb_code string,
col_b string,
rectype string,
errcode string,
col_c string,
pageid_merge string)
partitioned by (
dt varchar(8))
stored as orc;

insert into test.test_b partition(dt=20211126)
values('2','2','2','2','2','2','2','2','2','2','2','2','2','2','2','2','2','2','2','2','2','2','2','2','2','2','2','2','2','2','2');

 


> sql hang at planning stage
> --------------------------
>
>                 Key: SPARK-37581
>                 URL: https://issues.apache.org/jira/browse/SPARK-37581
>             Project: Spark
>          Issue Type: Bug
>          Components: SQL
>    Affects Versions: 3.1.1, 3.2.0
>            Reporter: ocean
>            Priority: Major
>
> when exec a sql, this sql hang at planning stage.
> when disable DPP, sql can finish very quickly.
> we can reproduce this  problem through example below:
> create table test.test_a (
> day string,
> week int,
> weekday int)
> partitioned by (
> dt varchar(8))
> stored as orc;
> insert into test.test_a partition (dt=20211126) values('1',1,2);
> create table test.test_b (
> session_id string,
> device_id string,
> brand string,
> model string,
> wx_version string,
> os string,
> net_work_type string,
> app_id string,
> app_name string,
> col_z string,
> page_url string,
> page_title string,
> olabel string,
> otitle string,
> source string,
> send_dt string,
> recv_dt string,
> request_time string,
> write_time string,
> client_ip string,
> col_a string,
> dt_hour varchar(12),
> product string,
> channelfrom string,
> customer_um string,
> kb_code string,
> col_b string,
> rectype string,
> errcode string,
> col_c string,
> pageid_merge string)
> partitioned by (
> dt varchar(8))
> stored as orc;
> insert into test.test_b partition(dt=20211126)
> values('2','2','2','2','2','2','2','2','2','2','2','2','2','2','2','2','2','2','2','2','2','2','2','2','2','2','2','2','2','2','2');
>  
>  
> drop table if exists test.test_c;create table if not exists test.test_c 
> stored as ORCFILE as
> select calendar.day,calendar.week,calendar.weekday, a_kbs,
> b_kbs, c_kbs,d_kbs,e_kbs,f_kbs,g_kbs,h_kbs,i_kbs,j_kbs,k_kbs
> from (select * from test.test_a where dt = '20211126') calendar
> left join
> (select dt,count(distinct kb_code) as a_kbs
> from test.test_b
> where dt = '20211126'
> group by dt) t1
> on calendar.dt = t1.dt
> left join
> (select dt,count(distinct kb_code) as b_kbs
> from test.test_b
> where dt = '20211126'
> group by dt) t2
> on calendar.dt = t2.dt
> left join
> (select dt,count(distinct kb_code) as c_kbs
> from test.test_b
> where dt = '20211126'
> group by dt) t3
> on calendar.dt = t3.dt
> left join
> (select dt,count(distinct kb_code) as d_kbs
> from test.test_b
> where dt = '20211126'
> group by dt) t4
> on calendar.dt = t4.dt
> left join
> (select dt,count(distinct kb_code) as e_kbs
> from test.test_b
> where dt = '20211126'
> group by dt) t5
> on calendar.dt = t5.dt
> left join
> (select dt,count(distinct kb_code) as f_kbs
> from test.test_b
> where dt = '20211126'
> group by dt) t6
> on calendar.dt = t6.dt
> left join
> (select dt,count(distinct kb_code) as g_kbs
> from test.test_b
> where dt = '20211126'
> group by dt) t7
> on calendar.dt = t7.dt
> left join
> (select dt,count(distinct kb_code) as h_kbs
> from test.test_b
> where dt = '20211126'
> group by dt) t8
> on calendar.dt = t8.dt
> left join
> (select dt,count(distinct kb_code) as i_kbs
> from test.test_b
> where dt = '20211126'
> group by dt) t9
> on calendar.dt = t9.dt
> left join
> (select dt,count(distinct kb_code) as j_kbs
> from test.test_b
> where dt = '20211126'
> group by dt) t10
> on calendar.dt = t10.dt
> left join
> (select dt,count(distinct kb_code) as k_kbs
> from test.test_b
> where dt = '20211126'
> group by dt) t11
> on calendar.dt = t11.dt
>  



--
This message was sent by Atlassian Jira
(v8.20.1#820001)

---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscr...@spark.apache.org
For additional commands, e-mail: issues-h...@spark.apache.org

Reply via email to