[ https://issues.apache.org/jira/browse/SPARK-30349?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]

cen yuhai updated SPARK-30349:
------------------------------
    Description: 
When joining tables and selecting the same columns, the query below returns wrong results on Spark 2.4.4 (see the attached screenshot-1.png).
{code:sql}
with tmp as (
  select
    log_date,
    buvid,
    manga_id,
    sum(readtime) as readtime
  from manga.dwd_app_readtime_xt_dt
  where log_date >= 20191220
  group by log_date, buvid, manga_id
)
select
  t.log_date,
  get_json_object(t.extended_fields, '$.type'),
  count(distinct t.buvid),
  count(distinct t0.buvid),
  count(distinct t1.buvid),
  count(distinct t2.buvid),
  count(distinct case when t1.buvid = t0.buvid then t1.buvid end),
  count(distinct case when t1.buvid = t0.buvid and t1.buvid = t2.buvid then t1.buvid end),
  count(distinct case when t0.buvid = t2.buvid then t0.buvid end),
  sum(readtime),
  avg(readtime),
  sum(case when t0.buvid = t3.buvid then readtime end),
  avg(case when t0.buvid = t3.buvid then readtime end)
from manga.manga_tfc_app_ubt_d t
join manga.manga_tfc_app_ubt_d t1
  on t.buvid = t1.buvid
  and t1.log_date >= 20191220
  and t1.event_id = 'bilibili-manga.manga-detail.0.0.pv'
  and to_date(t.stime) = to_date(t1.stime)
  and get_json_object(t1.extended_fields, '$.manga_id') = get_json_object(t.extended_fields, '$.manga_id')
left join manga.manga_buvid_minlog t0
  on t.buvid = t0.buvid
  and t0.log_date = 20191223
  and t0.minlog >= '2019-12-20'
  and to_date(t.stime) = to_date(t0.minlog)
left join manga.dwb_tfc_app_launch_df t2
  on t.buvid = t2.buvid
  and t2.log_date >= 20191220
  and date_add(to_date(t.stime), 1) = to_date(t2.stime)
left join tmp t3
  on t1.buvid = t3.buvid
  and t3.log_date >= 20191220
  and t3.manga_id = get_json_object(t.extended_fields, '$.manga_id')
where
  t.log_date >= 20191220
  and t.event_id = 'manga.homepage-recommend.detail.0.click'
group by
  t.log_date,
  get_json_object(t.extended_fields, '$.type')
{code}
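
The query above self-joins manga.manga_tfc_app_ubt_d (aliases t and t1) and aggregates the same column, buvid, from both sides. Below is a minimal sketch of that pattern; the inline table ubt and its rows are hypothetical stand-ins (the original tables are not available), so it illustrates the shape of the query rather than a confirmed reproduction.

{code:sql}
-- Minimal sketch of the suspected pattern (hypothetical inline data):
-- the same relation is joined to itself and the same column (buvid)
-- is counted distinctly from both aliases. On an affected build, the
-- concern is that both counts resolve to the same underlying column.
with ubt as (
  select 'u1' as buvid, 'click' as event_id
  union all
  select 'u1' as buvid, 'pv' as event_id
)
select
  count(distinct t.buvid)  as click_users,
  count(distinct t1.buvid) as pv_users
from ubt t
join ubt t1
  on t.buvid = t1.buvid
  and t1.event_id = 'pv'
where t.event_id = 'click'
{code}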

 !screenshot-1.png! 
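
One way to check how the analyzer bound the duplicated columns (a suggested diagnostic, not part of the original report) is to run EXPLAIN EXTENDED on a stripped-down version of the self-join and compare the attribute ids (e.g. buvid#12) that each alias resolves to:

{code:sql}
-- Suggested diagnostic, not from the report: inspect the analyzed
-- plan and check whether t.buvid and t1.buvid carry distinct
-- attribute ids. If both aliases resolve to the same id, the join
-- condition and the two distinct counts degenerate to one column.
explain extended
select
  count(distinct t.buvid),
  count(distinct t1.buvid)
from manga.manga_tfc_app_ubt_d t
join manga.manga_tfc_app_ubt_d t1
  on t.buvid = t1.buvid
{code}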

> The result is wrong when joining tables with selecting the same columns
> -----------------------------------------------------------------------
>
>                 Key: SPARK-30349
>                 URL: https://issues.apache.org/jira/browse/SPARK-30349
>             Project: Spark
>          Issue Type: Bug
>          Components: SQL
>    Affects Versions: 2.4.4
>         Environment: hadoop-2.8.4 spark 2.4.4
>            Reporter: cen yuhai
>            Priority: Major
>         Attachments: screenshot-1.png
>
>


