[ https://issues.apache.org/jira/browse/SPARK-13862?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
Reynold Xin updated SPARK-13862: -------------------------------- Fix Version/s: 2.0.0 > TPCDS query 49 returns wrong results compared to TPC official result set > ------------------------------------------------------------------------- > > Key: SPARK-13862 > URL: https://issues.apache.org/jira/browse/SPARK-13862 > Project: Spark > Issue Type: Bug > Components: SQL > Affects Versions: 1.6.0 > Reporter: JESSE CHEN > Labels: tpcds-result-mismatch > Fix For: 2.0.0 > > > Testing Spark SQL using TPC queries. Query 49 returns wrong results compared > to official result set. This is at 1GB SF (validation run). > SparkSQL has right answer but in wrong order (and there is an 'order by' in > the query). > Actual results: > {noformat} > store,9797,0.80000000000000000000,2,2] > [store,12641,0.81609195402298850575,3,3] > [store,6661,0.92207792207792207792,7,7] > [store,13013,0.94202898550724637681,8,8] > [store,9029,1.00000000000000000000,10,10] > [web,15597,0.66197183098591549296,3,3] > [store,14925,0.96470588235294117647,9,9] > [store,4063,1.00000000000000000000,10,10] > [catalog,8929,0.76250000000000000000,7,7] > [store,11589,0.82653061224489795918,6,6] > [store,1171,0.82417582417582417582,5,5] > [store,9471,0.77500000000000000000,1,1] > [catalog,12577,0.65591397849462365591,3,3] > [web,97,0.90361445783132530120,9,8] > [web,85,0.85714285714285714286,8,7] > [catalog,361,0.74647887323943661972,5,5] > [web,2915,0.69863013698630136986,4,4] > [web,117,0.92500000000000000000,10,9] > [catalog,9295,0.77894736842105263158,9,9] > [web,3305,0.73750000000000000000,6,16] > [catalog,16215,0.79069767441860465116,10,10] > [web,7539,0.59000000000000000000,1,1] > [catalog,17543,0.57142857142857142857,1,1] > [catalog,3411,0.71641791044776119403,4,4] > [web,11933,0.71717171717171717172,5,5] > [catalog,14513,0.63541666666666666667,2,2] > [store,15839,0.81632653061224489796,4,4] > [web,3337,0.62650602409638554217,2,2] > [web,5299,0.92708333333333333333,11,10] > [catalog,8189,0.74698795180722891566,6,6] > [catalog,14869,0.77173913043478260870,8,8] > [web,483,0.80000000000000000000,7,6] > {noformat} > Expected results: > {noformat} > +---------+-------+--------------------+-------------+---------------+ > | CHANNEL | ITEM | RETURN_RATIO | RETURN_RANK | CURRENCY_RANK | > +---------+-------+--------------------+-------------+---------------+ > | catalog | 17543 | .5714285714285714 | 1 | 1 | > | catalog | 14513 | .6354166666666666 | 2 | 2 | > | catalog | 12577 | .6559139784946236 | 3 | 3 | > | catalog | 3411 | .7164179104477611 | 4 | 4 | > | catalog | 361 | .7464788732394366 | 5 | 5 | > | catalog | 8189 | .7469879518072289 | 6 | 6 | > | catalog | 8929 | .7625000000000000 | 7 | 7 | > | catalog | 14869 | .7717391304347826 | 8 | 8 | > | catalog | 9295 | .7789473684210526 | 9 | 9 | > | catalog | 16215 | .7906976744186046 | 10 | 10 | > | store | 9471 | .7750000000000000 | 1 | 1 | > | store | 9797 | .8000000000000000 | 2 | 2 | > | store | 12641 | .8160919540229885 | 3 | 3 | > | store | 15839 | .8163265306122448 | 4 | 4 | > | store | 1171 | .8241758241758241 | 5 | 5 | > | store | 11589 | .8265306122448979 | 6 | 6 | > | store | 6661 | .9220779220779220 | 7 | 7 | > | store | 13013 | .9420289855072463 | 8 | 8 | > | store | 14925 | .9647058823529411 | 9 | 9 | > | store | 4063 | 1.0000000000000000 | 10 | 10 | > | store | 9029 | 1.0000000000000000 | 10 | 10 | > | web | 7539 | .5900000000000000 | 1 | 1 | > | web | 3337 | .6265060240963855 | 2 | 2 | > | web | 15597 | .6619718309859154 | 3 | 3 | > | web | 2915 | .6986301369863013 | 4 | 4 | > | web | 11933 | .7171717171717171 | 5 | 5 | > | web | 3305 | .7375000000000000 | 6 | 16 | > | web | 483 | .8000000000000000 | 7 | 6 | > | web | 85 | .8571428571428571 | 8 | 7 | > | web | 97 | .9036144578313253 | 9 | 8 | > | web | 117 | .9250000000000000 | 10 | 9 | > | web | 5299 | .9270833333333333 | 11 | 10 | > +---------+-------+--------------------+-------------+---------------+ > {noformat} > Query used: > {noformat} > -- start query 49 in stream 0 using template query49.tpl and seed > QUALIFICATION > select > 'web' as channel > ,web.item > ,web.return_ratio > ,web.return_rank > ,web.currency_rank > from ( > select > item > ,return_ratio > ,currency_ratio > ,rank() over (order by return_ratio) as return_rank > ,rank() over (order by currency_ratio) as currency_rank > from > ( select ws.ws_item_sk as item > ,(cast(sum(coalesce(wr.wr_return_quantity,0)) as decimal(15,4))/ > cast(sum(coalesce(ws.ws_quantity,0)) as decimal(15,4) )) as > return_ratio > ,(cast(sum(coalesce(wr.wr_return_amt,0)) as decimal(15,4))/ > cast(sum(coalesce(ws.ws_net_paid,0)) as decimal(15,4) )) as > currency_ratio > from > web_sales ws left outer join web_returns wr > on (ws.ws_order_number = wr.wr_order_number and > ws.ws_item_sk = wr.wr_item_sk) > ,date_dim > where > wr.wr_return_amt > 10000 > and ws.ws_net_profit > 1 > and ws.ws_net_paid > 0 > and ws.ws_quantity > 0 > and ws_sold_date_sk = d_date_sk > and d_year = 2001 > and d_moy = 12 > group by ws.ws_item_sk > ) in_web > ) web > where > ( > web.return_rank <= 10 > or > web.currency_rank <= 10 > ) > union > select > 'catalog' as channel > ,catalog.item > ,catalog.return_ratio > ,catalog.return_rank > ,catalog.currency_rank > from ( > select > item > ,return_ratio > ,currency_ratio > ,rank() over (order by return_ratio) as return_rank > ,rank() over (order by currency_ratio) as currency_rank > from > ( select > cs.cs_item_sk as item > ,(cast(sum(coalesce(cr.cr_return_quantity,0)) as decimal(15,4))/ > cast(sum(coalesce(cs.cs_quantity,0)) as decimal(15,4) )) as > return_ratio > ,(cast(sum(coalesce(cr.cr_return_amount,0)) as decimal(15,4))/ > cast(sum(coalesce(cs.cs_net_paid,0)) as decimal(15,4) )) as > currency_ratio > from > catalog_sales cs left outer join catalog_returns cr > on (cs.cs_order_number = cr.cr_order_number and > cs.cs_item_sk = cr.cr_item_sk) > ,date_dim > where > cr.cr_return_amount > 10000 > and cs.cs_net_profit > 1 > and cs.cs_net_paid > 0 > and cs.cs_quantity > 0 > and cs_sold_date_sk = d_date_sk > and d_year = 2001 > and d_moy = 12 > group by cs.cs_item_sk > ) in_cat > ) catalog > where > ( > catalog.return_rank <= 10 > or > catalog.currency_rank <=10 > ) > union > select > 'store' as channel > ,store.item > ,store.return_ratio > ,store.return_rank > ,store.currency_rank > from ( > select > item > ,return_ratio > ,currency_ratio > ,rank() over (order by return_ratio) as return_rank > ,rank() over (order by currency_ratio) as currency_rank > from > ( select sts.ss_item_sk as item > ,(cast(sum(coalesce(sr.sr_return_quantity,0)) as > decimal(15,4))/cast(sum(coalesce(sts.ss_quantity,0)) as decimal(15,4) )) as > return_ratio > ,(cast(sum(coalesce(sr.sr_return_amt,0)) as > decimal(15,4))/cast(sum(coalesce(sts.ss_net_paid,0)) as decimal(15,4) )) as > currency_ratio > from > store_sales sts left outer join store_returns sr > on (sts.ss_ticket_number = sr.sr_ticket_number and > sts.ss_item_sk = sr.sr_item_sk) > ,date_dim > where > sr.sr_return_amt > 10000 > and sts.ss_net_profit > 1 > and sts.ss_net_paid > 0 > and sts.ss_quantity > 0 > and ss_sold_date_sk = d_date_sk > and d_year = 2001 > and d_moy = 12 > group by sts.ss_item_sk > ) in_store > ) store > where ( > store.return_rank <= 10 > or > store.currency_rank <= 10 > ) > order by 1,4,5 > limit 100; > -- end query 49 in stream 0 using template query49.tpl > {noformat} -- This message was sent by Atlassian JIRA (v6.3.4#6332) --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@spark.apache.org For additional commands, e-mail: issues-h...@spark.apache.org