GitHub user tiffanyTown commented on the pull request: https://github.com/apache/spark/pull/5542#issuecomment-101890083 I found an issue when running the query with the SET spark.sql.aggregate2=true configuration after applying this patch. ERROR message: 15/05/07 17:11:14 WARN TaskSetManager: Lost task 15.0 in stage 101.0 (TID 2056, qac8-node2): java.lang.ClassCastException: java.lang.Double cannot be cast to java.lang.Long at scala.runtime.BoxesRunTime.unboxToLong(BoxesRunTime.java:110) at scala.math.Numeric$LongIsIntegral$.toInt(Numeric.scala:117) at org.apache.spark.sql.catalyst.expressions.Cast$$anonfun$castToInt$5.apply(Cast.scala:274) at org.apache.spark.sql.catalyst.expressions.Cast$$anonfun$castToInt$5.apply(Cast.scala:274) at org.apache.spark.sql.catalyst.expressions.Cast.eval(Cast.scala:435) at org.apache.spark.sql.catalyst.expressions.Alias.eval(namedExpressions.scala:118) at org.apache.spark.sql.catalyst.expressions.InterpretedMutableProjection.apply(Projection.scala:68) at org.apache.spark.sql.catalyst.expressions.InterpretedMutableProjection.apply(Projection.scala:52) at scala.collection.Iterator$$anon$11.next(Iterator.scala:328) at scala.collection.Iterator$class.foreach(Iterator.scala:727) at scala.collection.AbstractIterator.foreach(Iterator.scala:1157) at org.apache.spark.sql.hive.execution.InsertIntoHiveTable.org$apache$spark$sql$hive$execution$InsertIntoHiveTable$$writeToFile$1(InsertIntoHiveTable.scala:101) at org.apache.spark.sql.hive.execution.InsertIntoHiveTable$$anonfun$saveAsHiveFile$3.apply(InsertIntoHiveTable.scala:83) at org.apache.spark.sql.hive.execution.InsertIntoHiveTable$$anonfun$saveAsHiveFile$3.apply(InsertIntoHiveTable.scala:83) at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:61) at org.apache.spark.scheduler.Task.run(Task.scala:64) at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:209) at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145) at 
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615) at java.lang.Thread.run(Thread.java:745) query.sql file: INSERT INTO TABLE ${hiveconf:TEMP_TABLE} SELECT cid, 100.0 * COUNT(distinct (CASE WHEN r_date IS NOT NULL THEN oid ELSE 0L END)) / COUNT(distinct oid) AS r_order_ratio, SUM(CASE WHEN r_date IS NOT NULL THEN 1 ELSE 0 END) / COUNT(item) * 100 AS r_item_ratio, CASE WHEN SUM(s_amount)=0.0 THEN 0.0 ELSE (SUM(CASE WHEN r_date IS NOT NULL THEN r_amount ELSE 0.0 END) / SUM(s_amount) * 100) END AS r_amount_ratio, COUNT(distinct (CASE WHEN r_date IS NOT NULL THEN r_date ELSE 0L END)) AS r_freq FROM ( SELECT r.sr_returned_date_sk AS r_date, s.ss_item_sk AS item, s.ss_ticket_number AS oid, s.ss_net_paid AS s_amount, CASE WHEN r.sr_return_amt IS NULL THEN 0.0 ELSE r.sr_return_amt END AS r_amount, (CASE WHEN s.ss_customer_sk IS NULL THEN r.sr_customer_sk ELSE s.ss_customer_sk END) AS cid FROM store_sales s LEFT OUTER JOIN store_returns r ON ( r.sr_item_sk = s.ss_item_sk AND r.sr_ticket_number = s.ss_ticket_number AND s.ss_sold_date_sk IS NOT NULL ) ) q20_sales_returns WHERE cid IS NOT NULL GROUP BY cid ;
--- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. --- --------------------------------------------------------------------- To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org