Please find the query plan scala> sqlContext.sql("SELECT dw.DAY_OF_WEEK, dw.HOUR, avg(dw.SDP_USAGE) AS AVG_SDP_USAGE FROM (SELECT sdp.WID, DAY_OF_WEEK, HOUR, SUM(INTERVAL_VALUE) AS SDP_USAGE FROM (SELECT * FROM date_d AS dd JOIN interval_f AS intf ON intf.DATE_WID = dd.WID WHERE intf.DATE_WID >= 20150101 AND intf.DATE_WID <= 20150110 AND CAST(INTERVAL_END_TIME AS STRING) >= '2015-01-01 00:00:00.000' AND CAST(INTERVAL_END_TIME AS STRING) <= '2015-01-10 00:00:00.000' AND MEAS_WID = 3) AS test JOIN sdp_d AS sdp on test.SDP_WID = sdp.WID where sdp.UDC_ID = 'SP-168451834' group by sdp.WID, DAY_OF_WEEK, HOUR) AS dw group by dw.DAY_OF_WEEK, dw.HOUR")
q2: org.apache.spark.sql.SchemaRDD = SchemaRDD[36] at RDD at SchemaRDD.scala:103 == Query Plan == == Physical Plan == Aggregate false, [DAY_OF_WEEK#3,HOUR#43L], [DAY_OF_WEEK#3,HOUR#43L,(CAST(SUM(PartialSum#133), DoubleType) / CAST(SUM(PartialCount#134L), DoubleType)) AS AVG_SDP_USAGE#126] Exchange (HashPartitioning [DAY_OF_WEEK#3,HOUR#43L], 200) Aggregate true, [DAY_OF_WEEK#3,HOUR#43L], [DAY_OF_WEEK#3,HOUR#43L,COUNT(SDP_USAGE#130) AS PartialCount#134L,SUM(SDP_USAGE#130) AS PartialSum#133] Project [DAY_OF_WEEK#3,HOUR#43L,SDP_USAGE#130] Aggregate false, [WID#49,DAY_OF_WEEK#3,HOUR#43L], [WID#49,DAY_OF_WEEK#3,HOUR#43L,SUM(PartialSum#136) AS SDP_USAGE#130] Exchange (HashPartitioning [WID#49,DAY_OF_WEEK#3,HOUR#43L], 200) Aggregate true, [WID#49,DAY_OF_WEEK#3,HOUR#43L], [...