Hi, I can use sbt to compile and run the following code. It works without any problem.
I want to divide this into the object and another class. I would like to build the result set by joining tables, identified by the DataFrame 'rs', and then call the method "firstquerym" in the class FirstQuery to do the calculation identified as "rs1". Now it needs "rs" to be available in class FirstQuery. Two questions, please: 1. How can I pass rs to class FirstQuery? 2. Is there a better way of modularising this work, so that methods defined in another class can be called from the main method? Thanks import org.apache.spark.SparkContext import org.apache.spark.SparkConf import org.apache.spark.sql.Row import org.apache.spark.sql.hive.HiveContext import org.apache.spark.sql.types._ import org.apache.spark.sql.SQLContext import org.apache.spark.sql.functions._ // object Harness4 { def main(args: Array[String]) { val conf = new SparkConf().setAppName("Harness4").setMaster("local[*]").set("spark.driver.allowMultipleContexts", "true") val sc = new SparkContext(conf) // Note that this should be done only after an instance of org.apache.spark.sql.SQLContext is created. 
It should be written as: val sqlContext= new org.apache.spark.sql.SQLContext(sc) import sqlContext.implicits._ val HiveContext = new org.apache.spark.sql.hive.HiveContext(sc) println ("\nStarted at"); HiveContext.sql("SELECT FROM_unixtime(unix_timestamp(), 'dd/MM/yyyy HH:mm:ss.ss') ").collect.foreach(println) HiveContext.sql("use oraclehadoop") var s = HiveContext.table("sales").select("AMOUNT_SOLD","TIME_ID","CHANNEL_ID") val c = HiveContext.table("channels").select("CHANNEL_ID","CHANNEL_DESC") val t = HiveContext.table("times").select("TIME_ID","CALENDAR_MONTH_DESC") println ("\ncreating data set at"); HiveContext.sql("SELECT FROM_unixtime(unix_timestamp(), 'dd/MM/yyyy HH:mm:ss.ss') ").collect.foreach(println) val rs = s.join(t,"time_id").join(c,"channel_id").groupBy("calendar_month_desc","channel_desc").agg(sum("amount_sold").as("TotalSales")) //println ("\nfirst query at"); HiveContext.sql("SELECT FROM_unixtime(unix_timestamp(), 'dd/MM/yyyy HH:mm:ss.ss') ").collect.foreach(println) //val rs1 = rs.orderBy("calendar_month_desc","channel_desc").take(5).foreach(println) val firstquery = new FirstQuery firstquery.firstquerym } } // class FirstQuery { def firstquerym { println ("\nfirst query at"); HiveContext.sql("SELECT FROM_unixtime(unix_timestamp(), 'dd/MM/yyyy HH:mm:ss.ss') ").collect.foreach(println) val rs1 = rs.orderBy("calendar_month_desc","channel_desc").take(5).foreach(println) } } Dr Mich Talebzadeh LinkedIn * https://www.linkedin.com/profile/view?id=AAEAAAAWh2gBxianrbJd6zP6AcPCCdOABUrV8Pw <https://www.linkedin.com/profile/view?id=AAEAAAAWh2gBxianrbJd6zP6AcPCCdOABUrV8Pw>* http://talebzadehmich.wordpress.com