Hi,

I can use sbt to compile and run the following code. It works without any
problem.

I want to divide this into an object and a separate class. I would like to
build the result set joining the tables, identified by the DataFrame 'rs',
and then call the method "firstquerym" in the class FirstQuery to do the
calculation identified as "rs1".

Now it needs "rs" to be available in class FirstQuery. Two questions please


   1. How can I pass rs to class FirstQuery
   2. Is there a better way of modularising this work so I can use methods
   defined in another class to be called in main method

Thanks

import org.apache.spark.SparkContext
import org.apache.spark.SparkConf
import org.apache.spark.sql.Row
import org.apache.spark.sql.hive.HiveContext
import org.apache.spark.sql.types._
import org.apache.spark.sql.SQLContext
import org.apache.spark.sql.functions._
//
object Harness4 {
  def main(args: Array[String]) {
  val conf = new
SparkConf().setAppName("Harness4").setMaster("local[*]").set("spark.driver.allowMultipleContexts",
"true")
  val sc = new SparkContext(conf)
  // Note that this should be done only after an instance of
org.apache.spark.sql.SQLContext is created. It should be written as:
  val sqlContext= new org.apache.spark.sql.SQLContext(sc)
  import sqlContext.implicits._
  val HiveContext = new org.apache.spark.sql.hive.HiveContext(sc)
println ("\nStarted at"); HiveContext.sql("SELECT
FROM_unixtime(unix_timestamp(), 'dd/MM/yyyy HH:mm:ss.ss')
").collect.foreach(println)
HiveContext.sql("use oraclehadoop")
var s =
HiveContext.table("sales").select("AMOUNT_SOLD","TIME_ID","CHANNEL_ID")
val c = HiveContext.table("channels").select("CHANNEL_ID","CHANNEL_DESC")
val t = HiveContext.table("times").select("TIME_ID","CALENDAR_MONTH_DESC")
println ("\ncreating data set at"); HiveContext.sql("SELECT
FROM_unixtime(unix_timestamp(), 'dd/MM/yyyy HH:mm:ss.ss')
").collect.foreach(println)
val rs =
s.join(t,"time_id").join(c,"channel_id").groupBy("calendar_month_desc","channel_desc").agg(sum("amount_sold").as("TotalSales"))
//println ("\nfirst query at"); HiveContext.sql("SELECT
FROM_unixtime(unix_timestamp(), 'dd/MM/yyyy HH:mm:ss.ss')
").collect.foreach(println)
//val rs1 =
rs.orderBy("calendar_month_desc","channel_desc").take(5).foreach(println)
val firstquery =  new FirstQuery
firstquery.firstquerym
 }
}
//
class FirstQuery {
   def firstquerym {
      println ("\nfirst query at"); HiveContext.sql("SELECT
FROM_unixtime(unix_timestamp(), 'dd/MM/yyyy HH:mm:ss.ss')
").collect.foreach(println)
      val rs1 =
rs.orderBy("calendar_month_desc","channel_desc").take(5).foreach(println)
  }
}



Dr Mich Talebzadeh



LinkedIn:
https://www.linkedin.com/profile/view?id=AAEAAAAWh2gBxianrbJd6zP6AcPCCdOABUrV8Pw



http://talebzadehmich.wordpress.com

Reply via email to