Hello,

I would like to export RDDs/DataFrames via a JDBC SQL interface from a
standalone application on the currently stable Spark v1.3.1.

I found one way of doing it, but it requires the @DeveloperApi method
HiveThriftServer2.startWithContext(sqlContext)

Is there a better, production-level approach to do this?

The full code snippet is below:
// you can run it via:
// ../spark/bin/spark-submit --master local[*] --class "SimpleApp" \
//   target/scala-2.10/simple-project_2.10-1.0.jar src/test/resources/1.json tableFromJson
====================

import org.apache.spark.sql.hive.thriftserver.HiveThriftServer2
import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.sql.hive.HiveContext

object SimpleApp {

  def main(args: Array[String]) {

    if (args.length != 2) {
      Console.err.println("Usage: app <source_json_file> <table_name>")
      System.exit(1)
    }
    val sourceFile = args(0)
    val tableName = args(1)

    val sparkConf = new SparkConf().setAppName("Simple Application")
    val sc = new SparkContext(sparkConf)
    val sqlContext = new HiveContext(sc)

    // load the JSON file into a DataFrame and expose it as a temp table
    val df = sqlContext.jsonFile(sourceFile)
    df.registerTempTable(tableName)

    println("Registered temp table %s for data source
%s".format(tableName, sourceFile))

    // expose the registered temp table over JDBC; the Thrift server
    // listens on hive.server2.thrift.port (10000 by default)
    HiveThriftServer2.startWithContext(sqlContext)

  }
}

====================
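
For reference, this is how I check the exported table from a plain JDBC
client once the Thrift server is up. It is only a minimal sketch: it
assumes the default Thrift endpoint (localhost:10000, no credentials),
the table name from the example above, and that the hive-jdbc driver is
on the classpath.

====================

import java.sql.DriverManager

object JdbcCheck {

  def main(args: Array[String]) {

    // register the Hive JDBC driver (assumes hive-jdbc is on the classpath)
    Class.forName("org.apache.hive.jdbc.HiveDriver")

    // default Thrift server endpoint; adjust host/port if
    // hive.server2.thrift.port is overridden
    val conn = DriverManager.getConnection("jdbc:hive2://localhost:10000/default", "", "")
    try {
      val stmt = conn.createStatement()
      val rs = stmt.executeQuery("SELECT * FROM tableFromJson LIMIT 5")
      val cols = rs.getMetaData.getColumnCount
      while (rs.next()) {
        // print each row as tab-separated values
        println((1 to cols).map(i => rs.getString(i)).mkString("\t"))
      }
    } finally {
      conn.close()
    }
  }
}

====================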



Best, Vladimir Grigor
