Hi all, I wrote the Spark code below to extract data from SQL Server using SQLContext.read.format with several different options. My question: does the sqlContext.read load function run in parallel by default — does it use all the available cores? When I save the output to a file, it is written as a single file; does that mean the code used only a single core on my machine?
My Code:-

package com.kali.db

/**
 * Created by kalit_000 on 06/12/2015.
 *
 * Reads a table from SQL Server over JDBC (connection details injected from a
 * Spring bean in sparksql.xml), runs a SQL query against it and writes the
 * delimited result to a text file.
 */
import org.apache.spark.SparkConf
import org.apache.log4j.Logger
import org.apache.log4j.Level
import org.apache.spark._
import org.apache.spark.rdd.{JdbcRDD, RDD}
import org.apache.spark.sql.DataFrame
import org.springframework.context.support.ClassPathXmlApplicationContext
import scala.util.control.NonFatal

/** Immutable holder for the JDBC connection/query settings injected from sparksql.xml. */
case class SparkSqlValueClass(driver: String,
                              url: String,
                              username: String,
                              password: String,
                              sql: String,
                              table: String,
                              opdelimeter: String)

object SparkSqlSelectSpring {

  def main(args: Array[String]) {
    Logger.getLogger("org").setLevel(Level.WARN)
    Logger.getLogger("akka").setLevel(Level.WARN)

    val conf = new SparkConf()
      .setMaster("local[*]")
      .setAppName("SparkSqlConfigurable")
      .set("spark.hadoop.validateOutputSpecs", "false")
    val sc = new SparkContext(conf)

    /** Renders each Row as one delimited line, e.g. "1~10~5~AW000001~...". */
    def opfile(value: DataFrame, delimeter: String): RDD[String] =
      value.map(x => x.toString.replace("[", "").replace("]", "").replace(",", delimeter))

    // Read the application context file.
    val ctx = new ClassPathXmlApplicationContext("sparksql.xml")
    val DBinfo = ctx.getBean("SparkSQLInst").asInstanceOf[SparkSqlValueClass]

    val driver = DBinfo.driver
    val url = DBinfo.url
    val username = DBinfo.username
    val password = DBinfo.password
    val query = DBinfo.sql // was duplicated as both `query` and an unused `sqlquery`
    val table = DBinfo.table
    val opdelimeter = DBinfo.opdelimeter

    println("DB Driver:-%s".format(driver))
    println("DB Url:-%s".format(url))
    println("Username:-%s".format(username))
    // Never log credentials in clear text; print a mask of the same length.
    println("Password:-%s".format("*" * password.length))
    println("Query:-%s".format(query))
    println("Table:-%s".format(table))
    println("Opdelimeter:-%s".format(opdelimeter))

    try {
      val sqlContext = new org.apache.spark.sql.SQLContext(sc)

      // NOTE: with only url/dbtable/driver the JDBC source reads through ONE
      // connection into a SINGLE partition — which is exactly why the job writes
      // one output file and uses one core. To read in parallel, also supply the
      // partitioning options, e.g.:
      //   "partitionColumn" -> "CustomerID", "lowerBound" -> "1",
      //   "upperBound" -> "30000", "numPartitions" -> "4"
      // (saveAsTextFile then writes one part-file per partition).
      val df = sqlContext.read
        .format("jdbc")
        .options(Map("url" -> url, "dbtable" -> table, "driver" -> driver))
        .load()

      df.registerTempTable(table)
      val OP = sqlContext.sql(query)
      opfile(OP, opdelimeter)
        .saveAsTextFile("C:\\Users\\kalit_000\\Desktop\\typesafe\\scaladbop\\op.txt")
    } catch {
      // NonFatal so OutOfMemoryError, InterruptedException etc. still propagate.
      case NonFatal(e) => e.printStackTrace()
    } finally {
      // Always release the SparkContext, even when the job fails.
      sc.stop()
    }
  }
}

Spring Bean:-

<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE beans PUBLIC "-//SPRING//DTD BEAN//EN"
  "http://www.springframework.org/dtd/spring-beans.dtd">
<beans>
  <bean id="queryProps" class="org.springframework.beans.factory.config.PropertiesFactoryBean">
  </bean>
  <bean id="SparkSQLInst" class="com.kali.db.SparkSqlValueClass">
    <constructor-arg value="com.microsoft.sqlserver.jdbc.SQLServerDriver" />
    <constructor-arg value="jdbc:sqlserver://localhost;user=admin;password=oracle;database=AdventureWorks2014" />
    <constructor-arg value="admin" />
    <constructor-arg value="oracle" />
    <!-- SQL Server T-SQL has no LIMIT clause; use TOP n instead -->
    <constructor-arg value="select top 10 CustomerID,StoreID,TerritoryID,AccountNumber,ModifiedDate from customer" />
    <constructor-arg value="customer" />
    <constructor-arg value="~" />
  </bean>
</beans>

Thanks
Sri

--
View this message in context: http://apache-spark-user-list.1001560.n3.nabble.com/Spark-sql-data-frames-do-they-run-in-parallel-by-default-tp25604.html
Sent from the Apache Spark User List mailing list archive at Nabble.com.

---------------------------------------------------------------------
To unsubscribe, e-mail: user-unsubscr...@spark.apache.org
For additional commands, e-mail: user-h...@spark.apache.org