I am trying to parse data from an XML file with Spark using the Databricks spark-xml library.
// Here is my code:

import org.apache.spark.SparkConf
import org.apache.spark.SparkContext
import org.apache.spark.sql.SQLContext
import org.apache.spark.sql.types.StringType

/**
 * Reads a USPTO patent-application XML file with spark-xml and prints the
 * `doc-number` of each `us-bibliographic-data-application` row.
 *
 * Fixes applied to the original:
 *  - removed duplicate imports (`functions.lit` / `functions.udf` were each
 *    imported twice) and imports the program never uses
 *    (java.text.Format, concat_ws, With, scala.sys.process._, ...);
 *  - renamed the misspelled local `sqlCotext` -> `sqlContext`;
 *  - `StringType` imported directly instead of the redundant
 *    `sql.types.StringType` path;
 *  - rows are `collect()`ed to the driver before printing. In the original,
 *    `for (l <- q1) { println(...) }` runs the println inside executor JVMs;
 *    with `spark-submit` on anything but a single local JVM the driver
 *    console therefore shows nothing useful.
 *
 * NOTE(review): if `spark-submit` still yields (null,null) rows, the usual
 * causes are a spark-xml version mismatch between the IDE classpath and the
 * `--packages` jar, or a `rowTag` that does not match the file's actual
 * element nesting — verify both against the cluster environment.
 */
object printschema {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setAppName("printschema").setMaster("local")
    // Allow very wide schemas (patent XML has many fields) to print fully.
    conf.set("spark.debug.maxToStringFields", "10000000")
    val context = new SparkContext(conf)
    val sqlContext = new SQLContext(context)
    import sqlContext.implicits._

    val df = sqlContext.read.format("com.databricks.spark.xml")
      .option("rowTag", "us-bibliographic-data-application")
      .option("treatEmptyValuesAsNulls", true)
      .load("/Users/praveen/Desktop/ipa0105.xml")

    // Both output columns are cast from the same nested XML path, as in the
    // original program.
    val q1 = df
      .withColumn("document", $"application-reference.document-id.doc-number".cast(StringType))
      .withColumn("document_number", $"application-reference.document-id.doc-number".cast(StringType))
      .select("document", "document_number")

    // collect() brings the rows to the driver so println output appears on
    // the submitting console, not in executor logs.
    for (row <- q1.collect()) {
      val m1 = row.get(0)
      val m2 = row.get(1)
      println(m1, m2)
    }
  }
}

// When I run the code from Scala IDE / IntelliJ IDEA it works fine, and here is my output:
//   (14789882,14789882)(14755945,14755945)(14755919,14755919)(14755034,14755034)
// But when I build a jar and run it with spark-submit it returns only null values:
//   NULL,NULL NULL,NULL NULL,NULL
// Please help me out. Thanks in advance.