Hi Jan, Thanks for the help. Alas, your suggestion also didn't work: scala> import org.apache.spark.sql.types.{StructType, StructField, > StringType,IntegerType,LongType,DoubleType, FloatType}; > import org.apache.spark.sql.types.{StructType, StructField, StringType, > IntegerType, LongType, DoubleType, FloatType} > scala> val nulltestSchema = StructType(Seq(StructField("name", StringType, > false),StructField("age", DoubleType, true))) > nulltestSchema: org.apache.spark.sql.types.StructType = > StructType(StructField(name,StringType,false), > StructField(age,DoubleType,true)) > scala> val dfnulltest = > sqlContext.read.format("com.databricks.spark.csv").option("header", > "true").schema(nulltestSchema).load("hdfs://xx.xx.xx.xxx:8020/TestDivya/Spark/nulltest.csv") > dfnulltest: org.apache.spark.sql.DataFrame = [name: string, age: double] > scala> dfnulltest.selectExpr("name", "coalesce(age, 0) as age") > res0: org.apache.spark.sql.DataFrame = [name: string, age: double] > scala> val dfresult = dfnulltest.selectExpr("name", "coalesce(age, 0) as > age") > dfresult: org.apache.spark.sql.DataFrame = [name: string, age: double] > scala> dfresult.show
java.text.ParseException: Unparseable number: "null" at java.text.NumberFormat.parse(NumberFormat.java:350) On 26 February 2016 at 15:15, Jan Štěrba <i...@jansterba.com> wrote: > just use coalesce function > > df.selectExpr("name", "coalesce(age, 0) as age") > > -- > Jan Sterba > https://twitter.com/honzasterba | http://flickr.com/honzasterba | > http://500px.com/honzasterba > > On Fri, Feb 26, 2016 at 5:27 AM, Divya Gehlot <divya.htco...@gmail.com> > wrote: > >> Hi, >> I have dataset which looks like below >> name age >> alice 35 >> bob null >> peter 24 >> I need to replace null values of columns with 0 >> so I referred Spark API DataframeNAfunctions.scala >> <https://github.com/apache/spark/blob/master/sql/core/src/main/scala/org/apache/spark/sql/DataFrameNaFunctions.scala> >> >> I tried the below code its throwing exception >> scala> import org.apache.spark.sql.types.{StructType, StructField, >> StringType,IntegerType,LongType,DoubleType, FloatType}; >> import org.apache.spark.sql.types.{StructType, StructField, StringType, >> IntegerType, LongType, DoubleType, FloatType} >> >> scala> val nulltestSchema = StructType(Seq(StructField("name", >> StringType, false),StructField("age", DoubleType, true))) >> nulltestSchema: org.apache.spark.sql.types.StructType = >> StructType(StructField(name,StringType,false), >> StructField(age,DoubleType,true)) >> >> scala> val dfnulltest = >> sqlContext.read.format("com.databricks.spark.csv").option("header", >> "true").schema(nulltestSchema).load("hdfs:// >> 172.31.29.201:8020/TestDivya/Spark/nulltest.csv") >> dfnulltest: org.apache.spark.sql.DataFrame = [name: string, age: double] >> >> scala> val dfchangenull = >> dfnulltest.na.fill(0,Seq("age")).select("name","age") >> dfchangenull: org.apache.spark.sql.DataFrame = [name: string, age: double] >> >> scala> dfchangenull.show >> 16/02/25 23:15:59 WARN TaskSetManager: Lost task 0.0 in stage 2.0 (TID 2, >> ip-172-31-22-135.ap-southeast-1.compute.internal): >> 
java.text.ParseException: Unparseable number: "null" >> at java.text.NumberFormat.parse(NumberFormat.java:350) >> >> > >