Hi, I have a dataset which looks like the one below: name age alice 35 bob null peter 24. I need to replace the null values in the columns with 0, so I referred to the Spark API source DataFrameNaFunctions.scala <https://github.com/apache/spark/blob/master/sql/core/src/main/scala/org/apache/spark/sql/DataFrameNaFunctions.scala>
I tried the code below, but it throws an exception: scala> import org.apache.spark.sql.types.{StructType, StructField, StringType,IntegerType,LongType,DoubleType, FloatType}; import org.apache.spark.sql.types.{StructType, StructField, StringType, IntegerType, LongType, DoubleType, FloatType} scala> val nulltestSchema = StructType(Seq(StructField("name", StringType, false),StructField("age", DoubleType, true))) nulltestSchema: org.apache.spark.sql.types.StructType = StructType(StructField(name,StringType,false), StructField(age,DoubleType,true)) scala> val dfnulltest = sqlContext.read.format("com.databricks.spark.csv").option("header", "true").schema(nulltestSchema).load("hdfs:// 172.31.29.201:8020/TestDivya/Spark/nulltest.csv") dfnulltest: org.apache.spark.sql.DataFrame = [name: string, age: double] scala> val dfchangenull = dfnulltest.na.fill(0,Seq("age")).select("name","age") dfchangenull: org.apache.spark.sql.DataFrame = [name: string, age: double] scala> dfchangenull.show 16/02/25 23:15:59 WARN TaskSetManager: Lost task 0.0 in stage 2.0 (TID 2, ip-172-31-22-135.ap-southeast-1.compute.internal): java.text.ParseException: Unparseable number: "null" at java.text.NumberFormat.parse(NumberFormat.java:350)