Hi,
I am using a org.apache.spark.sql.Encoder to serialize a custom object.
I now want to pass this column to a udf so it can do some operations on it
but this gives me the error :
Caused by: java.lang.ClassCastException: [B cannot be cast to
The code included at the problem demonstrates the issue.
I know I can simply make Person a case class in this example but its for
illustration purposes
Does anyone know how to solve this problem?
import com.holdenkarau.spark.testing.DatasetSuiteBase
import org.apache.spark.sql.{Encoder, Encoders}
import org.apache.spark.sql.functions._
import org.scalatest.FunSuite
import org.scalatest.Matchers._
class Person(val name: String) extends Serializable
class MySpec extends FunSuite with DatasetSuiteBase {
test("udf test") {
val sqlCtx = sqlContext
import sqlCtx.implicits._
val myUdf = udf { person: Person => person.name }
implicit val personEncoder: Encoder[Person] =
Encoders.javaSerialization[Person]
implicit val partitionAndPersonEncoder: Encoder[(Int, Person)] =
Encoders.tuple(Encoders.scalaInt, personEncoder)
val input = sc.parallelize(Seq(
1 -> new Person("jack"),
2 -> new Person("jill")
)).toDF("partition", "value")
input.printSchema()
input.select(myUdf($"value"))show()
}
}