[ https://issues.apache.org/jira/browse/SPARK-37203?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
jiaan.geng updated SPARK-37203: ------------------------------- Summary: Fix NotSerializableException when observe with TypedImperativeAggregate (was: Fix NotSerializableException when observe with percentile_approx) > Fix NotSerializableException when observe with TypedImperativeAggregate > ----------------------------------------------------------------------- > > Key: SPARK-37203 > URL: https://issues.apache.org/jira/browse/SPARK-37203 > Project: Spark > Issue Type: Bug > Components: SQL > Affects Versions: 3.3.0 > Reporter: jiaan.geng > Priority: Major > > {code:java} > val namedObservation = Observation("named") > val df = spark.range(100) > val observed_df = df.observe( > namedObservation, percentile_approx($"id", lit(0.5), > lit(100)).as("percentile_approx_val")) > observed_df.collect() > namedObservation.get > {code} > throws exception as follows: > {code:java} > 15:16:27.994 ERROR org.apache.spark.util.Utils: Exception encountered > java.io.NotSerializableException: > org.apache.spark.sql.catalyst.expressions.aggregate.ApproximatePercentile$PercentileDigest > at java.io.ObjectOutputStream.writeObject0(ObjectOutputStream.java:1184) > at > java.io.ObjectOutputStream.defaultWriteFields(ObjectOutputStream.java:1548) > at > java.io.ObjectOutputStream.writeSerialData(ObjectOutputStream.java:1509) > at > java.io.ObjectOutputStream.writeOrdinaryObject(ObjectOutputStream.java:1432) > at java.io.ObjectOutputStream.writeObject0(ObjectOutputStream.java:1178) > at java.io.ObjectOutputStream.writeArray(ObjectOutputStream.java:1378) > at java.io.ObjectOutputStream.writeObject0(ObjectOutputStream.java:1174) > at > java.io.ObjectOutputStream.defaultWriteFields(ObjectOutputStream.java:1548) > at > java.io.ObjectOutputStream.writeSerialData(ObjectOutputStream.java:1509) > at > java.io.ObjectOutputStream.writeOrdinaryObject(ObjectOutputStream.java:1432) > at java.io.ObjectOutputStream.writeObject0(ObjectOutputStream.java:1178) > at > java.io.ObjectOutputStream.defaultWriteFields(ObjectOutputStream.java:1548) > at > java.io.ObjectOutputStream.writeSerialData(ObjectOutputStream.java:1509) > at > java.io.ObjectOutputStream.writeOrdinaryObject(ObjectOutputStream.java:1432) > at java.io.ObjectOutputStream.writeObject0(ObjectOutputStream.java:1178) > at java.io.ObjectOutputStream.writeObject(ObjectOutputStream.java:348) > at > org.apache.spark.scheduler.DirectTaskResult.$anonfun$writeExternal$2(TaskResult.scala:55) > at > org.apache.spark.scheduler.DirectTaskResult.$anonfun$writeExternal$2$adapted(TaskResult.scala:55) > at scala.collection.Iterator.foreach(Iterator.scala:943) > at scala.collection.Iterator.foreach$(Iterator.scala:943) > at scala.collection.AbstractIterator.foreach(Iterator.scala:1431) > at scala.collection.IterableLike.foreach(IterableLike.scala:74) > at scala.collection.IterableLike.foreach$(IterableLike.scala:73) > at scala.collection.AbstractIterable.foreach(Iterable.scala:56) > at > org.apache.spark.scheduler.DirectTaskResult.$anonfun$writeExternal$1(TaskResult.scala:55) > at > scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.java:23) > at org.apache.spark.util.Utils$.tryOrIOException(Utils.scala:1434) > at > org.apache.spark.scheduler.DirectTaskResult.writeExternal(TaskResult.scala:51) > at > java.io.ObjectOutputStream.writeExternalData(ObjectOutputStream.java:1459) > at > java.io.ObjectOutputStream.writeOrdinaryObject(ObjectOutputStream.java:1430) > at java.io.ObjectOutputStream.writeObject0(ObjectOutputStream.java:1178) > at java.io.ObjectOutputStream.writeObject(ObjectOutputStream.java:348) > at > org.apache.spark.serializer.JavaSerializationStream.writeObject(JavaSerializer.scala:44) > at > org.apache.spark.serializer.JavaSerializerInstance.serialize(JavaSerializer.scala:101) > at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:616) > at > java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) > at > java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) > at java.lang.Thread.run(Thread.java:748) > {code} -- This message was sent by Atlassian Jira (v8.3.4#803005) --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@spark.apache.org For additional commands, e-mail: issues-h...@spark.apache.org