[ 
https://issues.apache.org/jira/browse/SPARK-49042?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17869671#comment-17869671
 ] 

Wei Guo commented on SPARK-49042:
---------------------------------

[~arnaud.nauwynck]  Can you provide some code to construct a dataset to 
reproduce this warning log?

> CodeGenerator: Error calculating stats of compiled class. 
> java.lang.UnsupportedOperationException: empty.max
> ------------------------------------------------------------------------------------------------------------
>
>                 Key: SPARK-49042
>                 URL: https://issues.apache.org/jira/browse/SPARK-49042
>             Project: Spark
>          Issue Type: Bug
>          Components: Spark Core
>    Affects Versions: 3.1.1, 3.5.1
>            Reporter: Arnaud Nauwynck
>            Priority: Major
>
> CodeGenerator   (here using "dataset.count()")   generates WARN logs for some 
> datasets 
> The thrown exception is caught, an error log is printed, and the code statistics 
> are WRONG because it accumulates "(-1, -1)" instead of the real values.
> Here is the error log: 
> {noformat}
> WARN CodeGenerator: Error calculating stats of compiled class.
> java.lang.UnsupportedOperationException: empty.max
>       at scala.collection.TraversableOnce.max(TraversableOnce.scala:234)
>       at scala.collection.TraversableOnce.max$(TraversableOnce.scala:232)
>       at scala.collection.AbstractTraversable.max(Traversable.scala:108)
>       at 
> org.apache.spark.sql.catalyst.expressions.codegen.CodeGenerator$.$anonfun$updateAndGetCompilationStats$1(CodeGenerator.scala:1470)
>       at 
> scala.collection.TraversableLike.$anonfun$map$1(TraversableLike.scala:238)
>       at scala.collection.Iterator.foreach(Iterator.scala:941)
>       at scala.collection.Iterator.foreach$(Iterator.scala:941)
>       at scala.collection.AbstractIterator.foreach(Iterator.scala:1429)
>       at scala.collection.IterableLike.foreach(IterableLike.scala:74)
>       at scala.collection.IterableLike.foreach$(IterableLike.scala:73)
>       at scala.collection.AbstractIterable.foreach(Iterable.scala:56)
>       at scala.collection.TraversableLike.map(TraversableLike.scala:238)
>       at scala.collection.TraversableLike.map$(TraversableLike.scala:231)
>       at scala.collection.AbstractTraversable.map(Traversable.scala:108)
>       at 
> org.apache.spark.sql.catalyst.expressions.codegen.CodeGenerator$.updateAndGetCompilationStats(CodeGenerator.scala:1451)
>       at 
> org.apache.spark.sql.catalyst.expressions.codegen.CodeGenerator$.org$apache$spark$sql$catalyst$expressions$codegen$CodeGenerator$$doCompile(CodeGenerator.scala:1405)
>       at 
> org.apache.spark.sql.catalyst.expressions.codegen.CodeGenerator$$anon$1.load(CodeGenerator.scala:1501)
>       at 
> org.apache.spark.sql.catalyst.expressions.codegen.CodeGenerator$$anon$1.load(CodeGenerator.scala:1498)
>       at 
> org.sparkproject.guava.cache.LocalCache$LoadingValueReference.loadFuture(LocalCache.java:3599)
>       at 
> org.sparkproject.guava.cache.LocalCache$Segment.loadSync(LocalCache.java:2379)
>       at 
> org.sparkproject.guava.cache.LocalCache$Segment.lockedGetOrLoad(LocalCache.java:2342)
>       at 
> org.sparkproject.guava.cache.LocalCache$Segment.get(LocalCache.java:2257)
>       at org.sparkproject.guava.cache.LocalCache.get(LocalCache.java:4000)
>       at 
> org.sparkproject.guava.cache.LocalCache.getOrLoad(LocalCache.java:4004)
>       at 
> org.sparkproject.guava.cache.LocalCache$LocalLoadingCache.get(LocalCache.java:4874)
>       at 
> org.apache.spark.sql.catalyst.expressions.codegen.CodeGenerator$.compile(CodeGenerator.scala:1352)
>       at 
> org.apache.spark.sql.execution.WholeStageCodegenExec.liftedTree1$1(WholeStageCodegenExec.scala:721)
>       at 
> org.apache.spark.sql.execution.WholeStageCodegenExec.doExecute(WholeStageCodegenExec.scala:720)
>       at 
> org.apache.spark.sql.execution.SparkPlan.$anonfun$execute$1(SparkPlan.scala:180)
>       at 
> org.apache.spark.sql.execution.SparkPlan.$anonfun$executeQuery$1(SparkPlan.scala:218)
>       at 
> org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
>       at 
> org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:215)
>       at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:176)
>       at 
> org.apache.spark.sql.execution.SparkPlan.getByteArrayRdd(SparkPlan.scala:321)
>       at 
> org.apache.spark.sql.execution.SparkPlan.executeCollect(SparkPlan.scala:387)
>       at org.apache.spark.sql.Dataset.$anonfun$count$1(Dataset.scala:3006)
>       at 
> org.apache.spark.sql.Dataset.$anonfun$count$1$adapted(Dataset.scala:3005)
>       at 
> org.apache.spark.sql.Dataset.$anonfun$withAction$1(Dataset.scala:3687)
>       at 
> org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$5(SQLExecution.scala:103)
>       at 
> org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:163)
>       at 
> org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$1(SQLExecution.scala:90)
>       at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:775)
>       at 
> org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:64)
>       at org.apache.spark.sql.Dataset.withAction(Dataset.scala:3685)
>       at org.apache.spark.sql.Dataset.count(Dataset.scala:3005)
> {noformat}
> Here is corresponding code:
> https://github.com/apache/spark/blob/master/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala#L1604
> the variable methodCodeSizes is empty, therefore the call to ".max" throws an 
> exception 
>  (instead of returning 0, which would be the correct result).
> {noformat}
>   private def updateAndGetCompilationStats(evaluator: ClassBodyEvaluator): 
> ByteCodeStats = {
>     // First retrieve the generated classes.
>     val classes = evaluator.getBytecodes.asScala
>     // Then walk the classes to get at the method bytecode.
>     val codeAttr = 
> Utils.classForName("org.codehaus.janino.util.ClassFile$CodeAttribute")
>     val codeAttrField = codeAttr.getDeclaredField("code")
>     codeAttrField.setAccessible(true)
>     val codeStats = classes.map { case (_, classBytes) =>
>       val classCodeSize = classBytes.length
>       
> CodegenMetrics.METRIC_GENERATED_CLASS_BYTECODE_SIZE.update(classCodeSize)
>       try {
>         val cf = new ClassFile(new ByteArrayInputStream(classBytes))
>         val constPoolSize = cf.getConstantPoolSize
>         val methodCodeSizes = cf.methodInfos.asScala.flatMap { method =>   // 
> <== methodInfos EMPTY ??
>           method.getAttributes().filter(_.getClass eq codeAttr).map { a =>
>              ... truncated ....
>           }
>         }
>         (methodCodeSizes.max, constPoolSize)  // <=== EMPTY, throw 
>       } catch {
>         case NonFatal(e) =>
>           logWarning("Error calculating stats of compiled class.", e)  // 
> <=== the exception printed in log
>           (-1, -1)   // <==== the value accumulated to statistics... then 
> continuing "normally"
>       }
>     }
> {noformat}
> Maybe the code could be changed to add 
> {noformat}
> if (cf.methodInfos.isEmpty) {
>    return (0, 0)
> }
> {noformat}



--
This message was sent by Atlassian Jira
(v8.20.10#820010)

---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscr...@spark.apache.org
For additional commands, e-mail: issues-h...@spark.apache.org

Reply via email to