[ https://issues.apache.org/jira/browse/SPARK-49042?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17869671#comment-17869671 ]
Wei Guo commented on SPARK-49042: --------------------------------- [~arnaud.nauwynck] Can you provide some code to construct a dataset to reproduce this warning log? > CodeGenerator: Error calculating stats of compiled class. > java.lang.UnsupportedOperationException: empty.max > ------------------------------------------------------------------------------------------------------------ > > Key: SPARK-49042 > URL: https://issues.apache.org/jira/browse/SPARK-49042 > Project: Spark > Issue Type: Bug > Components: Spark Core > Affects Versions: 3.1.1, 3.5.1 > Reporter: Arnaud Nauwynck > Priority: Major > > CodeGenerator (here using "dataset.count()") generates WARN logs for some > datasets > The thrown exception is caught, an error log is printed, and the code statistics > are WRONG because it increments "(-1, -1)" instead of the real values. > Here is the error log > {noformat} > WARN CodeGenerator: Error calculating stats of compiled class. > java.lang.UnsupportedOperationException: empty.max > at scala.collection.TraversableOnce.max(TraversableOnce.scala:234) > at scala.collection.TraversableOnce.max$(TraversableOnce.scala:232) > at scala.collection.AbstractTraversable.max(Traversable.scala:108) > at > org.apache.spark.sql.catalyst.expressions.codegen.CodeGenerator$.$anonfun$updateAndGetCompilationStats$1(CodeGenerator.scala:1470) > at > scala.collection.TraversableLike.$anonfun$map$1(TraversableLike.scala:238) > at scala.collection.Iterator.foreach(Iterator.scala:941) > at scala.collection.Iterator.foreach$(Iterator.scala:941) > at scala.collection.AbstractIterator.foreach(Iterator.scala:1429) > at scala.collection.IterableLike.foreach(IterableLike.scala:74) > at scala.collection.IterableLike.foreach$(IterableLike.scala:73) > at scala.collection.AbstractIterable.foreach(Iterable.scala:56) > at scala.collection.TraversableLike.map(TraversableLike.scala:238) > at scala.collection.TraversableLike.map$(TraversableLike.scala:231) > at 
scala.collection.AbstractTraversable.map(Traversable.scala:108) > at > org.apache.spark.sql.catalyst.expressions.codegen.CodeGenerator$.updateAndGetCompilationStats(CodeGenerator.scala:1451) > at > org.apache.spark.sql.catalyst.expressions.codegen.CodeGenerator$.org$apache$spark$sql$catalyst$expressions$codegen$CodeGenerator$$doCompile(CodeGenerator.scala:1405) > at > org.apache.spark.sql.catalyst.expressions.codegen.CodeGenerator$$anon$1.load(CodeGenerator.scala:1501) > at > org.apache.spark.sql.catalyst.expressions.codegen.CodeGenerator$$anon$1.load(CodeGenerator.scala:1498) > at > org.sparkproject.guava.cache.LocalCache$LoadingValueReference.loadFuture(LocalCache.java:3599) > at > org.sparkproject.guava.cache.LocalCache$Segment.loadSync(LocalCache.java:2379) > at > org.sparkproject.guava.cache.LocalCache$Segment.lockedGetOrLoad(LocalCache.java:2342) > at > org.sparkproject.guava.cache.LocalCache$Segment.get(LocalCache.java:2257) > at org.sparkproject.guava.cache.LocalCache.get(LocalCache.java:4000) > at > org.sparkproject.guava.cache.LocalCache.getOrLoad(LocalCache.java:4004) > at > org.sparkproject.guava.cache.LocalCache$LocalLoadingCache.get(LocalCache.java:4874) > at > org.apache.spark.sql.catalyst.expressions.codegen.CodeGenerator$.compile(CodeGenerator.scala:1352) > at > org.apache.spark.sql.execution.WholeStageCodegenExec.liftedTree1$1(WholeStageCodegenExec.scala:721) > at > org.apache.spark.sql.execution.WholeStageCodegenExec.doExecute(WholeStageCodegenExec.scala:720) > at > org.apache.spark.sql.execution.SparkPlan.$anonfun$execute$1(SparkPlan.scala:180) > at > org.apache.spark.sql.execution.SparkPlan.$anonfun$executeQuery$1(SparkPlan.scala:218) > at > org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151) > at > org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:215) > at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:176) > at > 
org.apache.spark.sql.execution.SparkPlan.getByteArrayRdd(SparkPlan.scala:321) > at > org.apache.spark.sql.execution.SparkPlan.executeCollect(SparkPlan.scala:387) > at org.apache.spark.sql.Dataset.$anonfun$count$1(Dataset.scala:3006) > at > org.apache.spark.sql.Dataset.$anonfun$count$1$adapted(Dataset.scala:3005) > at > org.apache.spark.sql.Dataset.$anonfun$withAction$1(Dataset.scala:3687) > at > org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$5(SQLExecution.scala:103) > at > org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:163) > at > org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$1(SQLExecution.scala:90) > at org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:775) > at > org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:64) > at org.apache.spark.sql.Dataset.withAction(Dataset.scala:3685) > at org.apache.spark.sql.Dataset.count(Dataset.scala:3005) > {noformat} > Here is the corresponding code: > https://github.com/apache/spark/blob/master/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala#L1604 > the variable methodCodeSizes is empty, therefore the method ".max" throws > an exception > (instead of returning 0, which would be correct). > {noformat} > private def updateAndGetCompilationStats(evaluator: ClassBodyEvaluator): > ByteCodeStats = { > // First retrieve the generated classes. > val classes = evaluator.getBytecodes.asScala > // Then walk the classes to get at the method bytecode. 
> val codeAttr = > Utils.classForName("org.codehaus.janino.util.ClassFile$CodeAttribute") > val codeAttrField = codeAttr.getDeclaredField("code") > codeAttrField.setAccessible(true) > val codeStats = classes.map { case (_, classBytes) => > val classCodeSize = classBytes.length > > CodegenMetrics.METRIC_GENERATED_CLASS_BYTECODE_SIZE.update(classCodeSize) > try { > val cf = new ClassFile(new ByteArrayInputStream(classBytes)) > val constPoolSize = cf.getConstantPoolSize > val methodCodeSizes = cf.methodInfos.asScala.flatMap { method => // > <== methodInfos EMPTY ?? > method.getAttributes().filter(_.getClass eq codeAttr).map { a => > ... truncated .... > } > } > (methodCodeSizes.max, constPoolSize) // <=== EMPTY, throw > } catch { > case NonFatal(e) => > logWarning("Error calculating stats of compiled class.", e) // > <=== the exception printed in log > (-1, -1) // <==== the value accumulated to statistics... then > continuing "normally" > } > } > {noformat} > Maybe the code could be changed to add > {noformat} > if (cf.methodInfos.isEmpty) { > return (0, 0) > } > {noformat} -- This message was sent by Atlassian Jira (v8.20.10#820010) --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@spark.apache.org For additional commands, e-mail: issues-h...@spark.apache.org