Benedikt Beckermann created SPARK-25938: -------------------------------------------
Summary: Action on cached dataset causes WARN java.lang.AssertionError in log4j logs. Key: SPARK-25938 URL: https://issues.apache.org/jira/browse/SPARK-25938 Project: Spark Issue Type: Bug Components: Input/Output Affects Versions: 2.3.1 Environment: We're running Apache Spark 2.3.1 on Databricks 4.2 while developing in Scala Reporter: Benedikt Beckermann Caching and counting any Dataset causes an AssertionError to be logged at WARN level in the log4j logs. The results are still valid and nothing is written to stderr. Example code: {code:scala}
var df = Seq(100).toDF("count")
df.cache()
df.count()
{code} {code:log} log4j Output: 18/11/02 15:39:57 WARN ExecutionListenerManager: Error executing query execution listener java.lang.AssertionError: assertion failed: InMemoryRelation fields: output, useCompression, batchSize, storageLevel, child, tableName, _cachedColumnBuffers, rowCountStats, sizeInBytesStats, statsOfPlanToCache, outputOrdering, values: List(count#124), true, 10000, StorageLevel(disk, memory, deserialized, 1 replicas), LocalTableScan [count#47] , None, LocalTableScan [count#47] MapPartitionsRDD[2] at cache at command-1842411053765017:3, LongAccumulator(id: 0, name: None, value: 1), LongAccumulator(id: 1, name: None, value: 4), Statistics(sizeInBytes=12.0 B, hints=none) at scala.Predef$.assert(Predef.scala:170) at org.apache.spark.sql.catalyst.trees.TreeNode.jsonFields(TreeNode.scala:638) at org.apache.spark.sql.catalyst.trees.TreeNode.org$apache$spark$sql$catalyst$trees$TreeNode$$collectJsonValue$1(TreeNode.scala:626) at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$org$apache$spark$sql$catalyst$trees$TreeNode$$collectJsonValue$1$1.apply(TreeNode.scala:628) at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$org$apache$spark$sql$catalyst$trees$TreeNode$$collectJsonValue$1$1.apply(TreeNode.scala:628) at scala.collection.immutable.List.foreach(List.scala:381) at 
org.apache.spark.sql.catalyst.trees.TreeNode.org$apache$spark$sql$catalyst$trees$TreeNode$$collectJsonValue$1(TreeNode.scala:628) at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$org$apache$spark$sql$catalyst$trees$TreeNode$$collectJsonValue$1$1.apply(TreeNode.scala:628) at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$org$apache$spark$sql$catalyst$trees$TreeNode$$collectJsonValue$1$1.apply(TreeNode.scala:628) at scala.collection.immutable.List.foreach(List.scala:381) at org.apache.spark.sql.catalyst.trees.TreeNode.org$apache$spark$sql$catalyst$trees$TreeNode$$collectJsonValue$1(TreeNode.scala:628) at org.apache.spark.sql.catalyst.trees.TreeNode.jsonValue(TreeNode.scala:631) at org.apache.spark.sql.catalyst.trees.TreeNode.toJSON(TreeNode.scala:617) at com.databricks.backend.daemon.driver.SQLQueryPlanLogger$.getSQLQueryPlanBlob(SQLQueryPlanLogger.scala:70) at com.databricks.backend.daemon.driver.SQLQueryPlanLogger.onSuccess(SQLQueryPlanLogger.scala:42) at org.apache.spark.sql.util.ExecutionListenerManager$$anonfun$onSuccess$1$$anonfun$apply$mcV$sp$1.apply(QueryExecutionListener.scala:124) at org.apache.spark.sql.util.ExecutionListenerManager$$anonfun$onSuccess$1$$anonfun$apply$mcV$sp$1.apply(QueryExecutionListener.scala:123) at org.apache.spark.sql.util.ExecutionListenerManager$$anonfun$org$apache$spark$sql$util$ExecutionListenerManager$$withErrorHandling$1.apply(QueryExecutionListener.scala:145) at org.apache.spark.sql.util.ExecutionListenerManager$$anonfun$org$apache$spark$sql$util$ExecutionListenerManager$$withErrorHandling$1.apply(QueryExecutionListener.scala:143) at scala.collection.immutable.List.foreach(List.scala:381) at scala.collection.generic.TraversableForwarder$class.foreach(TraversableForwarder.scala:35) at scala.collection.mutable.ListBuffer.foreach(ListBuffer.scala:45) at org.apache.spark.sql.util.ExecutionListenerManager.org$apache$spark$sql$util$ExecutionListenerManager$$withErrorHandling(QueryExecutionListener.scala:143) at 
org.apache.spark.sql.util.ExecutionListenerManager$$anonfun$onSuccess$1.apply$mcV$sp(QueryExecutionListener.scala:123) at org.apache.spark.sql.util.ExecutionListenerManager$$anonfun$onSuccess$1.apply(QueryExecutionListener.scala:123) at org.apache.spark.sql.util.ExecutionListenerManager$$anonfun$onSuccess$1.apply(QueryExecutionListener.scala:123) at org.apache.spark.sql.util.ExecutionListenerManager.readLock(QueryExecutionListener.scala:156) at org.apache.spark.sql.util.ExecutionListenerManager.onSuccess(QueryExecutionListener.scala:122) at org.apache.spark.sql.Dataset.withAction(Dataset.scala:3330) at org.apache.spark.sql.Dataset.count(Dataset.scala:2804) at line587cf41c5b6a4080a07e49150407fa2732.$read$$iw$$iw$$iw$$iw$$iw$$iw.<init>(command-1842411053765017:4) at line587cf41c5b6a4080a07e49150407fa2732.$read$$iw$$iw$$iw$$iw$$iw.<init>(command-1842411053765017:51) at line587cf41c5b6a4080a07e49150407fa2732.$read$$iw$$iw$$iw$$iw.<init>(command-1842411053765017:53) at line587cf41c5b6a4080a07e49150407fa2732.$read$$iw$$iw$$iw.<init>(command-1842411053765017:55) at line587cf41c5b6a4080a07e49150407fa2732.$read$$iw$$iw.<init>(command-1842411053765017:57) at line587cf41c5b6a4080a07e49150407fa2732.$read$$iw.<init>(command-1842411053765017:59) at line587cf41c5b6a4080a07e49150407fa2732.$read.<init>(command-1842411053765017:61) at line587cf41c5b6a4080a07e49150407fa2732.$read$.<init>(command-1842411053765017:65) at line587cf41c5b6a4080a07e49150407fa2732.$read$.<clinit>(command-1842411053765017) at line587cf41c5b6a4080a07e49150407fa2732.$eval$.$print$lzycompute(<notebook>:7) at line587cf41c5b6a4080a07e49150407fa2732.$eval$.$print(<notebook>:6) at line587cf41c5b6a4080a07e49150407fa2732.$eval.$print(<notebook>) at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62) at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) at 
java.lang.reflect.Method.invoke(Method.java:498) at scala.tools.nsc.interpreter.IMain$ReadEvalPrint.call(IMain.scala:786) at scala.tools.nsc.interpreter.IMain$Request.loadAndRun(IMain.scala:1047) at scala.tools.nsc.interpreter.IMain$WrappedRequest$$anonfun$loadAndRunReq$1.apply(IMain.scala:638) at scala.tools.nsc.interpreter.IMain$WrappedRequest$$anonfun$loadAndRunReq$1.apply(IMain.scala:637) at scala.reflect.internal.util.ScalaClassLoader$class.asContext(ScalaClassLoader.scala:31) at scala.reflect.internal.util.AbstractFileClassLoader.asContext(AbstractFileClassLoader.scala:19) at scala.tools.nsc.interpreter.IMain$WrappedRequest.loadAndRunReq(IMain.scala:637) at scala.tools.nsc.interpreter.IMain.interpret(IMain.scala:569) at scala.tools.nsc.interpreter.IMain.interpret(IMain.scala:565) at com.databricks.backend.daemon.driver.DriverILoop.execute(DriverILoop.scala:199) at com.databricks.backend.daemon.driver.ScalaDriverLocal$$anonfun$repl$1.apply$mcV$sp(ScalaDriverLocal.scala:189) at com.databricks.backend.daemon.driver.ScalaDriverLocal$$anonfun$repl$1.apply(ScalaDriverLocal.scala:189) at com.databricks.backend.daemon.driver.ScalaDriverLocal$$anonfun$repl$1.apply(ScalaDriverLocal.scala:189) at com.databricks.backend.daemon.driver.DriverLocal$TrapExitInternal$.trapExit(DriverLocal.scala:493) at com.databricks.backend.daemon.driver.DriverLocal$TrapExit$.apply(DriverLocal.scala:448) at com.databricks.backend.daemon.driver.ScalaDriverLocal.repl(ScalaDriverLocal.scala:189) at com.databricks.backend.daemon.driver.DriverLocal$$anonfun$execute$3.apply(DriverLocal.scala:248) at com.databricks.backend.daemon.driver.DriverLocal$$anonfun$execute$3.apply(DriverLocal.scala:228) at com.databricks.logging.UsageLogging$$anonfun$withAttributionContext$1.apply(UsageLogging.scala:188) at scala.util.DynamicVariable.withValue(DynamicVariable.scala:58) at com.databricks.logging.UsageLogging$class.withAttributionContext(UsageLogging.scala:183) at 
com.databricks.backend.daemon.driver.DriverLocal.withAttributionContext(DriverLocal.scala:40) at com.databricks.logging.UsageLogging$class.withAttributionTags(UsageLogging.scala:221) at com.databricks.backend.daemon.driver.DriverLocal.withAttributionTags(DriverLocal.scala:40) at com.databricks.backend.daemon.driver.DriverLocal.execute(DriverLocal.scala:228) at com.databricks.backend.daemon.driver.DriverWrapper$$anonfun$tryExecutingCommand$2.apply(DriverWrapper.scala:595) at com.databricks.backend.daemon.driver.DriverWrapper$$anonfun$tryExecutingCommand$2.apply(DriverWrapper.scala:595) at scala.util.Try$.apply(Try.scala:192) at com.databricks.backend.daemon.driver.DriverWrapper.tryExecutingCommand(DriverWrapper.scala:590) at com.databricks.backend.daemon.driver.DriverWrapper.getCommandOutputAndError(DriverWrapper.scala:474) at com.databricks.backend.daemon.driver.DriverWrapper.executeCommand(DriverWrapper.scala:548) at com.databricks.backend.daemon.driver.DriverWrapper.runInnerLoop(DriverWrapper.scala:380) at com.databricks.backend.daemon.driver.DriverWrapper.runInner(DriverWrapper.scala:327) at com.databricks.backend.daemon.driver.DriverWrapper.run(DriverWrapper.scala:215) at java.lang.Thread.run(Thread.java:748){code} -- This message was sent by Atlassian JIRA (v7.6.3#76005) --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@spark.apache.org For additional commands, e-mail: issues-h...@spark.apache.org