This is an automated email from the ASF dual-hosted git repository. ulyssesyou pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push: new 47e8205628a [SPARK-45191][SQL] InMemoryTableScanExec simpleStringWithNodeId adds columnar info 47e8205628a is described below commit 47e8205628a0aed54ad638a53a5881efa2306455 Author: ulysses-you <ulyssesyo...@gmail.com> AuthorDate: Fri Sep 22 09:51:32 2023 +0800 [SPARK-45191][SQL] InMemoryTableScanExec simpleStringWithNodeId adds columnar info ### What changes were proposed in this pull request? InMemoryTableScanExec supports both row-based and columnar input and output which is based on the cache serializer. It would be more friendly for users if we can provide the columnar info to show whether it is columnar in/out. ### Why are the changes needed? Add columnar info for InMemoryTableScanExec explain. ### Does this PR introduce _any_ user-facing change? no, if no columnar input or output. ### How was this patch tested? manually test a columnar supporting example. before: <img width="303" alt="image" src="https://github.com/apache/spark/assets/12025282/289cba7e-51af-4b01-b591-bc9c8328801d"> after: <img width="435" alt="image" src="https://github.com/apache/spark/assets/12025282/e1514a2c-bf47-47c3-b311-23e49c9db222"> ### Was this patch authored or co-authored using generative AI tooling? no Closes #42967 from ulysses-you/cache. 
Authored-by: ulysses-you <ulyssesyo...@gmail.com> Signed-off-by: Xiduo You <ulysses...@apache.org> --- .../apache/spark/sql/execution/columnar/InMemoryRelation.scala | 8 ++++++-- .../spark/sql/execution/columnar/InMemoryTableScanExec.scala | 9 +++++++++ 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/InMemoryRelation.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/InMemoryRelation.scala index 45d006b58e8..27860f23d9b 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/InMemoryRelation.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/InMemoryRelation.scala @@ -217,6 +217,11 @@ case class CachedRDDBuilder( val cachedName = tableName.map(n => s"In-memory table $n") .getOrElse(StringUtils.abbreviate(cachedPlan.toString, 1024)) + val supportsColumnarInput: Boolean = { + cachedPlan.supportsColumnar && + serializer.supportsColumnarInput(cachedPlan.output) + } + def cachedColumnBuffers: RDD[CachedBatch] = { if (_cachedColumnBuffers == null) { synchronized { @@ -264,8 +269,7 @@ case class CachedRDDBuilder( } private def buildBuffers(): RDD[CachedBatch] = { - val cb = if (cachedPlan.supportsColumnar && - serializer.supportsColumnarInput(cachedPlan.output)) { + val cb = if (supportsColumnarInput) { serializer.convertColumnarBatchToCachedBatch( cachedPlan.executeColumnar(), cachedPlan.output, diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/InMemoryTableScanExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/InMemoryTableScanExec.scala index 08244a4f84f..064a4636905 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/InMemoryTableScanExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/columnar/InMemoryTableScanExec.scala @@ -46,6 +46,15 @@ case class InMemoryTableScanExec( } } + override def simpleStringWithNodeId(): String = { 
+ val columnarInfo = if (relation.cacheBuilder.supportsColumnarInput || supportsColumnar) { + s" (columnarIn=${relation.cacheBuilder.supportsColumnarInput}, columnarOut=$supportsColumnar)" + } else { + "" + } + super.simpleStringWithNodeId() + columnarInfo + } + override def innerChildren: Seq[QueryPlan[_]] = Seq(relation) ++ super.innerChildren override def doCanonicalize(): SparkPlan = --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org