This is an automated email from the ASF dual-hosted git repository. gurwls223 pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push: new 16a28b1a961 [SPARK-43124][SQL] Dataset.show projects CommandResults locally 16a28b1a961 is described below commit 16a28b1a961052a250dcf05b7c249c92156e1077 Author: Peter Toth <peter.t...@gmail.com> AuthorDate: Fri Apr 21 09:33:45 2023 +0900 [SPARK-43124][SQL] Dataset.show projects CommandResults locally ### What changes were proposed in this pull request? `Dataset.show()` currently triggers a job for a simple `show tables` command. This is because the command output contains an `isTemporary` boolean column that needs to be cast to string when we use `show()` on the dataset. This PR converts `CommandResult` to `LocalRelation` and lets `ConvertToLocalRelation` do the casting locally to avoid triggering job execution. ### Why are the changes needed? A simple `show tables` should not require an executor. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Added new UT. Closes #40779 from peter-toth/SPARK-43124-dataset-show-projects-commandresults-locally. 
Authored-by: Peter Toth <peter.t...@gmail.com> Signed-off-by: Hyukjin Kwon <gurwls...@apache.org> --- .../src/main/scala/org/apache/spark/sql/Dataset.scala | 8 +++++++- .../scala/org/apache/spark/sql/DatasetSuite.scala | 19 +++++++++++++++++++ 2 files changed, 26 insertions(+), 1 deletion(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala index be37fdae025..d33a36a8380 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala @@ -272,7 +272,13 @@ class Dataset[T] private[sql]( private[sql] def getRows( numRows: Int, truncate: Int): Seq[Seq[String]] = { - val newDf = toDF() + val newDf = logicalPlan match { + case c: CommandResult => + // Convert to `LocalRelation` and let `ConvertToLocalRelation` do the casting locally to + // avoid triggering a job + Dataset.ofRows(sparkSession, LocalRelation(c.output, c.rows)) + case _ => toDF() + } val castCols = newDf.logicalPlan.output.map { col => // Since binary types in top-level schema fields have a specific format to print, // so we do not cast them to strings here. 
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala index 75cee407819..167aea79209 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala @@ -2474,6 +2474,25 @@ class DatasetSuite extends QueryTest ) assert(result == expected) } + + test("SPARK-43124: Show does not trigger job execution on CommandResults") { + withSQLConf(SQLConf.OPTIMIZER_EXCLUDED_RULES.key -> "") { + withTable("t1") { + sql("create table t1(c int) using parquet") + + @volatile var jobCounter = 0 + val listener = new SparkListener { + override def onJobStart(jobStart: SparkListenerJobStart): Unit = { + jobCounter += 1 + } + } + withListener(spark.sparkContext, listener) { _ => + sql("show tables").show() + } + assert(jobCounter === 0) + } + } + } } class DatasetLargeResultCollectingSuite extends QueryTest --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org