This is an automated email from the ASF dual-hosted git repository.

ruifengz pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git

The following commit(s) were added to refs/heads/master by this push:
     new 1507a52d32f [SPARK-42705][CONNECT] Fix spark.sql to return values from the command
1507a52d32f is described below

commit 1507a52d32ff23ea11516af360774bd622bd47a4
Author: Takuya UESHIN <ues...@databricks.com>
AuthorDate: Wed Mar 8 11:06:36 2023 +0800

    [SPARK-42705][CONNECT] Fix spark.sql to return values from the command

    ### What changes were proposed in this pull request?

    Fixes `spark.sql` to return values from the command.

    ### Why are the changes needed?

    Currently `spark.sql` doesn't return the result from the commands.

    ```py
    >>> spark.sql("show functions").show()
    +--------+
    |function|
    +--------+
    +--------+
    ```

    ### Does this PR introduce _any_ user-facing change?

    `spark.sql` with commands will return the values.

    ### How was this patch tested?

    Added a test.

    Closes #40323 from ueshin/issues/SPARK-42705/sql.

    Authored-by: Takuya UESHIN <ues...@databricks.com>
    Signed-off-by: Ruifeng Zheng <ruife...@apache.org>
---
 .../org/apache/spark/sql/connect/planner/SparkConnectPlanner.scala | 6 ++++--
 python/pyspark/sql/tests/connect/test_connect_basic.py | 6 ++++++
 2 files changed, 10 insertions(+), 2 deletions(-)

diff --git a/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/planner/SparkConnectPlanner.scala b/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/planner/SparkConnectPlanner.scala
index 8ca004d520c..d7b3c057d92 100644
--- a/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/planner/SparkConnectPlanner.scala
+++ b/connector/connect/server/src/main/scala/org/apache/spark/sql/connect/planner/SparkConnectPlanner.scala
@@ -1508,8 +1508,10 @@ class SparkConnectPlanner(val session: SparkSession) {
           maxRecordsPerBatch,
           maxBatchSize,
           timeZoneId)
-        assert(batches.size == 1)
-        batches.next()
+        assert(batches.hasNext)
+        val bytes = batches.next()
+        assert(!batches.hasNext, s"remaining batches: ${batches.size}")
+        bytes
       }
 
       // To avoid explicit handling of the result on the client, we build the expected input
diff --git a/python/pyspark/sql/tests/connect/test_connect_basic.py b/python/pyspark/sql/tests/connect/test_connect_basic.py
index 806fe6e2329..dad303d3463 100644
--- a/python/pyspark/sql/tests/connect/test_connect_basic.py
+++ b/python/pyspark/sql/tests/connect/test_connect_basic.py
@@ -2876,6 +2876,12 @@ class SparkConnectBasicTests(SparkConnectSQLTestCase):
             with self.assertRaises(NotImplementedError):
                 getattr(df.write, f)()
 
+    def test_sql_with_command(self):
+        # SPARK-42705: spark.sql should return values from the command.
+        self.assertEqual(
+            self.connect.sql("show functions").collect(), self.spark.sql("show functions").collect()
+        )
+
 
 @unittest.skipIf(not should_test_connect, connect_requirement_message)
 class ClientTests(unittest.TestCase):


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org