This is an automated email from the ASF dual-hosted git repository. hvanhovell pushed a commit to branch branch-3.4 in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-3.4 by this push: new 6752ad6fa1a [SPARK-42544][CONNECT] Spark Connect Scala Client: support parameterized SQL 6752ad6fa1a is described below commit 6752ad6fa1ab0b3a98c45f1c490a476443781ec7 Author: Rui Wang <rui.w...@databricks.com> AuthorDate: Thu Feb 23 21:52:50 2023 -0400 [SPARK-42544][CONNECT] Spark Connect Scala Client: support parameterized SQL ### What changes were proposed in this pull request? Support parameterized SQL API in Scala client. ### Why are the changes needed? API coverage ### Does this PR introduce _any_ user-facing change? NO ### How was this patch tested? UT Closes #40148 from amaliujia/parameterized_sql. Authored-by: Rui Wang <rui.w...@databricks.com> Signed-off-by: Herman van Hovell <her...@databricks.com> (cherry picked from commit ee22a0bf3c91a6b26d608b5fc28e9472eaca6b40) Signed-off-by: Herman van Hovell <her...@databricks.com> --- .../scala/org/apache/spark/sql/SparkSession.scala | 39 ++++++++++++++++++++-- .../apache/spark/sql/PlanGenerationTestSuite.scala | 4 +++ .../explain-results/parameterized_sql.explain | 2 ++ .../query-tests/queries/parameterized_sql.json | 9 +++++ .../queries/parameterized_sql.proto.bin | 5 +++ 5 files changed, 57 insertions(+), 2 deletions(-) diff --git a/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/SparkSession.scala b/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/SparkSession.scala index 1761e8ce42d..b086db09365 100644 --- a/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/SparkSession.scala +++ b/connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/SparkSession.scala @@ -22,6 +22,7 @@ import scala.collection.JavaConverters._ import org.apache.arrow.memory.RootAllocator +import org.apache.spark.annotation.Experimental import org.apache.spark.connect.proto import org.apache.spark.internal.Logging import org.apache.spark.sql.connect.client.{SparkConnectClient, SparkResult} @@ -54,6
+55,48 @@ class SparkSession(private val client: SparkConnectClient, private val cleaner: private[this] val allocator = new RootAllocator() + /** + * Executes a SQL query substituting named parameters by the given arguments, returning the + * result as a `DataFrame`. This API eagerly runs DDL/DML commands, but not for SELECT queries. + * + * @param sqlText + * A SQL statement with named parameters to execute. + * @param args + * A map of parameter names to literal values. + * + * @since 3.4.0 + */ + @Experimental + def sql(sqlText: String, args: Map[String, String]): DataFrame = { + sql(sqlText, args.asJava) + } + + /** + * Executes a SQL query substituting named parameters by the given arguments, returning the + * result as a `DataFrame`. This API eagerly runs DDL/DML commands, but not for SELECT queries. + * + * @param sqlText + * A SQL statement with named parameters to execute. + * @param args + * A map of parameter names to literal values. + * + * @since 3.4.0 + */ + @Experimental + def sql(sqlText: String, args: java.util.Map[String, String]): DataFrame = newDataset { + builder => + builder + .setSql(proto.SQL.newBuilder().setQuery(sqlText).putAllArgs(args)) + } + /** * Executes a SQL query using Spark, returning the result as a `DataFrame`. This API eagerly * runs DDL/DML commands, but not for SELECT queries. 
* * @since 3.4.0 */ - def sql(query: String): DataFrame = newDataset { builder => - builder.setSql(proto.SQL.newBuilder().setQuery(query)) + def sql(query: String): DataFrame = { + sql(query, Map.empty[String, String]) } /** diff --git a/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/PlanGenerationTestSuite.scala b/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/PlanGenerationTestSuite.scala index 9d4ed0f912f..6a54cc88aec 100644 --- a/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/PlanGenerationTestSuite.scala +++ b/connector/connect/client/jvm/src/test/scala/org/apache/spark/sql/PlanGenerationTestSuite.scala @@ -223,6 +223,10 @@ class PlanGenerationTestSuite extends ConnectFunSuite with BeforeAndAfterAll wit session.sql("select 1") } + test("parameterized sql") { + session.sql("select 1", Map("minId" -> "7", "maxId" -> "20")) + } + test("range") { session.range(1, 10, 1, 2) } diff --git a/connector/connect/common/src/test/resources/query-tests/explain-results/parameterized_sql.explain b/connector/connect/common/src/test/resources/query-tests/explain-results/parameterized_sql.explain new file mode 100644 index 00000000000..7f5aafb1943 --- /dev/null +++ b/connector/connect/common/src/test/resources/query-tests/explain-results/parameterized_sql.explain @@ -0,0 +1,2 @@ +Project [1 AS 1#0] ++- OneRowRelation diff --git a/connector/connect/common/src/test/resources/query-tests/queries/parameterized_sql.json b/connector/connect/common/src/test/resources/query-tests/queries/parameterized_sql.json new file mode 100644 index 00000000000..99268661e72 --- /dev/null +++ b/connector/connect/common/src/test/resources/query-tests/queries/parameterized_sql.json @@ -0,0 +1,9 @@ +{ + "sql": { + "query": "select 1", + "args": { + "minId": "7", + "maxId": "20" + } + } +} \ No newline at end of file diff --git a/connector/connect/common/src/test/resources/query-tests/queries/parameterized_sql.proto.bin 
b/connector/connect/common/src/test/resources/query-tests/queries/parameterized_sql.proto.bin new file mode 100644 index 00000000000..fd9304b4e47 --- /dev/null +++ b/connector/connect/common/src/test/resources/query-tests/queries/parameterized_sql.proto.bin @@ -0,0 +1,5 @@ +R# +select 1 + +minId7 +maxId20 \ No newline at end of file --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org