This is an automated email from the ASF dual-hosted git repository.

maxgekk pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
     new c89221b02bb3 [SPARK-45224][PYTHON] Add examples w/ map and array as parameters of `sql()`
c89221b02bb3 is described below

commit c89221b02bb3000f707a31322e6d40b561e527bd
Author: Max Gekk <max.g...@gmail.com>
AuthorDate: Wed Sep 20 11:09:01 2023 +0300

    [SPARK-45224][PYTHON] Add examples w/ map and array as parameters of `sql()`

    ### What changes were proposed in this pull request?
    In the PR, I propose to add a few more examples for the `sql()` method in the PySpark API with array and map parameters.

    ### Why are the changes needed?
    To inform users about the recent changes introduced by #42752 and #42470, and to check that those changes actually work.

    ### Does this PR introduce _any_ user-facing change?
    No.

    ### How was this patch tested?
    By running the new examples:
    ```
    $ python/run-tests --parallelism=1 --testnames 'pyspark.sql.session SparkSession.sql'
    ```

    ### Was this patch authored or co-authored using generative AI tooling?
    No.

    Closes #42996 from MaxGekk/map-sql-parameterized-python-connect.

    Authored-by: Max Gekk <max.g...@gmail.com>
    Signed-off-by: Max Gekk <max.g...@gmail.com>
---
 python/pyspark/sql/session.py | 30 +++++++++++++++++-------------
 1 file changed, 17 insertions(+), 13 deletions(-)

diff --git a/python/pyspark/sql/session.py b/python/pyspark/sql/session.py
index dc4f8f321a59..de2e8d0cda2a 100644
--- a/python/pyspark/sql/session.py
+++ b/python/pyspark/sql/session.py
@@ -1599,23 +1599,27 @@ class SparkSession(SparkConversionMixin):
 
         And substitude named parameters with the `:` prefix by SQL literals.
 
-        >>> spark.sql("SELECT * FROM {df} WHERE {df[B]} > :minB", {"minB" : 5}, df=mydf).show()
-        +---+---+
-        |  A|  B|
-        +---+---+
-        |  3|  6|
-        +---+---+
+        >>> from pyspark.sql.functions import create_map
+        >>> spark.sql(
+        ...     "SELECT *, element_at(:m, 'a') AS C FROM {df} WHERE {df[B]} > :minB",
+        ...     {"minB" : 5, "m" : create_map(lit('a'), lit(1))}, df=mydf).show()
+        +---+---+---+
+        |  A|  B|  C|
+        +---+---+---+
+        |  3|  6|  1|
+        +---+---+---+
 
         Or positional parameters marked by `?` in the SQL query by SQL literals.
 
+        >>> from pyspark.sql.functions import array
         >>> spark.sql(
-        ...     "SELECT * FROM {df} WHERE {df[B]} > ? and ? < {df[A]}",
-        ...     args=[5, 2], df=mydf).show()
-        +---+---+
-        |  A|  B|
-        +---+---+
-        |  3|  6|
-        +---+---+
+        ...     "SELECT *, element_at(?, 1) AS C FROM {df} WHERE {df[B]} > ? and ? < {df[A]}",
+        ...     args=[array(lit(1), lit(2), lit(3)), 5, 2], df=mydf).show()
+        +---+---+---+
+        |  A|  B|  C|
+        +---+---+---+
+        |  3|  6|  1|
+        +---+---+---+
         """
 
         formatter = SQLStringFormatter(self)
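Note: the new doctests rely on `mydf`, which is defined earlier in the `sql()` docstring and not shown in this diff. For trying the two new examples outside the doctest, here is a minimal self-contained sketch, assuming a plausible `mydf` with integer columns `A` and `B`:

```
from pyspark.sql import SparkSession
from pyspark.sql.functions import array, create_map, lit

spark = SparkSession.builder.getOrCreate()
# Assumed stand-in for the docstring's mydf; its real definition is not in this diff.
mydf = spark.createDataFrame([(1, 4), (2, 5), (3, 6)], ["A", "B"])

# Named parameters: :minB binds an int literal, :m binds a map column literal.
spark.sql(
    "SELECT *, element_at(:m, 'a') AS C FROM {df} WHERE {df[B]} > :minB",
    {"minB": 5, "m": create_map(lit("a"), lit(1))},
    df=mydf,
).show()

# Positional parameters: the first ? binds an array column literal, the
# remaining two bind int literals, in the order they appear in the query.
spark.sql(
    "SELECT *, element_at(?, 1) AS C FROM {df} WHERE {df[B]} > ? and ? < {df[A]}",
    args=[array(lit(1), lit(2), lit(3)), 5, 2],
    df=mydf,
).show()
```

Per the expected output in the diff, both queries print the single row `(3, 6, 1)`: only the row with B=6 passes the filter, and `element_at` returns 1 from the map (key 'a') and from the array (index 1).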