This is an automated email from the ASF dual-hosted git repository. gurwls223 pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push: new 501beeb93b2 [SPARK-41871][CONNECT] DataFrame hint parameter can be str, float or int 501beeb93b2 is described below commit 501beeb93b2be42348fb1150204023e13ed5e35f Author: Sandeep Singh <sand...@techaddict.me> AuthorDate: Thu Jan 5 14:33:03 2023 +0900 [SPARK-41871][CONNECT] DataFrame hint parameter can be str, float or int ### What changes were proposed in this pull request? Spark Connect DataFrame hint parameter can be str, float, or int (list support is not part of this change; it is tracked separately as SPARK-41887). This is done in parity with pyspark DataFrame.hint. ### Why are the changes needed? For parity. ### Does this PR introduce _any_ user-facing change? Yes, allows more types as parameters. ### How was this patch tested? Enabling existing tests. Closes #39393 from techaddict/SPARK-41871. Authored-by: Sandeep Singh <sand...@techaddict.me> Signed-off-by: Hyukjin Kwon <gurwls...@apache.org> --- python/pyspark/sql/connect/dataframe.py | 6 ++++-- python/pyspark/sql/connect/plan.py | 3 ++- python/pyspark/sql/tests/connect/test_connect_basic.py | 15 ++++++++++++++- 3 files changed, 20 insertions(+), 4 deletions(-) diff --git a/python/pyspark/sql/connect/dataframe.py b/python/pyspark/sql/connect/dataframe.py index 2464441bcf2..de50e6f52ca 100644 --- a/python/pyspark/sql/connect/dataframe.py +++ b/python/pyspark/sql/connect/dataframe.py @@ -480,9 +480,11 @@ class DataFrame: def hint(self, name: str, *params: Any) -> "DataFrame": for param in params: - if param is not None and not isinstance(param, (int, str)): + # TODO(SPARK-41887): support list type as hint parameter + if param is not None and not isinstance(param, (int, str, float)): raise TypeError( - f"param should be a int or str, but got {type(param).__name__} {param}" + f"param should be a str, float or int, but got {type(param).__name__}" + f" {param}" ) return DataFrame.withPlan( diff --git a/python/pyspark/sql/connect/plan.py b/python/pyspark/sql/connect/plan.py index 1f4e4192fdf..f63e39c7f3e 100644 --- 
a/python/pyspark/sql/connect/plan.py +++ b/python/pyspark/sql/connect/plan.py @@ -403,8 +403,9 @@ class Hint(LogicalPlan): self.name = name + # TODO(SPARK-41887): support list type as hint parameter assert isinstance(params, list) and all( - p is None or isinstance(p, (int, str)) for p in params + p is not None and isinstance(p, (int, str, float)) for p in params ) self.params = params diff --git a/python/pyspark/sql/tests/connect/test_connect_basic.py b/python/pyspark/sql/tests/connect/test_connect_basic.py index fe6c2c65e25..57d2b675065 100644 --- a/python/pyspark/sql/tests/connect/test_connect_basic.py +++ b/python/pyspark/sql/tests/connect/test_connect_basic.py @@ -1193,13 +1193,26 @@ class SparkConnectBasicTests(SparkConnectSQLTestCase): self.spark.read.table(self.tbl_name).hint("illegal").toPandas(), ) + # Hint with all supported parameter values + such_a_nice_list = ["itworks1", "itworks2", "itworks3"] + self.assert_eq( + self.connect.read.table(self.tbl_name).hint("my awesome hint", 1.2345, 2).toPandas(), + self.spark.read.table(self.tbl_name).hint("my awesome hint", 1.2345, 2).toPandas(), + ) + # Hint with unsupported parameter values with self.assertRaises(SparkConnectException): self.connect.read.table(self.tbl_name).hint("REPARTITION", "id+1").toPandas() # Hint with unsupported parameter types with self.assertRaises(TypeError): - self.connect.read.table(self.tbl_name).hint("REPARTITION", 1.1).toPandas() + self.connect.read.table(self.tbl_name).hint("REPARTITION", range(5)).toPandas() + + # Hint with unsupported parameter types + with self.assertRaises(TypeError): + self.connect.read.table(self.tbl_name).hint( + "my awesome hint", 1.2345, 2, such_a_nice_list, range(6) + ).toPandas() # Hint with wrong combination with self.assertRaises(SparkConnectException): --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org