This is an automated email from the ASF dual-hosted git repository. gurwls223 pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push: new ae2d43f279d5 [SPARK-46806][PYTHON] Improve error message for spark.table when argument type is wrong ae2d43f279d5 is described below commit ae2d43f279d5d27b63db3356abaf7d64755f3f5c Author: Hyukjin Kwon <gurwls...@apache.org> AuthorDate: Tue Jan 23 12:35:52 2024 +0900 [SPARK-46806][PYTHON] Improve error message for spark.table when argument type is wrong ### What changes were proposed in this pull request? This PR improves error message for spark.table when argument type is wrong ```python spark.table(None) ``` **Before:** ``` Traceback (most recent call last): File "<stdin>", line 1, in <module> File "/.../spark/python/pyspark/sql/session.py", line 1710, in table return DataFrame(self._jsparkSession.table(tableName), self) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ File "/.../spark/python/lib/py4j-0.10.9.7-src.zip/py4j/java_gateway.py", line 1322, in __call__ File "/.../spark/python/pyspark/errors/exceptions/captured.py", line 215, in deco return f(*a, **kw) ^^^^^^^^^^^ File "/.../spark/python/lib/py4j-0.10.9.7-src.zip/py4j/protocol.py", line 326, in get_return_value py4j.protocol.Py4JJavaError: An error occurred while calling o27.table. 
: java.lang.NullPointerException: Cannot invoke "String.length()" because "s" is null at org.antlr.v4.runtime.CharStreams.fromString(CharStreams.java:222) at org.antlr.v4.runtime.CharStreams.fromString(CharStreams.java:212) at org.apache.spark.sql.catalyst.parser.AbstractParser.parse(parsers.scala:58) at org.apache.spark.sql.execution.SparkSqlParser.parse(SparkSqlParser.scala:55) at org.apache.spark.sql.catalyst.parser.AbstractSqlParser.parseMultipartIdentifier(AbstractSqlParser.scala:54) at org.apache.spark.sql.DataFrameReader.table(DataFrameReader.scala:681) at org.apache.spark.sql.SparkSession.table(SparkSession.scala:619) at java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke0(Native Method) at java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:77) at java.base/jdk.internal.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) at java.base/java.lang.reflect.Method.invoke(Method.java:568) at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244) at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:374) at py4j.Gateway.invoke(Gateway.java:282) ``` **After:** ``` Traceback (most recent call last): File "<stdin>", line 1, in <module> File "/.../spark/python/pyspark/sql/session.py", line 1711, in table raise PySparkTypeError( pyspark.errors.exceptions.base.PySparkTypeError: [NOT_STR] Argument `tableName` should be a str, got NoneType. ``` ### Why are the changes needed? For better error messages to the end users. ### Does this PR introduce _any_ user-facing change? Yes, it fixes the user-facing error messages. ### How was this patch tested? Unittest was added. ### Was this patch authored or co-authored using generative AI tooling? No. Closes #44846 from HyukjinKwon/SPARK-46806. 
Authored-by: Hyukjin Kwon <gurwls...@apache.org> Signed-off-by: Hyukjin Kwon <gurwls...@apache.org> --- python/pyspark/sql/connect/session.py | 6 ++++++ python/pyspark/sql/session.py | 6 ++++++ python/pyspark/sql/tests/test_dataframe.py | 10 ++++++++++ 3 files changed, 22 insertions(+) diff --git a/python/pyspark/sql/connect/session.py b/python/pyspark/sql/connect/session.py index 5cbcb4ab5c35..1c53e460c196 100644 --- a/python/pyspark/sql/connect/session.py +++ b/python/pyspark/sql/connect/session.py @@ -287,6 +287,12 @@ class SparkSession: active.__doc__ = PySparkSession.active.__doc__ def table(self, tableName: str) -> DataFrame: + if not isinstance(tableName, str): + raise PySparkTypeError( + error_class="NOT_STR", + message_parameters={"arg_name": "tableName", "arg_type": type(tableName).__name__}, + ) + return self.read.table(tableName) table.__doc__ = PySparkSession.table.__doc__ diff --git a/python/pyspark/sql/session.py b/python/pyspark/sql/session.py index fef834b9f0a0..7d0d9dc113f2 100644 --- a/python/pyspark/sql/session.py +++ b/python/pyspark/sql/session.py @@ -1707,6 +1707,12 @@ class SparkSession(SparkConversionMixin): | 4| +---+ """ + if not isinstance(tableName, str): + raise PySparkTypeError( + error_class="NOT_STR", + message_parameters={"arg_name": "tableName", "arg_type": type(tableName).__name__}, + ) + return DataFrame(self._jsparkSession.table(tableName), self) @property diff --git a/python/pyspark/sql/tests/test_dataframe.py b/python/pyspark/sql/tests/test_dataframe.py index 1788f1d9fb1a..f87e3b15eadb 100644 --- a/python/pyspark/sql/tests/test_dataframe.py +++ b/python/pyspark/sql/tests/test_dataframe.py @@ -69,6 +69,16 @@ class DataFrameTestsMixin: self.assertEqual(self.spark.range(-2).count(), 0) self.assertEqual(self.spark.range(3).count(), 3) + def test_table(self): + with self.assertRaises(PySparkTypeError) as pe: + self.spark.table(None) + + self.check_error( + exception=pe.exception, + error_class="NOT_STR", + 
message_parameters={"arg_name": "tableName", "arg_type": "NoneType"}, + ) + def test_dataframe_star(self): df1 = self.spark.createDataFrame([{"a": 1}]) df2 = self.spark.createDataFrame([{"a": 1, "b": "v"}]) --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org