This is an automated email from the ASF dual-hosted git repository. xinrong pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push: new 40dd5235373 [SPARK-41532][CONNECT][FOLLOWUP] add error class `SESSION_NOT_SAME` into error_classes.py 40dd5235373 is described below commit 40dd5235373891bdcc536e25082597aca24e6507 Author: Jia Fan <fanjiaemi...@qq.com> AuthorDate: Mon May 22 10:51:25 2023 -0700 [SPARK-41532][CONNECT][FOLLOWUP] add error class `SESSION_NOT_SAME` into error_classes.py ### What changes were proposed in this pull request? This is a follow-up PR for #40684. Add the definition of the error class `SESSION_NOT_SAME` to `error_classes.py` with a template error message. ### Why are the changes needed? To unify the error messages. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? Added a new test. Closes #41259 from Hisoka-X/follow_up_session_not_same. Authored-by: Jia Fan <fanjiaemi...@qq.com> Signed-off-by: Xinrong Meng <xinr...@apache.org> --- python/pyspark/errors/error_classes.py | 5 +++++ python/pyspark/sql/connect/dataframe.py | 5 ++++- .../pyspark/sql/tests/connect/test_connect_basic.py | 21 ++++++++++++++++++--- 3 files changed, 27 insertions(+), 4 deletions(-) diff --git a/python/pyspark/errors/error_classes.py b/python/pyspark/errors/error_classes.py index c7b00e0736d..817b8ce60db 100644 --- a/python/pyspark/errors/error_classes.py +++ b/python/pyspark/errors/error_classes.py @@ -576,6 +576,11 @@ ERROR_CLASSES_JSON = """ "Result vector from pandas_udf was not the required length: expected <expected>, got <actual>." ] }, + "SESSION_NOT_SAME" : { + "message" : [ + "Both Datasets must belong to the same SparkSession." + ] + }, "SESSION_OR_CONTEXT_EXISTS" : { "message" : [ "There should not be an existing Spark Session or Spark Context." 
diff --git a/python/pyspark/sql/connect/dataframe.py b/python/pyspark/sql/connect/dataframe.py index 7a5ba50b3c6..4563366ef0f 100644 --- a/python/pyspark/sql/connect/dataframe.py +++ b/python/pyspark/sql/connect/dataframe.py @@ -265,7 +265,10 @@ class DataFrame: def checkSameSparkSession(self, other: "DataFrame") -> None: if self._session.session_id != other._session.session_id: - raise SessionNotSameException("Both Datasets must belong to the same SparkSession") + raise SessionNotSameException( + error_class="SESSION_NOT_SAME", + message_parameters={}, + ) def coalesce(self, numPartitions: int) -> "DataFrame": if not numPartitions > 0: diff --git a/python/pyspark/sql/tests/connect/test_connect_basic.py b/python/pyspark/sql/tests/connect/test_connect_basic.py index dd5e52894c9..7225b6aa8d0 100644 --- a/python/pyspark/sql/tests/connect/test_connect_basic.py +++ b/python/pyspark/sql/tests/connect/test_connect_basic.py @@ -1815,14 +1815,29 @@ class SparkConnectBasicTests(SparkConnectSQLTestCase): spark2 = RemoteSparkSession(connection="sc://localhost") df2 = spark2.range(10).limit(3) - with self.assertRaises(SessionNotSameException): + with self.assertRaises(SessionNotSameException) as e1: df.union(df2).collect() + self.check_error( + exception=e1.exception, + error_class="SESSION_NOT_SAME", + message_parameters={}, + ) - with self.assertRaises(SessionNotSameException): + with self.assertRaises(SessionNotSameException) as e2: df.unionByName(df2).collect() + self.check_error( + exception=e2.exception, + error_class="SESSION_NOT_SAME", + message_parameters={}, + ) - with self.assertRaises(SessionNotSameException): + with self.assertRaises(SessionNotSameException) as e3: df.join(df2).collect() + self.check_error( + exception=e3.exception, + error_class="SESSION_NOT_SAME", + message_parameters={}, + ) def test_extended_hint_types(self): cdf = self.connect.range(100).toDF("id") --------------------------------------------------------------------- To unsubscribe, e-mail: 
commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org