This is an automated email from the ASF dual-hosted git repository. gurwls223 pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push: new d2a1a31a3800 [SPARK-45783][PYTHON][CONNECT] Improve error messages when Spark Connect mode is enabled but remote URL is not set d2a1a31a3800 is described below commit d2a1a31a3800c2b64f1e45793267e84ef046e888 Author: allisonwang-db <allison.w...@databricks.com> AuthorDate: Mon Nov 13 21:17:27 2023 +0900 [SPARK-45783][PYTHON][CONNECT] Improve error messages when Spark Connect mode is enabled but remote URL is not set ### What changes were proposed in this pull request? This PR improves the error messages when `SPARK_CONNECT_MODE_ENABLED` is defined but neither `spark.remote` option nor the `SPARK_REMOTE` env var is set. ### Why are the changes needed? To improve the error message. Currently the error looks like a bug: ``` url = opts.get("spark.remote", os.environ.get("SPARK_REMOTE")) > if url.startswith("local"): E AttributeError: 'NoneType' object has no attribute 'startswith' ``` ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? New unit test ### Was this patch authored or co-authored using generative AI tooling? No Closes #43653 from allisonwang-db/spark-45783-fix-url-err. Authored-by: allisonwang-db <allison.w...@databricks.com> Signed-off-by: Hyukjin Kwon <gurwls...@apache.org> --- python/pyspark/sql/session.py | 8 ++++++++ python/pyspark/sql/tests/test_session.py | 6 ++++++ 2 files changed, 14 insertions(+) diff --git a/python/pyspark/sql/session.py b/python/pyspark/sql/session.py index 85aff09aa3df..b4fad7ad29da 100644 --- a/python/pyspark/sql/session.py +++ b/python/pyspark/sql/session.py @@ -481,6 +481,14 @@ class SparkSession(SparkConversionMixin): ): url = opts.get("spark.remote", os.environ.get("SPARK_REMOTE")) + if url is None: + raise RuntimeError( + "Cannot create a Spark Connect session because the " + "Spark Connect remote URL has not been set. 
Please define " + "the remote URL by setting either the 'spark.remote' option " + "or the 'SPARK_REMOTE' environment variable." + ) + if url.startswith("local"): os.environ["SPARK_LOCAL_REMOTE"] = "1" RemoteSparkSession._start_connect_server(url, opts) diff --git a/python/pyspark/sql/tests/test_session.py b/python/pyspark/sql/tests/test_session.py index 706b041bb514..da27bf925749 100644 --- a/python/pyspark/sql/tests/test_session.py +++ b/python/pyspark/sql/tests/test_session.py @@ -17,6 +17,7 @@ import os import unittest +import unittest.mock from pyspark import SparkConf, SparkContext from pyspark.sql import SparkSession, SQLContext, Row @@ -187,6 +188,11 @@ class SparkSessionTests3(unittest.TestCase): if sc is not None: sc.stop() + def test_session_with_spark_connect_mode_enabled(self): + with unittest.mock.patch.dict(os.environ, {"SPARK_CONNECT_MODE_ENABLED": "1"}): + with self.assertRaisesRegex(RuntimeError, "Cannot create a Spark Connect session"): + SparkSession.builder.appName("test").getOrCreate() + class SparkSessionTests4(ReusedSQLTestCase): def test_get_active_session_after_create_dataframe(self): --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org