This is an automated email from the ASF dual-hosted git repository. sandy pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push: new 7fee2912ba8b [SPARK-52224][CONNECT][PYTHON] Introduce pyyaml as a dependency for the Python client 7fee2912ba8b is described below commit 7fee2912ba8b068ed730c449f3823c317b3f130b Author: Sandy Ryza <sandyr...@gmail.com> AuthorDate: Thu May 22 13:12:17 2025 -0700 [SPARK-52224][CONNECT][PYTHON] Introduce pyyaml as a dependency for the Python client ### What changes were proposed in this pull request? Introduces pyyaml as a dependency for the Python client. When `pip install`-ing the pyspark client, pyyaml will be installed along with it. ### Why are the changes needed? The pipeline spec file described in the [Declarative Pipelines SPIP](https://docs.google.com/document/d/1PsSTngFuRVEOvUGzp_25CQL1yfzFHFr02XdMfQ7jOM4/edit?tab=t.0) expects data in a YAML format. YAML is superior to alternatives, for a few reasons: - Unlike the flat files that are used for [spark-submit confs](https://spark.apache.org/docs/latest/submitting-applications.html#loading-configuration-from-a-file), it supports the hierarchical data required by the pipeline spec. - It's much more user-friendly to author than JSON. - It's consistent with the config files used for similar tools, like dbt. The Declarative Pipelines CLI will be a Spark Connect Python client, and thus require a Python library for loading YAML. The pyyaml library is an extremely stable dependency. The `safe_load` function that we'll use to load YAML files was introduced more than a decade ago. ### Does this PR introduce _any_ user-facing change? Yes – users who `pip install` the PySpark client library will see the pyyaml library installed. ### How was this patch tested? - Made a clean virtualenv - Ran `pip install python/packaging/client` - Confirmed that I could `import yaml` in a Python shell ### Was this patch authored or co-authored using generative AI tooling? No Closes #50944 from sryza/yaml-dep. 
Authored-by: Sandy Ryza <sandyr...@gmail.com> Signed-off-by: Sandy Ryza <sandy.r...@databricks.com> --- dev/requirements.txt | 1 + python/packaging/classic/setup.py | 2 ++ python/packaging/client/setup.py | 2 ++ python/packaging/connect/setup.py | 2 ++ 4 files changed, 7 insertions(+) diff --git a/dev/requirements.txt b/dev/requirements.txt index 1ed5b4f72d65..df30b2f08e03 100644 --- a/dev/requirements.txt +++ b/dev/requirements.txt @@ -12,6 +12,7 @@ mlflow>=2.3.1 scikit-learn matplotlib memory-profiler>=0.61.0 +pyyaml>=3.11 # PySpark test dependencies unittest-xml-reporting diff --git a/python/packaging/classic/setup.py b/python/packaging/classic/setup.py index da4d25cc908c..51ab69c6e4cc 100755 --- a/python/packaging/classic/setup.py +++ b/python/packaging/classic/setup.py @@ -155,6 +155,7 @@ _minimum_numpy_version = "1.21" _minimum_pyarrow_version = "11.0.0" _minimum_grpc_version = "1.67.0" _minimum_googleapis_common_protos_version = "1.65.0" +_minimum_pyyaml_version = "3.11" class InstallCommand(install): @@ -365,6 +366,7 @@ try: "grpcio-status>=%s" % _minimum_grpc_version, "googleapis-common-protos>=%s" % _minimum_googleapis_common_protos_version, "numpy>=%s" % _minimum_numpy_version, + "pyyaml>=%s" % _minimum_pyyaml_version, ], }, python_requires=">=3.9", diff --git a/python/packaging/client/setup.py b/python/packaging/client/setup.py index 30392bcada4c..6a361a3e0c1b 100755 --- a/python/packaging/client/setup.py +++ b/python/packaging/client/setup.py @@ -137,6 +137,7 @@ try: _minimum_pyarrow_version = "11.0.0" _minimum_grpc_version = "1.67.0" _minimum_googleapis_common_protos_version = "1.65.0" + _minimum_pyyaml_version = "3.11" with open("README.md") as f: long_description = f.read() @@ -209,6 +210,7 @@ try: "grpcio-status>=%s" % _minimum_grpc_version, "googleapis-common-protos>=%s" % _minimum_googleapis_common_protos_version, "numpy>=%s" % _minimum_numpy_version, + "pyyaml>=%s" % _minimum_pyyaml_version, ], python_requires=">=3.9", classifiers=[ diff --git 
a/python/packaging/connect/setup.py b/python/packaging/connect/setup.py index 25e8ad91efff..b0ce24267300 100755 --- a/python/packaging/connect/setup.py +++ b/python/packaging/connect/setup.py @@ -91,6 +91,7 @@ try: _minimum_pyarrow_version = "11.0.0" _minimum_grpc_version = "1.67.0" _minimum_googleapis_common_protos_version = "1.65.0" + _minimum_pyyaml_version = "3.11" with open("README.md") as f: long_description = f.read() @@ -121,6 +122,7 @@ try: "grpcio-status>=%s" % _minimum_grpc_version, "googleapis-common-protos>=%s" % _minimum_googleapis_common_protos_version, "numpy>=%s" % _minimum_numpy_version, + "pyyaml>=%s" % _minimum_pyyaml_version, ], python_requires=">=3.9", classifiers=[ --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org