This is an automated email from the ASF dual-hosted git repository.

gurwls223 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
     new 2cf11cdb04f  [SPARK-41854][PYTHON][BUILD] Automatic reformat/check python/setup.py
2cf11cdb04f is described below

commit 2cf11cdb04f4c8628a991e50470331c3a8682bcd
Author: Hyukjin Kwon <gurwls...@apache.org>
AuthorDate: Tue Jan 3 13:05:29 2023 +0900

    [SPARK-41854][PYTHON][BUILD] Automatic reformat/check python/setup.py

    ### What changes were proposed in this pull request?

    This PR proposes to automatically reformat `python/setup.py` too.

    ### Why are the changes needed?

    To make the development cycle easier.

    ### Does this PR introduce _any_ user-facing change?

    No, dev-only.

    ### How was this patch tested?

    I manually checked via:

    ```bash
    ./dev/reformat-python
    ./dev/lint-python
    ```

    Closes #39352 from HyukjinKwon/SPARK-41854.

    Authored-by: Hyukjin Kwon <gurwls...@apache.org>
    Signed-off-by: Hyukjin Kwon <gurwls...@apache.org>
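For reference, both scripts drive black with the shared configuration in dev/pyproject.toml; per the diff below, the direct equivalents of the updated commands are roughly the following (a sketch only — it assumes a compatible `black` is on PATH and is run from the repository root, whereas the scripts resolve the actual binary via `$BLACK_BUILD`):

```bash
# Roughly what dev/lint-python's black_test runs after this change (check only, no rewrite):
black --config dev/pyproject.toml --check python/pyspark dev python/setup.py

# Roughly what dev/reformat-python runs (rewrites files in place):
black --config dev/pyproject.toml python/pyspark dev python/setup.py
```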
---
 dev/lint-python     |   2 +-
 dev/reformat-python |   2 +-
 python/setup.py     | 240 ++++++++++++++++++++++++++++------------------
 3 files changed, 133 insertions(+), 111 deletions(-)

diff --git a/dev/lint-python b/dev/lint-python
index 59ce71980d9..f1f4e9f1070 100755
--- a/dev/lint-python
+++ b/dev/lint-python
@@ -220,7 +220,7 @@ function black_test {
     fi

     echo "starting black test..."
-    BLACK_REPORT=$( ($BLACK_BUILD --config dev/pyproject.toml --check python/pyspark dev) 2>&1)
+    BLACK_REPORT=$( ($BLACK_BUILD --config dev/pyproject.toml --check python/pyspark dev python/setup.py) 2>&1)
     BLACK_STATUS=$?

     if [ "$BLACK_STATUS" -ne 0 ]; then
diff --git a/dev/reformat-python b/dev/reformat-python
index ae2118ab631..9543f5713d1 100755
--- a/dev/reformat-python
+++ b/dev/reformat-python
@@ -29,4 +29,4 @@ if [ $? -ne 0 ]; then
     exit 1
 fi

-$BLACK_BUILD --config dev/pyproject.toml python/pyspark dev
+$BLACK_BUILD --config dev/pyproject.toml python/pyspark dev python/setup.py
diff --git a/python/setup.py b/python/setup.py
index 54115359a60..faba203a53a 100755
--- a/python/setup.py
+++ b/python/setup.py
@@ -25,19 +25,23 @@ from setuptools.command.install import install
 from shutil import copyfile, copytree, rmtree

 try:
-    exec(open('pyspark/version.py').read())
+    exec(open("pyspark/version.py").read())
 except IOError:
-    print("Failed to load PySpark version file for packaging. You must be in Spark's python dir.",
-          file=sys.stderr)
+    print(
+        "Failed to load PySpark version file for packaging. You must be in Spark's python dir.",
+        file=sys.stderr,
+    )
     sys.exit(-1)
 try:
     spec = importlib.util.spec_from_file_location("install", "pyspark/install.py")
     install_module = importlib.util.module_from_spec(spec)
     spec.loader.exec_module(install_module)
 except IOError:
-    print("Failed to load the installing module (pyspark/install.py) which had to be "
-          "packaged together.",
-          file=sys.stderr)
+    print(
+        "Failed to load the installing module (pyspark/install.py) which had to be "
+        "packaged together.",
+        file=sys.stderr,
+    )
     sys.exit(-1)

 VERSION = __version__  # noqa
 # A temporary path so we can access above the Python project root and fetch scripts and jars we need
@@ -61,12 +65,16 @@ JARS_PATH = glob.glob(os.path.join(SPARK_HOME, "assembly/target/scala-*/jars/"))

 if len(JARS_PATH) == 1:
     JARS_PATH = JARS_PATH[0]
-elif (os.path.isfile("../RELEASE") and len(glob.glob("../jars/spark*core*.jar")) == 1):
+elif os.path.isfile("../RELEASE") and len(glob.glob("../jars/spark*core*.jar")) == 1:
     # Release mode puts the jars in a jars directory
     JARS_PATH = os.path.join(SPARK_HOME, "jars")
 elif len(JARS_PATH) > 1:
-    print("Assembly jars exist for multiple scalas ({0}), please cleanup assembly/target".format(
-        JARS_PATH), file=sys.stderr)
+    print(
+        "Assembly jars exist for multiple scalas ({0}), please cleanup assembly/target".format(
+            JARS_PATH
+        ),
+        file=sys.stderr,
+    )
     sys.exit(-1)
 elif len(JARS_PATH) == 0 and not os.path.exists(TEMP_PATH):
     print(incorrect_invocation_message, file=sys.stderr)
@@ -89,8 +97,9 @@ LICENSES_TARGET = os.path.join(TEMP_PATH, "licenses")
 # This is important because we only want to build the symlink farm while under Spark otherwise we
 # want to use the symlink farm. And if the symlink farm exists under while under Spark (e.g. a
 # partially built sdist) we should error and have the user sort it out.
-in_spark = (os.path.isfile("../core/src/main/scala/org/apache/spark/SparkContext.scala") or
-            (os.path.isfile("../RELEASE") and len(glob.glob("../jars/spark*core*.jar")) == 1))
+in_spark = os.path.isfile("../core/src/main/scala/org/apache/spark/SparkContext.scala") or (
+    os.path.isfile("../RELEASE") and len(glob.glob("../jars/spark*core*.jar")) == 1
+)


 def _supports_symlinks():
@@ -98,13 +107,14 @@ def _supports_symlinks():
     return getattr(os, "symlink", None) is not None


-if (in_spark):
+if in_spark:
     # Construct links for setup
     try:
         os.mkdir(TEMP_PATH)
     except BaseException:
-        print("Temp path for symlink to parent already exists {0}".format(TEMP_PATH),
-              file=sys.stderr)
+        print(
+            "Temp path for symlink to parent already exists {0}".format(TEMP_PATH), file=sys.stderr
+        )
         sys.exit(-1)

 # If you are changing the versions here, please also change ./python/pyspark/sql/pandas/utils.py
@@ -134,11 +144,13 @@ class InstallCommand(install):
         spark_version, hadoop_version, hive_version = install_module.checked_versions(
             os.environ.get("PYSPARK_VERSION", VERSION).lower(),
             os.environ.get("PYSPARK_HADOOP_VERSION", install_module.DEFAULT_HADOOP).lower(),
-            os.environ.get("PYSPARK_HIVE_VERSION", install_module.DEFAULT_HIVE).lower())
+            os.environ.get("PYSPARK_HIVE_VERSION", install_module.DEFAULT_HIVE).lower(),
+        )

-        if ("PYSPARK_VERSION" not in os.environ and
-                ((install_module.DEFAULT_HADOOP, install_module.DEFAULT_HIVE) ==
-                 (hadoop_version, hive_version))):
+        if "PYSPARK_VERSION" not in os.environ and (
+            (install_module.DEFAULT_HADOOP, install_module.DEFAULT_HIVE)
+            == (hadoop_version, hive_version)
+        ):
             # Do not download and install if they are same as default.
             return

@@ -146,7 +158,8 @@ class InstallCommand(install):
             dest=spark_dist,
             spark_version=spark_version,
             hadoop_version=hadoop_version,
-            hive_version=hive_version)
+            hive_version=hive_version,
+        )


 try:
@@ -160,7 +173,7 @@ try:
         pass
     copyfile("pyspark/shell.py", "pyspark/python/pyspark/shell.py")

-    if (in_spark):
+    if in_spark:
         # Construct the symlink farm - this is necessary since we can't refer to the path above the
         # package root and we need to copy the jars and scripts which are up above the python root.
         if _supports_symlinks():
@@ -181,8 +194,10 @@ try:
     else:
         # If we are not inside of SPARK_HOME verify we have the required symlink farm
         if not os.path.exists(JARS_TARGET):
-            print("To build packaging must be in the python directory under the SPARK_HOME.",
-                  file=sys.stderr)
+            print(
+                "To build packaging must be in the python directory under the SPARK_HOME.",
+                file=sys.stderr,
+            )

     if not os.path.isdir(SCRIPTS_TARGET):
         print(incorrect_invocation_message, file=sys.stderr)
@@ -195,118 +210,125 @@ try:
     # will search for SPARK_HOME with Python.
     scripts.append("pyspark/find_spark_home.py")

-    with open('README.md') as f:
+    with open("README.md") as f:
         long_description = f.read()

     setup(
-        name='pyspark',
+        name="pyspark",
         version=VERSION,
-        description='Apache Spark Python API',
+        description="Apache Spark Python API",
         long_description=long_description,
         long_description_content_type="text/markdown",
-        author='Spark Developers',
-        author_email='d...@spark.apache.org',
-        url='https://github.com/apache/spark/tree/master/python',
-        packages=['pyspark',
-                  'pyspark.cloudpickle',
-                  'pyspark.mllib',
-                  'pyspark.mllib.linalg',
-                  'pyspark.mllib.stat',
-                  'pyspark.ml',
-                  'pyspark.ml.linalg',
-                  'pyspark.ml.param',
-                  'pyspark.sql',
-                  'pyspark.sql.avro',
-                  'pyspark.sql.connect',
-                  'pyspark.sql.connect.proto',
-                  'pyspark.sql.pandas',
-                  'pyspark.sql.protobuf',
-                  'pyspark.sql.streaming',
-                  'pyspark.streaming',
-                  'pyspark.bin',
-                  'pyspark.sbin',
-                  'pyspark.jars',
-                  'pyspark.pandas',
-                  'pyspark.pandas.data_type_ops',
-                  'pyspark.pandas.indexes',
-                  'pyspark.pandas.missing',
-                  'pyspark.pandas.plot',
-                  'pyspark.pandas.spark',
-                  'pyspark.pandas.typedef',
-                  'pyspark.pandas.usage_logging',
-                  'pyspark.python.pyspark',
-                  'pyspark.python.lib',
-                  'pyspark.data',
-                  'pyspark.licenses',
-                  'pyspark.resource',
-                  'pyspark.examples.src.main.python'],
+        author="Spark Developers",
+        author_email="d...@spark.apache.org",
+        url="https://github.com/apache/spark/tree/master/python",
+        packages=[
+            "pyspark",
+            "pyspark.cloudpickle",
+            "pyspark.mllib",
+            "pyspark.mllib.linalg",
+            "pyspark.mllib.stat",
+            "pyspark.ml",
+            "pyspark.ml.linalg",
+            "pyspark.ml.param",
+            "pyspark.sql",
+            "pyspark.sql.avro",
+            "pyspark.sql.connect",
+            "pyspark.sql.connect.proto",
+            "pyspark.sql.pandas",
+            "pyspark.sql.protobuf",
+            "pyspark.sql.streaming",
+            "pyspark.streaming",
+            "pyspark.bin",
+            "pyspark.sbin",
+            "pyspark.jars",
+            "pyspark.pandas",
+            "pyspark.pandas.data_type_ops",
+            "pyspark.pandas.indexes",
+            "pyspark.pandas.missing",
+            "pyspark.pandas.plot",
+            "pyspark.pandas.spark",
+            "pyspark.pandas.typedef",
+            "pyspark.pandas.usage_logging",
+            "pyspark.python.pyspark",
+            "pyspark.python.lib",
+            "pyspark.data",
+            "pyspark.licenses",
+            "pyspark.resource",
+            "pyspark.examples.src.main.python",
+        ],
         include_package_data=True,
         package_dir={
-            'pyspark.jars': 'deps/jars',
-            'pyspark.bin': 'deps/bin',
-            'pyspark.sbin': 'deps/sbin',
-            'pyspark.python.lib': 'lib',
-            'pyspark.data': 'deps/data',
-            'pyspark.licenses': 'deps/licenses',
-            'pyspark.examples.src.main.python': 'deps/examples',
+            "pyspark.jars": "deps/jars",
+            "pyspark.bin": "deps/bin",
+            "pyspark.sbin": "deps/sbin",
+            "pyspark.python.lib": "lib",
+            "pyspark.data": "deps/data",
+            "pyspark.licenses": "deps/licenses",
+            "pyspark.examples.src.main.python": "deps/examples",
         },
         package_data={
-            'pyspark.jars': ['*.jar'],
-            'pyspark.bin': ['*'],
-            'pyspark.sbin': ['spark-config.sh', 'spark-daemon.sh',
-                             'start-history-server.sh',
-                             'stop-history-server.sh', ],
-            'pyspark.python.lib': ['*.zip'],
-            'pyspark.data': ['*.txt', '*.data'],
-            'pyspark.licenses': ['*.txt'],
-            'pyspark.examples.src.main.python': ['*.py', '*/*.py']},
+            "pyspark.jars": ["*.jar"],
+            "pyspark.bin": ["*"],
+            "pyspark.sbin": [
+                "spark-config.sh",
+                "spark-daemon.sh",
+                "start-history-server.sh",
+                "stop-history-server.sh",
+            ],
+            "pyspark.python.lib": ["*.zip"],
+            "pyspark.data": ["*.txt", "*.data"],
+            "pyspark.licenses": ["*.txt"],
+            "pyspark.examples.src.main.python": ["*.py", "*/*.py"],
+        },
         scripts=scripts,
-        license='http://www.apache.org/licenses/LICENSE-2.0',
+        license="http://www.apache.org/licenses/LICENSE-2.0",
         # Don't forget to update python/docs/source/getting_started/install.rst
         # if you're updating the versions or dependencies.
-        install_requires=['py4j==0.10.9.7'],
+        install_requires=["py4j==0.10.9.7"],
         extras_require={
-            'ml': ['numpy>=1.15'],
-            'mllib': ['numpy>=1.15'],
-            'sql': [
-                'pandas>=%s' % _minimum_pandas_version,
-                'pyarrow>=%s' % _minimum_pyarrow_version,
-                'numpy>=1.15',
+            "ml": ["numpy>=1.15"],
+            "mllib": ["numpy>=1.15"],
+            "sql": [
+                "pandas>=%s" % _minimum_pandas_version,
+                "pyarrow>=%s" % _minimum_pyarrow_version,
+                "numpy>=1.15",
             ],
-            'pandas_on_spark': [
-                'pandas>=%s' % _minimum_pandas_version,
-                'pyarrow>=%s' % _minimum_pyarrow_version,
-                'numpy>=1.15',
+            "pandas_on_spark": [
+                "pandas>=%s" % _minimum_pandas_version,
+                "pyarrow>=%s" % _minimum_pyarrow_version,
+                "numpy>=1.15",
             ],
-            'connect': [
-                'pandas>=%s' % _minimum_pandas_version,
-                'pyarrow>=%s' % _minimum_pyarrow_version,
-                'grpcio>=%s' % _minimum_grpc_version,
-                'grpcio-status>=%s' % _minimum_grpc_version,
-                'googleapis-common-protos>=%s' % _minimum_googleapis_common_protos_version,
-                'numpy>=1.15',
+            "connect": [
+                "pandas>=%s" % _minimum_pandas_version,
+                "pyarrow>=%s" % _minimum_pyarrow_version,
+                "grpcio>=%s" % _minimum_grpc_version,
+                "grpcio-status>=%s" % _minimum_grpc_version,
+                "googleapis-common-protos>=%s" % _minimum_googleapis_common_protos_version,
+                "numpy>=1.15",
             ],
         },
-        python_requires='>=3.7',
+        python_requires=">=3.7",
         classifiers=[
-            'Development Status :: 5 - Production/Stable',
-            'License :: OSI Approved :: Apache Software License',
-            'Programming Language :: Python :: 3.7',
-            'Programming Language :: Python :: 3.8',
-            'Programming Language :: Python :: 3.9',
-            'Programming Language :: Python :: 3.10',
-            'Programming Language :: Python :: 3.11',
-            'Programming Language :: Python :: Implementation :: CPython',
-            'Programming Language :: Python :: Implementation :: PyPy',
-            'Typing :: Typed'],
+            "Development Status :: 5 - Production/Stable",
+            "License :: OSI Approved :: Apache Software License",
+            "Programming Language :: Python :: 3.7",
+            "Programming Language :: Python :: 3.8",
+            "Programming Language :: Python :: 3.9",
+            "Programming Language :: Python :: 3.10",
+            "Programming Language :: Python :: 3.11",
+            "Programming Language :: Python :: Implementation :: CPython",
+            "Programming Language :: Python :: Implementation :: PyPy",
+            "Typing :: Typed",
+        ],
         cmdclass={
-            'install': InstallCommand,
+            "install": InstallCommand,
         },
     )
 finally:
     # We only cleanup the symlink farm if we were in Spark, otherwise we are installing rather than
     # packaging.
-    if (in_spark):
+    if in_spark:
         # Depending on cleaning up the symlink farm or copied version
         if _supports_symlinks():
             os.remove(os.path.join(TEMP_PATH, "jars"))

---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org