This is an automated email from the ASF dual-hosted git repository.

gurwls223 pushed a commit to branch branch-3.2
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/branch-3.2 by this push:
     new 864b89b  [SPARK-38563][PYTHON] Upgrade to Py4J 0.10.9.5
864b89b is described below

commit 864b89bbea8c2620938b28661fc8bea6271c7048
Author: Hyukjin Kwon <gurwls...@apache.org>
AuthorDate: Fri Mar 18 14:00:48 2022 +0900

    [SPARK-38563][PYTHON] Upgrade to Py4J 0.10.9.5
    
    This PR is a retry of https://github.com/apache/spark/pull/35871, bumping the version up to 0.10.9.5.
    That PR was reverted because Python 3.10 was broken, and Python 3.10 was not yet officially supported in Py4J.
    
    Py4J 0.10.9.5 fixes that issue (https://github.com/py4j/py4j/pull/475) and adds official Python 3.10 support with CI set up (https://github.com/py4j/py4j/pull/477).
    
    See https://github.com/apache/spark/pull/35871
    
    Py4J now runs CI against Python 3.10, and I manually tested PySpark on Python 3.10 with this patch:
    
    ```bash
    ./bin/pyspark
    ```
    
    ```
    import py4j
    py4j.__version__
    spark.range(10).show()
    ```
    
    ```
    Using Python version 3.10.0 (default, Mar  3 2022 03:57:21)
    Spark context Web UI available at http://172.30.5.50:4040
    Spark context available as 'sc' (master = local[*], app id = local-1647571387534).
    SparkSession available as 'spark'.
    >>> import py4j
    >>> py4j.__version__
    '0.10.9.5'
    >>> spark.range(10).show()
    +---+
    | id|
    +---+
    ...
    ```
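
    As one extra sanity check (not part of this patch), the same shell can assert
    that the Py4J resolved on PYTHONPATH is the bundled, upgraded one; "0.10.9.5"
    here is simply the version this patch ships:

    ```
    import py4j
    # Sanity check: PySpark should pick Py4J up from python/lib/py4j-0.10.9.5-src.zip.
    assert py4j.__version__ == "0.10.9.5", py4j.__version__
    ```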
    
    Closes #35907 from HyukjinKwon/SPARK-38563-followup.
    
    Authored-by: Hyukjin Kwon <gurwls...@apache.org>
    Signed-off-by: Hyukjin Kwon <gurwls...@apache.org>
    (cherry picked from commit 97335ea037a9a036c013c86ef62d74ca638f808e)
    Signed-off-by: Hyukjin Kwon <gurwls...@apache.org>
---
 bin/pyspark                                        |   2 +-
 bin/pyspark2.cmd                                   |   2 +-
 core/pom.xml                                       |   2 +-
 .../org/apache/spark/api/python/PythonUtils.scala  |   2 +-
 dev/deps/spark-deps-hadoop-2.7-hive-2.3            |   2 +-
 dev/deps/spark-deps-hadoop-3.2-hive-2.3            |   2 +-
 docs/job-scheduling.md                             |   2 +-
 python/docs/Makefile                               |   2 +-
 python/docs/make2.bat                              |   2 +-
 python/docs/source/getting_started/install.rst     |   2 +-
 python/lib/py4j-0.10.9.3-src.zip                   | Bin 42021 -> 0 bytes
 python/lib/py4j-0.10.9.5-src.zip                   | Bin 0 -> 42404 bytes
 python/pyspark/context.py                          |   6 ++--
 python/pyspark/util.py                             |  32 +++------------------
 python/setup.py                                    |   2 +-
 sbin/spark-config.sh                               |   2 +-
 16 files changed, 19 insertions(+), 43 deletions(-)

diff --git a/bin/pyspark b/bin/pyspark
index 4840589..21a514e 100755
--- a/bin/pyspark
+++ b/bin/pyspark
@@ -50,7 +50,7 @@ export PYSPARK_DRIVER_PYTHON_OPTS
 
 # Add the PySpark classes to the Python path:
 export PYTHONPATH="${SPARK_HOME}/python/:$PYTHONPATH"
-export PYTHONPATH="${SPARK_HOME}/python/lib/py4j-0.10.9.3-src.zip:$PYTHONPATH"
+export PYTHONPATH="${SPARK_HOME}/python/lib/py4j-0.10.9.5-src.zip:$PYTHONPATH"
 
 # Load the PySpark shell.py script when ./pyspark is used interactively:
 export OLD_PYTHONSTARTUP="$PYTHONSTARTUP"
diff --git a/bin/pyspark2.cmd b/bin/pyspark2.cmd
index a19627a..eec02a4 100644
--- a/bin/pyspark2.cmd
+++ b/bin/pyspark2.cmd
@@ -30,7 +30,7 @@ if "x%PYSPARK_DRIVER_PYTHON%"=="x" (
 )
 
 set PYTHONPATH=%SPARK_HOME%\python;%PYTHONPATH%
-set PYTHONPATH=%SPARK_HOME%\python\lib\py4j-0.10.9.3-src.zip;%PYTHONPATH%
+set PYTHONPATH=%SPARK_HOME%\python\lib\py4j-0.10.9.5-src.zip;%PYTHONPATH%
 
 set OLD_PYTHONSTARTUP=%PYTHONSTARTUP%
 set PYTHONSTARTUP=%SPARK_HOME%\python\pyspark\shell.py
diff --git a/core/pom.xml b/core/pom.xml
index 3833794..4926664 100644
--- a/core/pom.xml
+++ b/core/pom.xml
@@ -433,7 +433,7 @@
     <dependency>
       <groupId>net.sf.py4j</groupId>
       <artifactId>py4j</artifactId>
-      <version>0.10.9.3</version>
+      <version>0.10.9.5</version>
     </dependency>
     <dependency>
       <groupId>org.apache.spark</groupId>
diff --git a/core/src/main/scala/org/apache/spark/api/python/PythonUtils.scala b/core/src/main/scala/org/apache/spark/api/python/PythonUtils.scala
index 8daba86..6336171 100644
--- a/core/src/main/scala/org/apache/spark/api/python/PythonUtils.scala
+++ b/core/src/main/scala/org/apache/spark/api/python/PythonUtils.scala
@@ -27,7 +27,7 @@ import org.apache.spark.SparkContext
 import org.apache.spark.api.java.{JavaRDD, JavaSparkContext}
 
 private[spark] object PythonUtils {
-  val PY4J_ZIP_NAME = "py4j-0.10.9.3-src.zip"
+  val PY4J_ZIP_NAME = "py4j-0.10.9.5-src.zip"
 
   /** Get the PYTHONPATH for PySpark, either from SPARK_HOME, if it is set, or from our JAR */
   def sparkPythonPath: String = {
diff --git a/dev/deps/spark-deps-hadoop-2.7-hive-2.3 b/dev/deps/spark-deps-hadoop-2.7-hive-2.3
index c2882bd..23da066 100644
--- a/dev/deps/spark-deps-hadoop-2.7-hive-2.3
+++ b/dev/deps/spark-deps-hadoop-2.7-hive-2.3
@@ -208,7 +208,7 @@ parquet-format-structures/1.12.2//parquet-format-structures-1.12.2.jar
 parquet-hadoop/1.12.2//parquet-hadoop-1.12.2.jar
 parquet-jackson/1.12.2//parquet-jackson-1.12.2.jar
 protobuf-java/2.5.0//protobuf-java-2.5.0.jar
-py4j/0.10.9.3//py4j-0.10.9.3.jar
+py4j/0.10.9.5//py4j-0.10.9.5.jar
 pyrolite/4.30//pyrolite-4.30.jar
 rocksdbjni/6.20.3//rocksdbjni-6.20.3.jar
 scala-collection-compat_2.12/2.1.1//scala-collection-compat_2.12-2.1.1.jar
diff --git a/dev/deps/spark-deps-hadoop-3.2-hive-2.3 b/dev/deps/spark-deps-hadoop-3.2-hive-2.3
index be4c7b8..fb69fc5 100644
--- a/dev/deps/spark-deps-hadoop-3.2-hive-2.3
+++ b/dev/deps/spark-deps-hadoop-3.2-hive-2.3
@@ -179,7 +179,7 @@ parquet-format-structures/1.12.2//parquet-format-structures-1.12.2.jar
 parquet-hadoop/1.12.2//parquet-hadoop-1.12.2.jar
 parquet-jackson/1.12.2//parquet-jackson-1.12.2.jar
 protobuf-java/2.5.0//protobuf-java-2.5.0.jar
-py4j/0.10.9.3//py4j-0.10.9.3.jar
+py4j/0.10.9.5//py4j-0.10.9.5.jar
 pyrolite/4.30//pyrolite-4.30.jar
 rocksdbjni/6.20.3//rocksdbjni-6.20.3.jar
 scala-collection-compat_2.12/2.1.1//scala-collection-compat_2.12-2.1.1.jar
diff --git a/docs/job-scheduling.md b/docs/job-scheduling.md
index 65305fe..40dbd5a 100644
--- a/docs/job-scheduling.md
+++ b/docs/job-scheduling.md
@@ -301,5 +301,5 @@ via `sc.setJobGroup` in a separate PVM thread, which also disallows to cancel th
 later.
 
 `pyspark.InheritableThread` is recommended to use together for a PVM thread to inherit the inheritable attributes
- such as local properties in a JVM thread, and to avoid resource leak.
+ such as local properties in a JVM thread.
 
diff --git a/python/docs/Makefile b/python/docs/Makefile
index 9cb1a17..14e5214 100644
--- a/python/docs/Makefile
+++ b/python/docs/Makefile
@@ -21,7 +21,7 @@ SPHINXBUILD   ?= sphinx-build
 SOURCEDIR     ?= source
 BUILDDIR      ?= build
 
-export PYTHONPATH=$(realpath ..):$(realpath ../lib/py4j-0.10.9.3-src.zip)
+export PYTHONPATH=$(realpath ..):$(realpath ../lib/py4j-0.10.9.5-src.zip)
 
 # Put it first so that "make" without argument is like "make help".
 help:
diff --git a/python/docs/make2.bat b/python/docs/make2.bat
index 2e4e2b5..d36b7a1 100644
--- a/python/docs/make2.bat
+++ b/python/docs/make2.bat
@@ -25,7 +25,7 @@ if "%SPHINXBUILD%" == "" (
 set SOURCEDIR=source
 set BUILDDIR=build
 
-set PYTHONPATH=..;..\lib\py4j-0.10.9.3-src.zip
+set PYTHONPATH=..;..\lib\py4j-0.10.9.5-src.zip
 
 if "%1" == "" goto help
 
diff --git a/python/docs/source/getting_started/install.rst b/python/docs/source/getting_started/install.rst
index ee66067..e98b7d2 100644
--- a/python/docs/source/getting_started/install.rst
+++ b/python/docs/source/getting_started/install.rst
@@ -157,7 +157,7 @@ Package       Minimum supported version Note
 `pandas`      0.23.2                    Optional for Spark SQL
 `NumPy`       1.7                       Required for MLlib DataFrame-based API
 `pyarrow`     1.0.0                     Optional for Spark SQL
-`Py4J`        0.10.9.3                  Required
+`Py4J`        0.10.9.5                  Required
 `pandas`      0.23.2                    Required for pandas API on Spark
 `pyarrow`     1.0.0                     Required for pandas API on Spark
 `Numpy`       1.14                      Required for pandas API on Spark
diff --git a/python/lib/py4j-0.10.9.3-src.zip b/python/lib/py4j-0.10.9.3-src.zip
deleted file mode 100644
index 428f3ac..0000000
Binary files a/python/lib/py4j-0.10.9.3-src.zip and /dev/null differ
diff --git a/python/lib/py4j-0.10.9.5-src.zip b/python/lib/py4j-0.10.9.5-src.zip
new file mode 100644
index 0000000..478d4b0
Binary files /dev/null and b/python/lib/py4j-0.10.9.5-src.zip differ
diff --git a/python/pyspark/context.py b/python/pyspark/context.py
index 6c94106..4d58dd1 100644
--- a/python/pyspark/context.py
+++ b/python/pyspark/context.py
@@ -1113,7 +1113,7 @@ class SparkContext(object):
         to HDFS-1208, where HDFS may respond to Thread.interrupt() by marking nodes as dead.
 
         If you run jobs in parallel, use :class:`pyspark.InheritableThread` for thread
-        local inheritance, and preventing resource leak.
+        local inheritance.
 
         Examples
         --------
@@ -1153,7 +1153,7 @@ class SparkContext(object):
         Notes
         -----
         If you run jobs in parallel, use :class:`pyspark.InheritableThread` for thread
-        local inheritance, and preventing resource leak.
+        local inheritance.
         """
         self._jsc.setLocalProperty(key, value)
 
@@ -1171,7 +1171,7 @@ class SparkContext(object):
         Notes
         -----
         If you run jobs in parallel, use :class:`pyspark.InheritableThread` for thread
-        local inheritance, and preventing resource leak.
+        local inheritance.
         """
         self._jsc.setJobDescription(value)
 
diff --git a/python/pyspark/util.py b/python/pyspark/util.py
index e0933f1..1e2c848 100644
--- a/python/pyspark/util.py
+++ b/python/pyspark/util.py
@@ -320,12 +320,9 @@ def inheritable_thread_target(f):
 
         @functools.wraps(f)
         def wrapped(*args, **kwargs):
-            try:
-                # Set local properties in child thread.
-                SparkContext._active_spark_context._jsc.sc().setLocalProperties(properties)
-                return f(*args, **kwargs)
-            finally:
-                InheritableThread._clean_py4j_conn_for_current_thread()
+            # Set local properties in child thread.
+            SparkContext._active_spark_context._jsc.sc().setLocalProperties(properties)
+            return f(*args, **kwargs)
         return wrapped
     else:
         return f
@@ -360,10 +357,7 @@ class InheritableThread(threading.Thread):
                 # self._props is set before starting the thread to match the behavior with JVM.
                 assert hasattr(self, "_props")
                 SparkContext._active_spark_context._jsc.sc().setLocalProperties(self._props)
-                try:
-                    return target(*a, **k)
-                finally:
-                    InheritableThread._clean_py4j_conn_for_current_thread()
+                return target(*a, **k)
 
             super(InheritableThread, self).__init__(
                 target=copy_local_properties, *args, **kwargs)
@@ -380,24 +374,6 @@ class InheritableThread(threading.Thread):
             self._props = SparkContext._active_spark_context._jsc.sc().getLocalProperties().clone()
         return super(InheritableThread, self).start(*args, **kwargs)
 
-    @staticmethod
-    def _clean_py4j_conn_for_current_thread():
-        from pyspark import SparkContext
-
-        jvm = SparkContext._jvm
-        thread_connection = jvm._gateway_client.get_thread_connection()
-        if thread_connection is not None:
-            try:
-                # Dequeue is shared across other threads but it's thread-safe.
-                # If this function has to be invoked one more time in the same thead
-                # Py4J will create a new connection automatically.
-                jvm._gateway_client.deque.remove(thread_connection)
-            except ValueError:
-                # Should never reach this point
-                return
-            finally:
-                thread_connection.close()
-
 
 if __name__ == "__main__":
     if "pypy" not in platform.python_implementation().lower() and 
sys.version_info[:2] >= (3, 7):
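
The hunks above remove the manual per-thread Py4J connection cleanup
(`InheritableThread._clean_py4j_conn_for_current_thread` and its `try`/`finally`
callers), matching the "avoid resource leak" wording dropped from the docs and
docstrings earlier in this patch. The user-facing `InheritableThread` API is
unchanged; below is a minimal usage sketch (not from this patch), assuming a
local session and a hypothetical fair-scheduler pool name:

```python
from pyspark import InheritableThread
from pyspark.sql import SparkSession

spark = SparkSession.builder.master("local[*]").getOrCreate()
sc = spark.sparkContext

# A local property set in the parent (PVM) thread, e.g. a fair-scheduler pool.
sc.setLocalProperty("spark.scheduler.pool", "pool1")


def run_job():
    # The child thread inherits the parent's JVM-side local properties.
    spark.range(10).count()


t = InheritableThread(target=run_job)
t.start()
t.join()
```
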
diff --git a/python/setup.py b/python/setup.py
index 962f232..8a873c2 100755
--- a/python/setup.py
+++ b/python/setup.py
@@ -258,7 +258,7 @@ try:
         license='http://www.apache.org/licenses/LICENSE-2.0',
         # Don't forget to update python/docs/source/getting_started/install.rst
         # if you're updating the versions or dependencies.
-        install_requires=['py4j==0.10.9.3'],
+        install_requires=['py4j==0.10.9.5'],
         extras_require={
             'ml': ['numpy>=1.7'],
             'mllib': ['numpy>=1.7'],
diff --git a/sbin/spark-config.sh b/sbin/spark-config.sh
index f27b6fe..6044de2 100755
--- a/sbin/spark-config.sh
+++ b/sbin/spark-config.sh
@@ -28,6 +28,6 @@ export SPARK_CONF_DIR="${SPARK_CONF_DIR:-"${SPARK_HOME}/conf"}"
 # Add the PySpark classes to the PYTHONPATH:
 if [ -z "${PYSPARK_PYTHONPATH_SET}" ]; then
   export PYTHONPATH="${SPARK_HOME}/python:${PYTHONPATH}"
-  export PYTHONPATH="${SPARK_HOME}/python/lib/py4j-0.10.9.3-src.zip:${PYTHONPATH}"
+  export PYTHONPATH="${SPARK_HOME}/python/lib/py4j-0.10.9.5-src.zip:${PYTHONPATH}"
   export PYSPARK_PYTHONPATH_SET=1
 fi

