Repository: spark
Updated Branches:
  refs/heads/branch-2.3 05239afc9 -> 2ba07d5b1


[SPARK-23300][TESTS][BRANCH-2.3] Prints out if Pandas and PyArrow are installed or not in PySpark SQL tests

This PR backports https://github.com/apache/spark/pull/20473 to branch-2.3.
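
At its core, the check probes the target Python executable for a package's
version in a subprocess and compares the result against a minimum using
LooseVersion. A minimal standalone sketch of that pattern follows (the helper
name probe_package_version is hypothetical; the patch itself goes through the
subprocess_check_output wrapper from sparktestsupport):

    import os
    import subprocess
    from distutils.version import LooseVersion

    def probe_package_version(python_exec, package):
        # Query the *target* interpreter rather than the current one, since
        # tests may run against several Python installations; silence stderr
        # so a failed import does not leak a traceback into the log. Raises
        # CalledProcessError when the package is not importable.
        with open(os.devnull, 'w') as devnull:
            return subprocess.check_output(
                [python_exec, "-c",
                 "import %s; print(%s.__version__)" % (package, package)],
                universal_newlines=True, stderr=devnull).strip()

    if LooseVersion(probe_package_version("python3", "pyarrow")) >= LooseVersion("0.8.0"):
        print("PyArrow related tests can run")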

Author: hyukjinkwon <gurwls...@gmail.com>

Closes #20533 from HyukjinKwon/backport-20473.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/2ba07d5b
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/2ba07d5b
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/2ba07d5b

Branch: refs/heads/branch-2.3
Commit: 2ba07d5b101c44382e0db6d660da756c2f5ce627
Parents: 05239af
Author: hyukjinkwon <gurwls...@gmail.com>
Authored: Thu Feb 8 09:29:31 2018 +0900
Committer: hyukjinkwon <gurwls...@gmail.com>
Committed: Thu Feb 8 09:29:31 2018 +0900

----------------------------------------------------------------------
 python/run-tests.py | 56 +++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 55 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/2ba07d5b/python/run-tests.py
----------------------------------------------------------------------
diff --git a/python/run-tests.py b/python/run-tests.py
index 1341086..3539c76 100755
--- a/python/run-tests.py
+++ b/python/run-tests.py
@@ -31,6 +31,7 @@ if sys.version < '3':
     import Queue
 else:
     import queue as Queue
+from distutils.version import LooseVersion
 
 
 # Append `SPARK_HOME/dev` to the Python path so that we can import the sparktestsupport module
@@ -39,7 +40,7 @@ sys.path.append(os.path.join(os.path.dirname(os.path.realpath(__file__)), "../dev/"))
 
 from sparktestsupport import SPARK_HOME  # noqa (suppress pep8 warnings)
 from sparktestsupport.shellutils import which, subprocess_check_output  # noqa
-from sparktestsupport.modules import all_modules  # noqa
+from sparktestsupport.modules import all_modules, pyspark_sql  # noqa
 
 
 python_modules = dict((m.name, m) for m in all_modules if m.python_test_goals if m.name != 'root')
@@ -151,6 +152,55 @@ def parse_opts():
     return opts
 
 
+def _check_dependencies(python_exec, modules_to_test):
+    # If we should test 'pyspark-sql', check whether PyArrow and Pandas are installed
+    # and explicitly print it out. See SPARK-23300.
+    if pyspark_sql in modules_to_test:
+        # TODO(HyukjinKwon): Relocate and deduplicate these version specifications.
+        minimum_pyarrow_version = '0.8.0'
+        minimum_pandas_version = '0.19.2'
+
+        try:
+            pyarrow_version = subprocess_check_output(
+                [python_exec, "-c", "import pyarrow; print(pyarrow.__version__)"],
+                universal_newlines=True,
+                stderr=open(os.devnull, 'w')).strip()
+            if LooseVersion(pyarrow_version) >= LooseVersion(minimum_pyarrow_version):
+                LOGGER.info("Will test PyArrow related features against Python executable "
+                            "'%s' in '%s' module." % (python_exec, pyspark_sql.name))
+            else:
+                LOGGER.warning(
+                    "Will skip PyArrow related features against Python executable "
+                    "'%s' in '%s' module. PyArrow >= %s is required; however, PyArrow "
+                    "%s was found." % (
+                        python_exec, pyspark_sql.name, minimum_pyarrow_version, pyarrow_version))
+        except Exception:
+            LOGGER.warning(
+                "Will skip PyArrow related features against Python executable "
+                "'%s' in '%s' module. PyArrow >= %s is required; however, PyArrow "
+                "was not found." % (python_exec, pyspark_sql.name, minimum_pyarrow_version))
+
+        try:
+            pandas_version = subprocess_check_output(
+                [python_exec, "-c", "import pandas; print(pandas.__version__)"],
+                universal_newlines=True,
+                stderr=open(os.devnull, 'w')).strip()
+            if LooseVersion(pandas_version) >= LooseVersion(minimum_pandas_version):
+                LOGGER.info("Will test Pandas related features against Python executable "
+                            "'%s' in '%s' module." % (python_exec, pyspark_sql.name))
+            else:
+                LOGGER.warning(
+                    "Will skip Pandas related features against Python executable "
+                    "'%s' in '%s' module. Pandas >= %s is required; however, Pandas "
+                    "%s was found." % (
+                        python_exec, pyspark_sql.name, minimum_pandas_version, pandas_version))
+        except Exception:
+            LOGGER.warning(
+                "Will skip Pandas related features against Python executable "
+                "'%s' in '%s' module. Pandas >= %s is required; however, Pandas "
+                "was not found." % (python_exec, pyspark_sql.name, minimum_pandas_version))
+
+
 def main():
     opts = parse_opts()
     if (opts.verbose):
@@ -175,6 +225,10 @@ def main():
 
     task_queue = Queue.PriorityQueue()
     for python_exec in python_execs:
+        # Check whether the Python executable has the dependencies installed
+        # that are needed to run tests for the given modules.
+        _check_dependencies(python_exec, modules_to_test)
+
         python_implementation = subprocess_check_output(
             [python_exec, "-c", "import platform; print(platform.python_implementation())"],
             universal_newlines=True).strip()

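A side note on why the patch compares versions with LooseVersion rather than
with plain strings: LooseVersion splits a version into components and compares
numeric parts numerically, which lexicographic string comparison gets wrong as
soon as a component reaches two digits. A quick illustration:

    from distutils.version import LooseVersion

    # Numeric-aware comparison: component 10 > 8, so this holds.
    assert LooseVersion("0.10.0") >= LooseVersion("0.8.0")

    # Lexicographic string comparison gets it backwards, since "1" < "8".
    assert not ("0.10.0" >= "0.8.0")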
