Github user HyukjinKwon commented on a diff in the pull request: https://github.com/apache/spark/pull/20473#discussion_r165555089 --- Diff: python/run-tests.py --- @@ -151,6 +151,38 @@ def parse_opts(): return opts +def _check_dependencies(python_exec, modules_to_test): + if "COVERAGE_PROCESS_START" in os.environ: + # Make sure if coverage is installed. + try: + subprocess_check_output( + [python_exec, "-c", "import coverage"], + stderr=open(os.devnull, 'w')) + except: + print_red("Coverage is not installed in Python executable '%s' " + "but 'COVERAGE_PROCESS_START' environment variable is set, " + "exiting." % python_exec) + sys.exit(-1) + + if pyspark_sql in modules_to_test: + # If we should test 'pyspark-sql', it checks if PyArrow and Pandas are installed and + # explicitly prints out. See SPARK-23300. + try: + subprocess_check_output( + [python_exec, "-c", "import pyarrow"], + stderr=open(os.devnull, 'w')) + except: --- End diff -- https://github.com/apache/spark/pull/20473#discussion_r165445232 is easy, but I think https://github.com/apache/spark/pull/20473#discussion_r165445947 makes things complicated. Let me try it to show what it looks like.
--- --------------------------------------------------------------------- To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org