This is an automated email from the ASF dual-hosted git repository. dongjoon pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push: new 37f1fae3514 [SPARK-41394][PYTHON][TESTS] Skip `MemoryProfilerTests` when pandas is not installed 37f1fae3514 is described below commit 37f1fae35149391db80c7b33b6716ab97e0b46a2 Author: Dongjoon Hyun <dongj...@apache.org> AuthorDate: Mon Dec 5 14:29:54 2022 -0800 [SPARK-41394][PYTHON][TESTS] Skip `MemoryProfilerTests` when pandas is not installed ### What changes were proposed in this pull request? This PR aims to skip `pandas`-related tests of `MemoryProfilerTests` when `pandas` is not installed. ### Why are the changes needed? For Apache Spark 3.4, this restores module tests (such as `pyspark-core`) so that they pass when `pandas` is not installed, as they did before SPARK-40281 (#38584). ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Ran the individual test: ``` python/run-tests --testnames pyspark.tests.test_memory_profiler --python-executables python3 ``` Closes #38920 from dongjoon-hyun/SPARK-41394. Authored-by: Dongjoon Hyun <dongj...@apache.org> Signed-off-by: Dongjoon Hyun <dongj...@apache.org> --- python/pyspark/tests/test_memory_profiler.py | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/python/pyspark/tests/test_memory_profiler.py b/python/pyspark/tests/test_memory_profiler.py index 3dc8ce4ce22..cdb75e5b6aa 100644 --- a/python/pyspark/tests/test_memory_profiler.py +++ b/python/pyspark/tests/test_memory_profiler.py @@ -24,16 +24,16 @@ from io import StringIO from typing import Iterator from unittest import mock -import pandas as pd - from pyspark import SparkConf, SparkContext from pyspark.profiler import has_memory_profiler from pyspark.sql import SparkSession from pyspark.sql.functions import pandas_udf, udf +from pyspark.testing.sqlutils import have_pandas, pandas_requirement_message from pyspark.testing.utils import PySparkTestCase @unittest.skipIf(not has_memory_profiler, "Must have memory-profiler installed.") +@unittest.skipIf(not have_pandas, 
pandas_requirement_message) # type: ignore class MemoryProfilerTests(PySparkTestCase): def setUp(self): self._old_sys_path = list(sys.path) @@ -103,6 +103,8 @@ class MemoryProfilerTests(PySparkTestCase): self.spark.range(10).select(plus_one("id")).collect() def exec_pandas_udf_ser_to_ser(self): + import pandas as pd + @pandas_udf("int") def ser_to_ser(ser: pd.Series) -> pd.Series: return ser + 1 @@ -110,6 +112,8 @@ class MemoryProfilerTests(PySparkTestCase): self.spark.range(10).select(ser_to_ser("id")).collect() def exec_pandas_udf_ser_to_scalar(self): + import pandas as pd + @pandas_udf("int") def ser_to_scalar(ser: pd.Series) -> float: return ser.median() @@ -118,6 +122,8 @@ class MemoryProfilerTests(PySparkTestCase): # Unsupported def exec_pandas_udf_iter_to_iter(self): + import pandas as pd + @pandas_udf("int") def iter_to_iter(batch_ser: Iterator[pd.Series]) -> Iterator[pd.Series]: for ser in batch_ser: @@ -126,6 +132,8 @@ class MemoryProfilerTests(PySparkTestCase): self.spark.range(10).select(iter_to_iter("id")).collect() def exec_grouped_map(self): + import pandas as pd + def grouped_map(pdf: pd.DataFrame) -> pd.DataFrame: return pdf.assign(v=pdf.v - pdf.v.mean()) @@ -134,6 +142,8 @@ class MemoryProfilerTests(PySparkTestCase): # Unsupported def exec_map(self): + import pandas as pd + def map(pdfs: Iterator[pd.DataFrame]) -> Iterator[pd.DataFrame]: for pdf in pdfs: yield pdf[pdf.id == 1] --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org