(spark) branch master updated: [SPARK-47027][PYTHON][TESTS] Use temporary directories for profiler test outputs

dongjoon Mon, 12 Feb 2024 13:36:01 -0800

This is an automated email from the ASF dual-hosted git repository.

dongjoon pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git



The following commit(s) were added to refs/heads/master by this push:
     new 24a9d25358f7 [SPARK-47027][PYTHON][TESTS] Use temporary directories for profiler test outputs
24a9d25358f7 is described below

commit 24a9d25358f71e5634240aa29c600588b838edb2
Author: Takuya UESHIN <ues...@databricks.com>
AuthorDate: Mon Feb 12 13:35:45 2024 -0800

    [SPARK-47027][PYTHON][TESTS] Use temporary directories for profiler test outputs
    
    ### What changes were proposed in this pull request?
    
    Use temporary directories for profiler test outputs instead of `tempfile.gettempdir()`.
    
    ### Why are the changes needed?
    
    Directly using `tempfile.gettempdir()` can leave the files there after each test.
    
    ### Does this PR introduce _any_ user-facing change?
    
    No.
    
    ### How was this patch tested?
    
    Existing tests.
    
    ### Was this patch authored or co-authored using generative AI tooling?
    
    No.
    
    Closes #45087 from ueshin/issues/SPARK-47027/tempdir.
    
    Authored-by: Takuya UESHIN <ues...@databricks.com>
    Signed-off-by: Dongjoon Hyun <dh...@apple.com>
---
 python/pyspark/sql/tests/test_udf_profiler.py | 28 +++++++++++++--------------
 python/pyspark/tests/test_memory_profiler.py  |  6 +++---
 python/pyspark/tests/test_profiler.py         |  6 +++---
 3 files changed, 20 insertions(+), 20 deletions(-)

diff --git a/python/pyspark/sql/tests/test_udf_profiler.py b/python/pyspark/sql/tests/test_udf_profiler.py
index 4f767d274414..764a860026f8 100644
--- a/python/pyspark/sql/tests/test_udf_profiler.py
+++ b/python/pyspark/sql/tests/test_udf_profiler.py
@@ -82,20 +82,20 @@ class UDFProfilerTests(unittest.TestCase):
         finally:
             sys.stdout = old_stdout
 
-        d = tempfile.gettempdir()
-        self.sc.dump_profiles(d)
-
-        for i, udf_name in enumerate(["add1", "add2", "add1", "add2"]):
-            id, profiler, _ = profilers[i]
-            with self.subTest(id=id, udf_name=udf_name):
-                stats = profiler.stats()
-                self.assertTrue(stats is not None)
-                width, stat_list = stats.get_print_list([])
-                func_names = [func_name for fname, n, func_name in stat_list]
-                self.assertTrue(udf_name in func_names)
-
-                self.assertTrue(udf_name in io.getvalue())
-                self.assertTrue("udf_%d.pstats" % id in os.listdir(d))
+        with tempfile.TemporaryDirectory() as d:
+            self.sc.dump_profiles(d)
+
+            for i, udf_name in enumerate(["add1", "add2", "add1", "add2"]):
+                id, profiler, _ = profilers[i]
+                with self.subTest(id=id, udf_name=udf_name):
+                    stats = profiler.stats()
+                    self.assertTrue(stats is not None)
+                    width, stat_list = stats.get_print_list([])
+                    func_names = [func_name for fname, n, func_name in stat_list]
+                    self.assertTrue(udf_name in func_names)
+
+                    self.assertTrue(udf_name in io.getvalue())
+                    self.assertTrue("udf_%d.pstats" % id in os.listdir(d))
 
     def test_custom_udf_profiler(self):
         class TestCustomProfiler(UDFBasicProfiler):
diff --git a/python/pyspark/tests/test_memory_profiler.py b/python/pyspark/tests/test_memory_profiler.py
index 536f38679c3e..aa3541620446 100644
--- a/python/pyspark/tests/test_memory_profiler.py
+++ b/python/pyspark/tests/test_memory_profiler.py
@@ -106,9 +106,9 @@ class MemoryProfilerTests(PySparkTestCase):
             self.sc.show_profiles()
         self.assertTrue("plus_one" in fake_out.getvalue())
 
-        d = tempfile.gettempdir()
-        self.sc.dump_profiles(d)
-        self.assertTrue("udf_%d_memory.txt" % id in os.listdir(d))
+        with tempfile.TemporaryDirectory() as d:
+            self.sc.dump_profiles(d)
+            self.assertTrue("udf_%d_memory.txt" % id in os.listdir(d))
 
     def test_profile_pandas_udf(self):
         udfs = [self.exec_pandas_udf_ser_to_ser, self.exec_pandas_udf_ser_to_scalar]
diff --git a/python/pyspark/tests/test_profiler.py b/python/pyspark/tests/test_profiler.py
index b7797ead2adb..a12bc99c54ae 100644
--- a/python/pyspark/tests/test_profiler.py
+++ b/python/pyspark/tests/test_profiler.py
@@ -54,9 +54,9 @@ class ProfilerTests(PySparkTestCase):
         self.assertTrue("heavy_foo" in io.getvalue())
         sys.stdout = old_stdout
 
-        d = tempfile.gettempdir()
-        self.sc.dump_profiles(d)
-        self.assertTrue("rdd_%d.pstats" % id in os.listdir(d))
+        with tempfile.TemporaryDirectory() as d:
+            self.sc.dump_profiles(d)
+            self.assertTrue("rdd_%d.pstats" % id in os.listdir(d))
 
     def test_custom_profiler(self):
         class TestCustomProfiler(BasicProfiler):


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

(spark) branch master updated: [SPARK-47027][PYTHON][TESTS] Use temporary directories for profiler test outputs

Reply via email to