This is an automated email from the ASF dual-hosted git repository.

gurwls223 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new cac6f58318b [SPARK-43176][CONNECT][PYTHON][TESTS] Deduplicate imports in Connect Tests
cac6f58318b is described below

commit cac6f58318bb84d532f02d245a50d3c66daa3e4b
Author: Ruifeng Zheng <ruife...@apache.org>
AuthorDate: Wed Apr 19 19:33:49 2023 +0900

    [SPARK-43176][CONNECT][PYTHON][TESTS] Deduplicate imports in Connect Tests
    
    ### What changes were proposed in this pull request?
    Deduplicate imports in the Connect tests: imports that were repeated
    inside many individual test methods are hoisted into a single
    module-level block (guarded by `should_test_connect` where needed).
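
    A minimal, schematic sketch of the pattern (using `test_broadcast` from
    the diff below as one example of the many affected methods):

    ```python
    # Before: every test method re-imported the same modules.
    def test_broadcast(self):
        from pyspark.sql import functions as SF
        from pyspark.sql.connect import functions as CF
        ...

    # After: imported once at module level, guarded so the test module
    # still imports cleanly when Connect dependencies are unavailable.
    if should_test_connect:
        from pyspark.sql import functions as SF
        from pyspark.sql.connect import functions as CF
    ```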
    
    ### Why are the changes needed?
    For simplicity: the same `SF`/`CF` import pairs were repeated at the
    top of many individual test methods.
    
    ### Does this PR introduce _any_ user-facing change?
    No, test-only
    
    ### How was this patch tested?
    Updated the existing unit tests; no new tests are needed since this is
    an import-only cleanup.
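
    For example, the affected suites can be run locally with the standard
    PySpark test runner (exact invocation may vary by branch):

    ```
    python/run-tests --testnames 'pyspark.sql.tests.connect.test_connect_function'
    ```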
    
    Closes #40839 from zhengruifeng/connect_test_import.
    
    Authored-by: Ruifeng Zheng <ruife...@apache.org>
    Signed-off-by: Hyukjin Kwon <gurwls...@apache.org>
---
 .../sql/tests/connect/test_connect_basic.py        | 10 ---
 .../sql/tests/connect/test_connect_column.py       | 15 +---
 .../sql/tests/connect/test_connect_function.py     | 96 +++-------------------
 3 files changed, 11 insertions(+), 110 deletions(-)

diff --git a/python/pyspark/sql/tests/connect/test_connect_basic.py b/python/pyspark/sql/tests/connect/test_connect_basic.py
index 2c1b6342924..9d12eb2b26e 100644
--- a/python/pyspark/sql/tests/connect/test_connect_basic.py
+++ b/python/pyspark/sql/tests/connect/test_connect_basic.py
@@ -466,9 +466,6 @@ class SparkConnectBasicTests(SparkConnectSQLTestCase):
         )
 
     def test_collect_timestamp(self):
-        from pyspark.sql import functions as SF
-        from pyspark.sql.connect import functions as CF
-
         query = """
             SELECT * FROM VALUES
             (TIMESTAMP('2022-12-25 10:30:00'), 1),
@@ -652,10 +649,6 @@ class SparkConnectBasicTests(SparkConnectSQLTestCase):
 
     def test_with_none_and_nan(self):
         # SPARK-41855: make createDataFrame support None and NaN
-
-        from pyspark.sql import functions as SF
-        from pyspark.sql.connect import functions as CF
-
         # SPARK-41814: test with eqNullSafe
         data1 = [Row(id=1, value=float("NaN")), Row(id=2, value=42.0), Row(id=3, value=None)]
         data2 = [Row(id=1, value=np.nan), Row(id=2, value=42.0), Row(id=3, value=None)]
@@ -1662,9 +1655,6 @@ class SparkConnectBasicTests(SparkConnectSQLTestCase):
 
     def test_observe(self):
         # SPARK-41527: test DataFrame.observe()
-        from pyspark.sql import functions as SF
-        from pyspark.sql.connect import functions as CF
-
         observation_name = "my_metric"
 
         self.assert_eq(
diff --git a/python/pyspark/sql/tests/connect/test_connect_column.py b/python/pyspark/sql/tests/connect/test_connect_column.py
index 2a22ca6ad8d..5703f8d2a3c 100644
--- a/python/pyspark/sql/tests/connect/test_connect_column.py
+++ b/python/pyspark/sql/tests/connect/test_connect_column.py
@@ -18,7 +18,6 @@
 import decimal
 import datetime
 
-from pyspark.sql import functions as SF
 from pyspark.sql.types import (
     Row,
     StructField,
@@ -48,6 +47,7 @@ from pyspark.sql.tests.connect.test_connect_basic import SparkConnectSQLTestCase
 
 if should_test_connect:
     import pandas as pd
+    from pyspark.sql import functions as SF
     from pyspark.sql.connect import functions as CF
     from pyspark.sql.connect.column import Column
     from pyspark.sql.connect.expressions import DistributedSequenceID, LiteralExpression
@@ -482,9 +482,6 @@ class SparkConnectColumnTests(SparkConnectSQLTestCase):
         cdf = self.connect.range(0, 1)
         sdf = self.spark.range(0, 1)
 
-        from pyspark.sql import functions as SF
-        from pyspark.sql.connect import functions as CF
-
         cdf1 = cdf.select(
             CF.lit(0),
             CF.lit(1),
@@ -679,9 +676,6 @@ class SparkConnectColumnTests(SparkConnectSQLTestCase):
 
     def test_column_bitwise_ops(self):
         # SPARK-41751: test bitwiseAND, bitwiseOR, bitwiseXOR
-        from pyspark.sql import functions as SF
-        from pyspark.sql.connect import functions as CF
-
         query = """
             SELECT * FROM VALUES
             (1, 1, 0), (2, NULL, 1), (3, 3, 4)
@@ -718,9 +712,6 @@ class SparkConnectColumnTests(SparkConnectSQLTestCase):
         )
 
     def test_column_accessor(self):
-        from pyspark.sql import functions as SF
-        from pyspark.sql.connect import functions as CF
-
         query = """
             SELECT STRUCT(a, b, c) AS x, y, z, c FROM VALUES
             (float(1.0), double(1.0), '2022', MAP('b', '123', 'a', 'kk'), ARRAY(1, 2, 3)),
@@ -840,10 +831,6 @@ class SparkConnectColumnTests(SparkConnectSQLTestCase):
 
     def test_column_field_ops(self):
         # SPARK-41767: test withField, dropFields
-
-        from pyspark.sql import functions as SF
-        from pyspark.sql.connect import functions as CF
-
         query = """
             SELECT STRUCT(a, b, c, d) AS x, e FROM VALUES
             (float(1.0), double(1.0), '2022', 1, 0),
diff --git a/python/pyspark/sql/tests/connect/test_connect_function.py b/python/pyspark/sql/tests/connect/test_connect_function.py
index 563db9ea63d..57b39310fe8 100644
--- a/python/pyspark/sql/tests/connect/test_connect_function.py
+++ b/python/pyspark/sql/tests/connect/test_connect_function.py
@@ -21,10 +21,19 @@ from pyspark.errors import PySparkTypeError
 from pyspark.sql import SparkSession as PySparkSession
 from pyspark.sql.types import StringType, StructType, StructField, ArrayType, IntegerType
 from pyspark.testing.pandasutils import PandasOnSparkTestUtils
-from pyspark.testing.connectutils import ReusedConnectTestCase
+from pyspark.testing.connectutils import ReusedConnectTestCase, should_test_connect
 from pyspark.testing.sqlutils import SQLTestUtils
 from pyspark.errors.exceptions.connect import AnalysisException, SparkConnectException
 
+if should_test_connect:
+    from pyspark.sql.connect.column import Column
+    from pyspark.sql import functions as SF
+    from pyspark.sql.window import Window as SW
+    from pyspark.sql.dataframe import DataFrame as SDF
+    from pyspark.sql.connect import functions as CF
+    from pyspark.sql.connect.window import Window as CW
+    from pyspark.sql.connect.dataframe import DataFrame as CDF
+
 
 class SparkConnectFunctionTests(ReusedConnectTestCase, PandasOnSparkTestUtils, SQLTestUtils):
     """These test cases exercise the interface to the proto plan
@@ -47,9 +56,6 @@ class SparkConnectFunctionTests(ReusedConnectTestCase, PandasOnSparkTestUtils, S
         del os.environ["PYSPARK_NO_NAMESPACE_SHARE"]
 
     def compare_by_show(self, df1, df2, n: int = 20, truncate: int = 20):
-        from pyspark.sql.dataframe import DataFrame as SDF
-        from pyspark.sql.connect.dataframe import DataFrame as CDF
-
         assert isinstance(df1, (SDF, CDF))
         if isinstance(df1, SDF):
             str1 = df1._jdf.showString(n, truncate, False)
@@ -66,10 +72,6 @@ class SparkConnectFunctionTests(ReusedConnectTestCase, PandasOnSparkTestUtils, S
 
     def test_count_star(self):
         # SPARK-42099: test count(*), count(col(*)) and count(expr(*))
-
-        from pyspark.sql import functions as SF
-        from pyspark.sql.connect import functions as CF
-
         data = [(2, "Alice"), (3, "Alice"), (5, "Bob"), (10, "Bob")]
 
         cdf = self.connect.createDataFrame(data, schema=["age", "name"])
@@ -123,9 +125,6 @@ class SparkConnectFunctionTests(ReusedConnectTestCase, PandasOnSparkTestUtils, S
         )
 
     def test_broadcast(self):
-        from pyspark.sql import functions as SF
-        from pyspark.sql.connect import functions as CF
-
         query = """
             SELECT * FROM VALUES
             (0, float("NAN"), NULL), (1, NULL, 2.0), (2, 2.1, 3.5)
@@ -174,9 +173,6 @@ class SparkConnectFunctionTests(ReusedConnectTestCase, PandasOnSparkTestUtils, S
         )
 
     def test_normal_functions(self):
-        from pyspark.sql import functions as SF
-        from pyspark.sql.connect import functions as CF
-
         query = """
             SELECT * FROM VALUES
             (0, float("NAN"), NULL), (1, NULL, 2.0), (2, 2.1, 3.5)
@@ -261,9 +257,6 @@ class SparkConnectFunctionTests(ReusedConnectTestCase, PandasOnSparkTestUtils, S
         )
 
     def test_when_otherwise(self):
-        from pyspark.sql import functions as SF
-        from pyspark.sql.connect import functions as CF
-
         query = """
             SELECT * FROM VALUES
             (0, float("NAN"), NULL), (1, NULL, 2.0), (2, 2.1, 3.5), (3, 3.1, float("NAN"))
@@ -375,9 +368,6 @@ class SparkConnectFunctionTests(ReusedConnectTestCase, PandasOnSparkTestUtils, S
         )
 
     def test_sorting_functions_with_column(self):
-        from pyspark.sql.connect import functions as CF
-        from pyspark.sql.connect.column import Column
-
         funs = [
             CF.asc_nulls_first,
             CF.asc_nulls_last,
@@ -403,9 +393,6 @@ class SparkConnectFunctionTests(ReusedConnectTestCase, PandasOnSparkTestUtils, S
             self.assertIn("""DESC NULLS LAST'""", str(res))
 
     def test_sort_with_nulls_order(self):
-        from pyspark.sql import functions as SF
-        from pyspark.sql.connect import functions as CF
-
         query = """
             SELECT * FROM VALUES
             (false, 1, NULL), (true, NULL, 2.0), (NULL, 3, 3.0)
@@ -449,9 +436,6 @@ class SparkConnectFunctionTests(ReusedConnectTestCase, PandasOnSparkTestUtils, S
             )
 
     def test_math_functions(self):
-        from pyspark.sql import functions as SF
-        from pyspark.sql.connect import functions as CF
-
         query = """
             SELECT * FROM VALUES
             (false, 1, NULL), (true, NULL, 2.0), (NULL, 3, 3.5)
@@ -571,9 +555,6 @@ class SparkConnectFunctionTests(ReusedConnectTestCase, PandasOnSparkTestUtils, S
         )
 
     def test_aggregation_functions(self):
-        from pyspark.sql import functions as SF
-        from pyspark.sql.connect import functions as CF
-
         query = """
             SELECT * FROM VALUES
             (0, float("NAN"), NULL), (1, NULL, 2.0), (1, 2.1, 3.5), (0, 0.5, 1.0)
@@ -694,11 +675,6 @@ class SparkConnectFunctionTests(ReusedConnectTestCase, PandasOnSparkTestUtils, S
         )
 
     def test_window_functions(self):
-        from pyspark.sql import functions as SF
-        from pyspark.sql.window import Window as SW
-        from pyspark.sql.connect import functions as CF
-        from pyspark.sql.connect.window import Window as CW
-
         self.assertEqual(CW.unboundedPreceding, SW.unboundedPreceding)
 
         self.assertEqual(CW.unboundedFollowing, SW.unboundedFollowing)
@@ -950,12 +926,6 @@ class SparkConnectFunctionTests(ReusedConnectTestCase, PandasOnSparkTestUtils, S
 
     def test_window_order(self):
         # SPARK-41773: test window function with order
-
-        from pyspark.sql import functions as SF
-        from pyspark.sql.window import Window as SW
-        from pyspark.sql.connect import functions as CF
-        from pyspark.sql.connect.window import Window as CW
-
         data = [(1, "a"), (1, "a"), (2, "a"), (1, "b"), (2, "b"), (3, "b")]
         # +---+--------+
         # | id|category|
@@ -1000,9 +970,6 @@ class SparkConnectFunctionTests(ReusedConnectTestCase, PandasOnSparkTestUtils, S
         )
 
     def test_collection_functions(self):
-        from pyspark.sql import functions as SF
-        from pyspark.sql.connect import functions as CF
-
         query = """
             SELECT * FROM VALUES
             (ARRAY('a', 'ab'), ARRAY(1, 2, 3), ARRAY(1, NULL, 3), 1, 2, 'a'),
@@ -1257,9 +1224,6 @@ class SparkConnectFunctionTests(ReusedConnectTestCase, PandasOnSparkTestUtils, S
         )
 
     def test_map_collection_functions(self):
-        from pyspark.sql import functions as SF
-        from pyspark.sql.connect import functions as CF
-
         query = """
             SELECT * FROM VALUES
             (MAP('a', 'ab'), MAP('x', 'ab'), MAP(1, 2, 3, 4), 1, 'a', ARRAY(1, 2), ARRAY('X', 'Y')),
@@ -1315,9 +1279,6 @@ class SparkConnectFunctionTests(ReusedConnectTestCase, PandasOnSparkTestUtils, S
         )
 
     def test_generator_functions(self):
-        from pyspark.sql import functions as SF
-        from pyspark.sql.connect import functions as CF
-
         query = """
             SELECT * FROM VALUES
             (ARRAY('a', 'ab'), ARRAY(1, 2, 3), ARRAY(1, NULL, 3),
@@ -1442,9 +1403,6 @@ class SparkConnectFunctionTests(ReusedConnectTestCase, PandasOnSparkTestUtils, S
         )
 
     def test_lambda_functions(self):
-        from pyspark.sql import functions as SF
-        from pyspark.sql.connect import functions as CF
-
         query = """
             SELECT * FROM VALUES
             (ARRAY('a', 'ab'), ARRAY(1, 2, 3), ARRAY(1, NULL, 3), 1, 2, 'a', NULL, MAP(0, 0)),
@@ -1619,10 +1577,6 @@ class SparkConnectFunctionTests(ReusedConnectTestCase, PandasOnSparkTestUtils, S
 
     def test_nested_lambda_function(self):
         # SPARK-42089: test nested lambda function
-
-        from pyspark.sql import functions as SF
-        from pyspark.sql.connect import functions as CF
-
         query = "SELECT array(1, 2, 3) as numbers, array('a', 'b', 'c') as letters"
 
         cdf = self.connect.sql(query).select(
@@ -1652,9 +1606,6 @@ class SparkConnectFunctionTests(ReusedConnectTestCase, PandasOnSparkTestUtils, S
         self.assertEqual(cdf.collect(), sdf.collect())
 
     def test_csv_functions(self):
-        from pyspark.sql import functions as SF
-        from pyspark.sql.connect import functions as CF
-
         query = """
             SELECT * FROM VALUES
             ('1,2,3', 'a,b,5.0'),
@@ -1732,9 +1683,6 @@ class SparkConnectFunctionTests(ReusedConnectTestCase, PandasOnSparkTestUtils, S
         )
 
     def test_json_functions(self):
-        from pyspark.sql import functions as SF
-        from pyspark.sql.connect import functions as CF
-
         query = """
             SELECT * FROM VALUES
             ('{"a": 1}', '[1, 2, 3]', '{"f1": "value1", "f2": "value2"}'),
@@ -1869,9 +1817,6 @@ class SparkConnectFunctionTests(ReusedConnectTestCase, PandasOnSparkTestUtils, S
         )
 
     def test_string_functions_one_arg(self):
-        from pyspark.sql import functions as SF
-        from pyspark.sql.connect import functions as CF
-
         query = """
             SELECT * FROM VALUES
             ('   ab   ', 'ab   ', NULL), ('   ab', NULL, 'ab')
@@ -1913,9 +1858,6 @@ class SparkConnectFunctionTests(ReusedConnectTestCase, PandasOnSparkTestUtils, S
             )
 
     def test_string_functions_multi_args(self):
-        from pyspark.sql import functions as SF
-        from pyspark.sql.connect import functions as CF
-
         query = """
             SELECT * FROM VALUES
             (1, 'abcdef', 'ghij', 'hello world', 'a.b.c.d'),
@@ -2013,9 +1955,6 @@ class SparkConnectFunctionTests(ReusedConnectTestCase, PandasOnSparkTestUtils, S
 
     # TODO(SPARK-41283): To compare toPandas for test cases with dtypes marked
     def test_date_ts_functions(self):
-        from pyspark.sql import functions as SF
-        from pyspark.sql.connect import functions as CF
-
         query = """
             SELECT * FROM VALUES
             ('1997/02/28 10:30:00', '2023/03/01 06:00:00', 'JST', 1428476400, 2020, 12, 6),
@@ -2160,9 +2099,6 @@ class SparkConnectFunctionTests(ReusedConnectTestCase, PandasOnSparkTestUtils, S
         )
 
     def test_time_window_functions(self):
-        from pyspark.sql import functions as SF
-        from pyspark.sql.connect import functions as CF
-
         query = """
             SELECT * FROM VALUES
             (TIMESTAMP('2022-12-25 10:30:00'), 1),
@@ -2264,9 +2200,6 @@ class SparkConnectFunctionTests(ReusedConnectTestCase, PandasOnSparkTestUtils, S
         )
 
     def test_misc_functions(self):
-        from pyspark.sql import functions as SF
-        from pyspark.sql.connect import functions as CF
-
         query = """
             SELECT a, b, c, BINARY(c) as d FROM VALUES
             (0, float("NAN"), 'x'), (1, NULL, 'y'), (1, 2.1, 'z'), (0, 0.5, NULL)
@@ -2329,9 +2262,6 @@ class SparkConnectFunctionTests(ReusedConnectTestCase, PandasOnSparkTestUtils, S
         )
 
     def test_call_udf(self):
-        from pyspark.sql import functions as SF
-        from pyspark.sql.connect import functions as CF
-
         query = """
             SELECT a, b, c, BINARY(c) as d FROM VALUES
             (-1.0, float("NAN"), 'x'), (-2.1, NULL, 'y'), (1, 2.1, 'z'), (0, 0.5, NULL)
@@ -2360,9 +2290,6 @@ class SparkConnectFunctionTests(ReusedConnectTestCase, PandasOnSparkTestUtils, S
         )
 
     def test_udf(self):
-        from pyspark.sql import functions as SF
-        from pyspark.sql.connect import functions as CF
-
         query = """
             SELECT a, b, c FROM VALUES
             (1, 1.0, 'x'), (2, 2.0, 'y'), (3, 3.0, 'z')
@@ -2408,9 +2335,6 @@ class SparkConnectFunctionTests(ReusedConnectTestCase, PandasOnSparkTestUtils, S
         )
 
     def test_pandas_udf_import(self):
-        from pyspark.sql.connect import functions as CF
-        from pyspark.sql import functions as SF
-
         self.assert_eq(getattr(CF, "pandas_udf"), getattr(SF, "pandas_udf"))
 
 

