This is an automated email from the ASF dual-hosted git repository.

gurwls223 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
     new cac6f58318b  [SPARK-43176][CONNECT][PYTHON][TESTS] Deduplicate imports in Connect Tests

cac6f58318b is described below

commit cac6f58318bb84d532f02d245a50d3c66daa3e4b
Author: Ruifeng Zheng <ruife...@apache.org>
AuthorDate: Wed Apr 19 19:33:49 2023 +0900

    [SPARK-43176][CONNECT][PYTHON][TESTS] Deduplicate imports in Connect Tests

    ### What changes were proposed in this pull request?
    Deduplicate imports in Connect Tests

    ### Why are the changes needed?
    for simplicity

    ### Does this PR introduce _any_ user-facing change?
    No, test-only

    ### How was this patch tested?
    updated unittests

    Closes #40839 from zhengruifeng/connect_test_import.

    Authored-by: Ruifeng Zheng <ruife...@apache.org>
    Signed-off-by: Hyukjin Kwon <gurwls...@apache.org>
---
 .../sql/tests/connect/test_connect_basic.py       | 10 ---
 .../sql/tests/connect/test_connect_column.py      | 15 +---
 .../sql/tests/connect/test_connect_function.py    | 96 +++-------------------
 3 files changed, 11 insertions(+), 110 deletions(-)
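All three files get the same mechanical rewrite: per-test-method imports of the plain and Connect `functions` modules are replaced by one module-level import guarded by `should_test_connect`. A minimal sketch of the pattern, for readers skimming the diff below (`SomeConnectTest` and `test_something` are hypothetical placeholder names; the import lines and the guard itself are taken from the patch):

    # Before (the shape being removed): every test method re-imported the
    # same modules locally.
    #
    #     def test_something(self):                      # illustrative name
    #         from pyspark.sql import functions as SF
    #         from pyspark.sql.connect import functions as CF
    #         ...

    # After (the shape being introduced): import once at module level,
    # guarded so the test module can still be imported when Spark Connect's
    # optional dependencies are not installed.
    from pyspark.testing.connectutils import should_test_connect
    from pyspark.sql.tests.connect.test_connect_basic import SparkConnectSQLTestCase

    if should_test_connect:
        from pyspark.sql import functions as SF          # vanilla PySpark API
        from pyspark.sql.connect import functions as CF  # Spark Connect API

    class SomeConnectTest(SparkConnectSQLTestCase):      # illustrative name
        def test_something(self):
            # SF and CF now resolve to the module-level imports above.
            ...

The guard mirrors the existing `if should_test_connect:` blocks visible in the diff: test modules are imported at collection time even when the Connect tests will be skipped, so an unguarded top-level import of a Connect module could fail in environments without Connect's extra dependencies.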
diff --git a/python/pyspark/sql/tests/connect/test_connect_basic.py b/python/pyspark/sql/tests/connect/test_connect_basic.py
index 2c1b6342924..9d12eb2b26e 100644
--- a/python/pyspark/sql/tests/connect/test_connect_basic.py
+++ b/python/pyspark/sql/tests/connect/test_connect_basic.py
@@ -466,9 +466,6 @@ class SparkConnectBasicTests(SparkConnectSQLTestCase):
         )
 
     def test_collect_timestamp(self):
-        from pyspark.sql import functions as SF
-        from pyspark.sql.connect import functions as CF
-
         query = """
             SELECT * FROM VALUES
             (TIMESTAMP('2022-12-25 10:30:00'), 1),
@@ -652,10 +649,6 @@ class SparkConnectBasicTests(SparkConnectSQLTestCase):
 
     def test_with_none_and_nan(self):
         # SPARK-41855: make createDataFrame support None and NaN
-
-        from pyspark.sql import functions as SF
-        from pyspark.sql.connect import functions as CF
-
         # SPARK-41814: test with eqNullSafe
         data1 = [Row(id=1, value=float("NaN")), Row(id=2, value=42.0), Row(id=3, value=None)]
         data2 = [Row(id=1, value=np.nan), Row(id=2, value=42.0), Row(id=3, value=None)]
@@ -1662,9 +1655,6 @@ class SparkConnectBasicTests(SparkConnectSQLTestCase):
 
     def test_observe(self):
         # SPARK-41527: test DataFrame.observe()
-        from pyspark.sql import functions as SF
-        from pyspark.sql.connect import functions as CF
-
         observation_name = "my_metric"
 
         self.assert_eq(
diff --git a/python/pyspark/sql/tests/connect/test_connect_column.py b/python/pyspark/sql/tests/connect/test_connect_column.py
index 2a22ca6ad8d..5703f8d2a3c 100644
--- a/python/pyspark/sql/tests/connect/test_connect_column.py
+++ b/python/pyspark/sql/tests/connect/test_connect_column.py
@@ -18,7 +18,6 @@
 import decimal
 import datetime
 
-from pyspark.sql import functions as SF
 from pyspark.sql.types import (
     Row,
     StructField,
@@ -48,6 +47,7 @@ from pyspark.sql.tests.connect.test_connect_basic import SparkConnectSQLTestCase
 
 if should_test_connect:
     import pandas as pd
+    from pyspark.sql import functions as SF
     from pyspark.sql.connect import functions as CF
     from pyspark.sql.connect.column import Column
     from pyspark.sql.connect.expressions import DistributedSequenceID, LiteralExpression
@@ -482,9 +482,6 @@ class SparkConnectColumnTests(SparkConnectSQLTestCase):
         cdf = self.connect.range(0, 1)
         sdf = self.spark.range(0, 1)
 
-        from pyspark.sql import functions as SF
-        from pyspark.sql.connect import functions as CF
-
         cdf1 = cdf.select(
             CF.lit(0),
             CF.lit(1),
@@ -679,9 +676,6 @@ class SparkConnectColumnTests(SparkConnectSQLTestCase):
 
     def test_column_bitwise_ops(self):
         # SPARK-41751: test bitwiseAND, bitwiseOR, bitwiseXOR
-        from pyspark.sql import functions as SF
-        from pyspark.sql.connect import functions as CF
-
         query = """
             SELECT * FROM VALUES
             (1, 1, 0), (2, NULL, 1), (3, 3, 4)
@@ -718,9 +712,6 @@ class SparkConnectColumnTests(SparkConnectSQLTestCase):
         )
 
     def test_column_accessor(self):
-        from pyspark.sql import functions as SF
-        from pyspark.sql.connect import functions as CF
-
         query = """
             SELECT STRUCT(a, b, c) AS x, y, z, c FROM VALUES
             (float(1.0), double(1.0), '2022', MAP('b', '123', 'a', 'kk'), ARRAY(1, 2, 3)),
@@ -840,10 +831,6 @@ class SparkConnectColumnTests(SparkConnectSQLTestCase):
 
     def test_column_field_ops(self):
         # SPARK-41767: test withField, dropFields
-
-        from pyspark.sql import functions as SF
-        from pyspark.sql.connect import functions as CF
-
         query = """
             SELECT STRUCT(a, b, c, d) AS x, e FROM VALUES
             (float(1.0), double(1.0), '2022', 1, 0),
diff --git a/python/pyspark/sql/tests/connect/test_connect_function.py b/python/pyspark/sql/tests/connect/test_connect_function.py
index 563db9ea63d..57b39310fe8 100644
--- a/python/pyspark/sql/tests/connect/test_connect_function.py
+++ b/python/pyspark/sql/tests/connect/test_connect_function.py
@@ -21,10 +21,19 @@ from pyspark.errors import PySparkTypeError
 from pyspark.sql import SparkSession as PySparkSession
 from pyspark.sql.types import StringType, StructType, StructField, ArrayType, IntegerType
 from pyspark.testing.pandasutils import PandasOnSparkTestUtils
-from pyspark.testing.connectutils import ReusedConnectTestCase
+from pyspark.testing.connectutils import ReusedConnectTestCase, should_test_connect
 from pyspark.testing.sqlutils import SQLTestUtils
 from pyspark.errors.exceptions.connect import AnalysisException, SparkConnectException
 
+if should_test_connect:
+    from pyspark.sql.connect.column import Column
+    from pyspark.sql import functions as SF
+    from pyspark.sql.window import Window as SW
+    from pyspark.sql.dataframe import DataFrame as SDF
+    from pyspark.sql.connect import functions as CF
+    from pyspark.sql.connect.window import Window as CW
+    from pyspark.sql.connect.dataframe import DataFrame as CDF
+
 
 class SparkConnectFunctionTests(ReusedConnectTestCase, PandasOnSparkTestUtils, SQLTestUtils):
     """These test cases exercise the interface to the proto plan
@@ -47,9 +56,6 @@ class SparkConnectFunctionTests(ReusedConnectTestCase, PandasOnSparkTestUtils, S
             del os.environ["PYSPARK_NO_NAMESPACE_SHARE"]
 
     def compare_by_show(self, df1, df2, n: int = 20, truncate: int = 20):
-        from pyspark.sql.dataframe import DataFrame as SDF
-        from pyspark.sql.connect.dataframe import DataFrame as CDF
-
         assert isinstance(df1, (SDF, CDF))
         if isinstance(df1, SDF):
             str1 = df1._jdf.showString(n, truncate, False)
@@ -66,10 +72,6 @@ class SparkConnectFunctionTests(ReusedConnectTestCase, PandasOnSparkTestUtils, S
 
     def test_count_star(self):
         # SPARK-42099: test count(*), count(col(*)) and count(expr(*))
-
-        from pyspark.sql import functions as SF
-        from pyspark.sql.connect import functions as CF
-
         data = [(2, "Alice"), (3, "Alice"), (5, "Bob"), (10, "Bob")]
         cdf = self.connect.createDataFrame(data, schema=["age", "name"])
@@ -123,9 +125,6 @@ class SparkConnectFunctionTests(ReusedConnectTestCase, PandasOnSparkTestUtils, S
         )
 
     def test_broadcast(self):
-        from pyspark.sql import functions as SF
-        from pyspark.sql.connect import functions as CF
-
         query = """
             SELECT * FROM VALUES
             (0, float("NAN"), NULL), (1, NULL, 2.0), (2, 2.1, 3.5)
@@ -174,9 +173,6 @@ class SparkConnectFunctionTests(ReusedConnectTestCase, PandasOnSparkTestUtils, S
         )
 
     def test_normal_functions(self):
-        from pyspark.sql import functions as SF
-        from pyspark.sql.connect import functions as CF
-
         query = """
             SELECT * FROM VALUES
             (0, float("NAN"), NULL), (1, NULL, 2.0), (2, 2.1, 3.5)
@@ -261,9 +257,6 @@ class SparkConnectFunctionTests(ReusedConnectTestCase, PandasOnSparkTestUtils, S
         )
 
     def test_when_otherwise(self):
-        from pyspark.sql import functions as SF
-        from pyspark.sql.connect import functions as CF
-
         query = """
             SELECT * FROM VALUES
             (0, float("NAN"), NULL), (1, NULL, 2.0), (2, 2.1, 3.5), (3, 3.1, float("NAN"))
@@ -375,9 +368,6 @@ class SparkConnectFunctionTests(ReusedConnectTestCase, PandasOnSparkTestUtils, S
         )
 
     def test_sorting_functions_with_column(self):
-        from pyspark.sql.connect import functions as CF
-        from pyspark.sql.connect.column import Column
-
         funs = [
             CF.asc_nulls_first,
             CF.asc_nulls_last,
@@ -403,9 +393,6 @@ class SparkConnectFunctionTests(ReusedConnectTestCase, PandasOnSparkTestUtils, S
             self.assertIn("""DESC NULLS LAST'""", str(res))
 
     def test_sort_with_nulls_order(self):
-        from pyspark.sql import functions as SF
-        from pyspark.sql.connect import functions as CF
-
         query = """
             SELECT * FROM VALUES
             (false, 1, NULL), (true, NULL, 2.0), (NULL, 3, 3.0)
@@ -449,9 +436,6 @@ class SparkConnectFunctionTests(ReusedConnectTestCase, PandasOnSparkTestUtils, S
         )
 
     def test_math_functions(self):
-        from pyspark.sql import functions as SF
-        from pyspark.sql.connect import functions as CF
-
         query = """
             SELECT * FROM VALUES
             (false, 1, NULL), (true, NULL, 2.0), (NULL, 3, 3.5)
@@ -571,9 +555,6 @@ class SparkConnectFunctionTests(ReusedConnectTestCase, PandasOnSparkTestUtils, S
         )
 
     def test_aggregation_functions(self):
-        from pyspark.sql import functions as SF
-        from pyspark.sql.connect import functions as CF
-
         query = """
             SELECT * FROM VALUES
             (0, float("NAN"), NULL), (1, NULL, 2.0), (1, 2.1, 3.5), (0, 0.5, 1.0)
@@ -694,11 +675,6 @@ class SparkConnectFunctionTests(ReusedConnectTestCase, PandasOnSparkTestUtils, S
         )
 
     def test_window_functions(self):
-        from pyspark.sql import functions as SF
-        from pyspark.sql.window import Window as SW
-        from pyspark.sql.connect import functions as CF
-        from pyspark.sql.connect.window import Window as CW
-
         self.assertEqual(CW.unboundedPreceding, SW.unboundedPreceding)
 
         self.assertEqual(CW.unboundedFollowing, SW.unboundedFollowing)
@@ -950,12 +926,6 @@ class SparkConnectFunctionTests(ReusedConnectTestCase, PandasOnSparkTestUtils, S
 
     def test_window_order(self):
         # SPARK-41773: test window function with order
-
-        from pyspark.sql import functions as SF
-        from pyspark.sql.window import Window as SW
-        from pyspark.sql.connect import functions as CF
-        from pyspark.sql.connect.window import Window as CW
-
         data = [(1, "a"), (1, "a"), (2, "a"), (1, "b"), (2, "b"), (3, "b")]
         # +---+--------+
         # | id|category|
@@ -1000,9 +970,6 @@ class SparkConnectFunctionTests(ReusedConnectTestCase, PandasOnSparkTestUtils, S
         )
 
     def test_collection_functions(self):
-        from pyspark.sql import functions as SF
-        from pyspark.sql.connect import functions as CF
-
         query = """
             SELECT * FROM VALUES
             (ARRAY('a', 'ab'), ARRAY(1, 2, 3), ARRAY(1, NULL, 3), 1, 2, 'a'),
@@ -1257,9 +1224,6 @@ class SparkConnectFunctionTests(ReusedConnectTestCase, PandasOnSparkTestUtils, S
         )
 
     def test_map_collection_functions(self):
-        from pyspark.sql import functions as SF
-        from pyspark.sql.connect import functions as CF
-
         query = """
             SELECT * FROM VALUES
             (MAP('a', 'ab'), MAP('x', 'ab'), MAP(1, 2, 3, 4), 1, 'a', ARRAY(1, 2), ARRAY('X', 'Y')),
@@ -1315,9 +1279,6 @@ class SparkConnectFunctionTests(ReusedConnectTestCase, PandasOnSparkTestUtils, S
         )
 
     def test_generator_functions(self):
-        from pyspark.sql import functions as SF
-        from pyspark.sql.connect import functions as CF
-
         query = """
             SELECT * FROM VALUES
             (ARRAY('a', 'ab'), ARRAY(1, 2, 3), ARRAY(1, NULL, 3),
@@ -1442,9 +1403,6 @@ class SparkConnectFunctionTests(ReusedConnectTestCase, PandasOnSparkTestUtils, S
         )
 
     def test_lambda_functions(self):
-        from pyspark.sql import functions as SF
-        from pyspark.sql.connect import functions as CF
-
         query = """
             SELECT * FROM VALUES
             (ARRAY('a', 'ab'), ARRAY(1, 2, 3), ARRAY(1, NULL, 3), 1, 2, 'a', NULL, MAP(0, 0)),
@@ -1619,10 +1577,6 @@ class SparkConnectFunctionTests(ReusedConnectTestCase, PandasOnSparkTestUtils, S
 
     def test_nested_lambda_function(self):
         # SPARK-42089: test nested lambda function
-
-        from pyspark.sql import functions as SF
-        from pyspark.sql.connect import functions as CF
-
         query = "SELECT array(1, 2, 3) as numbers, array('a', 'b', 'c') as letters"
 
         cdf = self.connect.sql(query).select(
@@ -1652,9 +1606,6 @@ class SparkConnectFunctionTests(ReusedConnectTestCase, PandasOnSparkTestUtils, S
         self.assertEqual(cdf.collect(), sdf.collect())
 
     def test_csv_functions(self):
-        from pyspark.sql import functions as SF
-        from pyspark.sql.connect import functions as CF
-
         query = """
             SELECT * FROM VALUES
             ('1,2,3', 'a,b,5.0'),
@@ -1732,9 +1683,6 @@ class SparkConnectFunctionTests(ReusedConnectTestCase, PandasOnSparkTestUtils, S
         )
 
     def test_json_functions(self):
-        from pyspark.sql import functions as SF
-        from pyspark.sql.connect import functions as CF
-
         query = """
             SELECT * FROM VALUES
             ('{"a": 1}', '[1, 2, 3]', '{"f1": "value1", "f2": "value2"}'),
@@ -1869,9 +1817,6 @@ class SparkConnectFunctionTests(ReusedConnectTestCase, PandasOnSparkTestUtils, S
         )
 
     def test_string_functions_one_arg(self):
-        from pyspark.sql import functions as SF
-        from pyspark.sql.connect import functions as CF
-
         query = """
             SELECT * FROM VALUES
             (' ab ', 'ab ', NULL), (' ab', NULL, 'ab')
@@ -1913,9 +1858,6 @@ class SparkConnectFunctionTests(ReusedConnectTestCase, PandasOnSparkTestUtils, S
         )
 
     def test_string_functions_multi_args(self):
-        from pyspark.sql import functions as SF
-        from pyspark.sql.connect import functions as CF
-
         query = """
             SELECT * FROM VALUES
             (1, 'abcdef', 'ghij', 'hello world', 'a.b.c.d'),
@@ -2013,9 +1955,6 @@ class SparkConnectFunctionTests(ReusedConnectTestCase, PandasOnSparkTestUtils, S
 
     # TODO(SPARK-41283): To compare toPandas for test cases with dtypes marked
     def test_date_ts_functions(self):
-        from pyspark.sql import functions as SF
-        from pyspark.sql.connect import functions as CF
-
         query = """
             SELECT * FROM VALUES
             ('1997/02/28 10:30:00', '2023/03/01 06:00:00', 'JST', 1428476400, 2020, 12, 6),
@@ -2160,9 +2099,6 @@ class SparkConnectFunctionTests(ReusedConnectTestCase, PandasOnSparkTestUtils, S
         )
 
     def test_time_window_functions(self):
-        from pyspark.sql import functions as SF
-        from pyspark.sql.connect import functions as CF
-
         query = """
             SELECT * FROM VALUES
             (TIMESTAMP('2022-12-25 10:30:00'), 1),
@@ -2264,9 +2200,6 @@ class SparkConnectFunctionTests(ReusedConnectTestCase, PandasOnSparkTestUtils, S
         )
 
     def test_misc_functions(self):
-        from pyspark.sql import functions as SF
-        from pyspark.sql.connect import functions as CF
-
         query = """
             SELECT a, b, c, BINARY(c) as d FROM VALUES
             (0, float("NAN"), 'x'), (1, NULL, 'y'), (1, 2.1, 'z'), (0, 0.5, NULL)
@@ -2329,9 +2262,6 @@ class SparkConnectFunctionTests(ReusedConnectTestCase, PandasOnSparkTestUtils, S
         )
 
     def test_call_udf(self):
-        from pyspark.sql import functions as SF
-        from pyspark.sql.connect import functions as CF
-
         query = """
             SELECT a, b, c, BINARY(c) as d FROM VALUES
             (-1.0, float("NAN"), 'x'), (-2.1, NULL, 'y'), (1, 2.1, 'z'), (0, 0.5, NULL)
@@ -2360,9 +2290,6 @@ class SparkConnectFunctionTests(ReusedConnectTestCase, PandasOnSparkTestUtils, S
         )
 
     def test_udf(self):
-        from pyspark.sql import functions as SF
-        from pyspark.sql.connect import functions as CF
-
         query = """
             SELECT a, b, c FROM VALUES
             (1, 1.0, 'x'), (2, 2.0, 'y'), (3, 3.0, 'z')
@@ -2408,9 +2335,6 @@ class SparkConnectFunctionTests(ReusedConnectTestCase, PandasOnSparkTestUtils, S
         )
 
     def test_pandas_udf_import(self):
-        from pyspark.sql.connect import functions as CF
-        from pyspark.sql import functions as SF
-
         self.assert_eq(getattr(CF, "pandas_udf"), getattr(SF, "pandas_udf"))

---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org