This is an automated email from the ASF dual-hosted git repository. gurwls223 pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
     new 4dbe4ffebfc [SPARK-45450][PYTHON] Fix imports according to PEP8: pyspark.pandas and pyspark (core)
4dbe4ffebfc is described below

commit 4dbe4ffebfc8cc3a894c9e798c5a7b364cf7a399
Author: Hyukjin Kwon <gurwls...@apache.org>
AuthorDate: Tue Oct 10 14:26:45 2023 +0900

    [SPARK-45450][PYTHON] Fix imports according to PEP8: pyspark.pandas and pyspark (core)

    ### What changes were proposed in this pull request?

    This PR proposes to fix imports according to PEP8 in `pyspark.pandas` and `pyspark.*` (core), see https://peps.python.org/pep-0008/#imports.

    ### Why are the changes needed?

    I have not been fixing them as they are too minor. However, this practice is being propagated across the whole PySpark packages, and I think we should fix them all so other users do not follow the non-standard practice.

    ### Does this PR introduce _any_ user-facing change?

    No.

    ### How was this patch tested?

    Existing linters and tests should cover.

    ### Was this patch authored or co-authored using generative AI tooling?

    No.

    Closes #43257 from HyukjinKwon/SPARK-45450.
Authored-by: Hyukjin Kwon <gurwls...@apache.org> Signed-off-by: Hyukjin Kwon <gurwls...@apache.org> --- python/pyspark/conf.py | 1 + python/pyspark/errors_doc_gen.py | 1 + python/pyspark/java_gateway.py | 1 + python/pyspark/join.py | 3 ++- python/pyspark/pandas/accessors.py | 1 - python/pyspark/pandas/base.py | 2 +- python/pyspark/pandas/config.py | 1 - python/pyspark/pandas/correlation.py | 1 - python/pyspark/pandas/data_type_ops/date_ops.py | 1 - python/pyspark/pandas/data_type_ops/datetime_ops.py | 1 - python/pyspark/pandas/data_type_ops/string_ops.py | 1 - python/pyspark/pandas/frame.py | 7 ++----- python/pyspark/pandas/generic.py | 1 - python/pyspark/pandas/groupby.py | 2 -- python/pyspark/pandas/indexes/base.py | 1 - python/pyspark/pandas/indexes/multi.py | 2 -- python/pyspark/pandas/indexing.py | 6 ++---- python/pyspark/pandas/internal.py | 11 +++-------- python/pyspark/pandas/mlflow.py | 4 ++-- python/pyspark/pandas/namespace.py | 2 +- python/pyspark/pandas/numpy_compat.py | 2 +- python/pyspark/pandas/plot/core.py | 6 +++--- python/pyspark/pandas/plot/matplotlib.py | 1 - python/pyspark/pandas/resample.py | 2 -- python/pyspark/pandas/series.py | 2 +- python/pyspark/pandas/spark/accessors.py | 3 --- python/pyspark/pandas/spark/functions.py | 2 -- python/pyspark/pandas/sql_processor.py | 2 +- python/pyspark/pandas/strings.py | 3 +-- python/pyspark/pandas/supported_api_gen.py | 6 +++--- python/pyspark/pandas/tests/computation/test_corrwith.py | 1 - python/pyspark/pandas/tests/computation/test_cov.py | 1 - .../pandas/tests/connect/data_type_ops/testing_utils.py | 1 - python/pyspark/pandas/tests/connect/test_parity_extension.py | 1 + python/pyspark/pandas/tests/connect/test_parity_indexing.py | 1 + .../pyspark/pandas/tests/connect/test_parity_numpy_compat.py | 1 + python/pyspark/pandas/tests/data_type_ops/testing_utils.py | 2 -- python/pyspark/pandas/tests/frame/test_reshaping.py | 1 - python/pyspark/pandas/tests/frame/test_spark.py | 2 +- 
python/pyspark/pandas/tests/series/test_series.py | 1 - python/pyspark/pandas/tests/series/test_stat.py | 2 +- python/pyspark/pandas/tests/test_indexops_spark.py | 2 +- python/pyspark/pandas/tests/test_stats.py | 1 + python/pyspark/pandas/utils.py | 7 +++---- python/pyspark/pandas/window.py | 2 -- python/pyspark/profiler.py | 1 - python/pyspark/rdd.py | 6 +++--- python/pyspark/shuffle.py | 2 +- python/pyspark/tests/test_statcounter.py | 3 ++- python/pyspark/util.py | 4 ++-- python/pyspark/worker.py | 1 - 51 files changed, 44 insertions(+), 78 deletions(-) diff --git a/python/pyspark/conf.py b/python/pyspark/conf.py index ddf8c22feea..ba43a506375 100644 --- a/python/pyspark/conf.py +++ b/python/pyspark/conf.py @@ -21,6 +21,7 @@ import sys from typing import Dict, List, Optional, Tuple, cast, overload from py4j.java_gateway import JVMView, JavaObject + from pyspark.errors import PySparkRuntimeError diff --git a/python/pyspark/errors_doc_gen.py b/python/pyspark/errors_doc_gen.py index e9b229062ba..a30e2513f91 100644 --- a/python/pyspark/errors_doc_gen.py +++ b/python/pyspark/errors_doc_gen.py @@ -1,4 +1,5 @@ import re + from pyspark.errors.error_classes import ERROR_CLASSES_MAP diff --git a/python/pyspark/java_gateway.py b/python/pyspark/java_gateway.py index 691120a1312..39a90a0afba 100644 --- a/python/pyspark/java_gateway.py +++ b/python/pyspark/java_gateway.py @@ -28,6 +28,7 @@ from subprocess import Popen, PIPE from py4j.java_gateway import java_import, JavaGateway, JavaObject, GatewayParameters from py4j.clientserver import ClientServer, JavaParameters, PythonParameters + from pyspark.find_spark_home import _find_spark_home from pyspark.serializers import read_int, write_with_length, UTF8Deserializer from pyspark.errors import PySparkRuntimeError diff --git a/python/pyspark/join.py b/python/pyspark/join.py index 003e9ec2fc8..e12d674614d 100644 --- a/python/pyspark/join.py +++ b/python/pyspark/join.py @@ -31,9 +31,10 @@ THEORY OF LIABILITY, WHETHER IN CONTRACT, 
STRICT LIABILITY, OR TORT OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. """ -from pyspark.resultiterable import ResultIterable from functools import reduce +from pyspark.resultiterable import ResultIterable + def _do_python_join(rdd, other, numPartitions, dispatch): vs = rdd.mapValues(lambda v: (1, v)) diff --git a/python/pyspark/pandas/accessors.py b/python/pyspark/pandas/accessors.py index 4e96f4d4cf3..4c36f7976af 100644 --- a/python/pyspark/pandas/accessors.py +++ b/python/pyspark/pandas/accessors.py @@ -27,7 +27,6 @@ import pandas as pd from pyspark.sql import functions as F from pyspark.sql.functions import pandas_udf from pyspark.sql.types import DataType, LongType, StructField, StructType - from pyspark.pandas._typing import DataFrameOrSeries, Name from pyspark.pandas.internal import ( InternalField, diff --git a/python/pyspark/pandas/base.py b/python/pyspark/pandas/base.py index fa513e8b9b6..771d79dc6e0 100644 --- a/python/pyspark/pandas/base.py +++ b/python/pyspark/pandas/base.py @@ -27,9 +27,9 @@ from typing import Any, Callable, Optional, Sequence, Tuple, Union, cast, TYPE_C import numpy as np import pandas as pd from pandas.api.types import is_list_like, CategoricalDtype # type: ignore[attr-defined] + from pyspark.sql import functions as F, Column, Window from pyspark.sql.types import LongType, BooleanType, NumericType - from pyspark import pandas as ps # For running doctests and reference resolution in PyCharm. 
from pyspark.pandas._typing import Axis, Dtype, IndexOpsLike, Label, SeriesOrIndex from pyspark.pandas.config import get_option, option_context diff --git a/python/pyspark/pandas/config.py b/python/pyspark/pandas/config.py index 79cb859faa2..2228e41c1df 100644 --- a/python/pyspark/pandas/config.py +++ b/python/pyspark/pandas/config.py @@ -23,7 +23,6 @@ import json from typing import Any, Callable, Dict, Iterator, List, Tuple, Union from pyspark._globals import _NoValue, _NoValueType - from pyspark.pandas.utils import default_session diff --git a/python/pyspark/pandas/correlation.py b/python/pyspark/pandas/correlation.py index 75d3a857a0f..da51dc2cc61 100644 --- a/python/pyspark/pandas/correlation.py +++ b/python/pyspark/pandas/correlation.py @@ -19,7 +19,6 @@ from typing import List from pyspark.sql import DataFrame as SparkDataFrame, functions as F from pyspark.sql.window import Window - from pyspark.pandas.utils import verify_temp_column_name diff --git a/python/pyspark/pandas/data_type_ops/date_ops.py b/python/pyspark/pandas/data_type_ops/date_ops.py index 51d1018a304..771b5d38a17 100644 --- a/python/pyspark/pandas/data_type_ops/date_ops.py +++ b/python/pyspark/pandas/data_type_ops/date_ops.py @@ -26,7 +26,6 @@ from pandas.api.types import CategoricalDtype from pyspark.sql import functions as F from pyspark.sql.types import BooleanType, DateType, StringType from pyspark.sql.utils import get_column_class - from pyspark.pandas._typing import Dtype, IndexOpsLike, SeriesOrIndex from pyspark.pandas.base import column_op, IndexOpsMixin from pyspark.pandas.data_type_ops.base import ( diff --git a/python/pyspark/pandas/data_type_ops/datetime_ops.py b/python/pyspark/pandas/data_type_ops/datetime_ops.py index ea9b994076b..8d5853b6824 100644 --- a/python/pyspark/pandas/data_type_ops/datetime_ops.py +++ b/python/pyspark/pandas/data_type_ops/datetime_ops.py @@ -34,7 +34,6 @@ from pyspark.sql.types import ( NumericType, ) from pyspark.sql.utils import pyspark_column_op - from 
pyspark.pandas._typing import Dtype, IndexOpsLike, SeriesOrIndex from pyspark.pandas.base import IndexOpsMixin from pyspark.pandas.data_type_ops.base import ( diff --git a/python/pyspark/pandas/data_type_ops/string_ops.py b/python/pyspark/pandas/data_type_ops/string_ops.py index 53095c55e81..6c8bc754ac9 100644 --- a/python/pyspark/pandas/data_type_ops/string_ops.py +++ b/python/pyspark/pandas/data_type_ops/string_ops.py @@ -23,7 +23,6 @@ from pandas.api.types import CategoricalDtype from pyspark.sql import functions as F from pyspark.sql.types import IntegralType, StringType from pyspark.sql.utils import pyspark_column_op - from pyspark.pandas._typing import Dtype, IndexOpsLike, SeriesOrIndex from pyspark.pandas.base import column_op, IndexOpsMixin from pyspark.pandas.data_type_ops.base import ( diff --git a/python/pyspark/pandas/frame.py b/python/pyspark/pandas/frame.py index faa595f80e3..8f3555685ff 100644 --- a/python/pyspark/pandas/frame.py +++ b/python/pyspark/pandas/frame.py @@ -59,14 +59,14 @@ from pandas.api.types import ( # type: ignore[attr-defined] ) from pandas.tseries.frequencies import DateOffset, to_offset -from pyspark.errors import PySparkValueError - if TYPE_CHECKING: from pandas.io.formats.style import Styler from pandas.core.dtypes.common import infer_dtype_from_object from pandas.core.accessor import CachedAccessor from pandas.core.dtypes.inference import is_sequence + +from pyspark.errors import PySparkValueError from pyspark import StorageLevel from pyspark.sql import Column as PySparkColumn, DataFrame as PySparkDataFrame, functions as F from pyspark.sql.functions import pandas_udf @@ -86,7 +86,6 @@ from pyspark.sql.types import ( NullType, ) from pyspark.sql.window import Window - from pyspark import pandas as ps # For running doctests and reference resolution in PyCharm. 
from pyspark.pandas._typing import ( Axis, @@ -150,8 +149,6 @@ from pyspark.pandas.typedef.typehints import ( create_tuple_for_frame_type, ) from pyspark.pandas.plot import PandasOnSparkPlotAccessor - -# For supporting Spark Connect from pyspark.sql.utils import get_column_class, get_dataframe_class if TYPE_CHECKING: diff --git a/python/pyspark/pandas/generic.py b/python/pyspark/pandas/generic.py index 81f4f6db7ed..300fd73e43f 100644 --- a/python/pyspark/pandas/generic.py +++ b/python/pyspark/pandas/generic.py @@ -48,7 +48,6 @@ from pyspark.sql.types import ( LongType, NumericType, ) - from pyspark import pandas as ps # For running doctests and reference resolution in PyCharm. from pyspark.pandas._typing import ( Axis, diff --git a/python/pyspark/pandas/groupby.py b/python/pyspark/pandas/groupby.py index 3d51fabd4b2..b19a40b837a 100644 --- a/python/pyspark/pandas/groupby.py +++ b/python/pyspark/pandas/groupby.py @@ -44,7 +44,6 @@ import warnings import pandas as pd from pandas.api.types import is_number, is_hashable, is_list_like # type: ignore[attr-defined] - from pandas.core.common import _builtin_table # type: ignore[attr-defined] from pyspark.sql import Column, DataFrame as SparkDataFrame, Window, functions as F @@ -57,7 +56,6 @@ from pyspark.sql.types import ( StructType, StringType, ) - from pyspark import pandas as ps # For running doctests and reference resolution in PyCharm. from pyspark.pandas._typing import Axis, FrameLike, Label, Name from pyspark.pandas.typedef import infer_return_type, DataFrameType, ScalarType, SeriesType diff --git a/python/pyspark/pandas/indexes/base.py b/python/pyspark/pandas/indexes/base.py index 5652c6a8a85..2ec0a39dc71 100644 --- a/python/pyspark/pandas/indexes/base.py +++ b/python/pyspark/pandas/indexes/base.py @@ -53,7 +53,6 @@ from pyspark.sql.types import ( TimestampType, TimestampNTZType, ) - from pyspark import pandas as ps # For running doctests and reference resolution in PyCharm. 
from pyspark.pandas._typing import Dtype, Label, Name, Scalar from pyspark.pandas.config import get_option, option_context diff --git a/python/pyspark/pandas/indexes/multi.py b/python/pyspark/pandas/indexes/multi.py index 043d6762fb7..9917a42fb38 100644 --- a/python/pyspark/pandas/indexes/multi.py +++ b/python/pyspark/pandas/indexes/multi.py @@ -24,8 +24,6 @@ from pandas.api.types import is_hashable, is_list_like # type: ignore[attr-defi from pyspark.sql import functions as F, Column as PySparkColumn, Window from pyspark.sql.types import DataType from pyspark.sql.utils import get_column_class - -# For running doctests and reference resolution in PyCharm. from pyspark import pandas as ps from pyspark.pandas._typing import Label, Name, Scalar from pyspark.pandas.exceptions import PandasNotImplementedError diff --git a/python/pyspark/pandas/indexing.py b/python/pyspark/pandas/indexing.py index c725d01d673..de5baa3fae1 100644 --- a/python/pyspark/pandas/indexing.py +++ b/python/pyspark/pandas/indexing.py @@ -25,11 +25,11 @@ from typing import Any, Optional, List, Tuple, TYPE_CHECKING, Union, cast, Sized import pandas as pd from pandas.api.types import is_list_like # type: ignore[attr-defined] +import numpy as np + from pyspark.sql import functions as F, Column as PySparkColumn from pyspark.sql.types import BooleanType, LongType, DataType from pyspark.errors import AnalysisException -import numpy as np - from pyspark import pandas as ps # noqa: F401 from pyspark.pandas._typing import Label, Name, Scalar from pyspark.pandas.internal import ( @@ -50,8 +50,6 @@ from pyspark.pandas.utils import ( spark_column_equals, verify_temp_column_name, ) - -# For Supporting Spark Connect from pyspark.sql.utils import get_column_class if TYPE_CHECKING: diff --git a/python/pyspark/pandas/internal.py b/python/pyspark/pandas/internal.py index e025d91e7b7..2966db073d0 100644 --- a/python/pyspark/pandas/internal.py +++ b/python/pyspark/pandas/internal.py @@ -24,6 +24,7 @@ from typing import 
Any, Dict, List, Optional, Sequence, Tuple, Union, TYPE_CHECK import numpy as np import pandas as pd from pandas.api.types import CategoricalDtype # noqa: F401 + from pyspark._globals import _NoValue, _NoValueType from pyspark.sql import ( functions as F, @@ -40,17 +41,9 @@ from pyspark.sql.types import ( # noqa: F401 StringType, ) from pyspark.sql.utils import is_timestamp_ntz_preferred - -# For supporting Spark Connect from pyspark.sql.utils import is_remote, get_column_class, get_dataframe_class - -# For running doctests and reference resolution in PyCharm. from pyspark import pandas as ps from pyspark.pandas._typing import Label - -if TYPE_CHECKING: - # This is required in old Python 3.5 to prevent circular reference. - from pyspark.pandas.series import Series from pyspark.pandas.spark.utils import as_nullable_spark_type, force_decimal_precision_scale from pyspark.pandas.data_type_ops.base import DataTypeOps from pyspark.pandas.typedef import ( @@ -71,6 +64,8 @@ from pyspark.pandas.utils import ( spark_column_equals, ) +if TYPE_CHECKING: + from pyspark.pandas.series import Series # A function to turn given numbers to Spark columns that represent pandas-on-Spark index. SPARK_INDEX_NAME_FORMAT = "__index_level_{}__".format diff --git a/python/pyspark/pandas/mlflow.py b/python/pyspark/pandas/mlflow.py index b78ae934d74..a609f9b7069 100644 --- a/python/pyspark/pandas/mlflow.py +++ b/python/pyspark/pandas/mlflow.py @@ -19,12 +19,12 @@ MLflow-related functions to load models and apply them to pandas-on-Spark dataframes. 
""" from typing import List, Union +from typing import Any -from pyspark.sql.types import DataType import pandas as pd import numpy as np -from typing import Any +from pyspark.sql.types import DataType from pyspark.pandas._typing import Label, Dtype from pyspark.pandas.utils import lazy_property, default_session from pyspark.pandas.frame import DataFrame diff --git a/python/pyspark/pandas/namespace.py b/python/pyspark/pandas/namespace.py index a700a243e5d..e8898ab4893 100644 --- a/python/pyspark/pandas/namespace.py +++ b/python/pyspark/pandas/namespace.py @@ -48,6 +48,7 @@ from pandas.api.types import ( # type: ignore[attr-defined] from pandas.tseries.offsets import DateOffset import pyarrow as pa import pyarrow.parquet as pq + from pyspark.sql import functions as F, Column as PySparkColumn from pyspark.sql.functions import pandas_udf from pyspark.sql.types import ( @@ -67,7 +68,6 @@ from pyspark.sql.types import ( DataType, ) from pyspark.sql.dataframe import DataFrame as PySparkDataFrame - from pyspark import pandas as ps from pyspark.pandas._typing import Axis, Dtype, Label, Name from pyspark.pandas.base import IndexOpsMixin diff --git a/python/pyspark/pandas/numpy_compat.py b/python/pyspark/pandas/numpy_compat.py index efffaa7042c..6fed89157d2 100644 --- a/python/pyspark/pandas/numpy_compat.py +++ b/python/pyspark/pandas/numpy_compat.py @@ -17,10 +17,10 @@ from typing import Any, Callable, no_type_check import numpy as np + from pyspark.sql import functions as F from pyspark.sql.pandas.functions import pandas_udf from pyspark.sql.types import DoubleType, LongType, BooleanType - from pyspark.pandas.base import IndexOpsMixin diff --git a/python/pyspark/pandas/plot/core.py b/python/pyspark/pandas/plot/core.py index ccae96a2ef8..e7e716f52ac 100644 --- a/python/pyspark/pandas/plot/core.py +++ b/python/pyspark/pandas/plot/core.py @@ -19,12 +19,12 @@ import importlib import pandas as pd import numpy as np -from pyspark.ml.feature import Bucketizer -from 
pyspark.mllib.stat import KernelDensity -from pyspark.sql import functions as F from pandas.core.base import PandasObject from pandas.core.dtypes.inference import is_integer +from pyspark.ml.feature import Bucketizer +from pyspark.mllib.stat import KernelDensity +from pyspark.sql import functions as F from pyspark.pandas.missing import unsupported_function from pyspark.pandas.config import get_option from pyspark.pandas.utils import name_like_string diff --git a/python/pyspark/pandas/plot/matplotlib.py b/python/pyspark/pandas/plot/matplotlib.py index 42f30ebf7ae..fe23f457187 100644 --- a/python/pyspark/pandas/plot/matplotlib.py +++ b/python/pyspark/pandas/plot/matplotlib.py @@ -22,7 +22,6 @@ import numpy as np from matplotlib.axes._base import _process_plot_format # type: ignore[attr-defined] from pandas.core.dtypes.inference import is_list_like from pandas.io.formats.printing import pprint_thing - from pandas.plotting._matplotlib import ( # type: ignore[attr-defined] BarPlot as PandasBarPlot, BoxPlot as PandasBoxPlot, diff --git a/python/pyspark/pandas/resample.py b/python/pyspark/pandas/resample.py index 5bb754d69b2..fdcfa3243c0 100644 --- a/python/pyspark/pandas/resample.py +++ b/python/pyspark/pandas/resample.py @@ -29,7 +29,6 @@ from typing import ( ) import numpy as np - import pandas as pd from pandas.tseries.frequencies import to_offset @@ -40,7 +39,6 @@ from pyspark.sql.types import ( TimestampNTZType, DataType, ) - from pyspark import pandas as ps # For running doctests and reference resolution in PyCharm. 
from pyspark.pandas._typing import FrameLike from pyspark.pandas.frame import DataFrame diff --git a/python/pyspark/pandas/series.py b/python/pyspark/pandas/series.py index e96e5c3b3dc..e0cdb1ea030 100644 --- a/python/pyspark/pandas/series.py +++ b/python/pyspark/pandas/series.py @@ -53,6 +53,7 @@ from pandas.api.types import ( # type: ignore[attr-defined] CategoricalDtype, ) from pandas.tseries.frequencies import DateOffset + from pyspark.sql import functions as F, Column as PySparkColumn, DataFrame as SparkDataFrame from pyspark.sql.types import ( ArrayType, @@ -71,7 +72,6 @@ from pyspark.sql.types import ( ) from pyspark.sql.window import Window from pyspark.sql.utils import get_column_class, get_window_class - from pyspark import pandas as ps # For running doctests and reference resolution in PyCharm. from pyspark.pandas._typing import Axis, Dtype, Label, Name, Scalar, T from pyspark.pandas.accessors import PandasOnSparkSeriesMethods diff --git a/python/pyspark/pandas/spark/accessors.py b/python/pyspark/pandas/spark/accessors.py index bcbe044185a..7fb14e6ed75 100644 --- a/python/pyspark/pandas/spark/accessors.py +++ b/python/pyspark/pandas/spark/accessors.py @@ -25,11 +25,8 @@ from typing import TYPE_CHECKING, Callable, Generic, List, Optional, Union from pyspark import StorageLevel from pyspark.sql import Column as PySparkColumn, DataFrame as PySparkDataFrame from pyspark.sql.types import DataType, StructType - from pyspark.pandas._typing import IndexOpsLike from pyspark.pandas.internal import InternalField - -# For Supporting Spark Connect from pyspark.sql.utils import get_column_class, get_dataframe_class if TYPE_CHECKING: diff --git a/python/pyspark/pandas/spark/functions.py b/python/pyspark/pandas/spark/functions.py index 9fef983b46a..36ea007c4d7 100644 --- a/python/pyspark/pandas/spark/functions.py +++ b/python/pyspark/pandas/spark/functions.py @@ -19,8 +19,6 @@ Additional Spark functions used in pandas-on-Spark. 
""" from pyspark import SparkContext from pyspark.sql.column import Column - -# For supporting Spark Connect from pyspark.sql.utils import is_remote diff --git a/python/pyspark/pandas/sql_processor.py b/python/pyspark/pandas/sql_processor.py index bce6921e029..1bd1cb9823c 100644 --- a/python/pyspark/pandas/sql_processor.py +++ b/python/pyspark/pandas/sql_processor.py @@ -18,10 +18,10 @@ import _string # type: ignore[import] from typing import Any, Dict, Optional, Union, List import inspect + import pandas as pd from pyspark.sql import SparkSession, DataFrame as SDataFrame - from pyspark import pandas as ps # For running doctests and reference resolution in PyCharm. from pyspark.pandas.utils import default_session from pyspark.pandas.frame import DataFrame diff --git a/python/pyspark/pandas/strings.py b/python/pyspark/pandas/strings.py index bf9cafbaf09..abf3103dd43 100644 --- a/python/pyspark/pandas/strings.py +++ b/python/pyspark/pandas/strings.py @@ -30,12 +30,11 @@ from typing import ( ) import numpy as np - import pandas as pd + from pyspark.sql.types import StringType, BinaryType, ArrayType, LongType, MapType from pyspark.sql import functions as F from pyspark.sql.functions import pandas_udf - import pyspark.pandas as ps diff --git a/python/pyspark/pandas/supported_api_gen.py b/python/pyspark/pandas/supported_api_gen.py index a43ad198011..c4471a0af36 100644 --- a/python/pyspark/pandas/supported_api_gen.py +++ b/python/pyspark/pandas/supported_api_gen.py @@ -19,7 +19,6 @@ Generate 'Supported pandas APIs' documentation file """ import warnings -from pyspark.loose_version import LooseVersion from enum import Enum, unique from inspect import getmembers, isclass, isfunction, signature from typing import Any, Callable, Dict, List, NamedTuple, Set, TextIO, Tuple @@ -27,12 +26,13 @@ from typing import Any, Callable, Dict, List, NamedTuple, Set, TextIO, Tuple import pyspark.pandas as ps import pyspark.pandas.groupby as psg import pyspark.pandas.window as psw -from 
pyspark.pandas.exceptions import PandasNotImplementedError - import pandas as pd import pandas.core.groupby as pdg import pandas.core.window as pdw +from pyspark.loose_version import LooseVersion +from pyspark.pandas.exceptions import PandasNotImplementedError + MAX_MISSING_PARAMS_SIZE = 5 COMMON_PARAMETER_SET = { "kwargs", diff --git a/python/pyspark/pandas/tests/computation/test_corrwith.py b/python/pyspark/pandas/tests/computation/test_corrwith.py index b64bf2d411b..a74c1f7c3df 100644 --- a/python/pyspark/pandas/tests/computation/test_corrwith.py +++ b/python/pyspark/pandas/tests/computation/test_corrwith.py @@ -16,7 +16,6 @@ # import unittest - import numpy as np import pandas as pd diff --git a/python/pyspark/pandas/tests/computation/test_cov.py b/python/pyspark/pandas/tests/computation/test_cov.py index 23e5ec587e9..fb40884e1c3 100644 --- a/python/pyspark/pandas/tests/computation/test_cov.py +++ b/python/pyspark/pandas/tests/computation/test_cov.py @@ -17,7 +17,6 @@ import unittest import decimal - import numpy as np import pandas as pd diff --git a/python/pyspark/pandas/tests/connect/data_type_ops/testing_utils.py b/python/pyspark/pandas/tests/connect/data_type_ops/testing_utils.py index b62eb734a93..f1e36aecd19 100644 --- a/python/pyspark/pandas/tests/connect/data_type_ops/testing_utils.py +++ b/python/pyspark/pandas/tests/connect/data_type_ops/testing_utils.py @@ -22,7 +22,6 @@ import numpy as np import pandas as pd import pyspark.pandas as ps - from pyspark.pandas.typedef.typehints import ( extension_dtypes_available, extension_float_dtypes_available, diff --git a/python/pyspark/pandas/tests/connect/test_parity_extension.py b/python/pyspark/pandas/tests/connect/test_parity_extension.py index 849139980b2..7413801d3f8 100644 --- a/python/pyspark/pandas/tests/connect/test_parity_extension.py +++ b/python/pyspark/pandas/tests/connect/test_parity_extension.py @@ -18,6 +18,7 @@ import unittest import pandas as pd import numpy as np + from pyspark import pandas 
as ps from pyspark.pandas.tests.test_extension import ExtensionTestsMixin from pyspark.testing.connectutils import ReusedConnectTestCase diff --git a/python/pyspark/pandas/tests/connect/test_parity_indexing.py b/python/pyspark/pandas/tests/connect/test_parity_indexing.py index 9a14978539f..950bd2d0b2d 100644 --- a/python/pyspark/pandas/tests/connect/test_parity_indexing.py +++ b/python/pyspark/pandas/tests/connect/test_parity_indexing.py @@ -17,6 +17,7 @@ import unittest import pandas as pd + from pyspark import pandas as ps from pyspark.pandas.tests.test_indexing import BasicIndexingTestsMixin from pyspark.testing.connectutils import ReusedConnectTestCase diff --git a/python/pyspark/pandas/tests/connect/test_parity_numpy_compat.py b/python/pyspark/pandas/tests/connect/test_parity_numpy_compat.py index 6bf0a02589b..6cc0a277718 100644 --- a/python/pyspark/pandas/tests/connect/test_parity_numpy_compat.py +++ b/python/pyspark/pandas/tests/connect/test_parity_numpy_compat.py @@ -17,6 +17,7 @@ import unittest import pandas as pd + from pyspark import pandas as ps from pyspark.pandas.tests.test_numpy_compat import NumPyCompatTestsMixin from pyspark.testing.connectutils import ReusedConnectTestCase diff --git a/python/pyspark/pandas/tests/data_type_ops/testing_utils.py b/python/pyspark/pandas/tests/data_type_ops/testing_utils.py index 44673053bc2..8bafc86c8dd 100644 --- a/python/pyspark/pandas/tests/data_type_ops/testing_utils.py +++ b/python/pyspark/pandas/tests/data_type_ops/testing_utils.py @@ -23,13 +23,11 @@ import pandas as pd import pyspark.pandas as ps from pyspark.pandas.typedef import extension_dtypes - from pyspark.pandas.typedef.typehints import ( extension_dtypes_available, extension_float_dtypes_available, extension_object_dtypes_available, ) - from pyspark.testing.pandasutils import ComparisonTestBase if extension_dtypes_available: diff --git a/python/pyspark/pandas/tests/frame/test_reshaping.py b/python/pyspark/pandas/tests/frame/test_reshaping.py index 
b5176a87ca6..0a1c5e5b098 100644 --- a/python/pyspark/pandas/tests/frame/test_reshaping.py +++ b/python/pyspark/pandas/tests/frame/test_reshaping.py @@ -22,7 +22,6 @@ import pandas as pd from pyspark import pandas as ps from pyspark.pandas.config import option_context - from pyspark.testing.pandasutils import ComparisonTestBase from pyspark.testing.sqlutils import SQLTestUtils diff --git a/python/pyspark/pandas/tests/frame/test_spark.py b/python/pyspark/pandas/tests/frame/test_spark.py index da16e943578..4413279e32f 100644 --- a/python/pyspark/pandas/tests/frame/test_spark.py +++ b/python/pyspark/pandas/tests/frame/test_spark.py @@ -21,10 +21,10 @@ from io import StringIO import numpy as np import pandas as pd + from pyspark import StorageLevel from pyspark.ml.linalg import SparseVector from pyspark.sql.types import StructType - from pyspark import pandas as ps from pyspark.pandas.frame import CachedDataFrame from pyspark.pandas.exceptions import PandasNotImplementedError diff --git a/python/pyspark/pandas/tests/series/test_series.py b/python/pyspark/pandas/tests/series/test_series.py index 75c81431d32..b9fa6f6063f 100644 --- a/python/pyspark/pandas/tests/series/test_series.py +++ b/python/pyspark/pandas/tests/series/test_series.py @@ -18,7 +18,6 @@ import unittest from collections import defaultdict import inspect - from datetime import datetime, timedelta import numpy as np diff --git a/python/pyspark/pandas/tests/series/test_stat.py b/python/pyspark/pandas/tests/series/test_stat.py index 62672b1e8d5..1e379d32d56 100644 --- a/python/pyspark/pandas/tests/series/test_stat.py +++ b/python/pyspark/pandas/tests/series/test_stat.py @@ -15,10 +15,10 @@ # limitations under the License. 
# import unittest +from decimal import Decimal import numpy as np import pandas as pd -from decimal import Decimal from pyspark import pandas as ps from pyspark.testing.pandasutils import ComparisonTestBase diff --git a/python/pyspark/pandas/tests/test_indexops_spark.py b/python/pyspark/pandas/tests/test_indexops_spark.py index 3f7691a3863..2a0bcf242fa 100644 --- a/python/pyspark/pandas/tests/test_indexops_spark.py +++ b/python/pyspark/pandas/tests/test_indexops_spark.py @@ -16,9 +16,9 @@ # import pandas as pd + from pyspark.errors import AnalysisException from pyspark.sql import functions as F - from pyspark import pandas as ps from pyspark.testing.pandasutils import PandasOnSparkTestCase from pyspark.testing.sqlutils import SQLTestUtils diff --git a/python/pyspark/pandas/tests/test_stats.py b/python/pyspark/pandas/tests/test_stats.py index 40ee64a5f68..bdc83ad7d5f 100644 --- a/python/pyspark/pandas/tests/test_stats.py +++ b/python/pyspark/pandas/tests/test_stats.py @@ -16,6 +16,7 @@ # import unittest + import numpy as np import pandas as pd diff --git a/python/pyspark/pandas/utils.py b/python/pyspark/pandas/utils.py index b647697edf9..9f372a53079 100644 --- a/python/pyspark/pandas/utils.py +++ b/python/pyspark/pandas/utils.py @@ -37,14 +37,13 @@ from typing import ( ) import warnings +import pandas as pd +from pandas.api.types import is_list_like # type: ignore[attr-defined] + from pyspark.sql import functions as F, Column, DataFrame as PySparkDataFrame, SparkSession from pyspark.sql.types import DoubleType from pyspark.sql.utils import is_remote, get_dataframe_class from pyspark.errors import PySparkTypeError -import pandas as pd -from pandas.api.types import is_list_like # type: ignore[attr-defined] - -# For running doctests and reference resolution in PyCharm. 
from pyspark import pandas as ps # noqa: F401 from pyspark.pandas._typing import ( Axis, diff --git a/python/pyspark/pandas/window.py b/python/pyspark/pandas/window.py index db98867674a..0aaeb7df89b 100644 --- a/python/pyspark/pandas/window.py +++ b/python/pyspark/pandas/window.py @@ -30,8 +30,6 @@ from pyspark.pandas.missing.window import ( MissingPandasLikeExponentialMoving, MissingPandasLikeExponentialMovingGroupby, ) - -# For running doctests and reference resolution in PyCharm. from pyspark import pandas as ps # noqa: F401 from pyspark.pandas._typing import FrameLike from pyspark.pandas.groupby import GroupBy, DataFrameGroupBy diff --git a/python/pyspark/profiler.py b/python/pyspark/profiler.py index 6aa504c7c08..b7ea6a19063 100644 --- a/python/pyspark/profiler.py +++ b/python/pyspark/profiler.py @@ -27,7 +27,6 @@ from typing import ( Union, cast, ) - import cProfile import inspect import pstats diff --git a/python/pyspark/rdd.py b/python/pyspark/rdd.py index 0e0d958f46a..d2a8bc4b111 100644 --- a/python/pyspark/rdd.py +++ b/python/pyspark/rdd.py @@ -94,6 +94,9 @@ if TYPE_CHECKING: import socket import io + from py4j.java_gateway import JavaObject + from py4j.java_collections import JavaArray + from pyspark._typing import NonUDFType from pyspark._typing import S, NumberOrArray from pyspark.context import SparkContext @@ -119,9 +122,6 @@ if TYPE_CHECKING: SQLTableUDFType, ) - from py4j.java_gateway import JavaObject - from py4j.java_collections import JavaArray - T = TypeVar("T") T_co = TypeVar("T_co", covariant=True) U = TypeVar("U") diff --git a/python/pyspark/shuffle.py b/python/pyspark/shuffle.py index da03110c321..57964654963 100644 --- a/python/pyspark/shuffle.py +++ b/python/pyspark/shuffle.py @@ -24,8 +24,8 @@ import itertools import operator import random import sys - import heapq + from pyspark.serializers import ( BatchedSerializer, CPickleSerializer, diff --git a/python/pyspark/tests/test_statcounter.py b/python/pyspark/tests/test_statcounter.py 
index 747f42e67b2..1879ea6974e 100644 --- a/python/pyspark/tests/test_statcounter.py +++ b/python/pyspark/tests/test_statcounter.py @@ -14,9 +14,10 @@ # See the License for the specific language governing permissions and # limitations under the License. # +import math + from pyspark.statcounter import StatCounter from pyspark.testing.utils import ReusedPySparkTestCase -import math class StatCounterTests(ReusedPySparkTestCase): diff --git a/python/pyspark/util.py b/python/pyspark/util.py index 47f5933079e..9c70bac2a3d 100644 --- a/python/pyspark/util.py +++ b/python/pyspark/util.py @@ -28,10 +28,10 @@ import typing from types import TracebackType from typing import Any, Callable, IO, Iterator, List, Optional, TextIO, Tuple, Union -from pyspark.errors import PySparkRuntimeError - from py4j.clientserver import ClientServer +from pyspark.errors import PySparkRuntimeError + __all__: List[str] = [] from py4j.java_gateway import JavaObject diff --git a/python/pyspark/worker.py b/python/pyspark/worker.py index a073942adb6..3d08f6c4bae 100644 --- a/python/pyspark/worker.py +++ b/python/pyspark/worker.py @@ -24,7 +24,6 @@ import time from inspect import getfullargspec import json from typing import Any, Callable, Iterable, Iterator - import faulthandler from pyspark.accumulators import _accumulatorRegistry --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org