This is an automated email from the ASF dual-hosted git repository. dongjoon pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push: new 10751dc285c [SPARK-44133][PYTHON] Upgrade MyPy from 0.920 to 0.982 10751dc285c is described below commit 10751dc285c5c639e3343a8abc26857407522822 Author: Hyukjin Kwon <gurwls...@apache.org> AuthorDate: Wed Jun 21 12:56:57 2023 -0700 [SPARK-44133][PYTHON] Upgrade MyPy from 0.920 to 0.982 ### What changes were proposed in this pull request? This PR upgrades the MyPy version from 0.920 to 0.982. ### Why are the changes needed? To detect type-related changes better by static analysis. ### Does this PR introduce _any_ user-facing change? No, dev-only. ### How was this patch tested? ```bash ./dev/lint-python ``` Closes #41690 from HyukjinKwon/SPARK-44133. Authored-by: Hyukjin Kwon <gurwls...@apache.org> Signed-off-by: Dongjoon Hyun <dongj...@apache.org> --- .github/workflows/build_and_test.yml | 2 +- dev/requirements.txt | 2 +- python/pyspark/ml/base.py | 2 +- python/pyspark/ml/classification.py | 84 +++++++++---------- python/pyspark/ml/clustering.py | 36 ++++---- python/pyspark/ml/connect/base.py | 2 +- python/pyspark/ml/connect/classification.py | 2 +- python/pyspark/ml/feature.py | 44 +++++----- python/pyspark/ml/fpm.py | 4 +- python/pyspark/ml/recommendation.py | 6 +- python/pyspark/ml/regression.py | 96 +++++++++++----------- .../pyspark/ml/tests/typing/test_clustering.yaml | 6 +- python/pyspark/ml/tests/typing/test_evaluation.yml | 6 +- python/pyspark/ml/torch/distributor.py | 6 +- python/pyspark/ml/tree.py | 16 ++-- python/pyspark/ml/tuning.py | 2 +- python/pyspark/ml/util.py | 4 +- python/pyspark/ml/wrapper.py | 4 +- python/pyspark/mllib/classification.py | 6 +- python/pyspark/mllib/clustering.py | 18 ++-- python/pyspark/mllib/evaluation.py | 38 ++++----- python/pyspark/mllib/feature.py | 8 +- python/pyspark/mllib/linalg/__init__.py | 4 +- python/pyspark/mllib/linalg/distributed.py | 6 +- python/pyspark/mllib/recommendation.py | 2 +- python/pyspark/mllib/regression.py | 4 +- 
python/pyspark/sql/observation.py | 2 +- python/pyspark/sql/tests/typing/test_dataframe.yml | 4 +- python/pyspark/sql/tests/typing/test_functions.yml | 32 ++++---- python/pyspark/sql/tests/typing/test_session.yml | 7 +- python/pyspark/sql/types.py | 2 +- python/pyspark/streaming/context.py | 4 +- python/pyspark/tests/typing/test_rdd.yml | 4 +- 33 files changed, 235 insertions(+), 230 deletions(-) diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index a03aa53dc88..47732a5c9f6 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -624,7 +624,7 @@ jobs: # See also https://github.com/sphinx-doc/sphinx/issues/7551. # Jinja2 3.0.0+ causes error when building with Sphinx. # See also https://issues.apache.org/jira/browse/SPARK-35375. - python3.9 -m pip install 'flake8==3.9.0' pydata_sphinx_theme 'mypy==0.920' 'pytest==7.1.3' 'pytest-mypy-plugins==1.9.3' numpydoc 'jinja2<3.0.0' 'black==22.6.0' + python3.9 -m pip install 'flake8==3.9.0' pydata_sphinx_theme 'mypy==0.982' 'pytest==7.1.3' 'pytest-mypy-plugins==1.9.3' numpydoc 'jinja2<3.0.0' 'black==22.6.0' python3.9 -m pip install 'pandas-stubs==1.2.0.53' ipython 'grpcio==1.48.1' 'grpc-stubs==1.24.11' 'googleapis-common-protos-stubs==2.2.0' - name: Python linter run: PYTHON_EXECUTABLE=python3.9 ./dev/lint-python diff --git a/dev/requirements.txt b/dev/requirements.txt index 1af7256e0b3..72da5dbe163 100644 --- a/dev/requirements.txt +++ b/dev/requirements.txt @@ -20,7 +20,7 @@ openpyxl coverage # Linter -mypy==0.920 +mypy==0.982 pytest-mypy-plugins==1.9.3 flake8==3.9.0 # See SPARK-38680. 
diff --git a/python/pyspark/ml/base.py b/python/pyspark/ml/base.py index 34c3aa9c62c..b94358d26fd 100644 --- a/python/pyspark/ml/base.py +++ b/python/pyspark/ml/base.py @@ -396,7 +396,7 @@ class PredictionModel(Model, _PredictorParams, Generic[T], metaclass=ABCMeta): """ return self._set(predictionCol=value) - @property # type: ignore[misc] + @property @abstractmethod @since("2.1.0") def numFeatures(self) -> int: diff --git a/python/pyspark/ml/classification.py b/python/pyspark/ml/classification.py index c09a510d76b..81d7a1d51da 100644 --- a/python/pyspark/ml/classification.py +++ b/python/pyspark/ml/classification.py @@ -180,7 +180,7 @@ class ClassificationModel(PredictionModel, _ClassifierParams, metaclass=ABCMeta) """ return self._set(rawPredictionCol=value) - @property # type: ignore[misc] + @property @abstractmethod @since("2.1.0") def numClasses(self) -> int: @@ -284,7 +284,7 @@ class _JavaClassificationModel(ClassificationModel, JavaPredictionModel[T]): To be mixed in with :class:`pyspark.ml.JavaModel` """ - @property # type: ignore[misc] + @property @since("2.1.0") def numClasses(self) -> int: """ @@ -335,7 +335,7 @@ class _ClassificationSummary(JavaWrapper): .. 
versionadded:: 3.1.0 """ - @property # type: ignore[misc] + @property @since("3.1.0") def predictions(self) -> DataFrame: """ @@ -343,7 +343,7 @@ class _ClassificationSummary(JavaWrapper): """ return self._call_java("predictions") - @property # type: ignore[misc] + @property @since("3.1.0") def predictionCol(self) -> str: """ @@ -351,7 +351,7 @@ class _ClassificationSummary(JavaWrapper): """ return self._call_java("predictionCol") - @property # type: ignore[misc] + @property @since("3.1.0") def labelCol(self) -> str: """ @@ -360,7 +360,7 @@ class _ClassificationSummary(JavaWrapper): """ return self._call_java("labelCol") - @property # type: ignore[misc] + @property @since("3.1.0") def weightCol(self) -> str: """ @@ -386,7 +386,7 @@ class _ClassificationSummary(JavaWrapper): """ return self._call_java("labels") - @property # type: ignore[misc] + @property @since("3.1.0") def truePositiveRateByLabel(self) -> List[float]: """ @@ -394,7 +394,7 @@ class _ClassificationSummary(JavaWrapper): """ return self._call_java("truePositiveRateByLabel") - @property # type: ignore[misc] + @property @since("3.1.0") def falsePositiveRateByLabel(self) -> List[float]: """ @@ -402,7 +402,7 @@ class _ClassificationSummary(JavaWrapper): """ return self._call_java("falsePositiveRateByLabel") - @property # type: ignore[misc] + @property @since("3.1.0") def precisionByLabel(self) -> List[float]: """ @@ -410,7 +410,7 @@ class _ClassificationSummary(JavaWrapper): """ return self._call_java("precisionByLabel") - @property # type: ignore[misc] + @property @since("3.1.0") def recallByLabel(self) -> List[float]: """ @@ -425,7 +425,7 @@ class _ClassificationSummary(JavaWrapper): """ return self._call_java("fMeasureByLabel", beta) - @property # type: ignore[misc] + @property @since("3.1.0") def accuracy(self) -> float: """ @@ -435,7 +435,7 @@ class _ClassificationSummary(JavaWrapper): """ return self._call_java("accuracy") - @property # type: ignore[misc] + @property @since("3.1.0") def 
weightedTruePositiveRate(self) -> float: """ @@ -444,7 +444,7 @@ class _ClassificationSummary(JavaWrapper): """ return self._call_java("weightedTruePositiveRate") - @property # type: ignore[misc] + @property @since("3.1.0") def weightedFalsePositiveRate(self) -> float: """ @@ -452,7 +452,7 @@ class _ClassificationSummary(JavaWrapper): """ return self._call_java("weightedFalsePositiveRate") - @property # type: ignore[misc] + @property @since("3.1.0") def weightedRecall(self) -> float: """ @@ -461,7 +461,7 @@ class _ClassificationSummary(JavaWrapper): """ return self._call_java("weightedRecall") - @property # type: ignore[misc] + @property @since("3.1.0") def weightedPrecision(self) -> float: """ @@ -485,7 +485,7 @@ class _TrainingSummary(JavaWrapper): .. versionadded:: 3.1.0 """ - @property # type: ignore[misc] + @property @since("3.1.0") def objectiveHistory(self) -> List[float]: """ @@ -495,7 +495,7 @@ class _TrainingSummary(JavaWrapper): """ return self._call_java("objectiveHistory") - @property # type: ignore[misc] + @property @since("3.1.0") def totalIterations(self) -> int: """ @@ -512,7 +512,7 @@ class _BinaryClassificationSummary(_ClassificationSummary): .. 
versionadded:: 3.1.0 """ - @property # type: ignore[misc] + @property @since("3.1.0") def scoreCol(self) -> str: """ @@ -536,7 +536,7 @@ class _BinaryClassificationSummary(_ClassificationSummary): """ return self._call_java("roc") - @property # type: ignore[misc] + @property @since("3.1.0") def areaUnderROC(self) -> float: """ @@ -545,7 +545,7 @@ class _BinaryClassificationSummary(_ClassificationSummary): """ return self._call_java("areaUnderROC") - @property # type: ignore[misc] + @property @since("3.1.0") def pr(self) -> DataFrame: """ @@ -555,7 +555,7 @@ class _BinaryClassificationSummary(_ClassificationSummary): """ return self._call_java("pr") - @property # type: ignore[misc] + @property @since("3.1.0") def fMeasureByThreshold(self) -> DataFrame: """ @@ -564,7 +564,7 @@ class _BinaryClassificationSummary(_ClassificationSummary): """ return self._call_java("fMeasureByThreshold") - @property # type: ignore[misc] + @property @since("3.1.0") def precisionByThreshold(self) -> DataFrame: """ @@ -574,7 +574,7 @@ class _BinaryClassificationSummary(_ClassificationSummary): """ return self._call_java("precisionByThreshold") - @property # type: ignore[misc] + @property @since("3.1.0") def recallByThreshold(self) -> DataFrame: """ @@ -857,7 +857,7 @@ class LinearSVCModel( """ return self._set(threshold=value) - @property # type: ignore[misc] + @property @since("2.2.0") def coefficients(self) -> Vector: """ @@ -865,7 +865,7 @@ class LinearSVCModel( """ return self._call_java("coefficients") - @property # type: ignore[misc] + @property @since("2.2.0") def intercept(self) -> float: """ @@ -1527,7 +1527,7 @@ class LogisticRegressionModel( .. 
versionadded:: 1.3.0 """ - @property # type: ignore[misc] + @property @since("2.0.0") def coefficients(self) -> Vector: """ @@ -1536,7 +1536,7 @@ class LogisticRegressionModel( """ return self._call_java("coefficients") - @property # type: ignore[misc] + @property @since("1.4.0") def intercept(self) -> float: """ @@ -1545,7 +1545,7 @@ class LogisticRegressionModel( """ return self._call_java("intercept") - @property # type: ignore[misc] + @property @since("2.1.0") def coefficientMatrix(self) -> Matrix: """ @@ -1553,7 +1553,7 @@ class LogisticRegressionModel( """ return self._call_java("coefficientMatrix") - @property # type: ignore[misc] + @property @since("2.1.0") def interceptVector(self) -> Vector: """ @@ -1561,7 +1561,7 @@ class LogisticRegressionModel( """ return self._call_java("interceptVector") - @property # type: ignore[misc] + @property @since("2.0.0") def summary(self) -> "LogisticRegressionTrainingSummary": """ @@ -1609,7 +1609,7 @@ class LogisticRegressionSummary(_ClassificationSummary): .. versionadded:: 2.0.0 """ - @property # type: ignore[misc] + @property @since("2.0.0") def probabilityCol(self) -> str: """ @@ -1618,7 +1618,7 @@ class LogisticRegressionSummary(_ClassificationSummary): """ return self._call_java("probabilityCol") - @property # type: ignore[misc] + @property @since("2.0.0") def featuresCol(self) -> str: """ @@ -2279,13 +2279,13 @@ class RandomForestClassificationModel( """ return self._call_java("featureImportances") - @property # type: ignore[misc] + @property @since("2.0.0") def trees(self) -> List[DecisionTreeClassificationModel]: """Trees in this ensemble. 
Warning: These have null parent Estimators.""" return [DecisionTreeClassificationModel(m) for m in list(self._call_java("trees"))] - @property # type: ignore[misc] + @property @since("3.1.0") def summary(self) -> "RandomForestClassificationTrainingSummary": """ @@ -2767,7 +2767,7 @@ class GBTClassificationModel( """ return self._call_java("featureImportances") - @property # type: ignore[misc] + @property @since("2.0.0") def trees(self) -> List[DecisionTreeRegressionModel]: """Trees in this ensemble. Warning: These have null parent Estimators.""" @@ -3018,7 +3018,7 @@ class NaiveBayesModel( .. versionadded:: 1.5.0 """ - @property # type: ignore[misc] + @property @since("2.0.0") def pi(self) -> Vector: """ @@ -3026,7 +3026,7 @@ class NaiveBayesModel( """ return self._call_java("pi") - @property # type: ignore[misc] + @property @since("2.0.0") def theta(self) -> Matrix: """ @@ -3034,7 +3034,7 @@ class NaiveBayesModel( """ return self._call_java("theta") - @property # type: ignore[misc] + @property @since("3.0.0") def sigma(self) -> Matrix: """ @@ -3311,7 +3311,7 @@ class MultilayerPerceptronClassificationModel( .. versionadded:: 1.6.0 """ - @property # type: ignore[misc] + @property @since("2.0.0") def weights(self) -> Vector: """ @@ -4227,7 +4227,7 @@ class FMClassificationModel( .. 
versionadded:: 3.0.0 """ - @property # type: ignore[misc] + @property @since("3.0.0") def intercept(self) -> float: """ @@ -4235,7 +4235,7 @@ class FMClassificationModel( """ return self._call_java("intercept") - @property # type: ignore[misc] + @property @since("3.0.0") def linear(self) -> Vector: """ @@ -4243,7 +4243,7 @@ class FMClassificationModel( """ return self._call_java("linear") - @property # type: ignore[misc] + @property @since("3.0.0") def factors(self) -> Matrix: """ diff --git a/python/pyspark/ml/clustering.py b/python/pyspark/ml/clustering.py index 19305749003..41108782a47 100644 --- a/python/pyspark/ml/clustering.py +++ b/python/pyspark/ml/clustering.py @@ -83,7 +83,7 @@ class ClusteringSummary(JavaWrapper): .. versionadded:: 2.1.0 """ - @property # type: ignore[misc] + @property @since("2.1.0") def predictionCol(self) -> str: """ @@ -91,7 +91,7 @@ class ClusteringSummary(JavaWrapper): """ return self._call_java("predictionCol") - @property # type: ignore[misc] + @property @since("2.1.0") def predictions(self) -> DataFrame: """ @@ -99,7 +99,7 @@ class ClusteringSummary(JavaWrapper): """ return self._call_java("predictions") - @property # type: ignore[misc] + @property @since("2.1.0") def featuresCol(self) -> str: """ @@ -107,7 +107,7 @@ class ClusteringSummary(JavaWrapper): """ return self._call_java("featuresCol") - @property # type: ignore[misc] + @property @since("2.1.0") def k(self) -> int: """ @@ -115,7 +115,7 @@ class ClusteringSummary(JavaWrapper): """ return self._call_java("k") - @property # type: ignore[misc] + @property @since("2.1.0") def cluster(self) -> DataFrame: """ @@ -123,7 +123,7 @@ class ClusteringSummary(JavaWrapper): """ return self._call_java("cluster") - @property # type: ignore[misc] + @property @since("2.1.0") def clusterSizes(self) -> List[int]: """ @@ -131,7 +131,7 @@ class ClusteringSummary(JavaWrapper): """ return self._call_java("clusterSizes") - @property # type: ignore[misc] + @property @since("2.4.0") def 
numIter(self) -> int: """ @@ -210,7 +210,7 @@ class GaussianMixtureModel( """ return self._set(probabilityCol=value) - @property # type: ignore[misc] + @property @since("2.0.0") def weights(self) -> List[float]: """ @@ -220,7 +220,7 @@ class GaussianMixtureModel( """ return self._call_java("weights") - @property # type: ignore[misc] + @property @since("3.0.0") def gaussians(self) -> List[MultivariateGaussian]: """ @@ -236,7 +236,7 @@ class GaussianMixtureModel( for jgaussian in jgaussians ] - @property # type: ignore[misc] + @property @since("2.0.0") def gaussiansDF(self) -> DataFrame: """ @@ -246,7 +246,7 @@ class GaussianMixtureModel( """ return self._call_java("gaussiansDF") - @property # type: ignore[misc] + @property @since("2.1.0") def summary(self) -> "GaussianMixtureSummary": """ @@ -529,7 +529,7 @@ class GaussianMixtureSummary(ClusteringSummary): .. versionadded:: 2.1.0 """ - @property # type: ignore[misc] + @property @since("2.1.0") def probabilityCol(self) -> str: """ @@ -537,7 +537,7 @@ class GaussianMixtureSummary(ClusteringSummary): """ return self._call_java("probabilityCol") - @property # type: ignore[misc] + @property @since("2.1.0") def probability(self) -> DataFrame: """ @@ -545,7 +545,7 @@ class GaussianMixtureSummary(ClusteringSummary): """ return self._call_java("probability") - @property # type: ignore[misc] + @property @since("2.2.0") def logLikelihood(self) -> float: """ @@ -561,7 +561,7 @@ class KMeansSummary(ClusteringSummary): .. 
versionadded:: 2.1.0 """ - @property # type: ignore[misc] + @property @since("2.4.0") def trainingCost(self) -> float: """ @@ -683,7 +683,7 @@ class KMeansModel( """Get the cluster centers, represented as a list of NumPy arrays.""" return [c.toArray() for c in self._call_java("clusterCenters")] - @property # type: ignore[misc] + @property @since("2.1.0") def summary(self) -> KMeansSummary: """ @@ -1021,7 +1021,7 @@ class BisectingKMeansModel( ) return self._call_java("computeCost", dataset) - @property # type: ignore[misc] + @property @since("2.1.0") def summary(self) -> "BisectingKMeansSummary": """ @@ -1245,7 +1245,7 @@ class BisectingKMeansSummary(ClusteringSummary): .. versionadded:: 2.1.0 """ - @property # type: ignore[misc] + @property @since("3.0.0") def trainingCost(self) -> float: """ diff --git a/python/pyspark/ml/connect/base.py b/python/pyspark/ml/connect/base.py index 4f1f77ac627..f86b1e928c2 100644 --- a/python/pyspark/ml/connect/base.py +++ b/python/pyspark/ml/connect/base.py @@ -322,7 +322,7 @@ class PredictionModel(Model, _PredictorParams, metaclass=ABCMeta): """ return self._set(predictionCol=value) - @property # type: ignore[misc] + @property @abstractmethod @since("3.5.0") def numFeatures(self) -> int: diff --git a/python/pyspark/ml/connect/classification.py b/python/pyspark/ml/connect/classification.py index 8d2006c7af9..eaad09920c0 100644 --- a/python/pyspark/ml/connect/classification.py +++ b/python/pyspark/ml/connect/classification.py @@ -113,7 +113,7 @@ def _train_logistic_regression_model_worker_fn( num_samples_per_worker, batch_size, num_workers=0, - prefetch_factor=None, + prefetch_factor=None, # type: ignore ) for i in range(max_iter): ddp_model.train() diff --git a/python/pyspark/ml/feature.py b/python/pyspark/ml/feature.py index e7ec35bffa0..349b50913d7 100755 --- a/python/pyspark/ml/feature.py +++ b/python/pyspark/ml/feature.py @@ -1219,7 +1219,7 @@ class CountVectorizerModel( model._set(vocabSize=len(vocabulary)) return model - 
@property # type: ignore[misc] + @property @since("1.6.0") def vocabulary(self) -> List[str]: """ @@ -1889,7 +1889,7 @@ class IDFModel(JavaModel, _IDFParams, JavaMLReadable["IDFModel"], JavaMLWritable """ return self._set(outputCol=value) - @property # type: ignore[misc] + @property @since("2.0.0") def idf(self) -> Vector: """ @@ -1897,7 +1897,7 @@ class IDFModel(JavaModel, _IDFParams, JavaMLReadable["IDFModel"], JavaMLWritable """ return self._call_java("idf") - @property # type: ignore[misc] + @property @since("3.0.0") def docFreq(self) -> List[int]: """ @@ -1905,7 +1905,7 @@ class IDFModel(JavaModel, _IDFParams, JavaMLReadable["IDFModel"], JavaMLWritable """ return self._call_java("docFreq") - @property # type: ignore[misc] + @property @since("3.0.0") def numDocs(self) -> int: """ @@ -2255,7 +2255,7 @@ class ImputerModel(JavaModel, _ImputerParams, JavaMLReadable["ImputerModel"], Ja """ return self._set(outputCol=value) - @property # type: ignore[misc] + @property @since("2.2.0") def surrogateDF(self) -> DataFrame: """ @@ -2470,7 +2470,7 @@ class MaxAbsScalerModel( """ return self._set(outputCol=value) - @property # type: ignore[misc] + @property @since("2.0.0") def maxAbs(self) -> Vector: """ @@ -2820,7 +2820,7 @@ class MinMaxScalerModel( """ return self._set(max=value) - @property # type: ignore[misc] + @property @since("2.0.0") def originalMin(self) -> Vector: """ @@ -2828,7 +2828,7 @@ class MinMaxScalerModel( """ return self._call_java("originalMin") - @property # type: ignore[misc] + @property @since("2.0.0") def originalMax(self) -> Vector: """ @@ -3329,7 +3329,7 @@ class OneHotEncoderModel( """ return self._set(handleInvalid=value) - @property # type: ignore[misc] + @property @since("2.3.0") def categorySizes(self) -> List[int]: """ @@ -4007,7 +4007,7 @@ class RobustScalerModel( """ return self._set(outputCol=value) - @property # type: ignore[misc] + @property @since("3.0.0") def median(self) -> Vector: """ @@ -4015,7 +4015,7 @@ class RobustScalerModel( 
""" return self._call_java("median") - @property # type: ignore[misc] + @property @since("3.0.0") def range(self) -> Vector: """ @@ -4459,7 +4459,7 @@ class StandardScalerModel( """ return self._set(outputCol=value) - @property # type: ignore[misc] + @property @since("2.0.0") def std(self) -> Vector: """ @@ -4467,7 +4467,7 @@ class StandardScalerModel( """ return self._call_java("std") - @property # type: ignore[misc] + @property @since("2.0.0") def mean(self) -> Vector: """ @@ -4832,7 +4832,7 @@ class StringIndexerModel( model.setHandleInvalid(handleInvalid) return model - @property # type: ignore[misc] + @property @since("1.5.0") def labels(self) -> List[str]: """ @@ -4843,7 +4843,7 @@ class StringIndexerModel( """ return self._call_java("labels") - @property # type: ignore[misc] + @property @since("3.0.2") def labelsArray(self) -> List[str]: """ @@ -5627,7 +5627,7 @@ class VectorIndexerModel( """ return self._set(outputCol=value) - @property # type: ignore[misc] + @property @since("1.4.0") def numFeatures(self) -> int: """ @@ -5635,7 +5635,7 @@ class VectorIndexerModel( """ return self._call_java("numFeatures") - @property # type: ignore[misc] + @property @since("1.4.0") def categoryMaps(self) -> Dict[int, Tuple[float, int]]: """ @@ -6263,7 +6263,7 @@ class PCAModel(JavaModel, _PCAParams, JavaMLReadable["PCAModel"], JavaMLWritable """ return self._set(outputCol=value) - @property # type: ignore[misc] + @property @since("2.0.0") def pc(self) -> DenseMatrix: """ @@ -6272,7 +6272,7 @@ class PCAModel(JavaModel, _PCAParams, JavaMLReadable["PCAModel"], JavaMLWritable """ return self._call_java("pc") - @property # type: ignore[misc] + @property @since("2.0.0") def explainedVariance(self) -> DenseVector: """ @@ -6729,7 +6729,7 @@ class _SelectorModel(JavaModel, _SelectorParams): """ return self._set(outputCol=value) - @property # type: ignore[misc] + @property @since("2.0.0") def selectedFeatures(self) -> List[int]: """ @@ -7144,7 +7144,7 @@ class 
VarianceThresholdSelectorModel( """ return self._set(outputCol=value) - @property # type: ignore[misc] + @property @since("3.1.0") def selectedFeatures(self) -> List[int]: """ @@ -7419,7 +7419,7 @@ class UnivariateFeatureSelectorModel( """ return self._set(outputCol=value) - @property # type: ignore[misc] + @property @since("3.1.1") def selectedFeatures(self) -> List[int]: """ diff --git a/python/pyspark/ml/fpm.py b/python/pyspark/ml/fpm.py index 00f77c5a54a..cba4219a069 100644 --- a/python/pyspark/ml/fpm.py +++ b/python/pyspark/ml/fpm.py @@ -124,7 +124,7 @@ class FPGrowthModel(JavaModel, _FPGrowthParams, JavaMLWritable, JavaMLReadable[" """ return self._set(predictionCol=value) - @property # type: ignore[misc] + @property @since("2.2.0") def freqItemsets(self) -> DataFrame: """ @@ -134,7 +134,7 @@ class FPGrowthModel(JavaModel, _FPGrowthParams, JavaMLWritable, JavaMLReadable[" """ return self._call_java("freqItemsets") - @property # type: ignore[misc] + @property @since("2.2.0") def associationRules(self) -> DataFrame: """ diff --git a/python/pyspark/ml/recommendation.py b/python/pyspark/ml/recommendation.py index f13fb721b9a..873140e51af 100644 --- a/python/pyspark/ml/recommendation.py +++ b/python/pyspark/ml/recommendation.py @@ -609,13 +609,13 @@ class ALSModel(JavaModel, _ALSModelParams, JavaMLWritable, JavaMLReadable["ALSMo """ return self._set(blockSize=value) - @property # type: ignore[misc] + @property @since("1.4.0") def rank(self) -> int: """rank of the matrix factorization model""" return self._call_java("rank") - @property # type: ignore[misc] + @property @since("1.4.0") def userFactors(self) -> DataFrame: """ @@ -624,7 +624,7 @@ class ALSModel(JavaModel, _ALSModelParams, JavaMLWritable, JavaMLReadable["ALSMo """ return self._call_java("userFactors") - @property # type: ignore[misc] + @property @since("1.4.0") def itemFactors(self) -> DataFrame: """ diff --git a/python/pyspark/ml/regression.py b/python/pyspark/ml/regression.py index 
8678ec3f31e..a4ce961c92e 100644 --- a/python/pyspark/ml/regression.py +++ b/python/pyspark/ml/regression.py @@ -453,7 +453,7 @@ class LinearRegressionModel( .. versionadded:: 1.4.0 """ - @property # type: ignore[misc] + @property @since("2.0.0") def coefficients(self) -> Vector: """ @@ -461,7 +461,7 @@ class LinearRegressionModel( """ return self._call_java("coefficients") - @property # type: ignore[misc] + @property @since("1.4.0") def intercept(self) -> float: """ @@ -469,7 +469,7 @@ class LinearRegressionModel( """ return self._call_java("intercept") - @property # type: ignore[misc] + @property @since("2.3.0") def scale(self) -> float: r""" @@ -477,7 +477,7 @@ class LinearRegressionModel( """ return self._call_java("scale") - @property # type: ignore[misc] + @property @since("2.0.0") def summary(self) -> "LinearRegressionTrainingSummary": """ @@ -517,7 +517,7 @@ class LinearRegressionSummary(JavaWrapper): .. versionadded:: 2.0.0 """ - @property # type: ignore[misc] + @property @since("2.0.0") def predictions(self) -> DataFrame: """ @@ -525,7 +525,7 @@ class LinearRegressionSummary(JavaWrapper): """ return self._call_java("predictions") - @property # type: ignore[misc] + @property @since("2.0.0") def predictionCol(self) -> str: """ @@ -534,7 +534,7 @@ class LinearRegressionSummary(JavaWrapper): """ return self._call_java("predictionCol") - @property # type: ignore[misc] + @property @since("2.0.0") def labelCol(self) -> str: """ @@ -543,7 +543,7 @@ class LinearRegressionSummary(JavaWrapper): """ return self._call_java("labelCol") - @property # type: ignore[misc] + @property @since("2.0.0") def featuresCol(self) -> str: """ @@ -552,7 +552,7 @@ class LinearRegressionSummary(JavaWrapper): """ return self._call_java("featuresCol") - @property # type: ignore[misc] + @property @since("2.0.0") def explainedVariance(self) -> float: r""" @@ -571,7 +571,7 @@ class LinearRegressionSummary(JavaWrapper): """ return self._call_java("explainedVariance") - @property # type: 
ignore[misc] + @property @since("2.0.0") def meanAbsoluteError(self) -> float: """ @@ -587,7 +587,7 @@ class LinearRegressionSummary(JavaWrapper): """ return self._call_java("meanAbsoluteError") - @property # type: ignore[misc] + @property @since("2.0.0") def meanSquaredError(self) -> float: """ @@ -603,7 +603,7 @@ class LinearRegressionSummary(JavaWrapper): """ return self._call_java("meanSquaredError") - @property # type: ignore[misc] + @property @since("2.0.0") def rootMeanSquaredError(self) -> float: """ @@ -618,7 +618,7 @@ class LinearRegressionSummary(JavaWrapper): """ return self._call_java("rootMeanSquaredError") - @property # type: ignore[misc] + @property @since("2.0.0") def r2(self) -> float: """ @@ -635,7 +635,7 @@ class LinearRegressionSummary(JavaWrapper): """ return self._call_java("r2") - @property # type: ignore[misc] + @property @since("2.4.0") def r2adj(self) -> float: """ @@ -651,7 +651,7 @@ class LinearRegressionSummary(JavaWrapper): """ return self._call_java("r2adj") - @property # type: ignore[misc] + @property @since("2.0.0") def residuals(self) -> DataFrame: """ @@ -659,7 +659,7 @@ class LinearRegressionSummary(JavaWrapper): """ return self._call_java("residuals") - @property # type: ignore[misc] + @property @since("2.0.0") def numInstances(self) -> int: """ @@ -667,7 +667,7 @@ class LinearRegressionSummary(JavaWrapper): """ return self._call_java("numInstances") - @property # type: ignore[misc] + @property @since("2.2.0") def degreesOfFreedom(self) -> int: """ @@ -675,7 +675,7 @@ class LinearRegressionSummary(JavaWrapper): """ return self._call_java("degreesOfFreedom") - @property # type: ignore[misc] + @property @since("2.0.0") def devianceResiduals(self) -> List[float]: """ @@ -977,7 +977,7 @@ class IsotonicRegressionModel( """ return self._set(featureIndex=value) - @property # type: ignore[misc] + @property @since("1.6.0") def boundaries(self) -> Vector: """ @@ -985,7 +985,7 @@ class IsotonicRegressionModel( """ return 
self._call_java("boundaries") - @property # type: ignore[misc] + @property @since("1.6.0") def predictions(self) -> Vector: """ @@ -994,7 +994,7 @@ class IsotonicRegressionModel( """ return self._call_java("predictions") - @property # type: ignore[misc] + @property @since("3.0.0") def numFeatures(self) -> int: """ @@ -1600,7 +1600,7 @@ class RandomForestRegressionModel( .. versionadded:: 1.4.0 """ - @property # type: ignore[misc] + @property @since("2.0.0") def trees(self) -> List[DecisionTreeRegressionModel]: """Trees in this ensemble. Warning: These have null parent Estimators.""" @@ -1989,7 +1989,7 @@ class GBTRegressionModel( """ return self._call_java("featureImportances") - @property # type: ignore[misc] + @property @since("2.0.0") def trees(self) -> List[DecisionTreeRegressionModel]: """Trees in this ensemble. Warning: These have null parent Estimators.""" @@ -2308,7 +2308,7 @@ class AFTSurvivalRegressionModel( """ return self._set(quantilesCol=value) - @property # type: ignore[misc] + @property @since("2.0.0") def coefficients(self) -> Vector: """ @@ -2316,7 +2316,7 @@ class AFTSurvivalRegressionModel( """ return self._call_java("coefficients") - @property # type: ignore[misc] + @property @since("1.6.0") def intercept(self) -> float: """ @@ -2324,7 +2324,7 @@ class AFTSurvivalRegressionModel( """ return self._call_java("intercept") - @property # type: ignore[misc] + @property @since("1.6.0") def scale(self) -> float: """ @@ -2734,7 +2734,7 @@ class GeneralizedLinearRegressionModel( """ return self._set(linkPredictionCol=value) - @property # type: ignore[misc] + @property @since("2.0.0") def coefficients(self) -> Vector: """ @@ -2742,7 +2742,7 @@ class GeneralizedLinearRegressionModel( """ return self._call_java("coefficients") - @property # type: ignore[misc] + @property @since("2.0.0") def intercept(self) -> float: """ @@ -2750,7 +2750,7 @@ class GeneralizedLinearRegressionModel( """ return self._call_java("intercept") - @property # type: ignore[misc] + 
@property @since("2.0.0") def summary(self) -> "GeneralizedLinearRegressionTrainingSummary": """ @@ -2792,7 +2792,7 @@ class GeneralizedLinearRegressionSummary(JavaWrapper): .. versionadded:: 2.0.0 """ - @property # type: ignore[misc] + @property @since("2.0.0") def predictions(self) -> DataFrame: """ @@ -2800,7 +2800,7 @@ class GeneralizedLinearRegressionSummary(JavaWrapper): """ return self._call_java("predictions") - @property # type: ignore[misc] + @property @since("2.0.0") def predictionCol(self) -> str: """ @@ -2809,7 +2809,7 @@ class GeneralizedLinearRegressionSummary(JavaWrapper): """ return self._call_java("predictionCol") - @property # type: ignore[misc] + @property @since("2.2.0") def numInstances(self) -> int: """ @@ -2817,7 +2817,7 @@ class GeneralizedLinearRegressionSummary(JavaWrapper): """ return self._call_java("numInstances") - @property # type: ignore[misc] + @property @since("2.0.0") def rank(self) -> int: """ @@ -2825,7 +2825,7 @@ class GeneralizedLinearRegressionSummary(JavaWrapper): """ return self._call_java("rank") - @property # type: ignore[misc] + @property @since("2.0.0") def degreesOfFreedom(self) -> int: """ @@ -2833,7 +2833,7 @@ class GeneralizedLinearRegressionSummary(JavaWrapper): """ return self._call_java("degreesOfFreedom") - @property # type: ignore[misc] + @property @since("2.0.0") def residualDegreeOfFreedom(self) -> int: """ @@ -2841,7 +2841,7 @@ class GeneralizedLinearRegressionSummary(JavaWrapper): """ return self._call_java("residualDegreeOfFreedom") - @property # type: ignore[misc] + @property @since("2.0.0") def residualDegreeOfFreedomNull(self) -> int: """ @@ -2863,7 +2863,7 @@ class GeneralizedLinearRegressionSummary(JavaWrapper): """ return self._call_java("residuals", residualsType) - @property # type: ignore[misc] + @property @since("2.0.0") def nullDeviance(self) -> float: """ @@ -2871,7 +2871,7 @@ class GeneralizedLinearRegressionSummary(JavaWrapper): """ return self._call_java("nullDeviance") - @property # type: 
ignore[misc] + @property @since("2.0.0") def deviance(self) -> float: """ @@ -2879,7 +2879,7 @@ class GeneralizedLinearRegressionSummary(JavaWrapper): """ return self._call_java("deviance") - @property # type: ignore[misc] + @property @since("2.0.0") def dispersion(self) -> float: """ @@ -2890,7 +2890,7 @@ class GeneralizedLinearRegressionSummary(JavaWrapper): """ return self._call_java("dispersion") - @property # type: ignore[misc] + @property @since("2.0.0") def aic(self) -> float: """ @@ -2907,7 +2907,7 @@ class GeneralizedLinearRegressionTrainingSummary(GeneralizedLinearRegressionSumm .. versionadded:: 2.0.0 """ - @property # type: ignore[misc] + @property @since("2.0.0") def numIterations(self) -> int: """ @@ -2915,7 +2915,7 @@ class GeneralizedLinearRegressionTrainingSummary(GeneralizedLinearRegressionSumm """ return self._call_java("numIterations") - @property # type: ignore[misc] + @property @since("2.0.0") def solver(self) -> str: """ @@ -2923,7 +2923,7 @@ class GeneralizedLinearRegressionTrainingSummary(GeneralizedLinearRegressionSumm """ return self._call_java("solver") - @property # type: ignore[misc] + @property @since("2.0.0") def coefficientStandardErrors(self) -> List[float]: """ @@ -2934,7 +2934,7 @@ class GeneralizedLinearRegressionTrainingSummary(GeneralizedLinearRegressionSumm """ return self._call_java("coefficientStandardErrors") - @property # type: ignore[misc] + @property @since("2.0.0") def tValues(self) -> List[float]: """ @@ -2945,7 +2945,7 @@ class GeneralizedLinearRegressionTrainingSummary(GeneralizedLinearRegressionSumm """ return self._call_java("tValues") - @property # type: ignore[misc] + @property @since("2.0.0") def pValues(self) -> List[float]: """ @@ -3280,7 +3280,7 @@ class FMRegressionModel( .. 
versionadded:: 3.0.0 """ - @property # type: ignore[misc] + @property @since("3.0.0") def intercept(self) -> float: """ @@ -3288,7 +3288,7 @@ class FMRegressionModel( """ return self._call_java("intercept") - @property # type: ignore[misc] + @property @since("3.0.0") def linear(self) -> Vector: """ @@ -3296,7 +3296,7 @@ class FMRegressionModel( """ return self._call_java("linear") - @property # type: ignore[misc] + @property @since("3.0.0") def factors(self) -> Matrix: """ diff --git a/python/pyspark/ml/tests/typing/test_clustering.yaml b/python/pyspark/ml/tests/typing/test_clustering.yaml index b208573975d..bc50cf5b258 100644 --- a/python/pyspark/ml/tests/typing/test_clustering.yaml +++ b/python/pyspark/ml/tests/typing/test_clustering.yaml @@ -27,7 +27,7 @@ reveal_type(local_model) reveal_type(local_model.setFeaturesCol("foo")) out: | - main:4: note: Revealed type is "pyspark.ml.clustering.DistributedLDAModel*" - main:5: note: Revealed type is "pyspark.ml.clustering.DistributedLDAModel*" + main:4: note: Revealed type is "pyspark.ml.clustering.DistributedLDAModel" + main:5: note: Revealed type is "pyspark.ml.clustering.DistributedLDAModel" main:8: note: Revealed type is "pyspark.ml.clustering.LocalLDAModel" - main:9: note: Revealed type is "pyspark.ml.clustering.LocalLDAModel*" + main:9: note: Revealed type is "pyspark.ml.clustering.LocalLDAModel" diff --git a/python/pyspark/ml/tests/typing/test_evaluation.yml b/python/pyspark/ml/tests/typing/test_evaluation.yml index a60166dfb96..51e076cf6bc 100644 --- a/python/pyspark/ml/tests/typing/test_evaluation.yml +++ b/python/pyspark/ml/tests/typing/test_evaluation.yml @@ -22,7 +22,7 @@ BinaryClassificationEvaluator().setMetricName("areaUnderROC") BinaryClassificationEvaluator(metricName="areaUnderPR") - BinaryClassificationEvaluator().setMetricName("foo") # E: Argument 1 to "setMetricName" of "BinaryClassificationEvaluator" has incompatible type "Literal['foo']"; expected "Union[Literal['areaUnderROC'], 
Literal['areaUnderPR']]" [arg-type] - BinaryClassificationEvaluator(metricName="bar") # E: Argument "metricName" to "BinaryClassificationEvaluator" has incompatible type "Literal['bar']"; expected "Union[Literal['areaUnderROC'], Literal['areaUnderPR']]" [arg-type] + BinaryClassificationEvaluator().setMetricName("foo") # E: Argument 1 to "setMetricName" of "BinaryClassificationEvaluator" has incompatible type "Literal['foo']"; expected "Literal['areaUnderROC', 'areaUnderPR']" [arg-type] + BinaryClassificationEvaluator(metricName="bar") # E: Argument "metricName" to "BinaryClassificationEvaluator" has incompatible type "Literal['bar']"; expected "Literal['areaUnderROC', 'areaUnderPR']" [arg-type] - reveal_type(BinaryClassificationEvaluator.load("foo")) # N: Revealed type is "pyspark.ml.evaluation.BinaryClassificationEvaluator*" + reveal_type(BinaryClassificationEvaluator.load("foo")) # N: Revealed type is "pyspark.ml.evaluation.BinaryClassificationEvaluator" diff --git a/python/pyspark/ml/torch/distributor.py b/python/pyspark/ml/torch/distributor.py index d40fbc61766..9f9636e6b10 100644 --- a/python/pyspark/ml/torch/distributor.py +++ b/python/pyspark/ml/torch/distributor.py @@ -767,8 +767,8 @@ class TorchDistributor(Distributor): schema_file_path = os.path.join(save_dir, "schema.json") schema_json_string = json.dumps(input_schema_json) - with open(schema_file_path, "w") as f: # type:ignore - f.write(schema_json_string) # type:ignore + with open(schema_file_path, "w") as f: + f.write(schema_json_string) os.environ[SPARK_PARTITION_ARROW_DATA_FILE] = arrow_file_path os.environ[SPARK_DATAFRAME_SCHEMA_FILE] = schema_file_path @@ -959,7 +959,7 @@ class TorchDistributor(Distributor): def _get_spark_partition_data_loader( - num_samples: int, batch_size: int, num_workers: int = 1, prefetch_factor: Optional[int] = 2 + num_samples: int, batch_size: int, num_workers: int = 1, prefetch_factor: int = 2 ) -> Any: """ This function must be called inside the `train_function` where 
`train_function` diff --git a/python/pyspark/ml/tree.py b/python/pyspark/ml/tree.py index ad405b742bd..5143c3214b6 100644 --- a/python/pyspark/ml/tree.py +++ b/python/pyspark/ml/tree.py @@ -46,19 +46,19 @@ class _DecisionTreeModel(JavaPredictionModel[T]): .. versionadded:: 1.5.0 """ - @property # type: ignore[misc] + @property @since("1.5.0") def numNodes(self) -> int: """Return number of nodes of the decision tree.""" return self._call_java("numNodes") - @property # type: ignore[misc] + @property @since("1.5.0") def depth(self) -> int: """Return depth of the decision tree.""" return self._call_java("depth") - @property # type: ignore[misc] + @property @since("2.0.0") def toDebugString(self) -> str: """Full description of model.""" @@ -218,31 +218,31 @@ class _TreeEnsembleModel(JavaPredictionModel[T]): Represents a tree ensemble model. """ - @property # type: ignore[misc] + @property @since("2.0.0") def trees(self) -> Sequence["_DecisionTreeModel"]: """Trees in this ensemble. Warning: These have null parent Estimators.""" return [_DecisionTreeModel(m) for m in list(self._call_java("trees"))] - @property # type: ignore[misc] + @property @since("2.0.0") def getNumTrees(self) -> int: """Number of trees in ensemble.""" return self._call_java("getNumTrees") - @property # type: ignore[misc] + @property @since("1.5.0") def treeWeights(self) -> List[float]: """Return the weights for each tree""" return list(self._call_java("javaTreeWeights")) - @property # type: ignore[misc] + @property @since("2.0.0") def totalNumNodes(self) -> int: """Total number of nodes, summed over all trees in the ensemble.""" return self._call_java("totalNumNodes") - @property # type: ignore[misc] + @property @since("2.0.0") def toDebugString(self) -> str: """Full description of model.""" diff --git a/python/pyspark/ml/tuning.py b/python/pyspark/ml/tuning.py index 0dabcdd7f27..63f51229a9f 100644 --- a/python/pyspark/ml/tuning.py +++ b/python/pyspark/ml/tuning.py @@ -461,7 +461,7 @@ class 
_ValidatorSharedReadWrite: evaluator = instance.getEvaluator() uidMap = MetaAlgorithmReadWrite.getUidMap(estiamtor) - for elem in [evaluator] + list(uidMap.values()): # type: ignore[arg-type] + for elem in [evaluator] + list(uidMap.values()): if not isinstance(elem, MLWritable): raise ValueError( f"Validator write will fail because it contains {elem.uid} " diff --git a/python/pyspark/ml/util.py b/python/pyspark/ml/util.py index 74ce8162d18..2c90ff3cb7b 100644 --- a/python/pyspark/ml/util.py +++ b/python/pyspark/ml/util.py @@ -659,7 +659,7 @@ class HasTrainingSummary(Generic[T]): .. versionadded:: 3.0.0 """ - @property # type: ignore[misc] + @property @since("2.1.0") def hasSummary(self) -> bool: """ @@ -668,7 +668,7 @@ class HasTrainingSummary(Generic[T]): """ return cast("JavaWrapper", self)._call_java("hasSummary") - @property # type: ignore[misc] + @property @since("2.1.0") def summary(self) -> T: """ diff --git a/python/pyspark/ml/wrapper.py b/python/pyspark/ml/wrapper.py index a83ed4c3d4b..5eee3eeef11 100644 --- a/python/pyspark/ml/wrapper.py +++ b/python/pyspark/ml/wrapper.py @@ -268,7 +268,7 @@ class JavaParams(JavaWrapper, Params, metaclass=ABCMeta): return self._java_obj @staticmethod - def _from_java(java_stage: "JavaObject") -> "JP": + def _from_java(java_stage: "JavaObject") -> "JP": # type: ignore """ Given a Java object, create and return a Python wrapper of it. Used for ML persistence. @@ -449,7 +449,7 @@ class JavaPredictionModel(PredictionModel[T], JavaModel, _PredictorParams): (Private) Java Model for prediction tasks (regression and classification). 
""" - @property # type: ignore[misc] + @property @since("2.1.0") def numFeatures(self) -> int: """ diff --git a/python/pyspark/mllib/classification.py b/python/pyspark/mllib/classification.py index 1a3b3581e96..1e1795d9fb3 100644 --- a/python/pyspark/mllib/classification.py +++ b/python/pyspark/mllib/classification.py @@ -73,7 +73,7 @@ class LinearClassificationModel(LinearModel): """ self._threshold = value - @property # type: ignore[misc] + @property @since("1.4.0") def threshold(self) -> Optional[float]: """ @@ -214,7 +214,7 @@ class LogisticRegressionModel(LinearClassificationModel): self._numClasses - 1, self._dataWithBiasSize ) - @property # type: ignore[misc] + @property @since("1.4.0") def numFeatures(self) -> int: """ @@ -222,7 +222,7 @@ class LogisticRegressionModel(LinearClassificationModel): """ return self._numFeatures - @property # type: ignore[misc] + @property @since("1.4.0") def numClasses(self) -> int: """ diff --git a/python/pyspark/mllib/clustering.py b/python/pyspark/mllib/clustering.py index 89210a8e0a4..4595268edc6 100644 --- a/python/pyspark/mllib/clustering.py +++ b/python/pyspark/mllib/clustering.py @@ -79,14 +79,14 @@ class BisectingKMeansModel(JavaModelWrapper): super(BisectingKMeansModel, self).__init__(java_model) self.centers = [c.toArray() for c in self.call("clusterCenters")] - @property # type: ignore[misc] + @property @since("2.0.0") def clusterCenters(self) -> List[np.ndarray]: """Get the cluster centers, represented as a list of NumPy arrays.""" return self.centers - @property # type: ignore[misc] + @property @since("2.0.0") def k(self) -> int: """Get the number of clusters""" @@ -281,13 +281,13 @@ class KMeansModel(Saveable, Loader["KMeansModel"]): def __init__(self, centers: List["VectorLike"]): self.centers = centers - @property # type: ignore[misc] + @property @since("1.0.0") def clusterCenters(self) -> List["VectorLike"]: """Get the cluster centers, represented as a list of NumPy arrays.""" return self.centers - @property # 
type: ignore[misc] + @property @since("1.4.0") def k(self) -> int: """Total number of clusters.""" @@ -532,7 +532,7 @@ class GaussianMixtureModel(JavaModelWrapper, JavaSaveable, JavaLoader["GaussianM True """ - @property # type: ignore[misc] + @property @since("1.4.0") def weights(self) -> np.ndarray: """ @@ -541,7 +541,7 @@ class GaussianMixtureModel(JavaModelWrapper, JavaSaveable, JavaLoader["GaussianM """ return array(self.call("weights")) - @property # type: ignore[misc] + @property @since("1.4.0") def gaussians(self) -> List[MultivariateGaussian]: """ @@ -552,7 +552,7 @@ class GaussianMixtureModel(JavaModelWrapper, JavaSaveable, JavaLoader["GaussianM MultivariateGaussian(gaussian[0], gaussian[1]) for gaussian in self.call("gaussians") ] - @property # type: ignore[misc] + @property @since("1.4.0") def k(self) -> int: """Number of gaussians in mixture.""" @@ -778,7 +778,7 @@ class PowerIterationClusteringModel( ... pass """ - @property # type: ignore[misc] + @property @since("1.5.0") def k(self) -> int: """ @@ -946,7 +946,7 @@ class StreamingKMeansModel(KMeansModel): super(StreamingKMeansModel, self).__init__(centers=clusterCenters) self._clusterWeights = list(clusterWeights) # type: ignore[arg-type] - @property # type: ignore[misc] + @property @since("1.5.0") def clusterWeights(self) -> List[np.float64]: """Return the cluster weights.""" diff --git a/python/pyspark/mllib/evaluation.py b/python/pyspark/mllib/evaluation.py index cee61a1b241..2a8991df050 100644 --- a/python/pyspark/mllib/evaluation.py +++ b/python/pyspark/mllib/evaluation.py @@ -85,7 +85,7 @@ class BinaryClassificationMetrics(JavaModelWrapper): java_model = java_class(df._jdf) super(BinaryClassificationMetrics, self).__init__(java_model) - @property # type: ignore[misc] + @property @since("1.4.0") def areaUnderROC(self) -> float: """ @@ -94,7 +94,7 @@ class BinaryClassificationMetrics(JavaModelWrapper): """ return self.call("areaUnderROC") - @property # type: ignore[misc] + @property 
@since("1.4.0") def areaUnderPR(self) -> float: """ @@ -161,7 +161,7 @@ class RegressionMetrics(JavaModelWrapper): java_model = java_class(df._jdf) super(RegressionMetrics, self).__init__(java_model) - @property # type: ignore[misc] + @property @since("1.4.0") def explainedVariance(self) -> float: r""" @@ -170,7 +170,7 @@ class RegressionMetrics(JavaModelWrapper): """ return self.call("explainedVariance") - @property # type: ignore[misc] + @property @since("1.4.0") def meanAbsoluteError(self) -> float: """ @@ -179,7 +179,7 @@ class RegressionMetrics(JavaModelWrapper): """ return self.call("meanAbsoluteError") - @property # type: ignore[misc] + @property @since("1.4.0") def meanSquaredError(self) -> float: """ @@ -188,7 +188,7 @@ class RegressionMetrics(JavaModelWrapper): """ return self.call("meanSquaredError") - @property # type: ignore[misc] + @property @since("1.4.0") def rootMeanSquaredError(self) -> float: """ @@ -197,7 +197,7 @@ class RegressionMetrics(JavaModelWrapper): """ return self.call("rootMeanSquaredError") - @property # type: ignore[misc] + @property @since("1.4.0") def r2(self) -> float: """ @@ -348,7 +348,7 @@ class MulticlassMetrics(JavaModelWrapper): else: return self.call("fMeasure", label, beta) - @property # type: ignore[misc] + @property @since("2.0.0") def accuracy(self) -> float: """ @@ -357,7 +357,7 @@ class MulticlassMetrics(JavaModelWrapper): """ return self.call("accuracy") - @property # type: ignore[misc] + @property @since("1.4.0") def weightedTruePositiveRate(self) -> float: """ @@ -366,7 +366,7 @@ class MulticlassMetrics(JavaModelWrapper): """ return self.call("weightedTruePositiveRate") - @property # type: ignore[misc] + @property @since("1.4.0") def weightedFalsePositiveRate(self) -> float: """ @@ -374,7 +374,7 @@ class MulticlassMetrics(JavaModelWrapper): """ return self.call("weightedFalsePositiveRate") - @property # type: ignore[misc] + @property @since("1.4.0") def weightedRecall(self) -> float: """ @@ -383,7 +383,7 @@ class 
MulticlassMetrics(JavaModelWrapper): """ return self.call("weightedRecall") - @property # type: ignore[misc] + @property @since("1.4.0") def weightedPrecision(self) -> float: """ @@ -482,7 +482,7 @@ class RankingMetrics(JavaModelWrapper, Generic[T]): """ return self.call("precisionAt", int(k)) - @property # type: ignore[misc] + @property @since("1.4.0") def meanAveragePrecision(self) -> float: """ @@ -614,7 +614,7 @@ class MultilabelMetrics(JavaModelWrapper): else: return self.call("f1Measure", float(label)) - @property # type: ignore[misc] + @property @since("1.4.0") def microPrecision(self) -> float: """ @@ -623,7 +623,7 @@ class MultilabelMetrics(JavaModelWrapper): """ return self.call("microPrecision") - @property # type: ignore[misc] + @property @since("1.4.0") def microRecall(self) -> float: """ @@ -632,7 +632,7 @@ class MultilabelMetrics(JavaModelWrapper): """ return self.call("microRecall") - @property # type: ignore[misc] + @property @since("1.4.0") def microF1Measure(self) -> float: """ @@ -641,7 +641,7 @@ class MultilabelMetrics(JavaModelWrapper): """ return self.call("microF1Measure") - @property # type: ignore[misc] + @property @since("1.4.0") def hammingLoss(self) -> float: """ @@ -649,7 +649,7 @@ class MultilabelMetrics(JavaModelWrapper): """ return self.call("hammingLoss") - @property # type: ignore[misc] + @property @since("1.4.0") def subsetAccuracy(self) -> float: """ @@ -658,7 +658,7 @@ class MultilabelMetrics(JavaModelWrapper): """ return self.call("subsetAccuracy") - @property # type: ignore[misc] + @property @since("1.4.0") def accuracy(self) -> float: """ diff --git a/python/pyspark/mllib/feature.py b/python/pyspark/mllib/feature.py index 17dab6ac057..2a8cb7d8df3 100644 --- a/python/pyspark/mllib/feature.py +++ b/python/pyspark/mllib/feature.py @@ -251,7 +251,7 @@ class StandardScalerModel(JavaVectorTransformer): self.call("setWithStd", withStd) return self - @property # type: ignore[misc] + @property @since("2.0.0") def withStd(self) -> 
bool: """ @@ -259,7 +259,7 @@ class StandardScalerModel(JavaVectorTransformer): """ return self.call("withStd") - @property # type: ignore[misc] + @property @since("2.0.0") def withMean(self) -> bool: """ @@ -267,7 +267,7 @@ class StandardScalerModel(JavaVectorTransformer): """ return self.call("withMean") - @property # type: ignore[misc] + @property @since("2.0.0") def std(self) -> Vector: """ @@ -275,7 +275,7 @@ class StandardScalerModel(JavaVectorTransformer): """ return self.call("std") - @property # type: ignore[misc] + @property @since("2.0.0") def mean(self) -> Vector: """ diff --git a/python/pyspark/mllib/linalg/__init__.py b/python/pyspark/mllib/linalg/__init__.py index 4dcd0c97d89..f752f5458ff 100644 --- a/python/pyspark/mllib/linalg/__init__.py +++ b/python/pyspark/mllib/linalg/__init__.py @@ -1617,7 +1617,7 @@ class QRDecomposition(Generic[QT, RT]): self._Q = Q self._R = R - @property # type: ignore[misc] + @property @since("2.0.0") def Q(self) -> QT: """ @@ -1626,7 +1626,7 @@ class QRDecomposition(Generic[QT, RT]): """ return self._Q - @property # type: ignore[misc] + @property @since("2.0.0") def R(self) -> RT: """ diff --git a/python/pyspark/mllib/linalg/distributed.py b/python/pyspark/mllib/linalg/distributed.py index 1a2e38f81e7..ecdb4e75ed4 100644 --- a/python/pyspark/mllib/linalg/distributed.py +++ b/python/pyspark/mllib/linalg/distributed.py @@ -500,7 +500,7 @@ class SingularValueDecomposition(JavaModelWrapper, Generic[UT, VT]): .. 
versionadded:: 2.2.0 """ - @property # type: ignore[misc] + @property @since("2.2.0") def U(self) -> Optional[UT]: # type: ignore[return] """ @@ -517,7 +517,7 @@ class SingularValueDecomposition(JavaModelWrapper, Generic[UT, VT]): else: raise TypeError("Expected RowMatrix/IndexedRowMatrix got %s" % mat_name) - @property # type: ignore[misc] + @property @since("2.2.0") def s(self) -> Vector: """ @@ -525,7 +525,7 @@ class SingularValueDecomposition(JavaModelWrapper, Generic[UT, VT]): """ return self.call("s") - @property # type: ignore[misc] + @property @since("2.2.0") def V(self) -> VT: """ diff --git a/python/pyspark/mllib/recommendation.py b/python/pyspark/mllib/recommendation.py index 55eae10893e..7ff8fddf88d 100644 --- a/python/pyspark/mllib/recommendation.py +++ b/python/pyspark/mllib/recommendation.py @@ -208,7 +208,7 @@ class MatrixFactorizationModel( """ return self.call("wrappedRecommendUsersForProducts", num) - @property # type: ignore[misc] + @property @since("1.4.0") def rank(self) -> int: """Rank for the features in this model""" diff --git a/python/pyspark/mllib/regression.py b/python/pyspark/mllib/regression.py index 18f37b4a71a..cac3294ade6 100644 --- a/python/pyspark/mllib/regression.py +++ b/python/pyspark/mllib/regression.py @@ -117,13 +117,13 @@ class LinearModel: self._coeff = _convert_to_vector(weights) self._intercept = float(intercept) - @property # type: ignore[misc] + @property @since("1.0.0") def weights(self) -> Vector: """Weights computed for every feature.""" return self._coeff - @property # type: ignore[misc] + @property @since("1.0.0") def intercept(self) -> float: """Intercept computed for this model.""" diff --git a/python/pyspark/sql/observation.py b/python/pyspark/sql/observation.py index 67bb1f36305..686b036bb9e 100644 --- a/python/pyspark/sql/observation.py +++ b/python/pyspark/sql/observation.py @@ -112,7 +112,7 @@ class Observation: return DataFrame(observed_df, df.sparkSession) # Note that decorated property only works with 
Python 3.9+ which Spark Connect requires. - @property # type: ignore[misc] + @property @try_remote_observation def get(self) -> Dict[str, Any]: """Get the observed metrics. diff --git a/python/pyspark/sql/tests/typing/test_dataframe.yml b/python/pyspark/sql/tests/typing/test_dataframe.yml index 79a3bcd8dfc..d32a09cea82 100644 --- a/python/pyspark/sql/tests/typing/test_dataframe.yml +++ b/python/pyspark/sql/tests/typing/test_dataframe.yml @@ -123,9 +123,9 @@ out: | main:10: error: No overload variant of "drop" of "DataFrame" matches argument types "Column", "Column" [call-overload] - main:10: note: Possible overload variant: + main:10: note: Possible overload variants: + main:10: note: def drop(self, cols: Union[Column, str]) -> DataFrame main:10: note: def drop(self, *cols: str) -> DataFrame - main:10: note: <1 more non-matching overload not shown> - case: fillNullValues diff --git a/python/pyspark/sql/tests/typing/test_functions.yml b/python/pyspark/sql/tests/typing/test_functions.yml index efb3293472d..6c80420bf0a 100644 --- a/python/pyspark/sql/tests/typing/test_functions.yml +++ b/python/pyspark/sql/tests/typing/test_functions.yml @@ -68,34 +68,34 @@ out: | main:29: error: No overload variant of "array" matches argument types "List[Column]", "List[Column]" [call-overload] - main:29: note: Possible overload variant: + main:29: note: Possible overload variants: main:29: note: def array(*cols: Union[Column, str]) -> Column - main:29: note: <1 more non-matching overload not shown> + main:29: note: def [ColumnOrName_] array(Union[List[ColumnOrName_], Tuple[ColumnOrName_, ...]]) -> Column main:30: error: No overload variant of "create_map" matches argument types "List[Column]", "List[Column]" [call-overload] - main:30: note: Possible overload variant: + main:30: note: Possible overload variants: main:30: note: def create_map(*cols: Union[Column, str]) -> Column - main:30: note: <1 more non-matching overload not shown> + main:30: note: def [ColumnOrName_] 
create_map(Union[List[ColumnOrName_], Tuple[ColumnOrName_, ...]]) -> Column main:31: error: No overload variant of "map_concat" matches argument types "List[Column]", "List[Column]" [call-overload] - main:31: note: Possible overload variant: + main:31: note: Possible overload variants: main:31: note: def map_concat(*cols: Union[Column, str]) -> Column - main:31: note: <1 more non-matching overload not shown> + main:31: note: def [ColumnOrName_] map_concat(Union[List[ColumnOrName_], Tuple[ColumnOrName_, ...]]) -> Column main:32: error: No overload variant of "struct" matches argument types "List[str]", "List[str]" [call-overload] - main:32: note: Possible overload variant: + main:32: note: Possible overload variants: main:32: note: def struct(*cols: Union[Column, str]) -> Column - main:32: note: <1 more non-matching overload not shown> + main:32: note: def [ColumnOrName_] struct(Union[List[ColumnOrName_], Tuple[ColumnOrName_, ...]]) -> Column main:33: error: No overload variant of "array" matches argument types "List[str]", "List[str]" [call-overload] - main:33: note: Possible overload variant: + main:33: note: Possible overload variants: main:33: note: def array(*cols: Union[Column, str]) -> Column - main:33: note: <1 more non-matching overload not shown> + main:33: note: def [ColumnOrName_] array(Union[List[ColumnOrName_], Tuple[ColumnOrName_, ...]]) -> Column main:34: error: No overload variant of "create_map" matches argument types "List[str]", "List[str]" [call-overload] - main:34: note: Possible overload variant: + main:34: note: Possible overload variants: main:34: note: def create_map(*cols: Union[Column, str]) -> Column - main:34: note: <1 more non-matching overload not shown> + main:34: note: def [ColumnOrName_] create_map(Union[List[ColumnOrName_], Tuple[ColumnOrName_, ...]]) -> Column main:35: error: No overload variant of "map_concat" matches argument types "List[str]", "List[str]" [call-overload] - main:35: note: Possible overload variant: + main:35: 
note: Possible overload variants: main:35: note: def map_concat(*cols: Union[Column, str]) -> Column - main:35: note: <1 more non-matching overload not shown> + main:35: note: def [ColumnOrName_] map_concat(Union[List[ColumnOrName_], Tuple[ColumnOrName_, ...]]) -> Column main:36: error: No overload variant of "struct" matches argument types "List[str]", "List[str]" [call-overload] - main:36: note: Possible overload variant: + main:36: note: Possible overload variants: main:36: note: def struct(*cols: Union[Column, str]) -> Column - main:36: note: <1 more non-matching overload not shown> + main:36: note: def [ColumnOrName_] struct(Union[List[ColumnOrName_], Tuple[ColumnOrName_, ...]]) -> Column diff --git a/python/pyspark/sql/tests/typing/test_session.yml b/python/pyspark/sql/tests/typing/test_session.yml index 70d0001c47c..5c9fd9f197a 100644 --- a/python/pyspark/sql/tests/typing/test_session.yml +++ b/python/pyspark/sql/tests/typing/test_session.yml @@ -97,7 +97,12 @@ main:18: note: Possible overload variants: main:18: note: def [RowLike in (List[Any], Tuple[Any, ...], Row)] createDataFrame(self, data: Iterable[RowLike], schema: Union[List[str], Tuple[str, ...]] = ..., samplingRatio: Optional[float] = ...) -> DataFrame main:18: note: def [RowLike in (List[Any], Tuple[Any, ...], Row)] createDataFrame(self, data: RDD[RowLike], schema: Union[List[str], Tuple[str, ...]] = ..., samplingRatio: Optional[float] = ...) -> DataFrame - main:18: note: <6 more non-matching overloads not shown> + main:18: note: def [RowLike in (List[Any], Tuple[Any, ...], Row)] createDataFrame(self, data: Iterable[RowLike], schema: Union[StructType, str], *, verifySchema: bool = ...) -> DataFrame + main:18: note: def [RowLike in (List[Any], Tuple[Any, ...], Row)] createDataFrame(self, data: RDD[RowLike], schema: Union[StructType, str], *, verifySchema: bool = ...) 
-> DataFrame + main:18: note: def [AtomicValue in (datetime, date, Decimal, bool, str, int, float)] createDataFrame(self, data: RDD[AtomicValue], schema: Union[AtomicType, str], verifySchema: bool = ...) -> DataFrame + main:18: note: def [AtomicValue in (datetime, date, Decimal, bool, str, int, float)] createDataFrame(self, data: Iterable[AtomicValue], schema: Union[AtomicType, str], verifySchema: bool = ...) -> DataFrame + main:18: note: def createDataFrame(self, data: DataFrame, samplingRatio: Optional[float] = ...) -> DataFrame + main:18: note: def createDataFrame(self, data: DataFrame, schema: Union[StructType, str], verifySchema: bool = ...) -> DataFrame - case: createDataFrameFromEmptyRdd diff --git a/python/pyspark/sql/types.py b/python/pyspark/sql/types.py index acc3f407f9d..db615d339b5 100644 --- a/python/pyspark/sql/types.py +++ b/python/pyspark/sql/types.py @@ -146,7 +146,7 @@ class DataTypeSingleton(type): _instances: ClassVar[Dict[Type["DataTypeSingleton"], "DataTypeSingleton"]] = {} - def __call__(cls: Type[T]) -> T: # type: ignore[override] + def __call__(cls: Type[T]) -> T: if cls not in cls._instances: # type: ignore[attr-defined] cls._instances[cls] = super( # type: ignore[misc, attr-defined] DataTypeSingleton, cls diff --git a/python/pyspark/streaming/context.py b/python/pyspark/streaming/context.py index ffebf99685d..84e7cd7fcc6 100644 --- a/python/pyspark/streaming/context.py +++ b/python/pyspark/streaming/context.py @@ -381,13 +381,13 @@ class StreamingContext: Changes to the queue after the stream is created will not be recognized. 
""" if default and not isinstance(default, RDD): - default = self._sc.parallelize(default) # type: ignore[arg-type] + default = self._sc.parallelize(default) if not rdds and default: rdds = [rdds] # type: ignore[list-item] if rdds and not isinstance(rdds[0], RDD): - rdds = [self._sc.parallelize(input) for input in rdds] # type: ignore[arg-type] + rdds = [self._sc.parallelize(input) for input in rdds] self._check_serializers(rdds) assert self._jvm is not None diff --git a/python/pyspark/tests/typing/test_rdd.yml b/python/pyspark/tests/typing/test_rdd.yml index 48965829cfd..358553327df 100644 --- a/python/pyspark/tests/typing/test_rdd.yml +++ b/python/pyspark/tests/typing/test_rdd.yml @@ -100,8 +100,8 @@ reveal_type(sc.parallelize([("a", 1)]).aggregateByKey(zero, seq_func, comb_func)) out: | - main:11: note: Revealed type is "pyspark.rdd.RDD[builtins.str*]" - main:16: note: Revealed type is "pyspark.rdd.RDD[builtins.int*]" + main:11: note: Revealed type is "pyspark.rdd.RDD[builtins.str]" + main:16: note: Revealed type is "pyspark.rdd.RDD[builtins.int]" main:18: note: Revealed type is "pyspark.rdd.RDD[Tuple[builtins.str, builtins.int]]" main:20: note: Revealed type is "Tuple[builtins.str, builtins.int]" main:22: note: Revealed type is "builtins.int" --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org