This is an automated email from the ASF dual-hosted git repository. ruifengz pushed a commit to branch branch-4.0 in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-4.0 by this push: new a6298d1e97a5 [SPARK-51217][ML][CONNECT] ML model helper constructor clean up a6298d1e97a5 is described below commit a6298d1e97a5d5e5a4ad1bd9a235e03be9f727a5 Author: Ruifeng Zheng <ruife...@apache.org> AuthorDate: Mon Feb 17 11:07:07 2025 +0800 [SPARK-51217][ML][CONNECT] ML model helper constructor clean up ### What changes were proposed in this pull request? ML model helper constructor clean up: 1, add comments; 2, set invalid values, e.g. empty uid, NaN coefficients ### Why are the changes needed? 1, to avoid unintentionally incorrect usage; 2, to differentiate from normal models; ### Does this PR introduce _any_ user-facing change? no, internal change ### How was this patch tested? existing tests ### Was this patch authored or co-authored using generative AI tooling? no Closes #49956 from zhengruifeng/ml_connect_const. Authored-by: Ruifeng Zheng <ruife...@apache.org> Signed-off-by: Ruifeng Zheng <ruife...@apache.org> (cherry picked from commit d75a7d6fbe4ab97dbaa78200e5d01a4bfce13736) Signed-off-by: Ruifeng Zheng <ruife...@apache.org> --- .../org/apache/spark/ml/classification/DecisionTreeClassifier.scala | 3 +-- .../scala/org/apache/spark/ml/classification/FMClassifier.scala | 4 ++-- .../scala/org/apache/spark/ml/classification/GBTClassifier.scala | 5 ++--- .../main/scala/org/apache/spark/ml/classification/LinearSVC.scala | 3 ++- .../org/apache/spark/ml/classification/LogisticRegression.scala | 3 +-- .../spark/ml/classification/MultilayerPerceptronClassifier.scala | 3 ++- .../main/scala/org/apache/spark/ml/classification/NaiveBayes.scala | 4 ++-- .../org/apache/spark/ml/classification/RandomForestClassifier.scala | 3 +-- .../main/scala/org/apache/spark/ml/clustering/BisectingKMeans.scala | 5 ++--- .../main/scala/org/apache/spark/ml/clustering/GaussianMixture.scala | 4 ++-- mllib/src/main/scala/org/apache/spark/ml/clustering/KMeans.scala | 4 +--- 
mllib/src/main/scala/org/apache/spark/ml/clustering/LDA.scala | 6 ++++-- .../org/apache/spark/ml/feature/BucketedRandomProjectionLSH.scala | 3 ++- .../src/main/scala/org/apache/spark/ml/feature/ChiSqSelector.scala | 4 ++-- .../main/scala/org/apache/spark/ml/feature/CountVectorizer.scala | 3 ++- mllib/src/main/scala/org/apache/spark/ml/feature/IDF.scala | 3 ++- mllib/src/main/scala/org/apache/spark/ml/feature/Imputer.scala | 3 ++- mllib/src/main/scala/org/apache/spark/ml/feature/MaxAbsScaler.scala | 3 ++- mllib/src/main/scala/org/apache/spark/ml/feature/MinHashLSH.scala | 3 ++- mllib/src/main/scala/org/apache/spark/ml/feature/MinMaxScaler.scala | 3 ++- .../src/main/scala/org/apache/spark/ml/feature/OneHotEncoder.scala | 3 ++- mllib/src/main/scala/org/apache/spark/ml/feature/PCA.scala | 4 +--- mllib/src/main/scala/org/apache/spark/ml/feature/RFormula.scala | 3 ++- mllib/src/main/scala/org/apache/spark/ml/feature/RobustScaler.scala | 3 ++- .../src/main/scala/org/apache/spark/ml/feature/StandardScaler.scala | 3 ++- .../src/main/scala/org/apache/spark/ml/feature/StringIndexer.scala | 3 +-- .../src/main/scala/org/apache/spark/ml/feature/TargetEncoder.scala | 3 ++- .../org/apache/spark/ml/feature/UnivariateFeatureSelector.scala | 4 ++-- .../org/apache/spark/ml/feature/VarianceThresholdSelector.scala | 4 ++-- .../src/main/scala/org/apache/spark/ml/feature/VectorIndexer.scala | 3 ++- mllib/src/main/scala/org/apache/spark/ml/feature/Word2Vec.scala | 3 ++- mllib/src/main/scala/org/apache/spark/ml/fpm/FPGrowth.scala | 3 ++- mllib/src/main/scala/org/apache/spark/ml/recommendation/ALS.scala | 3 +-- .../org/apache/spark/ml/regression/AFTSurvivalRegression.scala | 4 ++-- .../org/apache/spark/ml/regression/DecisionTreeRegressor.scala | 3 +-- .../src/main/scala/org/apache/spark/ml/regression/FMRegressor.scala | 4 ++-- .../main/scala/org/apache/spark/ml/regression/GBTRegressor.scala | 4 +--- .../apache/spark/ml/regression/GeneralizedLinearRegression.scala | 3 ++- 
.../scala/org/apache/spark/ml/regression/IsotonicRegression.scala | 3 ++- .../scala/org/apache/spark/ml/regression/LinearRegression.scala | 3 +-- .../org/apache/spark/ml/regression/RandomForestRegressor.scala | 3 +-- 41 files changed, 73 insertions(+), 68 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/DecisionTreeClassifier.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/DecisionTreeClassifier.scala index 761741e7f42d..2c9f518c772c 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/classification/DecisionTreeClassifier.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/classification/DecisionTreeClassifier.scala @@ -193,8 +193,7 @@ class DecisionTreeClassificationModel private[ml] ( this(Identifiable.randomUID("dtc"), rootNode, numFeatures, numClasses) // For ml connect only - @Since("4.0.0") - private[ml] def this() = this(Node.dummyNode, 0, 0) + private[ml] def this() = this("", Node.dummyNode, -1, -1) override def predict(features: Vector): Double = { rootNode.predictImpl(features).prediction diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/FMClassifier.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/FMClassifier.scala index 0ef16cb42776..3e88a6d10bcd 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/classification/FMClassifier.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/classification/FMClassifier.scala @@ -259,8 +259,8 @@ class FMClassificationModel private[classification] ( with FMClassifierParams with MLWritable with HasTrainingSummary[FMClassificationTrainingSummary]{ - private[ml] def this() = this(Identifiable.randomUID("fmc"), - Double.NaN, Vectors.empty, Matrices.empty) + // For ml connect only + private[ml] def this() = this("", Double.NaN, Vectors.empty, Matrices.empty) @Since("3.0.0") override val numClasses: Int = 2 diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/GBTClassifier.scala 
b/mllib/src/main/scala/org/apache/spark/ml/classification/GBTClassifier.scala index 8ed52d5e09e0..9f2c2c85115b 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/classification/GBTClassifier.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/classification/GBTClassifier.scala @@ -273,9 +273,8 @@ class GBTClassificationModel private[ml]( this(uid, _trees, _treeWeights, -1, 2) // For ml connect only - @Since("4.0.0") - private[ml] def this() = this(Identifiable.randomUID("gbtc"), - Array(new DecisionTreeRegressionModel), Array(0.0)) + private[ml] def this() = this("", + Array(new DecisionTreeRegressionModel), Array(Double.NaN), -1, -1) @Since("1.4.0") override def trees: Array[DecisionTreeRegressionModel] = _trees diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/LinearSVC.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/LinearSVC.scala index 6fa7f4d5d493..522db5f15fe3 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/classification/LinearSVC.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/classification/LinearSVC.scala @@ -365,7 +365,8 @@ class LinearSVCModel private[classification] ( extends ClassificationModel[Vector, LinearSVCModel] with LinearSVCParams with MLWritable with HasTrainingSummary[LinearSVCTrainingSummary] { - private[ml] def this() = this(Identifiable.randomUID("linearsvc"), Vectors.empty, 0.0) + // For ml connect only + private[ml] def this() = this("", Vectors.empty, Double.NaN) @Since("2.2.0") override val numClasses: Int = 2 diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala index 8a162d97384b..c6fa15aaf919 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala @@ -1077,8 +1077,7 @@ class LogisticRegressionModel private[spark] ( 
Vectors.dense(intercept), 2, isMultinomial = false) // For ml connect only - @Since("4.0.0") - private[ml] def this() = this(Identifiable.randomUID("logreg"), Vectors.empty, 0) + private[ml] def this() = this("", Matrices.empty, Vectors.empty, -1, false) /** * A vector of model coefficients for "binomial" logistic regression. If this model was trained diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/MultilayerPerceptronClassifier.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/MultilayerPerceptronClassifier.scala index a09bf7a7aa36..73db8f189f81 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/classification/MultilayerPerceptronClassifier.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/classification/MultilayerPerceptronClassifier.scala @@ -283,7 +283,8 @@ class MultilayerPerceptronClassificationModel private[ml] ( with MultilayerPerceptronParams with Serializable with MLWritable with HasTrainingSummary[MultilayerPerceptronClassificationTrainingSummary]{ - private[ml] def this() = this(Identifiable.randomUID("mlpc"), Vectors.empty) + // For ml connect only + private[ml] def this() = this("", Vectors.empty) @Since("1.6.0") override lazy val numFeatures: Int = $(layers).head diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/NaiveBayes.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/NaiveBayes.scala index de2023899ee5..513b6e676e5d 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/classification/NaiveBayes.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/classification/NaiveBayes.scala @@ -401,8 +401,8 @@ class NaiveBayesModel private[ml] ( import NaiveBayes._ - private[ml] def this() = this(Identifiable.randomUID("nb"), - Vectors.empty, Matrices.empty, Matrices.empty) + // For ml connect only + private[ml] def this() = this("", Vectors.empty, Matrices.empty, Matrices.empty) /** * mllib NaiveBayes is a wrapper of ml implementation currently. 
diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/RandomForestClassifier.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/RandomForestClassifier.scala index 0833ad0d402b..24dd7095513a 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/classification/RandomForestClassifier.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/classification/RandomForestClassifier.scala @@ -256,8 +256,7 @@ class RandomForestClassificationModel private[ml] ( this(Identifiable.randomUID("rfc"), trees, numFeatures, numClasses) // For ml connect only - @Since("4.0.0") - private[ml] def this() = this(Array(new DecisionTreeClassificationModel), 0, 0) + private[ml] def this() = this("", Array(new DecisionTreeClassificationModel), -1, -1) @Since("1.4.0") override def trees: Array[DecisionTreeClassificationModel] = _trees diff --git a/mllib/src/main/scala/org/apache/spark/ml/clustering/BisectingKMeans.scala b/mllib/src/main/scala/org/apache/spark/ml/clustering/BisectingKMeans.scala index c1ef69e8b047..f1cd126a6406 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/clustering/BisectingKMeans.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/clustering/BisectingKMeans.scala @@ -96,9 +96,8 @@ class BisectingKMeansModel private[ml] ( extends Model[BisectingKMeansModel] with BisectingKMeansParams with MLWritable with HasTrainingSummary[BisectingKMeansSummary] { - @Since("4.0.0") - private[ml] def this() = this(Identifiable.randomUID("bisecting-kmeans"), - new MLlibBisectingKMeansModel(null)) + // For ml connect only + private[ml] def this() = this("", null) @Since("3.0.0") lazy val numFeatures: Int = parentModel.clusterCenters.head.size diff --git a/mllib/src/main/scala/org/apache/spark/ml/clustering/GaussianMixture.scala b/mllib/src/main/scala/org/apache/spark/ml/clustering/GaussianMixture.scala index ad1533cd37a9..42ce5d329ce0 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/clustering/GaussianMixture.scala +++ 
b/mllib/src/main/scala/org/apache/spark/ml/clustering/GaussianMixture.scala @@ -93,8 +93,8 @@ class GaussianMixtureModel private[ml] ( extends Model[GaussianMixtureModel] with GaussianMixtureParams with MLWritable with HasTrainingSummary[GaussianMixtureSummary] { - private[ml] def this() = this(Identifiable.randomUID("gmm"), - Array.emptyDoubleArray, Array.empty) + // For ml connect only + private[ml] def this() = this("", Array.emptyDoubleArray, Array.empty) @Since("3.0.0") lazy val numFeatures: Int = gaussians.head.mean.size diff --git a/mllib/src/main/scala/org/apache/spark/ml/clustering/KMeans.scala b/mllib/src/main/scala/org/apache/spark/ml/clustering/KMeans.scala index e878e12f4df4..0821d9a841cc 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/clustering/KMeans.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/clustering/KMeans.scala @@ -139,9 +139,7 @@ class KMeansModel private[ml] ( with HasTrainingSummary[KMeansSummary] { // For ml connect only - @Since("4.0.0") - private[ml] def this() = this(Identifiable.randomUID("kmeans"), - new MLlibKMeansModel(clusterCenters = null)) + private[ml] def this() = this("", null) @Since("3.0.0") lazy val numFeatures: Int = parentModel.clusterCenters.head.size diff --git a/mllib/src/main/scala/org/apache/spark/ml/clustering/LDA.scala b/mllib/src/main/scala/org/apache/spark/ml/clustering/LDA.scala index 3fce96fbfbb0..3ea1c8594e1f 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/clustering/LDA.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/clustering/LDA.scala @@ -617,7 +617,8 @@ class LocalLDAModel private[ml] ( sparkSession: SparkSession) extends LDAModel(uid, vocabSize, sparkSession) { - private[ml] def this() = this(Identifiable.randomUID("lda"), -1, null, null) + // For ml connect only + private[ml] def this() = this("", -1, null, null) oldLocalModel.setSeed(getSeed) @@ -715,7 +716,8 @@ class DistributedLDAModel private[ml] ( private var oldLocalModelOption: Option[OldLocalLDAModel]) extends 
LDAModel(uid, vocabSize, sparkSession) { - private[ml] def this() = this(Identifiable.randomUID("lda"), -1, null, null, None) + // For ml connect only + private[ml] def this() = this("", -1, null, null, None) override private[clustering] def oldLocalModel: OldLocalLDAModel = { if (oldLocalModelOption.isEmpty) { diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/BucketedRandomProjectionLSH.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/BucketedRandomProjectionLSH.scala index 5037ac941afb..c429788ee368 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/BucketedRandomProjectionLSH.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/BucketedRandomProjectionLSH.scala @@ -68,7 +68,8 @@ class BucketedRandomProjectionLSHModel private[ml]( private[ml] val randMatrix: Matrix) extends LSHModel[BucketedRandomProjectionLSHModel] with BucketedRandomProjectionLSHParams { - private[ml] def this() = this(Identifiable.randomUID("brp-lsh"), Matrices.empty) + // For ml connect only + private[ml] def this() = this("", Matrices.empty) private[ml] def this(uid: String, randUnitVectors: Array[Vector]) = { this(uid, Matrices.fromVectors(randUnitVectors.toImmutableArraySeq)) diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/ChiSqSelector.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/ChiSqSelector.scala index e93d96cf9717..ff18efb14939 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/ChiSqSelector.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/ChiSqSelector.scala @@ -137,8 +137,8 @@ final class ChiSqSelectorModel private[ml] ( import ChiSqSelectorModel._ - private[ml] def this() = this( - Identifiable.randomUID("chiSqSelector"), Array.emptyIntArray) + // For ml connect only + private[ml] def this() = this("", Array.emptyIntArray) override protected def isNumericAttribute = false diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/CountVectorizer.scala 
b/mllib/src/main/scala/org/apache/spark/ml/feature/CountVectorizer.scala index 95788be6bd2b..34465248f20d 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/CountVectorizer.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/CountVectorizer.scala @@ -277,7 +277,8 @@ class CountVectorizerModel( import CountVectorizerModel._ - private[ml] def this() = this(Identifiable.randomUID("cntVecModel"), Array.empty) + // For ml connect only + private[ml] def this() = this("", Array.empty) @Since("1.5.0") def this(vocabulary: Array[String]) = { diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/IDF.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/IDF.scala index 5459bb3f31da..c2b7ff7b00a3 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/IDF.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/IDF.scala @@ -121,7 +121,8 @@ class IDFModel private[ml] ( import IDFModel._ - private[ml] def this() = this(Identifiable.randomUID("idf"), null) + // For ml connect only + private[ml] def this() = this("", null) /** @group setParam */ @Since("1.4.0") diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/Imputer.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/Imputer.scala index 2f51ae2d7fe3..4e169ab178b9 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/Imputer.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/Imputer.scala @@ -246,7 +246,8 @@ class ImputerModel private[ml] ( import ImputerModel._ - private[ml] def this() = this(Identifiable.randomUID("imputer"), null) + // For ml connect only + private[ml] def this() = this("", null) /** @group setParam */ @Since("3.0.0") diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/MaxAbsScaler.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/MaxAbsScaler.scala index 66dbabc6187e..a9f1cd34ba3e 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/MaxAbsScaler.scala +++ 
b/mllib/src/main/scala/org/apache/spark/ml/feature/MaxAbsScaler.scala @@ -107,7 +107,8 @@ class MaxAbsScalerModel private[ml] ( import MaxAbsScalerModel._ - private[ml] def this() = this(Identifiable.randomUID("maxAbsScal"), Vectors.empty) + // For ml connect only + private[ml] def this() = this("", Vectors.empty) /** @group setParam */ @Since("2.0.0") diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/MinHashLSH.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/MinHashLSH.scala index d077b0a4a022..96d341b16347 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/MinHashLSH.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/MinHashLSH.scala @@ -48,7 +48,8 @@ class MinHashLSHModel private[ml]( private[ml] val randCoefficients: Array[(Int, Int)]) extends LSHModel[MinHashLSHModel] { - private[ml] def this() = this(Identifiable.randomUID("mh-lsh"), Array.empty) + // For ml connect only + private[ml] def this() = this("", Array.empty) /** @group setParam */ @Since("2.4.0") diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/MinMaxScaler.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/MinMaxScaler.scala index e3b0590524f3..c54e64f97953 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/MinMaxScaler.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/MinMaxScaler.scala @@ -154,7 +154,8 @@ class MinMaxScalerModel private[ml] ( import MinMaxScalerModel._ - private[ml] def this() = this(Identifiable.randomUID("minMaxScal"), Vectors.empty, Vectors.empty) + // For ml connect only + private[ml] def this() = this("", Vectors.empty, Vectors.empty) /** @group setParam */ @Since("1.5.0") diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/OneHotEncoder.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/OneHotEncoder.scala index 25bcdc9a1c29..3eaff518e8fc 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/OneHotEncoder.scala +++ 
b/mllib/src/main/scala/org/apache/spark/ml/feature/OneHotEncoder.scala @@ -234,7 +234,8 @@ class OneHotEncoderModel private[ml] ( import OneHotEncoderModel._ - private[ml] def this() = this(Identifiable.randomUID("oneHotEncoder)"), Array.emptyIntArray) + // For ml connect only + private[ml] def this() = this("", Array.emptyIntArray) // Returns the category size for each index with `dropLast` and `handleInvalid` // taken into account. diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/PCA.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/PCA.scala index 67c8fcf15eec..6b61e761f589 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/PCA.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/PCA.scala @@ -128,9 +128,7 @@ class PCAModel private[ml] ( import PCAModel._ // For ml connect only - @Since("4.0.0") - private[ml] def this() = this(Identifiable.randomUID("pca"), - DenseMatrix.zeros(1, 1), Vectors.empty) + private[ml] def this() = this("", Matrices.empty, Vectors.empty) /** @group setParam */ @Since("1.5.0") diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/RFormula.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/RFormula.scala index 2eb37fd65d7f..d2191185dddd 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/RFormula.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/RFormula.scala @@ -349,7 +349,8 @@ class RFormulaModel private[feature]( private[ml] val pipelineModel: PipelineModel) extends Model[RFormulaModel] with RFormulaBase with MLWritable { - private[ml] def this() = this(Identifiable.randomUID("rFormula"), null, null) + // For ml connect only + private[ml] def this() = this("", null, null) @Since("2.0.0") override def transform(dataset: Dataset[_]): DataFrame = { diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/RobustScaler.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/RobustScaler.scala index c77f7008d05a..1779f0d6278f 100644 --- 
a/mllib/src/main/scala/org/apache/spark/ml/feature/RobustScaler.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/RobustScaler.scala @@ -230,7 +230,8 @@ class RobustScalerModel private[ml] ( import RobustScalerModel._ - private[ml] def this() = this(Identifiable.randomUID("robustScal"), Vectors.empty, Vectors.empty) + // For ml connect only + private[ml] def this() = this("", Vectors.empty, Vectors.empty) /** @group setParam */ def setInputCol(value: String): this.type = set(inputCol, value) diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/StandardScaler.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/StandardScaler.scala index 546463c15844..c1ac1fdbba7d 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/StandardScaler.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/StandardScaler.scala @@ -147,7 +147,8 @@ class StandardScalerModel private[ml] ( import StandardScalerModel._ - private[ml] def this() = this(Identifiable.randomUID("stdScal"), Vectors.empty, Vectors.empty) + // For ml connect only + private[ml] def this() = this("", Vectors.empty, Vectors.empty) /** @group setParam */ @Since("1.2.0") diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/StringIndexer.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/StringIndexer.scala index 8b643372ec93..06a88e9b1c49 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/StringIndexer.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/StringIndexer.scala @@ -302,8 +302,7 @@ class StringIndexerModel ( def this(labelsArray: Array[Array[String]]) = this(Identifiable.randomUID("strIdx"), labelsArray) // For ml connect only - @Since("4.0.0") - private[ml] def this() = this(labels = Array.empty) + private[ml] def this() = this("", Array.empty[Array[String]]) @deprecated("`labels` is deprecated and will be removed in 3.1.0. 
Use `labelsArray` " + "instead.", "3.0.0") diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/TargetEncoder.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/TargetEncoder.scala index 31504fead161..39ffaf32a1f3 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/TargetEncoder.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/TargetEncoder.scala @@ -286,7 +286,8 @@ class TargetEncoderModel private[ml] ( @Since("4.0.0") private[ml] val stats: Array[Map[Double, (Double, Double)]]) extends Model[TargetEncoderModel] with TargetEncoderBase with MLWritable { - private[ml] def this() = this(Identifiable.randomUID("TargetEncoder"), Array.empty) + // For ml connect only + private[ml] def this() = this("", Array.empty) /** @group setParam */ @Since("4.0.0") diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/UnivariateFeatureSelector.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/UnivariateFeatureSelector.scala index d845e2887a64..704166d9b657 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/UnivariateFeatureSelector.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/UnivariateFeatureSelector.scala @@ -289,8 +289,8 @@ class UnivariateFeatureSelectorModel private[ml]( extends Model[UnivariateFeatureSelectorModel] with UnivariateFeatureSelectorParams with MLWritable { - private[ml] def this() = this( - Identifiable.randomUID("UnivariateFeatureSelector"), Array.emptyIntArray) + // For ml connect only + private[ml] def this() = this("", Array.emptyIntArray) /** @group setParam */ @Since("3.1.1") diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/VarianceThresholdSelector.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/VarianceThresholdSelector.scala index 23ea1ee3066e..cd1905b90ace 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/VarianceThresholdSelector.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/VarianceThresholdSelector.scala @@ -126,8 +126,8 @@ 
class VarianceThresholdSelectorModel private[ml]( extends Model[VarianceThresholdSelectorModel] with VarianceThresholdSelectorParams with MLWritable { - private[ml] def this() = this( - Identifiable.randomUID("VarianceThresholdSelector"), Array.emptyIntArray) + // For ml connect only + private[ml] def this() = this("", Array.emptyIntArray) if (selectedFeatures.length >= 2) { require(selectedFeatures.sliding(2).forall(l => l(0) < l(1)), diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/VectorIndexer.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/VectorIndexer.scala index 5063f15302a3..091e20922782 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/VectorIndexer.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/VectorIndexer.scala @@ -298,7 +298,8 @@ class VectorIndexerModel private[ml] ( import VectorIndexerModel._ - private[ml] def this() = this(Identifiable.randomUID("vecIdx"), -1, Map.empty) + // For ml connect only + private[ml] def this() = this("", -1, Map.empty) /** Java-friendly version of [[categoryMaps]] */ @Since("1.4.0") diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/Word2Vec.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/Word2Vec.scala index c3eeb394c5d4..7d6765b231b5 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/Word2Vec.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/Word2Vec.scala @@ -211,7 +211,8 @@ class Word2VecModel private[ml] ( import Word2VecModel._ - private[ml] def this() = this(Identifiable.randomUID("w2v"), null) + // For ml connect only + private[ml] def this() = this("", null) /** * Returns a dataframe with two fields, "word" and "vector", with "word" being a String and diff --git a/mllib/src/main/scala/org/apache/spark/ml/fpm/FPGrowth.scala b/mllib/src/main/scala/org/apache/spark/ml/fpm/FPGrowth.scala index d90124c62d54..0b75753695fd 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/fpm/FPGrowth.scala +++ 
b/mllib/src/main/scala/org/apache/spark/ml/fpm/FPGrowth.scala @@ -223,7 +223,8 @@ class FPGrowthModel private[ml] ( private val numTrainingRecords: Long) extends Model[FPGrowthModel] with FPGrowthParams with MLWritable { - private[ml] def this() = this(Identifiable.randomUID("fpgrowth"), null, Map.empty, 0L) + // For ml connect only + private[ml] def this() = this("", null, Map.empty, -1L) /** @group setParam */ @Since("2.2.0") diff --git a/mllib/src/main/scala/org/apache/spark/ml/recommendation/ALS.scala b/mllib/src/main/scala/org/apache/spark/ml/recommendation/ALS.scala index 4120e16794a8..95c47531720d 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/recommendation/ALS.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/recommendation/ALS.scala @@ -281,8 +281,7 @@ class ALSModel private[ml] ( extends Model[ALSModel] with ALSModelParams with MLWritable { // For ml connect only - @Since("4.0.0") - private[ml] def this() = this(Identifiable.randomUID("als"), 0, null, null) + private[ml] def this() = this("", -1, null, null) /** @group setParam */ @Since("1.4.0") diff --git a/mllib/src/main/scala/org/apache/spark/ml/regression/AFTSurvivalRegression.scala b/mllib/src/main/scala/org/apache/spark/ml/regression/AFTSurvivalRegression.scala index d9f7af73ce33..8bc0f641aaad 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/regression/AFTSurvivalRegression.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/regression/AFTSurvivalRegression.scala @@ -371,8 +371,8 @@ class AFTSurvivalRegressionModel private[ml] ( extends RegressionModel[Vector, AFTSurvivalRegressionModel] with AFTSurvivalRegressionParams with MLWritable { - private[ml] def this() = this(Identifiable.randomUID("aftSurvReg"), - Vectors.empty, Double.NaN, Double.NaN) + // For ml connect only + private[ml] def this() = this("", Vectors.empty, Double.NaN, Double.NaN) @Since("3.0.0") override def numFeatures: Int = coefficients.size diff --git 
a/mllib/src/main/scala/org/apache/spark/ml/regression/DecisionTreeRegressor.scala b/mllib/src/main/scala/org/apache/spark/ml/regression/DecisionTreeRegressor.scala index 2c692d33a38d..4f38d8757413 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/regression/DecisionTreeRegressor.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/regression/DecisionTreeRegressor.scala @@ -188,8 +188,7 @@ class DecisionTreeRegressionModel private[ml] ( this(Identifiable.randomUID("dtr"), rootNode, numFeatures) // For ml connect only - @Since("4.0.0") - private[ml] def this() = this(Node.dummyNode, 0) + private[ml] def this() = this("", Node.dummyNode, -1) override def predict(features: Vector): Double = { rootNode.predictImpl(features).prediction diff --git a/mllib/src/main/scala/org/apache/spark/ml/regression/FMRegressor.scala b/mllib/src/main/scala/org/apache/spark/ml/regression/FMRegressor.scala index 02ef1df2c44e..994ee3f77f3c 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/regression/FMRegressor.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/regression/FMRegressor.scala @@ -461,8 +461,8 @@ class FMRegressionModel private[regression] ( extends RegressionModel[Vector, FMRegressionModel] with FMRegressorParams with MLWritable { - private[ml] def this() = this(Identifiable.randomUID("fmr"), - Double.NaN, Vectors.empty, Matrices.empty) + // For ml connect only + private[ml] def this() = this("", Double.NaN, Vectors.empty, Matrices.empty) @Since("3.0.0") override val numFeatures: Int = linear.size diff --git a/mllib/src/main/scala/org/apache/spark/ml/regression/GBTRegressor.scala b/mllib/src/main/scala/org/apache/spark/ml/regression/GBTRegressor.scala index c2c672a7fa60..f71eea6c6293 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/regression/GBTRegressor.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/regression/GBTRegressor.scala @@ -243,9 +243,7 @@ class GBTRegressionModel private[ml]( this(uid, _trees, _treeWeights, -1) // For ml connect only - 
@Since("4.0.0") - private[ml] def this() = this(Identifiable.randomUID("gbtr"), - Array(new DecisionTreeRegressionModel), Array(0.0)) + private[ml] def this() = this("", Array(new DecisionTreeRegressionModel), Array(Double.NaN), -1) @Since("1.4.0") override def trees: Array[DecisionTreeRegressionModel] = _trees diff --git a/mllib/src/main/scala/org/apache/spark/ml/regression/GeneralizedLinearRegression.scala b/mllib/src/main/scala/org/apache/spark/ml/regression/GeneralizedLinearRegression.scala index dea182902ace..d5141ec4ccee 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/regression/GeneralizedLinearRegression.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/regression/GeneralizedLinearRegression.scala @@ -1009,7 +1009,8 @@ class GeneralizedLinearRegressionModel private[ml] ( with GeneralizedLinearRegressionBase with MLWritable with HasTrainingSummary[GeneralizedLinearRegressionTrainingSummary] { - private[ml] def this() = this(Identifiable.randomUID("glm"), Vectors.empty, Double.NaN) + // For ml connect only + private[ml] def this() = this("", Vectors.empty, Double.NaN) /** * Sets the link prediction (linear predictor) column name. 
diff --git a/mllib/src/main/scala/org/apache/spark/ml/regression/IsotonicRegression.scala b/mllib/src/main/scala/org/apache/spark/ml/regression/IsotonicRegression.scala index 53850089a5a4..e1bfff068cfe 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/regression/IsotonicRegression.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/regression/IsotonicRegression.scala @@ -213,7 +213,8 @@ class IsotonicRegressionModel private[ml] ( private val oldModel: MLlibIsotonicRegressionModel) extends Model[IsotonicRegressionModel] with IsotonicRegressionBase with MLWritable { - private[ml] def this() = this(Identifiable.randomUID("isoReg"), null) + // For ml connect only + private[ml] def this() = this("", null) /** @group setParam */ @Since("1.5.0") diff --git a/mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala b/mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala index b9cd138a51ec..b20c09db1fa7 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala @@ -703,8 +703,7 @@ class LinearRegressionModel private[ml] ( this(uid, coefficients, intercept, 1.0) // For ml connect only - @Since("4.0.0") - private[ml] def this() = this(Identifiable.randomUID("linReg"), Vectors.empty, 0.0, 0.0) + private[ml] def this() = this("", Vectors.empty, Double.NaN, Double.NaN) override val numFeatures: Int = coefficients.size diff --git a/mllib/src/main/scala/org/apache/spark/ml/regression/RandomForestRegressor.scala b/mllib/src/main/scala/org/apache/spark/ml/regression/RandomForestRegressor.scala index b0409c916a05..97d0f54d0eca 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/regression/RandomForestRegressor.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/regression/RandomForestRegressor.scala @@ -213,8 +213,7 @@ class RandomForestRegressionModel private[ml] ( this(Identifiable.randomUID("rfr"), trees, numFeatures) // For 
ml connect only - @Since("4.0.0") - private[ml] def this() = this(Array(new DecisionTreeRegressionModel), 0) + private[ml] def this() = this("", Array(new DecisionTreeRegressionModel), -1) @Since("1.4.0") override def trees: Array[DecisionTreeRegressionModel] = _trees --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org