This is an automated email from the ASF dual-hosted git repository. ruifengz pushed a commit to branch branch-4.0 in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-4.0 by this push: new a6298d1e97a5 [SPARK-51217][ML][CONNECT] ML model helper constructor clean up a6298d1e97a5 is described below commit a6298d1e97a5d5e5a4ad1bd9a235e03be9f727a5 Author: Ruifeng Zheng <ruife...@apache.org> AuthorDate: Mon Feb 17 11:07:07 2025 +0800 [SPARK-51217][ML][CONNECT] ML model helper constructor clean up ### What changes were proposed in this pull request? ML model helper constructor clean up: 1, add comments; 2, set invalid values, e.g. empty uid, NaN coefficients ### Why are the changes needed? 1, to avoid unintentionally incorrect usage; 2, to differentiate from normal models; ### Does this PR introduce _any_ user-facing change? no, internal change ### How was this patch tested? existing tests ### Was this patch authored or co-authored using generative AI tooling? no Closes #49956 from zhengruifeng/ml_connect_const. Authored-by: Ruifeng Zheng <ruife...@apache.org> Signed-off-by: Ruifeng Zheng <ruife...@apache.org> (cherry picked from commit d75a7d6fbe4ab97dbaa78200e5d01a4bfce13736) Signed-off-by: Ruifeng Zheng <ruife...@apache.org> --- .../org/apache/spark/ml/classification/DecisionTreeClassifier.scala | 3 +-- .../scala/org/apache/spark/ml/classification/FMClassifier.scala | 4 ++-- .../scala/org/apache/spark/ml/classification/GBTClassifier.scala | 5 ++--- .../main/scala/org/apache/spark/ml/classification/LinearSVC.scala | 3 ++- .../org/apache/spark/ml/classification/LogisticRegression.scala | 3 +-- .../spark/ml/classification/MultilayerPerceptronClassifier.scala | 3 ++- .../main/scala/org/apache/spark/ml/classification/NaiveBayes.scala | 4 ++-- .../org/apache/spark/ml/classification/RandomForestClassifier.scala | 3 +-- .../main/scala/org/apache/spark/ml/clustering/BisectingKMeans.scala | 5 ++--- .../main/scala/org/apache/spark/ml/clustering/GaussianMixture.scala | 4 ++-- mllib/src/main/scala/org/apache/spark/ml/clustering/KMeans.scala | 4 +--- 
mllib/src/main/scala/org/apache/spark/ml/clustering/LDA.scala | 6 ++++-- .../org/apache/spark/ml/feature/BucketedRandomProjectionLSH.scala | 3 ++- .../src/main/scala/org/apache/spark/ml/feature/ChiSqSelector.scala | 4 ++-- .../main/scala/org/apache/spark/ml/feature/CountVectorizer.scala | 3 ++- mllib/src/main/scala/org/apache/spark/ml/feature/IDF.scala | 3 ++- mllib/src/main/scala/org/apache/spark/ml/feature/Imputer.scala | 3 ++- mllib/src/main/scala/org/apache/spark/ml/feature/MaxAbsScaler.scala | 3 ++- mllib/src/main/scala/org/apache/spark/ml/feature/MinHashLSH.scala | 3 ++- mllib/src/main/scala/org/apache/spark/ml/feature/MinMaxScaler.scala | 3 ++- .../src/main/scala/org/apache/spark/ml/feature/OneHotEncoder.scala | 3 ++- mllib/src/main/scala/org/apache/spark/ml/feature/PCA.scala | 4 +--- mllib/src/main/scala/org/apache/spark/ml/feature/RFormula.scala | 3 ++- mllib/src/main/scala/org/apache/spark/ml/feature/RobustScaler.scala | 3 ++- .../src/main/scala/org/apache/spark/ml/feature/StandardScaler.scala | 3 ++- .../src/main/scala/org/apache/spark/ml/feature/StringIndexer.scala | 3 +-- .../src/main/scala/org/apache/spark/ml/feature/TargetEncoder.scala | 3 ++- .../org/apache/spark/ml/feature/UnivariateFeatureSelector.scala | 4 ++-- .../org/apache/spark/ml/feature/VarianceThresholdSelector.scala | 4 ++-- .../src/main/scala/org/apache/spark/ml/feature/VectorIndexer.scala | 3 ++- mllib/src/main/scala/org/apache/spark/ml/feature/Word2Vec.scala | 3 ++- mllib/src/main/scala/org/apache/spark/ml/fpm/FPGrowth.scala | 3 ++- mllib/src/main/scala/org/apache/spark/ml/recommendation/ALS.scala | 3 +-- .../org/apache/spark/ml/regression/AFTSurvivalRegression.scala | 4 ++-- .../org/apache/spark/ml/regression/DecisionTreeRegressor.scala | 3 +-- .../src/main/scala/org/apache/spark/ml/regression/FMRegressor.scala | 4 ++-- .../main/scala/org/apache/spark/ml/regression/GBTRegressor.scala | 4 +--- .../apache/spark/ml/regression/GeneralizedLinearRegression.scala | 3 ++- 
.../scala/org/apache/spark/ml/regression/IsotonicRegression.scala | 3 ++- .../scala/org/apache/spark/ml/regression/LinearRegression.scala | 3 +-- .../org/apache/spark/ml/regression/RandomForestRegressor.scala | 3 +-- 41 files changed, 73 insertions(+), 68 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/DecisionTreeClassifier.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/DecisionTreeClassifier.scala index 761741e7f42d..2c9f518c772c 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/classification/DecisionTreeClassifier.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/classification/DecisionTreeClassifier.scala @@ -193,8 +193,7 @@ class DecisionTreeClassificationModel private[ml] ( this(Identifiable.randomUID("dtc"), rootNode, numFeatures, numClasses) // For ml connect only - @Since("4.0.0") - private[ml] def this() = this(Node.dummyNode, 0, 0) + private[ml] def this() = this("", Node.dummyNode, -1, -1) override def predict(features: Vector): Double = { rootNode.predictImpl(features).prediction diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/FMClassifier.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/FMClassifier.scala index 0ef16cb42776..3e88a6d10bcd 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/classification/FMClassifier.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/classification/FMClassifier.scala @@ -259,8 +259,8 @@ class FMClassificationModel private[classification] ( with FMClassifierParams with MLWritable with HasTrainingSummary[FMClassificationTrainingSummary]{ - private[ml] def this() = this(Identifiable.randomUID("fmc"), - Double.NaN, Vectors.empty, Matrices.empty) + // For ml connect only + private[ml] def this() = this("", Double.NaN, Vectors.empty, Matrices.empty) @Since("3.0.0") override val numClasses: Int = 2 diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/GBTClassifier.scala 
b/mllib/src/main/scala/org/apache/spark/ml/classification/GBTClassifier.scala index 8ed52d5e09e0..9f2c2c85115b 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/classification/GBTClassifier.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/classification/GBTClassifier.scala @@ -273,9 +273,8 @@ class GBTClassificationModel private[ml]( this(uid, _trees, _treeWeights, -1, 2) // For ml connect only - @Since("4.0.0") - private[ml] def this() = this(Identifiable.randomUID("gbtc"), - Array(new DecisionTreeRegressionModel), Array(0.0)) + private[ml] def this() = this("", + Array(new DecisionTreeRegressionModel), Array(Double.NaN), -1, -1) @Since("1.4.0") override def trees: Array[DecisionTreeRegressionModel] = _trees diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/LinearSVC.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/LinearSVC.scala index 6fa7f4d5d493..522db5f15fe3 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/classification/LinearSVC.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/classification/LinearSVC.scala @@ -365,7 +365,8 @@ class LinearSVCModel private[classification] ( extends ClassificationModel[Vector, LinearSVCModel] with LinearSVCParams with MLWritable with HasTrainingSummary[LinearSVCTrainingSummary] { - private[ml] def this() = this(Identifiable.randomUID("linearsvc"), Vectors.empty, 0.0) + // For ml connect only + private[ml] def this() = this("", Vectors.empty, Double.NaN) @Since("2.2.0") override val numClasses: Int = 2 diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala index 8a162d97384b..c6fa15aaf919 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/classification/LogisticRegression.scala @@ -1077,8 +1077,7 @@ class LogisticRegressionModel private[spark] ( 
Vectors.dense(intercept), 2, isMultinomial = false) // For ml connect only - @Since("4.0.0") - private[ml] def this() = this(Identifiable.randomUID("logreg"), Vectors.empty, 0) + private[ml] def this() = this("", Matrices.empty, Vectors.empty, -1, false) /** * A vector of model coefficients for "binomial" logistic regression. If this model was trained diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/MultilayerPerceptronClassifier.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/MultilayerPerceptronClassifier.scala index a09bf7a7aa36..73db8f189f81 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/classification/MultilayerPerceptronClassifier.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/classification/MultilayerPerceptronClassifier.scala @@ -283,7 +283,8 @@ class MultilayerPerceptronClassificationModel private[ml] ( with MultilayerPerceptronParams with Serializable with MLWritable with HasTrainingSummary[MultilayerPerceptronClassificationTrainingSummary]{ - private[ml] def this() = this(Identifiable.randomUID("mlpc"), Vectors.empty) + // For ml connect only + private[ml] def this() = this("", Vectors.empty) @Since("1.6.0") override lazy val numFeatures: Int = $(layers).head diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/NaiveBayes.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/NaiveBayes.scala index de2023899ee5..513b6e676e5d 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/classification/NaiveBayes.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/classification/NaiveBayes.scala @@ -401,8 +401,8 @@ class NaiveBayesModel private[ml] ( import NaiveBayes._ - private[ml] def this() = this(Identifiable.randomUID("nb"), - Vectors.empty, Matrices.empty, Matrices.empty) + // For ml connect only + private[ml] def this() = this("", Vectors.empty, Matrices.empty, Matrices.empty) /** * mllib NaiveBayes is a wrapper of ml implementation currently. 
diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/RandomForestClassifier.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/RandomForestClassifier.scala index 0833ad0d402b..24dd7095513a 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/classification/RandomForestClassifier.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/classification/RandomForestClassifier.scala @@ -256,8 +256,7 @@ class RandomForestClassificationModel private[ml] ( this(Identifiable.randomUID("rfc"), trees, numFeatures, numClasses) // For ml connect only - @Since("4.0.0") - private[ml] def this() = this(Array(new DecisionTreeClassificationModel), 0, 0) + private[ml] def this() = this("", Array(new DecisionTreeClassificationModel), -1, -1) @Since("1.4.0") override def trees: Array[DecisionTreeClassificationModel] = _trees diff --git a/mllib/src/main/scala/org/apache/spark/ml/clustering/BisectingKMeans.scala b/mllib/src/main/scala/org/apache/spark/ml/clustering/BisectingKMeans.scala index c1ef69e8b047..f1cd126a6406 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/clustering/BisectingKMeans.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/clustering/BisectingKMeans.scala @@ -96,9 +96,8 @@ class BisectingKMeansModel private[ml] ( extends Model[BisectingKMeansModel] with BisectingKMeansParams with MLWritable with HasTrainingSummary[BisectingKMeansSummary] { - @Since("4.0.0") - private[ml] def this() = this(Identifiable.randomUID("bisecting-kmeans"), - new MLlibBisectingKMeansModel(null)) + // For ml connect only + private[ml] def this() = this("", null) @Since("3.0.0") lazy val numFeatures: Int = parentModel.clusterCenters.head.size diff --git a/mllib/src/main/scala/org/apache/spark/ml/clustering/GaussianMixture.scala b/mllib/src/main/scala/org/apache/spark/ml/clustering/GaussianMixture.scala index ad1533cd37a9..42ce5d329ce0 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/clustering/GaussianMixture.scala +++ 
b/mllib/src/main/scala/org/apache/spark/ml/clustering/GaussianMixture.scala @@ -93,8 +93,8 @@ class GaussianMixtureModel private[ml] ( extends Model[GaussianMixtureModel] with GaussianMixtureParams with MLWritable with HasTrainingSummary[GaussianMixtureSummary] { - private[ml] def this() = this(Identifiable.randomUID("gmm"), - Array.emptyDoubleArray, Array.empty) + // For ml connect only + private[ml] def this() = this("", Array.emptyDoubleArray, Array.empty) @Since("3.0.0") lazy val numFeatures: Int = gaussians.head.mean.size diff --git a/mllib/src/main/scala/org/apache/spark/ml/clustering/KMeans.scala b/mllib/src/main/scala/org/apache/spark/ml/clustering/KMeans.scala index e878e12f4df4..0821d9a841cc 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/clustering/KMeans.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/clustering/KMeans.scala @@ -139,9 +139,7 @@ class KMeansModel private[ml] ( with HasTrainingSummary[KMeansSummary] { // For ml connect only - @Since("4.0.0") - private[ml] def this() = this(Identifiable.randomUID("kmeans"), - new MLlibKMeansModel(clusterCenters = null)) + private[ml] def this() = this("", null) @Since("3.0.0") lazy val numFeatures: Int = parentModel.clusterCenters.head.size diff --git a/mllib/src/main/scala/org/apache/spark/ml/clustering/LDA.scala b/mllib/src/main/scala/org/apache/spark/ml/clustering/LDA.scala index 3fce96fbfbb0..3ea1c8594e1f 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/clustering/LDA.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/clustering/LDA.scala @@ -617,7 +617,8 @@ class LocalLDAModel private[ml] ( sparkSession: SparkSession) extends LDAModel(uid, vocabSize, sparkSession) { - private[ml] def this() = this(Identifiable.randomUID("lda"), -1, null, null) + // For ml connect only + private[ml] def this() = this("", -1, null, null) oldLocalModel.setSeed(getSeed) @@ -715,7 +716,8 @@ class DistributedLDAModel private[ml] ( private var oldLocalModelOption: Option[OldLocalLDAModel]) extends 
LDAModel(uid, vocabSize, sparkSession) { - private[ml] def this() = this(Identifiable.randomUID("lda"), -1, null, null, None) + // For ml connect only + private[ml] def this() = this("", -1, null, null, None) override private[clustering] def oldLocalModel: OldLocalLDAModel = { if (oldLocalModelOption.isEmpty) { diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/BucketedRandomProjectionLSH.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/BucketedRandomProjectionLSH.scala index 5037ac941afb..c429788ee368 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/BucketedRandomProjectionLSH.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/BucketedRandomProjectionLSH.scala @@ -68,7 +68,8 @@ class BucketedRandomProjectionLSHModel private[ml]( private[ml] val randMatrix: Matrix) extends LSHModel[BucketedRandomProjectionLSHModel] with BucketedRandomProjectionLSHParams { - private[ml] def this() = this(Identifiable.randomUID("brp-lsh"), Matrices.empty) + // For ml connect only + private[ml] def this() = this("", Matrices.empty) private[ml] def this(uid: String, randUnitVectors: Array[Vector]) = { this(uid, Matrices.fromVectors(randUnitVectors.toImmutableArraySeq)) diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/ChiSqSelector.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/ChiSqSelector.scala index e93d96cf9717..ff18efb14939 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/ChiSqSelector.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/ChiSqSelector.scala @@ -137,8 +137,8 @@ final class ChiSqSelectorModel private[ml] ( import ChiSqSelectorModel._ - private[ml] def this() = this( - Identifiable.randomUID("chiSqSelector"), Array.emptyIntArray) + // For ml connect only + private[ml] def this() = this("", Array.emptyIntArray) override protected def isNumericAttribute = false diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/CountVectorizer.scala 
b/mllib/src/main/scala/org/apache/spark/ml/feature/CountVectorizer.scala index 95788be6bd2b..34465248f20d 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/CountVectorizer.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/CountVectorizer.scala @@ -277,7 +277,8 @@ class CountVectorizerModel( import CountVectorizerModel._ - private[ml] def this() = this(Identifiable.randomUID("cntVecModel"), Array.empty) + // For ml connect only + private[ml] def this() = this("", Array.empty) @Since("1.5.0") def this(vocabulary: Array[String]) = { diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/IDF.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/IDF.scala index 5459bb3f31da..c2b7ff7b00a3 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/IDF.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/IDF.scala @@ -121,7 +121,8 @@ class IDFModel private[ml] ( import IDFModel._ - private[ml] def this() = this(Identifiable.randomUID("idf"), null) + // For ml connect only + private[ml] def this() = this("", null) /** @group setParam */ @Since("1.4.0") diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/Imputer.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/Imputer.scala index 2f51ae2d7fe3..4e169ab178b9 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/Imputer.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/Imputer.scala @@ -246,7 +246,8 @@ class ImputerModel private[ml] ( import ImputerModel._ - private[ml] def this() = this(Identifiable.randomUID("imputer"), null) + // For ml connect only + private[ml] def this() = this("", null) /** @group setParam */ @Since("3.0.0") diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/MaxAbsScaler.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/MaxAbsScaler.scala index 66dbabc6187e..a9f1cd34ba3e 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/MaxAbsScaler.scala +++ 
b/mllib/src/main/scala/org/apache/spark/ml/feature/MaxAbsScaler.scala @@ -107,7 +107,8 @@ class MaxAbsScalerModel private[ml] ( import MaxAbsScalerModel._ - private[ml] def this() = this(Identifiable.randomUID("maxAbsScal"), Vectors.empty) + // For ml connect only + private[ml] def this() = this("", Vectors.empty) /** @group setParam */ @Since("2.0.0") diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/MinHashLSH.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/MinHashLSH.scala index d077b0a4a022..96d341b16347 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/MinHashLSH.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/MinHashLSH.scala @@ -48,7 +48,8 @@ class MinHashLSHModel private[ml]( private[ml] val randCoefficients: Array[(Int, Int)]) extends LSHModel[MinHashLSHModel] { - private[ml] def this() = this(Identifiable.randomUID("mh-lsh"), Array.empty) + // For ml connect only + private[ml] def this() = this("", Array.empty) /** @group setParam */ @Since("2.4.0") diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/MinMaxScaler.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/MinMaxScaler.scala index e3b0590524f3..c54e64f97953 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/MinMaxScaler.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/MinMaxScaler.scala @@ -154,7 +154,8 @@ class MinMaxScalerModel private[ml] ( import MinMaxScalerModel._ - private[ml] def this() = this(Identifiable.randomUID("minMaxScal"), Vectors.empty, Vectors.empty) + // For ml connect only + private[ml] def this() = this("", Vectors.empty, Vectors.empty) /** @group setParam */ @Since("1.5.0") diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/OneHotEncoder.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/OneHotEncoder.scala index 25bcdc9a1c29..3eaff518e8fc 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/OneHotEncoder.scala +++ 
b/mllib/src/main/scala/org/apache/spark/ml/feature/OneHotEncoder.scala @@ -234,7 +234,8 @@ class OneHotEncoderModel private[ml] ( import OneHotEncoderModel._ - private[ml] def this() = this(Identifiable.randomUID("oneHotEncoder)"), Array.emptyIntArray) + // For ml connect only + private[ml] def this() = this("", Array.emptyIntArray) // Returns the category size for each index with `dropLast` and `handleInvalid` // taken into account. diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/PCA.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/PCA.scala index 67c8fcf15eec..6b61e761f589 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/PCA.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/PCA.scala @@ -128,9 +128,7 @@ class PCAModel private[ml] ( import PCAModel._ // For ml connect only - @Since("4.0.0") - private[ml] def this() = this(Identifiable.randomUID("pca"), - DenseMatrix.zeros(1, 1), Vectors.empty) + private[ml] def this() = this("", Matrices.empty, Vectors.empty) /** @group setParam */ @Since("1.5.0") diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/RFormula.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/RFormula.scala index 2eb37fd65d7f..d2191185dddd 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/RFormula.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/RFormula.scala @@ -349,7 +349,8 @@ class RFormulaModel private[feature]( private[ml] val pipelineModel: PipelineModel) extends Model[RFormulaModel] with RFormulaBase with MLWritable { - private[ml] def this() = this(Identifiable.randomUID("rFormula"), null, null) + // For ml connect only + private[ml] def this() = this("", null, null) @Since("2.0.0") override def transform(dataset: Dataset[_]): DataFrame = { diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/RobustScaler.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/RobustScaler.scala index c77f7008d05a..1779f0d6278f 100644 --- 
a/mllib/src/main/scala/org/apache/spark/ml/feature/RobustScaler.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/RobustScaler.scala @@ -230,7 +230,8 @@ class RobustScalerModel private[ml] ( import RobustScalerModel._ - private[ml] def this() = this(Identifiable.randomUID("robustScal"), Vectors.empty, Vectors.empty) + // For ml connect only + private[ml] def this() = this("", Vectors.empty, Vectors.empty) /** @group setParam */ def setInputCol(value: String): this.type = set(inputCol, value) diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/StandardScaler.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/StandardScaler.scala index 546463c15844..c1ac1fdbba7d 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/StandardScaler.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/StandardScaler.scala @@ -147,7 +147,8 @@ class StandardScalerModel private[ml] ( import StandardScalerModel._ - private[ml] def this() = this(Identifiable.randomUID("stdScal"), Vectors.empty, Vectors.empty) + // For ml connect only + private[ml] def this() = this("", Vectors.empty, Vectors.empty) /** @group setParam */ @Since("1.2.0") diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/StringIndexer.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/StringIndexer.scala index 8b643372ec93..06a88e9b1c49 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/StringIndexer.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/StringIndexer.scala @@ -302,8 +302,7 @@ class StringIndexerModel ( def this(labelsArray: Array[Array[String]]) = this(Identifiable.randomUID("strIdx"), labelsArray) // For ml connect only - @Since("4.0.0") - private[ml] def this() = this(labels = Array.empty) + private[ml] def this() = this("", Array.empty[Array[String]]) @deprecated("`labels` is deprecated and will be removed in 3.1.0. 
Use `labelsArray` " + "instead.", "3.0.0") diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/TargetEncoder.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/TargetEncoder.scala index 31504fead161..39ffaf32a1f3 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/TargetEncoder.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/TargetEncoder.scala @@ -286,7 +286,8 @@ class TargetEncoderModel private[ml] ( @Since("4.0.0") private[ml] val stats: Array[Map[Double, (Double, Double)]]) extends Model[TargetEncoderModel] with TargetEncoderBase with MLWritable { - private[ml] def this() = this(Identifiable.randomUID("TargetEncoder"), Array.empty) + // For ml connect only + private[ml] def this() = this("", Array.empty) /** @group setParam */ @Since("4.0.0") diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/UnivariateFeatureSelector.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/UnivariateFeatureSelector.scala index d845e2887a64..704166d9b657 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/UnivariateFeatureSelector.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/UnivariateFeatureSelector.scala @@ -289,8 +289,8 @@ class UnivariateFeatureSelectorModel private[ml]( extends Model[UnivariateFeatureSelectorModel] with UnivariateFeatureSelectorParams with MLWritable { - private[ml] def this() = this( - Identifiable.randomUID("UnivariateFeatureSelector"), Array.emptyIntArray) + // For ml connect only + private[ml] def this() = this("", Array.emptyIntArray) /** @group setParam */ @Since("3.1.1") diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/VarianceThresholdSelector.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/VarianceThresholdSelector.scala index 23ea1ee3066e..cd1905b90ace 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/VarianceThresholdSelector.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/VarianceThresholdSelector.scala @@ -126,8 +126,8 @@ 
class VarianceThresholdSelectorModel private[ml]( extends Model[VarianceThresholdSelectorModel] with VarianceThresholdSelectorParams with MLWritable { - private[ml] def this() = this( - Identifiable.randomUID("VarianceThresholdSelector"), Array.emptyIntArray) + // For ml connect only + private[ml] def this() = this("", Array.emptyIntArray) if (selectedFeatures.length >= 2) { require(selectedFeatures.sliding(2).forall(l => l(0) < l(1)), diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/VectorIndexer.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/VectorIndexer.scala index 5063f15302a3..091e20922782 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/VectorIndexer.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/VectorIndexer.scala @@ -298,7 +298,8 @@ class VectorIndexerModel private[ml] ( import VectorIndexerModel._ - private[ml] def this() = this(Identifiable.randomUID("vecIdx"), -1, Map.empty) + // For ml connect only + private[ml] def this() = this("", -1, Map.empty) /** Java-friendly version of [[categoryMaps]] */ @Since("1.4.0") diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/Word2Vec.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/Word2Vec.scala index c3eeb394c5d4..7d6765b231b5 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/Word2Vec.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/Word2Vec.scala @@ -211,7 +211,8 @@ class Word2VecModel private[ml] ( import Word2VecModel._ - private[ml] def this() = this(Identifiable.randomUID("w2v"), null) + // For ml connect only + private[ml] def this() = this("", null) /** * Returns a dataframe with two fields, "word" and "vector", with "word" being a String and diff --git a/mllib/src/main/scala/org/apache/spark/ml/fpm/FPGrowth.scala b/mllib/src/main/scala/org/apache/spark/ml/fpm/FPGrowth.scala index d90124c62d54..0b75753695fd 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/fpm/FPGrowth.scala +++ 
b/mllib/src/main/scala/org/apache/spark/ml/fpm/FPGrowth.scala @@ -223,7 +223,8 @@ class FPGrowthModel private[ml] ( private val numTrainingRecords: Long) extends Model[FPGrowthModel] with FPGrowthParams with MLWritable { - private[ml] def this() = this(Identifiable.randomUID("fpgrowth"), null, Map.empty, 0L) + // For ml connect only + private[ml] def this() = this("", null, Map.empty, -1L) /** @group setParam */ @Since("2.2.0") diff --git a/mllib/src/main/scala/org/apache/spark/ml/recommendation/ALS.scala b/mllib/src/main/scala/org/apache/spark/ml/recommendation/ALS.scala index 4120e16794a8..95c47531720d 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/recommendation/ALS.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/recommendation/ALS.scala @@ -281,8 +281,7 @@ class ALSModel private[ml] ( extends Model[ALSModel] with ALSModelParams with MLWritable { // For ml connect only - @Since("4.0.0") - private[ml] def this() = this(Identifiable.randomUID("als"), 0, null, null) + private[ml] def this() = this("", -1, null, null) /** @group setParam */ @Since("1.4.0") diff --git a/mllib/src/main/scala/org/apache/spark/ml/regression/AFTSurvivalRegression.scala b/mllib/src/main/scala/org/apache/spark/ml/regression/AFTSurvivalRegression.scala index d9f7af73ce33..8bc0f641aaad 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/regression/AFTSurvivalRegression.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/regression/AFTSurvivalRegression.scala @@ -371,8 +371,8 @@ class AFTSurvivalRegressionModel private[ml] ( extends RegressionModel[Vector, AFTSurvivalRegressionModel] with AFTSurvivalRegressionParams with MLWritable { - private[ml] def this() = this(Identifiable.randomUID("aftSurvReg"), - Vectors.empty, Double.NaN, Double.NaN) + // For ml connect only + private[ml] def this() = this("", Vectors.empty, Double.NaN, Double.NaN) @Since("3.0.0") override def numFeatures: Int = coefficients.size diff --git 
a/mllib/src/main/scala/org/apache/spark/ml/regression/DecisionTreeRegressor.scala b/mllib/src/main/scala/org/apache/spark/ml/regression/DecisionTreeRegressor.scala index 2c692d33a38d..4f38d8757413 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/regression/DecisionTreeRegressor.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/regression/DecisionTreeRegressor.scala @@ -188,8 +188,7 @@ class DecisionTreeRegressionModel private[ml] ( this(Identifiable.randomUID("dtr"), rootNode, numFeatures) // For ml connect only - @Since("4.0.0") - private[ml] def this() = this(Node.dummyNode, 0) + private[ml] def this() = this("", Node.dummyNode, -1) override def predict(features: Vector): Double = { rootNode.predictImpl(features).prediction diff --git a/mllib/src/main/scala/org/apache/spark/ml/regression/FMRegressor.scala b/mllib/src/main/scala/org/apache/spark/ml/regression/FMRegressor.scala index 02ef1df2c44e..994ee3f77f3c 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/regression/FMRegressor.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/regression/FMRegressor.scala @@ -461,8 +461,8 @@ class FMRegressionModel private[regression] ( extends RegressionModel[Vector, FMRegressionModel] with FMRegressorParams with MLWritable { - private[ml] def this() = this(Identifiable.randomUID("fmr"), - Double.NaN, Vectors.empty, Matrices.empty) + // For ml connect only + private[ml] def this() = this("", Double.NaN, Vectors.empty, Matrices.empty) @Since("3.0.0") override val numFeatures: Int = linear.size diff --git a/mllib/src/main/scala/org/apache/spark/ml/regression/GBTRegressor.scala b/mllib/src/main/scala/org/apache/spark/ml/regression/GBTRegressor.scala index c2c672a7fa60..f71eea6c6293 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/regression/GBTRegressor.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/regression/GBTRegressor.scala @@ -243,9 +243,7 @@ class GBTRegressionModel private[ml]( this(uid, _trees, _treeWeights, -1) // For ml connect only - 
@Since("4.0.0") - private[ml] def this() = this(Identifiable.randomUID("gbtr"), - Array(new DecisionTreeRegressionModel), Array(0.0)) + private[ml] def this() = this("", Array(new DecisionTreeRegressionModel), Array(Double.NaN), -1) @Since("1.4.0") override def trees: Array[DecisionTreeRegressionModel] = _trees diff --git a/mllib/src/main/scala/org/apache/spark/ml/regression/GeneralizedLinearRegression.scala b/mllib/src/main/scala/org/apache/spark/ml/regression/GeneralizedLinearRegression.scala index dea182902ace..d5141ec4ccee 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/regression/GeneralizedLinearRegression.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/regression/GeneralizedLinearRegression.scala @@ -1009,7 +1009,8 @@ class GeneralizedLinearRegressionModel private[ml] ( with GeneralizedLinearRegressionBase with MLWritable with HasTrainingSummary[GeneralizedLinearRegressionTrainingSummary] { - private[ml] def this() = this(Identifiable.randomUID("glm"), Vectors.empty, Double.NaN) + // For ml connect only + private[ml] def this() = this("", Vectors.empty, Double.NaN) /** * Sets the link prediction (linear predictor) column name. 
diff --git a/mllib/src/main/scala/org/apache/spark/ml/regression/IsotonicRegression.scala b/mllib/src/main/scala/org/apache/spark/ml/regression/IsotonicRegression.scala index 53850089a5a4..e1bfff068cfe 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/regression/IsotonicRegression.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/regression/IsotonicRegression.scala @@ -213,7 +213,8 @@ class IsotonicRegressionModel private[ml] ( private val oldModel: MLlibIsotonicRegressionModel) extends Model[IsotonicRegressionModel] with IsotonicRegressionBase with MLWritable { - private[ml] def this() = this(Identifiable.randomUID("isoReg"), null) + // For ml connect only + private[ml] def this() = this("", null) /** @group setParam */ @Since("1.5.0") diff --git a/mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala b/mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala index b9cd138a51ec..b20c09db1fa7 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/regression/LinearRegression.scala @@ -703,8 +703,7 @@ class LinearRegressionModel private[ml] ( this(uid, coefficients, intercept, 1.0) // For ml connect only - @Since("4.0.0") - private[ml] def this() = this(Identifiable.randomUID("linReg"), Vectors.empty, 0.0, 0.0) + private[ml] def this() = this("", Vectors.empty, Double.NaN, Double.NaN) override val numFeatures: Int = coefficients.size diff --git a/mllib/src/main/scala/org/apache/spark/ml/regression/RandomForestRegressor.scala b/mllib/src/main/scala/org/apache/spark/ml/regression/RandomForestRegressor.scala index b0409c916a05..97d0f54d0eca 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/regression/RandomForestRegressor.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/regression/RandomForestRegressor.scala @@ -213,8 +213,7 @@ class RandomForestRegressionModel private[ml] ( this(Identifiable.randomUID("rfr"), trees, numFeatures) // For 
ml connect only - @Since("4.0.0") - private[ml] def this() = this(Array(new DecisionTreeRegressionModel), 0) + private[ml] def this() = this("", Array(new DecisionTreeRegressionModel), -1) @Since("1.4.0") override def trees: Array[DecisionTreeRegressionModel] = _trees --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org