This is an automated email from the ASF dual-hosted git repository. srowen pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push: new 25bcf59 [SPARK-25838][ML] Remove formatVersion from Saveable 25bcf59 is described below commit 25bcf59b3b566b77bfc8a40a4f4253b81f340aa4 Author: Marco Gaido <marcogaid...@gmail.com> AuthorDate: Sat Mar 9 09:44:20 2019 -0600 [SPARK-25838][ML] Remove formatVersion from Saveable ## What changes were proposed in this pull request? The `Saveable` interface declares a protected `formatVersion` member that is not used anywhere, so this PR removes it. ## How was this patch tested? Existing tests. Closes #22830 from mgaido91/SPARK-25838. Authored-by: Marco Gaido <marcogaid...@gmail.com> Signed-off-by: Sean Owen <sean.o...@databricks.com> --- .../mllib/classification/LogisticRegression.scala | 2 -- .../spark/mllib/classification/NaiveBayes.scala | 2 -- .../org/apache/spark/mllib/classification/SVM.scala | 2 -- .../mllib/clustering/BisectingKMeansModel.scala | 2 -- .../mllib/clustering/GaussianMixtureModel.scala | 2 -- .../apache/spark/mllib/clustering/KMeansModel.scala | 2 -- .../apache/spark/mllib/clustering/LDAModel.scala | 4 ---- .../mllib/clustering/PowerIterationClustering.scala | 2 -- .../apache/spark/mllib/feature/ChiSqSelector.scala | 2 -- .../org/apache/spark/mllib/feature/Word2Vec.scala | 2 -- .../scala/org/apache/spark/mllib/fpm/FPGrowth.scala | 2 -- .../org/apache/spark/mllib/fpm/PrefixSpan.scala | 2 -- .../recommendation/MatrixFactorizationModel.scala | 2 -- .../spark/mllib/regression/IsotonicRegression.scala | 2 -- .../org/apache/spark/mllib/regression/Lasso.scala | 2 -- .../spark/mllib/regression/LinearRegression.scala | 2 -- .../spark/mllib/regression/RidgeRegression.scala | 2 -- .../spark/mllib/tree/model/DecisionTreeModel.scala | 4 ---- .../spark/mllib/tree/model/treeEnsembleModels.scala | 8 -------- .../org/apache/spark/mllib/util/modelSaveLoad.scala | 3 --- project/MimaExcludes.scala | 21 +++++++++++++++++++++ 21 files changed, 21 insertions(+), 51 deletions(-) diff --git 
a/mllib/src/main/scala/org/apache/spark/mllib/classification/LogisticRegression.scala b/mllib/src/main/scala/org/apache/spark/mllib/classification/LogisticRegression.scala index 4b65000..d86aa01 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/classification/LogisticRegression.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/classification/LogisticRegression.scala @@ -163,8 +163,6 @@ class LogisticRegressionModel @Since("1.3.0") ( numFeatures, numClasses, weights, intercept, threshold) } - override protected def formatVersion: String = "1.0" - override def toString: String = { s"${super.toString}, numClasses = ${numClasses}, threshold = ${threshold.getOrElse("None")}" } diff --git a/mllib/src/main/scala/org/apache/spark/mllib/classification/NaiveBayes.scala b/mllib/src/main/scala/org/apache/spark/mllib/classification/NaiveBayes.scala index 16ba6ca..79bb4ad 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/classification/NaiveBayes.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/classification/NaiveBayes.scala @@ -170,8 +170,6 @@ class NaiveBayesModel private[spark] ( val data = NaiveBayesModel.SaveLoadV2_0.Data(labels, pi, theta, modelType) NaiveBayesModel.SaveLoadV2_0.save(sc, path, data) } - - override protected def formatVersion: String = "2.0" } @Since("1.3.0") diff --git a/mllib/src/main/scala/org/apache/spark/mllib/classification/SVM.scala b/mllib/src/main/scala/org/apache/spark/mllib/classification/SVM.scala index 5fb04ed..087c2c2 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/classification/SVM.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/classification/SVM.scala @@ -85,8 +85,6 @@ class SVMModel @Since("1.1.0") ( numFeatures = weights.size, numClasses = 2, weights, intercept, threshold) } - override protected def formatVersion: String = "1.0" - override def toString: String = { s"${super.toString}, numClasses = 2, threshold = ${threshold.getOrElse("None")}" } diff --git 
a/mllib/src/main/scala/org/apache/spark/mllib/clustering/BisectingKMeansModel.scala b/mllib/src/main/scala/org/apache/spark/mllib/clustering/BisectingKMeansModel.scala index b54b891..c397911 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/clustering/BisectingKMeansModel.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/clustering/BisectingKMeansModel.scala @@ -112,8 +112,6 @@ class BisectingKMeansModel private[clustering] ( override def save(sc: SparkContext, path: String): Unit = { BisectingKMeansModel.SaveLoadV3_0.save(sc, this, path) } - - override protected def formatVersion: String = "3.0" } @Since("2.0.0") diff --git a/mllib/src/main/scala/org/apache/spark/mllib/clustering/GaussianMixtureModel.scala b/mllib/src/main/scala/org/apache/spark/mllib/clustering/GaussianMixtureModel.scala index 1933d54..5d2ecf3 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/clustering/GaussianMixtureModel.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/clustering/GaussianMixtureModel.scala @@ -48,8 +48,6 @@ class GaussianMixtureModel @Since("1.3.0") ( require(weights.length == gaussians.length, "Length of weight and Gaussian arrays must match") - override protected def formatVersion = "1.0" - @Since("1.4.0") override def save(sc: SparkContext, path: String): Unit = { GaussianMixtureModel.SaveLoadV1_0.save(sc, path, weights, gaussians) diff --git a/mllib/src/main/scala/org/apache/spark/mllib/clustering/KMeansModel.scala b/mllib/src/main/scala/org/apache/spark/mllib/clustering/KMeansModel.scala index 32a0bff..09c38f7 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/clustering/KMeansModel.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/clustering/KMeansModel.scala @@ -112,8 +112,6 @@ class KMeansModel (@Since("1.0.0") val clusterCenters: Array[Vector], override def save(sc: SparkContext, path: String): Unit = { KMeansModel.SaveLoadV2_0.save(sc, this, path) } - - override protected def formatVersion: String = "2.0" } @Since("1.4.0") diff 
--git a/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDAModel.scala b/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDAModel.scala index fc0469c..91bc01c 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDAModel.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDAModel.scala @@ -216,8 +216,6 @@ class LocalLDAModel private[spark] ( }.toArray } - override protected def formatVersion = "1.0" - /** * Random seed for cluster initialization. */ @@ -835,8 +833,6 @@ class DistributedLDAModel private[clustering] ( // TODO: // override def topicDistributions(documents: RDD[(Long, Vector)]): RDD[(Long, Vector)] = ??? - override protected def formatVersion = "1.0" - @Since("1.5.0") override def save(sc: SparkContext, path: String): Unit = { // Note: This intentionally does not save checkpointFiles. diff --git a/mllib/src/main/scala/org/apache/spark/mllib/clustering/PowerIterationClustering.scala b/mllib/src/main/scala/org/apache/spark/mllib/clustering/PowerIterationClustering.scala index 765f272..48172f0 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/clustering/PowerIterationClustering.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/clustering/PowerIterationClustering.scala @@ -48,8 +48,6 @@ class PowerIterationClusteringModel @Since("1.3.0") ( override def save(sc: SparkContext, path: String): Unit = { PowerIterationClusteringModel.SaveLoadV1_0.save(sc, this, path) } - - override protected def formatVersion: String = "1.0" } @Since("1.4.0") diff --git a/mllib/src/main/scala/org/apache/spark/mllib/feature/ChiSqSelector.scala b/mllib/src/main/scala/org/apache/spark/mllib/feature/ChiSqSelector.scala index aa78e91..fc0a45c 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/feature/ChiSqSelector.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/feature/ChiSqSelector.scala @@ -113,8 +113,6 @@ class ChiSqSelectorModel @Since("1.3.0") ( override def save(sc: SparkContext, path: String): Unit = { 
ChiSqSelectorModel.SaveLoadV1_0.save(sc, this, path) } - - override protected def formatVersion: String = "1.0" } object ChiSqSelectorModel extends Loader[ChiSqSelectorModel] { diff --git a/mllib/src/main/scala/org/apache/spark/mllib/feature/Word2Vec.scala b/mllib/src/main/scala/org/apache/spark/mllib/feature/Word2Vec.scala index 9cdade1..94c4fcc 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/feature/Word2Vec.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/feature/Word2Vec.scala @@ -511,8 +511,6 @@ class Word2VecModel private[spark] ( this(Word2VecModel.buildWordIndex(model), Word2VecModel.buildWordVectors(model)) } - override protected def formatVersion = "1.0" - @Since("1.4.0") def save(sc: SparkContext, path: String): Unit = { Word2VecModel.SaveLoadV1_0.save(sc, path, getVectors) diff --git a/mllib/src/main/scala/org/apache/spark/mllib/fpm/FPGrowth.scala b/mllib/src/main/scala/org/apache/spark/mllib/fpm/FPGrowth.scala index 519c1ea..3531822 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/fpm/FPGrowth.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/fpm/FPGrowth.scala @@ -84,8 +84,6 @@ class FPGrowthModel[Item: ClassTag] @Since("2.4.0") ( override def save(sc: SparkContext, path: String): Unit = { FPGrowthModel.SaveLoadV1_0.save(this, path) } - - override protected val formatVersion: String = "1.0" } @Since("2.0.0") diff --git a/mllib/src/main/scala/org/apache/spark/mllib/fpm/PrefixSpan.scala b/mllib/src/main/scala/org/apache/spark/mllib/fpm/PrefixSpan.scala index 55c2dc7..69e4b76 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/fpm/PrefixSpan.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/fpm/PrefixSpan.scala @@ -628,8 +628,6 @@ class PrefixSpanModel[Item] @Since("1.5.0") ( override def save(sc: SparkContext, path: String): Unit = { PrefixSpanModel.SaveLoadV1_0.save(this, path) } - - override protected val formatVersion: String = "1.0" } @Since("2.0.0") diff --git 
a/mllib/src/main/scala/org/apache/spark/mllib/recommendation/MatrixFactorizationModel.scala b/mllib/src/main/scala/org/apache/spark/mllib/recommendation/MatrixFactorizationModel.scala index 7b49d4d..e5e82d1 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/recommendation/MatrixFactorizationModel.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/recommendation/MatrixFactorizationModel.scala @@ -196,8 +196,6 @@ class MatrixFactorizationModel @Since("0.8.0") ( .map(t => Rating(t._1, product, t._2)) } - protected override val formatVersion: String = "1.0" - /** * Save this model to the given path. * diff --git a/mllib/src/main/scala/org/apache/spark/mllib/regression/IsotonicRegression.scala b/mllib/src/main/scala/org/apache/spark/mllib/regression/IsotonicRegression.scala index 8347cca..649f981 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/regression/IsotonicRegression.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/regression/IsotonicRegression.scala @@ -160,8 +160,6 @@ class IsotonicRegressionModel @Since("1.3.0") ( override def save(sc: SparkContext, path: String): Unit = { IsotonicRegressionModel.SaveLoadV1_0.save(sc, path, boundaries, predictions, isotonic) } - - override protected def formatVersion: String = "1.0" } @Since("1.4.0") diff --git a/mllib/src/main/scala/org/apache/spark/mllib/regression/Lasso.scala b/mllib/src/main/scala/org/apache/spark/mllib/regression/Lasso.scala index cef1b4f..ead9f5b 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/regression/Lasso.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/regression/Lasso.scala @@ -51,8 +51,6 @@ class LassoModel @Since("1.1.0") ( override def save(sc: SparkContext, path: String): Unit = { GLMRegressionModel.SaveLoadV1_0.save(sc, path, this.getClass.getName, weights, intercept) } - - override protected def formatVersion: String = "1.0" } @Since("1.3.0") diff --git a/mllib/src/main/scala/org/apache/spark/mllib/regression/LinearRegression.scala 
b/mllib/src/main/scala/org/apache/spark/mllib/regression/LinearRegression.scala index 60262fd..cb08216 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/regression/LinearRegression.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/regression/LinearRegression.scala @@ -51,8 +51,6 @@ class LinearRegressionModel @Since("1.1.0") ( override def save(sc: SparkContext, path: String): Unit = { GLMRegressionModel.SaveLoadV1_0.save(sc, path, this.getClass.getName, weights, intercept) } - - override protected def formatVersion: String = "1.0" } @Since("1.3.0") diff --git a/mllib/src/main/scala/org/apache/spark/mllib/regression/RidgeRegression.scala b/mllib/src/main/scala/org/apache/spark/mllib/regression/RidgeRegression.scala index 52977ac..43c3154 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/regression/RidgeRegression.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/regression/RidgeRegression.scala @@ -52,8 +52,6 @@ class RidgeRegressionModel @Since("1.1.0") ( override def save(sc: SparkContext, path: String): Unit = { GLMRegressionModel.SaveLoadV1_0.save(sc, path, this.getClass.getName, weights, intercept) } - - override protected def formatVersion: String = "1.0" } @Since("1.3.0") diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/model/DecisionTreeModel.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/model/DecisionTreeModel.scala index 27618e1..9983ca7 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/tree/model/DecisionTreeModel.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/model/DecisionTreeModel.scala @@ -126,15 +126,11 @@ class DecisionTreeModel @Since("1.0.0") ( override def save(sc: SparkContext, path: String): Unit = { DecisionTreeModel.SaveLoadV1_0.save(sc, path, this) } - - override protected def formatVersion: String = DecisionTreeModel.formatVersion } @Since("1.3.0") object DecisionTreeModel extends Loader[DecisionTreeModel] with Logging { - private[spark] def formatVersion: String = 
"1.0" - private[tree] object SaveLoadV1_0 { def thisFormatVersion: String = "1.0" diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/model/treeEnsembleModels.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/model/treeEnsembleModels.scala index fc1d412..810f528 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/tree/model/treeEnsembleModels.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/model/treeEnsembleModels.scala @@ -66,15 +66,11 @@ class RandomForestModel @Since("1.2.0") ( TreeEnsembleModel.SaveLoadV1_0.save(sc, path, this, RandomForestModel.SaveLoadV1_0.thisClassName) } - - override protected def formatVersion: String = RandomForestModel.formatVersion } @Since("1.3.0") object RandomForestModel extends Loader[RandomForestModel] { - private[mllib] def formatVersion: String = TreeEnsembleModel.SaveLoadV1_0.thisFormatVersion - /** * * @param sc Spark context used for loading model files. @@ -170,8 +166,6 @@ class GradientBoostedTreesModel @Since("1.2.0") ( broadcastTrees.destroy() evaluation.toArray } - - override protected def formatVersion: String = GradientBoostedTreesModel.formatVersion } /** @@ -235,8 +229,6 @@ object GradientBoostedTreesModel extends Loader[GradientBoostedTreesModel] { newPredError } - private[mllib] def formatVersion: String = TreeEnsembleModel.SaveLoadV1_0.thisFormatVersion - /** * @param sc Spark context used for loading model files. * @param path Path specifying the directory to which the model was saved. diff --git a/mllib/src/main/scala/org/apache/spark/mllib/util/modelSaveLoad.scala b/mllib/src/main/scala/org/apache/spark/mllib/util/modelSaveLoad.scala index da0eb04..e8889bf 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/util/modelSaveLoad.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/util/modelSaveLoad.scala @@ -54,9 +54,6 @@ trait Saveable { @Since("1.3.0") def save(sc: SparkContext, path: String): Unit - /** Current version of model save/load format. 
*/ - protected def formatVersion: String - } /** diff --git a/project/MimaExcludes.scala b/project/MimaExcludes.scala index 5d5d01f..fdc5cf1 100644 --- a/project/MimaExcludes.scala +++ b/project/MimaExcludes.scala @@ -36,6 +36,27 @@ object MimaExcludes { // Exclude rules for 3.0.x lazy val v30excludes = v24excludes ++ Seq( + // [SPARK-25838] Remove formatVersion from Saveable + ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.mllib.clustering.DistributedLDAModel.formatVersion"), + ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.mllib.clustering.LocalLDAModel.formatVersion"), + ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.mllib.clustering.BisectingKMeansModel.formatVersion"), + ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.mllib.clustering.KMeansModel.formatVersion"), + ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.mllib.clustering.PowerIterationClusteringModel.formatVersion"), + ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.mllib.clustering.GaussianMixtureModel.formatVersion"), + ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.mllib.recommendation.MatrixFactorizationModel.formatVersion"), + ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.mllib.feature.ChiSqSelectorModel.formatVersion"), + ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.mllib.feature.Word2VecModel.formatVersion"), + ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.mllib.classification.SVMModel.formatVersion"), + ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.mllib.classification.LogisticRegressionModel.formatVersion"), + ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.mllib.classification.NaiveBayesModel.formatVersion"), + ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.mllib.util.Saveable.formatVersion"), + 
ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.mllib.fpm.FPGrowthModel.formatVersion"), + ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.mllib.fpm.PrefixSpanModel.formatVersion"), + ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.mllib.regression.IsotonicRegressionModel.formatVersion"), + ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.mllib.regression.RidgeRegressionModel.formatVersion"), + ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.mllib.regression.LassoModel.formatVersion"), + ProblemFilters.exclude[DirectMissingMethodProblem]("org.apache.spark.mllib.regression.LinearRegressionModel.formatVersion"), + // [SPARK-26254][CORE] Extract Hive + Kafka dependencies from Core. ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.deploy.security.HiveDelegationTokenProvider"), --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org