This is an automated email from the ASF dual-hosted git repository.

srowen pushed a commit to branch branch-3.1
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/branch-3.1 by this push:
     new 1fa052f  [SPARK-33398] Fix loading tree models prior to Spark 3.0
1fa052f is described below

commit 1fa052fa589daa7d9e5218296cd2ef7143bae443
Author: Ruifeng Zheng <ruife...@foxmail.com>
AuthorDate: Sun Jan 3 11:52:46 2021 -0600

    [SPARK-33398] Fix loading tree models prior to Spark 3.0
    
    ### What changes were proposed in this pull request?
    In 
https://github.com/apache/spark/pull/21632/files#diff-0fdae8a6782091746ed20ea43f77b639f9c6a5f072dd2f600fcf9a7b37db4f47,
 a new field `rawCount` was added to `NodeData`, which means that a tree 
model trained in 2.4 cannot be loaded in 3.0/3.1/master.
    The field `rawCount` is used only in training, and not in 
`transform`/`predict`/`featureImportance`, so it is simply set to -1L.
    
    ### Why are the changes needed?
    To support loading old tree models in 3.0/3.1/master.
    
    ### Does this PR introduce _any_ user-facing change?
    No
    
    ### How was this patch tested?
    Added test suites, with tree models saved by Spark 2.4.7 as test resources.
    
    Closes #30889 from zhengruifeng/fix_tree_load.
    
    Authored-by: Ruifeng Zheng <ruife...@foxmail.com>
    Signed-off-by: Sean Owen <sro...@gmail.com>
    (cherry picked from commit 6b7527e381591bcd51be205853aea3e349893139)
    Signed-off-by: Sean Owen <sro...@gmail.com>
---
 .../org/apache/spark/ml/tree/treeModels.scala      |  48 ++++++++++++++-------
 .../ml-models/dtc-2.4.7/data/._SUCCESS.crc         | Bin 0 -> 8 bytes
 ...-406c-894c-ca4eac67c690-c000.snappy.parquet.crc | Bin 0 -> 36 bytes
 .../resources/ml-models/dtc-2.4.7/data/_SUCCESS    |   0
 ...c890-406c-894c-ca4eac67c690-c000.snappy.parquet | Bin 0 -> 3242 bytes
 .../ml-models/dtc-2.4.7/metadata/._SUCCESS.crc     | Bin 0 -> 8 bytes
 .../ml-models/dtc-2.4.7/metadata/.part-00000.crc   | Bin 0 -> 16 bytes
 .../ml-models/dtc-2.4.7/metadata/_SUCCESS          |   0
 .../ml-models/dtc-2.4.7/metadata/part-00000        |   1 +
 .../ml-models/dtr-2.4.7/data/._SUCCESS.crc         | Bin 0 -> 8 bytes
 ...-4b3d-84af-d861adcb9ca8-c000.snappy.parquet.crc | Bin 0 -> 36 bytes
 .../resources/ml-models/dtr-2.4.7/data/_SUCCESS    |   0
 ...a437-4b3d-84af-d861adcb9ca8-c000.snappy.parquet | Bin 0 -> 3264 bytes
 .../ml-models/dtr-2.4.7/metadata/._SUCCESS.crc     | Bin 0 -> 8 bytes
 .../ml-models/dtr-2.4.7/metadata/.part-00000.crc   | Bin 0 -> 12 bytes
 .../ml-models/dtr-2.4.7/metadata/_SUCCESS          |   0
 .../ml-models/dtr-2.4.7/metadata/part-00000        |   1 +
 .../ml-models/gbtc-2.4.7/data/._SUCCESS.crc        | Bin 0 -> 8 bytes
 ...-41c7-91c0-6da8cc01fb43-c000.snappy.parquet.crc | Bin 0 -> 44 bytes
 .../resources/ml-models/gbtc-2.4.7/data/_SUCCESS   |   0
 ...c861-41c7-91c0-6da8cc01fb43-c000.snappy.parquet | Bin 0 -> 4542 bytes
 .../ml-models/gbtc-2.4.7/metadata/._SUCCESS.crc    | Bin 0 -> 8 bytes
 .../ml-models/gbtc-2.4.7/metadata/.part-00000.crc  | Bin 0 -> 16 bytes
 .../ml-models/gbtc-2.4.7/metadata/_SUCCESS         |   0
 .../ml-models/gbtc-2.4.7/metadata/part-00000       |   1 +
 .../gbtc-2.4.7/treesMetadata/._SUCCESS.crc         | Bin 0 -> 8 bytes
 ...-4a90-813c-ddc394101e21-c000.snappy.parquet.crc | Bin 0 -> 36 bytes
 .../ml-models/gbtc-2.4.7/treesMetadata/_SUCCESS    |   0
 ...31e3-4a90-813c-ddc394101e21-c000.snappy.parquet | Bin 0 -> 3075 bytes
 .../ml-models/gbtr-2.4.7/data/._SUCCESS.crc        | Bin 0 -> 8 bytes
 ...-4511-9aab-639288bfae6d-c000.snappy.parquet.crc | Bin 0 -> 40 bytes
 .../resources/ml-models/gbtr-2.4.7/data/_SUCCESS   |   0
 ...d346-4511-9aab-639288bfae6d-c000.snappy.parquet | Bin 0 -> 3740 bytes
 .../ml-models/gbtr-2.4.7/metadata/._SUCCESS.crc    | Bin 0 -> 8 bytes
 .../ml-models/gbtr-2.4.7/metadata/.part-00000.crc  | Bin 0 -> 16 bytes
 .../ml-models/gbtr-2.4.7/metadata/_SUCCESS         |   0
 .../ml-models/gbtr-2.4.7/metadata/part-00000       |   1 +
 .../gbtr-2.4.7/treesMetadata/._SUCCESS.crc         | Bin 0 -> 8 bytes
 ...-4fd8-ad9c-4be239c2215a-c000.snappy.parquet.crc | Bin 0 -> 32 bytes
 .../ml-models/gbtr-2.4.7/treesMetadata/_SUCCESS    |   0
 ...87fe-4fd8-ad9c-4be239c2215a-c000.snappy.parquet | Bin 0 -> 3038 bytes
 .../ml-models/rfc-2.4.7/data/._SUCCESS.crc         | Bin 0 -> 8 bytes
 ...-4485-b112-25b4b11c9009-c000.snappy.parquet.crc | Bin 0 -> 40 bytes
 .../resources/ml-models/rfc-2.4.7/data/_SUCCESS    |   0
 ...91f8-4485-b112-25b4b11c9009-c000.snappy.parquet | Bin 0 -> 3836 bytes
 .../ml-models/rfc-2.4.7/metadata/._SUCCESS.crc     | Bin 0 -> 8 bytes
 .../ml-models/rfc-2.4.7/metadata/.part-00000.crc   | Bin 0 -> 16 bytes
 .../ml-models/rfc-2.4.7/metadata/_SUCCESS          |   0
 .../ml-models/rfc-2.4.7/metadata/part-00000        |   1 +
 .../rfc-2.4.7/treesMetadata/._SUCCESS.crc          | Bin 0 -> 8 bytes
 ...-4c4e-a823-70c7afdcbdc5-c000.snappy.parquet.crc | Bin 0 -> 36 bytes
 .../ml-models/rfc-2.4.7/treesMetadata/_SUCCESS     |   0
 ...b666-4c4e-a823-70c7afdcbdc5-c000.snappy.parquet | Bin 0 -> 3391 bytes
 .../ml-models/rfr-2.4.7/data/._SUCCESS.crc         | Bin 0 -> 8 bytes
 ...-40fc-b681-981caaeca996-c000.snappy.parquet.crc | Bin 0 -> 40 bytes
 .../resources/ml-models/rfr-2.4.7/data/_SUCCESS    |   0
 ...6edb-40fc-b681-981caaeca996-c000.snappy.parquet | Bin 0 -> 3797 bytes
 .../ml-models/rfr-2.4.7/metadata/._SUCCESS.crc     | Bin 0 -> 8 bytes
 .../ml-models/rfr-2.4.7/metadata/.part-00000.crc   | Bin 0 -> 16 bytes
 .../ml-models/rfr-2.4.7/metadata/_SUCCESS          |   0
 .../ml-models/rfr-2.4.7/metadata/part-00000        |   1 +
 .../rfr-2.4.7/treesMetadata/._SUCCESS.crc          | Bin 0 -> 8 bytes
 ...-447a-9b86-d95edaabcde8-c000.snappy.parquet.crc | Bin 0 -> 32 bytes
 .../ml-models/rfr-2.4.7/treesMetadata/_SUCCESS     |   0
 ...d349-447a-9b86-d95edaabcde8-c000.snappy.parquet | Bin 0 -> 3055 bytes
 .../DecisionTreeClassifierSuite.scala              |  12 ++++++
 .../ml/classification/GBTClassifierSuite.scala     |  14 ++++++
 .../MultilayerPerceptronClassifierSuite.scala      |   2 +-
 .../RandomForestClassifierSuite.scala              |  16 ++++++-
 .../apache/spark/ml/feature/HashingTFSuite.scala   |   2 +-
 .../spark/ml/feature/StringIndexerSuite.scala      |   2 +-
 .../ml/regression/DecisionTreeRegressorSuite.scala |  16 ++++++-
 .../spark/ml/regression/GBTRegressorSuite.scala    |  12 ++++++
 .../ml/regression/RandomForestRegressorSuite.scala |  12 ++++++
 74 files changed, 122 insertions(+), 20 deletions(-)

diff --git a/mllib/src/main/scala/org/apache/spark/ml/tree/treeModels.scala 
b/mllib/src/main/scala/org/apache/spark/ml/tree/treeModels.scala
index 162641f..67b9166 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/tree/treeModels.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/tree/treeModels.scala
@@ -31,8 +31,10 @@ import org.apache.spark.ml.util.{DefaultParamsReader, 
DefaultParamsWriter}
 import org.apache.spark.ml.util.DefaultParamsReader.Metadata
 import org.apache.spark.mllib.tree.impurity.ImpurityCalculator
 import org.apache.spark.mllib.tree.model.{DecisionTreeModel => 
OldDecisionTreeModel}
-import org.apache.spark.rdd.RDD
-import org.apache.spark.sql.{Dataset, SparkSession}
+import org.apache.spark.sql.SparkSession
+import org.apache.spark.sql.functions.{col, lit, struct}
+import org.apache.spark.sql.types.StructType
+import org.apache.spark.util.VersionUtils
 import org.apache.spark.util.collection.OpenHashMap
 
 /**
@@ -401,8 +403,13 @@ private[ml] object DecisionTreeModelReadWrite {
     }
 
     val dataPath = new Path(path, "data").toString
-    val data = sparkSession.read.parquet(dataPath).as[NodeData]
-    buildTreeFromNodes(data.collect(), impurityType)
+    var df = sparkSession.read.parquet(dataPath)
+    val (major, minor) = VersionUtils.majorMinorVersion(metadata.sparkVersion)
+    if (major.toInt < 3) {
+      df = df.withColumn("rawCount", lit(-1L))
+    }
+
+    buildTreeFromNodes(df.as[NodeData].collect(), impurityType)
   }
 
   /**
@@ -497,25 +504,36 @@ private[ml] object EnsembleModelReadWrite {
     }
 
     val treesMetadataPath = new Path(path, "treesMetadata").toString
-    val treesMetadataRDD: RDD[(Int, (Metadata, Double))] = 
sql.read.parquet(treesMetadataPath)
-      .select("treeID", "metadata", "weights").as[(Int, String, 
Double)].rdd.map {
-      case (treeID: Int, json: String, weights: Double) =>
+    val treesMetadataRDD = sql.read.parquet(treesMetadataPath)
+      .select("treeID", "metadata", "weights")
+      .as[(Int, String, Double)].rdd
+      .map { case (treeID: Int, json: String, weights: Double) =>
         treeID -> ((DefaultParamsReader.parseMetadata(json, treeClassName), 
weights))
-    }
+      }
 
     val treesMetadataWeights = treesMetadataRDD.sortByKey().values.collect()
     val treesMetadata = treesMetadataWeights.map(_._1)
     val treesWeights = treesMetadataWeights.map(_._2)
 
     val dataPath = new Path(path, "data").toString
-    val nodeData: Dataset[EnsembleNodeData] =
-      sql.read.parquet(dataPath).as[EnsembleNodeData]
-    val rootNodesRDD: RDD[(Int, Node)] =
-      nodeData.rdd.map(d => (d.treeID, d.nodeData)).groupByKey().map {
-        case (treeID: Int, nodeData: Iterable[NodeData]) =>
-          treeID -> 
DecisionTreeModelReadWrite.buildTreeFromNodes(nodeData.toArray, impurityType)
+    var df = sql.read.parquet(dataPath)
+    val (major, minor) = VersionUtils.majorMinorVersion(metadata.sparkVersion)
+    if (major.toInt < 3) {
+      val newNodeDataCol = df.schema("nodeData").dataType match {
+        case StructType(fields) =>
+          val cols = fields.map(f => col(s"nodeData.${f.name}")) :+ 
lit(-1L).as("rawCount")
+          struct(cols: _*)
+      }
+      df = df.withColumn("nodeData", newNodeDataCol)
+    }
+
+    val rootNodesRDD = df.as[EnsembleNodeData].rdd
+      .map(d => (d.treeID, d.nodeData))
+      .groupByKey()
+      .map { case (treeID: Int, nodeData: Iterable[NodeData]) =>
+        treeID -> 
DecisionTreeModelReadWrite.buildTreeFromNodes(nodeData.toArray, impurityType)
       }
-    val rootNodes: Array[Node] = rootNodesRDD.sortByKey().values.collect()
+    val rootNodes = rootNodesRDD.sortByKey().values.collect()
     (metadata, treesMetadata.zip(rootNodes), treesWeights)
   }
 
diff --git a/mllib/src/test/resources/ml-models/dtc-2.4.7/data/._SUCCESS.crc 
b/mllib/src/test/resources/ml-models/dtc-2.4.7/data/._SUCCESS.crc
new file mode 100644
index 0000000..3b7b044
Binary files /dev/null and 
b/mllib/src/test/resources/ml-models/dtc-2.4.7/data/._SUCCESS.crc differ
diff --git 
a/mllib/src/test/resources/ml-models/dtc-2.4.7/data/.part-00000-bd7ae42f-c890-406c-894c-ca4eac67c690-c000.snappy.parquet.crc
 
b/mllib/src/test/resources/ml-models/dtc-2.4.7/data/.part-00000-bd7ae42f-c890-406c-894c-ca4eac67c690-c000.snappy.parquet.crc
new file mode 100644
index 0000000..3ac562a
Binary files /dev/null and 
b/mllib/src/test/resources/ml-models/dtc-2.4.7/data/.part-00000-bd7ae42f-c890-406c-894c-ca4eac67c690-c000.snappy.parquet.crc
 differ
diff --git a/mllib/src/test/resources/ml-models/dtc-2.4.7/data/_SUCCESS 
b/mllib/src/test/resources/ml-models/dtc-2.4.7/data/_SUCCESS
new file mode 100644
index 0000000..e69de29
diff --git 
a/mllib/src/test/resources/ml-models/dtc-2.4.7/data/part-00000-bd7ae42f-c890-406c-894c-ca4eac67c690-c000.snappy.parquet
 
b/mllib/src/test/resources/ml-models/dtc-2.4.7/data/part-00000-bd7ae42f-c890-406c-894c-ca4eac67c690-c000.snappy.parquet
new file mode 100644
index 0000000..09c38d6
Binary files /dev/null and 
b/mllib/src/test/resources/ml-models/dtc-2.4.7/data/part-00000-bd7ae42f-c890-406c-894c-ca4eac67c690-c000.snappy.parquet
 differ
diff --git 
a/mllib/src/test/resources/ml-models/dtc-2.4.7/metadata/._SUCCESS.crc 
b/mllib/src/test/resources/ml-models/dtc-2.4.7/metadata/._SUCCESS.crc
new file mode 100644
index 0000000..3b7b044
Binary files /dev/null and 
b/mllib/src/test/resources/ml-models/dtc-2.4.7/metadata/._SUCCESS.crc differ
diff --git 
a/mllib/src/test/resources/ml-models/dtc-2.4.7/metadata/.part-00000.crc 
b/mllib/src/test/resources/ml-models/dtc-2.4.7/metadata/.part-00000.crc
new file mode 100644
index 0000000..22b1eb8
Binary files /dev/null and 
b/mllib/src/test/resources/ml-models/dtc-2.4.7/metadata/.part-00000.crc differ
diff --git a/mllib/src/test/resources/ml-models/dtc-2.4.7/metadata/_SUCCESS 
b/mllib/src/test/resources/ml-models/dtc-2.4.7/metadata/_SUCCESS
new file mode 100644
index 0000000..e69de29
diff --git a/mllib/src/test/resources/ml-models/dtc-2.4.7/metadata/part-00000 
b/mllib/src/test/resources/ml-models/dtc-2.4.7/metadata/part-00000
new file mode 100644
index 0000000..ef92265
--- /dev/null
+++ b/mllib/src/test/resources/ml-models/dtc-2.4.7/metadata/part-00000
@@ -0,0 +1 @@
+{"class":"org.apache.spark.ml.classification.DecisionTreeClassificationModel","timestamp":1608687929358,"sparkVersion":"2.4.7","uid":"dtc_bc7ad285bb73","paramMap":{},"defaultParamMap":{"impurity":"gini","maxDepth":5,"labelCol":"label","maxMemoryInMB":256,"featuresCol":"features","predictionCol":"prediction","minInfoGain":0.0,"seed":159147643,"rawPredictionCol":"rawPrediction","minInstancesPerNode":1,"cacheNodeIds":false,"probabilityCol":"probability","maxBins":32,"checkpointInterval":10}
 [...]
diff --git a/mllib/src/test/resources/ml-models/dtr-2.4.7/data/._SUCCESS.crc 
b/mllib/src/test/resources/ml-models/dtr-2.4.7/data/._SUCCESS.crc
new file mode 100644
index 0000000..3b7b044
Binary files /dev/null and 
b/mllib/src/test/resources/ml-models/dtr-2.4.7/data/._SUCCESS.crc differ
diff --git 
a/mllib/src/test/resources/ml-models/dtr-2.4.7/data/.part-00000-39b027f0-a437-4b3d-84af-d861adcb9ca8-c000.snappy.parquet.crc
 
b/mllib/src/test/resources/ml-models/dtr-2.4.7/data/.part-00000-39b027f0-a437-4b3d-84af-d861adcb9ca8-c000.snappy.parquet.crc
new file mode 100644
index 0000000..f6465e2
Binary files /dev/null and 
b/mllib/src/test/resources/ml-models/dtr-2.4.7/data/.part-00000-39b027f0-a437-4b3d-84af-d861adcb9ca8-c000.snappy.parquet.crc
 differ
diff --git a/mllib/src/test/resources/ml-models/dtr-2.4.7/data/_SUCCESS 
b/mllib/src/test/resources/ml-models/dtr-2.4.7/data/_SUCCESS
new file mode 100644
index 0000000..e69de29
diff --git 
a/mllib/src/test/resources/ml-models/dtr-2.4.7/data/part-00000-39b027f0-a437-4b3d-84af-d861adcb9ca8-c000.snappy.parquet
 
b/mllib/src/test/resources/ml-models/dtr-2.4.7/data/part-00000-39b027f0-a437-4b3d-84af-d861adcb9ca8-c000.snappy.parquet
new file mode 100644
index 0000000..2904f84
Binary files /dev/null and 
b/mllib/src/test/resources/ml-models/dtr-2.4.7/data/part-00000-39b027f0-a437-4b3d-84af-d861adcb9ca8-c000.snappy.parquet
 differ
diff --git 
a/mllib/src/test/resources/ml-models/dtr-2.4.7/metadata/._SUCCESS.crc 
b/mllib/src/test/resources/ml-models/dtr-2.4.7/metadata/._SUCCESS.crc
new file mode 100644
index 0000000..3b7b044
Binary files /dev/null and 
b/mllib/src/test/resources/ml-models/dtr-2.4.7/metadata/._SUCCESS.crc differ
diff --git 
a/mllib/src/test/resources/ml-models/dtr-2.4.7/metadata/.part-00000.crc 
b/mllib/src/test/resources/ml-models/dtr-2.4.7/metadata/.part-00000.crc
new file mode 100644
index 0000000..bbad108
Binary files /dev/null and 
b/mllib/src/test/resources/ml-models/dtr-2.4.7/metadata/.part-00000.crc differ
diff --git a/mllib/src/test/resources/ml-models/dtr-2.4.7/metadata/_SUCCESS 
b/mllib/src/test/resources/ml-models/dtr-2.4.7/metadata/_SUCCESS
new file mode 100644
index 0000000..e69de29
diff --git a/mllib/src/test/resources/ml-models/dtr-2.4.7/metadata/part-00000 
b/mllib/src/test/resources/ml-models/dtr-2.4.7/metadata/part-00000
new file mode 100644
index 0000000..2895223
--- /dev/null
+++ b/mllib/src/test/resources/ml-models/dtr-2.4.7/metadata/part-00000
@@ -0,0 +1 @@
+{"class":"org.apache.spark.ml.regression.DecisionTreeRegressionModel","timestamp":1608687932847,"sparkVersion":"2.4.7","uid":"dtr_c16a90fcdaf8","paramMap":{},"defaultParamMap":{"labelCol":"label","checkpointInterval":10,"minInfoGain":0.0,"maxMemoryInMB":256,"minInstancesPerNode":1,"maxBins":32,"seed":926680331,"cacheNodeIds":false,"maxDepth":5,"predictionCol":"prediction","featuresCol":"features","impurity":"variance"},"numFeatures":692}
diff --git a/mllib/src/test/resources/ml-models/gbtc-2.4.7/data/._SUCCESS.crc 
b/mllib/src/test/resources/ml-models/gbtc-2.4.7/data/._SUCCESS.crc
new file mode 100644
index 0000000..3b7b044
Binary files /dev/null and 
b/mllib/src/test/resources/ml-models/gbtc-2.4.7/data/._SUCCESS.crc differ
diff --git 
a/mllib/src/test/resources/ml-models/gbtc-2.4.7/data/.part-00000-dacbde64-c861-41c7-91c0-6da8cc01fb43-c000.snappy.parquet.crc
 
b/mllib/src/test/resources/ml-models/gbtc-2.4.7/data/.part-00000-dacbde64-c861-41c7-91c0-6da8cc01fb43-c000.snappy.parquet.crc
new file mode 100644
index 0000000..13fc4ed
Binary files /dev/null and 
b/mllib/src/test/resources/ml-models/gbtc-2.4.7/data/.part-00000-dacbde64-c861-41c7-91c0-6da8cc01fb43-c000.snappy.parquet.crc
 differ
diff --git a/mllib/src/test/resources/ml-models/gbtc-2.4.7/data/_SUCCESS 
b/mllib/src/test/resources/ml-models/gbtc-2.4.7/data/_SUCCESS
new file mode 100644
index 0000000..e69de29
diff --git 
a/mllib/src/test/resources/ml-models/gbtc-2.4.7/data/part-00000-dacbde64-c861-41c7-91c0-6da8cc01fb43-c000.snappy.parquet
 
b/mllib/src/test/resources/ml-models/gbtc-2.4.7/data/part-00000-dacbde64-c861-41c7-91c0-6da8cc01fb43-c000.snappy.parquet
new file mode 100644
index 0000000..5682038
Binary files /dev/null and 
b/mllib/src/test/resources/ml-models/gbtc-2.4.7/data/part-00000-dacbde64-c861-41c7-91c0-6da8cc01fb43-c000.snappy.parquet
 differ
diff --git 
a/mllib/src/test/resources/ml-models/gbtc-2.4.7/metadata/._SUCCESS.crc 
b/mllib/src/test/resources/ml-models/gbtc-2.4.7/metadata/._SUCCESS.crc
new file mode 100644
index 0000000..3b7b044
Binary files /dev/null and 
b/mllib/src/test/resources/ml-models/gbtc-2.4.7/metadata/._SUCCESS.crc differ
diff --git 
a/mllib/src/test/resources/ml-models/gbtc-2.4.7/metadata/.part-00000.crc 
b/mllib/src/test/resources/ml-models/gbtc-2.4.7/metadata/.part-00000.crc
new file mode 100644
index 0000000..a810dd9
Binary files /dev/null and 
b/mllib/src/test/resources/ml-models/gbtc-2.4.7/metadata/.part-00000.crc differ
diff --git a/mllib/src/test/resources/ml-models/gbtc-2.4.7/metadata/_SUCCESS 
b/mllib/src/test/resources/ml-models/gbtc-2.4.7/metadata/_SUCCESS
new file mode 100644
index 0000000..e69de29
diff --git a/mllib/src/test/resources/ml-models/gbtc-2.4.7/metadata/part-00000 
b/mllib/src/test/resources/ml-models/gbtc-2.4.7/metadata/part-00000
new file mode 100644
index 0000000..675fea2
--- /dev/null
+++ b/mllib/src/test/resources/ml-models/gbtc-2.4.7/metadata/part-00000
@@ -0,0 +1 @@
+{"class":"org.apache.spark.ml.classification.GBTClassificationModel","timestamp":1608687932103,"sparkVersion":"2.4.7","uid":"gbtc_81db008b4f25","paramMap":{"maxIter":2},"defaultParamMap":{"seed":-1287390502,"maxMemoryInMB":256,"stepSize":0.1,"validationTol":0.01,"maxBins":32,"checkpointInterval":10,"predictionCol":"prediction","lossType":"logistic","rawPredictionCol":"rawPrediction","featuresCol":"features","cacheNodeIds":false,"maxIter":20,"featureSubsetStrategy":"all","impurity":"gini"
 [...]
diff --git 
a/mllib/src/test/resources/ml-models/gbtc-2.4.7/treesMetadata/._SUCCESS.crc 
b/mllib/src/test/resources/ml-models/gbtc-2.4.7/treesMetadata/._SUCCESS.crc
new file mode 100644
index 0000000..3b7b044
Binary files /dev/null and 
b/mllib/src/test/resources/ml-models/gbtc-2.4.7/treesMetadata/._SUCCESS.crc 
differ
diff --git 
a/mllib/src/test/resources/ml-models/gbtc-2.4.7/treesMetadata/.part-00000-81137d9f-31e3-4a90-813c-ddc394101e21-c000.snappy.parquet.crc
 
b/mllib/src/test/resources/ml-models/gbtc-2.4.7/treesMetadata/.part-00000-81137d9f-31e3-4a90-813c-ddc394101e21-c000.snappy.parquet.crc
new file mode 100644
index 0000000..101c207
Binary files /dev/null and 
b/mllib/src/test/resources/ml-models/gbtc-2.4.7/treesMetadata/.part-00000-81137d9f-31e3-4a90-813c-ddc394101e21-c000.snappy.parquet.crc
 differ
diff --git 
a/mllib/src/test/resources/ml-models/gbtc-2.4.7/treesMetadata/_SUCCESS 
b/mllib/src/test/resources/ml-models/gbtc-2.4.7/treesMetadata/_SUCCESS
new file mode 100644
index 0000000..e69de29
diff --git 
a/mllib/src/test/resources/ml-models/gbtc-2.4.7/treesMetadata/part-00000-81137d9f-31e3-4a90-813c-ddc394101e21-c000.snappy.parquet
 
b/mllib/src/test/resources/ml-models/gbtc-2.4.7/treesMetadata/part-00000-81137d9f-31e3-4a90-813c-ddc394101e21-c000.snappy.parquet
new file mode 100644
index 0000000..e232de3
Binary files /dev/null and 
b/mllib/src/test/resources/ml-models/gbtc-2.4.7/treesMetadata/part-00000-81137d9f-31e3-4a90-813c-ddc394101e21-c000.snappy.parquet
 differ
diff --git a/mllib/src/test/resources/ml-models/gbtr-2.4.7/data/._SUCCESS.crc 
b/mllib/src/test/resources/ml-models/gbtr-2.4.7/data/._SUCCESS.crc
new file mode 100644
index 0000000..3b7b044
Binary files /dev/null and 
b/mllib/src/test/resources/ml-models/gbtr-2.4.7/data/._SUCCESS.crc differ
diff --git 
a/mllib/src/test/resources/ml-models/gbtr-2.4.7/data/.part-00000-3b5433ff-d346-4511-9aab-639288bfae6d-c000.snappy.parquet.crc
 
b/mllib/src/test/resources/ml-models/gbtr-2.4.7/data/.part-00000-3b5433ff-d346-4511-9aab-639288bfae6d-c000.snappy.parquet.crc
new file mode 100644
index 0000000..c35b81f
Binary files /dev/null and 
b/mllib/src/test/resources/ml-models/gbtr-2.4.7/data/.part-00000-3b5433ff-d346-4511-9aab-639288bfae6d-c000.snappy.parquet.crc
 differ
diff --git a/mllib/src/test/resources/ml-models/gbtr-2.4.7/data/_SUCCESS 
b/mllib/src/test/resources/ml-models/gbtr-2.4.7/data/_SUCCESS
new file mode 100644
index 0000000..e69de29
diff --git 
a/mllib/src/test/resources/ml-models/gbtr-2.4.7/data/part-00000-3b5433ff-d346-4511-9aab-639288bfae6d-c000.snappy.parquet
 
b/mllib/src/test/resources/ml-models/gbtr-2.4.7/data/part-00000-3b5433ff-d346-4511-9aab-639288bfae6d-c000.snappy.parquet
new file mode 100644
index 0000000..ba26a44
Binary files /dev/null and 
b/mllib/src/test/resources/ml-models/gbtr-2.4.7/data/part-00000-3b5433ff-d346-4511-9aab-639288bfae6d-c000.snappy.parquet
 differ
diff --git 
a/mllib/src/test/resources/ml-models/gbtr-2.4.7/metadata/._SUCCESS.crc 
b/mllib/src/test/resources/ml-models/gbtr-2.4.7/metadata/._SUCCESS.crc
new file mode 100644
index 0000000..3b7b044
Binary files /dev/null and 
b/mllib/src/test/resources/ml-models/gbtr-2.4.7/metadata/._SUCCESS.crc differ
diff --git 
a/mllib/src/test/resources/ml-models/gbtr-2.4.7/metadata/.part-00000.crc 
b/mllib/src/test/resources/ml-models/gbtr-2.4.7/metadata/.part-00000.crc
new file mode 100644
index 0000000..7dc6e14
Binary files /dev/null and 
b/mllib/src/test/resources/ml-models/gbtr-2.4.7/metadata/.part-00000.crc differ
diff --git a/mllib/src/test/resources/ml-models/gbtr-2.4.7/metadata/_SUCCESS 
b/mllib/src/test/resources/ml-models/gbtr-2.4.7/metadata/_SUCCESS
new file mode 100644
index 0000000..e69de29
diff --git a/mllib/src/test/resources/ml-models/gbtr-2.4.7/metadata/part-00000 
b/mllib/src/test/resources/ml-models/gbtr-2.4.7/metadata/part-00000
new file mode 100644
index 0000000..a9a712e
--- /dev/null
+++ b/mllib/src/test/resources/ml-models/gbtr-2.4.7/metadata/part-00000
@@ -0,0 +1 @@
+{"class":"org.apache.spark.ml.regression.GBTRegressionModel","timestamp":1608687942434,"sparkVersion":"2.4.7","uid":"gbtr_0a74cb2536ff","paramMap":{"maxIter":2},"defaultParamMap":{"impurity":"variance","maxMemoryInMB":256,"maxDepth":5,"subsamplingRate":1.0,"validationTol":0.01,"labelCol":"label","maxIter":20,"checkpointInterval":10,"minInfoGain":0.0,"predictionCol":"prediction","stepSize":0.1,"cacheNodeIds":false,"lossType":"squared","seed":-131597770,"featureSubsetStrategy":"all","featu
 [...]
diff --git 
a/mllib/src/test/resources/ml-models/gbtr-2.4.7/treesMetadata/._SUCCESS.crc 
b/mllib/src/test/resources/ml-models/gbtr-2.4.7/treesMetadata/._SUCCESS.crc
new file mode 100644
index 0000000..3b7b044
Binary files /dev/null and 
b/mllib/src/test/resources/ml-models/gbtr-2.4.7/treesMetadata/._SUCCESS.crc 
differ
diff --git 
a/mllib/src/test/resources/ml-models/gbtr-2.4.7/treesMetadata/.part-00000-6b9124f5-87fe-4fd8-ad9c-4be239c2215a-c000.snappy.parquet.crc
 
b/mllib/src/test/resources/ml-models/gbtr-2.4.7/treesMetadata/.part-00000-6b9124f5-87fe-4fd8-ad9c-4be239c2215a-c000.snappy.parquet.crc
new file mode 100644
index 0000000..b681b9f
Binary files /dev/null and 
b/mllib/src/test/resources/ml-models/gbtr-2.4.7/treesMetadata/.part-00000-6b9124f5-87fe-4fd8-ad9c-4be239c2215a-c000.snappy.parquet.crc
 differ
diff --git 
a/mllib/src/test/resources/ml-models/gbtr-2.4.7/treesMetadata/_SUCCESS 
b/mllib/src/test/resources/ml-models/gbtr-2.4.7/treesMetadata/_SUCCESS
new file mode 100644
index 0000000..e69de29
diff --git 
a/mllib/src/test/resources/ml-models/gbtr-2.4.7/treesMetadata/part-00000-6b9124f5-87fe-4fd8-ad9c-4be239c2215a-c000.snappy.parquet
 
b/mllib/src/test/resources/ml-models/gbtr-2.4.7/treesMetadata/part-00000-6b9124f5-87fe-4fd8-ad9c-4be239c2215a-c000.snappy.parquet
new file mode 100644
index 0000000..9a7e77a
Binary files /dev/null and 
b/mllib/src/test/resources/ml-models/gbtr-2.4.7/treesMetadata/part-00000-6b9124f5-87fe-4fd8-ad9c-4be239c2215a-c000.snappy.parquet
 differ
diff --git a/mllib/src/test/resources/ml-models/rfc-2.4.7/data/._SUCCESS.crc 
b/mllib/src/test/resources/ml-models/rfc-2.4.7/data/._SUCCESS.crc
new file mode 100644
index 0000000..3b7b044
Binary files /dev/null and 
b/mllib/src/test/resources/ml-models/rfc-2.4.7/data/._SUCCESS.crc differ
diff --git 
a/mllib/src/test/resources/ml-models/rfc-2.4.7/data/.part-00000-e41a7b98-91f8-4485-b112-25b4b11c9009-c000.snappy.parquet.crc
 
b/mllib/src/test/resources/ml-models/rfc-2.4.7/data/.part-00000-e41a7b98-91f8-4485-b112-25b4b11c9009-c000.snappy.parquet.crc
new file mode 100644
index 0000000..5bb3a22
Binary files /dev/null and 
b/mllib/src/test/resources/ml-models/rfc-2.4.7/data/.part-00000-e41a7b98-91f8-4485-b112-25b4b11c9009-c000.snappy.parquet.crc
 differ
diff --git a/mllib/src/test/resources/ml-models/rfc-2.4.7/data/_SUCCESS 
b/mllib/src/test/resources/ml-models/rfc-2.4.7/data/_SUCCESS
new file mode 100644
index 0000000..e69de29
diff --git 
a/mllib/src/test/resources/ml-models/rfc-2.4.7/data/part-00000-e41a7b98-91f8-4485-b112-25b4b11c9009-c000.snappy.parquet
 
b/mllib/src/test/resources/ml-models/rfc-2.4.7/data/part-00000-e41a7b98-91f8-4485-b112-25b4b11c9009-c000.snappy.parquet
new file mode 100644
index 0000000..d9ec35a
Binary files /dev/null and 
b/mllib/src/test/resources/ml-models/rfc-2.4.7/data/part-00000-e41a7b98-91f8-4485-b112-25b4b11c9009-c000.snappy.parquet
 differ
diff --git 
a/mllib/src/test/resources/ml-models/rfc-2.4.7/metadata/._SUCCESS.crc 
b/mllib/src/test/resources/ml-models/rfc-2.4.7/metadata/._SUCCESS.crc
new file mode 100644
index 0000000..3b7b044
Binary files /dev/null and 
b/mllib/src/test/resources/ml-models/rfc-2.4.7/metadata/._SUCCESS.crc differ
diff --git 
a/mllib/src/test/resources/ml-models/rfc-2.4.7/metadata/.part-00000.crc 
b/mllib/src/test/resources/ml-models/rfc-2.4.7/metadata/.part-00000.crc
new file mode 100644
index 0000000..58bda6d
Binary files /dev/null and 
b/mllib/src/test/resources/ml-models/rfc-2.4.7/metadata/.part-00000.crc differ
diff --git a/mllib/src/test/resources/ml-models/rfc-2.4.7/metadata/_SUCCESS 
b/mllib/src/test/resources/ml-models/rfc-2.4.7/metadata/_SUCCESS
new file mode 100644
index 0000000..e69de29
diff --git a/mllib/src/test/resources/ml-models/rfc-2.4.7/metadata/part-00000 
b/mllib/src/test/resources/ml-models/rfc-2.4.7/metadata/part-00000
new file mode 100644
index 0000000..07748b0
--- /dev/null
+++ b/mllib/src/test/resources/ml-models/rfc-2.4.7/metadata/part-00000
@@ -0,0 +1 @@
+{"class":"org.apache.spark.ml.classification.RandomForestClassificationModel","timestamp":1608687930713,"sparkVersion":"2.4.7","uid":"rfc_db1adb353f1e","paramMap":{"numTrees":2},"defaultParamMap":{"impurity":"gini","predictionCol":"prediction","numTrees":20,"maxDepth":5,"featureSubsetStrategy":"auto","subsamplingRate":1.0,"featuresCol":"features","checkpointInterval":10,"rawPredictionCol":"rawPrediction","cacheNodeIds":false,"labelCol":"label","seed":207336481,"probabilityCol":"probabili
 [...]
diff --git 
a/mllib/src/test/resources/ml-models/rfc-2.4.7/treesMetadata/._SUCCESS.crc 
b/mllib/src/test/resources/ml-models/rfc-2.4.7/treesMetadata/._SUCCESS.crc
new file mode 100644
index 0000000..3b7b044
Binary files /dev/null and 
b/mllib/src/test/resources/ml-models/rfc-2.4.7/treesMetadata/._SUCCESS.crc 
differ
diff --git 
a/mllib/src/test/resources/ml-models/rfc-2.4.7/treesMetadata/.part-00000-21082d24-b666-4c4e-a823-70c7afdcbdc5-c000.snappy.parquet.crc
 
b/mllib/src/test/resources/ml-models/rfc-2.4.7/treesMetadata/.part-00000-21082d24-b666-4c4e-a823-70c7afdcbdc5-c000.snappy.parquet.crc
new file mode 100644
index 0000000..729c5bb
Binary files /dev/null and 
b/mllib/src/test/resources/ml-models/rfc-2.4.7/treesMetadata/.part-00000-21082d24-b666-4c4e-a823-70c7afdcbdc5-c000.snappy.parquet.crc
 differ
diff --git 
a/mllib/src/test/resources/ml-models/rfc-2.4.7/treesMetadata/_SUCCESS 
b/mllib/src/test/resources/ml-models/rfc-2.4.7/treesMetadata/_SUCCESS
new file mode 100644
index 0000000..e69de29
diff --git 
a/mllib/src/test/resources/ml-models/rfc-2.4.7/treesMetadata/part-00000-21082d24-b666-4c4e-a823-70c7afdcbdc5-c000.snappy.parquet
 
b/mllib/src/test/resources/ml-models/rfc-2.4.7/treesMetadata/part-00000-21082d24-b666-4c4e-a823-70c7afdcbdc5-c000.snappy.parquet
new file mode 100644
index 0000000..6108821
Binary files /dev/null and 
b/mllib/src/test/resources/ml-models/rfc-2.4.7/treesMetadata/part-00000-21082d24-b666-4c4e-a823-70c7afdcbdc5-c000.snappy.parquet
 differ
diff --git a/mllib/src/test/resources/ml-models/rfr-2.4.7/data/._SUCCESS.crc 
b/mllib/src/test/resources/ml-models/rfr-2.4.7/data/._SUCCESS.crc
new file mode 100644
index 0000000..3b7b044
Binary files /dev/null and 
b/mllib/src/test/resources/ml-models/rfr-2.4.7/data/._SUCCESS.crc differ
diff --git 
a/mllib/src/test/resources/ml-models/rfr-2.4.7/data/.part-00000-4a69607d-6edb-40fc-b681-981caaeca996-c000.snappy.parquet.crc
 
b/mllib/src/test/resources/ml-models/rfr-2.4.7/data/.part-00000-4a69607d-6edb-40fc-b681-981caaeca996-c000.snappy.parquet.crc
new file mode 100644
index 0000000..52cf21f
Binary files /dev/null and 
b/mllib/src/test/resources/ml-models/rfr-2.4.7/data/.part-00000-4a69607d-6edb-40fc-b681-981caaeca996-c000.snappy.parquet.crc
 differ
diff --git a/mllib/src/test/resources/ml-models/rfr-2.4.7/data/_SUCCESS 
b/mllib/src/test/resources/ml-models/rfr-2.4.7/data/_SUCCESS
new file mode 100644
index 0000000..e69de29
diff --git 
a/mllib/src/test/resources/ml-models/rfr-2.4.7/data/part-00000-4a69607d-6edb-40fc-b681-981caaeca996-c000.snappy.parquet
 
b/mllib/src/test/resources/ml-models/rfr-2.4.7/data/part-00000-4a69607d-6edb-40fc-b681-981caaeca996-c000.snappy.parquet
new file mode 100644
index 0000000..75a3f04
Binary files /dev/null and 
b/mllib/src/test/resources/ml-models/rfr-2.4.7/data/part-00000-4a69607d-6edb-40fc-b681-981caaeca996-c000.snappy.parquet
 differ
diff --git 
a/mllib/src/test/resources/ml-models/rfr-2.4.7/metadata/._SUCCESS.crc 
b/mllib/src/test/resources/ml-models/rfr-2.4.7/metadata/._SUCCESS.crc
new file mode 100644
index 0000000..3b7b044
Binary files /dev/null and 
b/mllib/src/test/resources/ml-models/rfr-2.4.7/metadata/._SUCCESS.crc differ
diff --git 
a/mllib/src/test/resources/ml-models/rfr-2.4.7/metadata/.part-00000.crc 
b/mllib/src/test/resources/ml-models/rfr-2.4.7/metadata/.part-00000.crc
new file mode 100644
index 0000000..1a72b8e
Binary files /dev/null and 
b/mllib/src/test/resources/ml-models/rfr-2.4.7/metadata/.part-00000.crc differ
diff --git a/mllib/src/test/resources/ml-models/rfr-2.4.7/metadata/_SUCCESS 
b/mllib/src/test/resources/ml-models/rfr-2.4.7/metadata/_SUCCESS
new file mode 100644
index 0000000..e69de29
diff --git a/mllib/src/test/resources/ml-models/rfr-2.4.7/metadata/part-00000 
b/mllib/src/test/resources/ml-models/rfr-2.4.7/metadata/part-00000
new file mode 100644
index 0000000..cccbb8f
--- /dev/null
+++ b/mllib/src/test/resources/ml-models/rfr-2.4.7/metadata/part-00000
@@ -0,0 +1 @@
+{"class":"org.apache.spark.ml.regression.RandomForestRegressionModel","timestamp":1608687933536,"sparkVersion":"2.4.7","uid":"rfr_d946d96b7ff0","paramMap":{"numTrees":2},"defaultParamMap":{"numTrees":20,"featureSubsetStrategy":"auto","maxDepth":5,"minInstancesPerNode":1,"labelCol":"label","cacheNodeIds":false,"checkpointInterval":10,"featuresCol":"features","maxMemoryInMB":256,"predictionCol":"prediction","minInfoGain":0.0,"subsamplingRate":1.0,"impurity":"variance","seed":235498149,"max
 [...]
diff --git 
a/mllib/src/test/resources/ml-models/rfr-2.4.7/treesMetadata/._SUCCESS.crc 
b/mllib/src/test/resources/ml-models/rfr-2.4.7/treesMetadata/._SUCCESS.crc
new file mode 100644
index 0000000..3b7b044
Binary files /dev/null and 
b/mllib/src/test/resources/ml-models/rfr-2.4.7/treesMetadata/._SUCCESS.crc 
differ
diff --git 
a/mllib/src/test/resources/ml-models/rfr-2.4.7/treesMetadata/.part-00000-dfe4db51-d349-447a-9b86-d95edaabcde8-c000.snappy.parquet.crc
 
b/mllib/src/test/resources/ml-models/rfr-2.4.7/treesMetadata/.part-00000-dfe4db51-d349-447a-9b86-d95edaabcde8-c000.snappy.parquet.crc
new file mode 100644
index 0000000..8081f88
Binary files /dev/null and 
b/mllib/src/test/resources/ml-models/rfr-2.4.7/treesMetadata/.part-00000-dfe4db51-d349-447a-9b86-d95edaabcde8-c000.snappy.parquet.crc
 differ
diff --git 
a/mllib/src/test/resources/ml-models/rfr-2.4.7/treesMetadata/_SUCCESS 
b/mllib/src/test/resources/ml-models/rfr-2.4.7/treesMetadata/_SUCCESS
new file mode 100644
index 0000000..e69de29
diff --git 
a/mllib/src/test/resources/ml-models/rfr-2.4.7/treesMetadata/part-00000-dfe4db51-d349-447a-9b86-d95edaabcde8-c000.snappy.parquet
 
b/mllib/src/test/resources/ml-models/rfr-2.4.7/treesMetadata/part-00000-dfe4db51-d349-447a-9b86-d95edaabcde8-c000.snappy.parquet
new file mode 100644
index 0000000..093c346
Binary files /dev/null and 
b/mllib/src/test/resources/ml-models/rfr-2.4.7/treesMetadata/part-00000-dfe4db51-d349-447a-9b86-d95edaabcde8-c000.snappy.parquet
 differ
diff --git a/mllib/src/test/scala/org/apache/spark/ml/classification/DecisionTreeClassifierSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/classification/DecisionTreeClassifierSuite.scala
index d1ade85..13efdf1 100644
--- a/mllib/src/test/scala/org/apache/spark/ml/classification/DecisionTreeClassifierSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/ml/classification/DecisionTreeClassifierSuite.scala
@@ -446,6 +446,18 @@ class DecisionTreeClassifierSuite extends MLTest with DefaultReadWriteTest {
 
     testDefaultReadWrite(model)
   }
+
+  test("SPARK-33398: Load DecisionTreeClassificationModel prior to Spark 3.0") {
+    val path = testFile("ml-models/dtc-2.4.7")
+    val model = DecisionTreeClassificationModel.load(path)
+    assert(model.numClasses === 2)
+    assert(model.numFeatures === 692)
+    assert(model.numNodes === 5)
+
+    val metadata = spark.read.json(s"$path/metadata")
+    val sparkVersionStr = metadata.select("sparkVersion").first().getString(0)
+    assert(sparkVersionStr === "2.4.7")
+  }
 }
 
 private[ml] object DecisionTreeClassifierSuite extends SparkFunSuite {
diff --git a/mllib/src/test/scala/org/apache/spark/ml/classification/GBTClassifierSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/classification/GBTClassifierSuite.scala
index a2208ed..37e695e 100644
--- a/mllib/src/test/scala/org/apache/spark/ml/classification/GBTClassifierSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/ml/classification/GBTClassifierSuite.scala
@@ -545,6 +545,20 @@ class GBTClassifierSuite extends MLTest with DefaultReadWriteTest {
     testEstimatorAndModelReadWrite(gbt, continuousData, allParamSettings,
       allParamSettings, checkModelData)
   }
+
+  test("SPARK-33398: Load GBTClassificationModel prior to Spark 3.0") {
+    val path = testFile("ml-models/gbtc-2.4.7")
+    val model = GBTClassificationModel.load(path)
+    assert(model.numClasses === 2)
+    assert(model.numFeatures === 692)
+    assert(model.getNumTrees === 2)
+    assert(model.totalNumNodes === 22)
+    assert(model.trees.map(_.numNodes) === Array(5, 17))
+
+    val metadata = spark.read.json(s"$path/metadata")
+    val sparkVersionStr = metadata.select("sparkVersion").first().getString(0)
+    assert(sparkVersionStr === "2.4.7")
+  }
 }
 
 private object GBTClassifierSuite extends SparkFunSuite {
diff --git a/mllib/src/test/scala/org/apache/spark/ml/classification/MultilayerPerceptronClassifierSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/classification/MultilayerPerceptronClassifierSuite.scala
index c909e72..cc52bd8 100644
--- a/mllib/src/test/scala/org/apache/spark/ml/classification/MultilayerPerceptronClassifierSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/ml/classification/MultilayerPerceptronClassifierSuite.scala
@@ -240,7 +240,7 @@ class MultilayerPerceptronClassifierSuite extends MLTest with DefaultReadWriteTe
 
     val metadata = spark.read.json(s"$mlpPath/metadata")
     val sparkVersionStr = metadata.select("sparkVersion").first().getString(0)
-    assert(sparkVersionStr == "2.4.4")
+    assert(sparkVersionStr === "2.4.4")
   }
 
   test("summary and training summary") {
diff --git a/mllib/src/test/scala/org/apache/spark/ml/classification/RandomForestClassifierSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/classification/RandomForestClassifierSuite.scala
index 645a436..7be007a 100644
--- a/mllib/src/test/scala/org/apache/spark/ml/classification/RandomForestClassifierSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/ml/classification/RandomForestClassifierSuite.scala
@@ -25,10 +25,10 @@ import org.apache.spark.ml.param.ParamsSuite
 import org.apache.spark.ml.tree._
 import org.apache.spark.ml.tree.impl.TreeTests
 import org.apache.spark.ml.util.{DefaultReadWriteTest, MLTest, MLTestingUtils}
+import org.apache.spark.ml.util.TestingUtils._
 import org.apache.spark.mllib.regression.{LabeledPoint => OldLabeledPoint}
 import org.apache.spark.mllib.tree.{EnsembleTestHelper, RandomForest => OldRandomForest}
 import org.apache.spark.mllib.tree.configuration.{Algo => OldAlgo}
-import org.apache.spark.mllib.util.TestingUtils._
 import org.apache.spark.rdd.RDD
 import org.apache.spark.sql.{DataFrame, Row}
 import org.apache.spark.sql.functions._
@@ -429,6 +429,20 @@ class RandomForestClassifierSuite extends MLTest with DefaultReadWriteTest {
     testEstimatorAndModelReadWrite(rf, continuousData, allParamSettings,
       allParamSettings, checkModelData)
   }
+
+  test("SPARK-33398: Load RandomForestClassificationModel prior to Spark 3.0") {
+    val path = testFile("ml-models/rfc-2.4.7")
+    val model = RandomForestClassificationModel.load(path)
+    assert(model.numClasses === 2)
+    assert(model.numFeatures === 692)
+    assert(model.getNumTrees === 2)
+    assert(model.totalNumNodes === 10)
+    assert(model.trees.map(_.numNodes) === Array(3, 7))
+
+    val metadata = spark.read.json(s"$path/metadata")
+    val sparkVersionStr = metadata.select("sparkVersion").first().getString(0)
+    assert(sparkVersionStr === "2.4.7")
+  }
 }
 
 private object RandomForestClassifierSuite extends SparkFunSuite {
diff --git a/mllib/src/test/scala/org/apache/spark/ml/feature/HashingTFSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/feature/HashingTFSuite.scala
index 8fd192f..861bf1e 100644
--- a/mllib/src/test/scala/org/apache/spark/ml/feature/HashingTFSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/ml/feature/HashingTFSuite.scala
@@ -99,7 +99,7 @@ class HashingTFSuite extends MLTest with DefaultReadWriteTest {
 
     val metadata = spark.read.json(s"$hashingTFPath/metadata")
     val sparkVersionStr = metadata.select("sparkVersion").first().getString(0)
-    assert(sparkVersionStr == "2.4.4")
+    assert(sparkVersionStr === "2.4.4")
 
     intercept[IllegalArgumentException] {
       loadedHashingTF.save(hashingTFPath)
diff --git a/mllib/src/test/scala/org/apache/spark/ml/feature/StringIndexerSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/feature/StringIndexerSuite.scala
index 9481408..c8247b9 100644
--- a/mllib/src/test/scala/org/apache/spark/ml/feature/StringIndexerSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/ml/feature/StringIndexerSuite.scala
@@ -483,6 +483,6 @@ class StringIndexerSuite extends MLTest with DefaultReadWriteTest {
 
     val metadata = spark.read.json(s"$modelPath/metadata")
     val sparkVersionStr = metadata.select("sparkVersion").first().getString(0)
-    assert(sparkVersionStr == "2.4.4")
+    assert(sparkVersionStr === "2.4.4")
   }
 }
diff --git a/mllib/src/test/scala/org/apache/spark/ml/regression/DecisionTreeRegressorSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/regression/DecisionTreeRegressorSuite.scala
index 49ebcb3..9cb03454 100644
--- a/mllib/src/test/scala/org/apache/spark/ml/regression/DecisionTreeRegressorSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/ml/regression/DecisionTreeRegressorSuite.scala
@@ -19,7 +19,7 @@ package org.apache.spark.ml.regression
 
 import org.apache.spark.SparkFunSuite
 import org.apache.spark.ml.feature.LabeledPoint
-import org.apache.spark.ml.linalg.Vector
+import org.apache.spark.ml.linalg.{Vector, Vectors}
 import org.apache.spark.ml.tree.impl.TreeTests
 import org.apache.spark.ml.util.{DefaultReadWriteTest, MLTest, MLTestingUtils}
 import org.apache.spark.ml.util.TestingUtils._
@@ -236,6 +236,20 @@ class DecisionTreeRegressorSuite extends MLTest with DefaultReadWriteTest {
       TreeTests.allParamSettings ++ Map("maxDepth" -> 0),
       TreeTests.allParamSettings ++ Map("maxDepth" -> 0), checkModelData)
   }
+
+  test("SPARK-33398: Load DecisionTreeRegressionModel prior to Spark 3.0") {
+    val path = testFile("ml-models/dtr-2.4.7")
+    val model = DecisionTreeRegressionModel.load(path)
+    assert(model.numFeatures === 692)
+    assert(model.numNodes === 5)
+    assert(model.featureImportances ~==
+      Vectors.sparse(692, Array(100, 434),
+        Array(0.03987240829346093, 0.960127591706539)) absTol 1e-4)
+
+    val metadata = spark.read.json(s"$path/metadata")
+    val sparkVersionStr = metadata.select("sparkVersion").first().getString(0)
+    assert(sparkVersionStr === "2.4.7")
+  }
 }
 
 private[ml] object DecisionTreeRegressorSuite extends SparkFunSuite {
diff --git a/mllib/src/test/scala/org/apache/spark/ml/regression/GBTRegressorSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/regression/GBTRegressorSuite.scala
index 04b0d4b..7d84df6 100644
--- a/mllib/src/test/scala/org/apache/spark/ml/regression/GBTRegressorSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/ml/regression/GBTRegressorSuite.scala
@@ -370,6 +370,18 @@ class GBTRegressorSuite extends MLTest with DefaultReadWriteTest {
     testEstimatorAndModelReadWrite(gbt, continuousData, allParamSettings,
       allParamSettings, checkModelData)
   }
+
+  test("SPARK-33398: Load GBTRegressionModel prior to Spark 3.0") {
+    val path = testFile("ml-models/gbtr-2.4.7")
+    val model = GBTRegressionModel.load(path)
+    assert(model.numFeatures === 692)
+    assert(model.totalNumNodes === 6)
+    assert(model.trees.map(_.numNodes) === Array(5, 1))
+
+    val metadata = spark.read.json(s"$path/metadata")
+    val sparkVersionStr = metadata.select("sparkVersion").first().getString(0)
+    assert(sparkVersionStr === "2.4.7")
+  }
 }
 
 private object GBTRegressorSuite extends SparkFunSuite {
diff --git a/mllib/src/test/scala/org/apache/spark/ml/regression/RandomForestRegressorSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/regression/RandomForestRegressorSuite.scala
index aeddb5a..7ec30de 100644
--- a/mllib/src/test/scala/org/apache/spark/ml/regression/RandomForestRegressorSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/ml/regression/RandomForestRegressorSuite.scala
@@ -221,6 +221,18 @@ class RandomForestRegressorSuite extends MLTest with DefaultReadWriteTest{
     testEstimatorAndModelReadWrite(rf, continuousData, allParamSettings,
       allParamSettings, checkModelData)
   }
+
+  test("SPARK-33398: Load RandomForestRegressionModel prior to Spark 3.0") {
+    val path = testFile("ml-models/rfr-2.4.7")
+    val model = RandomForestRegressionModel.load(path)
+    assert(model.numFeatures === 692)
+    assert(model.totalNumNodes === 8)
+    assert(model.trees.map(_.numNodes) === Array(5, 3))
+
+    val metadata = spark.read.json(s"$path/metadata")
+    val sparkVersionStr = metadata.select("sparkVersion").first().getString(0)
+    assert(sparkVersionStr === "2.4.7")
+  }
 }
 
 private object RandomForestRegressorSuite extends SparkFunSuite {


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

Reply via email to