Github user jkbradley commented on a diff in the pull request: https://github.com/apache/spark/pull/12023#discussion_r57674664 --- Diff: mllib/src/main/scala/org/apache/spark/ml/classification/RandomForestClassifier.scala --- @@ -240,12 +250,66 @@ final class RandomForestClassificationModel private[ml] ( private[ml] def toOld: OldRandomForestModel = { new OldRandomForestModel(OldAlgo.Classification, _trees.map(_.toOld)) } + + @Since("2.0.0") + override def write: MLWriter = + new RandomForestClassificationModel.RandomForestClassificationModelWriter(this) + + @Since("2.0.0") + override def read: MLReader = + new RandomForestClassificationModel.RandomForestClassificationModelReader(this) } -private[ml] object RandomForestClassificationModel { +@Since("2.0.0") +object RandomForestClassificationModel extends MLReadable[RandomForestClassificationModel] { + + + @Since("2.0.0") + override def load(path: String): RandomForestClassificationModel = super.load(path) + + private[RandomForestClassificationModel] + class RandomForestClassificationModelWriter(instance: RandomForestClassificationModel) + extends MLWriter { + + override protected def saveImpl(path: String): Unit = { + val extraMetadata: JObject = Map( + "numFeatures" -> instance.numFeatures, + "numClasses" -> instance.numClasses) + DefaultParamsWriter.saveMetadata(instance, path, sc, Some(extraMetadata)) + for(treeIndex <- 1 to instance.getNumTrees) { --- End diff -- This is writing each tree separately. Based on our JIRA discussion, it would be better to write all trees in a single DataFrame. You could create an RDD of trees, then flatMap that to an RDD of NodeData, and then convert that to a DataFrame.
--- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. --- --------------------------------------------------------------------- To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org