Github user jkbradley commented on a diff in the pull request: https://github.com/apache/spark/pull/9674#discussion_r44988807 --- Diff: mllib/src/main/scala/org/apache/spark/ml/Pipeline.scala --- @@ -200,4 +235,121 @@ class PipelineModel private[ml] ( override def copy(extra: ParamMap): PipelineModel = { new PipelineModel(uid, stages.map(_.copy(extra))).setParent(parent) } + + override def write: Writer = new PipelineModelWriter(this) +} + +object PipelineModel extends Readable[PipelineModel] { + + override def read: Reader[PipelineModel] = new PipelineModelReader + + override def load(path: String): PipelineModel = read.load(path) +} + +private[ml] class PipelineModelWriter(instance: PipelineModel) extends Writer { + + PipelineSharedWriter.validateStages(instance.stages.asInstanceOf[Array[PipelineStage]]) + + override protected def saveImpl(path: String): Unit = PipelineSharedWriter.saveImpl(instance, + instance.stages.asInstanceOf[Array[PipelineStage]], sc, path) +} + +private[ml] class PipelineModelReader extends Reader[PipelineModel] { + + /** Checked against metadata when loading model */ + private val className = "org.apache.spark.ml.PipelineModel" + + override def load(path: String): PipelineModel = { + val (uid: String, stages: Array[PipelineStage]) = + PipelineSharedReader.load(className, sc, path) + val transformers = stages map { + case stage: Transformer => stage + case stage => throw new RuntimeException(s"PipelineModel.read loaded a stage but found it" + + s" was not a Transformer. 
Bad stage: ${stage.uid}") + } + new PipelineModel(uid, transformers) + } +} + +/** Methods for [[Writer]] shared between [[Pipeline]] and [[PipelineModel]] */ +private[ml] object PipelineSharedWriter { + + import org.json4s.JsonDSL._ + + /** Check that all stages are Writable */ + def validateStages(stages: Array[PipelineStage]): Unit = { + stages.foreach { + case stage: Writable => // good + case stage => + throw new UnsupportedOperationException("Pipeline write will fail on this Pipeline" + --- End diff --

But a user could write:
```
val writer = pipeline.write // failure will occur here, before attempting to write
writer.save(...)
```
---
If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and would like it enabled, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA.
---
---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org