Github user jkbradley commented on a diff in the pull request: https://github.com/apache/spark/pull/18742#discussion_r131290424 --- Diff: python/pyspark/ml/util.py --- @@ -283,3 +353,143 @@ def numFeatures(self): Returns the number of features the model was trained on. If unknown, returns -1 """ return self._call_java("numFeatures") + + +@inherit_doc +class DefaultParamsWritable(MLWritable): + """ + Class for making simple Params types writable. Assumes that all parameters + are JSON-serializable. + + .. versionadded:: 2.3.0 + """ + + def write(self): + """Returns a DefaultParamsWriter instance for this class.""" + if isinstance(self, Params): + return DefaultParamsWriter(self) + else: + raise TypeError("Cannot use DefautParamsWritable with type %s because it does not " + + " extend Params.", type(self)) + + +@inherit_doc +class DefaultParamsWriter(MLWriter): + """ + Class for writing Estimators and Transformers whose parameters are JSON-serializable. + + .. versionadded:: 2.3.0 + """ + + def __init__(self, instance): + super(DefaultParamsWriter, self).__init__() + self.instance = instance + + def saveImpl(self, path): + DefaultParamsWriter.save_metadata(self.instance, path, self.sc) + + @staticmethod + def save_metadata(instance, path, sc, extraMetadata=None, paramMap=None): + metadataPath = os.path.join(path, "metadata") + metadataJson = DefaultParamsWriter.get_metadata_to_save(instance, + metadataPath, + sc, + extraMetadata, + paramMap) + sc.parallelize([metadataJson], 1).saveAsTextFile(metadataPath) + + @staticmethod + def get_metadata_to_save(instance, path, sc, extraMetadata=None, paramMap=None): + uid = instance.uid + cls = instance.__module__ + '.' 
+ instance.__class__.__name__ + params = instance.extractParamMap() + jsonParams = {} + if paramMap is not None: + for p in paramMap: + jsonParams[p.name] = paramMap[p] + else: + for p in params: + jsonParams[p.name] = params[p] + basicMetadata = {"class": cls, "timestamp": long(round(time.time() * 1000)), + "sparkVersion": sc.version, "uid": uid, "paramMap": jsonParams} + if extraMetadata is not None: + basicMetadata.update(extraMetadata) + return json.dumps(basicMetadata, separators=[',', ':']) + + +@inherit_doc +class DefaultParamsReadable(MLReadable): + """ + Class for making simple Params types readable. Assumes that all parameters + are JSON-serializable. + + .. versionadded:: 2.3.0 + """ + + @classmethod + def read(cls): + """Returns a DefaultParamsReader instance for this class.""" + return DefaultParamsReader(cls) + + +@inherit_doc +class DefaultParamsReader(MLReader): + """ + Class for reading Estimators and Transformers whose parameters are JSON-serializable. + + .. versionadded:: 2.3.0 + """ + + def __init__(self, cls): + super(DefaultParamsReader, self).__init__() + self.cls = cls + + @staticmethod + def __get_class(clazz): + """ + Loads Python class from its name. + """ + parts = clazz.split('.') + module = ".".join(parts[:-1]) + m = __import__(module) + for comp in parts[1:]: + m = getattr(m, comp) + return m + + def load(self, path): + metadata = DefaultParamsReader.loadMetadata(path, self.sc) + py_type = DefaultParamsReader.__get_class(metadata['class']) + instance = py_type() + instance._resetUid(metadata['uid']) + DefaultParamsReader.getAndSetParams(instance, metadata) + return instance + + @staticmethod + def loadMetadata(path, sc, expectedClassName=""): --- End diff -- These static methods can be private (add leading underscores)
--- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and would like it enabled, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. --- --------------------------------------------------------------------- To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org