[ https://issues.apache.org/jira/browse/SPARK-23471?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
Keepun updated SPARK-23471: --------------------------- Description: RandomForestClassificationModel.load() does not work after save() {code:java} RandomForestClassifier rf = new RandomForestClassifier() .setFeaturesCol("features") .setLabelCol("result") .setNumTrees(100) .setMaxDepth(30) .setMinInstancesPerNode(1) //.setCacheNodeIds(true) .setMaxMemoryInMB(500) .setSeed(System.currentTimeMillis() + System.nanoTime()); RandomForestClassificationModel rfmodel = rf.train(data); try { rfmodel.save(args[2] + "." + System.currentTimeMillis()); } catch (IOException e) { LOG.error(e.getMessage(), e); e.printStackTrace(); } {code} File metadata\part-00000: {code:java} {"class":"org.apache.spark.ml.classification.RandomForestClassificationModel", "timestamp":1519136783983,"sparkVersion":"2.2.1","uid":"rfc_7c7e84ce7488", "paramMap":{"featureSubsetStrategy":"auto","cacheNodeIds":false,"impurity":"gini", "checkpointInterval":10, "numTrees":20,"maxDepth":5, "probabilityCol":"probability","labelCol":"label","featuresCol":"features", "maxMemoryInMB":256,"minInstancesPerNode":1,"subsamplingRate":1.0, "rawPredictionCol":"rawPrediction","predictionCol":"prediction","maxBins":32, "minInfoGain":0.0,"seed":-491520797},"numFeatures":1354,"numClasses":2, "numTrees":20} {code} should be: {code:java} "numTrees":100,"maxDepth":30,{code} was: RandomForestClassificationModel.load() does not work after save(): {code:java} RandomForestClassifier rf = new RandomForestClassifier() .setFeaturesCol("features") .setLabelCol("result") .setNumTrees(100) .setMaxDepth(30) .setMinInstancesPerNode(1) //.setCacheNodeIds(true) .setMaxMemoryInMB(500) .setSeed(System.currentTimeMillis() + System.nanoTime()); RandomForestClassificationModel rfmodel = rf.train(data); try { rfmodel.save(args[2] + "." 
+ System.currentTimeMillis()); } catch (IOException e) { LOG.error(e.getMessage(), e); e.printStackTrace(); } {code} File metadata\part-00000: {code:java} {"class":"org.apache.spark.ml.classification.RandomForestClassificationModel", "timestamp":1519136783983,"sparkVersion":"2.2.1","uid":"rfc_7c7e84ce7488", "paramMap":{"featureSubsetStrategy":"auto","cacheNodeIds":false,"impurity":"gini", "checkpointInterval":10, "numTrees":20,"maxDepth":5, "probabilityCol":"probability","labelCol":"label","featuresCol":"features", "maxMemoryInMB":256,"minInstancesPerNode":1,"subsamplingRate":1.0, "rawPredictionCol":"rawPrediction","predictionCol":"prediction","maxBins":32, "minInfoGain":0.0,"seed":-491520797},"numFeatures":1354,"numClasses":2, "numTrees":20} {code} should be: {code:java} "numTrees":100,"maxDepth":30,{code} > RandomForestClassificationModel save() - incorrect metadata > ----------------------------------------------------------- > > Key: SPARK-23471 > URL: https://issues.apache.org/jira/browse/SPARK-23471 > Project: Spark > Issue Type: Bug > Components: ML > Affects Versions: 2.2.1 > Reporter: Keepun > Priority: Major > > RandomForestClassificationModel.load() does not work after save() > {code:java} > RandomForestClassifier rf = new RandomForestClassifier() > .setFeaturesCol("features") > .setLabelCol("result") > .setNumTrees(100) > .setMaxDepth(30) > .setMinInstancesPerNode(1) > //.setCacheNodeIds(true) > .setMaxMemoryInMB(500) > .setSeed(System.currentTimeMillis() + System.nanoTime()); > RandomForestClassificationModel rfmodel = rf.train(data); > try { > rfmodel.save(args[2] + "." 
+ System.currentTimeMillis()); > } catch (IOException e) { > LOG.error(e.getMessage(), e); > e.printStackTrace(); > } > {code} > File metadata\part-00000: > {code:java} > {"class":"org.apache.spark.ml.classification.RandomForestClassificationModel", > "timestamp":1519136783983,"sparkVersion":"2.2.1","uid":"rfc_7c7e84ce7488", > "paramMap":{"featureSubsetStrategy":"auto","cacheNodeIds":false,"impurity":"gini", > "checkpointInterval":10, > "numTrees":20,"maxDepth":5, > "probabilityCol":"probability","labelCol":"label","featuresCol":"features", > "maxMemoryInMB":256,"minInstancesPerNode":1,"subsamplingRate":1.0, > "rawPredictionCol":"rawPrediction","predictionCol":"prediction","maxBins":32, > "minInfoGain":0.0,"seed":-491520797},"numFeatures":1354,"numClasses":2, > "numTrees":20} > {code} > should be: > {code:java} > "numTrees":100,"maxDepth":30,{code} > -- This message was sent by Atlassian JIRA (v7.6.3#76005) --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@spark.apache.org For additional commands, e-mail: issues-h...@spark.apache.org