[jira] [Commented] (SPARK-34356) OVR transform fix potential column conflict
[ https://issues.apache.org/jira/browse/SPARK-34356?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17278750#comment-17278750 ] Apache Spark commented on SPARK-34356: -- User 'zhengruifeng' has created a pull request for this issue: https://github.com/apache/spark/pull/31472 > OVR transform fix potential column conflict > --- > > Key: SPARK-34356 > URL: https://issues.apache.org/jira/browse/SPARK-34356 > Project: Spark > Issue Type: Improvement > Components: ML >Affects Versions: 3.2.0 >Reporter: zhengruifeng >Assignee: zhengruifeng >Priority: Major > > {code:java} > import org.apache.spark.ml.classification._ > val df = > spark.read.format("libsvm").load("/d0/Dev/Opensource/spark/data/mllib/sample_multiclass_classification_data.txt").withColumn("probability", > lit(0.0)) > val classifier = new > LogisticRegression().setMaxIter(1).setTol(1E-6).setFitIntercept(true) > val ovr = new OneVsRest().setClassifier(classifier) > val ovrm = ovr.fit(df) > ovrm.transform(df) > java.lang.IllegalArgumentException: requirement failed: Column probability > already exists. 
> at scala.Predef$.require(Predef.scala:281) > at org.apache.spark.ml.util.SchemaUtils$.appendColumn(SchemaUtils.scala:106) > at org.apache.spark.ml.util.SchemaUtils$.appendColumn(SchemaUtils.scala:96) > at > org.apache.spark.ml.classification.ProbabilisticClassifierParams.validateAndTransformSchema(ProbabilisticClassifier.scala:38) > at > org.apache.spark.ml.classification.ProbabilisticClassifierParams.validateAndTransformSchema$(ProbabilisticClassifier.scala:33) > at > org.apache.spark.ml.classification.LogisticRegressionModel.org$apache$spark$ml$classification$LogisticRegressionParams$$super$validateAndTransformSchema(LogisticRegression.scala:917) > at > org.apache.spark.ml.classification.LogisticRegressionParams.validateAndTransformSchema(LogisticRegression.scala:268) > at > org.apache.spark.ml.classification.LogisticRegressionParams.validateAndTransformSchema$(LogisticRegression.scala:255) > at > org.apache.spark.ml.classification.LogisticRegressionModel.validateAndTransformSchema(LogisticRegression.scala:917) > at org.apache.spark.ml.PredictionModel.transformSchema(Predictor.scala:222) > at > org.apache.spark.ml.classification.ClassificationModel.transformSchema(Classifier.scala:182) > at > org.apache.spark.ml.classification.ProbabilisticClassificationModel.transformSchema(ProbabilisticClassifier.scala:88) > at org.apache.spark.ml.PipelineStage.transformSchema(Pipeline.scala:71) > at > org.apache.spark.ml.classification.ProbabilisticClassificationModel.transform(ProbabilisticClassifier.scala:107) > at > org.apache.spark.ml.classification.OneVsRestModel.$anonfun$transform$4(OneVsRest.scala:215) > at > scala.collection.IndexedSeqOptimized.foldLeft(IndexedSeqOptimized.scala:60) > at > scala.collection.IndexedSeqOptimized.foldLeft$(IndexedSeqOptimized.scala:68) > at scala.collection.mutable.ArrayOps$ofRef.foldLeft(ArrayOps.scala:198) > at > org.apache.spark.ml.classification.OneVsRestModel.transform(OneVsRest.scala:203) > ... 
49 elided {code} -- This message was sent by Atlassian Jira (v8.3.4#803005) - To unsubscribe, e-mail: issues-unsubscr...@spark.apache.org For additional commands, e-mail: issues-h...@spark.apache.org
[jira] [Commented] (SPARK-34356) OVR transform fix potential column conflict
[ https://issues.apache.org/jira/browse/SPARK-34356?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17278749#comment-17278749 ] Apache Spark commented on SPARK-34356: -- User 'zhengruifeng' has created a pull request for this issue: https://github.com/apache/spark/pull/31472 > OVR transform fix potential column conflict > --- > > Key: SPARK-34356 > URL: https://issues.apache.org/jira/browse/SPARK-34356 > Project: Spark > Issue Type: Improvement > Components: ML >Affects Versions: 3.2.0 >Reporter: zhengruifeng >Assignee: zhengruifeng >Priority: Major > > {code:java} > import org.apache.spark.ml.classification._ > val df = > spark.read.format("libsvm").load("/d0/Dev/Opensource/spark/data/mllib/sample_multiclass_classification_data.txt").withColumn("probability", > lit(0.0)) > val classifier = new > LogisticRegression().setMaxIter(1).setTol(1E-6).setFitIntercept(true) > val ovr = new OneVsRest().setClassifier(classifier) > val ovrm = ovr.fit(df) > ovrm.transform(df) > java.lang.IllegalArgumentException: requirement failed: Column probability > already exists. 
> at scala.Predef$.require(Predef.scala:281) > at org.apache.spark.ml.util.SchemaUtils$.appendColumn(SchemaUtils.scala:106) > at org.apache.spark.ml.util.SchemaUtils$.appendColumn(SchemaUtils.scala:96) > at > org.apache.spark.ml.classification.ProbabilisticClassifierParams.validateAndTransformSchema(ProbabilisticClassifier.scala:38) > at > org.apache.spark.ml.classification.ProbabilisticClassifierParams.validateAndTransformSchema$(ProbabilisticClassifier.scala:33) > at > org.apache.spark.ml.classification.LogisticRegressionModel.org$apache$spark$ml$classification$LogisticRegressionParams$$super$validateAndTransformSchema(LogisticRegression.scala:917) > at > org.apache.spark.ml.classification.LogisticRegressionParams.validateAndTransformSchema(LogisticRegression.scala:268) > at > org.apache.spark.ml.classification.LogisticRegressionParams.validateAndTransformSchema$(LogisticRegression.scala:255) > at > org.apache.spark.ml.classification.LogisticRegressionModel.validateAndTransformSchema(LogisticRegression.scala:917) > at org.apache.spark.ml.PredictionModel.transformSchema(Predictor.scala:222) > at > org.apache.spark.ml.classification.ClassificationModel.transformSchema(Classifier.scala:182) > at > org.apache.spark.ml.classification.ProbabilisticClassificationModel.transformSchema(ProbabilisticClassifier.scala:88) > at org.apache.spark.ml.PipelineStage.transformSchema(Pipeline.scala:71) > at > org.apache.spark.ml.classification.ProbabilisticClassificationModel.transform(ProbabilisticClassifier.scala:107) > at > org.apache.spark.ml.classification.OneVsRestModel.$anonfun$transform$4(OneVsRest.scala:215) > at > scala.collection.IndexedSeqOptimized.foldLeft(IndexedSeqOptimized.scala:60) > at > scala.collection.IndexedSeqOptimized.foldLeft$(IndexedSeqOptimized.scala:68) > at scala.collection.mutable.ArrayOps$ofRef.foldLeft(ArrayOps.scala:198) > at > org.apache.spark.ml.classification.OneVsRestModel.transform(OneVsRest.scala:203) > ... 
49 elided {code} -- This message was sent by Atlassian Jira (v8.3.4#803005) - To unsubscribe, e-mail: issues-unsubscr...@spark.apache.org For additional commands, e-mail: issues-h...@spark.apache.org