Repository: spark
Updated Branches:
  refs/heads/master 92e385e0b -> 7d432af8f


[SPARK-20043][ML] DecisionTreeModel: ImpurityCalculator builder fails for 
uppercase impurity type Gini

Fix bug: DecisionTreeModel can't recongnize Impurity "Gini" when loading

TODO:
+ [x] add unit test
+ [x] fix the bug

Author: 颜发才(Yan Facai) <facai....@gmail.com>

Closes #17407 from facaiy/BUG/decision_tree_loader_failer_with_Gini_impurity.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/7d432af8
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/7d432af8
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/7d432af8

Branch: refs/heads/master
Commit: 7d432af8f3c47973550ea253dae0c23cd2961bde
Parents: 92e385e
Author: 颜发才(Yan Facai) <facai....@gmail.com>
Authored: Tue Mar 28 16:14:01 2017 -0700
Committer: Joseph K. Bradley <jos...@databricks.com>
Committed: Tue Mar 28 16:14:01 2017 -0700

----------------------------------------------------------------------
 .../apache/spark/mllib/tree/impurity/Impurity.scala   |  2 +-
 .../classification/DecisionTreeClassifierSuite.scala  | 14 ++++++++++++++
 2 files changed, 15 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/7d432af8/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Impurity.scala
----------------------------------------------------------------------
diff --git 
a/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Impurity.scala 
b/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Impurity.scala
index a5bdc2c..98a3021 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Impurity.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/impurity/Impurity.scala
@@ -184,7 +184,7 @@ private[spark] object ImpurityCalculator {
    * the given stats.
    */
   def getCalculator(impurity: String, stats: Array[Double]): 
ImpurityCalculator = {
-    impurity match {
+    impurity.toLowerCase match {
       case "gini" => new GiniCalculator(stats)
       case "entropy" => new EntropyCalculator(stats)
       case "variance" => new VarianceCalculator(stats)

http://git-wip-us.apache.org/repos/asf/spark/blob/7d432af8/mllib/src/test/scala/org/apache/spark/ml/classification/DecisionTreeClassifierSuite.scala
----------------------------------------------------------------------
diff --git 
a/mllib/src/test/scala/org/apache/spark/ml/classification/DecisionTreeClassifierSuite.scala
 
b/mllib/src/test/scala/org/apache/spark/ml/classification/DecisionTreeClassifierSuite.scala
index 10de503..964fcfb 100644
--- 
a/mllib/src/test/scala/org/apache/spark/ml/classification/DecisionTreeClassifierSuite.scala
+++ 
b/mllib/src/test/scala/org/apache/spark/ml/classification/DecisionTreeClassifierSuite.scala
@@ -385,6 +385,20 @@ class DecisionTreeClassifierSuite
     testEstimatorAndModelReadWrite(dt, continuousData, allParamSettings ++ 
Map("maxDepth" -> 0),
       allParamSettings ++ Map("maxDepth" -> 0), checkModelData)
   }
+
+  test("SPARK-20043: " +
+       "ImpurityCalculator builder fails for uppercase impurity type Gini in 
model read/write") {
+    val rdd = TreeTests.getTreeReadWriteData(sc)
+    val data: DataFrame =
+      TreeTests.setMetadata(rdd, Map.empty[Int, Int], numClasses = 2)
+
+    val dt = new DecisionTreeClassifier()
+      .setImpurity("Gini")
+      .setMaxDepth(2)
+    val model = dt.fit(data)
+
+    testDefaultReadWrite(model)
+  }
 }
 
 private[ml] object DecisionTreeClassifierSuite extends SparkFunSuite {


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

Reply via email to