I am getting a scala.MatchError in the code below, and I cannot see why it
would be happening. I am using Spark 2.0.1.

scala> testResults.columns
res538: Array[String] = Array(TopicVector, subject_id, hadm_id,
isElective, isNewborn, isUrgent, isEmergency, isMale, isFemale,
oasis_score, sapsii_score, sofa_score, age, hosp_death, test,
ModelFeatures, Label, rawPrediction, ModelProbability,
ModelPrediction)

scala> testResults.select("Label","ModelProbability").take(1)
res542: Array[org.apache.spark.sql.Row] =
Array([0.0,[0.737304818744076,0.262695181255924]])

scala> val testScoreAndLabel = testResults.
     | select("Label","ModelProbability").
     | map { case Row(l:Double, p:Vector) => (p(1), l) }
testScoreAndLabel: org.apache.spark.sql.Dataset[(Double, Double)] =
[_1: double, _2: double]

scala> testScoreAndLabel
res539: org.apache.spark.sql.Dataset[(Double, Double)] = [_1: double,
_2: double]

scala> testScoreAndLabel.columns
res540: Array[String] = Array(_1, _2)
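
For completeness, the pattern match above relies on Vector resolving to the
Spark ML type; these are the imports I believe that line needs (a sketch, in
case my imports are part of the problem):

import org.apache.spark.sql.Row
// In Spark 2.x the ML probability column holds an org.apache.spark.ml.linalg.Vector;
// Scala's default scala.collection.immutable.Vector would not match it.
import org.apache.spark.ml.linalg.Vector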

scala> val testMetrics = new BinaryClassificationMetrics(testScoreAndLabel.rdd)
testMetrics: org.apache.spark.mllib.evaluation.BinaryClassificationMetrics
= org.apache.spark.mllib.evaluation.BinaryClassificationMetrics@36e780d1

The following line gives the error:

val auROC = testMetrics.areaUnderROC() //this line gives the error

Caused by: scala.MatchError:
[0.0,[0.7316583497453766,0.2683416502546234]] (of class
org.apache.spark.sql.catalyst.expressions.GenericRowWithSchema)
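
In case it helps, the same extraction written without the Row pattern match,
using getAs to make the expected types explicit, would look roughly like this
(a sketch, assuming ModelProbability is stored as an
org.apache.spark.ml.linalg.Vector; since the map is lazy, any failing match
would only surface once areaUnderROC() forces evaluation):

import org.apache.spark.ml.linalg.Vector
import org.apache.spark.mllib.evaluation.BinaryClassificationMetrics

// Extract (score, label) pairs explicitly instead of pattern matching the Row.
val scoreAndLabel = testResults.
  select("Label", "ModelProbability").
  rdd.
  map { row =>
    val label = row.getAs[Double]("Label")
    val prob  = row.getAs[Vector]("ModelProbability")  // ML vector column
    (prob(1), label)                                    // score for class 1, then the label
  }

// BinaryClassificationMetrics expects an RDD[(score, label)] of Doubles.
val metrics = new BinaryClassificationMetrics(scoreAndLabel)
val auROC = metrics.areaUnderROC()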
