Github user sethah commented on a diff in the pull request: https://github.com/apache/spark/pull/15721#discussion_r93891797 --- Diff: mllib/src/test/scala/org/apache/spark/ml/classification/NaiveBayesSuite.scala --- @@ -25,31 +25,36 @@ import breeze.stats.distributions.{Multinomial => BrzMultinomial} import org.apache.spark.{SparkException, SparkFunSuite} import org.apache.spark.ml.classification.NaiveBayes.{Bernoulli, Multinomial} import org.apache.spark.ml.classification.NaiveBayesSuite._ -import org.apache.spark.ml.feature.LabeledPoint +import org.apache.spark.ml.feature.{Instance, LabeledPoint} import org.apache.spark.ml.linalg._ import org.apache.spark.ml.param.ParamsSuite import org.apache.spark.ml.util.{DefaultReadWriteTest, MLTestingUtils} import org.apache.spark.ml.util.TestingUtils._ import org.apache.spark.mllib.util.MLlibTestSparkContext import org.apache.spark.sql.{DataFrame, Dataset, Row} +import org.apache.spark.sql.functions.lit class NaiveBayesSuite extends SparkFunSuite with MLlibTestSparkContext with DefaultReadWriteTest { import testImplicits._ @transient var dataset: Dataset[_] = _ + @transient var bernoulliDataset: Dataset[_] = _ + + private val seed = 42 override def beforeAll(): Unit = { super.beforeAll() - val pi = Array(0.5, 0.1, 0.4).map(math.log) + val pi = Array(0.3, 0.3, 0.4).map(math.log) val theta = Array( - Array(0.70, 0.10, 0.10, 0.10), // label 0 - Array(0.10, 0.70, 0.10, 0.10), // label 1 - Array(0.10, 0.10, 0.70, 0.10) // label 2 + Array(0.30, 0.30, 0.30, 0.30), // label 0 + Array(0.30, 0.30, 0.30, 0.30), // label 1 + Array(0.40, 0.40, 0.40, 0.40) // label 2 --- End diff -- Yes, this was changed so that when we set smoothing to zero for the weighted tests, we don't get some infinite theta values.
--- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and you would like it to be, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. --- --------------------------------------------------------------------- To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org