Github user srowen commented on a diff in the pull request: https://github.com/apache/spark/pull/20632#discussion_r168956209 --- Diff: mllib/src/test/scala/org/apache/spark/ml/tree/impl/RandomForestSuite.scala --- @@ -640,4 +740,55 @@ private object RandomForestSuite { val (indices, values) = map.toSeq.sortBy(_._1).unzip Vectors.sparse(size, indices.toArray, values.toArray) } + + /** Generate a label. */ + private def generateLabel(rnd: Random, numClasses: Int): Double = { + rnd.nextInt(numClasses) + } + + /** Generate a numeric value in the range [numericMin, numericMax]. */ + private def generateNumericValue(rnd: Random, numericMin: Double, numericMax: Double) : Double = { + rnd.nextDouble() * (numericMax- numericMin) + numericMin + } + + /** Generate a binary value. */ + private def generateBinaryValue(rnd: Random) : Double = if (rnd.nextBoolean()) 1 else 0 + + /** Generate an array of binary values of length numBinary. */ + private def generateBinaryArray(rnd: Random, numBinary: Int): Array[Double] = { + Range.apply(0, numBinary).map(_ => generateBinaryValue(rnd)).toArray --- End diff -- `Array.fill(numBinary)(generateBinaryValue(rnd))` is simpler
--- --------------------------------------------------------------------- To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org For additional commands, e-mail: reviews-help@spark.apache.org