Repository: spark Updated Branches: refs/heads/master b24d3dba6 -> ff0501b0c
[SPARK-24300][ML] change the way to set seed in ml.cluster.LDASuite.generateLDAData ## What changes were proposed in this pull request? Create a separate RNG for each row, seeded with the row index, inside the map closure instead of sharing one driver-side RNG across all partitions. ## How was this patch tested? Manually. Please review http://spark.apache.org/contributing.html before opening a pull request. Author: Lu WANG <lu.w...@databricks.com> Closes #21492 from ludatabricks/SPARK-24300. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/ff0501b0 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/ff0501b0 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/ff0501b0 Branch: refs/heads/master Commit: ff0501b0c27dc8149bd5fb38a19d9b0056698766 Parents: b24d3db Author: Lu WANG <lu.w...@databricks.com> Authored: Mon Jun 4 16:08:27 2018 -0700 Committer: Xiangrui Meng <m...@databricks.com> Committed: Mon Jun 4 16:08:27 2018 -0700 ---------------------------------------------------------------------- .../src/test/scala/org/apache/spark/ml/clustering/LDASuite.scala | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/spark/blob/ff0501b0/mllib/src/test/scala/org/apache/spark/ml/clustering/LDASuite.scala ---------------------------------------------------------------------- diff --git a/mllib/src/test/scala/org/apache/spark/ml/clustering/LDASuite.scala b/mllib/src/test/scala/org/apache/spark/ml/clustering/LDASuite.scala index 096b541..db92132 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/clustering/LDASuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/clustering/LDASuite.scala @@ -34,9 +34,8 @@ object LDASuite { vocabSize: Int): DataFrame = { val avgWC = 1 // average instances of each word in a doc val sc = spark.sparkContext - val rng = new java.util.Random() - rng.setSeed(1) val rdd = sc.parallelize(1 to rows).map { i => + val rng = new java.util.Random(i) 
Vectors.dense(Array.fill(vocabSize)(rng.nextInt(2 * avgWC).toDouble)) }.map(v => new TestRow(v)) spark.createDataFrame(rdd) --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org