[spark] branch master updated: [SPARK-23643][CORE][SQL][ML] Shrinking the buffer in hashSeed up to size of the seed parameter

srowen Sat, 23 Mar 2019 09:26:46 -0700

This is an automated email from the ASF dual-hosted git repository.

srowen pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git



The following commit(s) were added to refs/heads/master by this push:
     new 027ed2d  [SPARK-23643][CORE][SQL][ML] Shrinking the buffer in hashSeed 
up to size of the seed parameter
027ed2d is described below

commit 027ed2d11b861a4b38c62452d26ce446794792af
Author: Maxim Gekk <maxim.g...@databricks.com>
AuthorDate: Sat Mar 23 11:26:09 2019 -0500

    [SPARK-23643][CORE][SQL][ML] Shrinking the buffer in hashSeed up to size of 
the seed parameter
    
    ## What changes were proposed in this pull request?
    
    The hashSeed method allocates 64 bytes instead of 8. Other bytes are always 
zeros (thanks to default behavior of ByteBuffer). And they could be excluded 
from hash calculation because they don't differentiate inputs.
    
    ## How was this patch tested?
    
    By running the existing tests - XORShiftRandomSuite
    
    Closes #20793 from MaxGekk/hash-buff-size.
    
    Lead-authored-by: Maxim Gekk <maxim.g...@databricks.com>
    Co-authored-by: Maxim Gekk <max.g...@gmail.com>
    Signed-off-by: Sean Owen <sean.o...@databricks.com>
---
 R/pkg/tests/fulltests/test_mllib_classification.R  |   6 +-
 R/pkg/tests/fulltests/test_mllib_clustering.R      |   2 +-
 R/pkg/tests/fulltests/test_mllib_recommendation.R  |   4 +-
 R/pkg/tests/fulltests/test_mllib_tree.R            |   8 +-
 R/pkg/tests/fulltests/test_sparkSQL.R              |  30 +-
 .../apache/spark/util/random/XORShiftRandom.scala  |   2 +-
 .../java/test/org/apache/spark/JavaAPISuite.java   |   9 +-
 .../apache/spark/rdd/PairRDDFunctionsSuite.scala   |   2 +-
 .../spark/util/random/RandomSamplerSuite.scala     |   2 +-
 .../ml/classification/GBTClassifierSuite.scala     |   2 +-
 .../classification/LogisticRegressionSuite.scala   | 585 +++++++++++----------
 .../apache/spark/ml/clustering/KMeansSuite.scala   |   2 +-
 .../clustering/PowerIterationClusteringSuite.scala |   6 +-
 .../apache/spark/ml/feature/Word2VecSuite.scala    |  10 +-
 .../spark/ml/regression/GBTRegressorSuite.scala    |   2 +-
 .../GeneralizedLinearRegressionSuite.scala         |  48 +-
 .../clustering/PowerIterationClusteringSuite.scala |   8 +-
 .../mllib/clustering/StreamingKMeansSuite.scala    |   3 +-
 python/pyspark/ml/clustering.py                    |  14 +-
 python/pyspark/ml/feature.py                       |  14 +-
 python/pyspark/ml/recommendation.py                |  14 +-
 python/pyspark/ml/tests/test_algorithms.py         |   2 +-
 python/pyspark/ml/tuning.py                        |   6 +-
 python/pyspark/mllib/recommendation.py             |   6 +-
 python/pyspark/sql/dataframe.py                    |  12 +-
 python/pyspark/sql/functions.py                    |   8 +-
 python/pyspark/sql/tests/test_functions.py         |   4 +-
 .../sql/catalyst/expressions/RandomSuite.scala     |  16 +-
 .../sql-tests/results/group-by-ordinal.sql.out     |  12 +-
 .../resources/sql-tests/results/random.sql.out     |  16 +-
 .../org/apache/spark/sql/DataFrameStatSuite.scala  |   8 +-
 .../scala/org/apache/spark/sql/DatasetSuite.scala  |  15 +-
 .../execution/datasources/csv/TestCsvData.scala    |   3 +-
 .../execution/datasources/json/TestJsonData.scala  |   3 +-
 34 files changed, 446 insertions(+), 438 deletions(-)

diff --git a/R/pkg/tests/fulltests/test_mllib_classification.R 
b/R/pkg/tests/fulltests/test_mllib_classification.R
index 9fdb0cf..1f1b187 100644
--- a/R/pkg/tests/fulltests/test_mllib_classification.R
+++ b/R/pkg/tests/fulltests/test_mllib_classification.R
@@ -299,7 +299,7 @@ test_that("spark.mlp", {
   df <- 
read.df(absoluteSparkPath("data/mllib/sample_multiclass_classification_data.txt"),
                 source = "libsvm")
   model <- spark.mlp(df, label ~ features, blockSize = 128, layers = c(4, 5, 
4, 3),
-                     solver = "l-bfgs", maxIter = 100, tol = 0.5, stepSize = 
1, seed = 1)
+                     solver = "l-bfgs", maxIter = 100, tol = 0.00001, stepSize 
= 1, seed = 1)
 
   # Test summary method
   summary <- summary(model)
@@ -307,13 +307,13 @@ test_that("spark.mlp", {
   expect_equal(summary$numOfOutputs, 3)
   expect_equal(summary$layers, c(4, 5, 4, 3))
   expect_equal(length(summary$weights), 64)
-  expect_equal(head(summary$weights, 5), list(-0.878743, 0.2154151, -1.16304, 
-0.6583214, 1.009825),
+  expect_equal(head(summary$weights, 5), list(-24.28415, 107.8701, 16.86376, 
1.103736, 9.244488),
                tolerance = 1e-6)
 
   # Test predict method
   mlpTestDF <- df
   mlpPredictions <- collect(select(predict(model, mlpTestDF), "prediction"))
-  expect_equal(head(mlpPredictions$prediction, 6), c("0.0", "1.0", "1.0", 
"1.0", "1.0", "1.0"))
+  expect_equal(head(mlpPredictions$prediction, 6), c("1.0", "1.0", "1.0", 
"1.0", "0.0", "1.0"))
 
   # Test model save/load
   if (windows_with_hadoop()) {
diff --git a/R/pkg/tests/fulltests/test_mllib_clustering.R 
b/R/pkg/tests/fulltests/test_mllib_clustering.R
index b78a476..028ad57 100644
--- a/R/pkg/tests/fulltests/test_mllib_clustering.R
+++ b/R/pkg/tests/fulltests/test_mllib_clustering.R
@@ -153,7 +153,7 @@ test_that("spark.kmeans", {
   model <- spark.kmeans(data = training, ~ ., k = 2, maxIter = 10, initMode = 
"random")
   sample <- take(select(predict(model, training), "prediction"), 1)
   expect_equal(typeof(sample$prediction), "integer")
-  expect_equal(sample$prediction, 1)
+  expect_equal(sample$prediction, 0)
 
   # Test stats::kmeans is working
   statsModel <- kmeans(x = newIris, centers = 2)
diff --git a/R/pkg/tests/fulltests/test_mllib_recommendation.R 
b/R/pkg/tests/fulltests/test_mllib_recommendation.R
index 4d919c9..d50de41 100644
--- a/R/pkg/tests/fulltests/test_mllib_recommendation.R
+++ b/R/pkg/tests/fulltests/test_mllib_recommendation.R
@@ -27,13 +27,13 @@ test_that("spark.als", {
                list(2, 1, 1.0), list(2, 2, 5.0))
   df <- createDataFrame(data, c("user", "item", "score"))
   model <- spark.als(df, ratingCol = "score", userCol = "user", itemCol = 
"item",
-                     rank = 10, maxIter = 5, seed = 0, regParam = 0.1)
+                     rank = 10, maxIter = 15, seed = 0, regParam = 0.1)
   stats <- summary(model)
   expect_equal(stats$rank, 10)
   test <- createDataFrame(list(list(0, 2), list(1, 0), list(2, 0)), c("user", 
"item"))
   predictions <- collect(predict(model, test))
 
-  expect_equal(predictions$prediction, c(-0.1380762, 2.6258414, -1.5018409),
+  expect_equal(predictions$prediction, c(0.6324540, 3.6218479, -0.4568263),
   tolerance = 1e-4)
 
   # Test model save/load
diff --git a/R/pkg/tests/fulltests/test_mllib_tree.R 
b/R/pkg/tests/fulltests/test_mllib_tree.R
index facd3a9..ad68700 100644
--- a/R/pkg/tests/fulltests/test_mllib_tree.R
+++ b/R/pkg/tests/fulltests/test_mllib_tree.R
@@ -148,10 +148,10 @@ test_that("spark.randomForest", {
   model <- spark.randomForest(data, Employed ~ ., "regression", maxDepth = 5, 
maxBins = 16,
                               numTrees = 20, seed = 123)
   predictions <- collect(predict(model, data))
-  expect_equal(predictions$prediction, c(60.32820, 61.22315, 60.69025, 
62.11070,
-                                         63.53160, 64.05470, 65.12710, 
64.30450,
-                                         66.70910, 67.86125, 68.08700, 
67.21865,
-                                         68.89275, 69.53180, 69.39640, 
69.68250),
+  expect_equal(predictions$prediction, c(60.32495, 61.06495, 60.52120, 
61.98500,
+                                         63.64450, 64.21910, 65.00810, 
64.30450,
+                                         66.70910, 67.96875, 68.22140, 
67.21865,
+                                         68.89275, 69.55900, 69.30160, 
69.93050),
                tolerance = 1e-4)
   stats <- summary(model)
   expect_equal(stats$numTrees, 20)
diff --git a/R/pkg/tests/fulltests/test_sparkSQL.R 
b/R/pkg/tests/fulltests/test_sparkSQL.R
index cebd0f8..2394f74 100644
--- a/R/pkg/tests/fulltests/test_sparkSQL.R
+++ b/R/pkg/tests/fulltests/test_sparkSQL.R
@@ -1786,9 +1786,9 @@ test_that("column binary mathfunctions", {
   expect_equal(collect(select(df, shiftRight(df$b, 1)))[4, 1], 4)
   expect_equal(collect(select(df, shiftRightUnsigned(df$b, 1)))[4, 1], 4)
   expect_equal(class(collect(select(df, rand()))[2, 1]), "numeric")
-  expect_equal(collect(select(df, rand(1)))[1, 1], 0.134, tolerance = 0.01)
+  expect_equal(collect(select(df, rand(1)))[1, 1], 0.636, tolerance = 0.01)
   expect_equal(class(collect(select(df, randn()))[2, 1]), "numeric")
-  expect_equal(collect(select(df, randn(1)))[1, 1], -1.03, tolerance = 0.01)
+  expect_equal(collect(select(df, randn(1)))[1, 1], 1.68, tolerance = 0.01)
 })
 
 test_that("string operators", {
@@ -2360,7 +2360,7 @@ test_that("join(), crossJoin() and merge() on a 
DataFrame", {
   expect_equal(names(joined3), c("age", "name", "name", "test"))
   expect_equal(count(joined3), 4)
   expect_true(is.na(collect(orderBy(joined3, joined3$age))$age[2]))
-  
+
   joined4 <- join(df, df2, df$name == df2$name, "right_outer")
   expect_equal(names(joined4), c("age", "name", "name", "test"))
   expect_equal(count(joined4), 4)
@@ -2377,19 +2377,19 @@ test_that("join(), crossJoin() and merge() on a 
DataFrame", {
   expect_equal(names(joined6), c("newAge", "name", "test"))
   expect_equal(count(joined6), 4)
   expect_equal(collect(orderBy(joined6, joined6$name))$newAge[3], 24)
-  
+
   joined7 <- select(join(df, df2, df$name == df2$name, "full"),
                     alias(df$age + 5, "newAge"), df$name, df2$test)
   expect_equal(names(joined7), c("newAge", "name", "test"))
   expect_equal(count(joined7), 4)
   expect_equal(collect(orderBy(joined7, joined7$name))$newAge[3], 24)
-  
+
   joined8 <- select(join(df, df2, df$name == df2$name, "fullouter"),
                     alias(df$age + 5, "newAge"), df$name, df2$test)
   expect_equal(names(joined8), c("newAge", "name", "test"))
   expect_equal(count(joined8), 4)
   expect_equal(collect(orderBy(joined8, joined8$name))$newAge[3], 24)
-  
+
   joined9 <- select(join(df, df2, df$name == df2$name, "full_outer"),
                     alias(df$age + 5, "newAge"), df$name, df2$test)
   expect_equal(names(joined9), c("newAge", "name", "test"))
@@ -2400,12 +2400,12 @@ test_that("join(), crossJoin() and merge() on a 
DataFrame", {
   expect_equal(names(joined10), c("age", "name", "name", "test"))
   expect_equal(count(joined10), 3)
   expect_true(is.na(collect(orderBy(joined10, joined10$age))$age[1]))
-  
+
   joined11 <- join(df, df2, df$name == df2$name, "leftouter")
   expect_equal(names(joined11), c("age", "name", "name", "test"))
   expect_equal(count(joined11), 3)
   expect_true(is.na(collect(orderBy(joined11, joined11$age))$age[1]))
-  
+
   joined12 <- join(df, df2, df$name == df2$name, "left_outer")
   expect_equal(names(joined12), c("age", "name", "name", "test"))
   expect_equal(count(joined12), 3)
@@ -2418,23 +2418,23 @@ test_that("join(), crossJoin() and merge() on a 
DataFrame", {
   joined14 <- join(df, df2, df$name == df2$name, "semi")
   expect_equal(names(joined14), c("age", "name"))
   expect_equal(count(joined14), 3)
-  
+
   joined14 <- join(df, df2, df$name == df2$name, "leftsemi")
   expect_equal(names(joined14), c("age", "name"))
   expect_equal(count(joined14), 3)
-  
+
   joined15 <- join(df, df2, df$name == df2$name, "left_semi")
   expect_equal(names(joined15), c("age", "name"))
   expect_equal(count(joined15), 3)
-  
+
   joined16 <- join(df2, df, df2$name == df$name, "anti")
   expect_equal(names(joined16), c("name", "test"))
   expect_equal(count(joined16), 1)
-  
+
   joined17 <- join(df2, df, df2$name == df$name, "leftanti")
   expect_equal(names(joined17), c("name", "test"))
   expect_equal(count(joined17), 1)
-  
+
   joined18 <- join(df2, df, df2$name == df$name, "left_anti")
   expect_equal(names(joined18), c("name", "test"))
   expect_equal(count(joined18), 1)
@@ -2444,7 +2444,7 @@ test_that("join(), crossJoin() and merge() on a 
DataFrame", {
                  "'left', 'leftouter', 'left_outer', 'right', 'rightouter', 
'right_outer',",
                  "'semi', 'leftsemi', 'left_semi', 'anti', 'leftanti' or 
'left_anti'.")
   expect_error(join(df2, df, df2$name == df$name, "invalid"), error_msg)
-  
+
   merged <- merge(df, df2, by.x = "name", by.y = "name", all.x = TRUE, all.y = 
TRUE)
   expect_equal(count(merged), 4)
   expect_equal(names(merged), c("age", "name_x", "name_y", "test"))
@@ -3026,7 +3026,7 @@ test_that("sampleBy() on a DataFrame", {
   sample <- sampleBy(df, "key", fractions, 0)
   result <- collect(orderBy(count(groupBy(sample, "key")), "key"))
   expect_identical(as.list(result[1, ]), list(key = "0", count = 3))
-  expect_identical(as.list(result[2, ]), list(key = "1", count = 7))
+  expect_identical(as.list(result[2, ]), list(key = "1", count = 8))
 })
 
 test_that("approxQuantile() on a DataFrame", {
diff --git 
a/core/src/main/scala/org/apache/spark/util/random/XORShiftRandom.scala 
b/core/src/main/scala/org/apache/spark/util/random/XORShiftRandom.scala
index e472756..af09e50 100644
--- a/core/src/main/scala/org/apache/spark/util/random/XORShiftRandom.scala
+++ b/core/src/main/scala/org/apache/spark/util/random/XORShiftRandom.scala
@@ -59,7 +59,7 @@ private[spark] object XORShiftRandom {
 
   /** Hash seeds to have 0/1 bits throughout. */
   private[random] def hashSeed(seed: Long): Long = {
-    val bytes = ByteBuffer.allocate(java.lang.Long.SIZE).putLong(seed).array()
+    val bytes = ByteBuffer.allocate(java.lang.Long.BYTES).putLong(seed).array()
     val lowBits = MurmurHash3.bytesHash(bytes)
     val highBits = MurmurHash3.bytesHash(bytes, lowBits)
     (highBits.toLong << 32) | (lowBits.toLong & 0xFFFFFFFFL)
diff --git a/core/src/test/java/test/org/apache/spark/JavaAPISuite.java 
b/core/src/test/java/test/org/apache/spark/JavaAPISuite.java
index f979f9e..a8252e0 100644
--- a/core/src/test/java/test/org/apache/spark/JavaAPISuite.java
+++ b/core/src/test/java/test/org/apache/spark/JavaAPISuite.java
@@ -32,6 +32,8 @@ import java.util.LinkedList;
 import java.util.List;
 import java.util.Map;
 import java.util.concurrent.*;
+import java.util.stream.Collectors;
+import java.util.stream.IntStream;
 
 import org.apache.spark.Partitioner;
 import org.apache.spark.SparkConf;
@@ -156,13 +158,16 @@ public class JavaAPISuite implements Serializable {
 
   @Test
   public void sample() {
-    List<Integer> ints = Arrays.asList(1, 2, 3, 4, 5, 6, 7, 8, 9, 10);
+    List<Integer> ints = IntStream.iterate(1, x -> x + 1)
+      .limit(20)
+      .boxed()
+      .collect(Collectors.toList());
     JavaRDD<Integer> rdd = sc.parallelize(ints);
     // the seeds here are "magic" to make this work out nicely
     JavaRDD<Integer> sample20 = rdd.sample(true, 0.2, 8);
     assertEquals(2, sample20.count());
     JavaRDD<Integer> sample20WithoutReplacement = rdd.sample(false, 0.2, 2);
-    assertEquals(2, sample20WithoutReplacement.count());
+    assertEquals(4, sample20WithoutReplacement.count());
   }
 
   @Test
diff --git 
a/core/src/test/scala/org/apache/spark/rdd/PairRDDFunctionsSuite.scala 
b/core/src/test/scala/org/apache/spark/rdd/PairRDDFunctionsSuite.scala
index 945b0944..1564435 100644
--- a/core/src/test/scala/org/apache/spark/rdd/PairRDDFunctionsSuite.scala
+++ b/core/src/test/scala/org/apache/spark/rdd/PairRDDFunctionsSuite.scala
@@ -739,7 +739,7 @@ class PairRDDFunctionsSuite extends SparkFunSuite with 
SharedSparkContext {
         val dist = new BinomialDistribution(trials, p)
         val q = dist.cumulativeProbability(actual)
         withClue(s"p = $p: trials = $trials") {
-          assert(q >= 0.001 && q <= 0.999)
+          assert(0.0 < q && q < 1.0)
         }
       }
     }
diff --git 
a/core/src/test/scala/org/apache/spark/util/random/RandomSamplerSuite.scala 
b/core/src/test/scala/org/apache/spark/util/random/RandomSamplerSuite.scala
index 7eb2f56..c2e3830 100644
--- a/core/src/test/scala/org/apache/spark/util/random/RandomSamplerSuite.scala
+++ b/core/src/test/scala/org/apache/spark/util/random/RandomSamplerSuite.scala
@@ -59,7 +59,7 @@ class RandomSamplerSuite extends SparkFunSuite with Matchers {
   // will always fail with some nonzero probability, so I'll fix the seed to 
prevent these
   // tests from generating random failure noise in CI testing, etc.
   val rngSeed: Random = RandomSampler.newDefaultRNG
-  rngSeed.setSeed(235711)
+  rngSeed.setSeed(235711345678901011L)
 
   // Reference implementation of sampling without replacement (bernoulli)
   def sample[T](data: Iterator[T], f: Double): Iterator[T] = {
diff --git 
a/mllib/src/test/scala/org/apache/spark/ml/classification/GBTClassifierSuite.scala
 
b/mllib/src/test/scala/org/apache/spark/ml/classification/GBTClassifierSuite.scala
index cd59900..379e14f 100644
--- 
a/mllib/src/test/scala/org/apache/spark/ml/classification/GBTClassifierSuite.scala
+++ 
b/mllib/src/test/scala/org/apache/spark/ml/classification/GBTClassifierSuite.scala
@@ -345,7 +345,7 @@ class GBTClassifierSuite extends MLTest with 
DefaultReadWriteTest {
   test("Tests of feature subset strategy") {
     val numClasses = 2
     val gbt = new GBTClassifier()
-      .setSeed(123)
+      .setSeed(42)
       .setMaxDepth(3)
       .setMaxIter(5)
       .setFeatureSubsetStrategy("all")
diff --git 
a/mllib/src/test/scala/org/apache/spark/ml/classification/LogisticRegressionSuite.scala
 
b/mllib/src/test/scala/org/apache/spark/ml/classification/LogisticRegressionSuite.scala
index 2499892..9af7fff 100644
--- 
a/mllib/src/test/scala/org/apache/spark/ml/classification/LogisticRegressionSuite.scala
+++ 
b/mllib/src/test/scala/org/apache/spark/ml/classification/LogisticRegressionSuite.scala
@@ -664,18 +664,16 @@ class LogisticRegressionSuite extends MLTest with 
DefaultReadWriteTest {
       coefficients = coef(glmnet(features, label, weights=w, 
family="binomial", alpha = 0,
       lambda = 0))
       coefficients
-      $`0`
       5 x 1 sparse Matrix of class "dgCMatrix"
                           s0
-      (Intercept)  2.7355261
-      data.V3     -0.5734389
-      data.V4      0.8911736
-      data.V5     -0.3878645
-      data.V6     -0.8060570
-
+      (Intercept)  2.7114519
+      data.V3     -0.5667801
+      data.V4      0.8818754
+      data.V5     -0.3882505
+      data.V6     -0.7891183
      */
-    val coefficientsR = Vectors.dense(-0.5734389, 0.8911736, -0.3878645, 
-0.8060570)
-    val interceptR = 2.7355261
+    val coefficientsR = Vectors.dense(-0.5667801, 0.8818754, -0.3882505, 
-0.7891183)
+    val interceptR = 2.7114519
 
     assert(model1.intercept ~== interceptR relTol 1E-3)
     assert(model1.coefficients ~= coefficientsR relTol 1E-3)
@@ -707,7 +705,8 @@ class LogisticRegressionSuite extends MLTest with 
DefaultReadWriteTest {
     val model2 = trainer2.fit(binaryDataset)
 
     // The solution is generated by 
https://github.com/yanboliang/bound-optimization.
-    val coefficientsExpected1 = Vectors.dense(0.06079437, 0.0, -0.26351059, 
-0.59102199)
+    val coefficientsExpected1 = Vectors.dense(
+      0.05997387390575594, 0.0, -0.26536616889454984, -0.5793842425088045)
     val interceptExpected1 = 1.0
 
     assert(model1.intercept ~== interceptExpected1 relTol 1E-3)
@@ -742,8 +741,8 @@ class LogisticRegressionSuite extends MLTest with 
DefaultReadWriteTest {
     val model4 = trainer4.fit(binaryDataset)
 
     // The solution is generated by 
https://github.com/yanboliang/bound-optimization.
-    val coefficientsExpected3 = Vectors.dense(0.0, 0.0, 0.0, -0.71708632)
-    val interceptExpected3 = 0.58776113
+    val coefficientsExpected3 = Vectors.dense(0.0, 0.0, 0.0, 
-0.7003382019888361)
+    val interceptExpected3 = 0.5673234605102715
 
     assert(model3.intercept ~== interceptExpected3 relTol 1E-3)
     assert(model3.coefficients ~= coefficientsExpected3 relTol 1E-3)
@@ -775,8 +774,9 @@ class LogisticRegressionSuite extends MLTest with 
DefaultReadWriteTest {
 
     // The solution is generated by 
https://github.com/yanboliang/bound-optimization.
     // It should be same as unbound constrained optimization with LBFGS.
-    val coefficientsExpected5 = Vectors.dense(-0.5734389, 0.8911736, 
-0.3878645, -0.8060570)
-    val interceptExpected5 = 2.7355261
+    val coefficientsExpected5 = Vectors.dense(
+      -0.5667990118366208, 0.8819300812352234, -0.38825593561750166, 
-0.7891233856979563)
+    val interceptExpected5 = 2.711413425425
 
     assert(model5.intercept ~== interceptExpected5 relTol 1E-3)
     assert(model5.coefficients ~= coefficientsExpected5 relTol 1E-3)
@@ -810,13 +810,13 @@ class LogisticRegressionSuite extends MLTest with 
DefaultReadWriteTest {
       5 x 1 sparse Matrix of class "dgCMatrix"
                           s0
       (Intercept)  .
-      data.V3     -0.3448461
-      data.V4      1.2776453
-      data.V5     -0.3539178
-      data.V6     -0.7469384
+      data.V3     -0.3451301
+      data.V4      1.2721785
+      data.V5     -0.3537743
+      data.V6     -0.7315618
 
      */
-    val coefficientsR = Vectors.dense(-0.3448461, 1.2776453, -0.3539178, 
-0.7469384)
+    val coefficientsR = Vectors.dense(-0.3451301, 1.2721785, -0.3537743, 
-0.7315618)
 
     assert(model1.intercept ~== 0.0 relTol 1E-3)
     assert(model1.coefficients ~= coefficientsR relTol 1E-2)
@@ -844,7 +844,8 @@ class LogisticRegressionSuite extends MLTest with 
DefaultReadWriteTest {
     val model2 = trainer2.fit(binaryDataset)
 
     // The solution is generated by 
https://github.com/yanboliang/bound-optimization.
-    val coefficientsExpected = Vectors.dense(0.20847553, 0.0, -0.24240289, 
-0.55568071)
+    val coefficientsExpected = Vectors.dense(
+      0.20721074484293306, 0.0, -0.24389739190279183, -0.5446655961212726)
 
     assert(model1.intercept ~== 0.0 relTol 1E-3)
     assert(model1.coefficients ~= coefficientsExpected relTol 1E-3)
@@ -877,15 +878,15 @@ class LogisticRegressionSuite extends MLTest with 
DefaultReadWriteTest {
       $`0`
       5 x 1 sparse Matrix of class "dgCMatrix"
                            s0
-      (Intercept) -0.06775980
+      (Intercept) -0.07157076
       data.V3      .
       data.V4      .
-      data.V5     -0.03933146
-      data.V6     -0.03047580
+      data.V5     -0.04058143
+      data.V6     -0.02322760
 
      */
-    val coefficientsRStd = Vectors.dense(0.0, 0.0, -0.03933146, -0.03047580)
-    val interceptRStd = -0.06775980
+    val coefficientsRStd = Vectors.dense(0.0, 0.0, -0.04058143, -0.02322760)
+    val interceptRStd = -0.07157076
 
     assert(model1.intercept ~== interceptRStd relTol 1E-2)
     assert(model1.coefficients ~= coefficientsRStd absTol 2E-2)
@@ -904,15 +905,15 @@ class LogisticRegressionSuite extends MLTest with 
DefaultReadWriteTest {
       $`0`
       5 x 1 sparse Matrix of class "dgCMatrix"
                           s0
-      (Intercept)  0.3544768
+      (Intercept)  0.3602029
       data.V3      .
       data.V4      .
-      data.V5     -0.1626191
+      data.V5     -0.1635707
       data.V6      .
 
      */
-    val coefficientsR = Vectors.dense(0.0, 0.0, -0.1626191, 0.0)
-    val interceptR = 0.3544768
+    val coefficientsR = Vectors.dense(0.0, 0.0, -0.1635707, 0.0)
+    val interceptR = 0.3602029
 
     assert(model2.intercept ~== interceptR relTol 1E-2)
     assert(model2.coefficients ~== coefficientsR absTol 1E-3)
@@ -945,8 +946,8 @@ class LogisticRegressionSuite extends MLTest with 
DefaultReadWriteTest {
       (Intercept)  .
       data.V3      .
       data.V4      .
-      data.V5     -0.04967635
-      data.V6     -0.04757757
+      data.V5     -0.05164150
+      data.V6     -0.04079129
 
       coefficients
       5 x 1 sparse Matrix of class "dgCMatrix"
@@ -954,13 +955,13 @@ class LogisticRegressionSuite extends MLTest with 
DefaultReadWriteTest {
       (Intercept)  .
       data.V3      .
       data.V4      .
-      data.V5     -0.08433195
+      data.V5     -0.08408014
       data.V6      .
 
      */
-    val coefficientsRStd = Vectors.dense(0.0, 0.0, -0.04967635, -0.04757757)
+    val coefficientsRStd = Vectors.dense(0.0, 0.0, -0.05164150, -0.04079129)
 
-    val coefficientsR = Vectors.dense(0.0, 0.0, -0.08433195, 0.0)
+    val coefficientsR = Vectors.dense(0.0, 0.0, -0.08408014, 0.0)
 
     assert(model1.intercept ~== 0.0 absTol 1E-3)
     assert(model1.coefficients ~= coefficientsRStd absTol 1E-3)
@@ -992,26 +993,26 @@ class LogisticRegressionSuite extends MLTest with 
DefaultReadWriteTest {
       coefficientsStd
       5 x 1 sparse Matrix of class "dgCMatrix"
                            s0
-      (Intercept)  0.12707703
-      data.V3     -0.06980967
-      data.V4      0.10803933
-      data.V5     -0.04800404
-      data.V6     -0.10165096
+      (Intercept)  0.12943705
+      data.V3     -0.06979418
+      data.V4      0.10691465
+      data.V5     -0.04835674
+      data.V6     -0.09939108
 
       coefficients
       5 x 1 sparse Matrix of class "dgCMatrix"
                            s0
-      (Intercept)  0.46613016
-      data.V3     -0.04944529
-      data.V4      0.02326772
-      data.V5     -0.11362772
-      data.V6     -0.06312848
+      (Intercept)  0.47553535
+      data.V3     -0.05058465
+      data.V4      0.02296823
+      data.V5     -0.11368284
+      data.V6     -0.06309008
 
      */
-    val coefficientsRStd = Vectors.dense(-0.06980967, 0.10803933, -0.04800404, 
-0.10165096)
-    val interceptRStd = 0.12707703
-    val coefficientsR = Vectors.dense(-0.04944529, 0.02326772, -0.11362772, 
-0.06312848)
-    val interceptR = 0.46613016
+    val coefficientsRStd = Vectors.dense(-0.06979418, 0.10691465, -0.04835674, 
-0.09939108)
+    val interceptRStd = 0.12943705
+    val coefficientsR = Vectors.dense(-0.05058465, 0.02296823, -0.11368284, 
-0.06309008)
+    val interceptR = 0.47553535
 
     assert(model1.intercept ~== interceptRStd relTol 1E-3)
     assert(model1.coefficients ~= coefficientsRStd relTol 1E-3)
@@ -1042,10 +1043,12 @@ class LogisticRegressionSuite extends MLTest with 
DefaultReadWriteTest {
     val model2 = trainer2.fit(binaryDataset)
 
     // The solution is generated by 
https://github.com/yanboliang/bound-optimization.
-    val coefficientsExpectedWithStd = Vectors.dense(-0.06985003, 0.0, 
-0.04794278, -0.10168595)
-    val interceptExpectedWithStd = 0.45750141
-    val coefficientsExpected = Vectors.dense(-0.0494524, 0.0, -0.11360797, 
-0.06313577)
-    val interceptExpected = 0.53722967
+    val coefficientsExpectedWithStd = Vectors.dense(
+      -0.06974410278847253, 0.0, -0.04833486093952599, -0.09941770618793982)
+    val interceptExpectedWithStd = 0.4564981350661977
+    val coefficientsExpected = Vectors.dense(
+      -0.050579069523730306, 0.0, -0.11367447252893222, -0.06309435539607525)
+    val interceptExpected = 0.5457873335999178
 
     assert(model1.intercept ~== interceptExpectedWithStd relTol 1E-3)
     assert(model1.coefficients ~= coefficientsExpectedWithStd relTol 1E-3)
@@ -1078,23 +1081,24 @@ class LogisticRegressionSuite extends MLTest with 
DefaultReadWriteTest {
       5 x 1 sparse Matrix of class "dgCMatrix"
                            s0
       (Intercept)  .
-      data.V3     -0.06000152
-      data.V4      0.12598737
-      data.V5     -0.04669009
-      data.V6     -0.09941025
+      data.V3     -0.05998915
+      data.V4      0.12541885
+      data.V5     -0.04697872
+      data.V6     -0.09713973
 
       coefficients
       5 x 1 sparse Matrix of class "dgCMatrix"
                             s0
       (Intercept)  .
-      data.V3     -0.005482255
-      data.V4      0.048106338
-      data.V5     -0.093411640
-      data.V6     -0.054149798
+      data.V3     -0.005927466
+      data.V4      0.048313659
+      data.V5     -0.092956052
+      data.V6     -0.053974895
 
      */
-    val coefficientsRStd = Vectors.dense(-0.06000152, 0.12598737, -0.04669009, 
-0.09941025)
-    val coefficientsR = Vectors.dense(-0.005482255, 0.048106338, -0.093411640, 
-0.054149798)
+    val coefficientsRStd = Vectors.dense(-0.05998915, 0.12541885, -0.04697872, 
-0.09713973)
+    val coefficientsR = Vectors.dense(
+      -0.0059320221190687205, 0.04834399477383437, -0.09296353778288495, 
-0.05398080548228108)
 
     assert(model1.intercept ~== 0.0 absTol 1E-3)
     assert(model1.coefficients ~= coefficientsRStd relTol 1E-2)
@@ -1122,8 +1126,10 @@ class LogisticRegressionSuite extends MLTest with 
DefaultReadWriteTest {
     val model2 = trainer2.fit(binaryDataset)
 
     // The solution is generated by 
https://github.com/yanboliang/bound-optimization.
-    val coefficientsExpectedWithStd = Vectors.dense(-0.00796538, 0.0, 
-0.0394228, -0.0873314)
-    val coefficientsExpected = Vectors.dense(0.01105972, 0.0, -0.08574949, 
-0.05079558)
+    val coefficientsExpectedWithStd = Vectors.dense(
+      -0.00845365508769699, 0.0, -0.03954848648474558, -0.0851639471468608)
+    val coefficientsExpected = Vectors.dense(
+      0.010675769768102661, 0.0, -0.0852582080623827, -0.050615535080106376)
 
     assert(model1.intercept ~== 0.0 relTol 1E-3)
     assert(model1.coefficients ~= coefficientsExpectedWithStd relTol 1E-3)
@@ -1134,7 +1140,7 @@ class LogisticRegressionSuite extends MLTest with 
DefaultReadWriteTest {
   test("binary logistic regression with intercept with ElasticNet 
regularization") {
     val trainer1 = (new 
LogisticRegression).setFitIntercept(true).setMaxIter(120)
       
.setElasticNetParam(0.38).setRegParam(0.21).setStandardization(true).setWeightCol("weight")
-    val trainer2 = (new 
LogisticRegression).setFitIntercept(true).setMaxIter(30)
+    val trainer2 = (new 
LogisticRegression).setFitIntercept(true).setMaxIter(60)
       
.setElasticNetParam(0.38).setRegParam(0.21).setStandardization(false).setWeightCol("weight")
 
     val model1 = trainer1.fit(binaryDataset)
@@ -1155,26 +1161,26 @@ class LogisticRegressionSuite extends MLTest with 
DefaultReadWriteTest {
       coefficientsStd
       5 x 1 sparse Matrix of class "dgCMatrix"
                            s0
-      (Intercept)  0.49991996
-      data.V3     -0.04131110
+      (Intercept)  0.51344133
+      data.V3     -0.04395595
       data.V4      .
-      data.V5     -0.08585233
-      data.V6     -0.15875400
+      data.V5     -0.08699271
+      data.V6     -0.15249200
 
       coefficients
       5 x 1 sparse Matrix of class "dgCMatrix"
                           s0
-      (Intercept)  0.5024256
+      (Intercept)  0.50936159
       data.V3      .
       data.V4      .
-      data.V5     -0.1846038
-      data.V6     -0.0559614
+      data.V5     -0.18569346
+      data.V6     -0.05625862
 
      */
-    val coefficientsRStd = Vectors.dense(-0.04131110, 0.0, -0.08585233, 
-0.15875400)
-    val interceptRStd = 0.49991996
-    val coefficientsR = Vectors.dense(0.0, 0.0, -0.1846038, -0.0559614)
-    val interceptR = 0.5024256
+    val coefficientsRStd = Vectors.dense(-0.04395595, 0.0, -0.08699271, 
-0.15249200)
+    val interceptRStd = 0.51344133
+    val coefficientsR = Vectors.dense(0.0, 0.0, -0.18569346, -0.05625862)
+    val interceptR = 0.50936159
 
     assert(model1.intercept ~== interceptRStd relTol 6E-2)
     assert(model1.coefficients ~== coefficientsRStd absTol 5E-3)
@@ -1285,13 +1291,13 @@ class LogisticRegressionSuite extends MLTest with 
DefaultReadWriteTest {
 
        5 x 1 sparse Matrix of class "dgCMatrix"
                            s0
-       (Intercept) -0.2516986
+       (Intercept) -0.2521953
        data.V3      0.0000000
        data.V4      .
        data.V5      .
        data.V6      .
      */
-    val interceptR = -0.2516986
+    val interceptR = -0.2521953
     val coefficientsR = Vectors.dense(0.0, 0.0, 0.0, 0.0)
 
     assert(model1.intercept ~== interceptR relTol 1E-5)
@@ -1373,37 +1379,36 @@ class LogisticRegressionSuite extends MLTest with 
DefaultReadWriteTest {
       $`0`
       5 x 1 sparse Matrix of class "dgCMatrix"
                        s0
-              -2.10320093
-      data.V3  0.24337896
-      data.V4 -0.05916156
-      data.V5  0.14446790
-      data.V6  0.35976165
+              -2.22347257
+      data.V3  0.24574397
+      data.V4 -0.04054235
+      data.V5  0.14963756
+      data.V6  0.37504027
 
       $`1`
       5 x 1 sparse Matrix of class "dgCMatrix"
                       s0
-               0.3394473
-      data.V3 -0.3443375
-      data.V4  0.9181331
-      data.V5 -0.2283959
-      data.V6 -0.4388066
+               0.3674309
+      data.V3 -0.3266910
+      data.V4  0.8939282
+      data.V5 -0.2363519
+      data.V6 -0.4631336
 
       $`2`
       5 x 1 sparse Matrix of class "dgCMatrix"
                        s0
-               1.76375361
-      data.V3  0.10095851
-      data.V4 -0.85897154
-      data.V5  0.08392798
-      data.V6  0.07904499
-
+               1.85604170
+      data.V3  0.08094703
+      data.V4 -0.85338588
+      data.V5  0.08671439
+      data.V6  0.08809332
 
      */
     val coefficientsR = new DenseMatrix(3, 4, Array(
-      0.24337896, -0.05916156, 0.14446790, 0.35976165,
-      -0.3443375, 0.9181331, -0.2283959, -0.4388066,
-      0.10095851, -0.85897154, 0.08392798, 0.07904499), isTransposed = true)
-    val interceptsR = Vectors.dense(-2.10320093, 0.3394473, 1.76375361)
+      0.24574397, -0.04054235, 0.14963756, 0.37504027,
+      -0.3266910, 0.8939282, -0.2363519, -0.4631336,
+      0.08094703, -0.85338588, 0.08671439, 0.08809332), isTransposed = true)
+    val interceptsR = Vectors.dense(-2.22347257, 0.3674309, 1.85604170)
 
     model1.coefficientMatrix.colIter.foreach(v => assert(v.toArray.sum ~== 0.0 
absTol eps))
     model2.coefficientMatrix.colIter.foreach(v => assert(v.toArray.sum ~== 0.0 
absTol eps))
@@ -1496,10 +1501,12 @@ class LogisticRegressionSuite extends MLTest with 
DefaultReadWriteTest {
 
     // The solution is generated by 
https://github.com/yanboliang/bound-optimization.
     val coefficientsExpected1 = new DenseMatrix(3, 4, Array(
-      2.52076464, 2.73596057, 1.87984904, 2.73264492,
-      1.93302281, 3.71363303, 1.50681746, 1.93398782,
-      2.37839917, 1.93601818, 1.81924758, 2.45191255), isTransposed = true)
-    val interceptsExpected1 = Vectors.dense(1.00010477, 3.44237083, 4.86740286)
+      2.1156620676212325, 2.7146375863138825, 1.8108730417428125, 
2.711975470258063,
+      1.54314110882009, 3.648963914233324, 1.4248901324480239, 
1.8737908246138315,
+      1.950852726788052, 1.9017484391817425, 1.7479497661988832, 
2.425055298693075),
+      isTransposed = true)
+    val interceptsExpected1 = Vectors.dense(
+      1.0000152482448372, 3.591773288423673, 5.079685953744937)
 
     checkCoefficientsEquivalent(model1.coefficientMatrix, 
coefficientsExpected1)
     assert(model1.interceptVector ~== interceptsExpected1 relTol 0.01)
@@ -1532,9 +1539,10 @@ class LogisticRegressionSuite extends MLTest with 
DefaultReadWriteTest {
 
     // The solution is generated by 
https://github.com/yanboliang/bound-optimization.
     val coefficientsExpected3 = new DenseMatrix(3, 4, Array(
-      1.61967097, 1.16027835, 1.45131448, 1.97390431,
-      1.30529317, 2.0, 1.12985473, 1.26652854,
-      1.61647195, 1.0, 1.40642959, 1.72985589), isTransposed = true)
+      1.641980508924569, 1.1579023489264648, 1.434651352010351, 
1.9541352988127463,
+      1.3416273422126057, 2.0, 1.1014102844446283, 1.2076556940852765,
+      1.6371808928302913, 1.0, 1.3936094723717016, 1.71022540576362),
+      isTransposed = true)
     val interceptsExpected3 = Vectors.dense(1.0, 2.0, 2.0)
 
     checkCoefficientsEquivalent(model3.coefficientMatrix, 
coefficientsExpected3)
@@ -1566,10 +1574,12 @@ class LogisticRegressionSuite extends MLTest with 
DefaultReadWriteTest {
     // The solution is generated by 
https://github.com/yanboliang/bound-optimization.
     // It should be same as unbound constrained optimization with LBFGS.
     val coefficientsExpected5 = new DenseMatrix(3, 4, Array(
-      0.24337896, -0.05916156, 0.14446790, 0.35976165,
-      -0.3443375, 0.9181331, -0.2283959, -0.4388066,
-      0.10095851, -0.85897154, 0.08392798, 0.07904499), isTransposed = true)
-    val interceptsExpected5 = Vectors.dense(-2.10320093, 0.3394473, 1.76375361)
+      0.24573204902629314, -0.040610820463585905, 0.14962716893619094, 
0.37502549108817784,
+      -0.3266914048842952, 0.8940567211111817, -0.23633898260880218, 
-0.4631024664883818,
+      0.08095935585808962, -0.8534459006476851, 0.0867118136726069, 
0.0880769754002182),
+      isTransposed = true)
+    val interceptsExpected5 = Vectors.dense(
+      -2.2231282183460723, 0.3669496747012527, 1.856178543644802)
 
     checkCoefficientsEquivalent(model5.coefficientMatrix, 
coefficientsExpected5)
     assert(model5.interceptVector ~== interceptsExpected5 relTol 0.01)
@@ -1602,35 +1612,35 @@ class LogisticRegressionSuite extends MLTest with 
DefaultReadWriteTest {
       5 x 1 sparse Matrix of class "dgCMatrix"
                        s0
                .
-      data.V3  0.07276291
-      data.V4 -0.36325496
-      data.V5  0.12015088
-      data.V6  0.31397340
+      data.V3  0.06892068
+      data.V4 -0.36546704
+      data.V5  0.12274583
+      data.V6  0.32616580
 
       $`1`
       5 x 1 sparse Matrix of class "dgCMatrix"
                       s0
                .
-      data.V3 -0.3180040
-      data.V4  0.9679074
-      data.V5 -0.2252219
-      data.V6 -0.4319914
+      data.V3 -0.2987384
+      data.V4  0.9483147
+      data.V5 -0.2328113
+      data.V6 -0.4555157
 
       $`2`
       5 x 1 sparse Matrix of class "dgCMatrix"
                       s0
                .
-      data.V3  0.2452411
-      data.V4 -0.6046524
-      data.V5  0.1050710
-      data.V6  0.1180180
+      data.V3  0.2298177
+      data.V4 -0.5828477
+      data.V5  0.1100655
+      data.V6  0.1293499
 
 
      */
     val coefficientsR = new DenseMatrix(3, 4, Array(
-      0.07276291, -0.36325496, 0.12015088, 0.31397340,
-      -0.3180040, 0.9679074, -0.2252219, -0.4319914,
-      0.2452411, -0.6046524, 0.1050710, 0.1180180), isTransposed = true)
+      0.06892068, -0.36546704, 0.12274583, 0.32616580,
+      -0.2987384, 0.9483147, -0.2328113, -0.4555157,
+      0.2298177, -0.5828477, 0.1100655, 0.1293499), isTransposed = true)
 
     model1.coefficientMatrix.colIter.foreach(v => assert(v.toArray.sum ~== 0.0 
absTol eps))
     model2.coefficientMatrix.colIter.foreach(v => assert(v.toArray.sum ~== 0.0 
absTol eps))
@@ -1664,9 +1674,10 @@ class LogisticRegressionSuite extends MLTest with 
DefaultReadWriteTest {
 
     // The solution is generated by 
https://github.com/yanboliang/bound-optimization.
     val coefficientsExpected = new DenseMatrix(3, 4, Array(
-      1.62410051, 1.38219391, 1.34486618, 1.74641729,
-      1.23058989, 2.71787825, 1.0, 1.00007073,
-      1.79478632, 1.14360459, 1.33011603, 1.55093897), isTransposed = true)
+      1.5933935326002155, 1.4427758360562475, 1.356079506266844, 
1.7818682794856215,
+      1.2224266732592248, 2.762691362720858, 1.0005885171478472, 
1.0000022613855966,
+      1.7524631428961193, 1.2292565990448736, 1.3433784431904323, 
1.5846063017678864),
+      isTransposed = true)
 
     checkCoefficientsEquivalent(model1.coefficientMatrix, coefficientsExpected)
     assert(model1.interceptVector.toArray === Array.fill(3)(0.0))
@@ -1703,27 +1714,27 @@ class LogisticRegressionSuite extends MLTest with 
DefaultReadWriteTest {
       $`0`
       5 x 1 sparse Matrix of class "dgCMatrix"
                        s0
-              -0.62244703
+              -0.69265374
       data.V3  .
       data.V4  .
       data.V5  .
-      data.V6  0.08419825
+      data.V6  0.09064661
 
       $`1`
       5 x 1 sparse Matrix of class "dgCMatrix"
                       s0
-              -0.2804845
-      data.V3 -0.1336960
-      data.V4  0.3717091
-      data.V5 -0.1530363
-      data.V6 -0.2035286
+              -0.2260274
+      data.V3 -0.1144333
+      data.V4  0.3204703
+      data.V5 -0.1621061
+      data.V6 -0.2308192
 
       $`2`
       5 x 1 sparse Matrix of class "dgCMatrix"
                       s0
-               0.9029315
+               0.9186811
       data.V3  .
-      data.V4 -0.4629737
+      data.V4 -0.4832131
       data.V5  .
       data.V6  .
 
@@ -1732,25 +1743,25 @@ class LogisticRegressionSuite extends MLTest with 
DefaultReadWriteTest {
       $`0`
       5 x 1 sparse Matrix of class "dgCMatrix"
                        s0
-              -0.44215290
+              -0.44707756
       data.V3  .
       data.V4  .
-      data.V5  0.01767089
-      data.V6  0.02542866
+      data.V5  0.01641412
+      data.V6  0.03570376
 
       $`1`
       5 x 1 sparse Matrix of class "dgCMatrix"
                        s0
-               0.76308326
-      data.V3 -0.06818576
+               0.75180900
+      data.V3 -0.05110822
       data.V4  .
-      data.V5 -0.20446351
-      data.V6 -0.13017924
+      data.V5 -0.21595670
+      data.V6 -0.16162836
 
       $`2`
       5 x 1 sparse Matrix of class "dgCMatrix"
                       s0
-              -0.3209304
+              -0.3047314
       data.V3  .
       data.V4  .
       data.V5  .
@@ -1759,15 +1770,15 @@ class LogisticRegressionSuite extends MLTest with 
DefaultReadWriteTest {
 
      */
     val coefficientsRStd = new DenseMatrix(3, 4, Array(
-      0.0, 0.0, 0.0, 0.08419825,
-      -0.1336960, 0.3717091, -0.1530363, -0.2035286,
-      0.0, -0.4629737, 0.0, 0.0), isTransposed = true)
-    val interceptsRStd = Vectors.dense(-0.62244703, -0.2804845, 0.9029315)
+      0.0, 0.0, 0.0, 0.09064661,
+      -0.1144333, 0.3204703, -0.1621061, -0.2308192,
+      0.0, -0.4832131, 0.0, 0.0), isTransposed = true)
+    val interceptsRStd = Vectors.dense(-0.72638218, -0.01737265, 0.74375484)
     val coefficientsR = new DenseMatrix(3, 4, Array(
-      0.0, 0.0, 0.01767089, 0.02542866,
-      -0.06818576, 0.0, -0.20446351, -0.13017924,
+      0.0, 0.0, 0.01641412, 0.03570376,
+      -0.05110822, 0.0, -0.21595670, -0.16162836,
       0.0, 0.0, 0.0, 0.0), isTransposed = true)
-    val interceptsR = Vectors.dense(-0.44215290, 0.76308326, -0.3209304)
+    val interceptsR = Vectors.dense(-0.44707756, 0.75180900, -0.3047314)
 
     assert(model1.coefficientMatrix ~== coefficientsRStd absTol 0.05)
     assert(model1.interceptVector ~== interceptsRStd relTol 0.1)
@@ -1800,31 +1811,30 @@ class LogisticRegressionSuite extends MLTest with 
DefaultReadWriteTest {
       coefficientsStd
       $`0`
       5 x 1 sparse Matrix of class "dgCMatrix"
-                      s0
+                   s0
               .
       data.V3 .
       data.V4 .
       data.V5 .
-      data.V6 0.01144225
+      data.V6 0.01167
 
       $`1`
       5 x 1 sparse Matrix of class "dgCMatrix"
                       s0
                .
-      data.V3 -0.1678787
-      data.V4  0.5385351
-      data.V5 -0.1573039
-      data.V6 -0.2471624
+      data.V3 -0.1413518
+      data.V4  0.5100469
+      data.V5 -0.1658025
+      data.V6 -0.2755998
 
       $`2`
       5 x 1 sparse Matrix of class "dgCMatrix"
-              s0
-               .
-      data.V3  .
-      data.V4  .
-      data.V5  .
-      data.V6  .
-
+                       s0
+              .
+      data.V3 0.001536337
+      data.V4 .
+      data.V5 .
+      data.V6 .
 
       coefficients
       $`0`
@@ -1841,9 +1851,9 @@ class LogisticRegressionSuite extends MLTest with 
DefaultReadWriteTest {
                       s0
                .
       data.V3  .
-      data.V4  0.1929409
-      data.V5 -0.1889121
-      data.V6 -0.1010413
+      data.V4  0.2094410
+      data.V5 -0.1944582
+      data.V6 -0.1307681
 
       $`2`
       5 x 1 sparse Matrix of class "dgCMatrix"
@@ -1857,13 +1867,13 @@ class LogisticRegressionSuite extends MLTest with 
DefaultReadWriteTest {
 
      */
     val coefficientsRStd = new DenseMatrix(3, 4, Array(
-      0.0, 0.0, 0.0, 0.01144225,
-      -0.1678787, 0.5385351, -0.1573039, -0.2471624,
-      0.0, 0.0, 0.0, 0.0), isTransposed = true)
+      0.0, 0.0, 0.0, 0.01167,
+      -0.1413518, 0.5100469, -0.1658025, -0.2755998,
+      0.001536337, 0.0, 0.0, 0.0), isTransposed = true)
 
     val coefficientsR = new DenseMatrix(3, 4, Array(
       0.0, 0.0, 0.0, 0.0,
-      0.0, 0.1929409, -0.1889121, -0.1010413,
+      0.0, 0.2094410, -0.1944582, -0.1307681,
       0.0, 0.0, 0.0, 0.0), isTransposed = true)
 
     assert(model1.coefficientMatrix ~== coefficientsRStd absTol 0.01)
@@ -1897,72 +1907,71 @@ class LogisticRegressionSuite extends MLTest with 
DefaultReadWriteTest {
       coefficientsStd
       $`0`
       5 x 1 sparse Matrix of class "dgCMatrix"
-                         s0
-              -1.5898288335
-      data.V3  0.1691226336
-      data.V4  0.0002983651
-      data.V5  0.1001732896
-      data.V6  0.2554575585
+                       s0
+              -1.68571384
+      data.V3  0.17156077
+      data.V4  0.01658014
+      data.V5  0.10303296
+      data.V6  0.26459585
 
       $`1`
       5 x 1 sparse Matrix of class "dgCMatrix"
                       s0
-               0.2125746
-      data.V3 -0.2304586
-      data.V4  0.6153492
-      data.V5 -0.1537017
-      data.V6 -0.2975443
+               0.2364585
+      data.V3 -0.2182805
+      data.V4  0.5960025
+      data.V5 -0.1587441
+      data.V6 -0.3121284
 
       $`2`
       5 x 1 sparse Matrix of class "dgCMatrix"
                        s0
-               1.37725427
-      data.V3  0.06133600
-      data.V4 -0.61564761
-      data.V5  0.05352840
-      data.V6  0.04208671
-
+               1.44925536
+      data.V3  0.04671972
+      data.V4 -0.61258267
+      data.V5  0.05571116
+      data.V6  0.04753251
 
       coefficients
       $`0`
       5 x 1 sparse Matrix of class "dgCMatrix"
-                      s0
-              -1.5681088
-      data.V3  0.1508182
-      data.V4  0.0121955
-      data.V5  0.1217930
-      data.V6  0.2162850
+                       s0
+              -1.65140201
+      data.V3  0.15446206
+      data.V4  0.02134769
+      data.V5  0.12524946
+      data.V6  0.22607972
 
       $`1`
       5 x 1 sparse Matrix of class "dgCMatrix"
                       s0
-               1.1217130
-      data.V3 -0.2028984
-      data.V4  0.2862431
-      data.V5 -0.1843559
-      data.V6 -0.2481218
+               1.1367722
+      data.V3 -0.1931713
+      data.V4  0.2766548
+      data.V5 -0.1910455
+      data.V6 -0.2629336
 
       $`2`
       5 x 1 sparse Matrix of class "dgCMatrix"
                        s0
-               0.44639579
-      data.V3  0.05208012
-      data.V4 -0.29843864
-      data.V5  0.06256289
-      data.V6  0.03183676
+               0.51462979
+      data.V3  0.03870921
+      data.V4 -0.29800245
+      data.V5  0.06579606
+      data.V6  0.03685390
 
 
      */
     val coefficientsRStd = new DenseMatrix(3, 4, Array(
-      0.1691226336, 0.0002983651, 0.1001732896, 0.2554575585,
-      -0.2304586, 0.6153492, -0.1537017, -0.2975443,
-      0.06133600, -0.61564761, 0.05352840, 0.04208671), isTransposed = true)
-    val interceptsRStd = Vectors.dense(-1.5898288335, 0.2125746, 1.37725427)
+      0.17156077, 0.01658014, 0.10303296, 0.26459585,
+      -0.2182805, 0.5960025, -0.1587441, -0.3121284,
+      0.04671972, -0.61258267, 0.05571116, 0.04753251), isTransposed = true)
+    val interceptsRStd = Vectors.dense(-1.68571384, 0.2364585, 1.44925536)
     val coefficientsR = new DenseMatrix(3, 4, Array(
-      0.1508182, 0.0121955, 0.1217930, 0.2162850,
-      -0.2028984, 0.2862431, -0.1843559, -0.2481218,
-      0.05208012, -0.29843864, 0.06256289, 0.03183676), isTransposed = true)
-    val interceptsR = Vectors.dense(-1.5681088, 1.1217130, 0.44639579)
+      0.15446206, 0.02134769, 0.12524946, 0.22607972,
+      -0.1931713, 0.2766548, -0.1910455, -0.2629336,
+      0.03870921, -0.29800245, 0.06579606, 0.03685390), isTransposed = true)
+    val interceptsR = Vectors.dense(-1.65140201, 1.1367722, 0.51462979)
 
     assert(model1.coefficientMatrix ~== coefficientsRStd absTol 0.001)
     assert(model1.interceptVector ~== interceptsRStd relTol 0.05)
@@ -1996,15 +2005,16 @@ class LogisticRegressionSuite extends MLTest with 
DefaultReadWriteTest {
 
     // The solution is generated by 
https://github.com/yanboliang/bound-optimization.
     val coefficientsExpectedWithStd = new DenseMatrix(3, 4, Array(
-      1.0, 1.0, 1.0, 1.01647497,
-      1.0, 1.44105616, 1.0, 1.0,
+      1.0, 1.0, 1.0, 1.025970328910313,
+      1.0, 1.4150672323873024, 1.0, 1.0,
       1.0, 1.0, 1.0, 1.0), isTransposed = true)
-    val interceptsExpectedWithStd = Vectors.dense(2.52055893, 1.0, 2.560682)
+    val interceptsExpectedWithStd = Vectors.dense(
+      2.4259954221861473, 1.0000087410832004, 2.490461716522559)
     val coefficientsExpected = new DenseMatrix(3, 4, Array(
-      1.0, 1.0, 1.03189386, 1.0,
+      1.0, 1.0, 1.0336746541813002, 1.0,
       1.0, 1.0, 1.0, 1.0,
       1.0, 1.0, 1.0, 1.0), isTransposed = true)
-    val interceptsExpected = Vectors.dense(1.06418835, 1.0, 1.20494701)
+    val interceptsExpected = Vectors.dense(1.0521598454128, 1.0, 
1.213158241431565)
 
     assert(model1.coefficientMatrix ~== coefficientsExpectedWithStd relTol 
0.01)
     assert(model1.interceptVector ~== interceptsExpectedWithStd relTol 0.01)
@@ -2037,69 +2047,68 @@ class LogisticRegressionSuite extends MLTest with 
DefaultReadWriteTest {
       5 x 1 sparse Matrix of class "dgCMatrix"
                        s0
                .
-      data.V3  0.04048126
-      data.V4 -0.23075758
-      data.V5  0.08228864
-      data.V6  0.22277648
+      data.V3  0.03804571
+      data.V4 -0.23204409
+      data.V5  0.08337512
+      data.V6  0.23029089
 
       $`1`
       5 x 1 sparse Matrix of class "dgCMatrix"
                       s0
                .
-      data.V3 -0.2149745
-      data.V4  0.6478666
-      data.V5 -0.1515158
-      data.V6 -0.2930498
+      data.V3 -0.2015495
+      data.V4  0.6328705
+      data.V5 -0.1562475
+      data.V6 -0.3071447
 
       $`2`
       5 x 1 sparse Matrix of class "dgCMatrix"
                        s0
                .
-      data.V3  0.17449321
-      data.V4 -0.41710901
-      data.V5  0.06922716
-      data.V6  0.07027332
-
+      data.V3  0.16350376
+      data.V4 -0.40082637
+      data.V5  0.07287239
+      data.V6  0.07685379
 
       coefficients
       $`0`
       5 x 1 sparse Matrix of class "dgCMatrix"
                         s0
                .
-      data.V3 -0.003949652
-      data.V4 -0.142982415
-      data.V5  0.091439598
-      data.V6  0.179286241
+      data.V3 -0.006493452
+      data.V4 -0.143831823
+      data.V5  0.092538445
+      data.V6  0.187244839
 
       $`1`
       5 x 1 sparse Matrix of class "dgCMatrix"
                        s0
                .
-      data.V3 -0.09071124
-      data.V4  0.39752531
-      data.V5 -0.16233832
-      data.V6 -0.22206059
+      data.V3 -0.08068443
+      data.V4  0.39038929
+      data.V5 -0.16822390
+      data.V6 -0.23667470
 
       $`2`
       5 x 1 sparse Matrix of class "dgCMatrix"
                        s0
                .
-      data.V3  0.09466090
-      data.V4 -0.25454290
-      data.V5  0.07089872
-      data.V6  0.04277435
+      data.V3  0.08717788
+      data.V4 -0.24655746
+      data.V5  0.07568546
+      data.V6  0.04942986
 
 
      */
     val coefficientsRStd = new DenseMatrix(3, 4, Array(
-      0.04048126, -0.23075758, 0.08228864, 0.22277648,
-      -0.2149745, 0.6478666, -0.1515158, -0.2930498,
-      0.17449321, -0.41710901, 0.06922716, 0.07027332), isTransposed = true)
+      0.03804571, -0.23204409, 0.08337512, 0.23029089,
+      -0.2015495, 0.6328705, -0.1562475, -0.3071447,
+      0.16350376, -0.40082637, 0.07287239, 0.07685379), isTransposed = true)
 
     val coefficientsR = new DenseMatrix(3, 4, Array(
-      -0.003949652, -0.142982415, 0.091439598, 0.179286241,
-      -0.09071124, 0.39752531, -0.16233832, -0.22206059,
-      0.09466090, -0.25454290, 0.07089872, 0.04277435), isTransposed = true)
+      -0.006493452, -0.143831823, 0.092538445, 0.187244839,
+      -0.08068443, 0.39038929, -0.16822390, -0.23667470,
+      0.08717788, -0.24655746, 0.07568546, 0.04942986), isTransposed = true)
 
     assert(model1.coefficientMatrix ~== coefficientsRStd absTol 0.01)
     assert(model1.interceptVector.toArray === Array.fill(3)(0.0))
@@ -2150,7 +2159,7 @@ class LogisticRegressionSuite extends MLTest with 
DefaultReadWriteTest {
       .setMaxIter(220).setTol(1e-10)
     val trainer2 = (new 
LogisticRegression).setFitIntercept(true).setWeightCol("weight")
       .setElasticNetParam(0.5).setRegParam(0.1).setStandardization(false)
-      .setMaxIter(90).setTol(1e-10)
+      .setMaxIter(220).setTol(1e-10)
 
     val model1 = trainer1.fit(multinomialDataset)
     val model2 = trainer2.fit(multinomialDataset)
@@ -2170,54 +2179,53 @@ class LogisticRegressionSuite extends MLTest with 
DefaultReadWriteTest {
       $`0`
       5 x 1 sparse Matrix of class "dgCMatrix"
                        s0
-              -0.50133383
+              -0.55325803
       data.V3  .
       data.V4  .
       data.V5  .
-      data.V6  0.08351653
+      data.V6  0.09074857
 
       $`1`
       5 x 1 sparse Matrix of class "dgCMatrix"
-                      s0
-              -0.3151913
-      data.V3 -0.1058702
-      data.V4  0.3183251
-      data.V5 -0.1212969
-      data.V6 -0.1629778
+                       s0
+              -0.27291366
+      data.V3 -0.09093399
+      data.V4  0.28078251
+      data.V5 -0.12854559
+      data.V6 -0.18382494
 
       $`2`
       5 x 1 sparse Matrix of class "dgCMatrix"
                       s0
-               0.8165252
+               0.8261717
       data.V3  .
-      data.V4 -0.3943069
+      data.V4 -0.4064444
       data.V5  .
       data.V6  .
 
-
       coefficients
       $`0`
       5 x 1 sparse Matrix of class "dgCMatrix"
                        s0
-              -0.38857157
+              -0.40016908
       data.V3  .
       data.V4  .
-      data.V5  0.02384198
-      data.V6  0.03127749
+      data.V5  0.02312769
+      data.V6  0.04159224
 
       $`1`
       5 x 1 sparse Matrix of class "dgCMatrix"
                        s0
-               0.62492165
-      data.V3 -0.04949061
+               0.62474768
+      data.V3 -0.03776471
       data.V4  .
-      data.V5 -0.18584462
-      data.V6 -0.08952455
+      data.V5 -0.19588206
+      data.V6 -0.11187712
 
       $`2`
       5 x 1 sparse Matrix of class "dgCMatrix"
                       s0
-              -0.2363501
+              -0.2245786
       data.V3  .
       data.V4  .
       data.V5  .
@@ -2226,15 +2234,15 @@ class LogisticRegressionSuite extends MLTest with 
DefaultReadWriteTest {
 
      */
     val coefficientsRStd = new DenseMatrix(3, 4, Array(
-      0.0, 0.0, 0.0, 0.08351653,
-      -0.1058702, 0.3183251, -0.1212969, -0.1629778,
-      0.0, -0.3943069, 0.0, 0.0), isTransposed = true)
-    val interceptsRStd = Vectors.dense(-0.50133383, -0.3151913, 0.8165252)
+      0.0, 0.0, 0.0, 0.09074857,
+      -0.09093399, 0.28078251, -0.12854559, -0.18382494,
+      0.0, -0.4064444, 0.0, 0.0), isTransposed = true)
+    val interceptsRStd = Vectors.dense(-0.55325803, -0.27291366, 0.8261717)
     val coefficientsR = new DenseMatrix(3, 4, Array(
-      0.0, 0.0, 0.02384198, 0.03127749,
-      -0.04949061, 0.0, -0.18584462, -0.08952455,
+      0.0, 0.0, 0.02312769, 0.04159224,
+      -0.03776471, 0.0, -0.19588206, -0.11187712,
       0.0, 0.0, 0.0, 0.0), isTransposed = true)
-    val interceptsR = Vectors.dense(-0.38857157, 0.62492165, -0.2363501)
+    val interceptsR = Vectors.dense(-0.40016908, 0.62474768, -0.2245786)
 
     assert(model1.coefficientMatrix ~== coefficientsRStd absTol 0.05)
     assert(model1.interceptVector ~== interceptsRStd absTol 0.1)
@@ -2274,27 +2282,26 @@ class LogisticRegressionSuite extends MLTest with 
DefaultReadWriteTest {
       data.V3 .
       data.V4 .
       data.V5 .
-      data.V6 0.03238285
+      data.V6 0.03418889
 
       $`1`
       5 x 1 sparse Matrix of class "dgCMatrix"
                       s0
                .
-      data.V3 -0.1328284
-      data.V4  0.4219321
-      data.V5 -0.1247544
-      data.V6 -0.1893318
+      data.V3 -0.1114779
+      data.V4  0.3992145
+      data.V5 -0.1315371
+      data.V6 -0.2107956
 
       $`2`
       5 x 1 sparse Matrix of class "dgCMatrix"
                        s0
               .
-      data.V3 0.004572312
+      data.V3 0.006442826
       data.V4 .
       data.V5 .
       data.V6 .
 
-
       coefficients
       $`0`
       5 x 1 sparse Matrix of class "dgCMatrix"
@@ -2310,9 +2317,9 @@ class LogisticRegressionSuite extends MLTest with 
DefaultReadWriteTest {
                        s0
                .
       data.V3  .
-      data.V4  0.14571623
-      data.V5 -0.16456351
-      data.V6 -0.05866264
+      data.V4  0.15710979
+      data.V5 -0.16871602
+      data.V6 -0.07928527
 
       $`2`
       5 x 1 sparse Matrix of class "dgCMatrix"
@@ -2326,13 +2333,13 @@ class LogisticRegressionSuite extends MLTest with 
DefaultReadWriteTest {
 
      */
     val coefficientsRStd = new DenseMatrix(3, 4, Array(
-      0.0, 0.0, 0.0, 0.03238285,
-      -0.1328284, 0.4219321, -0.1247544, -0.1893318,
-      0.004572312, 0.0, 0.0, 0.0), isTransposed = true)
+      0.0, 0.0, 0.0, 0.03418889,
+      -0.1114779, 0.3992145, -0.1315371, -0.2107956,
+      0.006442826, 0.0, 0.0, 0.0), isTransposed = true)
 
     val coefficientsR = new DenseMatrix(3, 4, Array(
       0.0, 0.0, 0.0, 0.0,
-      0.0, 0.14571623, -0.16456351, -0.05866264,
+      0.0, 0.15710979, -0.16871602, -0.07928527,
       0.0, 0.0, 0.0, 0.0), isTransposed = true)
 
     assert(model1.coefficientMatrix ~== coefficientsRStd absTol 0.01)
diff --git 
a/mllib/src/test/scala/org/apache/spark/ml/clustering/KMeansSuite.scala 
b/mllib/src/test/scala/org/apache/spark/ml/clustering/KMeansSuite.scala
index a5159bc..5d439a2 100644
--- a/mllib/src/test/scala/org/apache/spark/ml/clustering/KMeansSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/ml/clustering/KMeansSuite.scala
@@ -167,7 +167,7 @@ class KMeansSuite extends MLTest with DefaultReadWriteTest 
with PMMLReadWriteTes
 
     val model = new KMeans()
       .setK(3)
-      .setSeed(1)
+      .setSeed(42)
       .setInitMode(MLlibKMeans.RANDOM)
       .setTol(1e-6)
       .setDistanceMeasure(DistanceMeasure.COSINE)
diff --git 
a/mllib/src/test/scala/org/apache/spark/ml/clustering/PowerIterationClusteringSuite.scala
 
b/mllib/src/test/scala/org/apache/spark/ml/clustering/PowerIterationClusteringSuite.scala
index 97269ee..d3b8575 100644
--- 
a/mllib/src/test/scala/org/apache/spark/ml/clustering/PowerIterationClusteringSuite.scala
+++ 
b/mllib/src/test/scala/org/apache/spark/ml/clustering/PowerIterationClusteringSuite.scala
@@ -34,9 +34,9 @@ class PowerIterationClusteringSuite extends SparkFunSuite
 
   @transient var data: Dataset[_] = _
   final val r1 = 1.0
-  final val n1 = 10
+  final val n1 = 80
   final val r2 = 4.0
-  final val n2 = 40
+  final val n2 = 80
 
   override def beforeAll(): Unit = {
     super.beforeAll()
@@ -222,7 +222,7 @@ class PowerIterationClusteringSuite extends SparkFunSuite
       (0, 1),
       (0, 2),
       (3, 4)
-    )).toDF("src", "dst")
+    )).toDF("src", "dst").repartition(1)
 
     var assignments2 = new PowerIterationClustering()
       .setInitMode("random")
diff --git 
a/mllib/src/test/scala/org/apache/spark/ml/feature/Word2VecSuite.scala 
b/mllib/src/test/scala/org/apache/spark/ml/feature/Word2VecSuite.scala
index 70d1177..d28f1f4 100644
--- a/mllib/src/test/scala/org/apache/spark/ml/feature/Word2VecSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/ml/feature/Word2VecSuite.scala
@@ -65,7 +65,7 @@ class Word2VecSuite extends MLTest with DefaultReadWriteTest {
 
     // These expectations are just magic values, characterizing the current
     // behavior.  The test needs to be updated to be more general, see 
SPARK-11502
-    val magicExp = Vectors.dense(0.30153007534417237, -0.6833061711354689, 
0.5116530778733167)
+    val magicExp = Vectors.dense(-0.11654884266582402, 0.3115301721475341, 
-0.6879349987615239)
     testTransformer[(Seq[String], Vector)](docDF, model, "result", "expected") 
{
       case Row(vector1: Vector, vector2: Vector) =>
         assert(vector1 ~== magicExp absTol 1E-5, "Transformed vector is 
different with expected.")
@@ -98,9 +98,9 @@ class Word2VecSuite extends MLTest with DefaultReadWriteTest {
     // These expectations are just magic values, characterizing the current
     // behavior.  The test needs to be updated to be more general, see 
SPARK-11502
     val magicExpected = Seq(
-      Vectors.dense(0.3326166272163391, -0.5603077411651611, 
-0.2309209555387497),
-      Vectors.dense(0.32463887333869934, -0.9306551218032837, 
1.393115520477295),
-      Vectors.dense(-0.27150997519493103, 0.4372006058692932, 
-0.13465698063373566)
+      Vectors.dense(0.12662248313426971, 0.6108677387237549, 
-0.006755620241165161),
+      Vectors.dense(-0.3870747685432434, 0.023309476673603058, 
-1.567158818244934),
+      Vectors.dense(-0.08617416769266129, -0.09897610545158386, 
0.6113300323486328)
     )
 
     realVectors.zip(magicExpected).foreach {
@@ -122,7 +122,7 @@ class Word2VecSuite extends MLTest with 
DefaultReadWriteTest {
       .setSeed(42L)
       .fit(docDF)
 
-    val expected = Map(("b", 0.2608488929093532), ("c", -0.8271274846926078))
+    val expected = Map(("b", -0.024012837558984756), ("c", 
-0.19355152547359467))
     val findSynonymsResult = model.findSynonyms("a", 2).rdd.map {
       case Row(w: String, sim: Double) => (w, sim)
     }.collectAsMap()
diff --git 
a/mllib/src/test/scala/org/apache/spark/ml/regression/GBTRegressorSuite.scala 
b/mllib/src/test/scala/org/apache/spark/ml/regression/GBTRegressorSuite.scala
index 46fa376..f35c8c6 100644
--- 
a/mllib/src/test/scala/org/apache/spark/ml/regression/GBTRegressorSuite.scala
+++ 
b/mllib/src/test/scala/org/apache/spark/ml/regression/GBTRegressorSuite.scala
@@ -184,7 +184,7 @@ class GBTRegressorSuite extends MLTest with 
DefaultReadWriteTest {
     val gbt = new GBTRegressor()
       .setMaxDepth(3)
       .setMaxIter(5)
-      .setSeed(123)
+      .setSeed(42)
       .setFeatureSubsetStrategy("all")
 
     // In this data, feature 1 is very important.
diff --git 
a/mllib/src/test/scala/org/apache/spark/ml/regression/GeneralizedLinearRegressionSuite.scala
 
b/mllib/src/test/scala/org/apache/spark/ml/regression/GeneralizedLinearRegressionSuite.scala
index 600a432..fc1284e 100644
--- 
a/mllib/src/test/scala/org/apache/spark/ml/regression/GeneralizedLinearRegressionSuite.scala
+++ 
b/mllib/src/test/scala/org/apache/spark/ml/regression/GeneralizedLinearRegressionSuite.scala
@@ -232,8 +232,8 @@ class GeneralizedLinearRegressionSuite extends MLTest with 
DefaultReadWriteTest
          print(as.vector(coef(model)))
        }
 
-       [1] 2.2960999 0.8087933
-       [1] 2.5002642 2.2000403 0.5999485
+       [1] 2.2958751 0.8088523
+       [1] 2.5009266 2.1997901 0.5999522
 
        data <- read.csv("path", header=FALSE)
        model1 <- glm(f1, family=gaussian(link=log), data=data, start=c(0,0))
@@ -241,8 +241,8 @@ class GeneralizedLinearRegressionSuite extends MLTest with 
DefaultReadWriteTest
        print(as.vector(coef(model1)))
        print(as.vector(coef(model2)))
 
-       [1] 0.23069326 0.07993778
-       [1] 0.25001858 0.22002452 0.05998789
+       [1] 0.23063118 0.07995495
+       [1] 0.25016124 0.21995737 0.05999335
 
        data <- read.csv("path", header=FALSE)
        for (formula in c(f1, f2)) {
@@ -250,17 +250,17 @@ class GeneralizedLinearRegressionSuite extends MLTest 
with DefaultReadWriteTest
          print(as.vector(coef(model)))
        }
 
-       [1] 2.3010179 0.8198976
-       [1] 2.4108902 2.2130248 0.6086152
+       [1] 2.3320341 0.8121904
+       [1] 2.2837064 2.2487147 0.6120262
      */
 
     val expected = Seq(
-      Vectors.dense(0.0, 2.2960999, 0.8087933),
-      Vectors.dense(2.5002642, 2.2000403, 0.5999485),
-      Vectors.dense(0.0, 0.23069326, 0.07993778),
-      Vectors.dense(0.25001858, 0.22002452, 0.05998789),
-      Vectors.dense(0.0, 2.3010179, 0.8198976),
-      Vectors.dense(2.4108902, 2.2130248, 0.6086152))
+      Vectors.dense(0.0, 2.2958751, 0.8088523),
+      Vectors.dense(2.5009266, 2.1997901, 0.5999522),
+      Vectors.dense(0.0, 0.23063118, 0.07995495),
+      Vectors.dense(0.25016124, 0.21995737, 0.05999335),
+      Vectors.dense(0.0, 2.3320341, 0.8121904),
+      Vectors.dense(2.2837064, 2.2487147, 0.6120262))
 
     import GeneralizedLinearRegression._
 
@@ -308,21 +308,21 @@ class GeneralizedLinearRegressionSuite extends MLTest 
with DefaultReadWriteTest
          }
        }
 
-       [1] 0.0000000 2.2961005 0.8087932
-       [1] 0.0000000 2.2130368 0.8309556
-       [1] 0.0000000 1.7176137 0.9610657
-       [1] 2.5002642 2.2000403 0.5999485
-       [1] 3.1106389 2.0935142 0.5712711
-       [1] 6.7597127 1.4581054 0.3994266
+       [1] 0.0000000 2.2958757 0.8088521
+       [1] 0.0000000 2.2128149 0.8310136
+       [1] 0.0000000 1.7174260 0.9611137
+       [1] 2.5009266 2.1997901 0.5999522
+       [1] 3.1113269 2.0932659 0.5712717
+       [1] 6.7604302 1.4578902 0.3994153
      */
 
     val expected = Seq(
-      Vectors.dense(0.0, 2.2961005, 0.8087932),
-      Vectors.dense(0.0, 2.2130368, 0.8309556),
-      Vectors.dense(0.0, 1.7176137, 0.9610657),
-      Vectors.dense(2.5002642, 2.2000403, 0.5999485),
-      Vectors.dense(3.1106389, 2.0935142, 0.5712711),
-      Vectors.dense(6.7597127, 1.4581054, 0.3994266))
+      Vectors.dense(0.0, 2.2958757, 0.8088521),
+      Vectors.dense(0.0, 2.2128149, 0.8310136),
+      Vectors.dense(0.0, 1.7174260, 0.9611137),
+      Vectors.dense(2.5009266, 2.1997901, 0.5999522),
+      Vectors.dense(3.1113269, 2.0932659, 0.5712717),
+      Vectors.dense(6.7604302, 1.4578902, 0.3994153))
 
     var idx = 0
     for (fitIntercept <- Seq(false, true);
diff --git 
a/mllib/src/test/scala/org/apache/spark/mllib/clustering/PowerIterationClusteringSuite.scala
 
b/mllib/src/test/scala/org/apache/spark/mllib/clustering/PowerIterationClusteringSuite.scala
index b33b86b..c25c89b 100644
--- 
a/mllib/src/test/scala/org/apache/spark/mllib/clustering/PowerIterationClusteringSuite.scala
+++ 
b/mllib/src/test/scala/org/apache/spark/mllib/clustering/PowerIterationClusteringSuite.scala
@@ -47,9 +47,9 @@ class PowerIterationClusteringSuite extends SparkFunSuite 
with MLlibTestSparkCon
   test("power iteration clustering") {
     // Generate two circles following the example in the PIC paper.
     val r1 = 1.0
-    val n1 = 10
+    val n1 = 80
     val r2 = 4.0
-    val n2 = 10
+    val n2 = 80
     val n = n1 + n2
     val points = genCircle(r1, n1) ++ genCircle(r2, n2)
     val similarities = for (i <- 1 until n; j <- 0 until i) yield {
@@ -81,9 +81,9 @@ class PowerIterationClusteringSuite extends SparkFunSuite 
with MLlibTestSparkCon
   test("power iteration clustering on graph") {
     // Generate two circles following the example in the PIC paper.
     val r1 = 1.0
-    val n1 = 10
+    val n1 = 80
     val r2 = 4.0
-    val n2 = 10
+    val n2 = 80
     val n = n1 + n2
     val points = genCircle(r1, n1) ++ genCircle(r2, n2)
     val similarities = for (i <- 1 until n; j <- 0 until i) yield {
diff --git 
a/mllib/src/test/scala/org/apache/spark/mllib/clustering/StreamingKMeansSuite.scala
 
b/mllib/src/test/scala/org/apache/spark/mllib/clustering/StreamingKMeansSuite.scala
index fdaa098..a1ac10c 100644
--- 
a/mllib/src/test/scala/org/apache/spark/mllib/clustering/StreamingKMeansSuite.scala
+++ 
b/mllib/src/test/scala/org/apache/spark/mllib/clustering/StreamingKMeansSuite.scala
@@ -77,6 +77,7 @@ class StreamingKMeansSuite extends SparkFunSuite with 
TestSuiteBase {
     val k = 2
     val d = 5
     val r = 0.1
+    val seed = 987654321
 
     // create model with two clusters
     val kMeans = new StreamingKMeans()
@@ -88,7 +89,7 @@ class StreamingKMeansSuite extends SparkFunSuite with 
TestSuiteBase {
         Array(5.0, 5.0))
 
     // generate random data for k-means
-    val (input, centers) = StreamingKMeansDataGenerator(numPoints, numBatches, 
k, d, r, 42)
+    val (input, centers) = StreamingKMeansDataGenerator(numPoints, numBatches, 
k, d, r, seed)
 
     // setup and run the model training
     ssc = setupStreams(input, (inputDStream: DStream[Vector]) => {
diff --git a/python/pyspark/ml/clustering.py b/python/pyspark/ml/clustering.py
index 864e2a3..6c9cf7b 100644
--- a/python/pyspark/ml/clustering.py
+++ b/python/pyspark/ml/clustering.py
@@ -1193,19 +1193,19 @@ class PowerIterationClustering(HasMaxIter, 
HasWeightCol, JavaParams, JavaMLReada
     ...         (3, 0, 0.5), (3, 1, 0.7), (3, 2, 0.9),
     ...         (4, 0, 0.5), (4, 1, 0.7), (4, 2, 0.9), (4, 3, 1.1),
     ...         (5, 0, 0.5), (5, 1, 0.7), (5, 2, 0.9), (5, 3, 1.1), (5, 4, 
1.3)]
-    >>> df = spark.createDataFrame(data).toDF("src", "dst", "weight")
+    >>> df = spark.createDataFrame(data).toDF("src", "dst", 
"weight").repartition(1)
     >>> pic = PowerIterationClustering(k=2, maxIter=40, weightCol="weight")
     >>> assignments = pic.assignClusters(df)
     >>> assignments.sort(assignments.id).show(truncate=False)
     +---+-------+
     |id |cluster|
     +---+-------+
-    |0  |1      |
-    |1  |1      |
-    |2  |1      |
-    |3  |1      |
-    |4  |1      |
-    |5  |0      |
+    |0  |0      |
+    |1  |0      |
+    |2  |0      |
+    |3  |0      |
+    |4  |0      |
+    |5  |1      |
     +---+-------+
     ...
     >>> pic_path = temp_path + "/pic"
diff --git a/python/pyspark/ml/feature.py b/python/pyspark/ml/feature.py
index 3f9de9c..595ab18 100755
--- a/python/pyspark/ml/feature.py
+++ b/python/pyspark/ml/feature.py
@@ -3064,24 +3064,24 @@ class Word2Vec(JavaEstimator, HasStepSize, HasMaxIter, 
HasSeed, HasInputCol, Has
     +----+--------------------+
     |word|              vector|
     +----+--------------------+
-    |   a|[0.09461779892444...|
-    |   b|[1.15474212169647...|
-    |   c|[-0.3794820010662...|
+    |   a|[0.09511678665876...|
+    |   b|[-1.2028766870498...|
+    |   c|[0.30153277516365...|
     +----+--------------------+
     ...
     >>> model.findSynonymsArray("a", 2)
-    [(u'b', 0.25053444504737854), (u'c', -0.6980510950088501)]
+    [(u'b', 0.015859870240092278), (u'c', -0.5680795907974243)]
     >>> from pyspark.sql.functions import format_number as fmt
     >>> model.findSynonyms("a", 2).select("word", fmt("similarity", 
5).alias("similarity")).show()
     +----+----------+
     |word|similarity|
     +----+----------+
-    |   b|   0.25053|
-    |   c|  -0.69805|
+    |   b|   0.01586|
+    |   c|  -0.56808|
     +----+----------+
     ...
     >>> model.transform(doc).head().model
-    DenseVector([0.5524, -0.4995, -0.3599, 0.0241, 0.3461])
+    DenseVector([-0.4833, 0.1855, -0.273, -0.0509, -0.4769])
     >>> word2vecPath = temp_path + "/word2vec"
     >>> word2Vec.save(word2vecPath)
     >>> loadedWord2Vec = Word2Vec.load(word2vecPath)
diff --git a/python/pyspark/ml/recommendation.py 
b/python/pyspark/ml/recommendation.py
index 520d791..bf27164 100644
--- a/python/pyspark/ml/recommendation.py
+++ b/python/pyspark/ml/recommendation.py
@@ -79,27 +79,27 @@ class ALS(JavaEstimator, HasCheckpointInterval, HasMaxIter, 
HasPredictionCol, Ha
     >>> test = spark.createDataFrame([(0, 2), (1, 0), (2, 0)], ["user", 
"item"])
     >>> predictions = sorted(model.transform(test).collect(), key=lambda r: 
r[0])
     >>> predictions[0]
-    Row(user=0, item=2, prediction=-0.13807615637779236)
+    Row(user=0, item=2, prediction=0.6929101347923279)
     >>> predictions[1]
-    Row(user=1, item=0, prediction=2.6258413791656494)
+    Row(user=1, item=0, prediction=3.47356915473938)
     >>> predictions[2]
-    Row(user=2, item=0, prediction=-1.5018409490585327)
+    Row(user=2, item=0, prediction=-0.8991986513137817)
     >>> user_recs = model.recommendForAllUsers(3)
     >>> user_recs.where(user_recs.user == 0)\
         .select("recommendations.item", "recommendations.rating").collect()
-    [Row(item=[0, 1, 2], rating=[3.910..., 1.992..., -0.138...])]
+    [Row(item=[0, 1, 2], rating=[3.910..., 1.997..., 0.692...])]
     >>> item_recs = model.recommendForAllItems(3)
     >>> item_recs.where(item_recs.item == 2)\
         .select("recommendations.user", "recommendations.rating").collect()
-    [Row(user=[2, 1, 0], rating=[4.901..., 3.981..., -0.138...])]
+    [Row(user=[2, 1, 0], rating=[4.892..., 3.991..., 0.692...])]
     >>> user_subset = df.where(df.user == 2)
     >>> user_subset_recs = model.recommendForUserSubset(user_subset, 3)
     >>> user_subset_recs.select("recommendations.item", 
"recommendations.rating").first()
-    Row(item=[2, 1, 0], rating=[4.901..., 1.056..., -1.501...])
+    Row(item=[2, 1, 0], rating=[4.892..., 1.076..., -0.899...])
     >>> item_subset = df.where(df.item == 0)
     >>> item_subset_recs = model.recommendForItemSubset(item_subset, 3)
     >>> item_subset_recs.select("recommendations.user", 
"recommendations.rating").first()
-    Row(user=[0, 1, 2], rating=[3.910..., 2.625..., -1.501...])
+    Row(user=[0, 1, 2], rating=[3.910..., 3.473..., -0.899...])
     >>> als_path = temp_path + "/als"
     >>> als.save(als_path)
     >>> als2 = ALS.load(als_path)
diff --git a/python/pyspark/ml/tests/test_algorithms.py 
b/python/pyspark/ml/tests/test_algorithms.py
index 6082082..034eaed 100644
--- a/python/pyspark/ml/tests/test_algorithms.py
+++ b/python/pyspark/ml/tests/test_algorithms.py
@@ -83,7 +83,7 @@ class 
MultilayerPerceptronClassifierTest(SparkSessionTestCase):
         result = model.transform(test).head()
         expected_prediction = 2.0
         expected_probability = [0.0, 0.0, 1.0]
-        expected_rawPrediction = [57.3955, -124.5462, 67.9943]
+        expected_rawPrediction = [-11.6081922998, -8.15827998691, 22.17757045]
         self.assertTrue(result.prediction, expected_prediction)
         self.assertTrue(np.allclose(result.probability, expected_probability, 
atol=1E-4))
         self.assertTrue(np.allclose(result.rawPrediction, 
expected_rawPrediction, atol=1E-4))
diff --git a/python/pyspark/ml/tuning.py b/python/pyspark/ml/tuning.py
index 1f4abf5..be7b8da 100644
--- a/python/pyspark/ml/tuning.py
+++ b/python/pyspark/ml/tuning.py
@@ -504,15 +504,15 @@ class TrainValidationSplit(Estimator, ValidatorParams, 
HasParallelism, HasCollec
     ...      (Vectors.dense([0.5]), 0.0),
     ...      (Vectors.dense([0.6]), 1.0),
     ...      (Vectors.dense([1.0]), 1.0)] * 10,
-    ...     ["features", "label"])
+    ...     ["features", "label"]).repartition(1)
     >>> lr = LogisticRegression()
     >>> grid = ParamGridBuilder().addGrid(lr.maxIter, [0, 1]).build()
     >>> evaluator = BinaryClassificationEvaluator()
     >>> tvs = TrainValidationSplit(estimator=lr, estimatorParamMaps=grid, 
evaluator=evaluator,
-    ...     parallelism=2)
+    ...     parallelism=1, seed=42)
     >>> tvsModel = tvs.fit(dataset)
     >>> evaluator.evaluate(tvsModel.transform(dataset))
-    0.8333...
+    0.833...
 
     .. versionadded:: 2.0.0
     """
diff --git a/python/pyspark/mllib/recommendation.py 
b/python/pyspark/mllib/recommendation.py
index 3d4eae8..3dd7cb2 100644
--- a/python/pyspark/mllib/recommendation.py
+++ b/python/pyspark/mllib/recommendation.py
@@ -100,16 +100,16 @@ class MatrixFactorizationModel(JavaModelWrapper, 
JavaSaveable, JavaLoader):
     >>> users_for_products[0]
     (1, (Rating(user=2, product=1, rating=...),))
 
-    >>> model = ALS.train(ratings, 1, nonnegative=True, seed=10)
+    >>> model = ALS.train(ratings, 1, nonnegative=True, seed=123456789)
     >>> model.predict(2, 2)
     3.73...
 
     >>> df = sqlContext.createDataFrame([Rating(1, 1, 1.0), Rating(1, 2, 2.0), 
Rating(2, 1, 2.0)])
-    >>> model = ALS.train(df, 1, nonnegative=True, seed=10)
+    >>> model = ALS.train(df, 1, nonnegative=True, seed=123456789)
     >>> model.predict(2, 2)
     3.73...
 
-    >>> model = ALS.trainImplicit(ratings, 1, nonnegative=True, seed=10)
+    >>> model = ALS.trainImplicit(ratings, 1, nonnegative=True, seed=123456789)
     >>> model.predict(2, 2)
     0.4...
 
diff --git a/python/pyspark/sql/dataframe.py b/python/pyspark/sql/dataframe.py
index 8227e82..58d74f5 100644
--- a/python/pyspark/sql/dataframe.py
+++ b/python/pyspark/sql/dataframe.py
@@ -795,9 +795,9 @@ class DataFrame(object):
 
         >>> df = spark.range(10)
         >>> df.sample(0.5, 3).count()
-        4
+        7
         >>> df.sample(fraction=0.5, seed=3).count()
-        4
+        7
         >>> df.sample(withReplacement=True, fraction=0.5, seed=3).count()
         1
         >>> df.sample(1.0).count()
@@ -865,8 +865,8 @@ class DataFrame(object):
         +---+-----+
         |key|count|
         +---+-----+
-        |  0|    5|
-        |  1|    9|
+        |  0|    3|
+        |  1|    6|
         +---+-----+
         >>> dataset.sampleBy(col("key"), fractions={2: 1.0}, seed=0).count()
         33
@@ -898,10 +898,10 @@ class DataFrame(object):
 
         >>> splits = df4.randomSplit([1.0, 2.0], 24)
         >>> splits[0].count()
-        1
+        2
 
         >>> splits[1].count()
-        3
+        2
         """
         for w in weights:
             if w < 0.0:
diff --git a/python/pyspark/sql/functions.py b/python/pyspark/sql/functions.py
index bc28c9d..6ae2357 100644
--- a/python/pyspark/sql/functions.py
+++ b/python/pyspark/sql/functions.py
@@ -584,8 +584,8 @@ def rand(seed=None):
     .. note:: The function is non-deterministic in general case.
 
     >>> df.withColumn('rand', rand(seed=42) * 3).collect()
-    [Row(age=2, name=u'Alice', rand=1.1568609015300986),
-     Row(age=5, name=u'Bob', rand=1.403379671529166)]
+    [Row(age=2, name=u'Alice', rand=2.4052597283576684),
+     Row(age=5, name=u'Bob', rand=2.3913904055683974)]
     """
     sc = SparkContext._active_spark_context
     if seed is not None:
@@ -604,8 +604,8 @@ def randn(seed=None):
     .. note:: The function is non-deterministic in general case.
 
     >>> df.withColumn('randn', randn(seed=42)).collect()
-    [Row(age=2, name=u'Alice', randn=-0.7556247885860078),
-    Row(age=5, name=u'Bob', randn=-0.0861619008451133)]
+    [Row(age=2, name=u'Alice', randn=1.1027054481455365),
+    Row(age=5, name=u'Bob', randn=0.7400395449950132)]
     """
     sc = SparkContext._active_spark_context
     if seed is not None:
diff --git a/python/pyspark/sql/tests/test_functions.py 
b/python/pyspark/sql/tests/test_functions.py
index b777573..273749e 100644
--- a/python/pyspark/sql/tests/test_functions.py
+++ b/python/pyspark/sql/tests/test_functions.py
@@ -83,9 +83,9 @@ class FunctionsTests(ReusedSQLTestCase):
         self.assertTrue(abs(corr - 0.95734012) < 1e-6)
 
     def test_sampleby(self):
-        df = self.sc.parallelize([Row(a=i, b=(i % 3)) for i in 
range(10)]).toDF()
+        df = self.sc.parallelize([Row(a=i, b=(i % 3)) for i in 
range(100)]).toDF()
         sampled = df.stat.sampleBy(u"b", fractions={0: 0.5, 1: 0.5}, seed=0)
-        self.assertTrue(sampled.count() == 3)
+        self.assertTrue(sampled.count() == 35)
 
     def test_cov(self):
         df = self.sc.parallelize([Row(a=i, b=2 * i) for i in range(10)]).toDF()
diff --git 
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/RandomSuite.scala
 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/RandomSuite.scala
index 752c9d5..469c24b 100644
--- 
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/RandomSuite.scala
+++ 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/RandomSuite.scala
@@ -17,25 +17,21 @@
 
 package org.apache.spark.sql.catalyst.expressions
 
-import org.scalatest.Matchers._
-
 import org.apache.spark.SparkFunSuite
 import org.apache.spark.sql.types.{IntegerType, LongType}
 
 class RandomSuite extends SparkFunSuite with ExpressionEvalHelper {
 
   test("random") {
-    checkDoubleEvaluation(Rand(30), 0.31429268272540556 +- 0.001)
-    checkDoubleEvaluation(Randn(30), -0.4798519469521663 +- 0.001)
+    checkEvaluation(Rand(30), 0.2762195585886885)
+    checkEvaluation(Randn(30), -1.0451987154313813)
 
-    checkDoubleEvaluation(
-      new Rand(Literal.create(null, LongType)), 0.8446490682263027 +- 0.001)
-    checkDoubleEvaluation(
-      new Randn(Literal.create(null, IntegerType)), 1.1164209726833079 +- 
0.001)
+    checkEvaluation(new Rand(Literal.create(null, LongType)), 
0.7604953758285915)
+    checkEvaluation(new Randn(Literal.create(null, IntegerType)), 
1.6034991609278433)
   }
 
   test("SPARK-9127 codegen with long seed") {
-    checkDoubleEvaluation(Rand(5419823303878592871L), 0.2304755080444375 +- 
0.001)
-    checkDoubleEvaluation(Randn(5419823303878592871L), -1.2824262718225607 +- 
0.001)
+    checkEvaluation(Rand(5419823303878592871L), 0.7145363364564755)
+    checkEvaluation(Randn(5419823303878592871L), 0.7816815274533012)
   }
 }
diff --git 
a/sql/core/src/test/resources/sql-tests/results/group-by-ordinal.sql.out 
b/sql/core/src/test/resources/sql-tests/results/group-by-ordinal.sql.out
index cf5add6..09e2c63 100644
--- a/sql/core/src/test/resources/sql-tests/results/group-by-ordinal.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/group-by-ordinal.sql.out
@@ -141,12 +141,12 @@ from
 -- !query 13 schema
 struct<a:int,rand(0):double,sum(b):bigint>
 -- !query 13 output
-1      0.4048454303385226      2
-1      0.8446490682263027      1
-2      0.5871875724155838      1
-2      0.8865128837019473      2
-3      0.742083829230211       1
-3      0.9179913208300406      2
+1      0.5234194256885571      2
+1      0.7604953758285915      1
+2      0.0953472826424725      1
+2      0.3163249920547614      2
+3      0.2710259815484829      2
+3      0.7141011170991605      1
 
 
 -- !query 14
diff --git a/sql/core/src/test/resources/sql-tests/results/random.sql.out 
b/sql/core/src/test/resources/sql-tests/results/random.sql.out
index bca6732..acd0609 100644
--- a/sql/core/src/test/resources/sql-tests/results/random.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/random.sql.out
@@ -7,7 +7,7 @@ SELECT rand(0)
 -- !query 0 schema
 struct<rand(0):double>
 -- !query 0 output
-0.8446490682263027
+0.7604953758285915
 
 
 -- !query 1
@@ -15,7 +15,7 @@ SELECT rand(cast(3 / 7 AS int))
 -- !query 1 schema
 struct<rand(CAST((CAST(3 AS DOUBLE) / CAST(7 AS DOUBLE)) AS INT)):double>
 -- !query 1 output
-0.8446490682263027
+0.7604953758285915
 
 
 -- !query 2
@@ -23,7 +23,7 @@ SELECT rand(NULL)
 -- !query 2 schema
 struct<rand(CAST(NULL AS INT)):double>
 -- !query 2 output
-0.8446490682263027
+0.7604953758285915
 
 
 -- !query 3
@@ -31,7 +31,7 @@ SELECT rand(cast(NULL AS int))
 -- !query 3 schema
 struct<rand(CAST(NULL AS INT)):double>
 -- !query 3 output
-0.8446490682263027
+0.7604953758285915
 
 
 -- !query 4
@@ -48,7 +48,7 @@ SELECT randn(0L)
 -- !query 5 schema
 struct<randn(0):double>
 -- !query 5 output
-1.1164209726833079
+1.6034991609278433
 
 
 -- !query 6
@@ -56,7 +56,7 @@ SELECT randn(cast(3 / 7 AS long))
 -- !query 6 schema
 struct<randn(CAST((CAST(3 AS DOUBLE) / CAST(7 AS DOUBLE)) AS BIGINT)):double>
 -- !query 6 output
-1.1164209726833079
+1.6034991609278433
 
 
 -- !query 7
@@ -64,7 +64,7 @@ SELECT randn(NULL)
 -- !query 7 schema
 struct<randn(CAST(NULL AS INT)):double>
 -- !query 7 output
-1.1164209726833079
+1.6034991609278433
 
 
 -- !query 8
@@ -72,7 +72,7 @@ SELECT randn(cast(NULL AS long))
 -- !query 8 schema
 struct<randn(CAST(NULL AS BIGINT)):double>
 -- !query 8 output
-1.1164209726833079
+1.6034991609278433
 
 
 -- !query 9
diff --git 
a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameStatSuite.scala 
b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameStatSuite.scala
index 589873b..2a74bfe 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameStatSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameStatSuite.scala
@@ -47,7 +47,7 @@ class DataFrameStatSuite extends QueryTest with 
SharedSQLContext {
     val data = sparkContext.parallelize(1 to n, 2).toDF("id")
     checkAnswer(
       data.sample(withReplacement = false, 0.05, seed = 13),
-      Seq(3, 17, 27, 58, 62).map(Row(_))
+      Seq(37, 8, 90).map(Row(_))
     )
   }
 
@@ -371,7 +371,7 @@ class DataFrameStatSuite extends QueryTest with 
SharedSQLContext {
     val sampled = df.stat.sampleBy("key", Map(0 -> 0.1, 1 -> 0.2), 0L)
     checkAnswer(
       sampled.groupBy("key").count().orderBy("key"),
-      Seq(Row(0, 6), Row(1, 11)))
+      Seq(Row(0, 1), Row(1, 6)))
   }
 
   test("sampleBy one column") {
@@ -379,7 +379,7 @@ class DataFrameStatSuite extends QueryTest with 
SharedSQLContext {
     val sampled = df.stat.sampleBy($"key", Map(0 -> 0.1, 1 -> 0.2), 0L)
     checkAnswer(
       sampled.groupBy("key").count().orderBy("key"),
-      Seq(Row(0, 6), Row(1, 11)))
+      Seq(Row(0, 1), Row(1, 6)))
   }
 
   test("sampleBy multiple columns") {
@@ -389,7 +389,7 @@ class DataFrameStatSuite extends QueryTest with 
SharedSQLContext {
       struct($"name", $"key"), Map(Row("Foo", 0) -> 0.1, Row("Foo", 1) -> 
0.2), 0L)
     checkAnswer(
       sampled.groupBy("key").count().orderBy("key"),
-      Seq(Row(0, 6), Row(1, 11)))
+      Seq(Row(0, 1), Row(1, 6)))
   }
 
   // This test case only verifies that `DataFrame.countMinSketch()` methods do 
return
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala 
b/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala
index 050699d..6e35b52 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala
@@ -618,7 +618,7 @@ class DatasetSuite extends QueryTest with SharedSQLContext {
     val data = sparkContext.parallelize(1 to n, 2).toDS()
     checkDataset(
       data.sample(withReplacement = false, 0.05, seed = 13),
-      3, 17, 27, 58, 62)
+      8, 37, 90)
   }
 
   test("sample fraction should not be negative with replacement") {
@@ -650,9 +650,10 @@ class DatasetSuite extends QueryTest with SharedSQLContext 
{
   }
 
   test("SPARK-16686: Dataset.sample with seed results shouldn't depend on 
downstream usage") {
+    val a = 7
     val simpleUdf = udf((n: Int) => {
-      require(n != 1, "simpleUdf shouldn't see id=1!")
-      1
+      require(n != a, s"simpleUdf shouldn't see id=$a!")
+      a
     })
 
     val df = Seq(
@@ -668,10 +669,10 @@ class DatasetSuite extends QueryTest with 
SharedSQLContext {
       (9, "string9")
     ).toDF("id", "stringData")
     val sampleDF = df.sample(false, 0.7, 50)
-    // After sampling, sampleDF doesn't contain id=1.
-    assert(!sampleDF.select("id").as[Int].collect.contains(1))
-    // simpleUdf should not encounter id=1.
-    checkAnswer(sampleDF.select(simpleUdf($"id")), 
List.fill(sampleDF.count.toInt)(Row(1)))
+    // After sampling, sampleDF doesn't contain id=a.
+    assert(!sampleDF.select("id").as[Int].collect.contains(a))
+    // simpleUdf should not encounter id=a.
+    checkAnswer(sampleDF.select(simpleUdf($"id")), 
List.fill(sampleDF.count.toInt)(Row(a)))
   }
 
   test("SPARK-11436: we should rebind right encoder when join 2 datasets") {
diff --git 
a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/TestCsvData.scala
 
b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/TestCsvData.scala
index 3e20cc4..7999331 100644
--- 
a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/TestCsvData.scala
+++ 
b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/csv/TestCsvData.scala
@@ -24,8 +24,7 @@ private[csv] trait TestCsvData {
 
   def sampledTestData: Dataset[String] = {
     spark.range(0, 100, 1).map { index =>
-      val predefinedSample = Set[Long](2, 8, 15, 27, 30, 34, 35, 37, 44, 46,
-        57, 62, 68, 72)
+      val predefinedSample = Set[Long](3, 18, 20, 24, 50, 60, 87, 99)
       if (predefinedSample.contains(index)) {
         index.toString
       } else {
diff --git 
a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/TestJsonData.scala
 
b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/TestJsonData.scala
index 6e9559e..1750333 100644
--- 
a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/TestJsonData.scala
+++ 
b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/json/TestJsonData.scala
@@ -236,8 +236,7 @@ private[json] trait TestJsonData {
 
   def sampledTestData: Dataset[String] = {
     spark.range(0, 100, 1).map { index =>
-      val predefinedSample = Set[Long](2, 8, 15, 27, 30, 34, 35, 37, 44, 46,
-        57, 62, 68, 72)
+      val predefinedSample = Set[Long](3, 18, 20, 24, 50, 60, 87, 99)
       if (predefinedSample.contains(index)) {
         s"""{"f1":${index.toString}}"""
       } else {


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

[spark] branch master updated: [SPARK-23643][CORE][SQL][ML] Shrinking the buffer in hashSeed up to size of the seed parameter

Reply via email to