This is an automated email from the ASF dual-hosted git repository.

srowen pushed a commit to branch branch-3.2
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/branch-3.2 by this push:
     new c7d246b  [SPARK-35310][MLLIB] Update to breeze 1.2
c7d246b is described below

commit c7d246ba4e5c729c7d105d20ca3bb5f3dae66f1d
Author: Sean Owen <sro...@gmail.com>
AuthorDate: Thu Jul 22 13:58:01 2021 -0500

    [SPARK-35310][MLLIB] Update to breeze 1.2

    What changes were proposed in this pull request?
    Update to the latest breeze 1.2.

    Why are the changes needed?
    Minor bug fixes.

    Does this PR introduce any user-facing change?
    No.

    How was this patch tested?
    Existing tests.
    
    Closes #33449 from srowen/SPARK-35310.
    
    Authored-by: Sean Owen <sro...@gmail.com>
    Signed-off-by: Sean Owen <sro...@gmail.com>
---
 dev/deps/spark-deps-hadoop-2.7-hive-2.3                 | 17 ++++++++---------
 dev/deps/spark-deps-hadoop-3.2-hive-2.3                 | 17 ++++++++---------
 .../spark/ml/optim/WeightedLeastSquaresSuite.scala      | 16 ++++++++++------
 .../org/apache/spark/mllib/linalg/VectorsSuite.scala    |  6 ++++--
 .../org/apache/spark/mllib/util/MLUtilsSuite.scala      | 15 ++++++++++-----
 pom.xml                                                 |  2 +-
 6 files changed, 41 insertions(+), 32 deletions(-)

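A note on the test changes below: the suites stop comparing against
breeze.linalg.squaredDistance and instead compute the expected squared
distance directly in plain Scala, presumably so the expected values no
longer depend on breeze internals. A minimal standalone sketch of that
reference computation (the example values here are illustrative, not from
the patch):

    import org.apache.spark.mllib.linalg.{Vector, Vectors}

    // Squared Euclidean distance written without breeze, matching the
    // helper the updated tests define.
    def squaredDistance(v1: Vector, v2: Vector): Double =
      v1.toArray.zip(v2.toArray).map { case (a, b) => (a - b) * (a - b) }.sum

    // (1.0 - 1.0)^2 + (4.0 - 2.0)^2 = 4.0
    assert(squaredDistance(Vectors.dense(1.0, 2.0), Vectors.dense(1.0, 4.0)) == 4.0)
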
diff --git a/dev/deps/spark-deps-hadoop-2.7-hive-2.3 b/dev/deps/spark-deps-hadoop-2.7-hive-2.3
index c2994ec..19de6d8 100644
--- a/dev/deps/spark-deps-hadoop-2.7-hive-2.3
+++ b/dev/deps/spark-deps-hadoop-2.7-hive-2.3
@@ -5,7 +5,7 @@ RoaringBitmap/0.9.0//RoaringBitmap-0.9.0.jar
 ST4/4.0.4//ST4-4.0.4.jar
 activation/1.1.1//activation-1.1.1.jar
 aircompressor/0.19//aircompressor-0.19.jar
-algebra_2.12/2.0.0-M2//algebra_2.12-2.0.0-M2.jar
+algebra_2.12/2.0.1//algebra_2.12-2.0.1.jar
 annotations/17.0.0//annotations-17.0.0.jar
 antlr-runtime/3.5.2//antlr-runtime-3.5.2.jar
 antlr4-runtime/4.8//antlr4-runtime-4.8.jar
@@ -28,9 +28,9 @@ avro-mapred/1.10.2//avro-mapred-1.10.2.jar
 avro/1.10.2//avro-1.10.2.jar
 blas/2.2.0//blas-2.2.0.jar
 bonecp/0.8.0.RELEASE//bonecp-0.8.0.RELEASE.jar
-breeze-macros_2.12/1.0//breeze-macros_2.12-1.0.jar
-breeze_2.12/1.0//breeze_2.12-1.0.jar
-cats-kernel_2.12/2.0.0-M4//cats-kernel_2.12-2.0.0-M4.jar
+breeze-macros_2.12/1.2//breeze-macros_2.12-1.2.jar
+breeze_2.12/1.2//breeze_2.12-1.2.jar
+cats-kernel_2.12/2.1.1//cats-kernel_2.12-2.1.1.jar
 chill-java/0.10.0//chill-java-0.10.0.jar
 chill_2.12/0.10.0//chill_2.12-0.10.0.jar
 commons-beanutils/1.9.4//commons-beanutils-1.9.4.jar
@@ -182,7 +182,6 @@ libthrift/0.12.0//libthrift-0.12.0.jar
 log4j/1.2.17//log4j-1.2.17.jar
 logging-interceptor/3.12.12//logging-interceptor-3.12.12.jar
 lz4-java/1.7.1//lz4-java-1.7.1.jar
-machinist_2.12/0.6.8//machinist_2.12-0.6.8.jar
 macro-compat_2.12/1.1.1//macro-compat_2.12-1.1.1.jar
 mesos/1.4.0/shaded-protobuf/mesos-1.4.0-shaded-protobuf.jar
 metrics-core/4.2.0//metrics-core-4.2.0.jar
@@ -224,10 +223,10 @@ slf4j-api/1.7.30//slf4j-api-1.7.30.jar
 slf4j-log4j12/1.7.30//slf4j-log4j12-1.7.30.jar
 snakeyaml/1.27//snakeyaml-1.27.jar
 snappy-java/1.1.8.4//snappy-java-1.1.8.4.jar
-spire-macros_2.12/0.17.0-M1//spire-macros_2.12-0.17.0-M1.jar
-spire-platform_2.12/0.17.0-M1//spire-platform_2.12-0.17.0-M1.jar
-spire-util_2.12/0.17.0-M1//spire-util_2.12-0.17.0-M1.jar
-spire_2.12/0.17.0-M1//spire_2.12-0.17.0-M1.jar
+spire-macros_2.12/0.17.0//spire-macros_2.12-0.17.0.jar
+spire-platform_2.12/0.17.0//spire-platform_2.12-0.17.0.jar
+spire-util_2.12/0.17.0//spire-util_2.12-0.17.0.jar
+spire_2.12/0.17.0//spire_2.12-0.17.0.jar
 stax-api/1.0.1//stax-api-1.0.1.jar
 stream/2.9.6//stream-2.9.6.jar
 super-csv/2.2.0//super-csv-2.2.0.jar
diff --git a/dev/deps/spark-deps-hadoop-3.2-hive-2.3 b/dev/deps/spark-deps-hadoop-3.2-hive-2.3
index f574770..d59496a 100644
--- a/dev/deps/spark-deps-hadoop-3.2-hive-2.3
+++ b/dev/deps/spark-deps-hadoop-3.2-hive-2.3
@@ -5,7 +5,7 @@ RoaringBitmap/0.9.0//RoaringBitmap-0.9.0.jar
 ST4/4.0.4//ST4-4.0.4.jar
 activation/1.1.1//activation-1.1.1.jar
 aircompressor/0.19//aircompressor-0.19.jar
-algebra_2.12/2.0.0-M2//algebra_2.12-2.0.0-M2.jar
+algebra_2.12/2.0.1//algebra_2.12-2.0.1.jar
 annotations/17.0.0//annotations-17.0.0.jar
 antlr-runtime/3.5.2//antlr-runtime-3.5.2.jar
 antlr4-runtime/4.8//antlr4-runtime-4.8.jar
@@ -23,9 +23,9 @@ avro-mapred/1.10.2//avro-mapred-1.10.2.jar
 avro/1.10.2//avro-1.10.2.jar
 blas/2.2.0//blas-2.2.0.jar
 bonecp/0.8.0.RELEASE//bonecp-0.8.0.RELEASE.jar
-breeze-macros_2.12/1.0//breeze-macros_2.12-1.0.jar
-breeze_2.12/1.0//breeze_2.12-1.0.jar
-cats-kernel_2.12/2.0.0-M4//cats-kernel_2.12-2.0.0-M4.jar
+breeze-macros_2.12/1.2//breeze-macros_2.12-1.2.jar
+breeze_2.12/1.2//breeze_2.12-1.2.jar
+cats-kernel_2.12/2.1.1//cats-kernel_2.12-2.1.1.jar
 chill-java/0.10.0//chill-java-0.10.0.jar
 chill_2.12/0.10.0//chill_2.12-0.10.0.jar
 commons-cli/1.2//commons-cli-1.2.jar
@@ -153,7 +153,6 @@ libthrift/0.12.0//libthrift-0.12.0.jar
 log4j/1.2.17//log4j-1.2.17.jar
 logging-interceptor/3.12.12//logging-interceptor-3.12.12.jar
 lz4-java/1.7.1//lz4-java-1.7.1.jar
-machinist_2.12/0.6.8//machinist_2.12-0.6.8.jar
 macro-compat_2.12/1.1.1//macro-compat_2.12-1.1.1.jar
 mesos/1.4.0/shaded-protobuf/mesos-1.4.0-shaded-protobuf.jar
 metrics-core/4.2.0//metrics-core-4.2.0.jar
@@ -195,10 +194,10 @@ slf4j-api/1.7.30//slf4j-api-1.7.30.jar
 slf4j-log4j12/1.7.30//slf4j-log4j12-1.7.30.jar
 snakeyaml/1.27//snakeyaml-1.27.jar
 snappy-java/1.1.8.4//snappy-java-1.1.8.4.jar
-spire-macros_2.12/0.17.0-M1//spire-macros_2.12-0.17.0-M1.jar
-spire-platform_2.12/0.17.0-M1//spire-platform_2.12-0.17.0-M1.jar
-spire-util_2.12/0.17.0-M1//spire-util_2.12-0.17.0-M1.jar
-spire_2.12/0.17.0-M1//spire_2.12-0.17.0-M1.jar
+spire-macros_2.12/0.17.0//spire-macros_2.12-0.17.0.jar
+spire-platform_2.12/0.17.0//spire-platform_2.12-0.17.0.jar
+spire-util_2.12/0.17.0//spire-util_2.12-0.17.0.jar
+spire_2.12/0.17.0//spire_2.12-0.17.0.jar
 stax-api/1.0.1//stax-api-1.0.1.jar
 stream/2.9.6//stream-2.9.6.jar
 super-csv/2.2.0//super-csv-2.2.0.jar
diff --git a/mllib/src/test/scala/org/apache/spark/ml/optim/WeightedLeastSquaresSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/optim/WeightedLeastSquaresSuite.scala
index 093d02e..4dbd224 100644
--- a/mllib/src/test/scala/org/apache/spark/ml/optim/WeightedLeastSquaresSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/ml/optim/WeightedLeastSquaresSuite.scala
@@ -142,7 +142,7 @@ class WeightedLeastSquaresSuite extends SparkFunSuite with MLlibTestSparkContext
       solverType = WeightedLeastSquares.Cholesky)
     val wlsModelWithIntercept = wlsWithIntercept.fit(instances)
     val wls = new WeightedLeastSquares(false, 0.0, 0.0, true, true,
-      solverType = WeightedLeastSquares.Cholesky)
+      solverType = WeightedLeastSquares.Cholesky, tol = 1e-14, maxIter = 100000)
     val wlsModel = wls.fit(instances)
 
     assert(expectedWithIntercept ~== wlsModelWithIntercept.diagInvAtWA relTol 1e-4)
@@ -169,7 +169,8 @@ class WeightedLeastSquaresSuite extends SparkFunSuite with MLlibTestSparkContext
         solver <- Seq(WeightedLeastSquares.Auto, WeightedLeastSquares.QuasiNewton)) {
      val singularModel = new WeightedLeastSquares(fitIntercept, regParam = 0.0,
         elasticNetParam = 0.0, standardizeFeatures = standardization,
-        standardizeLabel = standardization, solverType = solver).fit(collinearInstances)
+        standardizeLabel = standardization, solverType = solver,
+        tol = 1e-14, maxIter = 100000).fit(collinearInstances)
 
       collinearInstances.collect().foreach { case Instance(l, w, f) =>
        val pred = BLAS.dot(singularModel.coefficients, f) + singularModel.intercept
@@ -202,6 +203,7 @@ class WeightedLeastSquaresSuite extends SparkFunSuite with MLlibTestSparkContext
         for (solver <- WeightedLeastSquares.supportedSolvers) {
          val wls = new WeightedLeastSquares(fitIntercept, regParam = 0.0, elasticNetParam = 0.0,
            standardizeFeatures = standardization, standardizeLabel = standardization,
+            tol = 1e-14, maxIter = 100000,
             solverType = solver).fit(instances)
          val actual = Vectors.dense(wls.intercept, wls.coefficients(0), wls.coefficients(1))
           assert(actual ~== expected(idx) absTol 1e-4)
@@ -305,7 +307,8 @@ class WeightedLeastSquaresSuite extends SparkFunSuite with MLlibTestSparkContext
     for (fitIntercept <- Seq(false, true)) {
      val wls = new WeightedLeastSquares(fitIntercept = fitIntercept, regParam = 0.5,
         elasticNetParam = 0.0, standardizeFeatures = true,
-        standardizeLabel = true, solverType = WeightedLeastSquares.Cholesky)
+        standardizeLabel = true, solverType = WeightedLeastSquares.Cholesky,
+        tol = 1e-14, maxIter = 100000)
         .fit(constantFeaturesInstances)
      val actual = Vectors.dense(wls.intercept, wls.coefficients(0), wls.coefficients(1))
       assert(actual ~== expectedCholesky(idx) absTol 1e-6)
@@ -363,7 +366,7 @@ class WeightedLeastSquaresSuite extends SparkFunSuite with MLlibTestSparkContext
         (lambda, alpha) <- Seq((0.0, 0.0), (0.5, 0.0), (0.5, 0.5), (0.5, 1.0))) {
      val wls = new WeightedLeastSquares(fitIntercept, regParam = lambda, elasticNetParam = alpha,
         standardizeFeatures = standardization, standardizeLabel = true,
-        solverType = WeightedLeastSquares.QuasiNewton)
+        solverType = WeightedLeastSquares.QuasiNewton, tol = 1e-14, maxIter = 100000)
       val model = wls.fit(constantFeaturesInstances)
      val actual = Vectors.dense(model.intercept, model.coefficients(0), model.coefficients(1))
       assert(actual ~== expectedQuasiNewton(idx) absTol 1e-6)
@@ -473,7 +476,7 @@ class WeightedLeastSquaresSuite extends SparkFunSuite with MLlibTestSparkContext
          elasticNetParam <- Seq(0.1, 0.5, 1.0)) {
      val wls = new WeightedLeastSquares(fitIntercept, regParam, elasticNetParam,
         standardizeFeatures = standardization, standardizeLabel = true,
-        solverType = WeightedLeastSquares.Auto)
+        solverType = WeightedLeastSquares.Auto, tol = 1e-14, maxIter = 100000)
         .fit(instances)
      val actual = Vectors.dense(wls.intercept, wls.coefficients(0), wls.coefficients(1))
       assert(actual ~== expected(idx) absTol 1e-4)
@@ -531,7 +534,8 @@ class WeightedLeastSquaresSuite extends SparkFunSuite with MLlibTestSparkContext
          standardization <- Seq(false, true)) {
       for (solver <- WeightedLeastSquares.supportedSolvers) {
        val wls = new WeightedLeastSquares(fitIntercept, regParam, elasticNetParam = 0.0,
-          standardizeFeatures = standardization, standardizeLabel = true, solverType = solver)
+          standardizeFeatures = standardization, standardizeLabel = true, solverType = solver,
+          tol = 1e-14, maxIter = 100000)
           .fit(instances)
        val actual = Vectors.dense(wls.intercept, wls.coefficients(0), wls.coefficients(1))
         assert(actual ~== expected(idx) absTol 1e-4)
diff --git a/mllib/src/test/scala/org/apache/spark/mllib/linalg/VectorsSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/linalg/VectorsSuite.scala
index baac015..70ba4d3 100644
--- a/mllib/src/test/scala/org/apache/spark/mllib/linalg/VectorsSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/mllib/linalg/VectorsSuite.scala
@@ -21,7 +21,7 @@ import scala.collection.mutable.ArrayBuilder
 import scala.reflect.ClassTag
 import scala.util.Random
 
-import breeze.linalg.{squaredDistance => breezeSquaredDistance, DenseMatrix => BDM}
+import breeze.linalg.{DenseMatrix => BDM}
 import org.json4s.jackson.JsonMethods.{parse => parseJson}
 
 import org.apache.spark.{SparkConf, SparkException, SparkFunSuite}
@@ -295,7 +295,9 @@ class VectorsSuite extends SparkFunSuite with Logging {
       val denseVector1 = Vectors.dense(sparseVector1.toArray)
       val denseVector2 = Vectors.dense(sparseVector2.toArray)
 
-      val squaredDist = breezeSquaredDistance(sparseVector1.asBreeze, sparseVector2.asBreeze)
+      val squaredDist = sparseVector1.toArray.zip(sparseVector2.toArray).map {
+        case (a, b) => (a - b) * (a - b)
+      }.sum
 
       // SparseVector vs. SparseVector
      assert(Vectors.sqdist(sparseVector1, sparseVector2) ~== squaredDist relTol 1E-8)
diff --git a/mllib/src/test/scala/org/apache/spark/mllib/util/MLUtilsSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/util/MLUtilsSuite.scala
index fb3bc9f..69ce683 100644
--- a/mllib/src/test/scala/org/apache/spark/mllib/util/MLUtilsSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/mllib/util/MLUtilsSuite.scala
@@ -22,11 +22,10 @@ import java.nio.charset.StandardCharsets
 
 import scala.io.Source
 
-import breeze.linalg.{squaredDistance => breezeSquaredDistance}
 import com.google.common.io.Files
 
 import org.apache.spark.{SparkException, SparkFunSuite}
-import org.apache.spark.mllib.linalg.{DenseVector, Matrices, SparseVector, Vectors}
+import org.apache.spark.mllib.linalg.{DenseVector, Matrices, SparseVector, Vector, Vectors}
 import org.apache.spark.mllib.regression.LabeledPoint
 import org.apache.spark.mllib.util.MLUtils._
 import org.apache.spark.mllib.util.TestingUtils._
@@ -50,6 +49,12 @@ class MLUtilsSuite extends SparkFunSuite with MLlibTestSparkContext {
     val v1 = Vectors.dense(a)
     val norm1 = Vectors.norm(v1, 2.0)
     val precision = 1e-6
+
+    def squaredDistance(v1: Vector, v2: Vector): Double =
+      v1.toArray.zip(v2.toArray).map {
+        case (a, b) => (a - b) * (a - b)
+      }.sum
+
     for (m <- 0 until n) {
       val indices = (0 to m).toArray
       val values = indices.map(i => a(i))
@@ -57,13 +62,13 @@ class MLUtilsSuite extends SparkFunSuite with MLlibTestSparkContext {
       val norm2 = Vectors.norm(v2, 2.0)
       val v3 = Vectors.sparse(n, indices, indices.map(i => a(i) + 0.5))
       val norm3 = Vectors.norm(v3, 2.0)
-      val squaredDist = breezeSquaredDistance(v1.asBreeze, v2.asBreeze)
+      val squaredDist = squaredDistance(v1, v2)
      val fastSquaredDist1 = fastSquaredDistance(v1, norm1, v2, norm2, precision)
      assert((fastSquaredDist1 - squaredDist) <= precision * squaredDist, s"failed with m = $m")
       val fastSquaredDist2 =
        fastSquaredDistance(v1, norm1, Vectors.dense(v2.toArray), norm2, precision)
      assert((fastSquaredDist2 - squaredDist) <= precision * squaredDist, s"failed with m = $m")
-      val squaredDist2 = breezeSquaredDistance(v2.asBreeze, v3.asBreeze)
+      val squaredDist2 = squaredDistance(v2, v3)
       val fastSquaredDist3 =
         fastSquaredDistance(v2, norm2, v3, norm3, precision)
      assert((fastSquaredDist3 - squaredDist2) <= precision * squaredDist2, s"failed with m = $m")
@@ -71,7 +76,7 @@ class MLUtilsSuite extends SparkFunSuite with MLlibTestSparkContext {
         val v4 = Vectors.sparse(n, indices.slice(0, m - 10),
           indices.map(i => a(i) + 0.5).slice(0, m - 10))
         val norm4 = Vectors.norm(v4, 2.0)
-        val squaredDist = breezeSquaredDistance(v2.asBreeze, v4.asBreeze)
+        val squaredDist = squaredDistance(v2, v4)
         val fastSquaredDist =
           fastSquaredDistance(v2, norm2, v4, norm4, precision)
        assert((fastSquaredDist - squaredDist) <= precision * squaredDist, s"failed with m = $m")
diff --git a/pom.xml b/pom.xml
index 0f8e32b..3d2548f 100644
--- a/pom.xml
+++ b/pom.xml
@@ -880,7 +880,7 @@
       <dependency>
         <groupId>org.scalanlp</groupId>
         <artifactId>breeze_${scala.binary.version}</artifactId>
-        <version>1.0</version>
+        <version>1.2</version>
         <exclusions>
           <exclusion>
             <groupId>org.apache.commons</groupId>

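A note on the WeightedLeastSquaresSuite changes above: the tests now pass an
explicit tol and maxIter so the iterative solvers converge tightly under
breeze 1.2. A sketch of that call pattern, using the same named arguments as
the diff (WeightedLeastSquares is private to spark.ml, so this compiles only
inside that package; shown for illustration, not as a public API):

    import org.apache.spark.ml.optim.WeightedLeastSquares

    // Tighter tolerance and a generous iteration cap, with the solver
    // chosen explicitly, mirroring the updated tests.
    val wls = new WeightedLeastSquares(
      fitIntercept = true, regParam = 0.0, elasticNetParam = 0.0,
      standardizeFeatures = true, standardizeLabel = true,
      solverType = WeightedLeastSquares.QuasiNewton,
      tol = 1e-14, maxIter = 100000)
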
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org
