spark git commit: [SPARK-25047][ML] Can't assign SerializedLambda to scala.Function1 in deserialization of BucketedRandomProjectionLSHModel

srowen Thu, 09 Aug 2018 06:08:00 -0700

Repository: spark
Updated Branches:
  refs/heads/master b2950cef3 -> 1a7e747ce



[SPARK-25047][ML] Can't assign SerializedLambda to scala.Function1 in 
deserialization of BucketedRandomProjectionLSHModel

## What changes were proposed in this pull request?

Convert two function fields in ML classes to simple functions to avoiâ¦d odd 
SerializedLambda deserialization problem

## How was this patch tested?

Existing tests.

Closes #22032 from srowen/SPARK-25047.

Authored-by: Sean Owen <sro...@gmail.com>
Signed-off-by: Sean Owen <sro...@gmail.com>


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/1a7e747c
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/1a7e747c
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/1a7e747c

Branch: refs/heads/master
Commit: 1a7e747ce4f8c5253c5923045d23c62e43a6566b
Parents: b2950ce
Author: Sean Owen <sro...@gmail.com>
Authored: Thu Aug 9 08:07:46 2018 -0500
Committer: Sean Owen <sro...@gmail.com>
Committed: Thu Aug 9 08:07:46 2018 -0500

----------------------------------------------------------------------
 .../feature/BucketedRandomProjectionLSH.scala   | 14 ++++++--------
 .../scala/org/apache/spark/ml/feature/LSH.scala |  4 ++--
 .../apache/spark/ml/feature/MinHashLSH.scala    | 20 +++++++++-----------
 .../GeneralizedLinearRegression.scala           | 15 +++++++--------
 4 files changed, 24 insertions(+), 29 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/1a7e747c/mllib/src/main/scala/org/apache/spark/ml/feature/BucketedRandomProjectionLSH.scala
----------------------------------------------------------------------
diff --git 
a/mllib/src/main/scala/org/apache/spark/ml/feature/BucketedRandomProjectionLSH.scala
 
b/mllib/src/main/scala/org/apache/spark/ml/feature/BucketedRandomProjectionLSH.scala
index a906e95..0554455 100644
--- 
a/mllib/src/main/scala/org/apache/spark/ml/feature/BucketedRandomProjectionLSH.scala
+++ 
b/mllib/src/main/scala/org/apache/spark/ml/feature/BucketedRandomProjectionLSH.scala
@@ -82,14 +82,12 @@ class BucketedRandomProjectionLSHModel private[ml](
   override def setOutputCol(value: String): this.type = super.set(outputCol, 
value)
 
   @Since("2.1.0")
-  override protected[ml] val hashFunction: Vector => Array[Vector] = {
-    key: Vector => {
-      val hashValues: Array[Double] = randUnitVectors.map({
-        randUnitVector => Math.floor(BLAS.dot(key, randUnitVector) / 
$(bucketLength))
-      })
-      // TODO: Output vectors of dimension numHashFunctions in SPARK-18450
-      hashValues.map(Vectors.dense(_))
-    }
+  override protected[ml] def hashFunction(elems: Vector): Array[Vector] = {
+    val hashValues = randUnitVectors.map(
+      randUnitVector => Math.floor(BLAS.dot(elems, randUnitVector) / 
$(bucketLength))
+    )
+    // TODO: Output vectors of dimension numHashFunctions in SPARK-18450
+    hashValues.map(Vectors.dense(_))
   }
 
   @Since("2.1.0")

http://git-wip-us.apache.org/repos/asf/spark/blob/1a7e747c/mllib/src/main/scala/org/apache/spark/ml/feature/LSH.scala
----------------------------------------------------------------------
diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/LSH.scala 
b/mllib/src/main/scala/org/apache/spark/ml/feature/LSH.scala
index a70931f..b208523 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/feature/LSH.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/feature/LSH.scala
@@ -75,7 +75,7 @@ private[ml] abstract class LSHModel[T <: LSHModel[T]]
    * The hash function of LSH, mapping an input feature vector to multiple 
hash vectors.
    * @return The mapping of LSH function.
    */
-  protected[ml] val hashFunction: Vector => Array[Vector]
+  protected[ml] def hashFunction(elems: Vector): Array[Vector]
 
   /**
    * Calculate the distance between two different keys using the distance 
metric corresponding
@@ -97,7 +97,7 @@ private[ml] abstract class LSHModel[T <: LSHModel[T]]
 
   override def transform(dataset: Dataset[_]): DataFrame = {
     transformSchema(dataset.schema, logging = true)
-    val transformUDF = udf(hashFunction, DataTypes.createArrayType(new 
VectorUDT))
+    val transformUDF = udf(hashFunction(_: Vector), 
DataTypes.createArrayType(new VectorUDT))
     dataset.withColumn($(outputCol), transformUDF(dataset($(inputCol))))
   }
 

http://git-wip-us.apache.org/repos/asf/spark/blob/1a7e747c/mllib/src/main/scala/org/apache/spark/ml/feature/MinHashLSH.scala
----------------------------------------------------------------------
diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/MinHashLSH.scala 
b/mllib/src/main/scala/org/apache/spark/ml/feature/MinHashLSH.scala
index a043033..21cde66 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/feature/MinHashLSH.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/feature/MinHashLSH.scala
@@ -60,18 +60,16 @@ class MinHashLSHModel private[ml](
   override def setOutputCol(value: String): this.type = super.set(outputCol, 
value)
 
   @Since("2.1.0")
-  override protected[ml] val hashFunction: Vector => Array[Vector] = {
-    elems: Vector => {
-      require(elems.numNonzeros > 0, "Must have at least 1 non zero entry.")
-      val elemsList = elems.toSparse.indices.toList
-      val hashValues = randCoefficients.map { case (a, b) =>
-        elemsList.map { elem: Int =>
-          ((1L + elem) * a + b) % MinHashLSH.HASH_PRIME
-        }.min.toDouble
-      }
-      // TODO: Output vectors of dimension numHashFunctions in SPARK-18450
-      hashValues.map(Vectors.dense(_))
+  override protected[ml] def hashFunction(elems: Vector): Array[Vector] = {
+    require(elems.numNonzeros > 0, "Must have at least 1 non zero entry.")
+    val elemsList = elems.toSparse.indices.toList
+    val hashValues = randCoefficients.map { case (a, b) =>
+      elemsList.map { elem: Int =>
+        ((1L + elem) * a + b) % MinHashLSH.HASH_PRIME
+      }.min.toDouble
     }
+    // TODO: Output vectors of dimension numHashFunctions in SPARK-18450
+    hashValues.map(Vectors.dense(_))
   }
 
   @Since("2.1.0")

http://git-wip-us.apache.org/repos/asf/spark/blob/1a7e747c/mllib/src/main/scala/org/apache/spark/ml/regression/GeneralizedLinearRegression.scala
----------------------------------------------------------------------
diff --git 
a/mllib/src/main/scala/org/apache/spark/ml/regression/GeneralizedLinearRegression.scala
 
b/mllib/src/main/scala/org/apache/spark/ml/regression/GeneralizedLinearRegression.scala
index 20878b6..abb60ea 100644
--- 
a/mllib/src/main/scala/org/apache/spark/ml/regression/GeneralizedLinearRegression.scala
+++ 
b/mllib/src/main/scala/org/apache/spark/ml/regression/GeneralizedLinearRegression.scala
@@ -515,14 +515,13 @@ object GeneralizedLinearRegression extends 
DefaultParamsReadable[GeneralizedLine
      * The reweight function used to update working labels and weights
      * at each iteration of [[IterativelyReweightedLeastSquares]].
      */
-    val reweightFunc: (OffsetInstance, WeightedLeastSquaresModel) => (Double, 
Double) = {
-      (instance: OffsetInstance, model: WeightedLeastSquaresModel) => {
-        val eta = model.predict(instance.features) + instance.offset
-        val mu = fitted(eta)
-        val newLabel = eta - instance.offset + (instance.label - mu) * 
link.deriv(mu)
-        val newWeight = instance.weight / (math.pow(this.link.deriv(mu), 2.0) 
* family.variance(mu))
-        (newLabel, newWeight)
-      }
+    def reweightFunc(
+        instance: OffsetInstance, model: WeightedLeastSquaresModel): (Double, 
Double) = {
+      val eta = model.predict(instance.features) + instance.offset
+      val mu = fitted(eta)
+      val newLabel = eta - instance.offset + (instance.label - mu) * 
link.deriv(mu)
+      val newWeight = instance.weight / (math.pow(this.link.deriv(mu), 2.0) * 
family.variance(mu))
+      (newLabel, newWeight)
     }
   }
 


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

spark git commit: [SPARK-25047][ML] Can't assign SerializedLambda to scala.Function1 in deserialization of BucketedRandomProjectionLSHModel

Reply via email to