Repository: spark
Updated Branches:
  refs/heads/master 8509519d8 -> 78b39c7e0


[SPARK-7115] [MLLIB] skip the very first 1 in poly expansion

yinxusen

Author: Xiangrui Meng <m...@databricks.com>

Closes #5681 from mengxr/SPARK-7115 and squashes the following commits:

9ac27cd [Xiangrui Meng] skip the very first 1 in poly expansion


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/78b39c7e
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/78b39c7e
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/78b39c7e

Branch: refs/heads/master
Commit: 78b39c7e0de8c9dc748cfbf8f78578a9524b6a94
Parents: 8509519
Author: Xiangrui Meng <m...@databricks.com>
Authored: Fri Apr 24 08:27:48 2015 -0700
Committer: Xiangrui Meng <m...@databricks.com>
Committed: Fri Apr 24 08:27:48 2015 -0700

----------------------------------------------------------------------
 .../spark/ml/feature/PolynomialExpansion.scala  | 22 ++++++++++++--------
 .../ml/feature/PolynomialExpansionSuite.scala   | 22 ++++++++++----------
 2 files changed, 24 insertions(+), 20 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/78b39c7e/mllib/src/main/scala/org/apache/spark/ml/feature/PolynomialExpansion.scala
----------------------------------------------------------------------
diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/PolynomialExpansion.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/PolynomialExpansion.scala
index c3a59a3..d855f04 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/feature/PolynomialExpansion.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/feature/PolynomialExpansion.scala
@@ -87,7 +87,9 @@ object PolynomialExpansion {
     if (multiplier == 0.0) {
       // do nothing
     } else if (degree == 0 || lastIdx < 0) {
-      polyValues(curPolyIdx) = multiplier
+      if (curPolyIdx >= 0) { // skip the very first 1
+        polyValues(curPolyIdx) = multiplier
+      }
     } else {
       val v = values(lastIdx)
       val lastIdx1 = lastIdx - 1
@@ -116,8 +118,10 @@ object PolynomialExpansion {
     if (multiplier == 0.0) {
       // do nothing
     } else if (degree == 0 || lastIdx < 0) {
-      polyIndices += curPolyIdx
-      polyValues += multiplier
+      if (curPolyIdx >= 0) { // skip the very first 1
+        polyIndices += curPolyIdx
+        polyValues += multiplier
+      }
     } else {
       // Skip all zeros at the tail.
       val v = values(lastIdx)
@@ -139,8 +143,8 @@ object PolynomialExpansion {
   private def expand(dv: DenseVector, degree: Int): DenseVector = {
     val n = dv.size
     val polySize = getPolySize(n, degree)
-    val polyValues = new Array[Double](polySize)
-    expandDense(dv.values, n - 1, degree, 1.0, polyValues, 0)
+    val polyValues = new Array[Double](polySize - 1)
+    expandDense(dv.values, n - 1, degree, 1.0, polyValues, -1)
     new DenseVector(polyValues)
   }
 
@@ -149,12 +153,12 @@ object PolynomialExpansion {
     val nnz = sv.values.length
     val nnzPolySize = getPolySize(nnz, degree)
     val polyIndices = mutable.ArrayBuilder.make[Int]
-    polyIndices.sizeHint(nnzPolySize)
+    polyIndices.sizeHint(nnzPolySize - 1)
     val polyValues = mutable.ArrayBuilder.make[Double]
-    polyValues.sizeHint(nnzPolySize)
+    polyValues.sizeHint(nnzPolySize - 1)
     expandSparse(
-      sv.indices, sv.values, nnz - 1, sv.size - 1, degree, 1.0, polyIndices, polyValues, 0)
-    new SparseVector(polySize, polyIndices.result(), polyValues.result())
+      sv.indices, sv.values, nnz - 1, sv.size - 1, degree, 1.0, polyIndices, polyValues, -1)
+    new SparseVector(polySize - 1, polyIndices.result(), polyValues.result())
   }
 
   def expand(v: Vector, degree: Int): Vector = {

http://git-wip-us.apache.org/repos/asf/spark/blob/78b39c7e/mllib/src/test/scala/org/apache/spark/ml/feature/PolynomialExpansionSuite.scala
----------------------------------------------------------------------
diff --git a/mllib/src/test/scala/org/apache/spark/ml/feature/PolynomialExpansionSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/feature/PolynomialExpansionSuite.scala
index b0a537b..c1d64fb 100644
--- a/mllib/src/test/scala/org/apache/spark/ml/feature/PolynomialExpansionSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/ml/feature/PolynomialExpansionSuite.scala
@@ -44,11 +44,11 @@ class PolynomialExpansionSuite extends FunSuite with MLlibTestSparkContext {
     )
 
     val twoDegreeExpansion: Array[Vector] = Array(
-      Vectors.sparse(10, Array(0, 1, 2, 3, 4, 5), Array(1.0, -2.0, 4.0, 2.3, -4.6, 5.29)),
-      Vectors.dense(1.0, -2.0, 4.0, 2.3, -4.6, 5.29),
-      Vectors.dense(Array(1.0) ++ Array.fill[Double](9)(0.0)),
-      Vectors.dense(1.0, 0.6, 0.36, -1.1, -0.66, 1.21, -3.0, -1.8, 3.3, 9.0),
-      Vectors.sparse(10, Array(0), Array(1.0)))
+      Vectors.sparse(9, Array(0, 1, 2, 3, 4), Array(-2.0, 4.0, 2.3, -4.6, 5.29)),
+      Vectors.dense(-2.0, 4.0, 2.3, -4.6, 5.29),
+      Vectors.dense(new Array[Double](9)),
+      Vectors.dense(0.6, 0.36, -1.1, -0.66, 1.21, -3.0, -1.8, 3.3, 9.0),
+      Vectors.sparse(9, Array.empty, Array.empty))
 
    val df = sqlContext.createDataFrame(data.zip(twoDegreeExpansion)).toDF("features", "expected")
 
@@ -76,13 +76,13 @@ class PolynomialExpansionSuite extends FunSuite with MLlibTestSparkContext {
     )
 
     val threeDegreeExpansion: Array[Vector] = Array(
-      Vectors.sparse(20, Array(0, 1, 2, 3, 4, 5, 6, 7, 8, 9),
-        Array(1.0, -2.0, 4.0, -8.0, 2.3, -4.6, 9.2, 5.29, -10.58, 12.17)),
-      Vectors.dense(1.0, -2.0, 4.0, -8.0, 2.3, -4.6, 9.2, 5.29, -10.58, 12.17),
-      Vectors.dense(Array(1.0) ++ Array.fill[Double](19)(0.0)),
-      Vectors.dense(1.0, 0.6, 0.36, 0.216, -1.1, -0.66, -0.396, 1.21, 0.726, -1.331, -3.0, -1.8,
+      Vectors.sparse(19, Array(0, 1, 2, 3, 4, 5, 6, 7, 8),
+        Array(-2.0, 4.0, -8.0, 2.3, -4.6, 9.2, 5.29, -10.58, 12.17)),
+      Vectors.dense(-2.0, 4.0, -8.0, 2.3, -4.6, 9.2, 5.29, -10.58, 12.17),
+      Vectors.dense(new Array[Double](19)),
+      Vectors.dense(0.6, 0.36, 0.216, -1.1, -0.66, -0.396, 1.21, 0.726, -1.331, -3.0, -1.8,
         -1.08, 3.3, 1.98, -3.63, 9.0, 5.4, -9.9, -27.0),
-      Vectors.sparse(20, Array(0), Array(1.0)))
+      Vectors.sparse(19, Array.empty, Array.empty))
 
    val df = sqlContext.createDataFrame(data.zip(threeDegreeExpansion)).toDF("features", "expected")
 


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

Reply via email to