Repository: spark Updated Branches: refs/heads/master 8509519d8 -> 78b39c7e0
[SPARK-7115] [MLLIB] skip the very first 1 in poly expansion yinxusen Author: Xiangrui Meng <m...@databricks.com> Closes #5681 from mengxr/SPARK-7115 and squashes the following commits: 9ac27cd [Xiangrui Meng] skip the very first 1 in poly expansion Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/78b39c7e Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/78b39c7e Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/78b39c7e Branch: refs/heads/master Commit: 78b39c7e0de8c9dc748cfbf8f78578a9524b6a94 Parents: 8509519 Author: Xiangrui Meng <m...@databricks.com> Authored: Fri Apr 24 08:27:48 2015 -0700 Committer: Xiangrui Meng <m...@databricks.com> Committed: Fri Apr 24 08:27:48 2015 -0700 ---------------------------------------------------------------------- .../spark/ml/feature/PolynomialExpansion.scala | 22 ++++++++++++-------- .../ml/feature/PolynomialExpansionSuite.scala | 22 ++++++++++---------- 2 files changed, 24 insertions(+), 20 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/spark/blob/78b39c7e/mllib/src/main/scala/org/apache/spark/ml/feature/PolynomialExpansion.scala ---------------------------------------------------------------------- diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/PolynomialExpansion.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/PolynomialExpansion.scala index c3a59a3..d855f04 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/PolynomialExpansion.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/PolynomialExpansion.scala @@ -87,7 +87,9 @@ object PolynomialExpansion { if (multiplier == 0.0) { // do nothing } else if (degree == 0 || lastIdx < 0) { - polyValues(curPolyIdx) = multiplier + if (curPolyIdx >= 0) { // skip the very first 1 + polyValues(curPolyIdx) = multiplier + } } else { val v = values(lastIdx) val lastIdx1 = lastIdx - 1 @@ -116,8 +118,10 @@ object PolynomialExpansion { if (multiplier == 0.0) { // do nothing } else if (degree == 0 || lastIdx < 0) { - polyIndices += curPolyIdx - polyValues += multiplier + if (curPolyIdx >= 0) { // skip the very first 1 + polyIndices += curPolyIdx + polyValues += multiplier + } } else { // Skip all zeros at the tail. val v = values(lastIdx) @@ -139,8 +143,8 @@ object PolynomialExpansion { private def expand(dv: DenseVector, degree: Int): DenseVector = { val n = dv.size val polySize = getPolySize(n, degree) - val polyValues = new Array[Double](polySize) - expandDense(dv.values, n - 1, degree, 1.0, polyValues, 0) + val polyValues = new Array[Double](polySize - 1) + expandDense(dv.values, n - 1, degree, 1.0, polyValues, -1) new DenseVector(polyValues) } @@ -149,12 +153,12 @@ object PolynomialExpansion { val nnz = sv.values.length val nnzPolySize = getPolySize(nnz, degree) val polyIndices = mutable.ArrayBuilder.make[Int] - polyIndices.sizeHint(nnzPolySize) + polyIndices.sizeHint(nnzPolySize - 1) val polyValues = mutable.ArrayBuilder.make[Double] - polyValues.sizeHint(nnzPolySize) + polyValues.sizeHint(nnzPolySize - 1) expandSparse( - sv.indices, sv.values, nnz - 1, sv.size - 1, degree, 1.0, polyIndices, polyValues, 0) - new SparseVector(polySize, polyIndices.result(), polyValues.result()) + sv.indices, sv.values, nnz - 1, sv.size - 1, degree, 1.0, polyIndices, polyValues, -1) + new SparseVector(polySize - 1, polyIndices.result(), polyValues.result()) } def expand(v: Vector, degree: Int): Vector = { http://git-wip-us.apache.org/repos/asf/spark/blob/78b39c7e/mllib/src/test/scala/org/apache/spark/ml/feature/PolynomialExpansionSuite.scala ---------------------------------------------------------------------- diff --git a/mllib/src/test/scala/org/apache/spark/ml/feature/PolynomialExpansionSuite.scala b/mllib/src/test/scala/org/apache/spark/ml/feature/PolynomialExpansionSuite.scala index b0a537b..c1d64fb 100644 --- a/mllib/src/test/scala/org/apache/spark/ml/feature/PolynomialExpansionSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/ml/feature/PolynomialExpansionSuite.scala @@ -44,11 +44,11 @@ class PolynomialExpansionSuite extends FunSuite with MLlibTestSparkContext { ) val twoDegreeExpansion: Array[Vector] = Array( - Vectors.sparse(10, Array(0, 1, 2, 3, 4, 5), Array(1.0, -2.0, 4.0, 2.3, -4.6, 5.29)), - Vectors.dense(1.0, -2.0, 4.0, 2.3, -4.6, 5.29), - Vectors.dense(Array(1.0) ++ Array.fill[Double](9)(0.0)), - Vectors.dense(1.0, 0.6, 0.36, -1.1, -0.66, 1.21, -3.0, -1.8, 3.3, 9.0), - Vectors.sparse(10, Array(0), Array(1.0))) + Vectors.sparse(9, Array(0, 1, 2, 3, 4), Array(-2.0, 4.0, 2.3, -4.6, 5.29)), + Vectors.dense(-2.0, 4.0, 2.3, -4.6, 5.29), + Vectors.dense(new Array[Double](9)), + Vectors.dense(0.6, 0.36, -1.1, -0.66, 1.21, -3.0, -1.8, 3.3, 9.0), + Vectors.sparse(9, Array.empty, Array.empty)) val df = sqlContext.createDataFrame(data.zip(twoDegreeExpansion)).toDF("features", "expected") @@ -76,13 +76,13 @@ class PolynomialExpansionSuite extends FunSuite with MLlibTestSparkContext { ) val threeDegreeExpansion: Array[Vector] = Array( - Vectors.sparse(20, Array(0, 1, 2, 3, 4, 5, 6, 7, 8, 9), - Array(1.0, -2.0, 4.0, -8.0, 2.3, -4.6, 9.2, 5.29, -10.58, 12.17)), - Vectors.dense(1.0, -2.0, 4.0, -8.0, 2.3, -4.6, 9.2, 5.29, -10.58, 12.17), - Vectors.dense(Array(1.0) ++ Array.fill[Double](19)(0.0)), - Vectors.dense(1.0, 0.6, 0.36, 0.216, -1.1, -0.66, -0.396, 1.21, 0.726, -1.331, -3.0, -1.8, + Vectors.sparse(19, Array(0, 1, 2, 3, 4, 5, 6, 7, 8), + Array(-2.0, 4.0, -8.0, 2.3, -4.6, 9.2, 5.29, -10.58, 12.17)), + Vectors.dense(-2.0, 4.0, -8.0, 2.3, -4.6, 9.2, 5.29, -10.58, 12.17), + Vectors.dense(new Array[Double](19)), + Vectors.dense(0.6, 0.36, 0.216, -1.1, -0.66, -0.396, 1.21, 0.726, -1.331, -3.0, -1.8, -1.08, 3.3, 1.98, -3.63, 9.0, 5.4, -9.9, -27.0), - Vectors.sparse(20, Array(0), Array(1.0))) + Vectors.sparse(19, Array.empty, Array.empty)) val df = sqlContext.createDataFrame(data.zip(threeDegreeExpansion)).toDF("features", "expected") --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org