Repository: spark
Updated Branches:
  refs/heads/branch-1.0 34529975e -> 9ed17ff34


Bug fix of sparse vector conversion

Fixed a small bug caused by an inconsistency between the size of the index/data
arrays and the number of active entries (`used`) in a Breeze sparse vector.

Author: Funes <tianshao...@gmail.com>
Author: funes <tianshao...@gmail.com>

Closes #661 from funes/bugfix and squashes the following commits:

edb2b9d [funes] remove unused import
75dced3 [Funes] update test case
d129a66 [Funes] Add test for sparse breeze by vector builder
64e7198 [Funes] Copy data only when necessary
b85806c [Funes] Bug fix of sparse vector conversion
(cherry picked from commit 191279ce4edb940821d11a6b25cd33c8ad0af054)

Signed-off-by: Patrick Wendell <pwend...@gmail.com>


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/9ed17ff3
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/9ed17ff3
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/9ed17ff3

Branch: refs/heads/branch-1.0
Commit: 9ed17ff34503854c676986f1553a103a370d66c6
Parents: 3452997
Author: Funes <tianshao...@gmail.com>
Authored: Thu May 8 17:54:10 2014 -0700
Committer: Patrick Wendell <pwend...@gmail.com>
Committed: Thu May 8 17:54:17 2014 -0700

----------------------------------------------------------------------
 .../main/scala/org/apache/spark/mllib/linalg/Vectors.scala  | 6 +++++-
 .../spark/mllib/linalg/BreezeVectorConversionSuite.scala    | 9 +++++++++
 2 files changed, 14 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/9ed17ff3/mllib/src/main/scala/org/apache/spark/mllib/linalg/Vectors.scala
----------------------------------------------------------------------
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/Vectors.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/Vectors.scala
index 7cdf6bd..84d2239 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/Vectors.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/Vectors.scala
@@ -136,7 +136,11 @@ object Vectors {
           new DenseVector(v.toArray)  // Can't use underlying array directly, so make a new one
         }
       case v: BSV[Double] =>
-        new SparseVector(v.length, v.index, v.data)
+        if (v.index.length == v.used) {
+          new SparseVector(v.length, v.index, v.data)
+        } else {
+          new SparseVector(v.length, v.index.slice(0, v.used), v.data.slice(0, v.used))
+        }
       case v: BV[_] =>
         sys.error("Unsupported Breeze vector type: " + v.getClass.getName)
     }

http://git-wip-us.apache.org/repos/asf/spark/blob/9ed17ff3/mllib/src/test/scala/org/apache/spark/mllib/linalg/BreezeVectorConversionSuite.scala
----------------------------------------------------------------------
diff --git a/mllib/src/test/scala/org/apache/spark/mllib/linalg/BreezeVectorConversionSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/linalg/BreezeVectorConversionSuite.scala
index aacaa30..8abdac7 100644
--- a/mllib/src/test/scala/org/apache/spark/mllib/linalg/BreezeVectorConversionSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/mllib/linalg/BreezeVectorConversionSuite.scala
@@ -55,4 +55,13 @@ class BreezeVectorConversionSuite extends FunSuite {
     assert(vec.indices.eq(indices), "should not copy data")
     assert(vec.values.eq(values), "should not copy data")
   }
+
+  test("sparse breeze with partially-used arrays to vector") {
+    val activeSize = 3
+    val breeze = new BSV[Double](indices, values, activeSize, n)
+    val vec = Vectors.fromBreeze(breeze).asInstanceOf[SparseVector]
+    assert(vec.size === n)
+    assert(vec.indices === indices.slice(0, activeSize))
+    assert(vec.values === values.slice(0, activeSize))
+  }
 }

Reply via email to