spark git commit: [SPARK-7615][MLLIB] MLLIB Word2Vec wordVectors divided by Euclidean Norm equals to zero
Repository: spark Updated Branches: refs/heads/branch-1.6 4c67d55c0 -> 94b39f777 [SPARK-7615][MLLIB] MLLIB Word2Vec wordVectors divided by Euclidean Norm equals to zero Cosine similarity with 0 vector should be 0 Related to https://github.com/apache/spark/pull/10152 Author: Sean Owen Closes #10696 from srowen/SPARK-7615. (cherry picked from commit c48f2a3a5fd714ad2ff19b29337e55583988431e) Signed-off-by: Sean Owen Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/94b39f77 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/94b39f77 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/94b39f77 Branch: refs/heads/branch-1.6 Commit: 94b39f7777ecff3794727c186bd681fa4c6af4fd Parents: 4c67d55 Author: Sean Owen Authored: Tue Jan 12 11:50:33 2016 +0000 Committer: Sean Owen Committed: Tue Jan 12 13:27:44 2016 +0000 -- .../main/scala/org/apache/spark/mllib/feature/Word2Vec.scala | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/94b39f77/mllib/src/main/scala/org/apache/spark/mllib/feature/Word2Vec.scala -- diff --git a/mllib/src/main/scala/org/apache/spark/mllib/feature/Word2Vec.scala b/mllib/src/main/scala/org/apache/spark/mllib/feature/Word2Vec.scala index a010775..1dbedaa 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/feature/Word2Vec.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/feature/Word2Vec.scala @@ -539,7 +539,12 @@ class Word2VecModel private[spark] ( val cosVec = cosineVec.map(_.toDouble) var ind = 0 while (ind < numWords) { - cosVec(ind) /= wordVecNorms(ind) + val norm = wordVecNorms(ind) + if (norm == 0.0) { +cosVec(ind) = 0.0 + } else { +cosVec(ind) /= norm + } ind += 1 } wordList.zip(cosVec) - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARK-7615][MLLIB] MLLIB Word2Vec wordVectors divided by Euclidean Norm equals to zero
Repository: spark Updated Branches: refs/heads/master 8cfa218f4 -> c48f2a3a5 [SPARK-7615][MLLIB] MLLIB Word2Vec wordVectors divided by Euclidean Norm equals to zero Cosine similarity with 0 vector should be 0 Related to https://github.com/apache/spark/pull/10152 Author: Sean Owen Closes #10696 from srowen/SPARK-7615. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/c48f2a3a Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/c48f2a3a Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/c48f2a3a Branch: refs/heads/master Commit: c48f2a3a5fd714ad2ff19b29337e55583988431e Parents: 8cfa218 Author: Sean Owen Authored: Tue Jan 12 11:50:33 2016 +0000 Committer: Sean Owen Committed: Tue Jan 12 11:50:33 2016 +0000 -- .../main/scala/org/apache/spark/mllib/feature/Word2Vec.scala | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) -- http://git-wip-us.apache.org/repos/asf/spark/blob/c48f2a3a/mllib/src/main/scala/org/apache/spark/mllib/feature/Word2Vec.scala -- diff --git a/mllib/src/main/scala/org/apache/spark/mllib/feature/Word2Vec.scala b/mllib/src/main/scala/org/apache/spark/mllib/feature/Word2Vec.scala index dc5d070..dee8988 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/feature/Word2Vec.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/feature/Word2Vec.scala @@ -543,7 +543,12 @@ class Word2VecModel private[spark] ( val cosVec = cosineVec.map(_.toDouble) var ind = 0 while (ind < numWords) { - cosVec(ind) /= wordVecNorms(ind) + val norm = wordVecNorms(ind) + if (norm == 0.0) { +cosVec(ind) = 0.0 + } else { +cosVec(ind) /= norm + } ind += 1 } wordList.zip(cosVec) - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org