spark git commit: [SPARK-8525] [MLLIB] fix LabeledPoint parser when there is a whitespace between label and features vector

meng Tue, 23 Jun 2015 13:12:59 -0700

Repository: spark
Updated Branches:
  refs/heads/master f2fb0285a -> a8031183a



[SPARK-8525] [MLLIB] fix LabeledPoint parser when there is a whitespace between 
label and features vector

Fix the LabeledPoint parser so that it accepts whitespace between the label and
the features vector, e.g.
(y, [x1, x2, x3])

Author: Oleksiy Dyagilev <oleksiy_dyagi...@epam.com>

Closes #6954 from fe2s/SPARK-8525 and squashes the following commits:

0755b9d [Oleksiy Dyagilev] [SPARK-8525][MLLIB] addressing comment, removing dep 
on commons-lang
c1abc2b [Oleksiy Dyagilev] [SPARK-8525][MLLIB] fix LabeledPoint parser when 
there is a whitespace on specific position


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/a8031183
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/a8031183
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/a8031183

Branch: refs/heads/master
Commit: a8031183aff2e23de9204ddfc7e7f5edbf052a7e
Parents: f2fb028
Author: Oleksiy Dyagilev <oleksiy_dyagi...@epam.com>
Authored: Tue Jun 23 13:12:19 2015 -0700
Committer: Xiangrui Meng <m...@databricks.com>
Committed: Tue Jun 23 13:12:19 2015 -0700

----------------------------------------------------------------------
 .../scala/org/apache/spark/mllib/util/NumericParser.scala     | 2 ++
 .../org/apache/spark/mllib/regression/LabeledPointSuite.scala | 5 +++++
 .../org/apache/spark/mllib/util/NumericParserSuite.scala      | 7 +++++++
 3 files changed, 14 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/a8031183/mllib/src/main/scala/org/apache/spark/mllib/util/NumericParser.scala
----------------------------------------------------------------------
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/util/NumericParser.scala b/mllib/src/main/scala/org/apache/spark/mllib/util/NumericParser.scala
index 308f7f3..a841c5c 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/util/NumericParser.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/util/NumericParser.scala
@@ -98,6 +98,8 @@ private[mllib] object NumericParser {
         }
       } else if (token == ")") {
         parsing = false
+      } else if (token.trim.isEmpty){
+          // ignore whitespaces between delim chars, e.g. ", ["
       } else {
         // expecting a number
         items.append(parseDouble(token))

http://git-wip-us.apache.org/repos/asf/spark/blob/a8031183/mllib/src/test/scala/org/apache/spark/mllib/regression/LabeledPointSuite.scala
----------------------------------------------------------------------
diff --git a/mllib/src/test/scala/org/apache/spark/mllib/regression/LabeledPointSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/regression/LabeledPointSuite.scala
index d8364a0..f8d0af8 100644
--- a/mllib/src/test/scala/org/apache/spark/mllib/regression/LabeledPointSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/mllib/regression/LabeledPointSuite.scala
@@ -31,6 +31,11 @@ class LabeledPointSuite extends SparkFunSuite {
     }
   }
 
+  test("parse labeled points with whitespaces") {
+    val point = LabeledPoint.parse("(0.0, [1.0, 2.0])")
+    assert(point === LabeledPoint(0.0, Vectors.dense(1.0, 2.0)))
+  }
+
   test("parse labeled points with v0.9 format") {
     val point = LabeledPoint.parse("1.0,1.0 0.0 -2.0")
     assert(point === LabeledPoint(1.0, Vectors.dense(1.0, 0.0, -2.0)))

http://git-wip-us.apache.org/repos/asf/spark/blob/a8031183/mllib/src/test/scala/org/apache/spark/mllib/util/NumericParserSuite.scala
----------------------------------------------------------------------
diff --git a/mllib/src/test/scala/org/apache/spark/mllib/util/NumericParserSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/util/NumericParserSuite.scala
index 8dcb9ba..fa4f74d 100644
--- a/mllib/src/test/scala/org/apache/spark/mllib/util/NumericParserSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/mllib/util/NumericParserSuite.scala
@@ -37,4 +37,11 @@ class NumericParserSuite extends SparkFunSuite {
       }
     }
   }
+
+  test("parser with whitespaces") {
+    val s = "(0.0, [1.0, 2.0])"
+    val parsed = NumericParser.parse(s).asInstanceOf[Seq[_]]
+    assert(parsed(0).asInstanceOf[Double] === 0.0)
+    assert(parsed(1).asInstanceOf[Array[Double]] === Array(1.0, 2.0))
+  }
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

spark git commit: [SPARK-8525] [MLLIB] fix LabeledPoint parser when there is a whitespace between label and features vector

Reply via email to