Repository: spark Updated Branches: refs/heads/branch-1.2 30789f6ef -> 24c2c58c2
[SPARK-8525] [MLLIB] fix LabeledPoint parser when there is a whitespace between label and features vector fix LabeledPoint parser when there is a whitespace between label and features vector, e.g. (y, [x1, x2, x3]) Author: Oleksiy Dyagilev <oleksiy_dyagi...@epam.com> Closes #6954 from fe2s/SPARK-8525 and squashes the following commits: 0755b9d [Oleksiy Dyagilev] [SPARK-8525][MLLIB] addressing comment, removing dep on commons-lang c1abc2b [Oleksiy Dyagilev] [SPARK-8525][MLLIB] fix LabeledPoint parser when there is a whitespace on specific position (cherry picked from commit a8031183aff2e23de9204ddfc7e7f5edbf052a7e) Signed-off-by: Xiangrui Meng <m...@databricks.com> Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/24c2c58c Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/24c2c58c Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/24c2c58c Branch: refs/heads/branch-1.2 Commit: 24c2c58c28dfcc32d3d4aa2ea089d8bdaaa7ecf3 Parents: 30789f6 Author: Oleksiy Dyagilev <oleksiy_dyagi...@epam.com> Authored: Tue Jun 23 13:12:19 2015 -0700 Committer: Xiangrui Meng <m...@databricks.com> Committed: Tue Jun 23 13:17:43 2015 -0700 ---------------------------------------------------------------------- .../scala/org/apache/spark/mllib/util/NumericParser.scala | 2 ++ .../org/apache/spark/mllib/regression/LabeledPointSuite.scala | 5 +++++ .../org/apache/spark/mllib/util/NumericParserSuite.scala | 7 +++++++ 3 files changed, 14 insertions(+) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/spark/blob/24c2c58c/mllib/src/main/scala/org/apache/spark/mllib/util/NumericParser.scala ---------------------------------------------------------------------- diff --git a/mllib/src/main/scala/org/apache/spark/mllib/util/NumericParser.scala b/mllib/src/main/scala/org/apache/spark/mllib/util/NumericParser.scala index f7cba6c..629b0af 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/util/NumericParser.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/util/NumericParser.scala @@ -98,6 +98,8 @@ private[mllib] object NumericParser { } } else if (token == ")") { parsing = false + } else if (token.trim.isEmpty){ + // ignore whitespaces between delim chars, e.g. ", [" } else { // expecting a number items.append(parseDouble(token)) http://git-wip-us.apache.org/repos/asf/spark/blob/24c2c58c/mllib/src/test/scala/org/apache/spark/mllib/regression/LabeledPointSuite.scala ---------------------------------------------------------------------- diff --git a/mllib/src/test/scala/org/apache/spark/mllib/regression/LabeledPointSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/regression/LabeledPointSuite.scala index 110c44a..fc08bac 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/regression/LabeledPointSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/regression/LabeledPointSuite.scala @@ -32,6 +32,11 @@ class LabeledPointSuite extends FunSuite { } } + test("parse labeled points with whitespaces") { + val point = LabeledPoint.parse("(0.0, [1.0, 2.0])") + assert(point === LabeledPoint(0.0, Vectors.dense(1.0, 2.0))) + } + test("parse labeled points with v0.9 format") { val point = LabeledPoint.parse("1.0,1.0 0.0 -2.0") assert(point === LabeledPoint(1.0, Vectors.dense(1.0, 0.0, -2.0))) http://git-wip-us.apache.org/repos/asf/spark/blob/24c2c58c/mllib/src/test/scala/org/apache/spark/mllib/util/NumericParserSuite.scala ---------------------------------------------------------------------- diff --git a/mllib/src/test/scala/org/apache/spark/mllib/util/NumericParserSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/util/NumericParserSuite.scala index f68fb95..5027311 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/util/NumericParserSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/util/NumericParserSuite.scala @@ -39,4 +39,11 @@ class NumericParserSuite extends FunSuite { } } } + + test("parser with whitespaces") { + val s = "(0.0, [1.0, 2.0])" + val parsed = NumericParser.parse(s).asInstanceOf[Seq[_]] + assert(parsed(0).asInstanceOf[Double] === 0.0) + assert(parsed(1).asInstanceOf[Array[Double]] === Array(1.0, 2.0)) + } } --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org