Repository: spark Updated Branches: refs/heads/master 233e534ac -> d188a6776
[SPARK-10533][SQL] handle scientific notation in sqlParser https://issues.apache.org/jira/browse/SPARK-10533 val df = sqlContext.createDataFrame(Seq(("a",1.0),("b",2.0),("c",3.0))) df.filter("_2 < 2.0e1").show Scientific notation didn't work. Author: Daoyuan Wang <daoyuan.w...@intel.com> Closes #9085 from adrian-wang/scinotation. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/d188a677 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/d188a677 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/d188a677 Branch: refs/heads/master Commit: d188a67762dfc09929e30931509be5851e29dfa5 Parents: 233e534 Author: Daoyuan Wang <daoyuan.w...@intel.com> Authored: Tue Nov 3 22:30:23 2015 +0800 Committer: Cheng Lian <l...@databricks.com> Committed: Tue Nov 3 22:30:23 2015 +0800 ---------------------------------------------------------------------- .../spark/sql/catalyst/AbstractSparkSQLParser.scala | 15 +++++++++++++-- .../org/apache/spark/sql/catalyst/SqlParser.scala | 11 +++++++++++ .../scala/org/apache/spark/sql/DataFrameSuite.scala | 11 ++++++++--- 3 files changed, 32 insertions(+), 5 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/spark/blob/d188a677/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/AbstractSparkSQLParser.scala ---------------------------------------------------------------------- diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/AbstractSparkSQLParser.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/AbstractSparkSQLParser.scala index 2bac08e..04ac4f2 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/AbstractSparkSQLParser.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/AbstractSparkSQLParser.scala @@ -82,6 +82,10 @@ class SqlLexical extends StdLexical { override def toString: String = chars } + case class 
DecimalLit(chars: String) extends Token { + override def toString: String = chars + } + /* This is a work around to support the lazy setting */ def initialize(keywords: Seq[String]): Unit = { reserved.clear() @@ -102,8 +106,12 @@ class SqlLexical extends StdLexical { } override lazy val token: Parser[Token] = - ( identChar ~ (identChar | digit).* ^^ - { case first ~ rest => processIdent((first :: rest).mkString) } + ( rep1(digit) ~ ('.' ~> digit.*).? ~ (exp ~> sign.? ~ rep1(digit)) ^^ { + case i ~ None ~ (sig ~ rest) => + DecimalLit(i.mkString + "e" + sig.mkString + rest.mkString) + case i ~ Some(d) ~ (sig ~ rest) => + DecimalLit(i.mkString + "." + d.mkString + "e" + sig.mkString + rest.mkString) + } | digit.* ~ identChar ~ (identChar | digit).* ^^ { case first ~ middle ~ rest => processIdent((first ++ (middle :: rest)).mkString) } | rep1(digit) ~ ('.' ~> digit.*).? ^^ { @@ -125,6 +133,9 @@ class SqlLexical extends StdLexical { override def identChar: Parser[Elem] = letter | elem('_') + private lazy val sign: Parser[Elem] = elem("s", c => c == '+' || c == '-') + private lazy val exp: Parser[Elem] = elem("e", c => c == 'E' || c == 'e') + override def whitespace: Parser[Any] = ( whitespaceChar | '/' ~ '*' ~ comment http://git-wip-us.apache.org/repos/asf/spark/blob/d188a677/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/SqlParser.scala ---------------------------------------------------------------------- diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/SqlParser.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/SqlParser.scala index d7567e8..1ba559d 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/SqlParser.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/SqlParser.scala @@ -337,6 +337,9 @@ object SqlParser extends AbstractSparkSQLParser with DataTypeParser { | sign.? ~ unsignedFloat ^^ { case s ~ f => Literal(toDecimalOrDouble(s.getOrElse("") + f)) } + | sign.? 
~ unsignedDecimal ^^ { + case s ~ d => Literal(toDecimalOrDouble(s.getOrElse("") + d)) + } ) protected lazy val unsignedFloat: Parser[String] = @@ -344,6 +347,14 @@ object SqlParser extends AbstractSparkSQLParser with DataTypeParser { | elem("decimal", _.isInstanceOf[lexical.FloatLit]) ^^ (_.chars) ) + protected lazy val unsignedDecimal: Parser[String] = + ( "." ~> decimalLit ^^ { u => "0." + u } + | elem("scientific_notation", _.isInstanceOf[lexical.DecimalLit]) ^^ (_.chars) + ) + + def decimalLit: Parser[String] = + elem("scientific_notation", _.isInstanceOf[lexical.DecimalLit]) ^^ (_.chars) + protected lazy val sign: Parser[String] = ("+" | "-") protected lazy val integral: Parser[String] = http://git-wip-us.apache.org/repos/asf/spark/blob/d188a677/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala ---------------------------------------------------------------------- diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala index 6b86c59..a883bcb 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala @@ -177,9 +177,14 @@ class DataFrameSuite extends QueryTest with SharedSQLContext { } test("filterExpr") { - checkAnswer( - testData.filter("key > 90"), - testData.collect().filter(_.getInt(0) > 90).toSeq) + val res = testData.collect().filter(_.getInt(0) > 90).toSeq + checkAnswer(testData.filter("key > 90"), res) + checkAnswer(testData.filter("key > 9.0e1"), res) + checkAnswer(testData.filter("key > .9e+2"), res) + checkAnswer(testData.filter("key > 0.9e+2"), res) + checkAnswer(testData.filter("key > 900e-1"), res) + checkAnswer(testData.filter("key > 900.0E-1"), res) + checkAnswer(testData.filter("key > 9.e+1"), res) } test("filterExpr using where") { --------------------------------------------------------------------- To unsubscribe, e-mail: 
commits-unsubscribe@spark.apache.org For additional commands, e-mail: commits-help@spark.apache.org