This is an automated email from the ASF dual-hosted git repository.

tdsilva pushed a commit to branch 4.x-HBase-1.2
in repository https://gitbox.apache.org/repos/asf/phoenix.git
The following commit(s) were added to refs/heads/4.x-HBase-1.2 by this push:
     new 9a1a494  PHOENIX-5102 Filtering on DATE types throws an exception using the spark connector
9a1a494 is described below

commit 9a1a494fe012031c00ee3c9fba35fb74e988500f
Author: Thomas D'Silva <tdsi...@apache.org>
AuthorDate: Tue Jan 15 16:56:15 2019 -0800

    PHOENIX-5102 Filtering on DATE types throws an exception using the spark connector
---
 .../src/it/scala/org/apache/phoenix/spark/PhoenixSparkIT.scala | 6 +++++-
 .../org/apache/phoenix/spark/FilterExpressionCompiler.scala    | 9 +++++++++
 2 files changed, 14 insertions(+), 1 deletion(-)

diff --git a/phoenix-spark/src/it/scala/org/apache/phoenix/spark/PhoenixSparkIT.scala b/phoenix-spark/src/it/scala/org/apache/phoenix/spark/PhoenixSparkIT.scala
index d6d0f92..b40b638 100644
--- a/phoenix-spark/src/it/scala/org/apache/phoenix/spark/PhoenixSparkIT.scala
+++ b/phoenix-spark/src/it/scala/org/apache/phoenix/spark/PhoenixSparkIT.scala
@@ -624,7 +624,7 @@ class PhoenixSparkIT extends AbstractPhoenixSparkIT {
     varByteArray shouldEqual dataSet(0).get(3)
   }
 
-  test("Can load Phoenix DATE columns through DataFrame API") {
+  test("Can load and filter Phoenix DATE columns through DataFrame API") {
     val df = spark.sqlContext.read
       .format("phoenix")
       .options(Map("table" -> "DATE_TEST", PhoenixDataSource.ZOOKEEPER_URL -> quorumAddress))
@@ -638,6 +638,10 @@ class PhoenixSparkIT extends AbstractPhoenixSparkIT {
     // Note that Spark also applies the timezone offset to the returned date epoch. Rather than perform timezone
     // gymnastics, just make sure we're within 24H of the epoch generated just now
     assert(Math.abs(epoch - dt) < 86400000)
+
+    df.createOrReplaceTempView("DATE_TEST")
+    val df2 = spark.sql("SELECT * FROM DATE_TEST WHERE COL1 > TO_DATE('1990-01-01 00:00:01', 'yyyy-MM-dd HH:mm:ss')")
+    assert(df2.count() == 1L)
   }
 
   test("Filter operation doesn't work for column names containing a white space (PHOENIX-2547)") {

diff --git a/phoenix-spark/src/main/scala/org/apache/phoenix/spark/FilterExpressionCompiler.scala b/phoenix-spark/src/main/scala/org/apache/phoenix/spark/FilterExpressionCompiler.scala
index 74ff67e..1d6973c 100644
--- a/phoenix-spark/src/main/scala/org/apache/phoenix/spark/FilterExpressionCompiler.scala
+++ b/phoenix-spark/src/main/scala/org/apache/phoenix/spark/FilterExpressionCompiler.scala
@@ -17,6 +17,7 @@
  */
 package org.apache.phoenix.spark
 
+import java.sql.Date
 import java.sql.Timestamp
 import java.text.Format
 
@@ -26,6 +27,7 @@ import org.apache.spark.sql.sources._
 
 class FilterExpressionCompiler() {
 
+  val dateformatter:Format = DateUtil.getDateFormatter(DateUtil.DEFAULT_DATE_FORMAT, DateUtil.DEFAULT_TIME_ZONE_ID)
   val timeformatter:Format = DateUtil.getTimestampFormatter(DateUtil.DEFAULT_TIME_FORMAT, DateUtil.DEFAULT_TIME_ZONE_ID)
 
   /**
@@ -102,6 +104,8 @@ class FilterExpressionCompiler() {
 
       case timestampValue: Timestamp => getTimestampString(timestampValue)
 
+      case dateValue: Date => getDateString(dateValue)
+
       // Borrowed from 'elasticsearch-hadoop', support these internal UTF types across Spark versions
       // Spark 1.4
       case utf if (isClass(utf, "org.apache.spark.sql.types.UTF8String")) => s"'${escapeStringConstant(utf.toString)}'"
@@ -117,6 +121,11 @@ class FilterExpressionCompiler() {
         DateUtil.DEFAULT_TIME_FORMAT, DateUtil.DEFAULT_TIME_ZONE_ID)
   }
 
+  private def getDateString(dateValue: Date): String = {
+    "TO_DATE('%s', '%s', '%s')".format(dateformatter.format(dateValue),
+      DateUtil.DEFAULT_DATE_FORMAT, DateUtil.DEFAULT_TIME_ZONE_ID)
+  }
+
   // Helper function to escape column key to work with SQL queries
   private def escapeKey(key: String): String = SchemaUtil.getEscapedFullColumnName(key)
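
For context on the fix: the new case in compileValue routes java.sql.Date filter values through getDateString, which renders them as a Phoenix TO_DATE expression rather than an unquoted literal. Below is a minimal, self-contained Scala sketch of that rendering. The DATE_FORMAT and TIME_ZONE_ID constants are assumed placeholders standing in for DateUtil.DEFAULT_DATE_FORMAT and DateUtil.DEFAULT_TIME_ZONE_ID, and SimpleDateFormat stands in for the formatter returned by DateUtil.getDateFormatter; the real values and formatter come from org.apache.phoenix.util.DateUtil.

    import java.sql.Date
    import java.text.SimpleDateFormat
    import java.util.TimeZone

    object DatePushdownSketch {
      // Assumed stand-ins for DateUtil.DEFAULT_DATE_FORMAT / DEFAULT_TIME_ZONE_ID.
      val DATE_FORMAT = "yyyy-MM-dd HH:mm:ss"
      val TIME_ZONE_ID = "GMT"

      // Same shape as the new getDateString helper: format the Date in a fixed
      // time zone, then wrap it in TO_DATE so Phoenix parses it back deterministically.
      def getDateString(dateValue: Date): String = {
        val formatter = new SimpleDateFormat(DATE_FORMAT)
        formatter.setTimeZone(TimeZone.getTimeZone(TIME_ZONE_ID))
        "TO_DATE('%s', '%s', '%s')".format(formatter.format(dateValue), DATE_FORMAT, TIME_ZONE_ID)
      }

      def main(args: Array[String]): Unit = {
        // Epoch 0 formatted in GMT; prints:
        // TO_DATE('1970-01-01 00:00:00', 'yyyy-MM-dd HH:mm:ss', 'GMT')
        println(getDateString(new Date(0L)))
      }
    }

With this case in place, a DataFrame predicate on a DATE column, like the df2 query in the test above, can be pushed down to Phoenix instead of failing during filter compilation.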