spark git commit: [SPARK-10623] [SQL] Fixes ORC predicate push-down

2015-09-18 Thread yhuai
Repository: spark
Updated Branches:
  refs/heads/master c8149ef2c -> 22be2ae14


[SPARK-10623] [SQL] Fixes ORC predicate push-down

When pushing down a leaf predicate, ORC `SearchArgument` builder requires an 
extra "parent" predicate (any one among `AND`/`OR`/`NOT`) to wrap the leaf 
predicate. E.g., to push down `a < 1`, we must build `AND(a < 1)` instead. 
Fortunately, when actually constructing the `SearchArgument`, the builder will 
eliminate all those unnecessary wrappers.

This PR is based on #8783 authored by zhzhan. I also took the chance to simplify 
`OrcFilters` a little bit to improve readability.

Author: Cheng Lian 

Closes #8799 from liancheng/spark-10623/fix-orc-ppd.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/22be2ae1
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/22be2ae1
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/22be2ae1

Branch: refs/heads/master
Commit: 22be2ae147a111e88896f6fb42ed46bbf108a99b
Parents: c8149ef
Author: Cheng Lian 
Authored: Fri Sep 18 18:42:20 2015 -0700
Committer: Yin Huai 
Committed: Fri Sep 18 18:42:20 2015 -0700

--
 .../apache/spark/sql/hive/orc/OrcFilters.scala  | 56 
 .../spark/sql/hive/orc/OrcQuerySuite.scala  | 30 +++
 2 files changed, 52 insertions(+), 34 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/22be2ae1/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcFilters.scala
--
diff --git 
a/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcFilters.scala 
b/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcFilters.scala
index b3d9f7f..27193f5 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcFilters.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcFilters.scala
@@ -31,11 +31,13 @@ import org.apache.spark.sql.sources._
  * and cannot be used anymore.
  */
 private[orc] object OrcFilters extends Logging {
-  def createFilter(expr: Array[Filter]): Option[SearchArgument] = {
-expr.reduceOption(And).flatMap { conjunction =>
-  val builder = SearchArgumentFactory.newBuilder()
-  buildSearchArgument(conjunction, builder).map(_.build())
-}
+  def createFilter(filters: Array[Filter]): Option[SearchArgument] = {
+for {
+  // Combines all filters with `And`s to produce a single conjunction 
predicate
+  conjunction <- filters.reduceOption(And)
+  // Then tries to build a single ORC `SearchArgument` for the conjunction 
predicate
+  builder <- buildSearchArgument(conjunction, 
SearchArgumentFactory.newBuilder())
+} yield builder.build()
   }
 
   private def buildSearchArgument(expression: Filter, builder: Builder): 
Option[Builder] = {
@@ -102,46 +104,32 @@ private[orc] object OrcFilters extends Logging {
   negate <- buildSearchArgument(child, builder.startNot())
 } yield negate.end()
 
-  case EqualTo(attribute, value) =>
-Option(value)
-  .filter(isSearchableLiteral)
-  .map(builder.equals(attribute, _))
+  case EqualTo(attribute, value) if isSearchableLiteral(value) =>
+Some(builder.startAnd().equals(attribute, value).end())
 
-  case EqualNullSafe(attribute, value) =>
-Option(value)
-  .filter(isSearchableLiteral)
-  .map(builder.nullSafeEquals(attribute, _))
+  case EqualNullSafe(attribute, value) if isSearchableLiteral(value) =>
+Some(builder.startAnd().nullSafeEquals(attribute, value).end())
 
-  case LessThan(attribute, value) =>
-Option(value)
-  .filter(isSearchableLiteral)
-  .map(builder.lessThan(attribute, _))
+  case LessThan(attribute, value) if isSearchableLiteral(value) =>
+Some(builder.startAnd().lessThan(attribute, value).end())
 
-  case LessThanOrEqual(attribute, value) =>
-Option(value)
-  .filter(isSearchableLiteral)
-  .map(builder.lessThanEquals(attribute, _))
+  case LessThanOrEqual(attribute, value) if isSearchableLiteral(value) =>
+Some(builder.startAnd().lessThanEquals(attribute, value).end())
 
-  case GreaterThan(attribute, value) =>
-Option(value)
-  .filter(isSearchableLiteral)
-  .map(builder.startNot().lessThanEquals(attribute, _).end())
+  case GreaterThan(attribute, value) if isSearchableLiteral(value) =>
+Some(builder.startNot().lessThanEquals(attribute, value).end())
 
-  case GreaterThanOrEqual(attribute, value) =>
-Option(value)
-  .filter(isSearchableLiteral)
-  .map(builder.startNot().lessThan(attribute, _).end())
+  case GreaterThanOrEqual(attribute, value) if isSearchableLiteral(value) 
=>
+Some(builder.startNot().lessThan(attrib

spark git commit: [SPARK-10623] [SQL] Fixes ORC predicate push-down

2015-09-18 Thread yhuai
Repository: spark
Updated Branches:
  refs/heads/branch-1.5 a6c315358 -> b3f1e6533


[SPARK-10623] [SQL] Fixes ORC predicate push-down

When pushing down a leaf predicate, ORC `SearchArgument` builder requires an 
extra "parent" predicate (any one among `AND`/`OR`/`NOT`) to wrap the leaf 
predicate. E.g., to push down `a < 1`, we must build `AND(a < 1)` instead. 
Fortunately, when actually constructing the `SearchArgument`, the builder will 
eliminate all those unnecessary wrappers.

This PR is based on #8783 authored by zhzhan. I also took the chance to simplify 
`OrcFilters` a little bit to improve readability.

Author: Cheng Lian 

Closes #8799 from liancheng/spark-10623/fix-orc-ppd.

(cherry picked from commit 22be2ae147a111e88896f6fb42ed46bbf108a99b)
Signed-off-by: Yin Huai 

Conflicts:
sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcFilters.scala


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/b3f1e653
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/b3f1e653
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/b3f1e653

Branch: refs/heads/branch-1.5
Commit: b3f1e653320e074fe78971a2a3b659c36da20b45
Parents: a6c3153
Author: Cheng Lian 
Authored: Fri Sep 18 18:42:20 2015 -0700
Committer: Yin Huai 
Committed: Fri Sep 18 18:46:53 2015 -0700

--
 .../apache/spark/sql/hive/orc/OrcFilters.scala  | 53 +---
 .../spark/sql/hive/orc/OrcQuerySuite.scala  | 30 +++
 2 files changed, 53 insertions(+), 30 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/spark/blob/b3f1e653/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcFilters.scala
--
diff --git 
a/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcFilters.scala 
b/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcFilters.scala
index 86142e5..27193f5 100644
--- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcFilters.scala
+++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/orc/OrcFilters.scala
@@ -31,11 +31,13 @@ import org.apache.spark.sql.sources._
  * and cannot be used anymore.
  */
 private[orc] object OrcFilters extends Logging {
-  def createFilter(expr: Array[Filter]): Option[SearchArgument] = {
-expr.reduceOption(And).flatMap { conjunction =>
-  val builder = SearchArgumentFactory.newBuilder()
-  buildSearchArgument(conjunction, builder).map(_.build())
-}
+  def createFilter(filters: Array[Filter]): Option[SearchArgument] = {
+for {
+  // Combines all filters with `And`s to produce a single conjunction 
predicate
+  conjunction <- filters.reduceOption(And)
+  // Then tries to build a single ORC `SearchArgument` for the conjunction 
predicate
+  builder <- buildSearchArgument(conjunction, 
SearchArgumentFactory.newBuilder())
+} yield builder.build()
   }
 
   private def buildSearchArgument(expression: Filter, builder: Builder): 
Option[Builder] = {
@@ -102,41 +104,32 @@ private[orc] object OrcFilters extends Logging {
   negate <- buildSearchArgument(child, builder.startNot())
 } yield negate.end()
 
-  case EqualTo(attribute, value) =>
-Option(value)
-  .filter(isSearchableLiteral)
-  .map(builder.equals(attribute, _))
+  case EqualTo(attribute, value) if isSearchableLiteral(value) =>
+Some(builder.startAnd().equals(attribute, value).end())
+
+  case EqualNullSafe(attribute, value) if isSearchableLiteral(value) =>
+Some(builder.startAnd().nullSafeEquals(attribute, value).end())
 
-  case LessThan(attribute, value) =>
-Option(value)
-  .filter(isSearchableLiteral)
-  .map(builder.lessThan(attribute, _))
+  case LessThan(attribute, value) if isSearchableLiteral(value) =>
+Some(builder.startAnd().lessThan(attribute, value).end())
 
-  case LessThanOrEqual(attribute, value) =>
-Option(value)
-  .filter(isSearchableLiteral)
-  .map(builder.lessThanEquals(attribute, _))
+  case LessThanOrEqual(attribute, value) if isSearchableLiteral(value) =>
+Some(builder.startAnd().lessThanEquals(attribute, value).end())
 
-  case GreaterThan(attribute, value) =>
-Option(value)
-  .filter(isSearchableLiteral)
-  .map(builder.startNot().lessThanEquals(attribute, _).end())
+  case GreaterThan(attribute, value) if isSearchableLiteral(value) =>
+Some(builder.startNot().lessThanEquals(attribute, value).end())
 
-  case GreaterThanOrEqual(attribute, value) =>
-Option(value)
-  .filter(isSearchableLiteral)
-  .map(builder.startNot().lessThan(attribute, _).end())
+  case GreaterThanOrEqual(attribute, value) if isSearchableLiteral(value) 
=>
+Some(buil