[GitHub] [spark] dongjoon-hyun commented on a change in pull request #27728: [SPARK-25556][SPARK-17636][SPARK-31026][SQL][test-hive1.2] Nested Column Predicate Pushdown for Parquet

GitBox Tue, 03 Mar 2020 13:18:23 -0800

dongjoon-hyun commented on a change in pull request #27728: 
[SPARK-25556][SPARK-17636][SPARK-31026][SQL][test-hive1.2] Nested Column 
Predicate Pushdown for Parquet
URL: https://github.com/apache/spark/pull/27728#discussion_r387298806


 ##########
 File path: 
sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/DataSourceStrategy.scala
 ##########
 @@ -437,61 +437,74 @@ object DataSourceStrategy {
     }
   }
 
+  /**
+   * Find the column name of an expression that can be pushed down.
+   */
+  private[sql] def pushDownColName(e: Expression): Option[String] = {
+    import 
org.apache.spark.sql.connector.catalog.CatalogV2Implicits.MultipartIdentifierHelper
+    def helper(e: Expression): Option[Seq[String]] = e match {
+      case a: Attribute => Some(Seq(a.name))
+      case s: GetStructField => helper(s.child).map(_ :+ 
s.childSchema(s.ordinal).name)
+      case _ => None
+    }
+    helper(e).map(_.quoted)
+  }
+
   private def translateLeafNodeFilter(predicate: Expression): Option[Filter] = 
predicate match {
-    case expressions.EqualTo(a: Attribute, Literal(v, t)) =>
-      Some(sources.EqualTo(a.name, convertToScala(v, t)))
-    case expressions.EqualTo(Literal(v, t), a: Attribute) =>
-      Some(sources.EqualTo(a.name, convertToScala(v, t)))
-
-    case expressions.EqualNullSafe(a: Attribute, Literal(v, t)) =>
-      Some(sources.EqualNullSafe(a.name, convertToScala(v, t)))
-    case expressions.EqualNullSafe(Literal(v, t), a: Attribute) =>
-      Some(sources.EqualNullSafe(a.name, convertToScala(v, t)))
-
-    case expressions.GreaterThan(a: Attribute, Literal(v, t)) =>
-      Some(sources.GreaterThan(a.name, convertToScala(v, t)))
-    case expressions.GreaterThan(Literal(v, t), a: Attribute) =>
-      Some(sources.LessThan(a.name, convertToScala(v, t)))
-
-    case expressions.LessThan(a: Attribute, Literal(v, t)) =>
-      Some(sources.LessThan(a.name, convertToScala(v, t)))
-    case expressions.LessThan(Literal(v, t), a: Attribute) =>
-      Some(sources.GreaterThan(a.name, convertToScala(v, t)))
-
-    case expressions.GreaterThanOrEqual(a: Attribute, Literal(v, t)) =>
-      Some(sources.GreaterThanOrEqual(a.name, convertToScala(v, t)))
-    case expressions.GreaterThanOrEqual(Literal(v, t), a: Attribute) =>
-      Some(sources.LessThanOrEqual(a.name, convertToScala(v, t)))
-
-    case expressions.LessThanOrEqual(a: Attribute, Literal(v, t)) =>
-      Some(sources.LessThanOrEqual(a.name, convertToScala(v, t)))
-    case expressions.LessThanOrEqual(Literal(v, t), a: Attribute) =>
-      Some(sources.GreaterThanOrEqual(a.name, convertToScala(v, t)))
-
-    case expressions.InSet(a: Attribute, set) =>
-      val toScala = CatalystTypeConverters.createToScalaConverter(a.dataType)
-      Some(sources.In(a.name, set.toArray.map(toScala)))
+    case expressions.EqualTo(e: Expression, Literal(v, t)) =>
+      pushDownColName(e).map(sources.EqualTo(_, convertToScala(v, t)))
+    case expressions.EqualTo(Literal(v, t), e: Expression) =>
+      pushDownColName(e).map(sources.EqualTo(_, convertToScala(v, t)))
+
+    case expressions.EqualNullSafe(e: Expression, Literal(v, t)) =>
+      pushDownColName(e).map(sources.EqualNullSafe(_, convertToScala(v, t)))
+    case expressions.EqualNullSafe(Literal(v, t), e: Expression) =>
+      pushDownColName(e).map(sources.EqualNullSafe(_, convertToScala(v, t)))
+
+    case expressions.GreaterThan(e: Expression, Literal(v, t)) =>
+      pushDownColName(e).map(sources.GreaterThan(_, convertToScala(v, t)))
+    case expressions.GreaterThan(Literal(v, t), e: Expression) =>
+      pushDownColName(e).map(sources.LessThan(_, convertToScala(v, t)))
+
+    case expressions.LessThan(e: Expression, Literal(v, t)) =>
+      pushDownColName(e).map(sources.LessThan(_, convertToScala(v, t)))
+    case expressions.LessThan(Literal(v, t), e: Expression) =>
+      pushDownColName(e).map(sources.GreaterThan(_, convertToScala(v, t)))
+
+    case expressions.GreaterThanOrEqual(e: Expression, Literal(v, t)) =>
+      pushDownColName(e).map(sources.GreaterThanOrEqual(_, convertToScala(v, 
t)))
+    case expressions.GreaterThanOrEqual(Literal(v, t), e: Expression) =>
+      pushDownColName(e).map(sources.LessThanOrEqual(_, convertToScala(v, t)))
+
+    case expressions.LessThanOrEqual(e: Expression, Literal(v, t)) =>
+      pushDownColName(e).map(sources.LessThanOrEqual(_, convertToScala(v, t)))
+    case expressions.LessThanOrEqual(Literal(v, t), e: Expression) =>
+      pushDownColName(e).map(sources.GreaterThanOrEqual(_, convertToScala(v, 
t)))
+
+    case expressions.InSet(e: Expression, set) =>
+      val toScala = CatalystTypeConverters.createToScalaConverter(e.dataType)
+      pushDownColName(e).map(sources.In(_, set.toArray.map(toScala)))
 
 Review comment:
   If you don't mind, can we rewrite this like the following to prevent a
potential minor regression? The new code above executes
`CatalystTypeConverters.createToScalaConverter` for all expressions, while the
previous code only did so for `Attribute`.
   ```scala
   pushDownColName(e).map { name =>
     val toScala = CatalystTypeConverters.createToScalaConverter(e.dataType)
     sources.In(name, set.toArray.map(toScala))
   }
   ```

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


With regards,
Apache Git Services

---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org

[GitHub] [spark] dongjoon-hyun commented on a change in pull request #27728: [SPARK-25556][SPARK-17636][SPARK-31026][SQL][test-hive1.2] Nested Column Predicate Pushdown for Parquet

Reply via email to