[ 
https://issues.apache.org/jira/browse/DRILL-6413?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16481029#comment-16481029
 ] 

ASF GitHub Bot commented on DRILL-6413:
---------------------------------------

asfgit closed pull request #1269: DRILL-6413: Update 
ParquetFilterBuilder.visitBooleanOperator to handle simplified boolean expression
URL: https://github.com/apache/drill/pull/1269
 
 
   

This is a PR merged from a forked repository.
As GitHub hides the original diff on merge, it is displayed below for
the sake of provenance:

As this is a foreign pull request (from a fork), the diff is supplied
below (as it won't show otherwise due to GitHub magic):

diff --git 
a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/AbstractParquetGroupScan.java
 
b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/AbstractParquetGroupScan.java
index 1f8c535a91..33472bb654 100644
--- 
a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/AbstractParquetGroupScan.java
+++ 
b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/AbstractParquetGroupScan.java
@@ -255,8 +255,7 @@ public GroupScan applyFilter(LogicalExpression filterExpr, 
UdfUtilities udfUtili
         logger.debug("materializedFilter : {}", 
ExpressionStringBuilder.toString(materializedFilter));
 
         Set<LogicalExpression> constantBoundaries = 
ConstantExpressionIdentifier.getConstantExpressionSet(materializedFilter);
-        filterPredicate = (ParquetFilterPredicate) 
ParquetFilterBuilder.buildParquetFilterPredicate(
-            materializedFilter, constantBoundaries, udfUtilities);
+        filterPredicate = 
ParquetFilterBuilder.buildParquetFilterPredicate(materializedFilter, 
constantBoundaries, udfUtilities);
 
         if (filterPredicate == null) {
           return null;
diff --git 
a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetFilterBuilder.java
 
b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetFilterBuilder.java
index e55425e948..a8e101d6e0 100644
--- 
a/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetFilterBuilder.java
+++ 
b/exec/java-exec/src/main/java/org/apache/drill/exec/store/parquet/ParquetFilterBuilder.java
@@ -42,6 +42,7 @@
 import org.apache.drill.exec.expr.holders.VarDecimalHolder;
 import org.apache.drill.exec.expr.stat.ParquetBooleanPredicates;
 import org.apache.drill.exec.expr.stat.ParquetComparisonPredicates;
+import org.apache.drill.exec.expr.stat.ParquetFilterPredicate;
 import org.apache.drill.exec.expr.stat.ParquetIsPredicates;
 import org.apache.drill.exec.ops.UdfUtilities;
 import org.apache.drill.exec.util.DecimalUtility;
@@ -54,7 +55,7 @@
 
 /**
  * A visitor which visits a materialized logical expression, and build 
ParquetFilterPredicate
- * If a visitXXX method returns null, that means the corresponding filter 
branch is not qualified for pushdown.
+ * If a visitXXX method returns null, that means the corresponding filter 
branch is not qualified for push down.
  */
 public class ParquetFilterBuilder extends 
AbstractExprVisitor<LogicalExpression, Set<LogicalExpression>, 
RuntimeException> {
   static final Logger logger = 
LoggerFactory.getLogger(ParquetFilterBuilder.class);
@@ -66,12 +67,18 @@
    * @param constantBoundaries set of constant expressions
    * @param udfUtilities udf utilities
    *
-   * @return logical expression
+   * @return parquet filter predicate
    */
-  public static LogicalExpression 
buildParquetFilterPredicate(LogicalExpression expr, final 
Set<LogicalExpression> constantBoundaries, UdfUtilities udfUtilities) {
-    return expr.accept(new ParquetFilterBuilder(udfUtilities), 
constantBoundaries);
+  public static ParquetFilterPredicate 
buildParquetFilterPredicate(LogicalExpression expr, final 
Set<LogicalExpression> constantBoundaries, UdfUtilities udfUtilities) {
+    LogicalExpression logicalExpression = expr.accept(new 
ParquetFilterBuilder(udfUtilities), constantBoundaries);
+    if (logicalExpression instanceof ParquetFilterPredicate) {
+      return (ParquetFilterPredicate) logicalExpression;
+    }
+    logger.debug("Logical expression {} was not qualified for filter push 
down", logicalExpression);
+    return null;
   }
 
+
   private ParquetFilterBuilder(UdfUtilities udfUtilities) {
     this.udfUtilities = udfUtilities;
   }
@@ -150,6 +157,10 @@ public LogicalExpression 
visitBooleanOperator(BooleanOperator op, Set<LogicalExp
           return null;
         }
       } else {
+        if (childPredicate instanceof TypedFieldExpr) {
+          // Calcite simplifies `= true` expression to field name, wrap it 
with is true predicate
+          childPredicate = new 
ParquetIsPredicates.IsTruePredicate(childPredicate);
+        }
         childPredicates.add(childPredicate);
       }
     }
diff --git 
a/exec/java-exec/src/test/java/org/apache/drill/exec/store/parquet/TestParquetFilterPushDown.java
 
b/exec/java-exec/src/test/java/org/apache/drill/exec/store/parquet/TestParquetFilterPushDown.java
index 83a4e8e923..3bbd39713b 100644
--- 
a/exec/java-exec/src/test/java/org/apache/drill/exec/store/parquet/TestParquetFilterPushDown.java
+++ 
b/exec/java-exec/src/test/java/org/apache/drill/exec/store/parquet/TestParquetFilterPushDown.java
@@ -26,6 +26,7 @@
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
+import org.apache.parquet.format.converter.ParquetMetadataConverter;
 import org.apache.parquet.hadoop.ParquetFileReader;
 import org.apache.parquet.hadoop.metadata.ParquetMetadata;
 import org.junit.Assert;
@@ -424,6 +425,9 @@ public void testBooleanPredicate() throws Exception {
 
     final String queryNotEqualFalse = "select col_bln from 
dfs.`parquetFilterPush/blnTbl` where not col_bln = false";
     testParquetFilterPD(queryNotEqualFalse, 4, 2, false);
+
+    final String queryEqualTrueWithAnd = "select col_bln from 
dfs.`parquetFilterPush/blnTbl` where col_bln = true and unk_col = 'a'";
+    testParquetFilterPD(queryEqualTrueWithAnd, 0, 2, false);
   }
 
   @Test // DRILL-5359
@@ -445,10 +449,9 @@ public void testFilterWithItemFlatten() throws Exception {
   public void testMultiRowGroup() throws Exception {
     // multirowgroup is a parquet file with 2 rowgroups inside. One with a = 1 
and the other with a = 2;
     // FilterPushDown should be able to remove the rowgroup with a = 1 from 
the scan operator.
-    final String sql = String.format("select * from 
dfs.`parquet/multirowgroup.parquet` where a > 1");
+    final String sql = "select * from dfs.`parquet/multirowgroup.parquet` 
where a > 1";
     final String[] expectedPlan = {"numRowGroups=1"};
-    final String[] excludedPlan = {};
-    PlanTestBase.testPlanMatchingPatterns(sql, expectedPlan, excludedPlan);
+    PlanTestBase.testPlanMatchingPatterns(sql, expectedPlan);
   }
 
   
//////////////////////////////////////////////////////////////////////////////////////////////////
@@ -461,23 +464,20 @@ private void testParquetFilterPD(final String query, int 
expectedRowCount, int e
     String numFilesPattern = "numFiles=" + expectedNumFiles;
     String usedMetaPattern = "usedMetadataFile=" + usedMetadataFile;
 
-    testPlanMatchingPatterns(query, new String[]{numFilesPattern, 
usedMetaPattern}, new String[] {});
+    testPlanMatchingPatterns(query, new String[]{numFilesPattern, 
usedMetaPattern});
   }
 
-  private void testParquetRowGroupFilterEval(final ParquetMetadata footer, 
final String exprStr,
-      boolean canDropExpected) throws Exception{
+  private void testParquetRowGroupFilterEval(final ParquetMetadata footer, 
final String exprStr, boolean canDropExpected) throws Exception{
     final LogicalExpression filterExpr = parseExpr(exprStr);
     testParquetRowGroupFilterEval(footer, 0, filterExpr, canDropExpected);
   }
 
-  private void testParquetRowGroupFilterEval(final ParquetMetadata footer, 
final int rowGroupIndex,
-      final LogicalExpression filterExpr, boolean canDropExpected) throws 
Exception {
-    boolean canDrop = ParquetRGFilterEvaluator.evalFilter(filterExpr, footer, 
rowGroupIndex,
-        fragContext.getOptions(), fragContext);
+  private void testParquetRowGroupFilterEval(final ParquetMetadata footer, 
final int rowGroupIndex, final LogicalExpression filterExpr, boolean 
canDropExpected) {
+    boolean canDrop = ParquetRGFilterEvaluator.evalFilter(filterExpr, footer, 
rowGroupIndex, fragContext.getOptions(), fragContext);
     Assert.assertEquals(canDropExpected, canDrop);
   }
 
   private ParquetMetadata getParquetMetaData(File file) throws IOException{
-    return ParquetFileReader.readFooter(new Configuration(fs.getConf()), new 
Path(file.toURI()));
+    return ParquetFileReader.readFooter(new Configuration(fs.getConf()), new 
Path(file.toURI()), ParquetMetadataConverter.NO_FILTER);
   }
 }


 

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
[email protected]


> Specific query returns an exception when filtering a boolean column with the "equals" 
> operator
> -----------------------------------------------------------------------------------
>
>                 Key: DRILL-6413
>                 URL: https://issues.apache.org/jira/browse/DRILL-6413
>             Project: Apache Drill
>          Issue Type: Bug
>            Reporter: Anton Gozhiy
>            Assignee: Arina Ielchiieva
>            Priority: Major
>              Labels: ready-to-commit
>             Fix For: 1.14.0
>
>         Attachments: Test_data.tar.gz
>
>
> *Data:*
>  Use the attached dataset
> *Query:*
> {code:sql}
>  select *
>  from dfs.tmp.`Test_data`
>  where bool_col = true and part_col in ('Partition_two')
> {code}
> *Expected result:*
>  The query should return results normally
> *Actual result:*
>  An exception is thrown:
> {noformat}
> Error: SYSTEM ERROR: ClassCastException: 
> org.apache.drill.common.expression.TypedFieldExpr cannot be cast to 
> org.apache.drill.exec.expr.stat.ParquetFilterPredicate
> {noformat}
> *Notes:*
>  It works OK if the "is" operator is used, or if "*" is not used in the select statement



--
This message was sent by Atlassian JIRA
(v7.6.3#76005)

Reply via email to