>From Murtadha Hubail <[email protected]>:

Murtadha Hubail has submitted this change. ( 
https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/21029?usp=email )

Change subject: [ASTERIXDB-3743][EXT] Failure to pushdown iceberg partition 
filter
......................................................................

[ASTERIXDB-3743][EXT] Failure to pushdown iceberg partition filter

- user model changes: no
- storage format changes: no
- interface changes: no

Details:
Allows pushing down expr1 in a expression like expr1 AND expr2
where expr is pushable and expr2 is not.
Allows ushing down expression like 2 = field_name.

Ext-ref: MB-70436

Change-Id: I22a39538af83226c323d905f35bb70f52243e118
Reviewed-on: https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/21029
Tested-by: Jenkins <[email protected]>
Integration-Tests: Jenkins <[email protected]>
Reviewed-by: Peeyush Gupta <[email protected]>
Reviewed-by: Hussain Towaileb <[email protected]>
---
M 
asterixdb/asterix-metadata/src/main/java/org/apache/asterix/metadata/utils/filter/IcebergTableFilterBuilder.java
1 file changed, 271 insertions(+), 61 deletions(-)

Approvals:
  Jenkins: Verified; Verified
  Hussain Towaileb: Looks good to me, approved
  Peeyush Gupta: Looks good to me, but someone else must approve




diff --git 
a/asterixdb/asterix-metadata/src/main/java/org/apache/asterix/metadata/utils/filter/IcebergTableFilterBuilder.java
 
b/asterixdb/asterix-metadata/src/main/java/org/apache/asterix/metadata/utils/filter/IcebergTableFilterBuilder.java
index 94ec455..155f6d0 100644
--- 
a/asterixdb/asterix-metadata/src/main/java/org/apache/asterix/metadata/utils/filter/IcebergTableFilterBuilder.java
+++ 
b/asterixdb/asterix-metadata/src/main/java/org/apache/asterix/metadata/utils/filter/IcebergTableFilterBuilder.java
@@ -27,13 +27,16 @@
 import org.apache.asterix.om.base.ADate;
 import org.apache.asterix.om.base.ADateTime;
 import org.apache.asterix.om.base.ADouble;
+import org.apache.asterix.om.base.AFloat;
 import org.apache.asterix.om.base.AInt16;
 import org.apache.asterix.om.base.AInt32;
 import org.apache.asterix.om.base.AInt64;
 import org.apache.asterix.om.base.AInt8;
 import org.apache.asterix.om.base.AString;
+import org.apache.asterix.om.base.ATime;
+import org.apache.asterix.om.base.IAObject;
 import org.apache.asterix.om.constants.AsterixConstantValue;
-import org.apache.asterix.om.functions.IFunctionDescriptor;
+import org.apache.asterix.om.functions.BuiltinFunctions;
 import org.apache.asterix.om.types.ARecordType;
 import org.apache.asterix.om.types.ATypeTag;
 import 
org.apache.asterix.runtime.projection.ExternalDatasetProjectionFiltrationInfo;
@@ -64,108 +67,315 @@
     }

     public IExternalFilterEvaluatorFactory build() throws AlgebricksException {
-        Object icebergTablePredicate = null;
+        Expression icebergTablePredicate = null;
         if (filterExpression != null) {
             try {
-                icebergTablePredicate = createExpression(filterExpression);
+                icebergTablePredicate = 
createIcebergExpression(filterExpression, false);
             } catch (Exception e) {
-                LOGGER.error("Error creating IcebergTable filter expression, 
skipping filter pushdown", e);
+                LOGGER.warn("Error creating IcebergTable filter expression, 
skipping filter pushdown", e);
             }
         }
-        return new IcebergTableFilterEvaluatorFactory((Expression) 
icebergTablePredicate);
+        return new IcebergTableFilterEvaluatorFactory(icebergTablePredicate);
     }

-    protected Object createExpression(ILogicalExpression expression) throws 
AlgebricksException {
+    /**
+     * Recursively converts an AsterixDB logical expression into an Iceberg 
{@link Expression}.
+     *
+     * @return an Iceberg Expression, or {@code null} if the expression cannot 
be converted
+     */
+    protected Expression createIcebergExpression(ILogicalExpression 
expression, boolean insideNot)
+            throws AlgebricksException {
         if (filterPaths.containsKey(expression)) {
-            // Path expression, create a value accessor (i.e., a column reader)
-            return createColumnExpression(expression);
+            return null;
         } else if (expression.getExpressionTag() == 
LogicalExpressionTag.CONSTANT) {
-            return createLiteralExpression(expression);
+            return createBooleanConstantExpression(expression);
         } else if (expression.getExpressionTag() == 
LogicalExpressionTag.FUNCTION_CALL) {
-            return handleFunction(expression);
+            return handleFunction(expression, insideNot);
         }
-
-        /*
-         * A variable expression: This should not happen as the provided 
filter expression is inlined.
-         * If a variable was encountered for some reason, it should only be 
the record variable. If the record variable
-         * was encountered, that means there's a missing value path the 
compiler didn't provide.
-         */
-        throw new RuntimeException("Unsupported expression " + expression + ". 
the provided paths are: " + filterPaths);
+        LOGGER.debug("Unsupported expression type {}, skipping filter 
pushdown", expression);
+        return null;
     }

-    private Object createLiteralExpression(ILogicalExpression expression) 
throws AlgebricksException {
+    /**
+     * Extracts a Java value from a constant expression for use in comparisons.
+     *
+     * @return a Java value (String, Number, etc.), or {@code null} for 
null/missing
+     */
+    private Object createLiteralValue(ILogicalExpression expression) {
         ConstantExpression constExpr = (ConstantExpression) expression;
         if (constExpr.getValue().isNull() || constExpr.getValue().isMissing()) 
{
-            throw new RuntimeException("Unsupported literal type: " + 
constExpr.getValue());
+            return null;
+        }
+        if (!(constExpr.getValue() instanceof AsterixConstantValue)) {
+            return null;
         }
         AsterixConstantValue constantValue = (AsterixConstantValue) 
constExpr.getValue();
-        switch (constantValue.getObject().getType().getTypeTag()) {
+        IAObject obj = constantValue.getObject();
+        switch (obj.getType().getTypeTag()) {
             case STRING:
-                return ((AString) constantValue.getObject()).getStringValue();
+                return ((AString) obj).getStringValue();
             case TINYINT:
-                return ((AInt8) constantValue.getObject()).getByteValue();
+                return (int) ((AInt8) obj).getByteValue();
             case SMALLINT:
-                return ((AInt16) constantValue.getObject()).getShortValue();
+                return (int) ((AInt16) obj).getShortValue();
             case INTEGER:
-                return ((AInt32) constantValue.getObject()).getIntegerValue();
+                return ((AInt32) obj).getIntegerValue();
+            case BIGINT:
+                return ((AInt64) obj).getLongValue();
+            case FLOAT:
+                return ((AFloat) obj).getFloatValue();
+            case DOUBLE:
+                return ((ADouble) obj).getDoubleValue();
             case BOOLEAN:
                 return constantValue.isTrue();
-            case BIGINT:
-                return ((AInt64) constantValue.getObject()).getLongValue();
-            case DOUBLE:
-                return ((ADouble) constantValue.getObject()).getDoubleValue();
             case DATE:
-                return ((ADate) 
constantValue.getObject()).getChrononTimeInDays();
+                // Iceberg DATE is represented as days from epoch (int)
+                return ((ADate) obj).getChrononTimeInDays();
             case DATETIME:
-                Long millis = ((ADateTime) 
constantValue.getObject()).getChrononTime();
-                return TimeUnit.MILLISECONDS.toMicros(millis);
+                // Iceberg TIMESTAMP is represented as microseconds from epoch
+                return TimeUnit.MILLISECONDS.toMicros(((ADateTime) 
obj).getChrononTime());
+            case TIME:
+                // Iceberg TIME is represented as microseconds from midnight
+                return TimeUnit.MILLISECONDS.toMicros(((ATime) 
obj).getChrononTime());
             default:
-                throw new RuntimeException("Unsupported literal type: " + 
constantValue.getObject().getType());
+                LOGGER.debug("Unsupported literal type: {}", obj.getType());
+                return null;
         }
     }

+    /**
+     * Converts a bare boolean constant to an Iceberg Expression (alwaysTrue / 
alwaysFalse).
+     */
+    private Expression createBooleanConstantExpression(ILogicalExpression 
expression) {
+        ConstantExpression constExpr = (ConstantExpression) expression;
+        if (constExpr.getValue().isTrue()) {
+            return Expressions.alwaysTrue();
+        } else if (constExpr.getValue().isFalse()) {
+            return Expressions.alwaysFalse();
+        }
+        // null/missing constant — cannot determine truth value; do not push 
down
+        return null;
+    }
+
     @Override
     protected IScalarEvaluatorFactory createValueAccessor(ILogicalExpression 
expression) {
         return null;
     }

-    private Expression handleFunction(ILogicalExpression expr) throws 
AlgebricksException {
+    private Expression handleFunction(ILogicalExpression expr, boolean 
insideNot) throws AlgebricksException {
         AbstractFunctionCallExpression funcExpr = 
(AbstractFunctionCallExpression) expr;
-        IFunctionDescriptor fd = resolveFunction(funcExpr);
-        FunctionIdentifier fid = fd.getIdentifier();
-        if (funcExpr.getArguments().size() != 2
-                && !(fid.equals(AlgebricksBuiltinFunctions.AND) || 
fid.equals(AlgebricksBuiltinFunctions.OR))) {
-            throw new RuntimeException("Predicate should only have 2 
arguments: " + funcExpr);
-        }
-        List<Object> args = handleArgs(funcExpr);
+        FunctionIdentifier fid = funcExpr.getFunctionIdentifier();
+
+        // Logical Connectives
         if (fid.equals(AlgebricksBuiltinFunctions.AND)) {
-            return Expressions.and((Expression) args.get(0), (Expression) 
args.get(1));
+            return handleAnd(funcExpr, insideNot);
         } else if (fid.equals(AlgebricksBuiltinFunctions.OR)) {
-            return Expressions.or((Expression) args.get(0), (Expression) 
args.get(1));
-        } else if (fid.equals(AlgebricksBuiltinFunctions.EQ)) {
-            return Expressions.equal((String) args.get(0), args.get(1));
-        } else if (fid.equals(AlgebricksBuiltinFunctions.GE)) {
-            return Expressions.greaterThanOrEqual((String) args.get(0), 
args.get(1));
-        } else if (fid.equals(AlgebricksBuiltinFunctions.GT)) {
-            return Expressions.greaterThan((String) args.get(0), args.get(1));
-        } else if (fid.equals(AlgebricksBuiltinFunctions.LE)) {
-            return Expressions.lessThanOrEqual((String) args.get(0), 
args.get(1));
-        } else if (fid.equals(AlgebricksBuiltinFunctions.LT)) {
-            return Expressions.lessThan((String) args.get(0), args.get(1));
+            return handleOr(funcExpr, insideNot);
+        } else if (fid.equals(AlgebricksBuiltinFunctions.NOT)) {
+            return handleNot(funcExpr, insideNot);
+        }
+
+        // Null check
+        if (fid.equals(AlgebricksBuiltinFunctions.IS_NULL)) {
+            return handleIsNull(funcExpr);
+        }
+
+        // Comparison operators
+        if (fid.equals(AlgebricksBuiltinFunctions.EQ) || 
fid.equals(AlgebricksBuiltinFunctions.NEQ)
+                || fid.equals(AlgebricksBuiltinFunctions.LT) || 
fid.equals(AlgebricksBuiltinFunctions.LE)
+                || fid.equals(AlgebricksBuiltinFunctions.GT) || 
fid.equals(AlgebricksBuiltinFunctions.GE)) {
+            return handleComparison(funcExpr, fid);
+        }
+
+        // String functions
+        if (fid.equals(BuiltinFunctions.STRING_STARTS_WITH)) {
+            return handleStringStartsWith(funcExpr);
+        }
+        LOGGER.trace("Unsupported function for Iceberg filter pushdown: {}", 
fid);
+        return null;
+    }
+
+    private Expression handleAnd(AbstractFunctionCallExpression funcExpr, 
boolean insideNot)
+            throws AlgebricksException {
+        Expression result = null;
+        for (Mutable<ILogicalExpression> argRef : funcExpr.getArguments()) {
+            Expression argExpr = createIcebergExpression(argRef.getValue(), 
insideNot);
+            if (argExpr == null) {
+                if (insideNot) {
+                    // Under an odd number of NOTs, partial AND pushdown is 
unsafe.
+                    // NOT(AND(e1, e2)) ≡ NOT(e1) OR NOT(e2), so dropping e2 
and pushing
+                    // NOT(e1) would incorrectly prune rows where e2 is false.
+                    return null;
+                }
+                // Under an even number of NOTs (including zero — top-level or 
inside OR):
+                // skip un-pushable children safely.
+                // - Top-level / inside OR at top context: the residual filter 
in the plan
+                //   evaluates skipped conjuncts.
+                // - Inside OR: AND(e1, e2) reduced to e1 makes the disjunct 
weaker, so the
+                //   overall OR passes more rows — a safe over-approximation.
+                // - Under double-NOT: NOT(NOT(AND(e1,e2))) reduced to 
NOT(NOT(e1)) = e1,
+                //   which is also a safe over-approximation.
+                continue;
+            }
+            result = (result == null) ? argExpr : Expressions.and(result, 
argExpr);
+        }
+        return result;
+    }
+
+    private Expression handleOr(AbstractFunctionCallExpression funcExpr, 
boolean insideNot) throws AlgebricksException {
+        Expression result = null;
+        for (Mutable<ILogicalExpression> argRef : funcExpr.getArguments()) {
+            // insideNot (odd-NOT parity) propagates unchanged through OR: OR 
itself does
+            // not introduce or remove negation.
+            Expression argExpr = createIcebergExpression(argRef.getValue(), 
insideNot);
+            if (argExpr == null) {
+                // One un-pushable disjunct makes the whole OR un-pushable for 
correctness:
+                // skipping a disjunct would over-prune rows matching only 
that disjunct.
+                return null;
+            }
+            result = (result == null) ? argExpr : Expressions.or(result, 
argExpr);
+        }
+        return result;
+    }
+
+    private Expression handleNot(AbstractFunctionCallExpression funcExpr, 
boolean insideNot)
+            throws AlgebricksException {
+        List<Mutable<ILogicalExpression>> args = funcExpr.getArguments();
+        if (args.size() != 1) {
+            return null;
+        }
+        ILogicalExpression innerExpr = args.get(0).getValue();
+        // NOT toggles the negation context: each NOT flips whether we are 
inside an odd
+        // or even number of negations.  Partial AND pushdown is only unsafe 
under an odd
+        // number of NOTs (insideNot=true), because:
+        //   NOT(AND(e1,e2))  — partial push of e1 yields NOT(e1), which is 
too strong.
+        //   NOT(NOT(AND(e1,e2))) — partial push of e1 yields NOT(NOT(e1)) = 
e1, which is
+        //   a safe over-approximation (passes more rows than the full AND).
+        Expression inner = createIcebergExpression(innerExpr, !insideNot);
+        return (inner != null) ? Expressions.not(inner) : null;
+    }
+
+    private Expression handleIsNull(AbstractFunctionCallExpression funcExpr) 
throws AlgebricksException {
+        List<Mutable<ILogicalExpression>> args = funcExpr.getArguments();
+        if (args.size() != 1) {
+            return null;
+        }
+        ILogicalExpression arg = args.get(0).getValue();
+        String columnName = tryGetColumnName(arg);
+        if (columnName == null) {
+            return null;
+        }
+        return Expressions.isNull(columnName);
+    }
+
+    private Expression handleComparison(AbstractFunctionCallExpression 
funcExpr, FunctionIdentifier fid)
+            throws AlgebricksException {
+        List<Mutable<ILogicalExpression>> args = funcExpr.getArguments();
+        if (args.size() != 2) {
+            return null;
+        }
+        ILogicalExpression left = args.get(0).getValue();
+        ILogicalExpression right = args.get(1).getValue();
+        String columnName = tryGetColumnName(left);
+        Object literalValue;
+        boolean flipped = false;
+        if (columnName != null) {
+            // Normal: column <op> literal
+            literalValue = tryGetLiteralValue(right);
         } else {
-            throw new RuntimeException("Unsupported function: " + funcExpr);
+            // Try flipped: literal <op> column
+            columnName = tryGetColumnName(right);
+            if (columnName == null) {
+                // Neither side is a known column; could be column <op> column 
— not supported
+                return null;
+            }
+            literalValue = tryGetLiteralValue(left);
+            flipped = true;
+        }
+        if (literalValue == null) {
+            return null;
+        }
+        // When operands are flipped, reverse the comparison direction
+        FunctionIdentifier effectiveFid = flipped ? flipComparison(fid) : fid;
+        return buildComparisonExpression(effectiveFid, columnName, 
literalValue);
+    }
+
+    private Expression buildComparisonExpression(FunctionIdentifier fid, 
String columnName, Object value) {
+        if (fid.equals(AlgebricksBuiltinFunctions.EQ)) {
+            return Expressions.equal(columnName, value);
+        } else if (fid.equals(AlgebricksBuiltinFunctions.NEQ)) {
+            return Expressions.notEqual(columnName, value);
+        } else if (fid.equals(AlgebricksBuiltinFunctions.LT)) {
+            return Expressions.lessThan(columnName, value);
+        } else if (fid.equals(AlgebricksBuiltinFunctions.LE)) {
+            return Expressions.lessThanOrEqual(columnName, value);
+        } else if (fid.equals(AlgebricksBuiltinFunctions.GT)) {
+            return Expressions.greaterThan(columnName, value);
+        } else if (fid.equals(AlgebricksBuiltinFunctions.GE)) {
+            return Expressions.greaterThanOrEqual(columnName, value);
+        }
+        return null;
+    }
+
+    /**
+     * Flips a binary comparison operator to handle reversed operand order.
+     * e.g. {@code literal < column} becomes {@code column > literal}
+     */
+    private FunctionIdentifier flipComparison(FunctionIdentifier fid) {
+        if (fid.equals(AlgebricksBuiltinFunctions.LT)) {
+            return AlgebricksBuiltinFunctions.GT;
+        } else if (fid.equals(AlgebricksBuiltinFunctions.LE)) {
+            return AlgebricksBuiltinFunctions.GE;
+        } else if (fid.equals(AlgebricksBuiltinFunctions.GT)) {
+            return AlgebricksBuiltinFunctions.LT;
+        } else if (fid.equals(AlgebricksBuiltinFunctions.GE)) {
+            return AlgebricksBuiltinFunctions.LE;
+        }
+        // EQ and NEQ are symmetric; return as-is
+        return fid;
+    }
+
+    private Expression handleStringStartsWith(AbstractFunctionCallExpression 
funcExpr) throws AlgebricksException {
+        List<Mutable<ILogicalExpression>> args = funcExpr.getArguments();
+        if (args.size() != 2) {
+            return null;
+        }
+        String columnName = tryGetColumnName(args.get(0).getValue());
+        if (columnName == null) {
+            return null;
+        }
+        Object prefix = tryGetLiteralValue(args.get(1).getValue());
+        if (!(prefix instanceof String)) {
+            return null;
+        }
+        return Expressions.startsWith(columnName, (String) prefix);
+    }
+
+    /**
+     * Returns the Iceberg column name for an expression if it refers to a 
pushed-down filter path,
+     * or {@code null} if the expression is not a column reference.
+     */
+    private String tryGetColumnName(ILogicalExpression expression) {
+        if (!filterPaths.containsKey(expression)) {
+            return null;
+        }
+        try {
+            return (String) createColumnExpression(expression);
+        } catch (Exception e) {
+            LOGGER.debug("Failed to create column expression for {}", 
expression, e);
+            return null;
         }
     }

-    private List<Object> handleArgs(AbstractFunctionCallExpression funcExpr) 
throws AlgebricksException {
-        List<Mutable<ILogicalExpression>> args = funcExpr.getArguments();
-        List<Object> argsExpressions = new ArrayList<>();
-        for (int i = 0; i < args.size(); i++) {
-            ILogicalExpression expr = args.get(i).getValue();
-            Object evalFactory = createExpression(expr);
-            argsExpressions.add(evalFactory);
+    /**
+     * Returns the Java literal value from a constant expression, or {@code 
null} if the
+     * expression is not a constant or the constant type is unsupported.
+     */
+    private Object tryGetLiteralValue(ILogicalExpression expression) {
+        if (expression.getExpressionTag() != LogicalExpressionTag.CONSTANT) {
+            return null;
         }
-        return argsExpressions;
+        return createLiteralValue(expression);
     }

     protected Object createColumnExpression(ILogicalExpression expression) {

--
To view, visit https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/21029?usp=email
To unsubscribe, or for help writing mail filters, visit 
https://asterix-gerrit.ics.uci.edu/settings?usp=email

Gerrit-MessageType: merged
Gerrit-Project: asterixdb
Gerrit-Branch: lumina
Gerrit-Change-Id: I22a39538af83226c323d905f35bb70f52243e118
Gerrit-Change-Number: 21029
Gerrit-PatchSet: 3
Gerrit-Owner: Peeyush Gupta <[email protected]>
Gerrit-Reviewer: Hussain Towaileb <[email protected]>
Gerrit-Reviewer: Jenkins <[email protected]>
Gerrit-Reviewer: Murtadha Hubail <[email protected]>
Gerrit-Reviewer: Peeyush Gupta <[email protected]>
Gerrit-CC: Anon. E. Moose #1000171

Reply via email to