>From Murtadha Hubail <[email protected]>: Murtadha Hubail has submitted this change. ( https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/21029?usp=email )
Change subject: [ASTERIXDB-3743][EXT] Failure to pushdown iceberg partition filter ...................................................................... [ASTERIXDB-3743][EXT] Failure to pushdown iceberg partition filter - user model changes: no - storage format changes: no - interface changes: no Details: Allows pushing down expr1 in a expression like expr1 AND expr2 where expr is pushable and expr2 is not. Allows ushing down expression like 2 = field_name. Ext-ref: MB-70436 Change-Id: I22a39538af83226c323d905f35bb70f52243e118 Reviewed-on: https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/21029 Tested-by: Jenkins <[email protected]> Integration-Tests: Jenkins <[email protected]> Reviewed-by: Peeyush Gupta <[email protected]> Reviewed-by: Hussain Towaileb <[email protected]> --- M asterixdb/asterix-metadata/src/main/java/org/apache/asterix/metadata/utils/filter/IcebergTableFilterBuilder.java 1 file changed, 271 insertions(+), 61 deletions(-) Approvals: Jenkins: Verified; Verified Hussain Towaileb: Looks good to me, approved Peeyush Gupta: Looks good to me, but someone else must approve diff --git a/asterixdb/asterix-metadata/src/main/java/org/apache/asterix/metadata/utils/filter/IcebergTableFilterBuilder.java b/asterixdb/asterix-metadata/src/main/java/org/apache/asterix/metadata/utils/filter/IcebergTableFilterBuilder.java index 94ec455..155f6d0 100644 --- a/asterixdb/asterix-metadata/src/main/java/org/apache/asterix/metadata/utils/filter/IcebergTableFilterBuilder.java +++ b/asterixdb/asterix-metadata/src/main/java/org/apache/asterix/metadata/utils/filter/IcebergTableFilterBuilder.java @@ -27,13 +27,16 @@ import org.apache.asterix.om.base.ADate; import org.apache.asterix.om.base.ADateTime; import org.apache.asterix.om.base.ADouble; +import org.apache.asterix.om.base.AFloat; import org.apache.asterix.om.base.AInt16; import org.apache.asterix.om.base.AInt32; import org.apache.asterix.om.base.AInt64; import org.apache.asterix.om.base.AInt8; import org.apache.asterix.om.base.AString; +import org.apache.asterix.om.base.ATime; +import org.apache.asterix.om.base.IAObject; import org.apache.asterix.om.constants.AsterixConstantValue; -import org.apache.asterix.om.functions.IFunctionDescriptor; +import org.apache.asterix.om.functions.BuiltinFunctions; import org.apache.asterix.om.types.ARecordType; import org.apache.asterix.om.types.ATypeTag; import org.apache.asterix.runtime.projection.ExternalDatasetProjectionFiltrationInfo; @@ -64,108 +67,315 @@ } public IExternalFilterEvaluatorFactory build() throws AlgebricksException { - Object icebergTablePredicate = null; + Expression icebergTablePredicate = null; if (filterExpression != null) { try { - icebergTablePredicate = createExpression(filterExpression); + icebergTablePredicate = createIcebergExpression(filterExpression, false); } catch (Exception e) { - LOGGER.error("Error creating IcebergTable filter expression, skipping filter pushdown", e); + LOGGER.warn("Error creating IcebergTable filter expression, skipping filter pushdown", e); } } - return new IcebergTableFilterEvaluatorFactory((Expression) icebergTablePredicate); + return new IcebergTableFilterEvaluatorFactory(icebergTablePredicate); } - protected Object createExpression(ILogicalExpression expression) throws AlgebricksException { + /** + * Recursively converts an AsterixDB logical expression into an Iceberg {@link Expression}. + * + * @return an Iceberg Expression, or {@code null} if the expression cannot be converted + */ + protected Expression createIcebergExpression(ILogicalExpression expression, boolean insideNot) + throws AlgebricksException { if (filterPaths.containsKey(expression)) { - // Path expression, create a value accessor (i.e., a column reader) - return createColumnExpression(expression); + return null; } else if (expression.getExpressionTag() == LogicalExpressionTag.CONSTANT) { - return createLiteralExpression(expression); + return createBooleanConstantExpression(expression); } else if (expression.getExpressionTag() == LogicalExpressionTag.FUNCTION_CALL) { - return handleFunction(expression); + return handleFunction(expression, insideNot); } - - /* - * A variable expression: This should not happen as the provided filter expression is inlined. - * If a variable was encountered for some reason, it should only be the record variable. If the record variable - * was encountered, that means there's a missing value path the compiler didn't provide. - */ - throw new RuntimeException("Unsupported expression " + expression + ". the provided paths are: " + filterPaths); + LOGGER.debug("Unsupported expression type {}, skipping filter pushdown", expression); + return null; } - private Object createLiteralExpression(ILogicalExpression expression) throws AlgebricksException { + /** + * Extracts a Java value from a constant expression for use in comparisons. + * + * @return a Java value (String, Number, etc.), or {@code null} for null/missing + */ + private Object createLiteralValue(ILogicalExpression expression) { ConstantExpression constExpr = (ConstantExpression) expression; if (constExpr.getValue().isNull() || constExpr.getValue().isMissing()) { - throw new RuntimeException("Unsupported literal type: " + constExpr.getValue()); + return null; + } + if (!(constExpr.getValue() instanceof AsterixConstantValue)) { + return null; } AsterixConstantValue constantValue = (AsterixConstantValue) constExpr.getValue(); - switch (constantValue.getObject().getType().getTypeTag()) { + IAObject obj = constantValue.getObject(); + switch (obj.getType().getTypeTag()) { case STRING: - return ((AString) constantValue.getObject()).getStringValue(); + return ((AString) obj).getStringValue(); case TINYINT: - return ((AInt8) constantValue.getObject()).getByteValue(); + return (int) ((AInt8) obj).getByteValue(); case SMALLINT: - return ((AInt16) constantValue.getObject()).getShortValue(); + return (int) ((AInt16) obj).getShortValue(); case INTEGER: - return ((AInt32) constantValue.getObject()).getIntegerValue(); + return ((AInt32) obj).getIntegerValue(); + case BIGINT: + return ((AInt64) obj).getLongValue(); + case FLOAT: + return ((AFloat) obj).getFloatValue(); + case DOUBLE: + return ((ADouble) obj).getDoubleValue(); case BOOLEAN: return constantValue.isTrue(); - case BIGINT: - return ((AInt64) constantValue.getObject()).getLongValue(); - case DOUBLE: - return ((ADouble) constantValue.getObject()).getDoubleValue(); case DATE: - return ((ADate) constantValue.getObject()).getChrononTimeInDays(); + // Iceberg DATE is represented as days from epoch (int) + return ((ADate) obj).getChrononTimeInDays(); case DATETIME: - Long millis = ((ADateTime) constantValue.getObject()).getChrononTime(); - return TimeUnit.MILLISECONDS.toMicros(millis); + // Iceberg TIMESTAMP is represented as microseconds from epoch + return TimeUnit.MILLISECONDS.toMicros(((ADateTime) obj).getChrononTime()); + case TIME: + // Iceberg TIME is represented as microseconds from midnight + return TimeUnit.MILLISECONDS.toMicros(((ATime) obj).getChrononTime()); default: - throw new RuntimeException("Unsupported literal type: " + constantValue.getObject().getType()); + LOGGER.debug("Unsupported literal type: {}", obj.getType()); + return null; } } + /** + * Converts a bare boolean constant to an Iceberg Expression (alwaysTrue / alwaysFalse). + */ + private Expression createBooleanConstantExpression(ILogicalExpression expression) { + ConstantExpression constExpr = (ConstantExpression) expression; + if (constExpr.getValue().isTrue()) { + return Expressions.alwaysTrue(); + } else if (constExpr.getValue().isFalse()) { + return Expressions.alwaysFalse(); + } + // null/missing constant — cannot determine truth value; do not push down + return null; + } + @Override protected IScalarEvaluatorFactory createValueAccessor(ILogicalExpression expression) { return null; } - private Expression handleFunction(ILogicalExpression expr) throws AlgebricksException { + private Expression handleFunction(ILogicalExpression expr, boolean insideNot) throws AlgebricksException { AbstractFunctionCallExpression funcExpr = (AbstractFunctionCallExpression) expr; - IFunctionDescriptor fd = resolveFunction(funcExpr); - FunctionIdentifier fid = fd.getIdentifier(); - if (funcExpr.getArguments().size() != 2 - && !(fid.equals(AlgebricksBuiltinFunctions.AND) || fid.equals(AlgebricksBuiltinFunctions.OR))) { - throw new RuntimeException("Predicate should only have 2 arguments: " + funcExpr); - } - List<Object> args = handleArgs(funcExpr); + FunctionIdentifier fid = funcExpr.getFunctionIdentifier(); + + // Logical Connectives if (fid.equals(AlgebricksBuiltinFunctions.AND)) { - return Expressions.and((Expression) args.get(0), (Expression) args.get(1)); + return handleAnd(funcExpr, insideNot); } else if (fid.equals(AlgebricksBuiltinFunctions.OR)) { - return Expressions.or((Expression) args.get(0), (Expression) args.get(1)); - } else if (fid.equals(AlgebricksBuiltinFunctions.EQ)) { - return Expressions.equal((String) args.get(0), args.get(1)); - } else if (fid.equals(AlgebricksBuiltinFunctions.GE)) { - return Expressions.greaterThanOrEqual((String) args.get(0), args.get(1)); - } else if (fid.equals(AlgebricksBuiltinFunctions.GT)) { - return Expressions.greaterThan((String) args.get(0), args.get(1)); - } else if (fid.equals(AlgebricksBuiltinFunctions.LE)) { - return Expressions.lessThanOrEqual((String) args.get(0), args.get(1)); - } else if (fid.equals(AlgebricksBuiltinFunctions.LT)) { - return Expressions.lessThan((String) args.get(0), args.get(1)); + return handleOr(funcExpr, insideNot); + } else if (fid.equals(AlgebricksBuiltinFunctions.NOT)) { + return handleNot(funcExpr, insideNot); + } + + // Null check + if (fid.equals(AlgebricksBuiltinFunctions.IS_NULL)) { + return handleIsNull(funcExpr); + } + + // Comparison operators + if (fid.equals(AlgebricksBuiltinFunctions.EQ) || fid.equals(AlgebricksBuiltinFunctions.NEQ) + || fid.equals(AlgebricksBuiltinFunctions.LT) || fid.equals(AlgebricksBuiltinFunctions.LE) + || fid.equals(AlgebricksBuiltinFunctions.GT) || fid.equals(AlgebricksBuiltinFunctions.GE)) { + return handleComparison(funcExpr, fid); + } + + // String functions + if (fid.equals(BuiltinFunctions.STRING_STARTS_WITH)) { + return handleStringStartsWith(funcExpr); + } + LOGGER.trace("Unsupported function for Iceberg filter pushdown: {}", fid); + return null; + } + + private Expression handleAnd(AbstractFunctionCallExpression funcExpr, boolean insideNot) + throws AlgebricksException { + Expression result = null; + for (Mutable<ILogicalExpression> argRef : funcExpr.getArguments()) { + Expression argExpr = createIcebergExpression(argRef.getValue(), insideNot); + if (argExpr == null) { + if (insideNot) { + // Under an odd number of NOTs, partial AND pushdown is unsafe. + // NOT(AND(e1, e2)) ≡ NOT(e1) OR NOT(e2), so dropping e2 and pushing + // NOT(e1) would incorrectly prune rows where e2 is false. + return null; + } + // Under an even number of NOTs (including zero — top-level or inside OR): + // skip un-pushable children safely. + // - Top-level / inside OR at top context: the residual filter in the plan + // evaluates skipped conjuncts. + // - Inside OR: AND(e1, e2) reduced to e1 makes the disjunct weaker, so the + // overall OR passes more rows — a safe over-approximation. + // - Under double-NOT: NOT(NOT(AND(e1,e2))) reduced to NOT(NOT(e1)) = e1, + // which is also a safe over-approximation. + continue; + } + result = (result == null) ? argExpr : Expressions.and(result, argExpr); + } + return result; + } + + private Expression handleOr(AbstractFunctionCallExpression funcExpr, boolean insideNot) throws AlgebricksException { + Expression result = null; + for (Mutable<ILogicalExpression> argRef : funcExpr.getArguments()) { + // insideNot (odd-NOT parity) propagates unchanged through OR: OR itself does + // not introduce or remove negation. + Expression argExpr = createIcebergExpression(argRef.getValue(), insideNot); + if (argExpr == null) { + // One un-pushable disjunct makes the whole OR un-pushable for correctness: + // skipping a disjunct would over-prune rows matching only that disjunct. + return null; + } + result = (result == null) ? argExpr : Expressions.or(result, argExpr); + } + return result; + } + + private Expression handleNot(AbstractFunctionCallExpression funcExpr, boolean insideNot) + throws AlgebricksException { + List<Mutable<ILogicalExpression>> args = funcExpr.getArguments(); + if (args.size() != 1) { + return null; + } + ILogicalExpression innerExpr = args.get(0).getValue(); + // NOT toggles the negation context: each NOT flips whether we are inside an odd + // or even number of negations. Partial AND pushdown is only unsafe under an odd + // number of NOTs (insideNot=true), because: + // NOT(AND(e1,e2)) — partial push of e1 yields NOT(e1), which is too strong. + // NOT(NOT(AND(e1,e2))) — partial push of e1 yields NOT(NOT(e1)) = e1, which is + // a safe over-approximation (passes more rows than the full AND). + Expression inner = createIcebergExpression(innerExpr, !insideNot); + return (inner != null) ? Expressions.not(inner) : null; + } + + private Expression handleIsNull(AbstractFunctionCallExpression funcExpr) throws AlgebricksException { + List<Mutable<ILogicalExpression>> args = funcExpr.getArguments(); + if (args.size() != 1) { + return null; + } + ILogicalExpression arg = args.get(0).getValue(); + String columnName = tryGetColumnName(arg); + if (columnName == null) { + return null; + } + return Expressions.isNull(columnName); + } + + private Expression handleComparison(AbstractFunctionCallExpression funcExpr, FunctionIdentifier fid) + throws AlgebricksException { + List<Mutable<ILogicalExpression>> args = funcExpr.getArguments(); + if (args.size() != 2) { + return null; + } + ILogicalExpression left = args.get(0).getValue(); + ILogicalExpression right = args.get(1).getValue(); + String columnName = tryGetColumnName(left); + Object literalValue; + boolean flipped = false; + if (columnName != null) { + // Normal: column <op> literal + literalValue = tryGetLiteralValue(right); } else { - throw new RuntimeException("Unsupported function: " + funcExpr); + // Try flipped: literal <op> column + columnName = tryGetColumnName(right); + if (columnName == null) { + // Neither side is a known column; could be column <op> column — not supported + return null; + } + literalValue = tryGetLiteralValue(left); + flipped = true; + } + if (literalValue == null) { + return null; + } + // When operands are flipped, reverse the comparison direction + FunctionIdentifier effectiveFid = flipped ? flipComparison(fid) : fid; + return buildComparisonExpression(effectiveFid, columnName, literalValue); + } + + private Expression buildComparisonExpression(FunctionIdentifier fid, String columnName, Object value) { + if (fid.equals(AlgebricksBuiltinFunctions.EQ)) { + return Expressions.equal(columnName, value); + } else if (fid.equals(AlgebricksBuiltinFunctions.NEQ)) { + return Expressions.notEqual(columnName, value); + } else if (fid.equals(AlgebricksBuiltinFunctions.LT)) { + return Expressions.lessThan(columnName, value); + } else if (fid.equals(AlgebricksBuiltinFunctions.LE)) { + return Expressions.lessThanOrEqual(columnName, value); + } else if (fid.equals(AlgebricksBuiltinFunctions.GT)) { + return Expressions.greaterThan(columnName, value); + } else if (fid.equals(AlgebricksBuiltinFunctions.GE)) { + return Expressions.greaterThanOrEqual(columnName, value); + } + return null; + } + + /** + * Flips a binary comparison operator to handle reversed operand order. + * e.g. {@code literal < column} becomes {@code column > literal} + */ + private FunctionIdentifier flipComparison(FunctionIdentifier fid) { + if (fid.equals(AlgebricksBuiltinFunctions.LT)) { + return AlgebricksBuiltinFunctions.GT; + } else if (fid.equals(AlgebricksBuiltinFunctions.LE)) { + return AlgebricksBuiltinFunctions.GE; + } else if (fid.equals(AlgebricksBuiltinFunctions.GT)) { + return AlgebricksBuiltinFunctions.LT; + } else if (fid.equals(AlgebricksBuiltinFunctions.GE)) { + return AlgebricksBuiltinFunctions.LE; + } + // EQ and NEQ are symmetric; return as-is + return fid; + } + + private Expression handleStringStartsWith(AbstractFunctionCallExpression funcExpr) throws AlgebricksException { + List<Mutable<ILogicalExpression>> args = funcExpr.getArguments(); + if (args.size() != 2) { + return null; + } + String columnName = tryGetColumnName(args.get(0).getValue()); + if (columnName == null) { + return null; + } + Object prefix = tryGetLiteralValue(args.get(1).getValue()); + if (!(prefix instanceof String)) { + return null; + } + return Expressions.startsWith(columnName, (String) prefix); + } + + /** + * Returns the Iceberg column name for an expression if it refers to a pushed-down filter path, + * or {@code null} if the expression is not a column reference. + */ + private String tryGetColumnName(ILogicalExpression expression) { + if (!filterPaths.containsKey(expression)) { + return null; + } + try { + return (String) createColumnExpression(expression); + } catch (Exception e) { + LOGGER.debug("Failed to create column expression for {}", expression, e); + return null; } } - private List<Object> handleArgs(AbstractFunctionCallExpression funcExpr) throws AlgebricksException { - List<Mutable<ILogicalExpression>> args = funcExpr.getArguments(); - List<Object> argsExpressions = new ArrayList<>(); - for (int i = 0; i < args.size(); i++) { - ILogicalExpression expr = args.get(i).getValue(); - Object evalFactory = createExpression(expr); - argsExpressions.add(evalFactory); + /** + * Returns the Java literal value from a constant expression, or {@code null} if the + * expression is not a constant or the constant type is unsupported. + */ + private Object tryGetLiteralValue(ILogicalExpression expression) { + if (expression.getExpressionTag() != LogicalExpressionTag.CONSTANT) { + return null; } - return argsExpressions; + return createLiteralValue(expression); } protected Object createColumnExpression(ILogicalExpression expression) { -- To view, visit https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/21029?usp=email To unsubscribe, or for help writing mail filters, visit https://asterix-gerrit.ics.uci.edu/settings?usp=email Gerrit-MessageType: merged Gerrit-Project: asterixdb Gerrit-Branch: lumina Gerrit-Change-Id: I22a39538af83226c323d905f35bb70f52243e118 Gerrit-Change-Number: 21029 Gerrit-PatchSet: 3 Gerrit-Owner: Peeyush Gupta <[email protected]> Gerrit-Reviewer: Hussain Towaileb <[email protected]> Gerrit-Reviewer: Jenkins <[email protected]> Gerrit-Reviewer: Murtadha Hubail <[email protected]> Gerrit-Reviewer: Peeyush Gupta <[email protected]> Gerrit-CC: Anon. E. Moose #1000171
