>From Shahrzad Shirazi <[email protected]>:

Shahrzad Shirazi has uploaded this change for review. ( 
https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/21056?usp=email )


Change subject: WIP: HASHBASED_OR compiler property
......................................................................

WIP: HASHBASED_OR compiler property

Change-Id: If65b44c0a8ae36a6643397395fdab21b0174d5ea
---
M 
asterixdb/asterix-algebra/src/main/java/org/apache/asterix/compiler/provider/SqlppCompilationProvider.java
M 
asterixdb/asterix-common/src/main/java/org/apache/asterix/common/config/CompilerProperties.java
M 
asterixdb/asterix-common/src/main/java/org/apache/asterix/common/config/OptimizationConfUtil.java
M 
asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/functions/FunctionTypeInferers.java
M 
hyracks-fullstack/algebricks/algebricks-core/src/main/java/org/apache/hyracks/algebricks/core/config/AlgebricksConfig.java
5 files changed, 66 insertions(+), 12 deletions(-)



  git pull ssh://asterix-gerrit.ics.uci.edu:29418/asterixdb 
refs/changes/56/21056/1

diff --git 
a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/compiler/provider/SqlppCompilationProvider.java
 
b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/compiler/provider/SqlppCompilationProvider.java
index cfc47c6..3c56e70 100644
--- 
a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/compiler/provider/SqlppCompilationProvider.java
+++ 
b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/compiler/provider/SqlppCompilationProvider.java
@@ -98,7 +98,7 @@
                 CompilerProperties.COMPILER_CBO_TEST_KEY, 
CompilerProperties.COMPILER_FORCE_JOIN_ORDER_KEY,
                 CompilerProperties.COMPILER_QUERY_PLAN_SHAPE_KEY, 
CompilerProperties.COMPILER_MIN_MEMORY_ALLOCATION_KEY,
                 CompilerProperties.COMPILER_COLUMN_FILTER_KEY, 
CompilerProperties.COMPILER_BATCH_LOOKUP_KEY,
-                FunctionUtil.IMPORT_PRIVATE_FUNCTIONS,
+                CompilerProperties.COMPILER_OR_TO_HASHBASED_OR_THRESHOLD_KEY, 
FunctionUtil.IMPORT_PRIVATE_FUNCTIONS,
                 
CompilerProperties.COMPILER_MAX_VARIABLE_OCCURRENCES_INLINING_KEY,
                 CompilerProperties.COMPILER_DELTALAKE_FILESPLITS_KEY,
                 CompilerProperties.COMPILER_MAX_EXPRESSION_TREE_SIZE_KEY, 
FuzzyUtils.SIM_FUNCTION_PROP_NAME,
diff --git 
a/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/config/CompilerProperties.java
 
b/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/config/CompilerProperties.java
index af2b450..feb74ee 100644
--- 
a/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/config/CompilerProperties.java
+++ 
b/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/config/CompilerProperties.java
@@ -167,7 +167,11 @@
         COMPILER_REWRITE_DISJUNCTION(
                 BOOLEAN,
                 AlgebricksConfig.REWRITE_DISJUNCTION_DEFAULT,
-                "Set the mode for rewriting disjunctions to joins in query 
plans");
+                "Set the mode for rewriting disjunctions to joins in query 
plans"),
+        COMPILER_OR_TO_HASHBASED_OR_THRESHOLD(
+                getRangedIntegerType(-1, Integer.MAX_VALUE),
+                AlgebricksConfig.HASH_BASED_OR_THRESHOLD_DEFAULT,
+                "The threshold number of disjunctions required to justify 
using the hash-based OR approach");

         private final IOptionType type;
         private final Object defaultValue;
@@ -222,8 +226,12 @@
     public static final String COMPILER_SORT_SAMPLES_KEY = 
Option.COMPILER_SORT_SAMPLES.ini();

     public static final String COMPILER_INDEXONLY_KEY = 
Option.COMPILER_INDEXONLY.ini();
+
     public static final String COMPILER_REWRITE_DISJUNCTION_KEY = 
Option.COMPILER_REWRITE_DISJUNCTION.ini();

+    public static final String COMPILER_OR_TO_HASHBASED_OR_THRESHOLD_KEY =
+            Option.COMPILER_OR_TO_HASHBASED_OR_THRESHOLD.ini();
+
     public static final String COMPILER_INTERNAL_SANITYCHECK_KEY = 
Option.COMPILER_INTERNAL_SANITYCHECK.ini();

     public static final String COMPILER_EXTERNAL_FIELD_PUSHDOWN_KEY = 
Option.COMPILER_EXTERNAL_FIELD_PUSHDOWN.ini();
@@ -328,6 +336,10 @@
         return accessor.getBoolean(Option.COMPILER_REWRITE_DISJUNCTION);
     }

+    public int getHashBasedORThreshold() {
+        return accessor.getInt(Option.COMPILER_OR_TO_HASHBASED_OR_THRESHOLD);
+    }
+
     public boolean isSanityCheck() {
         return accessor.getBoolean(Option.COMPILER_INTERNAL_SANITYCHECK);
     }
diff --git 
a/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/config/OptimizationConfUtil.java
 
b/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/config/OptimizationConfUtil.java
index 9f2ac87..bb7a2f1 100644
--- 
a/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/config/OptimizationConfUtil.java
+++ 
b/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/config/OptimizationConfUtil.java
@@ -100,6 +100,7 @@
         int maxVariableOccurrencesForInlining =
                 getMaxVariableOccurrencesForInlining(compilerProperties, 
querySpecificConfig, sourceLoc);
         int maxExpressionTreeSize = 
getMaxExpressionTreeSize(compilerProperties, querySpecificConfig, sourceLoc);
+        int hashBasedThreshold = getHashBasedORThreshold(compilerProperties, 
querySpecificConfig, sourceLoc);
         boolean orderFields = getBoolean(querySpecificConfig, 
CompilerProperties.COMPILER_ORDERED_FIELDS_KEY,
                 compilerProperties.isOrderedFields());

@@ -134,6 +135,7 @@
         
physOptConf.setMaxVariableOccurrencesForInlining(maxVariableOccurrencesForInlining);
         physOptConf.setMaxExpressionTreeSize(maxExpressionTreeSize);
         physOptConf.setOrderFields(orderFields);
+        //physOptConf.setHashBasedThreshold(hashBasedThreshold);

         // We should have already validated the parameter names at this 
point...
         Set<String> filteredParameterNames = new HashSet<>(parameterNames);
@@ -254,4 +256,17 @@
             throw AsterixException.create(ErrorCode.COMPILATION_ERROR, 
sourceLoc, e.getMessage());
         }
     }
+
+    private static int getHashBasedORThreshold(CompilerProperties 
compilerProperties,
+            Map<String, Object> querySpecificConfig, SourceLocation sourceLoc) 
throws AsterixException {
+        String valueInQuery =
+                (String) 
querySpecificConfig.get(CompilerProperties.COMPILER_OR_TO_HASHBASED_OR_THRESHOLD_KEY);
+        try {
+            return valueInQuery == null ? 
compilerProperties.getHashBasedORThreshold()
+                    : OptionTypes.INTEGER.parse(valueInQuery);
+        } catch (IllegalArgumentException e) {
+            throw AsterixException.create(ErrorCode.COMPILATION_ERROR, 
sourceLoc, e.getMessage());
+        }
+    }
+
 }
diff --git 
a/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/functions/FunctionTypeInferers.java
 
b/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/functions/FunctionTypeInferers.java
index e075a70..9c08c68 100644
--- 
a/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/functions/FunctionTypeInferers.java
+++ 
b/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/functions/FunctionTypeInferers.java
@@ -21,6 +21,7 @@

 import java.util.ArrayList;
 import java.util.List;
+import java.util.Map;

 import org.apache.asterix.common.config.CompilerProperties;
 import org.apache.asterix.common.exceptions.CompilationException;
@@ -146,25 +147,51 @@
         @Override
         public void infer(ILogicalExpression expr, IFunctionDescriptor fd, 
IVariableTypeEnvironment context,
                 CompilerProperties compilerProps, IMetadataProvider<?, ?> mp) 
throws AlgebricksException {
-            Object hashBasedOption = 
mp.getConfig().get(AlgebricksConfig.HASH_BASED_OR_OPTION);
-            boolean hashBasedOrEnabled = 
AlgebricksConfig.HASH_BASED_OR_OPTION_DEFAULT;
-            if (hashBasedOption != null) {
-                hashBasedOrEnabled = 
Boolean.parseBoolean(String.valueOf(hashBasedOption));
+            int hashBasedThreshold;
+            Map<String, Object> config = mp.getConfig();
+            if 
(config.containsKey(CompilerProperties.COMPILER_OR_TO_HASHBASED_OR_THRESHOLD_KEY))
 {
+                Object hashBasedOptionFromCompiler =
+                        
config.get(CompilerProperties.COMPILER_OR_TO_HASHBASED_OR_THRESHOLD_KEY);
+                hashBasedThreshold = 
Integer.parseInt(String.valueOf(hashBasedOptionFromCompiler));
+            } else {
+                hashBasedThreshold = 
AlgebricksConfig.HASH_BASED_OR_THRESHOLD_DEFAULT;
             }
-            if (!hashBasedOrEnabled) {
-                return;
-            }
+
             AbstractFunctionCallExpression fce = 
(AbstractFunctionCallExpression) expr;
-            IAType elementType = useHashBased(fce);
+            IAType elementType = useHashBased(fce, hashBasedThreshold);
             if (elementType != null) {
                 fd.setImmutableStates((Object[]) new IAType[] { elementType });
             }
         }
     };

+<<<<<<< Updated upstream
     private static IAType useHashBased(AbstractFunctionCallExpression 
funcExpr) {
+=======
+    /**
+     * Determines if the OR expression can be optimized using hash-based 
evaluation.
+     * <p>Using the hash-based OR is only possible if we have:
+     * <ul>
+     *   <li>All arguments are equality comparisons (EQ function calls with 
exactly 2 arguments)</li>
+     *   <li>Each EQ has exactly one constant operand and one non-constant 
operand</li>
+     *   <li>All non-constant operands are identical across all EQs</li>
+     *   <li>All constant operands have mutually compatible types that can 
share the same hash function</li>
+     * </ul>
+     * <p>Examples of non-optimizable expressions:
+     * <ul>
+     *   <li>{@code EQ(a, 1) OR EQ(b, 2)} (different variables)</li>
+     *   <li>{@code EQ(a, 1) OR EQ(a, "2")} (incompatible constant types)</li>
+     *   <li>{@code EQ(a, 1) OR EQ(a, null)} (null is not compatible with 
integer)</li>
+     * </ul>
+     *
+     * @param funcExpr the OR function expression
+     * @return the element type for hash-based comparison, or null if 
optimization is not applicable
+     */
+
+    private static IAType useHashBased(AbstractFunctionCallExpression 
funcExpr, int hashBasedThreshold) {
+>>>>>>> Stashed changes
         List<Mutable<ILogicalExpression>> orArgs = funcExpr.getArguments();
-        if (orArgs.size() < 2) {
+        if (orArgs.size() < 2 || orArgs.size() < hashBasedThreshold) {
             return null;
         }
         LogicalVariable commonVar = null;
diff --git 
a/hyracks-fullstack/algebricks/algebricks-core/src/main/java/org/apache/hyracks/algebricks/core/config/AlgebricksConfig.java
 
b/hyracks-fullstack/algebricks/algebricks-core/src/main/java/org/apache/hyracks/algebricks/core/config/AlgebricksConfig.java
index b3506f4..672c2dc 100644
--- 
a/hyracks-fullstack/algebricks/algebricks-core/src/main/java/org/apache/hyracks/algebricks/core/config/AlgebricksConfig.java
+++ 
b/hyracks-fullstack/algebricks/algebricks-core/src/main/java/org/apache/hyracks/algebricks/core/config/AlgebricksConfig.java
@@ -49,7 +49,7 @@
     public static final boolean COLUMN_FILTER_DEFAULT = true;
     public static final boolean ORDERED_FIELDS = true;
     public static final int MAX_VARIABLE_OCCURRENCES_INLINING_DEFAULT = 128;
+    public static final int HASH_BASED_OR_THRESHOLD_DEFAULT = -1;
     public static final String HASH_BASED_OR_OPTION = "hash_based_or";
-    public static final boolean HASH_BASED_OR_OPTION_DEFAULT = false;
     public static final int MAX_EXPRESSION_TREE_SIZE_DEFAULT = 10000;
 }

--
To view, visit https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/21056?usp=email
To unsubscribe, or for help writing mail filters, visit 
https://asterix-gerrit.ics.uci.edu/settings?usp=email

Gerrit-MessageType: newchange
Gerrit-Project: asterixdb
Gerrit-Branch: phoenix
Gerrit-Change-Id: If65b44c0a8ae36a6643397395fdab21b0174d5ea
Gerrit-Change-Number: 21056
Gerrit-PatchSet: 1
Gerrit-Owner: Shahrzad Shirazi <[email protected]>

Reply via email to