From Shahrzad Shirazi <[email protected]>:
Shahrzad Shirazi has uploaded this change for review. (
https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/21056?usp=email )
Change subject: WIP: HASHBASED_OR compiler property
......................................................................
WIP: HASHBASED_OR compiler property
Change-Id: If65b44c0a8ae36a6643397395fdab21b0174d5ea
---
M
asterixdb/asterix-algebra/src/main/java/org/apache/asterix/compiler/provider/SqlppCompilationProvider.java
M
asterixdb/asterix-common/src/main/java/org/apache/asterix/common/config/CompilerProperties.java
M
asterixdb/asterix-common/src/main/java/org/apache/asterix/common/config/OptimizationConfUtil.java
M
asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/functions/FunctionTypeInferers.java
M
hyracks-fullstack/algebricks/algebricks-core/src/main/java/org/apache/hyracks/algebricks/core/config/AlgebricksConfig.java
5 files changed, 66 insertions(+), 12 deletions(-)
git pull ssh://asterix-gerrit.ics.uci.edu:29418/asterixdb
refs/changes/56/21056/1
diff --git
a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/compiler/provider/SqlppCompilationProvider.java
b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/compiler/provider/SqlppCompilationProvider.java
index cfc47c6..3c56e70 100644
---
a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/compiler/provider/SqlppCompilationProvider.java
+++
b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/compiler/provider/SqlppCompilationProvider.java
@@ -98,7 +98,7 @@
CompilerProperties.COMPILER_CBO_TEST_KEY,
CompilerProperties.COMPILER_FORCE_JOIN_ORDER_KEY,
CompilerProperties.COMPILER_QUERY_PLAN_SHAPE_KEY,
CompilerProperties.COMPILER_MIN_MEMORY_ALLOCATION_KEY,
CompilerProperties.COMPILER_COLUMN_FILTER_KEY,
CompilerProperties.COMPILER_BATCH_LOOKUP_KEY,
- FunctionUtil.IMPORT_PRIVATE_FUNCTIONS,
+ CompilerProperties.COMPILER_OR_TO_HASHBASED_OR_THRESHOLD_KEY,
FunctionUtil.IMPORT_PRIVATE_FUNCTIONS,
CompilerProperties.COMPILER_MAX_VARIABLE_OCCURRENCES_INLINING_KEY,
CompilerProperties.COMPILER_DELTALAKE_FILESPLITS_KEY,
CompilerProperties.COMPILER_MAX_EXPRESSION_TREE_SIZE_KEY,
FuzzyUtils.SIM_FUNCTION_PROP_NAME,
diff --git
a/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/config/CompilerProperties.java
b/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/config/CompilerProperties.java
index af2b450..feb74ee 100644
---
a/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/config/CompilerProperties.java
+++
b/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/config/CompilerProperties.java
@@ -167,7 +167,11 @@
COMPILER_REWRITE_DISJUNCTION(
BOOLEAN,
AlgebricksConfig.REWRITE_DISJUNCTION_DEFAULT,
- "Set the mode for rewriting disjunctions to joins in query
plans");
+ "Set the mode for rewriting disjunctions to joins in query
plans"),
+ COMPILER_OR_TO_HASHBASED_OR_THRESHOLD(
+ getRangedIntegerType(-1, Integer.MAX_VALUE),
+ AlgebricksConfig.HASH_BASED_OR_THRESHOLD_DEFAULT,
+ "The threshold number of disjunctions required to justify
using the hash-based OR approach");
private final IOptionType type;
private final Object defaultValue;
@@ -222,8 +226,12 @@
public static final String COMPILER_SORT_SAMPLES_KEY =
Option.COMPILER_SORT_SAMPLES.ini();
public static final String COMPILER_INDEXONLY_KEY =
Option.COMPILER_INDEXONLY.ini();
+
public static final String COMPILER_REWRITE_DISJUNCTION_KEY =
Option.COMPILER_REWRITE_DISJUNCTION.ini();
+ public static final String COMPILER_OR_TO_HASHBASED_OR_THRESHOLD_KEY =
+ Option.COMPILER_OR_TO_HASHBASED_OR_THRESHOLD.ini();
+
public static final String COMPILER_INTERNAL_SANITYCHECK_KEY =
Option.COMPILER_INTERNAL_SANITYCHECK.ini();
public static final String COMPILER_EXTERNAL_FIELD_PUSHDOWN_KEY =
Option.COMPILER_EXTERNAL_FIELD_PUSHDOWN.ini();
@@ -328,6 +336,10 @@
return accessor.getBoolean(Option.COMPILER_REWRITE_DISJUNCTION);
}
+ public int getHashBasedORThreshold() {
+ return accessor.getInt(Option.COMPILER_OR_TO_HASHBASED_OR_THRESHOLD);
+ }
+
public boolean isSanityCheck() {
return accessor.getBoolean(Option.COMPILER_INTERNAL_SANITYCHECK);
}
diff --git
a/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/config/OptimizationConfUtil.java
b/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/config/OptimizationConfUtil.java
index 9f2ac87..bb7a2f1 100644
---
a/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/config/OptimizationConfUtil.java
+++
b/asterixdb/asterix-common/src/main/java/org/apache/asterix/common/config/OptimizationConfUtil.java
@@ -100,6 +100,7 @@
int maxVariableOccurrencesForInlining =
getMaxVariableOccurrencesForInlining(compilerProperties,
querySpecificConfig, sourceLoc);
int maxExpressionTreeSize =
getMaxExpressionTreeSize(compilerProperties, querySpecificConfig, sourceLoc);
+ int hashBasedThreshold = getHashBasedORThreshold(compilerProperties,
querySpecificConfig, sourceLoc);
boolean orderFields = getBoolean(querySpecificConfig,
CompilerProperties.COMPILER_ORDERED_FIELDS_KEY,
compilerProperties.isOrderedFields());
@@ -134,6 +135,7 @@
physOptConf.setMaxVariableOccurrencesForInlining(maxVariableOccurrencesForInlining);
physOptConf.setMaxExpressionTreeSize(maxExpressionTreeSize);
physOptConf.setOrderFields(orderFields);
+ //physOptConf.setHashBasedThreshold(hashBasedThreshold);
// We should have already validated the parameter names at this
point...
Set<String> filteredParameterNames = new HashSet<>(parameterNames);
@@ -254,4 +256,17 @@
throw AsterixException.create(ErrorCode.COMPILATION_ERROR,
sourceLoc, e.getMessage());
}
}
+
+ private static int getHashBasedORThreshold(CompilerProperties
compilerProperties,
+ Map<String, Object> querySpecificConfig, SourceLocation sourceLoc)
throws AsterixException {
+ String valueInQuery =
+ (String)
querySpecificConfig.get(CompilerProperties.COMPILER_OR_TO_HASHBASED_OR_THRESHOLD_KEY);
+ try {
+ return valueInQuery == null ?
compilerProperties.getHashBasedORThreshold()
+ : OptionTypes.INTEGER.parse(valueInQuery);
+ } catch (IllegalArgumentException e) {
+ throw AsterixException.create(ErrorCode.COMPILATION_ERROR,
sourceLoc, e.getMessage());
+ }
+ }
+
}
diff --git
a/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/functions/FunctionTypeInferers.java
b/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/functions/FunctionTypeInferers.java
index e075a70..9c08c68 100644
---
a/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/functions/FunctionTypeInferers.java
+++
b/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/functions/FunctionTypeInferers.java
@@ -21,6 +21,7 @@
import java.util.ArrayList;
import java.util.List;
+import java.util.Map;
import org.apache.asterix.common.config.CompilerProperties;
import org.apache.asterix.common.exceptions.CompilationException;
@@ -146,25 +147,51 @@
@Override
public void infer(ILogicalExpression expr, IFunctionDescriptor fd,
IVariableTypeEnvironment context,
CompilerProperties compilerProps, IMetadataProvider<?, ?> mp)
throws AlgebricksException {
- Object hashBasedOption =
mp.getConfig().get(AlgebricksConfig.HASH_BASED_OR_OPTION);
- boolean hashBasedOrEnabled =
AlgebricksConfig.HASH_BASED_OR_OPTION_DEFAULT;
- if (hashBasedOption != null) {
- hashBasedOrEnabled =
Boolean.parseBoolean(String.valueOf(hashBasedOption));
+ int hashBasedThreshold;
+ Map<String, Object> config = mp.getConfig();
+ if
(config.containsKey(CompilerProperties.COMPILER_OR_TO_HASHBASED_OR_THRESHOLD_KEY))
{
+ Object hashBasedOptionFromCompiler =
+
config.get(CompilerProperties.COMPILER_OR_TO_HASHBASED_OR_THRESHOLD_KEY);
+ hashBasedThreshold =
Integer.parseInt(String.valueOf(hashBasedOptionFromCompiler));
+ } else {
+ hashBasedThreshold =
AlgebricksConfig.HASH_BASED_OR_THRESHOLD_DEFAULT;
}
- if (!hashBasedOrEnabled) {
- return;
- }
+
AbstractFunctionCallExpression fce =
(AbstractFunctionCallExpression) expr;
- IAType elementType = useHashBased(fce);
+ IAType elementType = useHashBased(fce, hashBasedThreshold);
if (elementType != null) {
fd.setImmutableStates((Object[]) new IAType[] { elementType });
}
}
};
+<<<<<<< Updated upstream
private static IAType useHashBased(AbstractFunctionCallExpression
funcExpr) {
+=======
+ /**
+ * Determines if the OR expression can be optimized using hash-based
evaluation.
+ * <p>The hash-based OR can be used only when all of the following hold:
+ * <ul>
+ * <li>All arguments are equality comparisons (EQ function calls with
exactly 2 arguments)</li>
+ * <li>Each EQ has exactly one constant operand and one non-constant
operand</li>
+ * <li>All non-constant operands are identical across all EQs</li>
+ * <li>All constant operands have mutually compatible types that can
share the same hash function</li>
+ * </ul>
+ * <p>Examples of non-optimizable expressions:
+ * <ul>
+ * <li>{@code EQ(a, 1) OR EQ(b, 2)} (different variables)</li>
+ * <li>{@code EQ(a, 1) OR EQ(a, "2")} (incompatible constant types)</li>
+ * <li>{@code EQ(a, 1) OR EQ(a, null)} (null is not compatible with
integer)</li>
+ * </ul>
+ *
+ * @param funcExpr the OR function expression
+ * @return the element type for hash-based comparison, or null if
optimization is not applicable
+ */
+
+ private static IAType useHashBased(AbstractFunctionCallExpression
funcExpr, int hashBasedThreshold) {
+>>>>>>> Stashed changes
List<Mutable<ILogicalExpression>> orArgs = funcExpr.getArguments();
- if (orArgs.size() < 2) {
+ if (orArgs.size() < 2 || orArgs.size() < hashBasedThreshold) {
return null;
}
LogicalVariable commonVar = null;
diff --git
a/hyracks-fullstack/algebricks/algebricks-core/src/main/java/org/apache/hyracks/algebricks/core/config/AlgebricksConfig.java
b/hyracks-fullstack/algebricks/algebricks-core/src/main/java/org/apache/hyracks/algebricks/core/config/AlgebricksConfig.java
index b3506f4..672c2dc 100644
---
a/hyracks-fullstack/algebricks/algebricks-core/src/main/java/org/apache/hyracks/algebricks/core/config/AlgebricksConfig.java
+++
b/hyracks-fullstack/algebricks/algebricks-core/src/main/java/org/apache/hyracks/algebricks/core/config/AlgebricksConfig.java
@@ -49,7 +49,7 @@
public static final boolean COLUMN_FILTER_DEFAULT = true;
public static final boolean ORDERED_FIELDS = true;
public static final int MAX_VARIABLE_OCCURRENCES_INLINING_DEFAULT = 128;
+ public static final int HASH_BASED_OR_THRESHOLD_DEFAULT = -1;
public static final String HASH_BASED_OR_OPTION = "hash_based_or";
- public static final boolean HASH_BASED_OR_OPTION_DEFAULT = false;
public static final int MAX_EXPRESSION_TREE_SIZE_DEFAULT = 10000;
}
--
To view, visit https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/21056?usp=email
To unsubscribe, or for help writing mail filters, visit
https://asterix-gerrit.ics.uci.edu/settings?usp=email
Gerrit-MessageType: newchange
Gerrit-Project: asterixdb
Gerrit-Branch: phoenix
Gerrit-Change-Id: If65b44c0a8ae36a6643397395fdab21b0174d5ea
Gerrit-Change-Number: 21056
Gerrit-PatchSet: 1
Gerrit-Owner: Shahrzad Shirazi <[email protected]>