>From Shahrzad Shirazi <[email protected]>:
Shahrzad Shirazi has uploaded this change for review. (
https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/20774?usp=email )
Change subject: WIP: Choose the Best index for a query to make it index-only
......................................................................
WIP: Choose the Best index for a query to make it index-only
Change-Id: I4f996ee7b6eb03067fb384161fb4eb60f94e8353
---
M
asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/am/AbstractIntroduceAccessMethodRule.java
M
asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/am/IntroduceSelectAccessMethodRule.java
2 files changed, 96 insertions(+), 3 deletions(-)
git pull ssh://asterix-gerrit.ics.uci.edu:29418/asterixdb
refs/changes/74/20774/1
diff --git
a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/am/AbstractIntroduceAccessMethodRule.java
b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/am/AbstractIntroduceAccessMethodRule.java
index ce6a0d4..21d8aaa 100644
---
a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/am/AbstractIntroduceAccessMethodRule.java
+++
b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/am/AbstractIntroduceAccessMethodRule.java
@@ -230,7 +230,8 @@
* process by making it more systematic.
*/
protected IntroduceSelectAccessMethodRule.IndexAccessInfo chooseBestIndex(
- Map<IAccessMethod, AccessMethodAnalysisContext> analyzedAMs,
List<IndexAccessInfo> chosenIndexes) {
+ Map<IAccessMethod, AccessMethodAnalysisContext> analyzedAMs,
List<IndexAccessInfo> chosenIndexes)
+ throws AlgebricksException {
List<IntroduceSelectAccessMethodRule.IndexAccessInfo> list = new
ArrayList<>();
chooseAllIndexes(analyzedAMs, list);
chosenIndexes.addAll(list);
@@ -244,9 +245,16 @@
* [BTreeAccessMethod , IndexType.BTREE], [RTreeAccessMethod ,
IndexType.RTREE],
* [InvertedIndexAccessMethod, IndexType.SINGLE_PARTITION_WORD_INVIX ||
SINGLE_PARTITION_NGRAM_INVIX ||
* LENGTH_PARTITIONED_WORD_INVIX || LENGTH_PARTITIONED_NGRAM_INVIX]
+ * If an index covers all fields used in the query, that index will be the
only one chosen.
*/
protected void chooseAllIndexes(Map<IAccessMethod,
AccessMethodAnalysisContext> analyzedAMs,
- List<IntroduceSelectAccessMethodRule.IndexAccessInfo> result) {
+ List<IntroduceSelectAccessMethodRule.IndexAccessInfo> result)
throws AlgebricksException {
+ chooseAllIndexes(analyzedAMs, result, null, null, null);
+ }
+
+ protected void chooseAllIndexes(Map<IAccessMethod,
AccessMethodAnalysisContext> analyzedAMs,
+ List<IntroduceSelectAccessMethodRule.IndexAccessInfo> result,
OptimizableOperatorSubTree subTree,
+ List<Mutable<ILogicalOperator>> afterSelectRefs,
IOptimizationContext context) throws AlgebricksException {
// Use variables (fields) to the index types map to check which type
of indexes are applied for the vars.
Map<List<Pair<Integer, Integer>>, List<IndexType>>
resultVarsToIndexTypesMap = new HashMap<>();
Iterator<Map.Entry<IAccessMethod, AccessMethodAnalysisContext>> amIt =
analyzedAMs.entrySet().iterator();
@@ -305,6 +313,91 @@
}
}
}
+
+ // If an index covers all fields used in the query, keep only that
index
+ Set<List<String>> queryFields = new HashSet<>();
+ // Extract fields from WHERE clause (optimizable function expressions)
+ for (AccessMethodAnalysisContext ctx : analyzedAMs.values()) {
+ for (IOptimizableFuncExpr expr : ctx.getMatchedFuncExprs()) {
+ for (int idx = 0; idx < expr.getNumLogicalVars(); idx++) {
+ List<String> fieldName = expr.getFieldName(idx);
+ if (fieldName != null && !fieldName.isEmpty()) {
+ queryFields.add(fieldName);
+ }
+ }
+ }
+ }
+
+ // Extract fields from SELECT clause (operators after SELECT)
+ if (subTree != null && afterSelectRefs != null && context != null) {
+ // First, populate the varsToFieldNameMap by filling field names
in the subtree
+ fillFieldNamesInTheSubTree(subTree, context);
+
+ // Then, extract fields from variables used in operators after
SELECT
+ Set<LogicalVariable> usedVars = new HashSet<>();
+ for (Mutable<ILogicalOperator> opRef : afterSelectRefs) {
+ ILogicalOperator op = opRef.getValue();
+ VariableUtilities.getUsedVariables(op, usedVars);
+ }
+
+ // Map variables to field names
+ for (LogicalVariable var : usedVars) {
+ List<String> fieldName =
subTree.getVarsToFieldNameMap().get(var);
+ if (fieldName != null && !fieldName.isEmpty()) {
+ queryFields.add(fieldName);
+ }
+ }
+ }
+
+ if (!queryFields.isEmpty() && !result.isEmpty()) {
+ // Find indexes that cover all query fields
+ List<IntroduceSelectAccessMethodRule.IndexAccessInfo>
coveringIndexes = new ArrayList<>();
+
+ for (IntroduceSelectAccessMethodRule.IndexAccessInfo indexInfo :
result) {
+ List<List<String>> indexFields =
findKeyFieldNames(indexInfo.getIndex());
+ if (coversAllQueryFields(indexFields, queryFields)) {
+ coveringIndexes.add(indexInfo);
+ }
+ }
+
+ // If we found indexes that cover all fields, keep only those
+ // If multiple indexes cover all fields, prefer the one with the
fewest fields
+ if (!coveringIndexes.isEmpty()) {
+ result.clear();
+ if (coveringIndexes.size() == 1) {
+ result.add(coveringIndexes.get(0));
+ } else {
+ // Find the index with the fewest fields that still covers
all query fields
+ IntroduceSelectAccessMethodRule.IndexAccessInfo bestIndex
= null;
+ int minFields = Integer.MAX_VALUE;
+ for (IntroduceSelectAccessMethodRule.IndexAccessInfo
indexInfo : coveringIndexes) {
+ List<List<String>> indexFields =
findKeyFieldNames(indexInfo.getIndex());
+ int numFields = indexFields.size();
+ if (numFields < minFields) {
+ minFields = numFields;
+ bestIndex = indexInfo;
+ } else if (numFields == minFields && bestIndex !=
null) {
+ // Tie-breaker: prefer the index that comes last
alphabetically by name
+ // This favors indexes with longer names (e.g.,
n999 over n99)
+ if (indexInfo.getIndex().getIndexName()
+
.compareTo(bestIndex.getIndex().getIndexName()) > 0) {
+ bestIndex = indexInfo;
+ }
+ }
+ }
+ if (bestIndex != null) {
+ result.add(bestIndex);
+ } else {
+ // Fallback: add all covering indexes
+ result.addAll(coveringIndexes);
+ }
+ }
+ }
+ }
+ }
+
+ private boolean coversAllQueryFields(List<List<String>> indexFields,
Set<List<String>> queryFields) {
+ return queryFields.stream().allMatch(queryField ->
indexFields.contains(queryField));
}
private boolean
isSameFullTextConfigInIndexAndQuery(AccessMethodAnalysisContext analysisCtx,
diff --git
a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/am/IntroduceSelectAccessMethodRule.java
b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/am/IntroduceSelectAccessMethodRule.java
index b246ff2..49a5438 100644
---
a/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/am/IntroduceSelectAccessMethodRule.java
+++
b/asterixdb/asterix-algebra/src/main/java/org/apache/asterix/optimizer/rules/am/IntroduceSelectAccessMethodRule.java
@@ -547,7 +547,7 @@
pruneIndexCandidates(analyzedAMs, context, typeEnvironment,
checkApplicableOnly);
// Choose all indexes that will be applied.
- chooseAllIndexes(analyzedAMs, chosenIndexes);
+ chooseAllIndexes(analyzedAMs, chosenIndexes, subTree,
afterSelectRefs, context);
removeSmallerPrefixIndexes(chosenIndexes);
if (chosenIndexes == null || chosenIndexes.isEmpty()) {
--
To view, visit https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/20774?usp=email
To unsubscribe, or for help writing mail filters, visit
https://asterix-gerrit.ics.uci.edu/settings?usp=email
Gerrit-MessageType: newchange
Gerrit-Project: asterixdb
Gerrit-Branch: master
Gerrit-Change-Id: I4f996ee7b6eb03067fb384161fb4eb60f94e8353
Gerrit-Change-Number: 20774
Gerrit-PatchSet: 1
Gerrit-Owner: Shahrzad Shirazi <[email protected]>