http://git-wip-us.apache.org/repos/asf/lens/blob/975fa2c2/lens-cube/src/main/java/org/apache/lens/cube/parse/StorageCandidate.java ---------------------------------------------------------------------- diff --git a/lens-cube/src/main/java/org/apache/lens/cube/parse/StorageCandidate.java b/lens-cube/src/main/java/org/apache/lens/cube/parse/StorageCandidate.java index 636b1d0..8ba69c4 100644 --- a/lens-cube/src/main/java/org/apache/lens/cube/parse/StorageCandidate.java +++ b/lens-cube/src/main/java/org/apache/lens/cube/parse/StorageCandidate.java @@ -18,7 +18,6 @@ */ package org.apache.lens.cube.parse; -import static org.apache.hadoop.hive.ql.parse.HiveParser.Identifier; import static org.apache.lens.cube.parse.CandidateTablePruneCause.*; import static org.apache.lens.cube.parse.StorageUtil.*; @@ -32,6 +31,7 @@ import org.apache.lens.server.api.metastore.DataCompletenessChecker; import org.apache.commons.lang.StringUtils; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hive.ql.lib.Node; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.parse.ASTNode; import org.apache.hadoop.hive.ql.parse.HiveParser; @@ -67,6 +67,11 @@ public class StorageCandidate implements Candidate, CandidateTable { @Getter private TreeSet<UpdatePeriod> validUpdatePeriods = new TreeSet<>(); private Configuration conf = null; + + /** + * This map holds Tags (A tag refers to one or more measures) that have incomplete (below configured threshold) data. + * Value is a map of date string and %completeness. 
+ */ @Getter private Map<String, Map<String, Float>> dataCompletenessMap = new HashMap<>(); private SimpleDateFormat partWhereClauseFormat = null; @@ -94,18 +99,16 @@ public class StorageCandidate implements Candidate, CandidateTable { @Getter private CubeInterface cube; @Getter - Map<Dimension, CandidateDim> dimsToQuery; + private Map<Dimension, CandidateDim> dimsToQuery; + @Getter + private Date startTime; + @Getter + private Date endTime; /** * Cached fact columns */ private Collection<String> factColumns; - /** - * This map holds Tags (A tag refers to one or more measures) that have incomplete (below configured threshold) data. - * Value is a map of date string and %completeness. - */ - @Getter - @Setter - private Map<String, Map<String, Float>> incompleteDataDetails; + /** * Partition calculated by getPartition() method. */ @@ -114,11 +117,13 @@ public class StorageCandidate implements Candidate, CandidateTable { /** * Non existing partitions */ + @Getter private Set<String> nonExistingPartitions = new HashSet<>(); @Getter private int numQueriedParts = 0; - public StorageCandidate(CubeInterface cube, CubeFactTable fact, String storageName, CubeQueryContext cubeql) { + public StorageCandidate(CubeInterface cube, CubeFactTable fact, String storageName, CubeQueryContext cubeql) + throws LensException { if ((cube == null) || (fact == null) || (storageName == null)) { throw new IllegalArgumentException("Cube,fact and storageName should be non null"); } @@ -137,12 +142,14 @@ public class StorageCandidate implements Candidate, CandidateTable { this.partWhereClauseFormat = new SimpleDateFormat(formatStr); } completenessPartCol = conf.get(CubeQueryConfUtil.COMPLETENESS_CHECK_PART_COL); - client = cubeql.getMetastoreClient(); completenessThreshold = conf .getFloat(CubeQueryConfUtil.COMPLETENESS_THRESHOLD, CubeQueryConfUtil.DEFAULT_COMPLETENESS_THRESHOLD); + client = cubeql.getMetastoreClient(); + startTime = client.getStorageTableStartDate(name, fact.getName()); + 
endTime = client.getStorageTableEndDate(name, fact.getName()); } - public StorageCandidate(StorageCandidate sc) { + public StorageCandidate(StorageCandidate sc) throws LensException { this(sc.getCube(), sc.getFact(), sc.getStorageName(), sc.getCubeql()); // Copy update periods. for (UpdatePeriod updatePeriod : sc.getValidUpdatePeriods()) { @@ -150,42 +157,104 @@ public class StorageCandidate implements Candidate, CandidateTable { } } - static boolean containsAny(Collection<String> srcSet, Collection<String> colSet) { - if (colSet == null || colSet.isEmpty()) { - return true; + private void setMissingExpressions(Set<Dimension> queriedDims) throws LensException { + setFromString(String.format("%s", getFromTable())); + setWhereString(joinWithAnd( + genWhereClauseWithDimPartitions(whereString, queriedDims), cubeql.getConf().getBoolean( + CubeQueryConfUtil.REPLACE_TIMEDIM_WITH_PART_COL, CubeQueryConfUtil.DEFAULT_REPLACE_TIMEDIM_WITH_PART_COL) + ? getPostSelectionWhereClause() : null)); + if (cubeql.getHavingAST() != null) { + queryAst.setHavingAST(MetastoreUtil.copyAST(cubeql.getHavingAST())); } - for (String column : colSet) { - if (srcSet.contains(column)) { - return true; + } + + private String genWhereClauseWithDimPartitions(String originalWhere, Set<Dimension> queriedDims) { + StringBuilder whereBuf; + if (originalWhere != null) { + whereBuf = new StringBuilder(originalWhere); + } else { + whereBuf = new StringBuilder(); + } + + // add where clause for all dimensions + if (cubeql != null) { + boolean added = (originalWhere != null); + for (Dimension dim : queriedDims) { + CandidateDim cdim = dimsToQuery.get(dim); + String alias = cubeql.getAliasForTableName(dim.getName()); + if (!cdim.isWhereClauseAdded() && !StringUtils.isBlank(cdim.getWhereClause())) { + appendWhereClause(whereBuf, StorageUtil.getWhereClause(cdim, alias), added); + added = true; + } } } - return false; + if (whereBuf.length() == 0) { + return null; + } + return whereBuf.toString(); } - private 
void setMissingExpressions() throws LensException { - setFromString(String.format("%s", getFromTable())); - setWhereString(joinWithAnd(whereString, null)); - if (cubeql.getHavingAST() != null) { - queryAst.setHavingAST(MetastoreUtil.copyAST(cubeql.getHavingAST())); + static void appendWhereClause(StringBuilder filterCondition, String whereClause, boolean hasMore) { + // Make sure we add AND only when there are already some conditions in where + // clause + if (hasMore && !filterCondition.toString().isEmpty() && !StringUtils.isBlank(whereClause)) { + filterCondition.append(" AND "); + } + + if (!StringUtils.isBlank(whereClause)) { + filterCondition.append("("); + filterCondition.append(whereClause); + filterCondition.append(")"); } } + protected String getPostSelectionWhereClause() throws LensException { + return null; + } + public void setAnswerableMeasurePhraseIndices(int index) { answerableMeasurePhraseIndices.add(index); } - public String toHQL() throws LensException { - setMissingExpressions(); + public String toHQL(Set<Dimension> queriedDims) throws LensException { + setMissingExpressions(queriedDims); // Check if the picked candidate is a StorageCandidate and in that case // update the selectAST with final alias. 
if (this == cubeql.getPickedCandidate()) { CandidateUtil.updateFinalAlias(queryAst.getSelectAST(), cubeql); + updateOrderByWithFinalAlias(queryAst.getOrderByAST(), queryAst.getSelectAST()); } return CandidateUtil .buildHQLString(queryAst.getSelectString(), fromString, whereString, queryAst.getGroupByString(), queryAst.getOrderByString(), queryAst.getHavingString(), queryAst.getLimitValue()); } + /** + * Update Orderby children with final alias used in select + * + * @param orderby + * @param select + */ + private void updateOrderByWithFinalAlias(ASTNode orderby, ASTNode select) { + if (orderby == null) { + return; + } + for(Node orderbyNode : orderby.getChildren()) { + ASTNode orderBychild = (ASTNode) orderbyNode; + for(Node selectNode : select.getChildren()) { + ASTNode selectChild = (ASTNode) selectNode; + if (selectChild.getChildCount() == 2) { + if (HQLParser.getString((ASTNode) selectChild.getChild(0)) + .equals(HQLParser.getString((ASTNode) orderBychild.getChild(0)))) { + ASTNode alias = new ASTNode((ASTNode) selectChild.getChild(1)); + orderBychild.replaceChildren(0, 0, alias); + break; + } + } + } + } + } + @Override public String getStorageString(String alias) { return storageName + " " + alias; @@ -213,17 +282,6 @@ public class StorageCandidate implements Candidate, CandidateTable { } @Override - public Date getStartTime() { - // TODO union : get storage stat time and take max out of it - return fact.getStartTime(); - } - - @Override - public Date getEndTime() { - return fact.getEndTime(); - } - - @Override public double getCost() { return fact.weight(); } @@ -253,18 +311,19 @@ public class StorageCandidate implements Candidate, CandidateTable { /** * Gets FactPartitions for the given fact using the following logic * - * 1. Find the max update interval that will be used for the query. Lets assume time range is 15 Sep to 15 Dec and the - * fact has two storage with update periods as MONTHLY,DAILY,HOURLY. 
In this case the data for - * [15 sep - 1 oct)U[1 Dec - 15 Dec) will be answered by DAILY partitions and [1 oct - 1Dec) will be answered by - * MONTHLY partitions. The max interavl for this query will be MONTHLY. + * 1. Find the max update interval that will be used for the query. Lets assume time + * range is 15 Sep to 15 Dec and the fact has two storage with update periods as MONTHLY,DAILY,HOURLY. + * In this case the data for [15 sep - 1 oct)U[1 Dec - 15 Dec) will be answered by DAILY partitions + * and [1 oct - 1Dec) will be answered by MONTHLY partitions. The max interavl for this query will be MONTHLY. * * 2.Prune Storgaes that do not fall in the queries time range. * {@link CubeMetastoreClient#isStorageTableCandidateForRange(String, Date, Date)} * - * 3. Iterate over max interavl . In out case it will give two months Oct and Nov. Find partitions for these two months. - * Check validity of FactPartitions for Oct and Nov via {@link #updatePartitionStorage(FactPartition)}. - * If the partition is missing, try getting partitions for the time range form other update periods (DAILY,HOURLY).This - * is achieved by calling getPartitions() recursively but passing only 2 update periods (DAILY,HOURLY) + * 3. Iterate over max interavl . In out case it will give two months Oct and Nov. Find partitions for + * these two months.Check validity of FactPartitions for Oct and Nov + * via {@link #updatePartitionStorage(FactPartition)}. + * If the partition is missing, try getting partitions for the time range form other update periods (DAILY,HOURLY). 
+ * This is achieved by calling getPartitions() recursively but passing only 2 update periods (DAILY,HOURLY) * * 4.If the monthly partitions are found, check for lookahead partitions and call getPartitions recursively for the * remaining time intervals i.e, [15 sep - 1 oct) and [1 Dec - 15 Dec) @@ -296,7 +355,7 @@ public class StorageCandidate implements Candidate, CandidateTable { if (!client.isStorageTableCandidateForRange(name, fromDate, toDate)) { cubeql.addStoragePruningMsg(this, - new CandidateTablePruneCause(CandidateTablePruneCause.CandidateTablePruneCode.TIME_RANGE_NOT_ANSWERABLE)); + new CandidateTablePruneCause(CandidateTablePruneCause.CandidateTablePruneCode.TIME_RANGE_NOT_ANSWERABLE)); return false; } else if (!client.partColExists(name, partCol)) { log.info("{} does not exist in {}", partCol, name); @@ -310,7 +369,7 @@ public class StorageCandidate implements Candidate, CandidateTable { Date floorToDate = DateUtil.getFloorDate(toDate, interval); int lookAheadNumParts = conf - .getInt(CubeQueryConfUtil.getLookAheadPTPartsKey(interval), CubeQueryConfUtil.DEFAULT_LOOK_AHEAD_PT_PARTS); + .getInt(CubeQueryConfUtil.getLookAheadPTPartsKey(interval), CubeQueryConfUtil.DEFAULT_LOOK_AHEAD_PT_PARTS); TimeRange.Iterable.Iterator iter = TimeRange.iterable(ceilFromDate, floorToDate, interval, 1).iterator(); // add partitions from ceilFrom to floorTo @@ -340,12 +399,12 @@ public class StorageCandidate implements Candidate, CandidateTable { // look-ahead // process time are present TimeRange.Iterable.Iterator processTimeIter = TimeRange.iterable(nextDt, lookAheadNumParts, interval, 1) - .iterator(); + .iterator(); while (processTimeIter.hasNext()) { Date pdt = processTimeIter.next(); Date nextPdt = processTimeIter.peekNext(); FactPartition processTimePartition = new FactPartition(processTimePartCol, pdt, interval, null, - partWhereClauseFormat); + partWhereClauseFormat); updatePartitionStorage(processTimePartition); if (processTimePartition.isFound()) { 
log.debug("Finer parts not required for look-ahead partition :{}", part); @@ -359,15 +418,15 @@ public class StorageCandidate implements Candidate, CandidateTable { // Get partitions for look ahead process time log.debug("Looking for process time partitions between {} and {}", pdt, nextPdt); Set<FactPartition> processTimeParts = getPartitions( - TimeRange.getBuilder().fromDate(pdt).toDate(nextPdt).partitionColumn(processTimePartCol).build(), - newset, true, failOnPartialData, missingPartitions); + TimeRange.getBuilder().fromDate(pdt).toDate(nextPdt).partitionColumn(processTimePartCol).build(), + newset, true, failOnPartialData, missingPartitions); log.debug("Look ahead partitions: {}", processTimeParts); TimeRange timeRange = TimeRange.getBuilder().fromDate(dt).toDate(nextDt).build(); for (FactPartition pPart : processTimeParts) { log.debug("Looking for finer partitions in pPart: {}", pPart); for (Date date : timeRange.iterable(pPart.getPeriod(), 1)) { FactPartition innerPart = new FactPartition(partCol, date, pPart.getPeriod(), pPart, - partWhereClauseFormat); + partWhereClauseFormat); updatePartitionStorage(innerPart); innerPart.setFound(pPart.isFound()); if (innerPart.isFound()) { @@ -408,9 +467,10 @@ public class StorageCandidate implements Candidate, CandidateTable { } } return - getPartitions(fromDate, ceilFromDate, partCol, partitions, updatePeriods, addNonExistingParts, failOnPartialData, - missingPartitions) && getPartitions(floorToDate, toDate, partCol, partitions, updatePeriods, - addNonExistingParts, failOnPartialData, missingPartitions); + getPartitions(fromDate, ceilFromDate, partCol, partitions, updatePeriods, + addNonExistingParts, failOnPartialData, missingPartitions) + && getPartitions(floorToDate, toDate, partCol, partitions, updatePeriods, + addNonExistingParts, failOnPartialData, missingPartitions); } /** @@ -429,9 +489,8 @@ public class StorageCandidate implements Candidate, CandidateTable { // Check the measure tags. 
if (!evaluateMeasuresCompleteness(timeRange)) { log - .info("Fact table:{} has partitions with incomplete data: {} for given ranges: {}", fact, dataCompletenessMap, - cubeql.getTimeRanges()); - cubeql.addStoragePruningMsg(this, incompletePartitions(dataCompletenessMap)); + .info("Storage candidate:{} has partitions with incomplete data: {} for given ranges: {}", this, + dataCompletenessMap, cubeql.getTimeRanges()); if (failOnPartialData) { return false; } @@ -482,9 +541,11 @@ public class StorageCandidate implements Candidate, CandidateTable { break; } } + // Add all the partitions. participatingPartitions contains all the partitions for previous time ranges also. + this.participatingPartitions.addAll(rangeParts); numQueriedParts += rangeParts.size(); if (!unsupportedTimeDims.isEmpty()) { - log.info("Not considering fact table:{} as it doesn't support time dimensions: {}", this.getFact(), + log.info("Not considering storage candidate:{} as it doesn't support time dimensions: {}", this, unsupportedTimeDims); cubeql.addStoragePruningMsg(this, timeDimNotSupported(unsupportedTimeDims)); return false; @@ -493,7 +554,7 @@ public class StorageCandidate implements Candidate, CandidateTable { // TODO union : Relook at this. nonExistingPartitions.addAll(nonExistingParts); if (rangeParts.size() == 0 || (failOnPartialData && !nonExistingParts.isEmpty())) { - log.info("No partitions for fallback range:{}", timeRange); + log.info("Not considering storage candidate:{} as no partitions for fallback range:{}", this, timeRange); return false; } String extraWhere = extraWhereClauseFallback.toString(); @@ -505,8 +566,6 @@ public class StorageCandidate implements Candidate, CandidateTable { rangeToWhere.put(parentTimeRange, rangeWriter .getTimeRangeWhereClause(cubeql, cubeql.getAliasForTableName(cubeql.getCube().getName()), rangeParts)); } - // Add all the partitions. participatingPartitions contains all the partitions for previous time ranges also. 
- this.participatingPartitions.addAll(rangeParts); return true; } @@ -559,7 +618,7 @@ public class StorageCandidate implements Candidate, CandidateTable { dataCompletenessMap.put(measureorExprFromTag, incompletePartition); } incompletePartition.put(formatter.format(completenessResult.getKey()), completenessResult.getValue()); - isDataComplete = true; + isDataComplete = false; } } } @@ -600,7 +659,7 @@ public class StorageCandidate implements Candidate, CandidateTable { ASTNode selectExpr = (ASTNode) queryAst.getSelectAST().getChild(currentChild); Set<String> exprCols = HQLParser.getColsInExpr(cubeql.getAliasForTableName(cubeql.getCube()), selectExpr); if (getColumns().containsAll(exprCols)) { - ASTNode aliasNode = HQLParser.findNodeByPath(selectExpr, Identifier); + ASTNode aliasNode = HQLParser.findNodeByPath(selectExpr, HiveParser.Identifier); String alias = cubeql.getSelectPhrases().get(i).getSelectAlias(); if (aliasNode != null) { String queryAlias = aliasNode.getText(); @@ -666,9 +725,9 @@ public class StorageCandidate implements Candidate, CandidateTable { private String getFromTable() throws LensException { if (cubeql.isAutoJoinResolved()) { - return fromString; + return fromString; } else { - return cubeql.getQBFromString(this, getDimsToQuery()); + return cubeql.getQBFromString(this, getDimsToQuery()); } } @@ -685,5 +744,4 @@ public class StorageCandidate implements Candidate, CandidateTable { } return ret; } - }
http://git-wip-us.apache.org/repos/asf/lens/blob/975fa2c2/lens-cube/src/main/java/org/apache/lens/cube/parse/StorageTableResolver.java ---------------------------------------------------------------------- diff --git a/lens-cube/src/main/java/org/apache/lens/cube/parse/StorageTableResolver.java b/lens-cube/src/main/java/org/apache/lens/cube/parse/StorageTableResolver.java index 57b4cf0..3029589 100644 --- a/lens-cube/src/main/java/org/apache/lens/cube/parse/StorageTableResolver.java +++ b/lens-cube/src/main/java/org/apache/lens/cube/parse/StorageTableResolver.java @@ -18,15 +18,7 @@ */ package org.apache.lens.cube.parse; -//import static org.apache.lens.cube.metadata.MetastoreUtil.getFactOrDimtableStorageTableName; -//import static org.apache.lens.cube.parse.CandidateTablePruneCause.CandidateTablePruneCode.TIME_RANGE_NOT_ANSWERABLE; -//import static org.apache.lens.cube.parse.CandidateTablePruneCause.CandidateTablePruneCode.INVALID; -//import static org.apache.lens.cube.parse.CandidateTablePruneCause.CandidateTablePruneCode.UNSUPPORTED_STORAGE; -//import static org.apache.lens.cube.parse.CandidateTablePruneCause.CandidateTablePruneCode.NO_PARTITIONS; -//import static org.apache.lens.cube.parse.CandidateTablePruneCause.missingPartitions; -//import static org.apache.lens.cube.parse.CandidateTablePruneCause.noCandidateStorages; -//import static org.apache.lens.cube.parse.StorageUtil.getFallbackRange; - +import static org.apache.lens.cube.parse.CandidateTablePruneCause.incompletePartitions; import java.util.*; @@ -39,7 +31,6 @@ import org.apache.commons.lang.StringUtils; import org.apache.hadoop.conf.Configuration; import lombok.extern.slf4j.Slf4j; - /** * Resolve storages and partitions of all candidate tables and prunes candidate tables with missing storages or * partitions. 
@@ -57,8 +48,6 @@ class StorageTableResolver implements ContextRewriter { private final Map<String, Set<String>> nonExistingPartitions = new HashMap<>(); CubeMetastoreClient client; private PHASE phase; - // TODO union : we do not need this. Remove the storage candidate - //private HashMap<CubeFactTable, Map<String, SkipStorageCause>> skipStorageCausesPerFact; private float completenessThreshold; private String completenessPartCol; @@ -136,15 +125,15 @@ class StorageTableResolver implements ContextRewriter { for (TimeRange range : cubeql.getTimeRanges()) { isComplete &= candidate.evaluateCompleteness(range, range, failOnPartialData); } - if (!isComplete) { + if (failOnPartialData && !isComplete) { candidateIterator.remove(); - + log.info("Not considering candidate:{} as its data is not is not complete", candidate); Set<StorageCandidate> scSet = CandidateUtil.getStorageCandidates(candidate); - Set<String> missingPartitions; for (StorageCandidate sc : scSet) { - missingPartitions = CandidateUtil.getMissingPartitions(sc); - if (!missingPartitions.isEmpty()) { - cubeql.addStoragePruningMsg(sc, CandidateTablePruneCause.missingPartitions(missingPartitions)); + if (!sc.getNonExistingPartitions().isEmpty()) { + cubeql.addStoragePruningMsg(sc, CandidateTablePruneCause.missingPartitions(sc.getNonExistingPartitions())); + } else if (!sc.getDataCompletenessMap().isEmpty()) { + cubeql.addStoragePruningMsg(sc, incompletePartitions(sc.getDataCompletenessMap())); } } } @@ -179,10 +168,11 @@ class StorageTableResolver implements ContextRewriter { Map<String, CandidateTablePruneCode> skipStorageCauses = new HashMap<>(); for (String storage : dimtable.getStorages()) { if (isStorageSupportedOnDriver(storage)) { - String tableName = MetastoreUtil.getFactOrDimtableStorageTableName(dimtable.getName(), storage).toLowerCase(); + String tableName = MetastoreUtil.getFactOrDimtableStorageTableName(dimtable.getName(), + storage).toLowerCase(); if (validDimTables != null && 
!validDimTables.contains(tableName)) { log.info("Not considering dim storage table:{} as it is not a valid dim storage", tableName); - skipStorageCauses.put(tableName,CandidateTablePruneCode.INVALID); + skipStorageCauses.put(tableName, CandidateTablePruneCode.INVALID); continue; } @@ -278,21 +268,16 @@ class StorageTableResolver implements ContextRewriter { boolean partitionColumnExists = client.partColExists(storageTable, range.getPartitionColumn()); valid = partitionColumnExists; if (!partitionColumnExists) { - //TODO union : handle prune cause below case. String timeDim = cubeql.getBaseCube().getTimeDimOfPartitionColumn(range.getPartitionColumn()); - // if (!sc.getFact().getColumns().contains(timeDim)) { - // // Not a time dimension so no fallback required. - // pruningCauses.add(TIMEDIM_NOT_SUPPORTED); - // continue; - // } - TimeRange fallBackRange = StorageUtil.getFallbackRange(range, sc.getFact().getCubeName(), cubeql); + TimeRange fallBackRange = StorageUtil.getFallbackRange(range, sc.getFact().getName(), cubeql); if (fallBackRange == null) { log.info("No partitions for range:{}. 
fallback range: {}", range, fallBackRange); pruningCauses.add(CandidateTablePruneCode.TIME_RANGE_NOT_ANSWERABLE); continue; } - valid = client - .isStorageTableCandidateForRange(storageTable, fallBackRange.getFromDate(), fallBackRange.getToDate()); + valid = client.partColExists(storageTable, fallBackRange.getPartitionColumn()) + && client.isStorageTableCandidateForRange(storageTable, fallBackRange.getFromDate(), + fallBackRange.getToDate()); if (!valid) { pruningCauses.add(CandidateTablePruneCode.TIME_RANGE_NOT_ANSWERABLE); } http://git-wip-us.apache.org/repos/asf/lens/blob/975fa2c2/lens-cube/src/main/java/org/apache/lens/cube/parse/TimeRangeChecker.java ---------------------------------------------------------------------- diff --git a/lens-cube/src/main/java/org/apache/lens/cube/parse/TimeRangeChecker.java b/lens-cube/src/main/java/org/apache/lens/cube/parse/TimeRangeChecker.java index fe867c7..e37db8b 100644 --- a/lens-cube/src/main/java/org/apache/lens/cube/parse/TimeRangeChecker.java +++ b/lens-cube/src/main/java/org/apache/lens/cube/parse/TimeRangeChecker.java @@ -36,7 +36,6 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hive.ql.parse.ASTNode; import org.apache.hadoop.hive.ql.plan.PlanUtils; -import com.google.common.collect.Lists; import lombok.extern.slf4j.Slf4j; @Slf4j http://git-wip-us.apache.org/repos/asf/lens/blob/975fa2c2/lens-cube/src/main/java/org/apache/lens/cube/parse/UnionCandidate.java ---------------------------------------------------------------------- diff --git a/lens-cube/src/main/java/org/apache/lens/cube/parse/UnionCandidate.java b/lens-cube/src/main/java/org/apache/lens/cube/parse/UnionCandidate.java index 91276cd..d97e7b8 100644 --- a/lens-cube/src/main/java/org/apache/lens/cube/parse/UnionCandidate.java +++ b/lens-cube/src/main/java/org/apache/lens/cube/parse/UnionCandidate.java @@ -1,3 +1,21 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ package org.apache.lens.cube.parse; import java.util.*; @@ -88,10 +106,10 @@ public class UnionCandidate implements Candidate { if (this.equals(candidate)) { return true; } - for (Candidate child : childCandidates) { - if (child.contains((candidate))) + if (child.contains((candidate))) { return true; + } } return false; } @@ -261,4 +279,4 @@ public class UnionCandidate implements Candidate { builder.partitionColumn(timeRange.getPartitionColumn()); return builder; } -} \ No newline at end of file +} http://git-wip-us.apache.org/repos/asf/lens/blob/975fa2c2/lens-cube/src/main/java/org/apache/lens/cube/parse/UnionQueryWriter.java ---------------------------------------------------------------------- diff --git a/lens-cube/src/main/java/org/apache/lens/cube/parse/UnionQueryWriter.java b/lens-cube/src/main/java/org/apache/lens/cube/parse/UnionQueryWriter.java index eb0e545..daf3daf 100644 --- a/lens-cube/src/main/java/org/apache/lens/cube/parse/UnionQueryWriter.java +++ b/lens-cube/src/main/java/org/apache/lens/cube/parse/UnionQueryWriter.java @@ -19,22 +19,29 @@ package org.apache.lens.cube.parse; +import static org.apache.lens.cube.parse.HQLParser.*; + +import static org.apache.hadoop.hive.ql.parse.HiveParser.*; + +import java.util.*; + +import 
org.apache.lens.cube.metadata.Dimension; +import org.apache.lens.cube.metadata.MetastoreUtil; +import org.apache.lens.server.api.error.LensException; -import org.antlr.runtime.CommonToken; import org.apache.hadoop.hive.ql.lib.Node; import org.apache.hadoop.hive.ql.parse.ASTNode; import org.apache.hadoop.hive.ql.parse.HiveParser; import org.apache.hadoop.util.StringUtils; -import org.apache.lens.cube.metadata.MetastoreUtil; -import org.apache.lens.server.api.error.LensException; -import java.util.*; - -import static org.apache.hadoop.hive.ql.parse.HiveParser.*; -import static org.apache.lens.cube.parse.HQLParser.*; +import org.antlr.runtime.CommonToken; import lombok.extern.slf4j.Slf4j; +/** + * Utility class to write union query. Given any complex Join or Union Candidate, + * this class rewrites union query for all the participating StorageCandidates. + */ @Slf4j public class UnionQueryWriter { @@ -54,7 +61,7 @@ public class UnionQueryWriter { storageCandidates = CandidateUtil.getStorageCandidates(cand); } - public String toHQL() throws LensException { + public String toHQL(Map<StorageCandidate, Set<Dimension>> factDimMap) throws LensException { StorageCandidate firstCandidate = storageCandidates.iterator().next(); // Set the default queryAST for the outer query queryAst = DefaultQueryAST.fromStorageCandidate(firstCandidate, @@ -65,7 +72,7 @@ public class UnionQueryWriter { processGroupByAST(); processOrderByAST(); CandidateUtil.updateFinalAlias(queryAst.getSelectAST(), cubeql); - return CandidateUtil.buildHQLString(queryAst.getSelectString(), getFromString(), null, + return CandidateUtil.buildHQLString(queryAst.getSelectString(), getFromString(factDimMap), null, queryAst.getGroupByString(), queryAst.getOrderByString(), queryAst.getHavingString(), queryAst.getLimitValue()); } @@ -107,7 +114,7 @@ public class UnionQueryWriter { * @throws LensException */ private ASTNode processHavingAST(ASTNode innerAst, AliasDecider aliasDecider, StorageCandidate sc) - throws 
LensException { + throws LensException { if (cubeql.getHavingAST() != null) { ASTNode havingCopy = MetastoreUtil.copyAST(cubeql.getHavingAST()); Set<ASTNode> havingAggChildrenASTs = new LinkedHashSet<>(); @@ -121,7 +128,7 @@ public class UnionQueryWriter { } /** - * Update havingAST with proper alias name projected. + * Update outer havingAST with proper alias name projected. * * @param node * @return @@ -131,9 +138,9 @@ public class UnionQueryWriter { && (HQLParser.isAggregateAST(node))) { if (innerToOuterSelectASTs.containsKey(new HQLParser.HashableASTNode(node)) || innerToOuterHavingASTs.containsKey(new HQLParser.HashableASTNode(node))) { - ASTNode expr = innerToOuterSelectASTs.containsKey(new HQLParser.HashableASTNode(node)) ? - innerToOuterSelectASTs.get(new HQLParser.HashableASTNode(node)) : - innerToOuterHavingASTs.get(new HQLParser.HashableASTNode(node)); + ASTNode expr = innerToOuterSelectASTs.containsKey(new HQLParser.HashableASTNode(node)) + ? innerToOuterSelectASTs.get(new HQLParser.HashableASTNode(node)) + : innerToOuterHavingASTs.get(new HQLParser.HashableASTNode(node)); node.getParent().setChild(0, expr); } } @@ -190,15 +197,18 @@ public class UnionQueryWriter { return outerExpression; } - private ASTNode getDefaultNode(ASTNode aliasNode) throws LensException { - ASTNode defaultNode = getSelectExprAST(); - defaultNode.addChild(HQLParser.parseExpr(DEFAULT_MEASURE)); - defaultNode.addChild(aliasNode); - return defaultNode; - } - + /** + * Get the select expression. In case of node is default retunrs "0.0" with alias + * otherwise the select phrase with alias. 
+ * + * @param nodeWithoutAlias + * @param aliasNode + * @param isDefault + * @return + * @throws LensException + */ private ASTNode getSelectExpr(ASTNode nodeWithoutAlias, ASTNode aliasNode, boolean isDefault) - throws LensException { + throws LensException { ASTNode node = getSelectExprAST(); if (nodeWithoutAlias == null && isDefault) { node.addChild(HQLParser.parseExpr(DEFAULT_MEASURE)); @@ -215,6 +225,15 @@ public class UnionQueryWriter { return new ASTNode(new CommonToken(HiveParser.TOK_SELEXPR, "TOK_SELEXPR")); } + + /** + * Get the aggregate node for the SelectPhrase index. A given measure might not be answerable + * for a StorageCanddate. In that case get the non default aggregate node wcich ideally not "0.0", + * from otherStorage candidate. + * + * @param position + * @return + */ private ASTNode getAggregateNodesExpression(int position) { ASTNode node = null; for (StorageCandidate sc : storageCandidates) { @@ -226,21 +245,33 @@ public class UnionQueryWriter { return MetastoreUtil.copyAST(node); } + /** + * Check if ASTNode is answerable by StorageCandidate + * @param sc + * @param node + * @return + */ private boolean isNodeAnswerableForStorageCandidate(StorageCandidate sc, ASTNode node) { Set<String> cols = new LinkedHashSet<>(); getAllColumnsOfNode(node, cols); if (!sc.getColumns().containsAll(cols)) { return true; } - return false; + return false; } - private ASTNode setDefaultValueInExprForAggregateNodes(ASTNode node, StorageCandidate sc) - throws LensException { + /** + * Set the default value "0.0" in the non answerable aggreagte expressions. 
+ * @param node + * @param sc + * @return + * @throws LensException + */ + private ASTNode setDefaultValueInExprForAggregateNodes(ASTNode node, StorageCandidate sc) throws LensException { if (HQLParser.isAggregateAST(node) && isNodeAnswerableForStorageCandidate(sc, node)) { - node.setChild(1, getSelectExpr(null, null, true) ); - } + node.setChild(1, getSelectExpr(null, null, true)); + } for (int i = 0; i < node.getChildCount(); i++) { ASTNode child = (ASTNode) node.getChild(i); setDefaultValueInExprForAggregateNodes(child, sc); @@ -248,15 +279,111 @@ public class UnionQueryWriter { return node; } - private boolean isAggregateFunctionUsedInAST(ASTNode node) { - if (HQLParser.isAggregateAST(node) - || HQLParser.hasAggregate(node)) { + if (HQLParser.isAggregateAST(node) + || HQLParser.hasAggregate(node)) { + return true; + } + return false; + } + + private boolean isNodeDefault(ASTNode node) { + if (HQLParser.isAggregateAST((ASTNode) node.getChild(0))) { + if (HQLParser.getString((ASTNode) node.getChild(0).getChild(1)).equals(DEFAULT_MEASURE)) { return true; } + } return false; } + private List<ASTNode> getProjectedNonDefaultPhrases() { + List<ASTNode> phrases = new ArrayList<>(); + int selectPhraseCount = cubeql.getSelectPhrases().size(); + for (int i = 0; i < selectPhraseCount; i++) { + for (StorageCandidate sc : storageCandidates) { + ASTNode selectAST = sc.getQueryAst().getSelectAST(); + if (isNodeDefault((ASTNode) selectAST.getChild(i))) { + continue; + } else { + phrases.add((ASTNode) selectAST.getChild(i)); + break; + } + } + } + return phrases; + } + + private void removeRedundantProjectedPhrases() { + List<ASTNode> phrases = getProjectedNonDefaultPhrases(); + List<String> phrasesWithoutAlias = new ArrayList<>(); + // populate all phrases without alias + for (ASTNode node : phrases) { + phrasesWithoutAlias.add(HQLParser.getString((ASTNode) node.getChild(0))); + } + Map<String, List<Integer>> phraseCountMap = new HashMap<>(); + Map<String, List<String>> 
aliasMap = new HashMap<>(); + for (int i = 0; i < phrasesWithoutAlias.size(); i++) { + String phrase = phrasesWithoutAlias.get(i); + if (phraseCountMap.containsKey(phrase)) { + phraseCountMap.get(phrase).add(i); + } else { + List<Integer> indices = new ArrayList<>(); + indices.add(i); + phraseCountMap.put(phrase, indices); + } + } + for (List<Integer> values : phraseCountMap.values()) { + if (values.size() > 1) { + String aliasToKeep = HQLParser.findNodeByPath((ASTNode) + phrases.get(values.get(0)), Identifier).toString(); + ArrayList<String> dupAliases = new ArrayList<>(); + for (int i : values.subList(1, values.size())) { + dupAliases.add(HQLParser.findNodeByPath((ASTNode) + phrases.get(i), Identifier).toString()); + } + aliasMap.put(aliasToKeep, dupAliases); + } + } + + for (String col : phraseCountMap.keySet()) { + if (phraseCountMap.get(col).size() > 1) { + List<Integer> childenToDelete = phraseCountMap.get(col). + subList(1, phraseCountMap.get(col).size()); + int counter = 0; + for (int i : childenToDelete) { + for (StorageCandidate sc : storageCandidates) { + sc.getQueryAst().getSelectAST().deleteChild(i - counter); + } + counter++; + } + } + } + updateOuterSelectDuplicateAliases(queryAst.getSelectAST(), aliasMap); + } + + public void updateOuterSelectDuplicateAliases(ASTNode node, + Map<String, List<String>> aliasMap) { + if (node.getToken().getType() == HiveParser.DOT) { + String table = HQLParser.findNodeByPath(node, TOK_TABLE_OR_COL, Identifier).toString(); + String col = node.getChild(1).toString(); + for (Map.Entry<String, List<String>> entry : aliasMap.entrySet()) { + if (entry.getValue().contains(col)) { + try { + node.setChild(1, HQLParser.parseExpr(entry.getKey())); + } catch (LensException e) { + log.error("Unable to parse select expression: {}.", entry.getKey()); + } + } + + } + } + for (int i = 0; i < node.getChildCount(); i++) { + ASTNode child = (ASTNode) node.getChild(i); + updateOuterSelectDuplicateAliases(child, aliasMap); + } + } + + /** * 
Set the default value for the non queriable measures. If a measure is not * answerable from a StorageCandidate set it as 0.0 @@ -267,12 +394,14 @@ public class UnionQueryWriter { for (int i = 0; i < cubeql.getSelectPhrases().size(); i++) { SelectPhraseContext phrase = cubeql.getSelectPhrases().get(i); ASTNode aliasNode = new ASTNode(new CommonToken(Identifier, phrase.getSelectAlias())); + // Select phrase is dimension if (!phrase.hasMeasures(cubeql)) { for (StorageCandidate sc : storageCandidates) { ASTNode exprWithOutAlias = (ASTNode) sc.getQueryAst().getSelectAST().getChild(i).getChild(0); storageCandidateToSelectAstMap.get(sc.toString()). addChild(getSelectExpr(exprWithOutAlias, aliasNode, false)); } + // Select phrase is measure } else if (!phrase.getQueriedMsrs().isEmpty()) { for (StorageCandidate sc : storageCandidates) { if (sc.getAnswerableMeasurePhraseIndices().contains(phrase.getPosition())) { @@ -290,6 +419,7 @@ public class UnionQueryWriter { addChild(getSelectExpr(resolvedExprNode, aliasNode, false)); } } + // Select phrase is expression } else { for (StorageCandidate sc : storageCandidates) { if (phrase.isEvaluable(cubeql, sc) @@ -312,6 +442,11 @@ public class UnionQueryWriter { } } + /** + * Update Select and Having clause of outer query. 
+ * + * @throws LensException + */ private void processSelectAndHavingAST() throws LensException { ASTNode outerSelectAst = new ASTNode(queryAst.getSelectAST()); DefaultAliasDecider aliasDecider = new DefaultAliasDecider(); @@ -329,8 +464,18 @@ public class UnionQueryWriter { aliasDecider.setCounter(selectAliasCounter); processHavingAST(sc.getQueryAst().getSelectAST(), aliasDecider, sc); } + removeRedundantProjectedPhrases(); } + /** + * Get the inner and outer AST with alias for each child of StorageCandidate + * + * @param sc + * @param outerSelectAst + * @param innerSelectAST + * @param aliasDecider + * @throws LensException + */ private void processSelectExpression(StorageCandidate sc, ASTNode outerSelectAst, ASTNode innerSelectAST, AliasDecider aliasDecider) throws LensException { //ASTNode selectAST = sc.getQueryAst().getSelectAST(); @@ -361,22 +506,21 @@ public class UnionQueryWriter { } /* - -Perform a DFS on the provided AST, and Create an AST of similar structure with changes specific to the -inner query - outer query dynamics. The resultant AST is supposed to be used in outer query. - -Base cases: - 1. ast is null => null - 2. ast is aggregate_function(table.column) => add aggregate_function(table.column) to inner select expressions, - generate alias, return aggregate_function(cube.alias). Memoize the mapping - aggregate_function(table.column) => aggregate_function(cube.alias) - Assumption is aggregate_function is transitive i.e. f(a,b,c,d) = f(f(a,b), f(c,d)). SUM, MAX, MIN etc - are transitive, while AVG, COUNT etc are not. For non-transitive aggregate functions, the re-written - query will be incorrect. - 3. ast has aggregates - iterate over children and add the non aggregate nodes as is and recursively get outer ast - for aggregate. - 4. If no aggregates, simply select its alias in outer ast. - 5. If given ast is memorized as mentioned in the above cases, return the mapping. 
+ Perform a DFS on the provided AST, and Create an AST of similar structure with changes specific to the + inner query - outer query dynamics. The resultant AST is supposed to be used in outer query. + + Base cases: + 1. ast is null => null + 2. ast is aggregate_function(table.column) => add aggregate_function(table.column) to inner select expressions, + generate alias, return aggregate_function(cube.alias). Memoize the mapping + aggregate_function(table.column) => aggregate_function(cube.alias) + Assumption is aggregate_function is transitive i.e. f(a,b,c,d) = f(f(a,b), f(c,d)). SUM, MAX, MIN etc + are transitive, while AVG, COUNT etc are not. For non-transitive aggregate functions, the re-written + query will be incorrect. + 3. ast has aggregates - iterate over children and add the non aggregate nodes as is and recursively get outer ast + for aggregate. + 4. If no aggregates, simply select its alias in outer ast. + 5. If given ast is memorized as mentioned in the above cases, return the mapping. */ private ASTNode getOuterAST(ASTNode astNode, ASTNode innerSelectAST, AliasDecider aliasDecider, StorageCandidate sc, boolean isSelectAst) throws LensException { @@ -402,7 +546,7 @@ Base cases: if (hasAggregate(childAST) && sc.getColumns().containsAll(msrCols)) { outerAST.addChild(getOuterAST(childAST, innerSelectAST, aliasDecider, sc, isSelectAst)); } else if (hasAggregate(childAST) && !sc.getColumns().containsAll(msrCols)) { - childAST.replaceChildren(1, 1, getDefaultNode(null)); + childAST.replaceChildren(1, 1, getSelectExpr(null, null, true)); outerAST.addChild(getOuterAST(childAST, innerSelectAST, aliasDecider, sc, isSelectAst)); } else { outerAST.addChild(childAST); @@ -456,6 +600,14 @@ Base cases: return outerAST; } + /** + * GroupbyAST is having dim only columns all the columns should have been + * projected. Get the alias for the projected columns and add to group by clause. 
+ * + * @param astNode + * @return + * @throws LensException + */ private ASTNode processGroupByExpression(ASTNode astNode) throws LensException { ASTNode outerExpression = new ASTNode(astNode); @@ -469,12 +621,21 @@ Base cases: return outerExpression; } - private void processHavingExpression(ASTNode innerSelectAst,Set<ASTNode> havingAggASTs, + /** + * Process having clause, if a column is not projected add it + * to the projected columns of inner selectAST. + * + * @param innerSelectAst + * @param havingAggASTs + * @param aliasDecider + * @param sc + * @throws LensException + */ + + private void processHavingExpression(ASTNode innerSelectAst, Set<ASTNode> havingAggASTs, AliasDecider aliasDecider, StorageCandidate sc) throws LensException { // iterate over all children of the ast and get outer ast corresponding to it. for (ASTNode child : havingAggASTs) { - //ASTNode node = MetastoreUtil.copyAST(child); - //setDefaultValueInExprForAggregateNodes(node, sc); if (!innerToOuterSelectASTs.containsKey(new HQLParser.HashableASTNode(child))) { getOuterAST(child, innerSelectAst, aliasDecider, sc, false); } @@ -483,6 +644,7 @@ Base cases: /** * Gets all aggreage nodes used in having + * * @param node * @param havingClauses * @return @@ -498,6 +660,13 @@ Base cases: return havingClauses; } + /** + * Get columns used in ASTNode + * + * @param node + * @param msrs + * @return + */ private Set<String> getAllColumnsOfNode(ASTNode node, Set<String> msrs) { if (node.getToken().getType() == HiveParser.DOT) { String table = HQLParser.findNodeByPath(node, TOK_TABLE_OR_COL, Identifier).toString(); @@ -513,14 +682,16 @@ Base cases: /** * Gets from string of the ouer query, this is a union query of all * StorageCandidates participated. 
+ * * @return * @throws LensException */ - private String getFromString() throws LensException { + private String getFromString(Map<StorageCandidate, Set<Dimension>> factDimMap) throws LensException { StringBuilder from = new StringBuilder(); List<String> hqlQueries = new ArrayList<>(); for (StorageCandidate sc : storageCandidates) { - hqlQueries.add(" ( " + sc.toHQL() + " ) "); + Set<Dimension> queriedDims = factDimMap.get(sc); + hqlQueries.add(sc.toHQL(queriedDims)); } return from.append(" ( ") .append(StringUtils.join(" UNION ALL ", hqlQueries)) http://git-wip-us.apache.org/repos/asf/lens/blob/975fa2c2/lens-cube/src/main/java/org/apache/lens/cube/parse/join/AutoJoinContext.java ---------------------------------------------------------------------- diff --git a/lens-cube/src/main/java/org/apache/lens/cube/parse/join/AutoJoinContext.java b/lens-cube/src/main/java/org/apache/lens/cube/parse/join/AutoJoinContext.java index 2bf3159..b5b0b30 100644 --- a/lens-cube/src/main/java/org/apache/lens/cube/parse/join/AutoJoinContext.java +++ b/lens-cube/src/main/java/org/apache/lens/cube/parse/join/AutoJoinContext.java @@ -169,7 +169,6 @@ public class AutoJoinContext { joinPathFromColumns.remove(dim); } - //TODO union: use StaorgeCandidate public String getFromString(String fromTable, StorageCandidate sc, Set<Dimension> qdims, Map<Dimension, CandidateDim> dimsToQuery, CubeQueryContext cubeql, QueryAST ast) throws LensException { String fromString = fromTable; @@ -348,7 +347,6 @@ public class AutoJoinContext { return allPaths; } - //TODO union: use Set<StorageCandidate> /** * Prunes the join chains defined in Cube whose starting column is not there in any of the candidate facts. * Same is done in case of join paths defined in Dimensions. 
http://git-wip-us.apache.org/repos/asf/lens/blob/975fa2c2/lens-cube/src/main/java/org/apache/lens/driver/cube/RewriterPlan.java ---------------------------------------------------------------------- diff --git a/lens-cube/src/main/java/org/apache/lens/driver/cube/RewriterPlan.java b/lens-cube/src/main/java/org/apache/lens/driver/cube/RewriterPlan.java index 928a2cb..f4049f5 100644 --- a/lens-cube/src/main/java/org/apache/lens/driver/cube/RewriterPlan.java +++ b/lens-cube/src/main/java/org/apache/lens/driver/cube/RewriterPlan.java @@ -56,7 +56,6 @@ public final class RewriterPlan extends DriverQueryPlan { } } } - //TODO union: updated code to work on picked Candidate if (ctx.getPickedCandidate() != null) { for (StorageCandidate sc : CandidateUtil.getStorageCandidates(ctx.getPickedCandidate())) { addTablesQueried(sc.getAliasForTable("")); http://git-wip-us.apache.org/repos/asf/lens/blob/975fa2c2/lens-cube/src/test/java/org/apache/lens/cube/parse/CubeTestSetup.java ---------------------------------------------------------------------- diff --git a/lens-cube/src/test/java/org/apache/lens/cube/parse/CubeTestSetup.java b/lens-cube/src/test/java/org/apache/lens/cube/parse/CubeTestSetup.java index 9878158..194ab7c 100644 --- a/lens-cube/src/test/java/org/apache/lens/cube/parse/CubeTestSetup.java +++ b/lens-cube/src/test/java/org/apache/lens/cube/parse/CubeTestSetup.java @@ -680,11 +680,12 @@ public class CubeTestSetup { "Not null cityid Expr", "case when cityid is null then 0 else cityid end")); // union join context exprs.add(new ExprColumn(new FieldSchema(prefix + "notnullcityid", "int", prefix + "Not null cityid"), - prefix + "Not null cityid Expr", "case when union_join_ctx_cityid is null then 0 else union_join_ctx_cityid end")); + prefix + "Not null cityid Expr", "case when union_join_ctx_cityid is null then 0 " + + "else union_join_ctx_cityid end")); exprs.add(new ExprColumn(new FieldSchema(prefix + "sum_msr1_msr2", "int", prefix + "sum of msr1 and msr2"), prefix + 
"sum of msr1 and msr2", "sum(union_join_ctx_msr1) + sum(union_join_ctx_msr2)")); - exprs.add(new ExprColumn(new FieldSchema(prefix + "msr1_greater_than_100", "int", prefix + "msr1 greater than 100"), - prefix + "msr1 greater than 100", "case when sum(union_join_ctx_msr1) > 100 then \"high\" else \"low\" end")); + exprs.add(new ExprColumn(new FieldSchema(prefix + "msr1_greater_than_100", "int", prefix + "msr1 greater than 100"), + prefix + "msr1 greater than 100", "case when sum(union_join_ctx_msr1) > 100 then \"high\" else \"low\" end")); exprs.add(new ExprColumn(new FieldSchema(prefix + "non_zero_msr2_sum", "int", prefix + "non zero msr2 sum"), prefix + "non zero msr2 sum", "sum(case when union_join_ctx_msr2 > 0 then union_join_ctx_msr2 else 0 end)")); http://git-wip-us.apache.org/repos/asf/lens/blob/975fa2c2/lens-cube/src/test/java/org/apache/lens/cube/parse/TestAggregateResolver.java ---------------------------------------------------------------------- diff --git a/lens-cube/src/test/java/org/apache/lens/cube/parse/TestAggregateResolver.java b/lens-cube/src/test/java/org/apache/lens/cube/parse/TestAggregateResolver.java index f467755..1e5d05f 100644 --- a/lens-cube/src/test/java/org/apache/lens/cube/parse/TestAggregateResolver.java +++ b/lens-cube/src/test/java/org/apache/lens/cube/parse/TestAggregateResolver.java @@ -108,8 +108,8 @@ public class TestAggregateResolver extends TestQueryRewrite { + "from ", null, "group by testcube.cityid", getWhereForDailyAndHourly2days(cubeName, "C2_testfact")); String expectedq4 = - getExpectedQuery(cubeName, "SELECT testcube.cityid as `cityid`, sum(testCube.msr2) as `sum(testCube.msr2)` " + - "from ", null, "group by testcube.cityid having" + " sum(testCube.msr2) > 100", + getExpectedQuery(cubeName, "SELECT testcube.cityid as `cityid`, sum(testCube.msr2) as `sum(testCube.msr2)` " + + "from ", null, "group by testcube.cityid having" + " sum(testCube.msr2) > 100", getWhereForDailyAndHourly2days(cubeName, "C2_testfact")); String 
expectedq5 = getExpectedQuery(cubeName, "SELECT testcube.cityid as `cityid`, sum(testCube.msr2) as `msr2` from ", null, @@ -155,8 +155,7 @@ public class TestAggregateResolver extends TestQueryRewrite { compareQueries(hql, expected[i]); } aggregateFactSelectionTests(conf); - //TODO union : Fix after CandidateFact deleted - //rawFactSelectionTests(getConfWithStorages("C1,C2")); + rawFactSelectionTests(getConfWithStorages("C1,C2")); } @Test @@ -177,7 +176,8 @@ public class TestAggregateResolver extends TestQueryRewrite { String query2 = "SELECT count (distinct testcube.cityid) from testcube where " + TWO_DAYS_RANGE; String hQL2 = rewrite(query2, conf); String expectedQL2 = - getExpectedQuery(cubeName, "SELECT count (distinct testcube.cityid) as `count(distinct testcube.cityid)`" + " from ", null, null, + getExpectedQuery(cubeName, "SELECT count (distinct testcube.cityid) as `count(distinct testcube.cityid)`" + + " from ", null, null, getWhereForDailyAndHourly2days(cubeName, "C2_testfact")); compareQueries(hQL2, expectedQL2); @@ -210,8 +210,6 @@ public class TestAggregateResolver extends TestQueryRewrite { } - //TODO union : Fix after CandidateFact deleted - /* @Test public void testAggregateResolverOff() throws ParseException, LensException { Configuration conf2 = getConfWithStorages("C1,C2"); @@ -222,11 +220,12 @@ public class TestAggregateResolver extends TestQueryRewrite { String query = "SELECT cityid, testCube.msr2 FROM testCube WHERE " + TWO_DAYS_RANGE; CubeQueryContext cubeql = rewriteCtx(query, conf2); String hQL = cubeql.toHQL(); - Assert.assertEquals(1, cubeql.getCandidateFacts().size()); - CandidateFact candidateFact = cubeql.getCandidateFacts().iterator().next(); - Assert.assertEquals("testFact2_raw".toLowerCase(), candidateFact.fact.getName().toLowerCase()); + Assert.assertEquals(1, cubeql.getCandidates().size()); + Candidate candidate = cubeql.getCandidates().iterator().next(); + Assert.assertTrue(candidate instanceof StorageCandidate); + 
Assert.assertEquals("c1_testFact2_raw".toLowerCase(), ((StorageCandidate) candidate).getName().toLowerCase()); String expectedQL = - getExpectedQuery(cubeName, "SELECT testcube.cityid, testCube.msr2 from ", null, null, + getExpectedQuery(cubeName, "SELECT testcube.cityid as `cityid`, testCube.msr2 as `msr2` from ", null, null, getWhereForHourly2days("c1_testfact2_raw")); compareQueries(hQL, expectedQL); conf2.set(CubeQueryConfUtil.DRIVER_SUPPORTED_STORAGES, "C2"); @@ -234,7 +233,7 @@ public class TestAggregateResolver extends TestQueryRewrite { conf2.set(CubeQueryConfUtil.DRIVER_SUPPORTED_STORAGES, "C1,C2"); rawFactSelectionTests(conf2); } -*/ + private void aggregateFactSelectionTests(Configuration conf) throws ParseException, LensException { String query = "SELECT count(distinct cityid) from testcube where " + TWO_DAYS_RANGE; CubeQueryContext cubeql = rewriteCtx(query, conf); @@ -257,8 +256,8 @@ public class TestAggregateResolver extends TestQueryRewrite { cubeql = rewriteCtx(query, conf); hQL = cubeql.toHQL(); expectedQL = - getExpectedQuery(cubeName, "SELECT testcube.cityid as `cityid`, sum(testCube.msr2) as `sum(testCube.msr2)` " + - "from ", null, "group by testcube.cityid", getWhereForDailyAndHourly2days(cubeName, "C2_testfact")); + getExpectedQuery(cubeName, "SELECT testcube.cityid as `cityid`, sum(testCube.msr2) as `sum(testCube.msr2)` " + + "from ", null, "group by testcube.cityid", getWhereForDailyAndHourly2days(cubeName, "C2_testfact")); compareQueries(hQL, expectedQL); query = "SELECT cityid, sum(testCube.msr2) m2 FROM testCube WHERE " + TWO_DAYS_RANGE + " order by m2"; @@ -273,166 +272,183 @@ public class TestAggregateResolver extends TestQueryRewrite { cubeql = rewriteCtx(query, conf); hQL = cubeql.toHQL(); expectedQL = - getExpectedQuery(cubeName, "SELECT testcube.cityid as `cityid`, sum(testCube.msr2) as `sum(testCube.msr2)` " + - "from ", null, "group by testcube.cityid having max(testcube.msr3) > 100", + getExpectedQuery(cubeName, "SELECT 
testcube.cityid as `cityid`, sum(testCube.msr2) as `sum(testCube.msr2)` " + + "from ", null, "group by testcube.cityid having max(testcube.msr3) > 100", getWhereForDailyAndHourly2days(cubeName, "C2_testfact")); compareQueries(hQL, expectedQL); } - //TODO union : Fix after CandidateFact deleted - /* + private void rawFactSelectionTests(Configuration conf) throws ParseException, LensException { // Check a query with non default aggregate function String query = "SELECT cityid, avg(testCube.msr2) FROM testCube WHERE " + TWO_DAYS_RANGE; CubeQueryContext cubeql = rewriteCtx(query, conf); String hQL = cubeql.toHQL(); - Assert.assertEquals(1, cubeql.getCandidateFacts().size()); - CandidateFact candidateFact = cubeql.getCandidateFacts().iterator().next(); - Assert.assertEquals("testFact2_raw".toLowerCase(), candidateFact.fact.getName().toLowerCase()); + Assert.assertEquals(1, cubeql.getCandidates().size()); + Candidate candidate = cubeql.getCandidates().iterator().next(); + Assert.assertTrue(candidate instanceof StorageCandidate); + Assert.assertEquals("c1_testFact2_raw".toLowerCase(), ((StorageCandidate) candidate).getName().toLowerCase()); String expectedQL = - getExpectedQuery(cubeName, "SELECT testcube.cityid, avg(testCube.msr2) from ", null, - "group by testcube.cityid", getWhereForHourly2days("c1_testfact2_raw")); + getExpectedQuery(cubeName, "SELECT testcube.cityid as `cityid`, avg(testCube.msr2) as `avg(testCube.msr2)` " + + "from ", null, "group by testcube.cityid", getWhereForHourly2days("c1_testfact2_raw")); compareQueries(hQL, expectedQL); // query with measure in a where clause query = "SELECT cityid, sum(testCube.msr2) FROM testCube WHERE testCube.msr1 < 100 and " + TWO_DAYS_RANGE; cubeql = rewriteCtx(query, conf); - Assert.assertEquals(1, cubeql.getCandidateFacts().size()); - candidateFact = cubeql.getCandidateFacts().iterator().next(); - Assert.assertEquals("testFact2_raw".toLowerCase(), candidateFact.fact.getName().toLowerCase()); + Assert.assertEquals(1, 
cubeql.getCandidates().size()); + candidate = cubeql.getCandidates().iterator().next(); + Assert.assertTrue(candidate instanceof StorageCandidate); + Assert.assertEquals("c1_testFact2_raw".toLowerCase(), ((StorageCandidate) candidate).getName().toLowerCase()); hQL = cubeql.toHQL(); expectedQL = - getExpectedQuery(cubeName, "SELECT testcube.cityid, sum(testCube.msr2) from ", "testcube.msr1 < 100", - "group by testcube.cityid", getWhereForHourly2days("c1_testfact2_raw")); + getExpectedQuery(cubeName, "SELECT testcube.cityid as `cityid`, sum(testCube.msr2) as `sum(testCube.msr2)` " + + "from ", "testcube.msr1 < 100", "group by testcube.cityid", getWhereForHourly2days("c1_testfact2_raw")); compareQueries(hQL, expectedQL); query = "SELECT cityid, testCube.msr2 FROM testCube WHERE testCube.msr2 < 100 and " + TWO_DAYS_RANGE; cubeql = rewriteCtx(query, conf); - Assert.assertEquals(1, cubeql.getCandidateFacts().size()); - candidateFact = cubeql.getCandidateFacts().iterator().next(); - Assert.assertEquals("testFact2_raw".toLowerCase(), candidateFact.fact.getName().toLowerCase()); + Assert.assertEquals(1, cubeql.getCandidates().size()); + candidate = cubeql.getCandidates().iterator().next(); + Assert.assertTrue(candidate instanceof StorageCandidate); + Assert.assertEquals("c1_testFact2_raw".toLowerCase(), ((StorageCandidate) candidate).getName().toLowerCase()); hQL = cubeql.toHQL(); expectedQL = - getExpectedQuery(cubeName, "SELECT testcube.cityid, testCube.msr2 from ", "testcube.msr2 < 100", null, - getWhereForHourly2days("c1_testfact2_raw")); + getExpectedQuery(cubeName, "SELECT testcube.cityid as `cityid`, testCube.msr2 as `msr2` from ", + "testcube.msr2 < 100", null, getWhereForHourly2days("c1_testfact2_raw")); compareQueries(hQL, expectedQL); query = "SELECT cityid, sum(testCube.msr2) FROM testCube WHERE " + TWO_DAYS_RANGE + " group by testCube.msr1"; cubeql = rewriteCtx(query, conf); - Assert.assertEquals(1, cubeql.getCandidateFacts().size()); - candidateFact = 
cubeql.getCandidateFacts().iterator().next(); - Assert.assertEquals("testFact2_raw".toLowerCase(), candidateFact.fact.getName().toLowerCase()); + Assert.assertEquals(1, cubeql.getCandidates().size()); + candidate = cubeql.getCandidates().iterator().next(); + Assert.assertTrue(candidate instanceof StorageCandidate); + Assert.assertEquals("c1_testFact2_raw".toLowerCase(), ((StorageCandidate) candidate).getName().toLowerCase()); hQL = cubeql.toHQL(); expectedQL = - getExpectedQuery(cubeName, "SELECT testcube.cityid, sum(testCube.msr2) from ", null, - " group by testCube.msr1", getWhereForHourly2days("c1_testfact2_raw")); + getExpectedQuery(cubeName, "SELECT testcube.cityid as `cityid`, sum(testCube.msr2) as `sum(testCube.msr2)` " + + "from ", null, " group by testCube.msr1", getWhereForHourly2days("c1_testfact2_raw")); compareQueries(hQL, expectedQL); query = "SELECT cityid, sum(testCube.msr2) FROM testCube WHERE " + TWO_DAYS_RANGE + " group by testCube.msr3"; cubeql = rewriteCtx(query, conf); - Assert.assertEquals(1, cubeql.getCandidateFacts().size()); - candidateFact = cubeql.getCandidateFacts().iterator().next(); - Assert.assertEquals("testFact2_raw".toLowerCase(), candidateFact.fact.getName().toLowerCase()); + Assert.assertEquals(1, cubeql.getCandidates().size()); + candidate = cubeql.getCandidates().iterator().next(); + Assert.assertTrue(candidate instanceof StorageCandidate); + Assert.assertEquals("c1_testFact2_raw".toLowerCase(), ((StorageCandidate) candidate).getName().toLowerCase()); hQL = cubeql.toHQL(); expectedQL = - getExpectedQuery(cubeName, "SELECT testcube.cityid, sum(testCube.msr2) from ", null, - " group by testCube.msr3", getWhereForHourly2days("c1_testfact2_raw")); + getExpectedQuery(cubeName, "SELECT testcube.cityid as `cityid`, sum(testCube.msr2) as `sum(testCube.msr2)` " + + "from ", null, " group by testCube.msr3", getWhereForHourly2days("c1_testfact2_raw")); compareQueries(hQL, expectedQL); query = "SELECT cityid, sum(testCube.msr2) FROM 
testCube WHERE " + TWO_DAYS_RANGE + " order by testCube.msr1"; cubeql = rewriteCtx(query, conf); - Assert.assertEquals(1, cubeql.getCandidateFacts().size()); - candidateFact = cubeql.getCandidateFacts().iterator().next(); - Assert.assertEquals("testFact2_raw".toLowerCase(), candidateFact.fact.getName().toLowerCase()); + Assert.assertEquals(1, cubeql.getCandidates().size()); + candidate = cubeql.getCandidates().iterator().next(); + Assert.assertTrue(candidate instanceof StorageCandidate); + Assert.assertEquals("c1_testFact2_raw".toLowerCase(), ((StorageCandidate) candidate).getName().toLowerCase()); hQL = cubeql.toHQL(); expectedQL = - getExpectedQuery(cubeName, "SELECT testcube.cityid, sum(testCube.msr2) from ", null, - " group by testcube.cityid order by testcube.msr1 asc", getWhereForHourly2days("c1_testfact2_raw")); + getExpectedQuery(cubeName, "SELECT testcube.cityid as `cityid`, sum(testCube.msr2) as `sum(testCube.msr2)` " + + "from ", null, " group by testcube.cityid order by testcube.msr1 asc", + getWhereForHourly2days("c1_testfact2_raw")); compareQueries(hQL, expectedQL); query = "SELECT cityid, sum(testCube.msr2) FROM testCube WHERE " + TWO_DAYS_RANGE + " order by testCube.msr3"; cubeql = rewriteCtx(query, conf); - Assert.assertEquals(1, cubeql.getCandidateFacts().size()); - candidateFact = cubeql.getCandidateFacts().iterator().next(); - Assert.assertEquals("testFact2_raw".toLowerCase(), candidateFact.fact.getName().toLowerCase()); + Assert.assertEquals(1, cubeql.getCandidates().size()); + candidate = cubeql.getCandidates().iterator().next(); + Assert.assertTrue(candidate instanceof StorageCandidate); + Assert.assertEquals("c1_testFact2_raw".toLowerCase(), ((StorageCandidate) candidate).getName().toLowerCase()); hQL = cubeql.toHQL(); expectedQL = - getExpectedQuery(cubeName, "SELECT testcube.cityid, sum(testCube.msr2) from ", null, - " group by testcube.cityid order by testcube.msr3 asc", getWhereForHourly2days("c1_testfact2_raw")); + 
getExpectedQuery(cubeName, "SELECT testcube.cityid as `cityid`, sum(testCube.msr2) as `sum(testCube.msr2)` " + + "from ", null, " group by testcube.cityid order by testcube.msr3 asc", + getWhereForHourly2days("c1_testfact2_raw")); compareQueries(hQL, expectedQL); query = "SELECT distinct cityid, round(testCube.msr2) from testCube where " + TWO_DAYS_RANGE; cubeql = rewriteCtx(query, conf); - Assert.assertEquals(1, cubeql.getCandidateFacts().size()); - candidateFact = cubeql.getCandidateFacts().iterator().next(); - Assert.assertEquals("testFact2_raw".toLowerCase(), candidateFact.fact.getName().toLowerCase()); + Assert.assertEquals(1, cubeql.getCandidates().size()); + candidate = cubeql.getCandidates().iterator().next(); + Assert.assertTrue(candidate instanceof StorageCandidate); + Assert.assertEquals("c1_testFact2_raw".toLowerCase(), ((StorageCandidate) candidate).getName().toLowerCase()); hQL = cubeql.toHQL(); expectedQL = - getExpectedQuery(cubeName, "SELECT distinct testcube.cityid, round(testCube.msr2) from ", null, null, - getWhereForHourly2days("c1_testfact2_raw")); + getExpectedQuery(cubeName, "SELECT distinct testcube.cityid as `cityid`, round(testCube.msr2) " + + "as `round(testCube.msr2)` from ", null, null, getWhereForHourly2days("c1_testfact2_raw")); compareQueries(hQL, expectedQL); query = "SELECT cityid, count(distinct(testCube.msr2)) from testCube where " + TWO_DAYS_RANGE; cubeql = rewriteCtx(query, conf); - Assert.assertEquals(1, cubeql.getCandidateFacts().size()); - candidateFact = cubeql.getCandidateFacts().iterator().next(); - Assert.assertEquals("testFact2_raw".toLowerCase(), candidateFact.fact.getName().toLowerCase()); + Assert.assertEquals(1, cubeql.getCandidates().size()); + candidate = cubeql.getCandidates().iterator().next(); + Assert.assertTrue(candidate instanceof StorageCandidate); + Assert.assertEquals("c1_testFact2_raw".toLowerCase(), ((StorageCandidate) candidate).getName().toLowerCase()); hQL = cubeql.toHQL(); expectedQL = - 
getExpectedQuery(cubeName, "SELECT testcube.cityid, count(distinct testCube.msr2) from ", null, - "group by testcube.cityid", getWhereForHourly2days("c1_testfact2_raw")); + getExpectedQuery(cubeName, "SELECT testcube.cityid as `cityid`, count(distinct testCube.msr2) " + + "as `count(distinct(testCube.msr2))` from ", null, "group by testcube.cityid", + getWhereForHourly2days("c1_testfact2_raw")); compareQueries(hQL, expectedQL); // query with no default aggregate measure query = "SELECT cityid, round(testCube.msr1) from testCube where " + TWO_DAYS_RANGE; cubeql = rewriteCtx(query, conf); - Assert.assertEquals(1, cubeql.getCandidateFacts().size()); - candidateFact = cubeql.getCandidateFacts().iterator().next(); - Assert.assertEquals("testFact2_raw".toLowerCase(), candidateFact.fact.getName().toLowerCase()); + Assert.assertEquals(1, cubeql.getCandidates().size()); + candidate = cubeql.getCandidates().iterator().next(); + Assert.assertTrue(candidate instanceof StorageCandidate); + Assert.assertEquals("c1_testFact2_raw".toLowerCase(), ((StorageCandidate) candidate).getName().toLowerCase()); hQL = cubeql.toHQL(); expectedQL = - getExpectedQuery(cubeName, "SELECT testcube.cityid, round(testCube.msr1) from ", null, null, - getWhereForHourly2days("c1_testfact2_raw")); + getExpectedQuery(cubeName, "SELECT testcube.cityid as `cityid`, round(testCube.msr1) as `round(testCube.msr1)` " + + "from ", null, null, getWhereForHourly2days("c1_testfact2_raw")); compareQueries(hQL, expectedQL); query = "SELECT distinct cityid, round(testCube.msr1) from testCube where " + TWO_DAYS_RANGE; cubeql = rewriteCtx(query, conf); - Assert.assertEquals(1, cubeql.getCandidateFacts().size()); - candidateFact = cubeql.getCandidateFacts().iterator().next(); - Assert.assertEquals("testFact2_raw".toLowerCase(), candidateFact.fact.getName().toLowerCase()); + Assert.assertEquals(1, cubeql.getCandidates().size()); + candidate = cubeql.getCandidates().iterator().next(); + Assert.assertTrue(candidate 
instanceof StorageCandidate); + Assert.assertEquals("c1_testFact2_raw".toLowerCase(), ((StorageCandidate) candidate).getName().toLowerCase()); hQL = cubeql.toHQL(); expectedQL = - getExpectedQuery(cubeName, "SELECT distinct testcube.cityid, round(testCube.msr1) from ", null, null, - getWhereForHourly2days("c1_testfact2_raw")); + getExpectedQuery(cubeName, "SELECT distinct testcube.cityid as `cityid`, round(testCube.msr1) " + + "as `round(testCube.msr1)` from ", null, null, getWhereForHourly2days("c1_testfact2_raw")); compareQueries(hQL, expectedQL); query = "SELECT cityid, count(distinct(testCube.msr1)) from testCube where " + TWO_DAYS_RANGE; cubeql = rewriteCtx(query, conf); - Assert.assertEquals(1, cubeql.getCandidateFacts().size()); - candidateFact = cubeql.getCandidateFacts().iterator().next(); - Assert.assertEquals("testFact2_raw".toLowerCase(), candidateFact.fact.getName().toLowerCase()); + Assert.assertEquals(1, cubeql.getCandidates().size()); + candidate = cubeql.getCandidates().iterator().next(); + Assert.assertTrue(candidate instanceof StorageCandidate); + Assert.assertEquals("c1_testFact2_raw".toLowerCase(), ((StorageCandidate) candidate).getName().toLowerCase()); hQL = cubeql.toHQL(); expectedQL = - getExpectedQuery(cubeName, "SELECT testcube.cityid, count(distinct testCube.msr1) from ", null, - "group by testcube.cityid", getWhereForHourly2days("c1_testfact2_raw")); + getExpectedQuery(cubeName, "SELECT testcube.cityid as `cityid`, count(distinct testCube.msr1) " + + "as ` count(distinct testCube.msr1)` from ", null, "group by testcube.cityid", + getWhereForHourly2days("c1_testfact2_raw")); compareQueries(hQL, expectedQL); query = "SELECT cityid, sum(testCube.msr1) from testCube where " + TWO_DAYS_RANGE; cubeql = rewriteCtx(query, conf); - Assert.assertEquals(1, cubeql.getCandidateFacts().size()); - candidateFact = cubeql.getCandidateFacts().iterator().next(); - Assert.assertEquals("testFact2_raw".toLowerCase(), 
candidateFact.fact.getName().toLowerCase()); + Assert.assertEquals(1, cubeql.getCandidates().size()); + candidate = cubeql.getCandidates().iterator().next(); + Assert.assertTrue(candidate instanceof StorageCandidate); + Assert.assertEquals("c1_testFact2_raw".toLowerCase(), ((StorageCandidate) candidate).getName().toLowerCase()); hQL = cubeql.toHQL(); expectedQL = - getExpectedQuery(cubeName, "SELECT testcube.cityid, sum(testCube.msr1) from ", null, - "group by testcube.cityid", getWhereForHourly2days("c1_testfact2_raw")); + getExpectedQuery(cubeName, "SELECT testcube.cityid as `cityid`, sum(testCube.msr1) as `sum(testCube.msr1)` " + + "from ", null, "group by testcube.cityid", + getWhereForHourly2days("c1_testfact2_raw")); compareQueries(hQL, expectedQL); query = "SELECT cityid, sum(testCube.msr2) FROM testCube WHERE " + TWO_DAYS_RANGE + " having max(msr1) > 100"; cubeql = rewriteCtx(query, conf); hQL = cubeql.toHQL(); expectedQL = - getExpectedQuery(cubeName, "SELECT testcube.cityid, sum(testCube.msr2) from ", null, - "group by testcube.cityid having max(testcube.msr1) > 100", getWhereForHourly2days("c1_testfact2_raw")); + getExpectedQuery(cubeName, "SELECT testcube.cityid as `cityid`, sum(testCube.msr2) as `sum(testCube.msr2)` " + + "from ", null, "group by testcube.cityid having max(testcube.msr1) > 100", + getWhereForHourly2days("c1_testfact2_raw")); compareQueries(hQL, expectedQL); } - */ }