LENS-1333: Add data completeness checks on query writing path
Project: http://git-wip-us.apache.org/repos/asf/lens/repo Commit: http://git-wip-us.apache.org/repos/asf/lens/commit/078555c1 Tree: http://git-wip-us.apache.org/repos/asf/lens/tree/078555c1 Diff: http://git-wip-us.apache.org/repos/asf/lens/diff/078555c1 Branch: refs/heads/master Commit: 078555c1a92cecef19f53962703d7dd00eb44633 Parents: 0cce226 Author: Narayan Periwal <narayan.peri...@inmobi.com> Authored: Fri Nov 11 09:05:49 2016 +0530 Committer: Amareshwari Sriramadasu <amareshw...@apache.org> Committed: Fri Nov 11 09:05:49 2016 +0530 ---------------------------------------------------------------------- .../lens/cube/metadata/CubeFactTable.java | 4 + .../lens/cube/metadata/CubeMetastoreClient.java | 22 ++ .../lens/cube/metadata/MetastoreConstants.java | 2 + .../apache/lens/cube/parse/CandidateFact.java | 3 + .../cube/parse/CandidateTablePruneCause.java | 21 ++ .../lens/cube/parse/CubeQueryConfUtil.java | 4 +- .../cube/parse/DataCompletenessChecker.java | 55 ----- .../cube/parse/MaxCoveringFactResolver.java | 55 ++++- .../lens/cube/parse/StorageTableResolver.java | 154 +++++++++++- .../src/main/resources/olap-query-conf.xml | 13 + .../apache/lens/cube/parse/CubeTestSetup.java | 42 +++- .../FieldsCannotBeQueriedTogetherTest.java | 1 - .../cube/parse/MockCompletenessChecker.java | 46 ++++ .../lens/cube/parse/TestBaseCubeQueries.java | 3 +- .../lens/cube/parse/TestCubeRewriter.java | 48 ++++ .../lens/cube/parse/TestQueryRewrite.java | 5 + .../lens/server/api/LensConfConstants.java | 23 ++ .../api/metastore/DataCompletenessChecker.java | 55 +++++ .../server/api/metastore/DefaultChecker.java | 34 +++ .../src/main/resources/lensserver-default.xml | 12 + .../src/main/resources/lenssession-default.xml | 7 + src/site/apt/admin/config.apt | 240 ++++++++++--------- src/site/apt/admin/session-config.apt | 78 +++--- src/site/apt/user/olap-query-conf.apt | 60 ++--- 24 files changed, 737 insertions(+), 250 deletions(-) 
---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/lens/blob/078555c1/lens-cube/src/main/java/org/apache/lens/cube/metadata/CubeFactTable.java ---------------------------------------------------------------------- diff --git a/lens-cube/src/main/java/org/apache/lens/cube/metadata/CubeFactTable.java b/lens-cube/src/main/java/org/apache/lens/cube/metadata/CubeFactTable.java index fb958c3..adb6c92 100644 --- a/lens-cube/src/main/java/org/apache/lens/cube/metadata/CubeFactTable.java +++ b/lens-cube/src/main/java/org/apache/lens/cube/metadata/CubeFactTable.java @@ -314,6 +314,10 @@ public class CubeFactTable extends AbstractCubeTable { addCubeNames(getName(), getProperties(), cubeName); } + public String getDataCompletenessTag() { + return getProperties().get(MetastoreConstants.FACT_DATA_COMPLETENESS_TAG); + } + public boolean isAggregated() { // It's aggregate table unless explicitly set to false return !"false".equalsIgnoreCase(getProperties().get(MetastoreConstants.FACT_AGGREGATED_PROPERTY)); http://git-wip-us.apache.org/repos/asf/lens/blob/078555c1/lens-cube/src/main/java/org/apache/lens/cube/metadata/CubeMetastoreClient.java ---------------------------------------------------------------------- diff --git a/lens-cube/src/main/java/org/apache/lens/cube/metadata/CubeMetastoreClient.java b/lens-cube/src/main/java/org/apache/lens/cube/metadata/CubeMetastoreClient.java index e14c43f..6c9cde2 100644 --- a/lens-cube/src/main/java/org/apache/lens/cube/metadata/CubeMetastoreClient.java +++ b/lens-cube/src/main/java/org/apache/lens/cube/metadata/CubeMetastoreClient.java @@ -31,7 +31,9 @@ import org.apache.lens.cube.metadata.Storage.LatestInfo; import org.apache.lens.cube.metadata.Storage.LatestPartColumnInfo; import org.apache.lens.cube.metadata.timeline.PartitionTimeline; import org.apache.lens.cube.metadata.timeline.PartitionTimelineFactory; +import org.apache.lens.server.api.*; import 
org.apache.lens.server.api.error.LensException; +import org.apache.lens.server.api.metastore.DataCompletenessChecker; import org.apache.lens.server.api.util.LensUtil; import org.apache.commons.lang.StringUtils; @@ -45,6 +47,7 @@ import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.metadata.Partition; import org.apache.hadoop.hive.ql.metadata.Table; import org.apache.hadoop.hive.ql.session.SessionState; +import org.apache.hadoop.util.ReflectionUtils; import org.apache.thrift.TException; import com.google.common.collect.Lists; @@ -92,6 +95,25 @@ public class CubeMetastoreClient { private static final Map<String, CubeMetastoreClient> CLIENT_MAPPING = Maps.newConcurrentMap(); // Set of all storage table names for which latest partitions exist private final Set<String> latestLookupCache = Sets.newSetFromMap(new ConcurrentHashMap<String, Boolean>()); + private DataCompletenessChecker completenessChecker; + + private Boolean isDataCompletenessCheckEnabled; + + public DataCompletenessChecker getCompletenessChecker() { + if (completenessChecker == null) { + completenessChecker = ReflectionUtils.newInstance(config.getClass(LensConfConstants.COMPLETENESS_CHECKER_CLASS, + LensConfConstants.DEFAULT_COMPLETENESS_CHECKER, DataCompletenessChecker.class), this.config); + } + return completenessChecker; + } + + public boolean isDataCompletenessCheckEnabled() { + if (isDataCompletenessCheckEnabled == null) { + isDataCompletenessCheckEnabled = config.getBoolean(LensConfConstants.ENABLE_DATACOMPLETENESS_CHECK, + LensConfConstants.DEFAULT_ENABLE_DATACOMPLETENESS_CHECK); + } + return isDataCompletenessCheckEnabled; + } /** extract storage name from fact and storage table name. 
String operation */ private String extractStorageName(CubeFactTable fact, String storageTableName) throws LensException { http://git-wip-us.apache.org/repos/asf/lens/blob/078555c1/lens-cube/src/main/java/org/apache/lens/cube/metadata/MetastoreConstants.java ---------------------------------------------------------------------- diff --git a/lens-cube/src/main/java/org/apache/lens/cube/metadata/MetastoreConstants.java b/lens-cube/src/main/java/org/apache/lens/cube/metadata/MetastoreConstants.java index 4585ef7..88500fd 100644 --- a/lens-cube/src/main/java/org/apache/lens/cube/metadata/MetastoreConstants.java +++ b/lens-cube/src/main/java/org/apache/lens/cube/metadata/MetastoreConstants.java @@ -57,6 +57,7 @@ public final class MetastoreConstants { public static final String FACT_RELATIVE_END_TIME = "cube.fact.relative.end.time"; public static final String FACT_COL_START_TIME_PFX = "cube.fact.col.start.time."; public static final String FACT_COL_END_TIME_PFX = "cube.fact.col.end.time."; + public static final String FACT_DATA_COMPLETENESS_TAG = "cube.fact.datacompleteness.tag"; // Segmentation constants public static final String SEGMENTATION_KEY_PFX = "cube.segmentation.internal."; @@ -95,6 +96,7 @@ public final class MetastoreConstants { public static final String MAX_SFX = ".max"; public static final String EXPR_SFX = ".expr"; public static final String FORMATSTRING_SFX = ".format"; + public static final String MEASURE_DATACOMPLETENESS_TAG = "cube.measure.datacompleteness.tag"; // dimension constants public static final String DIM_KEY_PFX = "cube.dimension."; http://git-wip-us.apache.org/repos/asf/lens/blob/078555c1/lens-cube/src/main/java/org/apache/lens/cube/parse/CandidateFact.java ---------------------------------------------------------------------- diff --git a/lens-cube/src/main/java/org/apache/lens/cube/parse/CandidateFact.java b/lens-cube/src/main/java/org/apache/lens/cube/parse/CandidateFact.java index 5dc9dc9..b42262d 100644 --- 
a/lens-cube/src/main/java/org/apache/lens/cube/parse/CandidateFact.java +++ b/lens-cube/src/main/java/org/apache/lens/cube/parse/CandidateFact.java @@ -86,6 +86,9 @@ public class CandidateFact implements CandidateTable, QueryAST { private final Map<TimeRange, Map<String, LinkedHashSet<FactPartition>>> rangeToStoragePartMap = new HashMap<>(); @Getter private final Map<TimeRange, Map<String, String>> rangeToStorageWhereMap = new HashMap<>(); + @Getter + @Setter + private Map<String, Map<String, Float>> dataCompletenessMap; CandidateFact(CubeFactTable fact, CubeInterface cube) { this.fact = fact; http://git-wip-us.apache.org/repos/asf/lens/blob/078555c1/lens-cube/src/main/java/org/apache/lens/cube/parse/CandidateTablePruneCause.java ---------------------------------------------------------------------- diff --git a/lens-cube/src/main/java/org/apache/lens/cube/parse/CandidateTablePruneCause.java b/lens-cube/src/main/java/org/apache/lens/cube/parse/CandidateTablePruneCause.java index 78fb21d..2ad6e20 100644 --- a/lens-cube/src/main/java/org/apache/lens/cube/parse/CandidateTablePruneCause.java +++ b/lens-cube/src/main/java/org/apache/lens/cube/parse/CandidateTablePruneCause.java @@ -146,6 +146,18 @@ public class CandidateTablePruneCause { } return new String[]{missingPartitions.toString()}; } + }, + // incomplete data in the fact + INCOMPLETE_PARTITION("Data is incomplete. 
Details : %s") { + Object[] getFormatPlaceholders(Set<CandidateTablePruneCause> causes) { + Set<Map<String, Map<String, Float>>> incompletePartitions = Sets.newHashSet(); + for (CandidateTablePruneCause cause : causes) { + if (cause.getIncompletePartitions() != null) { + incompletePartitions.add(cause.getIncompletePartitions()); + } + } + return new String[]{incompletePartitions.toString()}; + } }; @@ -231,6 +243,8 @@ public class CandidateTablePruneCause { // populated only incase of missing partitions cause private Set<String> missingPartitions; + // populated only incase of incomplete partitions cause + private Map<String, Map<String, Float>> incompletePartitions; // populated only incase of missing update periods cause private List<String> missingUpdatePeriods; // populated in case of missing columns @@ -300,6 +314,13 @@ public class CandidateTablePruneCause { return cause; } + public static CandidateTablePruneCause incompletePartitions(Map<String, Map<String, Float>> incompleteParts) { + CandidateTablePruneCause cause = new CandidateTablePruneCause(INCOMPLETE_PARTITION); + //incompleteParts may be null when partial data is allowed. 
+ cause.setIncompletePartitions(incompleteParts); + return cause; + } + public static CandidateTablePruneCause lessData(MaxCoveringFactResolver.TimeCovered timeCovered) { CandidateTablePruneCause cause = new CandidateTablePruneCause(LESS_DATA); cause.setMaxTimeCovered(timeCovered); http://git-wip-us.apache.org/repos/asf/lens/blob/078555c1/lens-cube/src/main/java/org/apache/lens/cube/parse/CubeQueryConfUtil.java ---------------------------------------------------------------------- diff --git a/lens-cube/src/main/java/org/apache/lens/cube/parse/CubeQueryConfUtil.java b/lens-cube/src/main/java/org/apache/lens/cube/parse/CubeQueryConfUtil.java index 408086f..300d798 100644 --- a/lens-cube/src/main/java/org/apache/lens/cube/parse/CubeQueryConfUtil.java +++ b/lens-cube/src/main/java/org/apache/lens/cube/parse/CubeQueryConfUtil.java @@ -25,7 +25,6 @@ import org.apache.lens.cube.metadata.UpdatePeriod; import org.apache.commons.lang.StringUtils; import org.apache.hadoop.conf.Configuration; - /** * Contains all configurations of cube query rewriting. 
*/ @@ -46,6 +45,7 @@ public final class CubeQueryConfUtil { public static final String NON_EXISTING_PARTITIONS = "lens.cube.query.nonexisting.partitions"; public static final String QUERY_MAX_INTERVAL = "lens.cube.query.max.interval"; public static final String PROCESS_TIME_PART_COL = "lens.cube.query.process.time" + ".partition.column"; + public static final String COMPLETENESS_CHECK_PART_COL = "lens.cube.query.completeness.check.partition.column"; public static final String LOOK_AHEAD_PT_PARTS_PFX = "lens.cube.query.lookahead.ptparts.forinterval."; public static final String ENABLE_GROUP_BY_TO_SELECT = "lens.cube.query.promote.groupby.toselect"; public static final String ENABLE_SELECT_TO_GROUPBY = "lens.cube.query.promote.select.togroupby"; @@ -123,4 +123,6 @@ public final class CubeQueryConfUtil { public static final String DEFAULT_BRIDGE_TABLE_FIELD_ARRAY_FILTER = "array_contains"; public static final String REWRITE_DIM_FILTER_TO_FACT_FILTER = "lens.cube.query.rewrite.dim.filter.to.fact.filter"; public static final boolean DEFAULT_REWRITE_DIM_FILTER_TO_FACT_FILTER = false; + public static final String COMPLETENESS_THRESHOLD = "lens.cube.query.completeness.threshold"; + public static final float DEFAULT_COMPLETENESS_THRESHOLD = 100f; } http://git-wip-us.apache.org/repos/asf/lens/blob/078555c1/lens-cube/src/main/java/org/apache/lens/cube/parse/DataCompletenessChecker.java ---------------------------------------------------------------------- diff --git a/lens-cube/src/main/java/org/apache/lens/cube/parse/DataCompletenessChecker.java b/lens-cube/src/main/java/org/apache/lens/cube/parse/DataCompletenessChecker.java deleted file mode 100644 index 6a0230d..0000000 --- a/lens-cube/src/main/java/org/apache/lens/cube/parse/DataCompletenessChecker.java +++ /dev/null @@ -1,55 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. 
See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ -package org.apache.lens.cube.parse; - -import java.util.Date; -import java.util.Map; -import java.util.Set; - -import org.apache.lens.server.api.error.LensException; - -/** - * DataCompletenessChecker is for identifying the completeness of data in a fact for the given set of measures, start - * and end date. A fact will have a dataCompletenessTag, multiple facts can have the same dataCompletenessTag. - * Similarly, measures will have a dataCompletenessTag, multiple measures can have the same dataCompletenessTag. - * The api will take the dataCompletenessTag for the facts and measures and compute the completeness based on these - * tags. The utility of having tags is that the similar kind of measures or facts which will have the same level of - * completeness can use the same tag, thus we avoid the redundant completeness computation for similar measures - * and facts. - * The implementations of the interface can truncate the start and end date. - */ -public interface DataCompletenessChecker { - - /** - * Get completeness of the set of measures in a fact based on the dataCompletenessTag for the given starttime and - * endtime. - * - * @param factTag This is the dataCompletenessTag for a fact. 
The tag can be specified by setting the property - * named dataCompletenessTag for the fact. Mutltiple facts can have the same dataCompletenessTag. - * @param start Start time of the query (Inclusive). - * @param end End time of the query (Exclusive). - * @param measureTag List of distinct tag of the measures in the query. Multiple measures can have the same - * dataCompletenessTag. - * @return map; key is the name of the dataCompletenessTag which refers to one or more measures. Value is the map - * of date and %completeness. - */ - Map<String, Map<Date, Float>> getCompleteness(String factTag, Date start, Date end, Set<String> measureTag) - throws LensException; - -} http://git-wip-us.apache.org/repos/asf/lens/blob/078555c1/lens-cube/src/main/java/org/apache/lens/cube/parse/MaxCoveringFactResolver.java ---------------------------------------------------------------------- diff --git a/lens-cube/src/main/java/org/apache/lens/cube/parse/MaxCoveringFactResolver.java b/lens-cube/src/main/java/org/apache/lens/cube/parse/MaxCoveringFactResolver.java index 13f1aa4..45824fe 100644 --- a/lens-cube/src/main/java/org/apache/lens/cube/parse/MaxCoveringFactResolver.java +++ b/lens-cube/src/main/java/org/apache/lens/cube/parse/MaxCoveringFactResolver.java @@ -57,6 +57,13 @@ class MaxCoveringFactResolver implements ContextRewriter { // nothing to prune. return; } + resolveByTimeCovered(cubeql); + if (cubeql.getMetastoreClient() != null && cubeql.getMetastoreClient().isDataCompletenessCheckEnabled()) { + resolveByDataCompleteness(cubeql); + } + } + + private void resolveByTimeCovered(CubeQueryContext cubeql) { // For each part column, which candidate fact sets are covering how much amount. // Later, we'll maximize coverage for each queried part column. 
Map<String, Map<Set<CandidateFact>, Long>> partCountsPerPartCol = Maps.newHashMap(); @@ -82,7 +89,7 @@ class MaxCoveringFactResolver implements ContextRewriter { } if (timeCoveredLong < maxTimeCovered) { log.info("Not considering facts:{} from candidate fact tables as it covers less time than the max" - + " for partition column: {} which is: {}", facts, partColQueried, timeCovered); + + " for partition column: {} which is: {}", facts, partColQueried, timeCovered); iter.remove(); } } @@ -91,6 +98,52 @@ class MaxCoveringFactResolver implements ContextRewriter { cubeql.pruneCandidateFactWithCandidateSet(CandidateTablePruneCause.lessData(null)); } + private void resolveByDataCompleteness(CubeQueryContext cubeql) { + // From the list of candidate fact sets, we calculate the maxDataCompletenessFactor. + float maxDataCompletenessFactor = 0f; + for (Set<CandidateFact> facts : cubeql.getCandidateFactSets()) { + float dataCompletenessFactor = computeDataCompletenessFactor(facts); + if (dataCompletenessFactor > maxDataCompletenessFactor) { + maxDataCompletenessFactor = dataCompletenessFactor; + } + } + + if (maxDataCompletenessFactor == 0f) { + //there is nothing to prune + return; + } + + // We prune those candidate fact set, whose dataCompletenessFactor is less than maxDataCompletenessFactor + Iterator<Set<CandidateFact>> iter = cubeql.getCandidateFactSets().iterator(); + while (iter.hasNext()) { + Set<CandidateFact> facts = iter.next(); + float dataCompletenessFactor = computeDataCompletenessFactor(facts); + if (dataCompletenessFactor < maxDataCompletenessFactor) { + log.info("Not considering facts:{} from candidate fact tables as the dataCompletenessFactor for this:{} is " + + "less than the max:{}", facts, dataCompletenessFactor, maxDataCompletenessFactor); + iter.remove(); + } + } + cubeql.pruneCandidateFactWithCandidateSet(CandidateTablePruneCause.incompletePartitions(null)); + } + + private float computeDataCompletenessFactor(Set<CandidateFact> facts) { + float 
completenessFactor = 0f; + int numPartition = 0; + for (CandidateFact fact : facts) { + if (fact.getDataCompletenessMap() != null) { + Map<String, Map<String, Float>> completenessMap = fact.getDataCompletenessMap(); + for (Map<String, Float> partitionCompleteness : completenessMap.values()) { + for (Float value : partitionCompleteness.values()) { + numPartition++; + completenessFactor += value; + } + } + } + } + return numPartition == 0 ? completenessFactor : completenessFactor/numPartition; + } + /** * Returns time covered by fact set for each part column. * @param facts http://git-wip-us.apache.org/repos/asf/lens/blob/078555c1/lens-cube/src/main/java/org/apache/lens/cube/parse/StorageTableResolver.java ---------------------------------------------------------------------- diff --git a/lens-cube/src/main/java/org/apache/lens/cube/parse/StorageTableResolver.java b/lens-cube/src/main/java/org/apache/lens/cube/parse/StorageTableResolver.java index db26718..cdf6812 100644 --- a/lens-cube/src/main/java/org/apache/lens/cube/parse/StorageTableResolver.java +++ b/lens-cube/src/main/java/org/apache/lens/cube/parse/StorageTableResolver.java @@ -18,22 +18,24 @@ */ package org.apache.lens.cube.parse; -import static org.apache.lens.cube.metadata.DateUtil.WSPACE; -import static org.apache.lens.cube.metadata.MetastoreUtil.*; -import static org.apache.lens.cube.parse.CandidateTablePruneCause.*; -import static org.apache.lens.cube.parse.CandidateTablePruneCause.CandidateTablePruneCode.*; -import static org.apache.lens.cube.parse.CandidateTablePruneCause.SkipStorageCode.*; - import java.text.DateFormat; import java.text.ParseException; import java.text.SimpleDateFormat; + import java.util.*; import java.util.regex.Matcher; import java.util.regex.Pattern; +import static org.apache.lens.cube.metadata.DateUtil.WSPACE; +import static org.apache.lens.cube.metadata.MetastoreUtil.*; +import static org.apache.lens.cube.parse.CandidateTablePruneCause.*; +import static 
org.apache.lens.cube.parse.CandidateTablePruneCause.CandidateTablePruneCode.*; +import static org.apache.lens.cube.parse.CandidateTablePruneCause.SkipStorageCode.*; + import org.apache.lens.cube.metadata.*; import org.apache.lens.cube.parse.CandidateTablePruneCause.*; import org.apache.lens.server.api.error.LensException; +import org.apache.lens.server.api.metastore.*; import org.apache.commons.lang.StringUtils; import org.apache.hadoop.conf.Configuration; @@ -42,6 +44,7 @@ import org.apache.hadoop.util.ReflectionUtils; import com.google.common.collect.Lists; import com.google.common.collect.Sets; + import lombok.extern.slf4j.Slf4j; /** @@ -65,6 +68,8 @@ class StorageTableResolver implements ContextRewriter { private DateFormat partWhereClauseFormat = null; private PHASE phase; private HashMap<CubeFactTable, Map<String, SkipStorageCause>> skipStorageCausesPerFact; + private float completenessThreshold; + private String completenessPartCol; enum PHASE { FACT_TABLES, FACT_PARTITIONS, DIM_TABLE_AND_PARTITIONS; @@ -104,6 +109,9 @@ class StorageTableResolver implements ContextRewriter { partWhereClauseFormat = new SimpleDateFormat(formatStr); } this.phase = PHASE.first(); + completenessThreshold = conf.getFloat(CubeQueryConfUtil.COMPLETENESS_THRESHOLD, + CubeQueryConfUtil.DEFAULT_COMPLETENESS_THRESHOLD); + completenessPartCol = conf.get(CubeQueryConfUtil.COMPLETENESS_CHECK_PART_COL); } private List<String> getSupportedStorages(Configuration conf) { @@ -138,6 +146,13 @@ class StorageTableResolver implements ContextRewriter { resolveFactStoragePartitions(cubeql); } cubeql.pruneCandidateFactSet(CandidateTablePruneCode.NO_CANDIDATE_STORAGES); + if (client != null && client.isDataCompletenessCheckEnabled()) { + if (!cubeql.getCandidateFacts().isEmpty()) { + // resolve incomplete fact partition + resolveFactCompleteness(cubeql); + } + cubeql.pruneCandidateFactSet(CandidateTablePruneCode.INCOMPLETE_PARTITION); + } break; case DIM_TABLE_AND_PARTITIONS: 
resolveDimStorageTablesAndPartitions(cubeql); @@ -499,6 +514,133 @@ class StorageTableResolver implements ContextRewriter { } } + private static boolean processCubeColForDataCompleteness(CubeQueryContext cubeql, String cubeCol, String alias, + Set<String> measureTag, + Map<String, String> tagToMeasureOrExprMap) { + CubeMeasure column = cubeql.getCube().getMeasureByName(cubeCol); + if (column != null && column.getTags() != null) { + String dataCompletenessTag = column.getTags().get(MetastoreConstants.MEASURE_DATACOMPLETENESS_TAG); + //Checking if dataCompletenessTag is set for queried measure + if (dataCompletenessTag != null) { + measureTag.add(dataCompletenessTag); + String value = tagToMeasureOrExprMap.get(dataCompletenessTag); + if (value == null) { + tagToMeasureOrExprMap.put(dataCompletenessTag, alias); + } else { + value = value.concat(",").concat(alias); + tagToMeasureOrExprMap.put(dataCompletenessTag, value); + } + return true; + } + } + return false; + } + + private static void processMeasuresFromExprMeasures(CubeQueryContext cubeql, Set<String> measureTag, + Map<String, String> tagToMeasureOrExprMap) { + boolean isExprProcessed; + String cubeAlias = cubeql.getAliasForTableName(cubeql.getCube().getName()); + for (String expr : cubeql.getQueriedExprsWithMeasures()) { + isExprProcessed = false; + for (ExpressionResolver.ExprSpecContext esc : cubeql.getExprCtx().getExpressionContext(expr, cubeAlias) + .getAllExprs()) { + if (esc.getTblAliasToColumns().get(cubeAlias) != null) { + for (String cubeCol : esc.getTblAliasToColumns().get(cubeAlias)) { + if (processCubeColForDataCompleteness(cubeql, cubeCol, expr, measureTag, tagToMeasureOrExprMap)) { + /* This is done to associate the expression with one of the dataCompletenessTag for the measures. 
+ So, even if the expression is composed of measures with different dataCompletenessTags, we will be + determining the dataCompleteness from one of the measure and this expression is grouped with the + other queried measures that have the same dataCompletenessTag. */ + isExprProcessed = true; + break; + } + } + } + if (isExprProcessed) { + break; + } + } + } + } + + private void resolveFactCompleteness(CubeQueryContext cubeql) throws LensException { + if (client == null || client.getCompletenessChecker() == null || completenessPartCol == null) { + return; + } + DataCompletenessChecker completenessChecker = client.getCompletenessChecker(); + Set<String> measureTag = new HashSet<>(); + Map<String, String> tagToMeasureOrExprMap = new HashMap<>(); + + processMeasuresFromExprMeasures(cubeql, measureTag, tagToMeasureOrExprMap); + + Set<String> measures = cubeql.getQueriedMsrs(); + if (measures == null) { + measures = new HashSet<>(); + } + for (String measure : measures) { + processCubeColForDataCompleteness(cubeql, measure, measure, measureTag, tagToMeasureOrExprMap); + } + //Checking if dataCompletenessTag is set for the fact + if (measureTag.isEmpty()) { + log.info("No Queried measures with the dataCompletenessTag, hence skipping the availability check"); + return; + } + Iterator<CandidateFact> i = cubeql.getCandidateFacts().iterator(); + DateFormat formatter = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss"); + formatter.setTimeZone(TimeZone.getTimeZone("UTC")); + while (i.hasNext()) { + CandidateFact cFact = i.next(); + // Map from measure to the map from partition to %completeness + Map<String, Map<String, Float>> incompleteMeasureData = new HashMap<>(); + + String factDataCompletenessTag = cFact.fact.getDataCompletenessTag(); + if (factDataCompletenessTag == null) { + log.info("Not checking completeness for the fact table:{} as the dataCompletenessTag is not set", cFact.fact); + continue; + } + boolean isFactDataIncomplete = false; + for (TimeRange range : 
cubeql.getTimeRanges()) { + if (!range.getPartitionColumn().equals(completenessPartCol)) { + log.info("Completeness check not available for partCol:{}", range.getPartitionColumn()); + continue; + } + Date from = range.getFromDate(); + Date to = range.getToDate(); + Map<String, Map<Date, Float>> completenessMap = completenessChecker.getCompleteness(factDataCompletenessTag, + from, to, measureTag); + if (completenessMap != null && !completenessMap.isEmpty()) { + for (Map.Entry<String, Map<Date, Float>> measureCompleteness : completenessMap.entrySet()) { + String tag = measureCompleteness.getKey(); + for (Map.Entry<Date, Float> completenessResult : measureCompleteness.getValue().entrySet()) { + if (completenessResult.getValue() < completenessThreshold) { + log.info("Completeness for the measure_tag {} is {}, threshold: {}, for the hour {}", tag, + completenessResult.getValue(), completenessThreshold, + formatter.format(completenessResult.getKey())); + String measureorExprFromTag = tagToMeasureOrExprMap.get(tag); + Map<String, Float> incompletePartition = incompleteMeasureData.get(measureorExprFromTag); + if (incompletePartition == null) { + incompletePartition = new HashMap<>(); + incompleteMeasureData.put(measureorExprFromTag, incompletePartition); + } + incompletePartition.put(formatter.format(completenessResult.getKey()), completenessResult.getValue()); + isFactDataIncomplete = true; + } + } + } + } + } + if (isFactDataIncomplete) { + log.info("Fact table:{} has partitions with incomplete data: {} for given ranges: {}", cFact.fact, + incompleteMeasureData, cubeql.getTimeRanges()); + if (failOnPartialData) { + i.remove(); + cubeql.addFactPruningMsgs(cFact.fact, incompletePartitions(incompleteMeasureData)); + } else { + cFact.setDataCompletenessMap(incompleteMeasureData); + } + } + } + } void addNonExistingParts(String name, Set<String> nonExistingParts) { nonExistingPartitions.put(name, nonExistingParts); 
http://git-wip-us.apache.org/repos/asf/lens/blob/078555c1/lens-cube/src/main/resources/olap-query-conf.xml ---------------------------------------------------------------------- diff --git a/lens-cube/src/main/resources/olap-query-conf.xml b/lens-cube/src/main/resources/olap-query-conf.xml index 0c888ca..b389d6a 100644 --- a/lens-cube/src/main/resources/olap-query-conf.xml +++ b/lens-cube/src/main/resources/olap-query-conf.xml @@ -107,6 +107,19 @@ </property> <property> + <name>lens.cube.query.completeness.check.partition.column</name> + <value></value> + <description>The Supported Partition Column for the Data Completeness check</description> + </property> + + <property> + <name>lens.cube.query.completeness.threshold</name> + <value>100</value> + <description>The query will fail if data completeness is less than the set threshold given that the flag + "lens.cube.query.fail.if.data.partial" is set as true</description> + </property> + + <property> <name>lens.cube.query.nonexisting.partitions</name> <value></value> <description>The list of comma separated non existing partitions, if query can run with partial data. 
The value will http://git-wip-us.apache.org/repos/asf/lens/blob/078555c1/lens-cube/src/test/java/org/apache/lens/cube/parse/CubeTestSetup.java ---------------------------------------------------------------------- diff --git a/lens-cube/src/test/java/org/apache/lens/cube/parse/CubeTestSetup.java b/lens-cube/src/test/java/org/apache/lens/cube/parse/CubeTestSetup.java index 0c43cb5..41ea83d 100644 --- a/lens-cube/src/test/java/org/apache/lens/cube/parse/CubeTestSetup.java +++ b/lens-cube/src/test/java/org/apache/lens/cube/parse/CubeTestSetup.java @@ -515,7 +515,10 @@ public class CubeTestSetup { private void createCube(CubeMetastoreClient client) throws HiveException, ParseException, LensException { cubeMeasures = new HashSet<CubeMeasure>(); - cubeMeasures.add(new ColumnMeasure(new FieldSchema("msr1", "int", "first measure"))); + Map<String, String> tags = new HashMap<>(); + tags.put(MetastoreConstants.MEASURE_DATACOMPLETENESS_TAG, "tag1"); + cubeMeasures.add(new ColumnMeasure(new FieldSchema("msr1", "int", "first measure"), null, null, null, null, null, + null, null, null, null, tags)); cubeMeasures.add(new ColumnMeasure(new FieldSchema("msr2", "float", "second measure"), "Measure2", null, "SUM", "RS")); cubeMeasures.add(new ColumnMeasure(new FieldSchema("msr21", "float", "second measure"), "Measure22", null, "SUM", @@ -526,6 +529,8 @@ public class CubeTestSetup { null)); cubeMeasures.add(new ColumnMeasure(new FieldSchema("msr4", "bigint", "fourth measure"), "Measure4", null, "COUNT", null)); + cubeMeasures.add(new ColumnMeasure(new FieldSchema("msr9", "bigint", "ninth measure"), null, null, null, null, + null, null, null, null, null, tags)); cubeMeasures.add(new ColumnMeasure(new FieldSchema("noAggrMsr", "bigint", "measure without a default aggregate"), "No aggregateMsr", null, null, null)); cubeMeasures.add(new ColumnMeasure(new FieldSchema("newmeasure", "bigint", "measure available from now"), @@ -682,6 +687,7 @@ public class CubeTestSetup { measures.add("msr1"); 
measures.add("msr2"); measures.add("msr3"); + measures.add("msr9"); Set<String> dimensions = new HashSet<String>(); dimensions.add("dim1"); dimensions.add("dim2"); @@ -1208,6 +1214,7 @@ public class CubeTestSetup { derivedProperties.put(MetastoreConstants.CUBE_ALL_FIELDS_QUERIABLE, "true"); Set<String> measures = new HashSet<>(); measures.add("msr1"); + measures.add("msr9"); measures.add("msr11"); Set<String> dimensions = new HashSet<>(); dimensions.add("dim1"); @@ -1404,6 +1411,7 @@ public class CubeTestSetup { factColumns = new ArrayList<FieldSchema>(); factColumns.add(new FieldSchema("msr11", "int", "first measure")); factColumns.add(new FieldSchema("msr12", "float", "second measure")); + factColumns.add(new FieldSchema("msr9", "bigint", "ninth measure")); // add dimensions of the cube factColumns.add(new FieldSchema("d_time", "timestamp", "event time")); @@ -1427,6 +1435,7 @@ public class CubeTestSetup { properties.clear(); properties.putAll(factValidityProperties); properties.put(MetastoreConstants.FACT_AGGREGATED_PROPERTY, "false"); + properties.put(MetastoreConstants.FACT_DATA_COMPLETENESS_TAG, "f2"); client.createCubeFactTable(BASE_CUBE_NAME, factName, factColumns, storageAggregatePeriods, 100L, properties, storageTables); @@ -1477,6 +1486,36 @@ public class CubeTestSetup { properties.put(MetastoreConstants.FACT_COL_START_TIME_PFX.concat("user_id_added_far_future"), "2099-01-01"); client.createCubeFactTable(BASE_CUBE_NAME, factName, factColumns, storageAggregatePeriods, 100L, properties, storageTables); + + factName = "testFact5_RAW_BASE"; + factColumns = new ArrayList<FieldSchema>(); + factColumns.add(new FieldSchema("msr9", "bigint", "ninth measure")); + + // add dimensions of the cube + factColumns.add(new FieldSchema("d_time", "timestamp", "event time")); + factColumns.add(new FieldSchema("processing_time", "timestamp", "processing time")); + factColumns.add(new FieldSchema("dim1", "string", "base dim")); + + properties.clear(); + 
properties.putAll(factValidityProperties); + properties.put(MetastoreConstants.FACT_AGGREGATED_PROPERTY, "false"); + properties.put(MetastoreConstants.FACT_DATA_COMPLETENESS_TAG, "f2"); + client.createCubeFactTable(BASE_CUBE_NAME, factName, factColumns, storageAggregatePeriods, 100L, properties, + storageTables); + + CubeFactTable fact = client.getFactTable(factName); + // Add all hourly partitions for two days + Calendar cal = Calendar.getInstance(); + cal.setTime(TWODAYS_BACK); + Date temp = cal.getTime(); + while (!(temp.after(NOW))) { + Map<String, Date> timeParts = new HashMap<String, Date>(); + timeParts.put("dt", temp); + StoragePartitionDesc sPartSpec = new StoragePartitionDesc(fact.getName(), timeParts, null, HOURLY); + client.addPartition(sPartSpec, c1, CubeTableType.FACT); + cal.add(HOUR_OF_DAY, 1); + temp = cal.getTime(); + } } private void createCubeContinuousFact(CubeMetastoreClient client) throws Exception { @@ -1942,6 +1981,7 @@ public class CubeTestSetup { Map<String, String> properties = new HashMap<String, String>(); properties.putAll(factValidityProperties); properties.put(MetastoreConstants.FACT_AGGREGATED_PROPERTY, "false"); + properties.put(MetastoreConstants.FACT_DATA_COMPLETENESS_TAG, "f1"); client.createCubeFactTable(TEST_CUBE_NAME, factName, factColumns, storageAggregatePeriods, 100L, properties, storageTables); http://git-wip-us.apache.org/repos/asf/lens/blob/078555c1/lens-cube/src/test/java/org/apache/lens/cube/parse/FieldsCannotBeQueriedTogetherTest.java ---------------------------------------------------------------------- diff --git a/lens-cube/src/test/java/org/apache/lens/cube/parse/FieldsCannotBeQueriedTogetherTest.java b/lens-cube/src/test/java/org/apache/lens/cube/parse/FieldsCannotBeQueriedTogetherTest.java index 7afa32e..1a5bd0d 100644 --- a/lens-cube/src/test/java/org/apache/lens/cube/parse/FieldsCannotBeQueriedTogetherTest.java +++ b/lens-cube/src/test/java/org/apache/lens/cube/parse/FieldsCannotBeQueriedTogetherTest.java @@ 
-228,7 +228,6 @@ public class FieldsCannotBeQueriedTogetherTest extends TestQueryRewrite { cityState.name is a dimension attribute used in where clause(filter) and referenced through join chain name cityState. It is queryable through source column basecube.cityid. basecube.cityid and msr1 are not present in the same derived cube. However since cityState.name is only present in the case statement, the query is allowed. */ - rewrite("select SUM(CASE WHEN cityState.name ='foo' THEN msr1 END) from basecube where " + TWO_DAYS_RANGE, conf); } http://git-wip-us.apache.org/repos/asf/lens/blob/078555c1/lens-cube/src/test/java/org/apache/lens/cube/parse/MockCompletenessChecker.java ---------------------------------------------------------------------- diff --git a/lens-cube/src/test/java/org/apache/lens/cube/parse/MockCompletenessChecker.java b/lens-cube/src/test/java/org/apache/lens/cube/parse/MockCompletenessChecker.java new file mode 100644 index 0000000..76e81d5 --- /dev/null +++ b/lens-cube/src/test/java/org/apache/lens/cube/parse/MockCompletenessChecker.java @@ -0,0 +1,46 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.lens.cube.parse; + +import java.util.*; + +import org.apache.lens.server.api.error.LensException; +import org.apache.lens.server.api.metastore.*; + +public class MockCompletenessChecker implements DataCompletenessChecker { + + @Override + public Map<String, Map<Date, Float>> getCompleteness(String factTag, Date start, Date end, Set<String> measureTag) + throws LensException { + Map<Date, Float> partitionCompleteness = new HashMap<>(); + Map<String, Map<Date, Float>> result = new HashMap<>(); + Calendar cal = Calendar.getInstance(); + cal.setTimeZone(TimeZone.getTimeZone("GMT")); + cal.add(Calendar.DATE, -1); + if (factTag.equals("f1")) { + partitionCompleteness.put(cal.getTime(), 80f); + } else { + partitionCompleteness.put(cal.getTime(), 90f); + } + result.put("tag1", partitionCompleteness); + return result; + } + +} + http://git-wip-us.apache.org/repos/asf/lens/blob/078555c1/lens-cube/src/test/java/org/apache/lens/cube/parse/TestBaseCubeQueries.java ---------------------------------------------------------------------- diff --git a/lens-cube/src/test/java/org/apache/lens/cube/parse/TestBaseCubeQueries.java b/lens-cube/src/test/java/org/apache/lens/cube/parse/TestBaseCubeQueries.java index f6cec1b..dbb8fa3 100644 --- a/lens-cube/src/test/java/org/apache/lens/cube/parse/TestBaseCubeQueries.java +++ b/lens-cube/src/test/java/org/apache/lens/cube/parse/TestBaseCubeQueries.java @@ -118,7 +118,8 @@ public class TestBaseCubeQueries extends TestQueryRewrite { boolean columnNotFound = false; List<String> testTimeDimFactTables = Arrays.asList("testfact3_base", "testfact1_raw_base", "testfact3_raw_base", "testfact5_base", "testfact6_base", "testfact4_raw_base"); - List<String> factTablesForMeasures = Arrays.asList("testfact_deprecated", "testfact2_raw_base", "testfact2_base"); + List<String> factTablesForMeasures = Arrays.asList("testfact_deprecated", "testfact2_raw_base", "testfact2_base", + "testfact5_raw_base"); for (Map.Entry<String, 
List<CandidateTablePruneCause>> entry : pruneCauses.getDetails().entrySet()) { if (entry.getValue().contains(CandidateTablePruneCause.columnNotFound("test_time_dim"))) { columnNotFound = true; http://git-wip-us.apache.org/repos/asf/lens/blob/078555c1/lens-cube/src/test/java/org/apache/lens/cube/parse/TestCubeRewriter.java ---------------------------------------------------------------------- diff --git a/lens-cube/src/test/java/org/apache/lens/cube/parse/TestCubeRewriter.java b/lens-cube/src/test/java/org/apache/lens/cube/parse/TestCubeRewriter.java index b90d4d3..c9e7c29 100644 --- a/lens-cube/src/test/java/org/apache/lens/cube/parse/TestCubeRewriter.java +++ b/lens-cube/src/test/java/org/apache/lens/cube/parse/TestCubeRewriter.java @@ -929,6 +929,54 @@ public class TestCubeRewriter extends TestQueryRewrite { compareQueries(hqlQuery, expected); } + /* The test is to check no failure on partial data when the flag FAIL_QUERY_ON_PARTIAL_DATA is not set + */ + @Test + public void testQueryWithMeasureWithDataCompletenessTagWithNoFailureOnPartialData() throws ParseException, + LensException { + //In this query a measure is used for which dataCompletenessTag is set. + Configuration conf = getConf(); + conf.setStrings(CubeQueryConfUtil.COMPLETENESS_CHECK_PART_COL, "dt"); + String hqlQuery = rewrite("select SUM(msr1) from basecube where " + TWO_DAYS_RANGE, conf); + String expected = getExpectedQuery("basecube", "select sum(basecube.msr1) FROM ", null, null, + getWhereForHourly2days("basecube", "c1_testfact1_raw_base")); + compareQueries(hqlQuery, expected); + } + + @Test + public void testQueryWithMeasureWithDataCompletenessPresentInMultipleFacts() throws ParseException, + LensException { + /*In this query a measure is used which is present in two facts with different %completeness. 
While resolving the + facts, the fact with the higher dataCompletenessFactor gets picked up.*/ + Configuration conf = getConf(); + conf.setStrings(CubeQueryConfUtil.COMPLETENESS_CHECK_PART_COL, "dt"); + String hqlQuery = rewrite("select SUM(msr9) from basecube where " + TWO_DAYS_RANGE, conf); + String expected = getExpectedQuery("basecube", "select sum(basecube.msr9) FROM ", null, null, + getWhereForHourly2days("basecube", "c1_testfact5_raw_base")); + compareQueries(hqlQuery, expected); + } + + @Test + public void testCubeWhereQueryWithMeasureWithDataCompletenessAndFailIfPartialDataFlagSet() throws ParseException, + LensException { + /*In this query a measure is used for which dataCompletenessTag is set and the flag FAIL_QUERY_ON_PARTIAL_DATA is + set. The partitions for the queried range are present but some of the them have incomplete data. So, the query + throws NO_CANDIDATE_FACT_AVAILABLE Exception*/ + Configuration conf = getConf(); + conf.setStrings(CubeQueryConfUtil.COMPLETENESS_CHECK_PART_COL, "dt"); + conf.setBoolean(CubeQueryConfUtil.FAIL_QUERY_ON_PARTIAL_DATA, true); + + LensException e = getLensExceptionInRewrite("select SUM(msr9) from basecube where " + TWO_DAYS_RANGE, conf); + assertEquals(e.getErrorCode(), LensCubeErrorCode.NO_CANDIDATE_FACT_AVAILABLE.getLensErrorInfo().getErrorCode()); + NoCandidateFactAvailableException ne = (NoCandidateFactAvailableException) e; + PruneCauses.BriefAndDetailedError pruneCauses = ne.getJsonMessage(); + /*Since the Flag FAIL_QUERY_ON_PARTIAL_DATA is set, and thhe queried fact has incomplete data, hence, we expect the + prune cause to be INCOMPLETE_PARTITION. 
The below check is to validate this.*/ + assertEquals(pruneCauses.getBrief().substring(0, INCOMPLETE_PARTITION.errorFormat.length() - 3), + INCOMPLETE_PARTITION.errorFormat.substring(0, + INCOMPLETE_PARTITION.errorFormat.length() - 3), pruneCauses.getBrief()); + } + @Test public void testCubeWhereQueryForMonthWithNoPartialData() throws Exception { Configuration conf = getConf(); http://git-wip-us.apache.org/repos/asf/lens/blob/078555c1/lens-cube/src/test/java/org/apache/lens/cube/parse/TestQueryRewrite.java ---------------------------------------------------------------------- diff --git a/lens-cube/src/test/java/org/apache/lens/cube/parse/TestQueryRewrite.java b/lens-cube/src/test/java/org/apache/lens/cube/parse/TestQueryRewrite.java index 0aa31f4..17a8b0f 100644 --- a/lens-cube/src/test/java/org/apache/lens/cube/parse/TestQueryRewrite.java +++ b/lens-cube/src/test/java/org/apache/lens/cube/parse/TestQueryRewrite.java @@ -28,6 +28,7 @@ import org.apache.lens.api.error.ErrorCollection; import org.apache.lens.api.error.ErrorCollectionFactory; import org.apache.lens.api.error.LensError; import org.apache.lens.cube.error.LensCubeErrorCode; +import org.apache.lens.server.api.*; import org.apache.lens.server.api.error.LensException; import org.apache.hadoop.conf.Configuration; @@ -63,7 +64,11 @@ public abstract class TestQueryRewrite { @BeforeSuite public static void setup() throws Exception { + hconf.setStrings(LensConfConstants.COMPLETENESS_CHECKER_CLASS, + "org.apache.lens.cube.parse.MockCompletenessChecker"); + hconf.setBoolean(LensConfConstants.ENABLE_DATACOMPLETENESS_CHECK, true); SessionState.start(hconf); + setup = new CubeTestSetup(); setup.createSources(hconf, TestQueryRewrite.class.getSimpleName()); } http://git-wip-us.apache.org/repos/asf/lens/blob/078555c1/lens-server-api/src/main/java/org/apache/lens/server/api/LensConfConstants.java ---------------------------------------------------------------------- diff --git 
a/lens-server-api/src/main/java/org/apache/lens/server/api/LensConfConstants.java b/lens-server-api/src/main/java/org/apache/lens/server/api/LensConfConstants.java index cf1c233..7ccb170 100644 --- a/lens-server-api/src/main/java/org/apache/lens/server/api/LensConfConstants.java +++ b/lens-server-api/src/main/java/org/apache/lens/server/api/LensConfConstants.java @@ -21,6 +21,7 @@ package org.apache.lens.server.api; import javax.ws.rs.core.MediaType; import org.apache.lens.server.api.error.LensException; +import org.apache.lens.server.api.metastore.*; /** * The Class LensConfConstants. @@ -1223,4 +1224,26 @@ public final class LensConfConstants { * Maximum number of scheduled job per user. */ public static final String MAX_SCHEDULED_JOB_PER_USER = SERVER_PFX + "scheduler.max.job.per.user"; + + /** + * The class that implements the DataCompletenessChecker Interface. This will take effect if the flag + * "lens.cube.metastore.enable.datacompleteness.check" is set. + */ + public static final String COMPLETENESS_CHECKER_CLASS = "lens.cube.metastore.completeness.checker.class"; + + /** + * The default implementation of DataCompletenessChecker + */ + public static final Class<? extends DataCompletenessChecker> DEFAULT_COMPLETENESS_CHECKER = + DefaultChecker.class.asSubclass(DataCompletenessChecker.class); + + /** + * This property is to enable Data Completeness Checks while resolving partitions. 
+ */ + public static final String ENABLE_DATACOMPLETENESS_CHECK = "lens.cube.metastore.enable.datacompleteness.check"; + + /** + * Default Value of the config "lens.cube.metastore.enable.datacompleteness.check" + */ + public static final boolean DEFAULT_ENABLE_DATACOMPLETENESS_CHECK = false; } http://git-wip-us.apache.org/repos/asf/lens/blob/078555c1/lens-server-api/src/main/java/org/apache/lens/server/api/metastore/DataCompletenessChecker.java ---------------------------------------------------------------------- diff --git a/lens-server-api/src/main/java/org/apache/lens/server/api/metastore/DataCompletenessChecker.java b/lens-server-api/src/main/java/org/apache/lens/server/api/metastore/DataCompletenessChecker.java new file mode 100644 index 0000000..68713fe --- /dev/null +++ b/lens-server-api/src/main/java/org/apache/lens/server/api/metastore/DataCompletenessChecker.java @@ -0,0 +1,55 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.lens.server.api.metastore; + +import java.util.Date; +import java.util.Map; +import java.util.Set; + +import org.apache.lens.server.api.error.LensException; + +/** + * DataCompletenessChecker is for identifying the completeness of data in a fact for the given set of measures, start + * and end date. A fact will have a dataCompletenessTag, multiple facts can have the same dataCompletenessTag. + * Similarly, measures will have a dataCompletenessTag, multiple measures can have the same dataCompletenessTag. + * The api will take the dataCompletenessTag for the facts and measures and compute the completeness based on these + * tags. The utility of having tags is that the similar kind of measures or facts which will have the same level of + * completeness can use the same tag, thus we avoid the redundant completeness computation for similar measures + * and facts. + * The implementations of the interface can truncate the start and end date. + */ +public interface DataCompletenessChecker { + + /** + * Get completeness of the set of measures in a fact based on the dataCompletenessTag for the given starttime and + * endtime. + * + * @param factTag This is the dataCompletenessTag for a fact. The tag can be specified by setting the property + * named dataCompletenessTag for the fact. Multiple facts can have the same dataCompletenessTag. + * @param start Start time of the query (Inclusive). + * @param end End time of the query (Exclusive). + * @param measureTag Set of distinct tags of the measures in the query. Multiple measures can have the same + * dataCompletenessTag. + * @return map; key is the name of the dataCompletenessTag which refers to one or more measures. Value is the map + * of date and %completeness. 
+ */ + Map<String, Map<Date, Float>> getCompleteness(String factTag, Date start, Date end, Set<String> measureTag) + throws LensException; + +} http://git-wip-us.apache.org/repos/asf/lens/blob/078555c1/lens-server-api/src/main/java/org/apache/lens/server/api/metastore/DefaultChecker.java ---------------------------------------------------------------------- diff --git a/lens-server-api/src/main/java/org/apache/lens/server/api/metastore/DefaultChecker.java b/lens-server-api/src/main/java/org/apache/lens/server/api/metastore/DefaultChecker.java new file mode 100644 index 0000000..2d1275c --- /dev/null +++ b/lens-server-api/src/main/java/org/apache/lens/server/api/metastore/DefaultChecker.java @@ -0,0 +1,34 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.lens.server.api.metastore; + +import java.util.Date; +import java.util.Map; +import java.util.Set; + +import org.apache.lens.server.api.error.LensException; + +public class DefaultChecker implements DataCompletenessChecker { + + @Override + public Map<String, Map<Date, Float>> getCompleteness(String factTag, Date start, Date end, Set<String> measureTag) + throws LensException { + return null; + } +} http://git-wip-us.apache.org/repos/asf/lens/blob/078555c1/lens-server/src/main/resources/lensserver-default.xml ---------------------------------------------------------------------- diff --git a/lens-server/src/main/resources/lensserver-default.xml b/lens-server/src/main/resources/lensserver-default.xml index 0ac722d..e652a0f 100644 --- a/lens-server/src/main/resources/lensserver-default.xml +++ b/lens-server/src/main/resources/lensserver-default.xml @@ -937,4 +937,16 @@ then there is no restriction on the number of jobs scheduled. </description> </property> + <property> + <name>lens.cube.metastore.enable.datacompleteness.check</name> + <value>false</value> + <description>This property is to enable Data Completeness Checks while resolving partitions.</description> + </property> + <property> + <name>lens.cube.metastore.completeness.checker.class</name> + <value>org.apache.lens.server.api.metastore.DefaultChecker</value> + <description>The class that implements the DataCompletenessChecker Interface. This will take effect if the flag + "lens.cube.metastore.enable.datacompleteness.check" is set. 
+ </description> + </property> </configuration> http://git-wip-us.apache.org/repos/asf/lens/blob/078555c1/lens-server/src/main/resources/lenssession-default.xml ---------------------------------------------------------------------- diff --git a/lens-server/src/main/resources/lenssession-default.xml b/lens-server/src/main/resources/lenssession-default.xml index 8d9f097..ca38ba6 100644 --- a/lens-server/src/main/resources/lenssession-default.xml +++ b/lens-server/src/main/resources/lenssession-default.xml @@ -374,6 +374,13 @@ </property> <property> + <name>lens.cube.query.completeness.threshold</name> + <value>100</value> + <description>The query will fail if data completeness is less than the set threshold given that the flag + "lens.cube.query.fail.if.data.partial" is set as true</description> + </property> + + <property> <name>lens.session.metastore.exclude.cubetables.from.nativetables</name> <value>true</value> <description>Exclude cube related tables when fetching native tables</description>