LENS-1333: Add data completeness checks on query writing path

Project: http://git-wip-us.apache.org/repos/asf/lens/repo
Commit: http://git-wip-us.apache.org/repos/asf/lens/commit/078555c1
Tree: http://git-wip-us.apache.org/repos/asf/lens/tree/078555c1
Diff: http://git-wip-us.apache.org/repos/asf/lens/diff/078555c1

Branch: refs/heads/master
Commit: 078555c1a92cecef19f53962703d7dd00eb44633
Parents: 0cce226
Author: Narayan Periwal <narayan.peri...@inmobi.com>
Authored: Fri Nov 11 09:05:49 2016 +0530
Committer: Amareshwari Sriramadasu <amareshw...@apache.org>
Committed: Fri Nov 11 09:05:49 2016 +0530

----------------------------------------------------------------------
 .../lens/cube/metadata/CubeFactTable.java       |   4 +
 .../lens/cube/metadata/CubeMetastoreClient.java |  22 ++
 .../lens/cube/metadata/MetastoreConstants.java  |   2 +
 .../apache/lens/cube/parse/CandidateFact.java   |   3 +
 .../cube/parse/CandidateTablePruneCause.java    |  21 ++
 .../lens/cube/parse/CubeQueryConfUtil.java      |   4 +-
 .../cube/parse/DataCompletenessChecker.java     |  55 -----
 .../cube/parse/MaxCoveringFactResolver.java     |  55 ++++-
 .../lens/cube/parse/StorageTableResolver.java   | 154 +++++++++++-
 .../src/main/resources/olap-query-conf.xml      |  13 +
 .../apache/lens/cube/parse/CubeTestSetup.java   |  42 +++-
 .../FieldsCannotBeQueriedTogetherTest.java      |   1 -
 .../cube/parse/MockCompletenessChecker.java     |  46 ++++
 .../lens/cube/parse/TestBaseCubeQueries.java    |   3 +-
 .../lens/cube/parse/TestCubeRewriter.java       |  48 ++++
 .../lens/cube/parse/TestQueryRewrite.java       |   5 +
 .../lens/server/api/LensConfConstants.java      |  23 ++
 .../api/metastore/DataCompletenessChecker.java  |  55 +++++
 .../server/api/metastore/DefaultChecker.java    |  34 +++
 .../src/main/resources/lensserver-default.xml   |  12 +
 .../src/main/resources/lenssession-default.xml  |   7 +
 src/site/apt/admin/config.apt                   | 240 ++++++++++---------
 src/site/apt/admin/session-config.apt           |  78 +++---
 src/site/apt/user/olap-query-conf.apt           |  60 ++---
 24 files changed, 737 insertions(+), 250 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lens/blob/078555c1/lens-cube/src/main/java/org/apache/lens/cube/metadata/CubeFactTable.java
----------------------------------------------------------------------
diff --git a/lens-cube/src/main/java/org/apache/lens/cube/metadata/CubeFactTable.java b/lens-cube/src/main/java/org/apache/lens/cube/metadata/CubeFactTable.java
index fb958c3..adb6c92 100644
--- a/lens-cube/src/main/java/org/apache/lens/cube/metadata/CubeFactTable.java
+++ b/lens-cube/src/main/java/org/apache/lens/cube/metadata/CubeFactTable.java
@@ -314,6 +314,10 @@ public class CubeFactTable extends AbstractCubeTable {
     addCubeNames(getName(), getProperties(), cubeName);
   }
 
+  public String getDataCompletenessTag() {
+    return getProperties().get(MetastoreConstants.FACT_DATA_COMPLETENESS_TAG);
+  }
+
   public boolean isAggregated() {
     // It's aggregate table unless explicitly set to false
     return !"false".equalsIgnoreCase(getProperties().get(MetastoreConstants.FACT_AGGREGATED_PROPERTY));

http://git-wip-us.apache.org/repos/asf/lens/blob/078555c1/lens-cube/src/main/java/org/apache/lens/cube/metadata/CubeMetastoreClient.java
----------------------------------------------------------------------
diff --git a/lens-cube/src/main/java/org/apache/lens/cube/metadata/CubeMetastoreClient.java b/lens-cube/src/main/java/org/apache/lens/cube/metadata/CubeMetastoreClient.java
index e14c43f..6c9cde2 100644
--- a/lens-cube/src/main/java/org/apache/lens/cube/metadata/CubeMetastoreClient.java
+++ b/lens-cube/src/main/java/org/apache/lens/cube/metadata/CubeMetastoreClient.java
@@ -31,7 +31,9 @@ import org.apache.lens.cube.metadata.Storage.LatestInfo;
 import org.apache.lens.cube.metadata.Storage.LatestPartColumnInfo;
 import org.apache.lens.cube.metadata.timeline.PartitionTimeline;
 import org.apache.lens.cube.metadata.timeline.PartitionTimelineFactory;
+import org.apache.lens.server.api.*;
 import org.apache.lens.server.api.error.LensException;
+import org.apache.lens.server.api.metastore.DataCompletenessChecker;
 import org.apache.lens.server.api.util.LensUtil;
 
 import org.apache.commons.lang.StringUtils;
@@ -45,6 +47,7 @@ import org.apache.hadoop.hive.ql.metadata.HiveException;
 import org.apache.hadoop.hive.ql.metadata.Partition;
 import org.apache.hadoop.hive.ql.metadata.Table;
 import org.apache.hadoop.hive.ql.session.SessionState;
+import org.apache.hadoop.util.ReflectionUtils;
 import org.apache.thrift.TException;
 
 import com.google.common.collect.Lists;
@@ -92,6 +95,25 @@ public class CubeMetastoreClient {
   private static final Map<String, CubeMetastoreClient> CLIENT_MAPPING = Maps.newConcurrentMap();
   // Set of all storage table names for which latest partitions exist
   private final Set<String> latestLookupCache = Sets.newSetFromMap(new ConcurrentHashMap<String, Boolean>());
+  private DataCompletenessChecker completenessChecker;
+
+  private Boolean isDataCompletenessCheckEnabled;
+
+  public DataCompletenessChecker getCompletenessChecker() {
+    if (completenessChecker == null) {
+      completenessChecker = ReflectionUtils.newInstance(config.getClass(LensConfConstants.COMPLETENESS_CHECKER_CLASS,
+              LensConfConstants.DEFAULT_COMPLETENESS_CHECKER, DataCompletenessChecker.class), this.config);
+    }
+    return completenessChecker;
+  }
+
+  public boolean isDataCompletenessCheckEnabled() {
+    if (isDataCompletenessCheckEnabled == null) {
+      isDataCompletenessCheckEnabled = config.getBoolean(LensConfConstants.ENABLE_DATACOMPLETENESS_CHECK,
+              LensConfConstants.DEFAULT_ENABLE_DATACOMPLETENESS_CHECK);
+    }
+    return isDataCompletenessCheckEnabled;
+  }
 
   /** extract storage name from fact and storage table name. String operation */
   private String extractStorageName(CubeFactTable fact, String storageTableName) throws LensException {
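
A minimal usage sketch (not part of the patch) of the two new accessors, assuming the existing CubeMetastoreClient.getInstance(HiveConf) factory and borrowing the "f2" fact tag and "tag1" measure tag from the test setup further below:

  import java.util.Collections;
  import java.util.Date;
  import java.util.Map;

  import org.apache.hadoop.hive.conf.HiveConf;
  import org.apache.lens.cube.metadata.CubeMetastoreClient;
  import org.apache.lens.server.api.metastore.DataCompletenessChecker;

  public class CompletenessLookupSketch {
    public static void printCompleteness(HiveConf conf, Date from, Date to) throws Exception {
      CubeMetastoreClient client = CubeMetastoreClient.getInstance(conf);
      // The checker is instantiated lazily via ReflectionUtils and is only meaningful
      // when lens.cube.metastore.enable.datacompleteness.check is true.
      if (client.isDataCompletenessCheckEnabled()) {
        DataCompletenessChecker checker = client.getCompletenessChecker();
        Map<String, Map<Date, Float>> completeness =
            checker.getCompleteness("f2", from, to, Collections.singleton("tag1"));
        System.out.println(completeness);
      }
    }
  }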

http://git-wip-us.apache.org/repos/asf/lens/blob/078555c1/lens-cube/src/main/java/org/apache/lens/cube/metadata/MetastoreConstants.java
----------------------------------------------------------------------
diff --git a/lens-cube/src/main/java/org/apache/lens/cube/metadata/MetastoreConstants.java b/lens-cube/src/main/java/org/apache/lens/cube/metadata/MetastoreConstants.java
index 4585ef7..88500fd 100644
--- a/lens-cube/src/main/java/org/apache/lens/cube/metadata/MetastoreConstants.java
+++ b/lens-cube/src/main/java/org/apache/lens/cube/metadata/MetastoreConstants.java
@@ -57,6 +57,7 @@ public final class MetastoreConstants {
   public static final String FACT_RELATIVE_END_TIME = "cube.fact.relative.end.time";
   public static final String FACT_COL_START_TIME_PFX = "cube.fact.col.start.time.";
   public static final String FACT_COL_END_TIME_PFX = "cube.fact.col.end.time.";
+  public static final String FACT_DATA_COMPLETENESS_TAG = "cube.fact.datacompleteness.tag";
 
   // Segmentation constants
   public static final String SEGMENTATION_KEY_PFX = "cube.segmentation.internal.";
@@ -95,6 +96,7 @@ public final class MetastoreConstants {
   public static final String MAX_SFX = ".max";
   public static final String EXPR_SFX = ".expr";
   public static final String FORMATSTRING_SFX = ".format";
+  public static final String MEASURE_DATACOMPLETENESS_TAG = "cube.measure.datacompleteness.tag";
 
   // dimension constants
   public static final String DIM_KEY_PFX = "cube.dimension.";

http://git-wip-us.apache.org/repos/asf/lens/blob/078555c1/lens-cube/src/main/java/org/apache/lens/cube/parse/CandidateFact.java
----------------------------------------------------------------------
diff --git a/lens-cube/src/main/java/org/apache/lens/cube/parse/CandidateFact.java b/lens-cube/src/main/java/org/apache/lens/cube/parse/CandidateFact.java
index 5dc9dc9..b42262d 100644
--- a/lens-cube/src/main/java/org/apache/lens/cube/parse/CandidateFact.java
+++ b/lens-cube/src/main/java/org/apache/lens/cube/parse/CandidateFact.java
@@ -86,6 +86,9 @@ public class CandidateFact implements CandidateTable, QueryAST {
   private final Map<TimeRange, Map<String, LinkedHashSet<FactPartition>>> rangeToStoragePartMap = new HashMap<>();
   @Getter
   private final Map<TimeRange, Map<String, String>> rangeToStorageWhereMap = new HashMap<>();
+  @Getter
+  @Setter
+  private Map<String, Map<String, Float>> dataCompletenessMap;
 
   CandidateFact(CubeFactTable fact, CubeInterface cube) {
     this.fact = fact;

http://git-wip-us.apache.org/repos/asf/lens/blob/078555c1/lens-cube/src/main/java/org/apache/lens/cube/parse/CandidateTablePruneCause.java
----------------------------------------------------------------------
diff --git a/lens-cube/src/main/java/org/apache/lens/cube/parse/CandidateTablePruneCause.java b/lens-cube/src/main/java/org/apache/lens/cube/parse/CandidateTablePruneCause.java
index 78fb21d..2ad6e20 100644
--- a/lens-cube/src/main/java/org/apache/lens/cube/parse/CandidateTablePruneCause.java
+++ b/lens-cube/src/main/java/org/apache/lens/cube/parse/CandidateTablePruneCause.java
@@ -146,6 +146,18 @@ public class CandidateTablePruneCause {
         }
         return new String[]{missingPartitions.toString()};
       }
+    },
+    // incomplete data in the fact
+    INCOMPLETE_PARTITION("Data is incomplete. Details : %s") {
+      Object[] getFormatPlaceholders(Set<CandidateTablePruneCause> causes) {
+        Set<Map<String, Map<String, Float>>> incompletePartitions = Sets.newHashSet();
+        for (CandidateTablePruneCause cause : causes) {
+          if (cause.getIncompletePartitions() != null) {
+            incompletePartitions.add(cause.getIncompletePartitions());
+          }
+        }
+        return new String[]{incompletePartitions.toString()};
+      }
     };
 
 
@@ -231,6 +243,8 @@ public class CandidateTablePruneCause {
 
   // populated only incase of missing partitions cause
   private Set<String> missingPartitions;
+  // populated only incase of incomplete partitions cause
+  private Map<String, Map<String, Float>> incompletePartitions;
   // populated only incase of missing update periods cause
   private List<String> missingUpdatePeriods;
   // populated in case of missing columns
@@ -300,6 +314,13 @@ public class CandidateTablePruneCause {
     return cause;
   }
 
+  public static CandidateTablePruneCause incompletePartitions(Map<String, Map<String, Float>> incompleteParts) {
+    CandidateTablePruneCause cause = new CandidateTablePruneCause(INCOMPLETE_PARTITION);
+    //incompleteParts may be null when partial data is allowed.
+    cause.setIncompletePartitions(incompleteParts);
+    return cause;
+  }
+
   public static CandidateTablePruneCause lessData(MaxCoveringFactResolver.TimeCovered timeCovered) {
     CandidateTablePruneCause cause = new CandidateTablePruneCause(LESS_DATA);
     cause.setMaxTimeCovered(timeCovered);

http://git-wip-us.apache.org/repos/asf/lens/blob/078555c1/lens-cube/src/main/java/org/apache/lens/cube/parse/CubeQueryConfUtil.java
----------------------------------------------------------------------
diff --git a/lens-cube/src/main/java/org/apache/lens/cube/parse/CubeQueryConfUtil.java b/lens-cube/src/main/java/org/apache/lens/cube/parse/CubeQueryConfUtil.java
index 408086f..300d798 100644
--- a/lens-cube/src/main/java/org/apache/lens/cube/parse/CubeQueryConfUtil.java
+++ b/lens-cube/src/main/java/org/apache/lens/cube/parse/CubeQueryConfUtil.java
@@ -25,7 +25,6 @@ import org.apache.lens.cube.metadata.UpdatePeriod;
 
 import org.apache.commons.lang.StringUtils;
 import org.apache.hadoop.conf.Configuration;
-
 /**
  * Contains all configurations of cube query rewriting.
  */
@@ -46,6 +45,7 @@ public final class CubeQueryConfUtil {
   public static final String NON_EXISTING_PARTITIONS = "lens.cube.query.nonexisting.partitions";
   public static final String QUERY_MAX_INTERVAL = "lens.cube.query.max.interval";
   public static final String PROCESS_TIME_PART_COL = "lens.cube.query.process.time" + ".partition.column";
+  public static final String COMPLETENESS_CHECK_PART_COL = "lens.cube.query.completeness.check.partition.column";
   public static final String LOOK_AHEAD_PT_PARTS_PFX = "lens.cube.query.lookahead.ptparts.forinterval.";
   public static final String ENABLE_GROUP_BY_TO_SELECT = "lens.cube.query.promote.groupby.toselect";
   public static final String ENABLE_SELECT_TO_GROUPBY = "lens.cube.query.promote.select.togroupby";
@@ -123,4 +123,6 @@ public final class CubeQueryConfUtil {
   public static final String DEFAULT_BRIDGE_TABLE_FIELD_ARRAY_FILTER = "array_contains";
   public static final String REWRITE_DIM_FILTER_TO_FACT_FILTER = "lens.cube.query.rewrite.dim.filter.to.fact.filter";
   public static final boolean DEFAULT_REWRITE_DIM_FILTER_TO_FACT_FILTER = false;
+  public static final String COMPLETENESS_THRESHOLD = "lens.cube.query.completeness.threshold";
+  public static final float DEFAULT_COMPLETENESS_THRESHOLD = 100f;
 }
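
For reference, a small sketch (values illustrative, not part of the patch) of setting the two new query-level knobs on a Hadoop Configuration, the same way the rewriter tests below set the partition column:

  import org.apache.hadoop.conf.Configuration;
  import org.apache.lens.cube.parse.CubeQueryConfUtil;

  public class CompletenessQueryConfSketch {
    public static Configuration withCompletenessChecks() {
      Configuration conf = new Configuration();
      // Partition column on which completeness is tracked ("dt" in the tests below).
      conf.set(CubeQueryConfUtil.COMPLETENESS_CHECK_PART_COL, "dt");
      // Facts whose completeness falls below this percentage count as incomplete
      // (DEFAULT_COMPLETENESS_THRESHOLD is 100).
      conf.setFloat(CubeQueryConfUtil.COMPLETENESS_THRESHOLD, 95f);
      return conf;
    }
  }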

http://git-wip-us.apache.org/repos/asf/lens/blob/078555c1/lens-cube/src/main/java/org/apache/lens/cube/parse/DataCompletenessChecker.java
----------------------------------------------------------------------
diff --git a/lens-cube/src/main/java/org/apache/lens/cube/parse/DataCompletenessChecker.java b/lens-cube/src/main/java/org/apache/lens/cube/parse/DataCompletenessChecker.java
deleted file mode 100644
index 6a0230d..0000000
--- a/lens-cube/src/main/java/org/apache/lens/cube/parse/DataCompletenessChecker.java
+++ /dev/null
@@ -1,55 +0,0 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- *
- *   http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-package org.apache.lens.cube.parse;
-
-import java.util.Date;
-import java.util.Map;
-import java.util.Set;
-
-import org.apache.lens.server.api.error.LensException;
-
-/**
- * DataCompletenessChecker is for identifying the completeness of data in a fact for the given set of measures, start
- * and end date. A fact will have a dataCompletenessTag, multiple facts can have the same dataCompletenessTag.
- * Similarly, measures will have a dataCompletenessTag, multiple measures can have the same dataCompletenessTag.
- * The api will take the dataCompletenessTag for the facts and measures and compute the completeness based on these
- * tags. The utility of having tags is that the similar kind of measures or facts which will have the same level of
- * completeness can use the same tag, thus we avoid the redundant completeness computation for similar measures
- * and facts.
- * The implementations of the interface can truncate the start and end date.
- */
-public interface DataCompletenessChecker {
-
-  /**
-   * Get completeness of the set of measures in a fact based on the dataCompletenessTag for the given starttime and
-   * endtime.
-   *
-   * @param factTag This is the dataCompletenessTag for a fact. The tag can be specified by setting the property
-   *                named dataCompletenessTag for the fact. Mutltiple facts can have the same dataCompletenessTag.
-   * @param start Start time of the query (Inclusive).
-   * @param end End time of the query (Exclusive).
-   * @param measureTag List of distinct tag of the measures in the query. Multiple measures can have the same
-   *                   dataCompletenessTag.
-   * @return map; key is the name of the dataCompletenessTag which refers to one or more measures. Value is the map
-   * of date and %completeness.
-   */
-  Map<String, Map<Date, Float>> getCompleteness(String factTag, Date start, Date end, Set<String> measureTag)
-    throws LensException;
-
-}

http://git-wip-us.apache.org/repos/asf/lens/blob/078555c1/lens-cube/src/main/java/org/apache/lens/cube/parse/MaxCoveringFactResolver.java
----------------------------------------------------------------------
diff --git a/lens-cube/src/main/java/org/apache/lens/cube/parse/MaxCoveringFactResolver.java b/lens-cube/src/main/java/org/apache/lens/cube/parse/MaxCoveringFactResolver.java
index 13f1aa4..45824fe 100644
--- a/lens-cube/src/main/java/org/apache/lens/cube/parse/MaxCoveringFactResolver.java
+++ b/lens-cube/src/main/java/org/apache/lens/cube/parse/MaxCoveringFactResolver.java
@@ -57,6 +57,13 @@ class MaxCoveringFactResolver implements ContextRewriter {
       // nothing to prune.
       return;
     }
+    resolveByTimeCovered(cubeql);
+    if (cubeql.getMetastoreClient() != null && cubeql.getMetastoreClient().isDataCompletenessCheckEnabled()) {
+      resolveByDataCompleteness(cubeql);
+    }
+  }
+
+  private void resolveByTimeCovered(CubeQueryContext cubeql) {
     // For each part column, which candidate fact sets are covering how much amount.
     // Later, we'll maximize coverage for each queried part column.
     Map<String, Map<Set<CandidateFact>, Long>> partCountsPerPartCol = Maps.newHashMap();
@@ -82,7 +89,7 @@ class MaxCoveringFactResolver implements ContextRewriter {
           }
           if (timeCoveredLong < maxTimeCovered) {
             log.info("Not considering facts:{} from candidate fact tables as 
it covers less time than the max"
-              + " for partition column: {} which is: {}", facts, 
partColQueried, timeCovered);
+                    + " for partition column: {} which is: {}", facts, 
partColQueried, timeCovered);
             iter.remove();
           }
         }
@@ -91,6 +98,52 @@ class MaxCoveringFactResolver implements ContextRewriter {
     cubeql.pruneCandidateFactWithCandidateSet(CandidateTablePruneCause.lessData(null));
   }
 
+  private void resolveByDataCompleteness(CubeQueryContext cubeql) {
+    // From the list of  candidate fact sets, we calculate the maxDataCompletenessFactor.
+    float maxDataCompletenessFactor = 0f;
+    for (Set<CandidateFact> facts : cubeql.getCandidateFactSets()) {
+      float dataCompletenessFactor = computeDataCompletenessFactor(facts);
+      if (dataCompletenessFactor > maxDataCompletenessFactor) {
+        maxDataCompletenessFactor = dataCompletenessFactor;
+      }
+    }
+
+    if (maxDataCompletenessFactor == 0f) {
+      //there is nothing to prune
+      return;
+    }
+
+    // We prune those candidate fact set, whose dataCompletenessFactor is less than maxDataCompletenessFactor
+    Iterator<Set<CandidateFact>> iter = cubeql.getCandidateFactSets().iterator();
+    while (iter.hasNext()) {
+      Set<CandidateFact> facts = iter.next();
+      float dataCompletenessFactor = computeDataCompletenessFactor(facts);
+      if (dataCompletenessFactor < maxDataCompletenessFactor) {
+        log.info("Not considering facts:{} from candidate fact tables as the 
dataCompletenessFactor for this:{} is "
+                + "less than the max:{}", facts, dataCompletenessFactor, 
maxDataCompletenessFactor);
+        iter.remove();
+      }
+    }
+    cubeql.pruneCandidateFactWithCandidateSet(CandidateTablePruneCause.incompletePartitions(null));
+  }
+
+  private float computeDataCompletenessFactor(Set<CandidateFact> facts) {
+    float completenessFactor = 0f;
+    int numPartition = 0;
+    for (CandidateFact fact : facts) {
+      if (fact.getDataCompletenessMap() != null) {
+        Map<String, Map<String, Float>> completenessMap = fact.getDataCompletenessMap();
+        for (Map<String, Float> partitionCompleteness : completenessMap.values()) {
+          for (Float value : partitionCompleteness.values()) {
+            numPartition++;
+            completenessFactor += value;
+          }
+        }
+      }
+    }
+    return numPartition == 0 ? completenessFactor : completenessFactor/numPartition;
+  }
+
   /**
    * Returns time covered by fact set for each part column.
    * @param facts
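
To illustrate the averaging in computeDataCompletenessFactor with made-up numbers: a candidate fact set whose dataCompletenessMap carries two hourly readings of 90% and 80% for one measure gets a factor of (90 + 80) / 2 = 85 and survives, while a competing set averaging 80 is pruned with the INCOMPLETE_PARTITION cause; a set with no completeness data contributes a factor of 0 and is pruned unless every candidate set is 0, in which case nothing is pruned.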

http://git-wip-us.apache.org/repos/asf/lens/blob/078555c1/lens-cube/src/main/java/org/apache/lens/cube/parse/StorageTableResolver.java
----------------------------------------------------------------------
diff --git a/lens-cube/src/main/java/org/apache/lens/cube/parse/StorageTableResolver.java b/lens-cube/src/main/java/org/apache/lens/cube/parse/StorageTableResolver.java
index db26718..cdf6812 100644
--- a/lens-cube/src/main/java/org/apache/lens/cube/parse/StorageTableResolver.java
+++ b/lens-cube/src/main/java/org/apache/lens/cube/parse/StorageTableResolver.java
@@ -18,22 +18,24 @@
  */
 package org.apache.lens.cube.parse;
 
-import static org.apache.lens.cube.metadata.DateUtil.WSPACE;
-import static org.apache.lens.cube.metadata.MetastoreUtil.*;
-import static org.apache.lens.cube.parse.CandidateTablePruneCause.*;
-import static org.apache.lens.cube.parse.CandidateTablePruneCause.CandidateTablePruneCode.*;
-import static org.apache.lens.cube.parse.CandidateTablePruneCause.SkipStorageCode.*;
-
 import java.text.DateFormat;
 import java.text.ParseException;
 import java.text.SimpleDateFormat;
+
 import java.util.*;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
 
+import static org.apache.lens.cube.metadata.DateUtil.WSPACE;
+import static org.apache.lens.cube.metadata.MetastoreUtil.*;
+import static org.apache.lens.cube.parse.CandidateTablePruneCause.*;
+import static org.apache.lens.cube.parse.CandidateTablePruneCause.CandidateTablePruneCode.*;
+import static org.apache.lens.cube.parse.CandidateTablePruneCause.SkipStorageCode.*;
+
 import org.apache.lens.cube.metadata.*;
 import org.apache.lens.cube.parse.CandidateTablePruneCause.*;
 import org.apache.lens.server.api.error.LensException;
+import org.apache.lens.server.api.metastore.*;
 
 import org.apache.commons.lang.StringUtils;
 import org.apache.hadoop.conf.Configuration;
@@ -42,6 +44,7 @@ import org.apache.hadoop.util.ReflectionUtils;
 
 import com.google.common.collect.Lists;
 import com.google.common.collect.Sets;
+
 import lombok.extern.slf4j.Slf4j;
 
 /**
@@ -65,6 +68,8 @@ class StorageTableResolver implements ContextRewriter {
   private DateFormat partWhereClauseFormat = null;
   private PHASE phase;
   private HashMap<CubeFactTable, Map<String, SkipStorageCause>> skipStorageCausesPerFact;
+  private float completenessThreshold;
+  private String completenessPartCol;
 
   enum PHASE {
     FACT_TABLES, FACT_PARTITIONS, DIM_TABLE_AND_PARTITIONS;
@@ -104,6 +109,9 @@ class StorageTableResolver implements ContextRewriter {
       partWhereClauseFormat = new SimpleDateFormat(formatStr);
     }
     this.phase = PHASE.first();
+    completenessThreshold = conf.getFloat(CubeQueryConfUtil.COMPLETENESS_THRESHOLD,
+            CubeQueryConfUtil.DEFAULT_COMPLETENESS_THRESHOLD);
+    completenessPartCol = conf.get(CubeQueryConfUtil.COMPLETENESS_CHECK_PART_COL);
   }
 
   private List<String> getSupportedStorages(Configuration conf) {
@@ -138,6 +146,13 @@ class StorageTableResolver implements ContextRewriter {
         resolveFactStoragePartitions(cubeql);
       }
       cubeql.pruneCandidateFactSet(CandidateTablePruneCode.NO_CANDIDATE_STORAGES);
+      if (client != null && client.isDataCompletenessCheckEnabled()) {
+        if (!cubeql.getCandidateFacts().isEmpty()) {
+          // resolve incomplete fact partition
+          resolveFactCompleteness(cubeql);
+        }
+        cubeql.pruneCandidateFactSet(CandidateTablePruneCode.INCOMPLETE_PARTITION);
+      }
       break;
     case DIM_TABLE_AND_PARTITIONS:
       resolveDimStorageTablesAndPartitions(cubeql);
@@ -499,6 +514,133 @@ class StorageTableResolver implements ContextRewriter {
     }
   }
 
+  private static boolean processCubeColForDataCompleteness(CubeQueryContext cubeql, String cubeCol, String alias,
+                                                        Set<String> measureTag,
+                                                        Map<String, String> tagToMeasureOrExprMap) {
+    CubeMeasure column = cubeql.getCube().getMeasureByName(cubeCol);
+    if (column != null && column.getTags() != null) {
+      String dataCompletenessTag = column.getTags().get(MetastoreConstants.MEASURE_DATACOMPLETENESS_TAG);
+      //Checking if dataCompletenessTag is set for queried measure
+      if (dataCompletenessTag != null) {
+        measureTag.add(dataCompletenessTag);
+        String value = tagToMeasureOrExprMap.get(dataCompletenessTag);
+        if (value == null) {
+          tagToMeasureOrExprMap.put(dataCompletenessTag, alias);
+        } else {
+          value = value.concat(",").concat(alias);
+          tagToMeasureOrExprMap.put(dataCompletenessTag, value);
+        }
+        return true;
+      }
+    }
+    return false;
+  }
+
+  private static void processMeasuresFromExprMeasures(CubeQueryContext cubeql, Set<String> measureTag,
+                                                             Map<String, String> tagToMeasureOrExprMap) {
+    boolean isExprProcessed;
+    String cubeAlias = cubeql.getAliasForTableName(cubeql.getCube().getName());
+    for (String expr : cubeql.getQueriedExprsWithMeasures()) {
+      isExprProcessed = false;
+      for (ExpressionResolver.ExprSpecContext esc : cubeql.getExprCtx().getExpressionContext(expr, cubeAlias)
+              .getAllExprs()) {
+        if (esc.getTblAliasToColumns().get(cubeAlias) != null) {
+          for (String cubeCol : esc.getTblAliasToColumns().get(cubeAlias)) {
+            if (processCubeColForDataCompleteness(cubeql, cubeCol, expr, measureTag, tagToMeasureOrExprMap)) {
+              /* This is done to associate the expression with one of the dataCompletenessTag for the measures.
+              So, even if the expression is composed of measures with different dataCompletenessTags, we will be
+              determining the dataCompleteness from one of the measure and this expression is grouped with the
+              other queried measures that have the same dataCompletenessTag. */
+              isExprProcessed = true;
+              break;
+            }
+          }
+        }
+        if (isExprProcessed) {
+          break;
+        }
+      }
+    }
+  }
+
+  private void resolveFactCompleteness(CubeQueryContext cubeql) throws LensException {
+    if (client == null || client.getCompletenessChecker() == null || completenessPartCol == null) {
+      return;
+    }
+    DataCompletenessChecker completenessChecker = client.getCompletenessChecker();
+    Set<String> measureTag = new HashSet<>();
+    Map<String, String> tagToMeasureOrExprMap = new HashMap<>();
+
+    processMeasuresFromExprMeasures(cubeql, measureTag, tagToMeasureOrExprMap);
+
+    Set<String> measures = cubeql.getQueriedMsrs();
+    if (measures == null) {
+      measures = new HashSet<>();
+    }
+    for (String measure : measures) {
+      processCubeColForDataCompleteness(cubeql, measure, measure, measureTag, tagToMeasureOrExprMap);
+    }
+    //Checking if dataCompletenessTag is set for the fact
+    if (measureTag.isEmpty()) {
+      log.info("No Queried measures with the dataCompletenessTag, hence 
skipping the availability check");
+      return;
+    }
+    Iterator<CandidateFact> i = cubeql.getCandidateFacts().iterator();
+    DateFormat formatter = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
+    formatter.setTimeZone(TimeZone.getTimeZone("UTC"));
+    while (i.hasNext()) {
+      CandidateFact cFact = i.next();
+      // Map from measure to the map from partition to %completeness
+      Map<String, Map<String, Float>> incompleteMeasureData = new HashMap<>();
+
+      String factDataCompletenessTag = cFact.fact.getDataCompletenessTag();
+      if (factDataCompletenessTag == null) {
+        log.info("Not checking completeness for the fact table:{} as the 
dataCompletenessTag is not set", cFact.fact);
+        continue;
+      }
+      boolean isFactDataIncomplete = false;
+      for (TimeRange range : cubeql.getTimeRanges()) {
+        if (!range.getPartitionColumn().equals(completenessPartCol)) {
+          log.info("Completeness check not available for partCol:{}", 
range.getPartitionColumn());
+          continue;
+        }
+        Date from = range.getFromDate();
+        Date to = range.getToDate();
+        Map<String, Map<Date, Float>> completenessMap = completenessChecker.getCompleteness(factDataCompletenessTag,
+                from, to, measureTag);
+        if (completenessMap != null && !completenessMap.isEmpty()) {
+          for (Map.Entry<String, Map<Date, Float>> measureCompleteness : completenessMap.entrySet()) {
+            String tag = measureCompleteness.getKey();
+            for (Map.Entry<Date, Float> completenessResult : measureCompleteness.getValue().entrySet()) {
+              if (completenessResult.getValue() < completenessThreshold) {
+                log.info("Completeness for the measure_tag {} is {}, 
threshold: {}, for the hour {}", tag,
+                        completenessResult.getValue(), completenessThreshold,
+                        formatter.format(completenessResult.getKey()));
+                String measureorExprFromTag = tagToMeasureOrExprMap.get(tag);
+                Map<String, Float> incompletePartition = incompleteMeasureData.get(measureorExprFromTag);
+                if (incompletePartition == null) {
+                  incompletePartition = new HashMap<>();
+                  incompleteMeasureData.put(measureorExprFromTag, incompletePartition);
+                }
+                incompletePartition.put(formatter.format(completenessResult.getKey()), completenessResult.getValue());
+                isFactDataIncomplete = true;
+              }
+            }
+          }
+        }
+      }
+      if (isFactDataIncomplete) {
+        log.info("Fact table:{} has partitions with incomplete data: {} for 
given ranges: {}", cFact.fact,
+                incompleteMeasureData, cubeql.getTimeRanges());
+        if (failOnPartialData) {
+          i.remove();
+          cubeql.addFactPruningMsgs(cFact.fact, incompletePartitions(incompleteMeasureData));
+        } else {
+          cFact.setDataCompletenessMap(incompleteMeasureData);
+        }
+      }
+    }
+  }
 
   void addNonExistingParts(String name, Set<String> nonExistingParts) {
     nonExistingPartitions.put(name, nonExistingParts);
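
To make the nested map shapes above concrete, an illustrative sketch (the msr9/tag1 names and 80% value mirror the test setup; nothing here is part of the patch). The checker reports per-tag, per-instant completeness, and resolveFactCompleteness re-keys anything below the threshold by the queried measure or expression and the formatted partition:

  import java.util.Collections;
  import java.util.Date;
  import java.util.HashMap;
  import java.util.Map;

  public class CompletenessMapShapesSketch {
    public static void main(String[] args) {
      Date hour = new Date();

      // Shape returned by DataCompletenessChecker.getCompleteness():
      // measure tag -> (partition instant -> %completeness)
      Map<String, Map<Date, Float>> fromChecker = new HashMap<>();
      fromChecker.put("tag1", Collections.singletonMap(hour, 80f));

      // Shape attached to the CandidateFact (or reported via incompletePartitions()):
      // queried measure/expression -> ("yyyy-MM-dd HH:mm:ss" UTC partition -> %completeness)
      Map<String, Map<String, Float>> incompleteMeasureData = new HashMap<>();
      incompleteMeasureData.put("msr9", Collections.singletonMap("2016-11-10 00:00:00", 80f));

      System.out.println(fromChecker);
      System.out.println(incompleteMeasureData);
    }
  }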

http://git-wip-us.apache.org/repos/asf/lens/blob/078555c1/lens-cube/src/main/resources/olap-query-conf.xml
----------------------------------------------------------------------
diff --git a/lens-cube/src/main/resources/olap-query-conf.xml b/lens-cube/src/main/resources/olap-query-conf.xml
index 0c888ca..b389d6a 100644
--- a/lens-cube/src/main/resources/olap-query-conf.xml
+++ b/lens-cube/src/main/resources/olap-query-conf.xml
@@ -107,6 +107,19 @@
   </property>
 
   <property>
+    <name>lens.cube.query.completeness.check.partition.column</name>
+    <value></value>
+    <description>The Supported Partition Column for the Data Completeness check</description>
+  </property>
+
+  <property>
+    <name>lens.cube.query.completeness.threshold</name>
+    <value>100</value>
+    <description>The query will fail if data completeness is less than the set threshold given that the flag
+      "lens.cube.query.fail.if.data.partial" is set as true</description>
+  </property>
+
+  <property>
     <name>lens.cube.query.nonexisting.partitions</name>
     <value></value>
     <description>The list of comma separated non existing partitions, if query can run with partial data. The value will

http://git-wip-us.apache.org/repos/asf/lens/blob/078555c1/lens-cube/src/test/java/org/apache/lens/cube/parse/CubeTestSetup.java
----------------------------------------------------------------------
diff --git a/lens-cube/src/test/java/org/apache/lens/cube/parse/CubeTestSetup.java b/lens-cube/src/test/java/org/apache/lens/cube/parse/CubeTestSetup.java
index 0c43cb5..41ea83d 100644
--- a/lens-cube/src/test/java/org/apache/lens/cube/parse/CubeTestSetup.java
+++ b/lens-cube/src/test/java/org/apache/lens/cube/parse/CubeTestSetup.java
@@ -515,7 +515,10 @@ public class CubeTestSetup {
 
   private void createCube(CubeMetastoreClient client) throws HiveException, ParseException, LensException {
     cubeMeasures = new HashSet<CubeMeasure>();
-    cubeMeasures.add(new ColumnMeasure(new FieldSchema("msr1", "int", "first 
measure")));
+    Map<String, String> tags = new HashMap<>();
+    tags.put(MetastoreConstants.MEASURE_DATACOMPLETENESS_TAG, "tag1");
+    cubeMeasures.add(new ColumnMeasure(new FieldSchema("msr1", "int", "first 
measure"), null, null, null, null, null,
+            null, null, null, null, tags));
     cubeMeasures.add(new ColumnMeasure(new FieldSchema("msr2", "float", 
"second measure"), "Measure2", null, "SUM",
       "RS"));
     cubeMeasures.add(new ColumnMeasure(new FieldSchema("msr21", "float", 
"second measure"), "Measure22", null, "SUM",
@@ -526,6 +529,8 @@ public class CubeTestSetup {
       null));
     cubeMeasures.add(new ColumnMeasure(new FieldSchema("msr4", "bigint", 
"fourth measure"), "Measure4", null, "COUNT",
       null));
+    cubeMeasures.add(new ColumnMeasure(new FieldSchema("msr9", "bigint", 
"ninth measure"), null, null, null, null,
+            null, null, null, null, null, tags));
     cubeMeasures.add(new ColumnMeasure(new FieldSchema("noAggrMsr", "bigint", 
"measure without a default aggregate"),
       "No aggregateMsr", null, null, null));
     cubeMeasures.add(new ColumnMeasure(new FieldSchema("newmeasure", "bigint", 
"measure available  from now"),
@@ -682,6 +687,7 @@ public class CubeTestSetup {
     measures.add("msr1");
     measures.add("msr2");
     measures.add("msr3");
+    measures.add("msr9");
     Set<String> dimensions = new HashSet<String>();
     dimensions.add("dim1");
     dimensions.add("dim2");
@@ -1208,6 +1214,7 @@ public class CubeTestSetup {
     derivedProperties.put(MetastoreConstants.CUBE_ALL_FIELDS_QUERIABLE, "true");
     Set<String> measures = new HashSet<>();
     measures.add("msr1");
+    measures.add("msr9");
     measures.add("msr11");
     Set<String> dimensions = new HashSet<>();
     dimensions.add("dim1");
@@ -1404,6 +1411,7 @@ public class CubeTestSetup {
     factColumns = new ArrayList<FieldSchema>();
     factColumns.add(new FieldSchema("msr11", "int", "first measure"));
     factColumns.add(new FieldSchema("msr12", "float", "second measure"));
+    factColumns.add(new FieldSchema("msr9", "bigint", "ninth measure"));
 
     // add dimensions of the cube
     factColumns.add(new FieldSchema("d_time", "timestamp", "event time"));
@@ -1427,6 +1435,7 @@ public class CubeTestSetup {
     properties.clear();
     properties.putAll(factValidityProperties);
     properties.put(MetastoreConstants.FACT_AGGREGATED_PROPERTY, "false");
+    properties.put(MetastoreConstants.FACT_DATA_COMPLETENESS_TAG, "f2");
 
     client.createCubeFactTable(BASE_CUBE_NAME, factName, factColumns, storageAggregatePeriods, 100L, properties,
       storageTables);
@@ -1477,6 +1486,36 @@ public class CubeTestSetup {
     properties.put(MetastoreConstants.FACT_COL_START_TIME_PFX.concat("user_id_added_far_future"), "2099-01-01");
     client.createCubeFactTable(BASE_CUBE_NAME, factName, factColumns, storageAggregatePeriods, 100L, properties,
         storageTables);
+
+    factName = "testFact5_RAW_BASE";
+    factColumns = new ArrayList<FieldSchema>();
+    factColumns.add(new FieldSchema("msr9", "bigint", "ninth measure"));
+
+    // add dimensions of the cube
+    factColumns.add(new FieldSchema("d_time", "timestamp", "event time"));
+    factColumns.add(new FieldSchema("processing_time", "timestamp", 
"processing time"));
+    factColumns.add(new FieldSchema("dim1", "string", "base dim"));
+
+    properties.clear();
+    properties.putAll(factValidityProperties);
+    properties.put(MetastoreConstants.FACT_AGGREGATED_PROPERTY, "false");
+    properties.put(MetastoreConstants.FACT_DATA_COMPLETENESS_TAG, "f2");
+    client.createCubeFactTable(BASE_CUBE_NAME, factName, factColumns, storageAggregatePeriods, 100L, properties,
+            storageTables);
+
+    CubeFactTable fact = client.getFactTable(factName);
+    // Add all hourly partitions for two days
+    Calendar cal = Calendar.getInstance();
+    cal.setTime(TWODAYS_BACK);
+    Date temp = cal.getTime();
+    while (!(temp.after(NOW))) {
+      Map<String, Date> timeParts = new HashMap<String, Date>();
+      timeParts.put("dt", temp);
+      StoragePartitionDesc sPartSpec = new StoragePartitionDesc(fact.getName(), timeParts, null, HOURLY);
+      client.addPartition(sPartSpec, c1, CubeTableType.FACT);
+      cal.add(HOUR_OF_DAY, 1);
+      temp = cal.getTime();
+    }
   }
 
   private void createCubeContinuousFact(CubeMetastoreClient client) throws Exception {
@@ -1942,6 +1981,7 @@ public class CubeTestSetup {
     Map<String, String> properties = new HashMap<String, String>();
     properties.putAll(factValidityProperties);
     properties.put(MetastoreConstants.FACT_AGGREGATED_PROPERTY, "false");
+    properties.put(MetastoreConstants.FACT_DATA_COMPLETENESS_TAG, "f1");
 
     client.createCubeFactTable(TEST_CUBE_NAME, factName, factColumns, storageAggregatePeriods, 100L, properties,
       storageTables);

http://git-wip-us.apache.org/repos/asf/lens/blob/078555c1/lens-cube/src/test/java/org/apache/lens/cube/parse/FieldsCannotBeQueriedTogetherTest.java
----------------------------------------------------------------------
diff --git a/lens-cube/src/test/java/org/apache/lens/cube/parse/FieldsCannotBeQueriedTogetherTest.java b/lens-cube/src/test/java/org/apache/lens/cube/parse/FieldsCannotBeQueriedTogetherTest.java
index 7afa32e..1a5bd0d 100644
--- a/lens-cube/src/test/java/org/apache/lens/cube/parse/FieldsCannotBeQueriedTogetherTest.java
+++ b/lens-cube/src/test/java/org/apache/lens/cube/parse/FieldsCannotBeQueriedTogetherTest.java
@@ -228,7 +228,6 @@ public class FieldsCannotBeQueriedTogetherTest extends TestQueryRewrite {
     cityState.name is a dimension attribute used in where clause(filter) and referenced through join chain name
     cityState. It is queryable through source column basecube.cityid. basecube.cityid and msr1 are not present in the
     same derived cube. However since cityState.name is only present in the case statement, the query is allowed. */
-
     rewrite("select SUM(CASE WHEN cityState.name ='foo' THEN msr1 END) from basecube where " + TWO_DAYS_RANGE, conf);
   }
 

http://git-wip-us.apache.org/repos/asf/lens/blob/078555c1/lens-cube/src/test/java/org/apache/lens/cube/parse/MockCompletenessChecker.java
----------------------------------------------------------------------
diff --git a/lens-cube/src/test/java/org/apache/lens/cube/parse/MockCompletenessChecker.java b/lens-cube/src/test/java/org/apache/lens/cube/parse/MockCompletenessChecker.java
new file mode 100644
index 0000000..76e81d5
--- /dev/null
+++ b/lens-cube/src/test/java/org/apache/lens/cube/parse/MockCompletenessChecker.java
@@ -0,0 +1,46 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.lens.cube.parse;
+
+import java.util.*;
+
+import org.apache.lens.server.api.error.LensException;
+import org.apache.lens.server.api.metastore.*;
+
+public class MockCompletenessChecker implements DataCompletenessChecker {
+
+  @Override
+  public Map<String, Map<Date, Float>> getCompleteness(String factTag, Date start, Date end, Set<String> measureTag)
+    throws LensException {
+    Map<Date, Float> partitionCompleteness = new HashMap<>();
+    Map<String, Map<Date, Float>> result = new HashMap<>();
+    Calendar cal = Calendar.getInstance();
+    cal.setTimeZone(TimeZone.getTimeZone("GMT"));
+    cal.add(Calendar.DATE, -1);
+    if (factTag.equals("f1")) {
+      partitionCompleteness.put(cal.getTime(), 80f);
+    } else {
+      partitionCompleteness.put(cal.getTime(), 90f);
+    }
+    result.put("tag1", partitionCompleteness);
+    return result;
+  }
+
+}
+

http://git-wip-us.apache.org/repos/asf/lens/blob/078555c1/lens-cube/src/test/java/org/apache/lens/cube/parse/TestBaseCubeQueries.java
----------------------------------------------------------------------
diff --git a/lens-cube/src/test/java/org/apache/lens/cube/parse/TestBaseCubeQueries.java b/lens-cube/src/test/java/org/apache/lens/cube/parse/TestBaseCubeQueries.java
index f6cec1b..dbb8fa3 100644
--- a/lens-cube/src/test/java/org/apache/lens/cube/parse/TestBaseCubeQueries.java
+++ b/lens-cube/src/test/java/org/apache/lens/cube/parse/TestBaseCubeQueries.java
@@ -118,7 +118,8 @@ public class TestBaseCubeQueries extends TestQueryRewrite {
     boolean columnNotFound = false;
     List<String> testTimeDimFactTables = Arrays.asList("testfact3_base", "testfact1_raw_base", "testfact3_raw_base",
       "testfact5_base", "testfact6_base", "testfact4_raw_base");
-    List<String> factTablesForMeasures = Arrays.asList("testfact_deprecated", "testfact2_raw_base", "testfact2_base");
+    List<String> factTablesForMeasures = Arrays.asList("testfact_deprecated", "testfact2_raw_base", "testfact2_base",
+            "testfact5_raw_base");
     for (Map.Entry<String, List<CandidateTablePruneCause>> entry : pruneCauses.getDetails().entrySet()) {
       if (entry.getValue().contains(CandidateTablePruneCause.columnNotFound("test_time_dim"))) {
         columnNotFound = true;

http://git-wip-us.apache.org/repos/asf/lens/blob/078555c1/lens-cube/src/test/java/org/apache/lens/cube/parse/TestCubeRewriter.java
----------------------------------------------------------------------
diff --git a/lens-cube/src/test/java/org/apache/lens/cube/parse/TestCubeRewriter.java b/lens-cube/src/test/java/org/apache/lens/cube/parse/TestCubeRewriter.java
index b90d4d3..c9e7c29 100644
--- a/lens-cube/src/test/java/org/apache/lens/cube/parse/TestCubeRewriter.java
+++ b/lens-cube/src/test/java/org/apache/lens/cube/parse/TestCubeRewriter.java
@@ -929,6 +929,54 @@ public class TestCubeRewriter extends TestQueryRewrite {
     compareQueries(hqlQuery, expected);
   }
 
+  /* The test is to check no failure on partial data when the flag FAIL_QUERY_ON_PARTIAL_DATA is not set
+   */
+  @Test
+  public void testQueryWithMeasureWithDataCompletenessTagWithNoFailureOnPartialData() throws ParseException,
+          LensException {
+    //In this query a measure is used for which dataCompletenessTag is set.
+    Configuration conf = getConf();
+    conf.setStrings(CubeQueryConfUtil.COMPLETENESS_CHECK_PART_COL, "dt");
+    String hqlQuery = rewrite("select SUM(msr1) from basecube where " + TWO_DAYS_RANGE, conf);
+    String expected = getExpectedQuery("basecube", "select sum(basecube.msr1) FROM ", null, null,
+            getWhereForHourly2days("basecube", "c1_testfact1_raw_base"));
+    compareQueries(hqlQuery, expected);
+  }
+
+  @Test
+  public void testQueryWithMeasureWithDataCompletenessPresentInMultipleFacts() throws ParseException,
+          LensException {
+    /*In this query a measure is used which is present in two facts with different %completeness. While resolving the
+    facts, the fact with the higher dataCompletenessFactor gets picked up.*/
+    Configuration conf = getConf();
+    conf.setStrings(CubeQueryConfUtil.COMPLETENESS_CHECK_PART_COL, "dt");
+    String hqlQuery = rewrite("select SUM(msr9) from basecube where " + TWO_DAYS_RANGE, conf);
+    String expected = getExpectedQuery("basecube", "select sum(basecube.msr9) FROM ", null, null,
+            getWhereForHourly2days("basecube", "c1_testfact5_raw_base"));
+    compareQueries(hqlQuery, expected);
+  }
+
+  @Test
+  public void testCubeWhereQueryWithMeasureWithDataCompletenessAndFailIfPartialDataFlagSet() throws ParseException,
+          LensException {
+    /*In this query a measure is used for which dataCompletenessTag is set and the flag FAIL_QUERY_ON_PARTIAL_DATA is
+    set. The partitions for the queried range are present but some of them have incomplete data. So, the query
+    throws NO_CANDIDATE_FACT_AVAILABLE Exception*/
+    Configuration conf = getConf();
+    conf.setStrings(CubeQueryConfUtil.COMPLETENESS_CHECK_PART_COL, "dt");
+    conf.setBoolean(CubeQueryConfUtil.FAIL_QUERY_ON_PARTIAL_DATA, true);
+
+    LensException e = getLensExceptionInRewrite("select SUM(msr9) from basecube where " + TWO_DAYS_RANGE, conf);
+    assertEquals(e.getErrorCode(), LensCubeErrorCode.NO_CANDIDATE_FACT_AVAILABLE.getLensErrorInfo().getErrorCode());
+    NoCandidateFactAvailableException ne = (NoCandidateFactAvailableException) e;
+    PruneCauses.BriefAndDetailedError pruneCauses = ne.getJsonMessage();
+    /*Since the Flag FAIL_QUERY_ON_PARTIAL_DATA is set, and the queried fact has incomplete data, hence, we expect the
+    prune cause to be INCOMPLETE_PARTITION. The below check is to validate this.*/
+    assertEquals(pruneCauses.getBrief().substring(0, INCOMPLETE_PARTITION.errorFormat.length() - 3),
+            INCOMPLETE_PARTITION.errorFormat.substring(0,
+                    INCOMPLETE_PARTITION.errorFormat.length() - 3), pruneCauses.getBrief());
+  }
+
   @Test
   public void testCubeWhereQueryForMonthWithNoPartialData() throws Exception {
     Configuration conf = getConf();

http://git-wip-us.apache.org/repos/asf/lens/blob/078555c1/lens-cube/src/test/java/org/apache/lens/cube/parse/TestQueryRewrite.java
----------------------------------------------------------------------
diff --git a/lens-cube/src/test/java/org/apache/lens/cube/parse/TestQueryRewrite.java b/lens-cube/src/test/java/org/apache/lens/cube/parse/TestQueryRewrite.java
index 0aa31f4..17a8b0f 100644
--- a/lens-cube/src/test/java/org/apache/lens/cube/parse/TestQueryRewrite.java
+++ b/lens-cube/src/test/java/org/apache/lens/cube/parse/TestQueryRewrite.java
@@ -28,6 +28,7 @@ import org.apache.lens.api.error.ErrorCollection;
 import org.apache.lens.api.error.ErrorCollectionFactory;
 import org.apache.lens.api.error.LensError;
 import org.apache.lens.cube.error.LensCubeErrorCode;
+import org.apache.lens.server.api.*;
 import org.apache.lens.server.api.error.LensException;
 
 import org.apache.hadoop.conf.Configuration;
@@ -63,7 +64,11 @@ public abstract class TestQueryRewrite {
 
   @BeforeSuite
   public static void setup() throws Exception {
+    hconf.setStrings(LensConfConstants.COMPLETENESS_CHECKER_CLASS,
+            "org.apache.lens.cube.parse.MockCompletenessChecker");
+    hconf.setBoolean(LensConfConstants.ENABLE_DATACOMPLETENESS_CHECK, true);
     SessionState.start(hconf);
+
     setup = new CubeTestSetup();
     setup.createSources(hconf, TestQueryRewrite.class.getSimpleName());
   }

http://git-wip-us.apache.org/repos/asf/lens/blob/078555c1/lens-server-api/src/main/java/org/apache/lens/server/api/LensConfConstants.java
----------------------------------------------------------------------
diff --git a/lens-server-api/src/main/java/org/apache/lens/server/api/LensConfConstants.java b/lens-server-api/src/main/java/org/apache/lens/server/api/LensConfConstants.java
index cf1c233..7ccb170 100644
--- a/lens-server-api/src/main/java/org/apache/lens/server/api/LensConfConstants.java
+++ b/lens-server-api/src/main/java/org/apache/lens/server/api/LensConfConstants.java
@@ -21,6 +21,7 @@ package org.apache.lens.server.api;
 import javax.ws.rs.core.MediaType;
 
 import org.apache.lens.server.api.error.LensException;
+import org.apache.lens.server.api.metastore.*;
 
 /**
  * The Class LensConfConstants.
@@ -1223,4 +1224,26 @@ public final class LensConfConstants {
    * Maximum number of scheduled job per user.
    */
   public static final String MAX_SCHEDULED_JOB_PER_USER  = SERVER_PFX + "scheduler.max.job.per.user";
+
+  /**
+   * The class that implements the DataCompletenessChecker Interface. This will take effect if the flag
+   * "lens.cube.metastore.enable.datacompleteness.check" is set.
+   */
+  public static final String COMPLETENESS_CHECKER_CLASS = "lens.cube.metastore.completeness.checker.class";
+
+  /**
+   * The default implementation of DataCompletenessChecker
+   */
+  public static final Class<? extends DataCompletenessChecker> DEFAULT_COMPLETENESS_CHECKER =
+          DefaultChecker.class.asSubclass(DataCompletenessChecker.class);
+
+  /**
+   * This property is to enable Data Completeness Checks while resolving partitions.
+   */
+  public static final String ENABLE_DATACOMPLETENESS_CHECK = "lens.cube.metastore.enable.datacompleteness.check";
+
+  /**
+   * Default Value of the config "lens.cube.metastore.enable.datacompleteness.check"
+   */
+  public static final boolean DEFAULT_ENABLE_DATACOMPLETENESS_CHECK = false;
 }
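
A sketch of wiring the two server-level settings programmatically (the checker class name here is hypothetical; the same keys can equally be set in lens-site.xml, mirroring the lensserver-default.xml entries below):

  import org.apache.hadoop.conf.Configuration;
  import org.apache.lens.server.api.LensConfConstants;

  public class EnableCompletenessCheckSketch {
    public static Configuration serverConf() {
      Configuration conf = new Configuration();
      // The check is off by default (DEFAULT_ENABLE_DATACOMPLETENESS_CHECK = false).
      conf.setBoolean(LensConfConstants.ENABLE_DATACOMPLETENESS_CHECK, true);
      // Hypothetical implementation class; DefaultChecker is used when unset.
      conf.set(LensConfConstants.COMPLETENESS_CHECKER_CLASS, "com.example.MyCompletenessChecker");
      return conf;
    }
  }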

http://git-wip-us.apache.org/repos/asf/lens/blob/078555c1/lens-server-api/src/main/java/org/apache/lens/server/api/metastore/DataCompletenessChecker.java
----------------------------------------------------------------------
diff --git a/lens-server-api/src/main/java/org/apache/lens/server/api/metastore/DataCompletenessChecker.java b/lens-server-api/src/main/java/org/apache/lens/server/api/metastore/DataCompletenessChecker.java
new file mode 100644
index 0000000..68713fe
--- /dev/null
+++ b/lens-server-api/src/main/java/org/apache/lens/server/api/metastore/DataCompletenessChecker.java
@@ -0,0 +1,55 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.lens.server.api.metastore;
+
+import java.util.Date;
+import java.util.Map;
+import java.util.Set;
+
+import org.apache.lens.server.api.error.LensException;
+
+/**
+ * DataCompletenessChecker is for identifying the completeness of data in a fact for the given set of measures, start
+ * and end date. A fact will have a dataCompletenessTag, multiple facts can have the same dataCompletenessTag.
+ * Similarly, measures will have a dataCompletenessTag, multiple measures can have the same dataCompletenessTag.
+ * The api will take the dataCompletenessTag for the facts and measures and compute the completeness based on these
+ * tags. The utility of having tags is that the similar kind of measures or facts which will have the same level of
+ * completeness can use the same tag, thus we avoid the redundant completeness computation for similar measures
+ * and facts.
+ * The implementations of the interface can truncate the start and end date.
+ */
+public interface DataCompletenessChecker {
+
+  /**
+   * Get completeness of the set of measures in a fact based on the dataCompletenessTag for the given starttime and
+   * endtime.
+   *
+   * @param factTag This is the dataCompletenessTag for a fact. The tag can be specified by setting the property
+   *                named dataCompletenessTag for the fact. Multiple facts can have the same dataCompletenessTag.
+   * @param start Start time of the query (Inclusive).
+   * @param end End time of the query (Exclusive).
+   * @param measureTag List of distinct tag of the measures in the query. Multiple measures can have the same
+   *                   dataCompletenessTag.
+   * @return map; key is the name of the dataCompletenessTag which refers to one or more measures. Value is the map
+   * of date and %completeness.
+   */
+  Map<String, Map<Date, Float>> getCompleteness(String factTag, Date start, Date end, Set<String> measureTag)
+    throws LensException;
+
+}
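
A minimal, hypothetical implementation of the interface, only to show the expected return shape; a real checker would consult an external completeness or availability store rather than report a constant:

  import java.util.Collections;
  import java.util.Date;
  import java.util.HashMap;
  import java.util.Map;
  import java.util.Set;

  import org.apache.lens.server.api.error.LensException;
  import org.apache.lens.server.api.metastore.DataCompletenessChecker;

  public class ConstantCompletenessChecker implements DataCompletenessChecker {

    @Override
    public Map<String, Map<Date, Float>> getCompleteness(String factTag, Date start, Date end,
        Set<String> measureTag) throws LensException {
      Map<String, Map<Date, Float>> result = new HashMap<>();
      for (String tag : measureTag) {
        // Report a flat 90% for the (possibly truncated) start of the requested range.
        result.put(tag, Collections.singletonMap(start, 90f));
      }
      return result;
    }
  }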

http://git-wip-us.apache.org/repos/asf/lens/blob/078555c1/lens-server-api/src/main/java/org/apache/lens/server/api/metastore/DefaultChecker.java
----------------------------------------------------------------------
diff --git a/lens-server-api/src/main/java/org/apache/lens/server/api/metastore/DefaultChecker.java b/lens-server-api/src/main/java/org/apache/lens/server/api/metastore/DefaultChecker.java
new file mode 100644
index 0000000..2d1275c
--- /dev/null
+++ b/lens-server-api/src/main/java/org/apache/lens/server/api/metastore/DefaultChecker.java
@@ -0,0 +1,34 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.lens.server.api.metastore;
+
+import java.util.Date;
+import java.util.Map;
+import java.util.Set;
+
+import org.apache.lens.server.api.error.LensException;
+
+public class DefaultChecker implements DataCompletenessChecker {
+
+  @Override
+  public Map<String, Map<Date, Float>> getCompleteness(String factTag, Date start, Date end, Set<String> measureTag)
+    throws LensException {
+    return null;
+  }
+}

http://git-wip-us.apache.org/repos/asf/lens/blob/078555c1/lens-server/src/main/resources/lensserver-default.xml
----------------------------------------------------------------------
diff --git a/lens-server/src/main/resources/lensserver-default.xml b/lens-server/src/main/resources/lensserver-default.xml
index 0ac722d..e652a0f 100644
--- a/lens-server/src/main/resources/lensserver-default.xml
+++ b/lens-server/src/main/resources/lensserver-default.xml
@@ -937,4 +937,16 @@
        then there is no restriction on the number of jobs scheduled.
     </description>
   </property>
+  <property>
+    <name>lens.cube.metastore.enable.datacompleteness.check</name>
+    <value>false</value>
+    <description>This property is to enable Data Completeness Checks while resolving partitions.</description>
+  </property>
+  <property>
+    <name>lens.cube.metastore.completeness.checker.class</name>
+    <value>org.apache.lens.server.api.metastore.DefaultChecker</value>
+    <description>The class that implements the DataCompletenessChecker Interface. This will take effect if the flag
+      "lens.cube.metastore.enable.datacompleteness.check" is set.
+    </description>
+  </property>
 </configuration>

http://git-wip-us.apache.org/repos/asf/lens/blob/078555c1/lens-server/src/main/resources/lenssession-default.xml
----------------------------------------------------------------------
diff --git a/lens-server/src/main/resources/lenssession-default.xml b/lens-server/src/main/resources/lenssession-default.xml
index 8d9f097..ca38ba6 100644
--- a/lens-server/src/main/resources/lenssession-default.xml
+++ b/lens-server/src/main/resources/lenssession-default.xml
@@ -374,6 +374,13 @@
   </property>
 
   <property>
+    <name>lens.cube.query.completeness.threshold</name>
+    <value>100</value>
+    <description>The query will fail if data completeness is less than the set threshold given that the flag
+      "lens.cube.query.fail.if.data.partial" is set as true</description>
+  </property>
+
+  <property>
     <name>lens.session.metastore.exclude.cubetables.from.nativetables</name>
     <value>true</value>
     <description>Exclude cube related tables when fetching native tables</description>
