HIVE-16997: Extend object store to store and use bit vectors (Pengcheng Xiong, reviewed by Ashutosh Chauhan)


Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/f8b79fe6
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/f8b79fe6
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/f8b79fe6

Branch: refs/heads/master
Commit: f8b79fe6d136f348820ce81dc7a6883f1e70dcfc
Parents: 892841a
Author: Pengcheng Xiong <pxi...@apache.org>
Authored: Tue Jul 25 15:41:14 2017 -0700
Committer: Pengcheng Xiong <pxi...@apache.org>
Committed: Tue Jul 25 15:41:14 2017 -0700

----------------------------------------------------------------------
 .../apache/hadoop/hive/common/ndv/FMSketch.java | 117 +--
 .../ndv/NumDistinctValueEstimatorFactory.java   |  30 +-
 .../hive/common/ndv/fm/FMSketchUtils.java       | 133 ++++
 .../hadoop/hive/common/ndv/hll/HyperLogLog.java |   4 +-
 .../org/apache/hadoop/hive/conf/HiveConf.java   |   4 +-
 .../ndv/fm/TestFMSketchSerialization.java       |  97 +++
 data/conf/hive-site.xml                         |   4 +
 data/conf/llap/hive-site.xml                    |   4 +
 data/conf/perf-reg/hive-site.xml                |   5 +
 data/conf/tez/hive-site.xml                     |   5 +
 .../upgrade/derby/044-HIVE-16997.derby.sql      |   1 +
 .../upgrade/derby/hive-schema-3.0.0.derby.sql   |   2 +-
 .../derby/upgrade-2.3.0-to-3.0.0.derby.sql      |   1 +
 .../upgrade/mssql/029-HIVE-16997.mssql.sql      |   1 +
 .../upgrade/mssql/hive-schema-3.0.0.mssql.sql   |   1 +
 .../mssql/upgrade-2.3.0-to-3.0.0.mssql.sql      |   1 +
 .../upgrade/mysql/044-HIVE-16997.mysql.sql      |   1 +
 .../upgrade/mysql/hive-schema-3.0.0.mysql.sql   |   1 +
 .../mysql/upgrade-2.3.0-to-3.0.0.mysql.sql      |   1 +
 .../upgrade/oracle/044-HIVE-16997.oracle.sql    |   1 +
 .../upgrade/oracle/hive-schema-3.0.0.oracle.sql |   1 +
 .../oracle/upgrade-2.3.0-to-3.0.0.oracle.sql    |   1 +
 .../postgres/043-HIVE-16997.postgres.sql        |   1 +
 .../postgres/hive-schema-3.0.0.postgres.sql     |   1 +
 .../upgrade-2.3.0-to-3.0.0.postgres.sql         |   1 +
 .../hive/metastore/MetaStoreDirectSql.java      |  98 ++-
 .../hadoop/hive/metastore/MetaStoreUtils.java   |  49 +-
 .../hadoop/hive/metastore/ObjectStore.java      |  20 +-
 .../hive/metastore/StatObjectConverter.java     |  40 +-
 .../hive/metastore/cache/CachedStore.java       |  50 +-
 .../aggr/DateColumnStatsAggregator.java         | 358 +++++++++
 .../aggr/StringColumnStatsAggregator.java       | 303 ++++++++
 .../hadoop/hive/metastore/hbase/StatsCache.java |  11 +-
 .../stats/BinaryColumnStatsAggregator.java      |   2 +-
 .../stats/BooleanColumnStatsAggregator.java     |   2 +-
 .../hbase/stats/ColumnStatsAggregator.java      |   4 +-
 .../stats/ColumnStatsAggregatorFactory.java     |  14 +-
 .../stats/DecimalColumnStatsAggregator.java     |  35 +-
 .../stats/DoubleColumnStatsAggregator.java      |  33 +-
 .../hbase/stats/IExtrapolatePartStatus.java     |   2 +-
 .../hbase/stats/LongColumnStatsAggregator.java  |  34 +-
 .../stats/StringColumnStatsAggregator.java      | 122 ---
 .../stats/merge/BinaryColumnStatsMerger.java    |   2 +-
 .../stats/merge/BooleanColumnStatsMerger.java   |   2 +-
 .../hbase/stats/merge/ColumnStatsMerger.java    |   2 +-
 .../stats/merge/ColumnStatsMergerFactory.java   |   2 +-
 .../stats/merge/DateColumnStatsMerger.java      |   2 +-
 .../stats/merge/DecimalColumnStatsMerger.java   |   2 +-
 .../stats/merge/DoubleColumnStatsMerger.java    |   2 +-
 .../stats/merge/LongColumnStatsMerger.java      |   2 +-
 .../stats/merge/StringColumnStatsMerger.java    |   2 +-
 .../model/MPartitionColumnStatistics.java       |  24 +-
 .../metastore/model/MTableColumnStatistics.java |  24 +-
 metastore/src/model/package.jdo                 |   6 +
 .../hadoop/hive/metastore/TestOldSchema.java    | 229 ++++++
 .../hive/metastore/cache/TestCachedStore.java   | 156 ++++
 ...stHBaseAggregateStatsCacheWithBitVector.java |  13 +-
 .../TestHBaseAggregateStatsExtrapolation.java   |  11 +-
 .../TestHBaseAggregateStatsNDVUniformDist.java  |  15 +-
 .../org/apache/hadoop/hive/ql/exec/DDLTask.java |   2 +-
 .../formatting/MetaDataFormatUtils.java         |  19 +-
 .../hadoop/hive/ql/plan/ColStatistics.java      |   3 -
 .../hadoop/hive/ql/plan/DescTableDesc.java      |   4 +-
 .../ql/udf/generic/GenericUDAFComputeStats.java |   2 +-
 ...lter_table_update_status_disable_bitvector.q | 139 ++++
 ql/src/test/queries/clientpositive/bitvector.q  |   3 +
 ql/src/test/queries/clientpositive/fm-sketch.q  |  58 ++
 ql/src/test/queries/clientpositive/hll.q        |  11 +
 .../clientpositive/alterColumnStats.q.out       |  16 +-
 .../clientpositive/alterColumnStatsPart.q.out   |   6 +-
 .../alter_partition_update_status.q.out         |  40 +-
 .../alter_table_column_stats.q.out              | 764 +++++++++++--------
 .../alter_table_update_status.q.out             | 394 +++++-----
 ..._table_update_status_disable_bitvector.q.out | 708 +++++++++++++++++
 .../clientpositive/analyze_tbl_part.q.out       |  24 +-
 .../clientpositive/autoColumnStats_5.q.out      |  36 +-
 .../clientpositive/autoColumnStats_9.q.out      |  58 +-
 .../results/clientpositive/avro_decimal.q.out   |   9 +-
 .../clientpositive/avro_decimal_native.q.out    |   9 +-
 .../test/results/clientpositive/bitvector.q.out |  31 +
 .../test/results/clientpositive/char_udf1.q.out |   2 +-
 .../clientpositive/colstats_all_nulls.q.out     |  18 +-
 ...names_with_leading_and_trailing_spaces.q.out |  17 +-
 .../column_pruner_multiple_children.q.out       |  18 +-
 .../clientpositive/columnstats_partlvl.q.out    |  46 +-
 .../clientpositive/columnstats_partlvl_dp.q.out |  70 +-
 .../clientpositive/columnstats_tbllvl.q.out     |  78 +-
 .../results/clientpositive/compustat_avro.q.out |  16 +-
 .../clientpositive/compute_stats_date.q.out     |  20 +-
 .../clientpositive/compute_stats_decimal.q.out  |   2 +-
 .../clientpositive/compute_stats_double.q.out   |   2 +-
 .../clientpositive/compute_stats_long.q.out     |   2 +-
 .../clientpositive/compute_stats_string.q.out   |   2 +-
 .../confirm_initial_tbl_stats.q.out             | 223 ++++--
 .../results/clientpositive/decimal_stats.q.out  |   9 +-
 .../results/clientpositive/deleteAnalyze.q.out  |   8 +-
 .../clientpositive/describe_syntax.q.out        |  24 +-
 .../results/clientpositive/describe_table.q.out |  74 +-
 .../display_colstats_tbllvl.q.out               | 111 +--
 .../encrypted/encryption_move_tbl.q.out         | 116 ++-
 .../extrapolate_part_stats_full.q.out           |   7 +-
 .../extrapolate_part_stats_partial.q.out        |  22 +-
 .../test/results/clientpositive/fm-sketch.q.out | 333 ++++++++
 ql/src/test/results/clientpositive/hll.q.out    | 181 ++++-
 .../clientpositive/llap/autoColumnStats_2.q.out | 233 +++++-
 ...names_with_leading_and_trailing_spaces.q.out |  17 +-
 .../llap/columnstats_part_coltype.q.out         | 165 ++--
 .../clientpositive/llap/deleteAnalyze.q.out     |   8 +-
 .../extrapolate_part_stats_partial_ndv.q.out    | 154 ++--
 .../results/clientpositive/llap/llap_smb.q.out  |   4 +-
 .../clientpositive/llap/stats_only_null.q.out   |   7 +-
 .../clientpositive/llap/subquery_scalar.q.out   |  10 +-
 .../clientpositive/llap/varchar_udf1.q.out      |   2 +-
 .../clientpositive/llap/vector_udf1.q.out       |   2 +-
 .../clientpositive/partial_column_stats.q.out   |   8 +-
 .../partition_coltype_literals.q.out            |  48 +-
 .../reduceSinkDeDuplication_pRS_key_empty.q.out |   6 +-
 .../rename_external_partition_location.q.out    |  28 +-
 .../rename_table_update_column_stats.q.out      | 108 +--
 .../spark/avro_decimal_native.q.out             |   8 +-
 .../spark/spark_dynamic_partition_pruning.q.out |   8 +-
 .../clientpositive/spark/stats_only_null.q.out  |   6 +-
 .../clientpositive/stats_only_null.q.out        |   7 +-
 .../temp_table_display_colstats_tbllvl.q.out    |  99 ++-
 .../clientpositive/tez/explainanalyze_5.q.out   |   6 +-
 .../clientpositive/tez/explainuser_3.q.out      |   6 +-
 .../results/clientpositive/tunable_ndv.q.out    |  68 +-
 127 files changed, 5217 insertions(+), 1620 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/f8b79fe6/common/src/java/org/apache/hadoop/hive/common/ndv/FMSketch.java
----------------------------------------------------------------------
diff --git a/common/src/java/org/apache/hadoop/hive/common/ndv/FMSketch.java b/common/src/java/org/apache/hadoop/hive/common/ndv/FMSketch.java
index e20d299..160ce66 100644
--- a/common/src/java/org/apache/hadoop/hive/common/ndv/FMSketch.java
+++ b/common/src/java/org/apache/hadoop/hive/common/ndv/FMSketch.java
@@ -15,22 +15,28 @@
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
-package org.apache.hadoop.hive.common.ndv;
+package org.apache.hadoop.hive.common.ndv.fm;
 
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.io.InputStream;
 import java.util.Random;
 
 import javolution.util.FastBitSet;
 
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
-import org.apache.commons.lang.StringUtils;
+import org.apache.commons.codec.binary.Base64;
 import org.apache.hadoop.hive.common.classification.InterfaceAudience;
+import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimator;
 import org.apache.hadoop.hive.common.type.HiveDecimal;
 import org.apache.hadoop.hive.ql.util.JavaDataModel;
 
 public class FMSketch implements NumDistinctValueEstimator{
 
   static final Logger LOG = LoggerFactory.getLogger(FMSketch.class.getName());
+  public static final byte[] MAGIC = new byte[] { 'F', 'M' };
 
   /* We want a,b,x to come from a finite field of size 0 to k, where k is a prime number.
    * 2^p - 1 is prime for p = 31. Hence bitvectorSize has to be 31. Pick k to be 2^p -1.
@@ -38,7 +44,7 @@ public class FMSketch implements NumDistinctValueEstimator{
    * independent. As a consequence, the hash values will not distribute uniformly from 0 to 2^p-1
    * thus introducing errors in the estimates.
    */
-  private static final int BIT_VECTOR_SIZE = 31;
+  public static final int BIT_VECTOR_SIZE = 31;
 
   // Refer to Flajolet-Martin'86 for the value of phi
   private static final double PHI = 0.77351;
@@ -111,27 +117,6 @@ public class FMSketch implements NumDistinctValueEstimator{
     }
   }
 
-  public FMSketch(String s, int numBitVectors) {
-    this.numBitVectors = numBitVectors;
-    FastBitSet bitVectorDeser[] = genBitSet(s, numBitVectors);
-    bitVector = new FastBitSet[numBitVectors];
-    for(int i=0; i <numBitVectors; i++) {
-       bitVector[i] = new FastBitSet(BIT_VECTOR_SIZE);
-       bitVector[i].clear();
-       bitVector[i].or(bitVectorDeser[i]);
-    }
-
-    a = null;
-    b = null;
-
-    aValue = null;
-    bValue = null;
-  }
-
-  public FMSketch(String s) {
-    this(s, StringUtils.countMatches(s, "{"));
-  }
-
   /**
    * Resets a distinctValueEstimator object to its original state.
    */
@@ -145,6 +130,10 @@ public class FMSketch implements NumDistinctValueEstimator{
     return bitVector[index];
   }
 
+  public FastBitSet setBitVector(FastBitSet fastBitSet, int index) {
+    return bitVector[index] = fastBitSet;
+  }
+
   public int getnumBitVectors() {
     return numBitVectors;
   }
@@ -168,67 +157,30 @@ public class FMSketch implements NumDistinctValueEstimator{
     LOG.debug(t);
   }
 
-  /* Serializes a distinctValueEstimator object to Text for transport.
-   *
-   */
+  @Override
   public String serialize() {
-    String s = new String();
-    for(int i=0; i < numBitVectors; i++) {
-      s = s + (bitVector[i].toString());
+    ByteArrayOutputStream bos = new ByteArrayOutputStream();
+    // write bytes to bos ...
+    try {
+      FMSketchUtils.serializeFM(bos, this);
+      String result = Base64.encodeBase64String(bos.toByteArray());
+      bos.close();
+      return result;
+    } catch (IOException e) {
+      throw new RuntimeException(e);
     }
-    return s;
   }
 
-  /* Deserializes from string to FastBitSet; Creates a NumDistinctValueEstimator object and
-   * returns it.
-   */
-
-  private FastBitSet[] genBitSet(String s, int numBitVectors) {
-    FastBitSet[] b = new FastBitSet[numBitVectors];
-    for (int j=0; j < numBitVectors; j++) {
-      b[j] = new FastBitSet(BIT_VECTOR_SIZE);
-      b[j].clear();
-    }
-
-    int vectorIndex =0;
-
-    /* Parse input string to obtain the indexes that are set in the bitvector.
-     * When a toString() is called on a FastBitSet object to serialize it, the serialization
-     * adds { and } to the beginning and end of the return String.
-     * Skip "{", "}", ",", " " in the input string.
-     */
-    for(int i=1; i < s.length()-1;) {
-      char c = s.charAt(i);
-      i = i + 1;
-
-      // Move on to the next bit vector
-      if (c == '}') {
-         vectorIndex = vectorIndex + 1;
-      }
-
-      // Encountered a numeric value; Extract out the entire number
-      if (c >= '0' && c <= '9') {
-        String t = new String();
-        t = t + c;
-        c = s.charAt(i);
-        i = i + 1;
-
-        while (c != ',' && c!= '}') {
-          t = t + c;
-          c = s.charAt(i);
-          i = i + 1;
-        }
-
-        int bitIndex = Integer.parseInt(t);
-        assert(bitIndex >= 0);
-        assert(vectorIndex < numBitVectors);
-        b[vectorIndex].set(bitIndex);
-        if (c == '}') {
-          vectorIndex =  vectorIndex + 1;
-        }
-      }
+  @Override
+  public NumDistinctValueEstimator deserialize(String s) {
+    InputStream is = new ByteArrayInputStream(Base64.decodeBase64(s));
+    try {
+      NumDistinctValueEstimator n = FMSketchUtils.deserializeFM(is);
+      is.close();
+      return n;
+    } catch (IOException e) {
+      throw new RuntimeException(e);
     }
-    return b;
   }
 
   private int generateHash(long v, int hashNum) {
@@ -387,11 +339,6 @@ public class FMSketch implements NumDistinctValueEstimator{
     return lengthFor(model, getnumBitVectors());
   }
 
-  @Override
-  public NumDistinctValueEstimator deserialize(String s) {
-    return new FMSketch(s);
-  }
-
  // the caller needs to guarantee that they are the same type based on numBitVectors
   @Override
   public void mergeEstimators(NumDistinctValueEstimator o) {
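
The serialization change above replaces the old "{...}" toString format with a compact binary stream (two-byte "FM" magic, two-byte vector count, four bytes per vector) wrapped in Base64. A minimal round-trip sketch against the API shown in this patch; the class name and bit pattern are illustrative only, and it assumes the FMSketch(int) constructor initializes its vectors, as FMSketchUtils.deserializeFM relies on:

    import javolution.util.FastBitSet;
    import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimator;
    import org.apache.hadoop.hive.common.ndv.fm.FMSketch;

    public class FMSketchRoundTrip {
      public static void main(String[] args) {
        FMSketch sketch = new FMSketch(16);                    // 16 bit vectors
        FastBitSet bits = new FastBitSet(FMSketch.BIT_VECTOR_SIZE);
        bits.set(0);                                           // illustrative bit pattern
        sketch.setBitVector(bits, 0);

        String s = sketch.serialize();                         // Base64("FM" + count + vectors)
        NumDistinctValueEstimator copy = sketch.deserialize(s);
        System.out.println(copy.estimateNumDistinctValues());
      }
    }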

http://git-wip-us.apache.org/repos/asf/hive/blob/f8b79fe6/common/src/java/org/apache/hadoop/hive/common/ndv/NumDistinctValueEstimatorFactory.java
----------------------------------------------------------------------
diff --git a/common/src/java/org/apache/hadoop/hive/common/ndv/NumDistinctValueEstimatorFactory.java b/common/src/java/org/apache/hadoop/hive/common/ndv/NumDistinctValueEstimatorFactory.java
index e810ac5..6a29859 100644
--- a/common/src/java/org/apache/hadoop/hive/common/ndv/NumDistinctValueEstimatorFactory.java
+++ b/common/src/java/org/apache/hadoop/hive/common/ndv/NumDistinctValueEstimatorFactory.java
@@ -19,6 +19,14 @@
 
 package org.apache.hadoop.hive.common.ndv;
 
+import java.io.ByteArrayInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.Arrays;
+
+import org.apache.commons.codec.binary.Base64;
+import org.apache.hadoop.hive.common.ndv.fm.FMSketch;
+import org.apache.hadoop.hive.common.ndv.fm.FMSketchUtils;
 import org.apache.hadoop.hive.common.ndv.hll.HyperLogLog;
 
 public class NumDistinctValueEstimatorFactory {
@@ -26,11 +34,25 @@ public class NumDistinctValueEstimatorFactory {
   private NumDistinctValueEstimatorFactory() {
   }
 
+  private static boolean isFMSketch(String s) throws IOException {
+    InputStream in = new ByteArrayInputStream(Base64.decodeBase64(s));
+    byte[] magic = new byte[2];
+    magic[0] = (byte) in.read();
+    magic[1] = (byte) in.read();
+    in.close();
+    return Arrays.equals(magic, FMSketchUtils.MAGIC);
+  }
+
  public static NumDistinctValueEstimator getNumDistinctValueEstimator(String s) {
-    if (s.startsWith("{")) {
-      return new FMSketch(s);
-    } else {
-      return HyperLogLog.builder().build().deserialize(s);
+    // Right now we assume only FM and HLL are available.
+    try {
+      if (isFMSketch(s)) {
+        return FMSketchUtils.deserializeFM(s);
+      } else {
+        return HyperLogLog.builder().build().deserialize(s);
+      }
+    } catch (IOException e) {
+      throw new RuntimeException(e);
     }
   }
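
The factory no longer keys off the old "{" prefix; it Base64-decodes the payload and sniffs the first two bytes, so FM and HLL serializations can share the same metastore column. A small dispatch sketch (the class name is mine; it assumes HyperLogLog implements the interface's serialize(), as FMSketch does):

    import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimator;
    import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimatorFactory;
    import org.apache.hadoop.hive.common.ndv.fm.FMSketch;
    import org.apache.hadoop.hive.common.ndv.hll.HyperLogLog;

    public class NdvDispatchExample {
      public static void main(String[] args) {
        String fm = new FMSketch(16).serialize();               // decodes to 'F','M',...
        String hll = HyperLogLog.builder().build().serialize(); // no FM magic

        NumDistinctValueEstimator a = NumDistinctValueEstimatorFactory.getNumDistinctValueEstimator(fm);
        NumDistinctValueEstimator b = NumDistinctValueEstimatorFactory.getNumDistinctValueEstimator(hll);
        System.out.println(a instanceof FMSketch);     // true
        System.out.println(b instanceof HyperLogLog);  // true
      }
    }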
 

http://git-wip-us.apache.org/repos/asf/hive/blob/f8b79fe6/common/src/java/org/apache/hadoop/hive/common/ndv/fm/FMSketchUtils.java
----------------------------------------------------------------------
diff --git a/common/src/java/org/apache/hadoop/hive/common/ndv/fm/FMSketchUtils.java b/common/src/java/org/apache/hadoop/hive/common/ndv/fm/FMSketchUtils.java
new file mode 100644
index 0000000..b6f7fdd
--- /dev/null
+++ b/common/src/java/org/apache/hadoop/hive/common/ndv/fm/FMSketchUtils.java
@@ -0,0 +1,133 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.common.ndv.fm;
+
+import java.io.ByteArrayInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+import java.util.Arrays;
+
+import javolution.util.FastBitSet;
+
+import org.apache.commons.codec.binary.Base64;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+public class FMSketchUtils {
+
+  static final Logger LOG = LoggerFactory.getLogger(FMSketch.class.getName());
+  public static final byte[] MAGIC = new byte[] { 'F', 'M' };
+
+  /*
+   * Serializes a distinctValueEstimator object to an OutputStream for transport.
+   *
+   * The 4-byte header is encoded as follows:
+   *   2 bytes - "FM" magic string, identifying a serialized FMSketch stream
+   *   2 bytes - numBitVectors
+   * Each bit vector is then written as 4 bytes; since BIT_VECTOR_SIZE = 31,
+   * 4 bytes are enough to hold bit positions 0-30.
+   */
+  public static void serializeFM(OutputStream out, FMSketch fm) throws IOException {
+    out.write(MAGIC);
+
+    // max of numBitVectors = 1024, 2 bytes is enough.
+    byte[] nbv = new byte[2];
+    nbv[0] = (byte) fm.getnumBitVectors();
+    nbv[1] = (byte) (fm.getnumBitVectors() >>> 8);
+
+    out.write(nbv);
+
+    // original toString takes too much space
+    // we compress a fastbitset to 4 bytes
+    for (int i = 0; i < fm.getnumBitVectors(); i++) {
+      writeBitVector(out, fm.getBitVector(i));
+    }
+  }
+
+  // Since BIT_VECTOR_SIZE is 31, we can use 32 bits, i.e., 4 bytes, to represent
+  // a FastBitSet, rather than using 31 integers.
+  private static void writeBitVector(OutputStream out, FastBitSet bit) throws IOException {
+    int num = 0;
+    for (int pos = 0; pos < FMSketch.BIT_VECTOR_SIZE; pos++) {
+      if (bit.get(pos)) {
+        num |= 1 << pos;
+      }
+    }
+    byte[] i = new byte[4];
+    for (int j = 0; j < 4; j++) {
+      i[j] = (byte) ((num >>> (8 * j)) & 0xff);
+    }
+    out.write(i);
+  }
+
+  /*
+   * Deserializes a Base64-encoded string into an FMSketch and returns it.
+   */
+  public static FMSketch deserializeFM(String s) throws IOException {
+    InputStream is = new ByteArrayInputStream(Base64.decodeBase64(s));
+    try {
+      FMSketch sketch = deserializeFM(is);
+      is.close();
+      return sketch;
+    } catch (IOException e) {
+      throw new RuntimeException(e);
+    }
+  }
+
+  public static FMSketch deserializeFM(InputStream in) throws IOException {
+    checkMagicString(in);
+
+    byte[] nbv = new byte[2];
+    nbv[0] = (byte) in.read();
+    nbv[1] = (byte) in.read();
+
+    int numBitVectors = 0;
+    numBitVectors |= (nbv[0] & 0xff);
+    numBitVectors |= ((nbv[1] & 0xff) << 8);
+
+    FMSketch sketch = new FMSketch(numBitVectors);
+    for (int n = 0; n < numBitVectors; n++) {
+      sketch.setBitVector(readBitVector(in), n);
+    }
+    return sketch;
+  }
+
+  private static FastBitSet readBitVector(InputStream in) throws IOException {
+    FastBitSet fastBitSet = new FastBitSet();
+    fastBitSet.clear();
+    for (int i = 0; i < 4; i++) {
+      byte b = (byte) in.read();
+      for (int j = 0; j < 8; j++) {
+        if ((b & (1 << j)) != 0) {
+          fastBitSet.set(j + 8 * i);
+        }
+      }
+    }
+    return fastBitSet;
+  }
+
+  private static void checkMagicString(InputStream in) throws IOException {
+    byte[] magic = new byte[2];
+    magic[0] = (byte) in.read();
+    magic[1] = (byte) in.read();
+
+    if (!Arrays.equals(magic, MAGIC)) {
+      throw new IllegalArgumentException("The input stream is not an FMSketch stream.");
+    }
+  }
+}
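
The four-byte encoding in writeBitVector/readBitVector is little-endian by byte: bit position p lands in byte p/8 at bit p%8. A self-contained sketch of the same packing using plain arrays (the names are mine, not part of the patch):

    public class BitVectorPacking {
      // pack bit positions 0-30 into 4 little-endian bytes, as writeBitVector does
      static byte[] pack(boolean[] bits) {
        int num = 0;
        for (int pos = 0; pos < bits.length; pos++) {
          if (bits[pos]) {
            num |= 1 << pos;
          }
        }
        byte[] out = new byte[4];
        for (int j = 0; j < 4; j++) {
          out[j] = (byte) ((num >>> (8 * j)) & 0xff);
        }
        return out;
      }

      // inverse of pack, as readBitVector does
      static boolean[] unpack(byte[] in) {
        boolean[] bits = new boolean[32];
        for (int i = 0; i < 4; i++) {
          for (int j = 0; j < 8; j++) {
            if ((in[i] & (1 << j)) != 0) {
              bits[j + 8 * i] = true;
            }
          }
        }
        return bits;
      }

      public static void main(String[] args) {
        boolean[] bits = new boolean[31];
        bits[0] = bits[4] = bits[30] = true;          // illustrative positions
        System.out.println(unpack(pack(bits))[30]);   // true
      }
    }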

http://git-wip-us.apache.org/repos/asf/hive/blob/f8b79fe6/common/src/java/org/apache/hadoop/hive/common/ndv/hll/HyperLogLog.java
----------------------------------------------------------------------
diff --git a/common/src/java/org/apache/hadoop/hive/common/ndv/hll/HyperLogLog.java b/common/src/java/org/apache/hadoop/hive/common/ndv/hll/HyperLogLog.java
index d195546..182560a 100644
--- a/common/src/java/org/apache/hadoop/hive/common/ndv/hll/HyperLogLog.java
+++ b/common/src/java/org/apache/hadoop/hive/common/ndv/hll/HyperLogLog.java
@@ -277,7 +277,9 @@ public class HyperLogLog implements NumDistinctValueEstimator{
   }
 
   public long estimateNumDistinctValues() {
-    return count();
+    // FMSketch treats the NDV of all nulls as 1, but HLL treats it as 0.
+    // To avoid divide-by-zero problems downstream, we follow FMSketch.
+    return count() > 0 ? count() : 1;
   }
 
   public long count() {
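
In effect, a sketch that has seen no values (an all-null column) now reports an NDV of 1 from estimateNumDistinctValues() while count() still returns 0. A tiny illustration (the class name is mine):

    import org.apache.hadoop.hive.common.ndv.hll.HyperLogLog;

    public class EmptyHllNdv {
      public static void main(String[] args) {
        HyperLogLog hll = HyperLogLog.builder().build();      // nothing added
        System.out.println(hll.count());                      // 0
        System.out.println(hll.estimateNumDistinctValues());  // 1, the FMSketch convention
      }
    }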

http://git-wip-us.apache.org/repos/asf/hive/blob/f8b79fe6/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
----------------------------------------------------------------------
diff --git a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
index dd9ad71..05f6cc9 100644
--- a/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
+++ b/common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
@@ -1737,7 +1737,9 @@ public class HiveConf extends Configuration {
         "Whether column accesses are tracked in the QueryPlan.\n" +
         "This is useful to identify how tables are accessed and to determine if there are wasted columns that can be trimmed."),
     HIVE_STATS_NDV_ALGO("hive.stats.ndv.algo", "hll", new PatternSet("hll", "fm"),
-            "hll and fm stand for HyperLogLog and FM-sketch, respectively for computing ndv."),
+        "hll and fm stand for HyperLogLog and FM-sketch, respectively, for computing ndv."),
+    HIVE_STATS_FETCH_BITVECTOR("hive.stats.fetch.bitvector", false,
+        "Whether to fetch the bitvector when computing ndv. Users can turn it off if they want to use the old schema."),
     // standard error allowed for ndv estimates for FM-sketch. A lower value indicates higher accuracy and a
     // higher compute cost.
     HIVE_STATS_NDV_ERROR("hive.stats.ndv.error", (float)20.0,
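
The new flag defaults to false, so deployments still on a pre-HIVE-16997 schema (no BIT_VECTOR column) keep working; the test hive-site.xml files below turn it on. A sketch of toggling it programmatically, assuming HiveConf's usual ConfVars accessors:

    import org.apache.hadoop.hive.conf.HiveConf;

    public class BitVectorFlagExample {
      public static void main(String[] args) {
        HiveConf conf = new HiveConf();
        conf.setBoolVar(HiveConf.ConfVars.HIVE_STATS_FETCH_BITVECTOR, true);
        System.out.println(HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_STATS_FETCH_BITVECTOR));
      }
    }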

http://git-wip-us.apache.org/repos/asf/hive/blob/f8b79fe6/common/src/test/org/apache/hadoop/hive/common/ndv/fm/TestFMSketchSerialization.java
----------------------------------------------------------------------
diff --git a/common/src/test/org/apache/hadoop/hive/common/ndv/fm/TestFMSketchSerialization.java b/common/src/test/org/apache/hadoop/hive/common/ndv/fm/TestFMSketchSerialization.java
new file mode 100644
index 0000000..74fdf58
--- /dev/null
+++ b/common/src/test/org/apache/hadoop/hive/common/ndv/fm/TestFMSketchSerialization.java
@@ -0,0 +1,97 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.common.ndv.fm;
+
+import static org.junit.Assert.assertEquals;
+
+import java.io.IOException;
+
+import javolution.util.FastBitSet;
+
+import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimatorFactory;
+import org.junit.Test;
+
+public class TestFMSketchSerialization {
+
+  private FastBitSet[] deserialize(String s, int numBitVectors) {
+    FastBitSet[] b = new FastBitSet[numBitVectors];
+    for (int j = 0; j < numBitVectors; j++) {
+      b[j] = new FastBitSet(FMSketch.BIT_VECTOR_SIZE);
+      b[j].clear();
+    }
+
+    int vectorIndex = 0;
+
+    /*
+     * Parse input string to obtain the indexes that are set in the bitvector.
+     * When a toString() is called on a FastBitSet object to serialize it, the
+     * serialization adds { and } to the beginning and end of the return String.
+     * Skip "{", "}", ",", " " in the input string.
+     */
+    for (int i = 1; i < s.length() - 1;) {
+      char c = s.charAt(i);
+      i = i + 1;
+
+      // Move on to the next bit vector
+      if (c == '}') {
+        vectorIndex = vectorIndex + 1;
+      }
+
+      // Encountered a numeric value; Extract out the entire number
+      if (c >= '0' && c <= '9') {
+        String t = new String();
+        t = t + c;
+        c = s.charAt(i);
+        i = i + 1;
+
+        while (c != ',' && c != '}') {
+          t = t + c;
+          c = s.charAt(i);
+          i = i + 1;
+        }
+
+        int bitIndex = Integer.parseInt(t);
+        assert (bitIndex >= 0);
+        assert (vectorIndex < numBitVectors);
+        b[vectorIndex].set(bitIndex);
+        if (c == '}') {
+          vectorIndex = vectorIndex + 1;
+        }
+      }
+    }
+    return b;
+  }
+
+  @Test
+  public void testSerDe() throws IOException {
+    String bitVectors = "{0, 4, 5, 7}{0, 1}{0, 1, 2}{0, 1, 4}{0}{0, 2}{0, 3}{0, 2, 3, 4}{0, 1, 4}{0, 1}{0}{0, 1, 3, 8}{0, 2}{0, 2}{0, 9}{0, 1, 4}";
+    FastBitSet[] fastBitSet = deserialize(bitVectors, 16);
+    FMSketch sketch = new FMSketch(16);
+    for (int i = 0; i < 16; i++) {
+      sketch.setBitVector(fastBitSet[i], i);
+    }
+    assertEquals(sketch.estimateNumDistinctValues(), 3);
+    String s = sketch.serialize();
+    FMSketch newSketch = (FMSketch) NumDistinctValueEstimatorFactory
+        .getNumDistinctValueEstimator(s);
+    sketch.equals(newSketch); // result not asserted; equality is verified via serialize() below
+    assertEquals(newSketch.estimateNumDistinctValues(), 3);
+    assertEquals(newSketch.serialize(), s);
+  }
+
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/hive/blob/f8b79fe6/data/conf/hive-site.xml
----------------------------------------------------------------------
diff --git a/data/conf/hive-site.xml b/data/conf/hive-site.xml
index 62364fe..a205b8c 100644
--- a/data/conf/hive-site.xml
+++ b/data/conf/hive-site.xml
@@ -314,5 +314,9 @@
   <value>true</value>
 </property>
 
+<property>
+  <name>hive.stats.fetch.bitvector</name>
+  <value>true</value>
+</property>
 
 </configuration>

http://git-wip-us.apache.org/repos/asf/hive/blob/f8b79fe6/data/conf/llap/hive-site.xml
----------------------------------------------------------------------
diff --git a/data/conf/llap/hive-site.xml b/data/conf/llap/hive-site.xml
index cac5a3b..870b584 100644
--- a/data/conf/llap/hive-site.xml
+++ b/data/conf/llap/hive-site.xml
@@ -333,5 +333,9 @@
   <value>4</value>
   <description> </description>
 </property>
+<property>
+  <name>hive.stats.fetch.bitvector</name>
+  <value>true</value>
+</property>
 
 </configuration>

http://git-wip-us.apache.org/repos/asf/hive/blob/f8b79fe6/data/conf/perf-reg/hive-site.xml
----------------------------------------------------------------------
diff --git a/data/conf/perf-reg/hive-site.xml b/data/conf/perf-reg/hive-site.xml
index 012369f..331a500 100644
--- a/data/conf/perf-reg/hive-site.xml
+++ b/data/conf/perf-reg/hive-site.xml
@@ -282,4 +282,9 @@
   <value>true</value>
 </property>
 
+<property>
+  <name>hive.stats.fetch.bitvector</name>
+  <value>true</value>
+</property>
+
 </configuration>

http://git-wip-us.apache.org/repos/asf/hive/blob/f8b79fe6/data/conf/tez/hive-site.xml
----------------------------------------------------------------------
diff --git a/data/conf/tez/hive-site.xml b/data/conf/tez/hive-site.xml
index 28abc2d..35e8c99 100644
--- a/data/conf/tez/hive-site.xml
+++ b/data/conf/tez/hive-site.xml
@@ -278,4 +278,9 @@
   <value>true</value>
 </property>
 
+<property>
+  <name>hive.stats.fetch.bitvector</name>
+  <value>true</value>
+</property>
+
 </configuration>

http://git-wip-us.apache.org/repos/asf/hive/blob/f8b79fe6/metastore/scripts/upgrade/derby/044-HIVE-16997.derby.sql
----------------------------------------------------------------------
diff --git a/metastore/scripts/upgrade/derby/044-HIVE-16997.derby.sql b/metastore/scripts/upgrade/derby/044-HIVE-16997.derby.sql
new file mode 100644
index 0000000..2c2177b
--- /dev/null
+++ b/metastore/scripts/upgrade/derby/044-HIVE-16997.derby.sql
@@ -0,0 +1 @@
+ALTER TABLE "APP"."PART_COL_STATS" ADD COLUMN "BIT_VECTOR" BLOB;

http://git-wip-us.apache.org/repos/asf/hive/blob/f8b79fe6/metastore/scripts/upgrade/derby/hive-schema-3.0.0.derby.sql
----------------------------------------------------------------------
diff --git a/metastore/scripts/upgrade/derby/hive-schema-3.0.0.derby.sql b/metastore/scripts/upgrade/derby/hive-schema-3.0.0.derby.sql
index a9a5329..f4cbba6 100644
--- a/metastore/scripts/upgrade/derby/hive-schema-3.0.0.derby.sql
+++ b/metastore/scripts/upgrade/derby/hive-schema-3.0.0.derby.sql
@@ -94,7 +94,7 @@ CREATE TABLE "APP"."MASTER_KEYS" ("KEY_ID" INTEGER NOT NULL generated always as
 
 CREATE TABLE "APP"."DELEGATION_TOKENS" ( "TOKEN_IDENT" VARCHAR(767) NOT NULL, "TOKEN" VARCHAR(767));
 
-CREATE TABLE "APP"."PART_COL_STATS"("DB_NAME" VARCHAR(128) NOT NULL,"TABLE_NAME" VARCHAR(256) NOT NULL, "PARTITION_NAME" VARCHAR(767) NOT NULL, "COLUMN_NAME" VARCHAR(767) NOT NULL, "COLUMN_TYPE" VARCHAR(128) NOT NULL, "LONG_LOW_VALUE" BIGINT, "LONG_HIGH_VALUE" BIGINT, "DOUBLE_LOW_VALUE" DOUBLE, "DOUBLE_HIGH_VALUE" DOUBLE, "BIG_DECIMAL_LOW_VALUE" VARCHAR(4000), "BIG_DECIMAL_HIGH_VALUE" VARCHAR(4000),"NUM_DISTINCTS" BIGINT, "NUM_NULLS" BIGINT NOT NULL, "AVG_COL_LEN" DOUBLE, "MAX_COL_LEN" BIGINT, "NUM_TRUES" BIGINT, "NUM_FALSES" BIGINT, "LAST_ANALYZED" BIGINT, "CS_ID" BIGINT NOT NULL, "PART_ID" BIGINT NOT NULL);
+CREATE TABLE "APP"."PART_COL_STATS"("DB_NAME" VARCHAR(128) NOT NULL,"TABLE_NAME" VARCHAR(256) NOT NULL, "PARTITION_NAME" VARCHAR(767) NOT NULL, "COLUMN_NAME" VARCHAR(767) NOT NULL, "COLUMN_TYPE" VARCHAR(128) NOT NULL, "LONG_LOW_VALUE" BIGINT, "LONG_HIGH_VALUE" BIGINT, "DOUBLE_LOW_VALUE" DOUBLE, "DOUBLE_HIGH_VALUE" DOUBLE, "BIG_DECIMAL_LOW_VALUE" VARCHAR(4000), "BIG_DECIMAL_HIGH_VALUE" VARCHAR(4000),"NUM_DISTINCTS" BIGINT, "BIT_VECTOR" BLOB, "NUM_NULLS" BIGINT NOT NULL, "AVG_COL_LEN" DOUBLE, "MAX_COL_LEN" BIGINT, "NUM_TRUES" BIGINT, "NUM_FALSES" BIGINT, "LAST_ANALYZED" BIGINT, "CS_ID" BIGINT NOT NULL, "PART_ID" BIGINT NOT NULL);
 
 CREATE TABLE "APP"."VERSION" ("VER_ID" BIGINT NOT NULL, "SCHEMA_VERSION" VARCHAR(127) NOT NULL, "VERSION_COMMENT" VARCHAR(255));
 
http://git-wip-us.apache.org/repos/asf/hive/blob/f8b79fe6/metastore/scripts/upgrade/derby/upgrade-2.3.0-to-3.0.0.derby.sql
----------------------------------------------------------------------
diff --git a/metastore/scripts/upgrade/derby/upgrade-2.3.0-to-3.0.0.derby.sql b/metastore/scripts/upgrade/derby/upgrade-2.3.0-to-3.0.0.derby.sql
index 30513dc..01b6f90 100644
--- a/metastore/scripts/upgrade/derby/upgrade-2.3.0-to-3.0.0.derby.sql
+++ b/metastore/scripts/upgrade/derby/upgrade-2.3.0-to-3.0.0.derby.sql
@@ -2,5 +2,6 @@
 RUN '041-HIVE-16556.derby.sql';
 RUN '042-HIVE-16575.derby.sql';
 RUN '043-HIVE-16922.derby.sql';
+RUN '044-HIVE-16997.derby.sql';
 
 UPDATE "APP".VERSION SET SCHEMA_VERSION='3.0.0', VERSION_COMMENT='Hive release version 3.0.0' where VER_ID=1;

http://git-wip-us.apache.org/repos/asf/hive/blob/f8b79fe6/metastore/scripts/upgrade/mssql/029-HIVE-16997.mssql.sql
----------------------------------------------------------------------
diff --git a/metastore/scripts/upgrade/mssql/029-HIVE-16997.mssql.sql b/metastore/scripts/upgrade/mssql/029-HIVE-16997.mssql.sql
new file mode 100644
index 0000000..1882c59
--- /dev/null
+++ b/metastore/scripts/upgrade/mssql/029-HIVE-16997.mssql.sql
@@ -0,0 +1 @@
+ALTER TABLE PART_COL_STATS ADD BIT_VECTOR VARBINARY(MAX);

http://git-wip-us.apache.org/repos/asf/hive/blob/f8b79fe6/metastore/scripts/upgrade/mssql/hive-schema-3.0.0.mssql.sql
----------------------------------------------------------------------
diff --git a/metastore/scripts/upgrade/mssql/hive-schema-3.0.0.mssql.sql b/metastore/scripts/upgrade/mssql/hive-schema-3.0.0.mssql.sql
index 1cfe2d1..fa8fc6e 100644
--- a/metastore/scripts/upgrade/mssql/hive-schema-3.0.0.mssql.sql
+++ b/metastore/scripts/upgrade/mssql/hive-schema-3.0.0.mssql.sql
@@ -87,6 +87,7 @@ CREATE TABLE PART_COL_STATS
     LONG_LOW_VALUE bigint NULL,
     MAX_COL_LEN bigint NULL,
     NUM_DISTINCTS bigint NULL,
+    BIT_VECTOR varbinary(max) NULL,
     NUM_FALSES bigint NULL,
     NUM_NULLS bigint NOT NULL,
     NUM_TRUES bigint NULL,

http://git-wip-us.apache.org/repos/asf/hive/blob/f8b79fe6/metastore/scripts/upgrade/mssql/upgrade-2.3.0-to-3.0.0.mssql.sql
----------------------------------------------------------------------
diff --git a/metastore/scripts/upgrade/mssql/upgrade-2.3.0-to-3.0.0.mssql.sql b/metastore/scripts/upgrade/mssql/upgrade-2.3.0-to-3.0.0.mssql.sql
index 5683254..21d62ae 100644
--- a/metastore/scripts/upgrade/mssql/upgrade-2.3.0-to-3.0.0.mssql.sql
+++ b/metastore/scripts/upgrade/mssql/upgrade-2.3.0-to-3.0.0.mssql.sql
@@ -3,6 +3,7 @@ SELECT 'Upgrading MetaStore schema from 2.3.0 to 3.0.0' AS MESSAGE;
 :r 026-HIVE-16556.mssql.sql
 :r 027-HIVE-16575.mssql.sql
 :r 028-HIVE-16922.mssql.sql
+:r 029-HIVE-16997.mssql.sql
 
 UPDATE VERSION SET SCHEMA_VERSION='3.0.0', VERSION_COMMENT='Hive release version 3.0.0' where VER_ID=1;
 SELECT 'Finished upgrading MetaStore schema from 2.3.0 to 3.0.0' AS MESSAGE;

http://git-wip-us.apache.org/repos/asf/hive/blob/f8b79fe6/metastore/scripts/upgrade/mysql/044-HIVE-16997.mysql.sql
----------------------------------------------------------------------
diff --git a/metastore/scripts/upgrade/mysql/044-HIVE-16997.mysql.sql b/metastore/scripts/upgrade/mysql/044-HIVE-16997.mysql.sql
new file mode 100644
index 0000000..4954b2e
--- /dev/null
+++ b/metastore/scripts/upgrade/mysql/044-HIVE-16997.mysql.sql
@@ -0,0 +1 @@
+ALTER TABLE PART_COL_STATS ADD COLUMN BIT_VECTOR BLOB;

http://git-wip-us.apache.org/repos/asf/hive/blob/f8b79fe6/metastore/scripts/upgrade/mysql/hive-schema-3.0.0.mysql.sql
----------------------------------------------------------------------
diff --git a/metastore/scripts/upgrade/mysql/hive-schema-3.0.0.mysql.sql b/metastore/scripts/upgrade/mysql/hive-schema-3.0.0.mysql.sql
index 97d881f..31963d0 100644
--- a/metastore/scripts/upgrade/mysql/hive-schema-3.0.0.mysql.sql
+++ b/metastore/scripts/upgrade/mysql/hive-schema-3.0.0.mysql.sql
@@ -690,6 +690,7 @@ CREATE TABLE IF NOT EXISTS `PART_COL_STATS` (
 `BIG_DECIMAL_HIGH_VALUE` varchar(4000) CHARACTER SET latin1 COLLATE latin1_bin,
  `NUM_NULLS` bigint(20) NOT NULL,
  `NUM_DISTINCTS` bigint(20),
+ `BIT_VECTOR` blob,
  `AVG_COL_LEN` double(53,4),
  `MAX_COL_LEN` bigint(20),
  `NUM_TRUES` bigint(20),

http://git-wip-us.apache.org/repos/asf/hive/blob/f8b79fe6/metastore/scripts/upgrade/mysql/upgrade-2.3.0-to-3.0.0.mysql.sql
----------------------------------------------------------------------
diff --git a/metastore/scripts/upgrade/mysql/upgrade-2.3.0-to-3.0.0.mysql.sql b/metastore/scripts/upgrade/mysql/upgrade-2.3.0-to-3.0.0.mysql.sql
index ba62939..9cd3a62 100644
--- a/metastore/scripts/upgrade/mysql/upgrade-2.3.0-to-3.0.0.mysql.sql
+++ b/metastore/scripts/upgrade/mysql/upgrade-2.3.0-to-3.0.0.mysql.sql
@@ -3,6 +3,7 @@ SELECT 'Upgrading MetaStore schema from 2.3.0 to 3.0.0' AS ' ';
 SOURCE 041-HIVE-16556.mysql.sql;
 SOURCE 042-HIVE-16575.mysql.sql;
 SOURCE 043-HIVE-16922.mysql.sql;
+SOURCE 044-HIVE-16997.mysql.sql;
 
 UPDATE VERSION SET SCHEMA_VERSION='3.0.0', VERSION_COMMENT='Hive release version 3.0.0' where VER_ID=1;
 SELECT 'Finished upgrading MetaStore schema from 2.3.0 to 3.0.0' AS ' ';

http://git-wip-us.apache.org/repos/asf/hive/blob/f8b79fe6/metastore/scripts/upgrade/oracle/044-HIVE-16997.oracle.sql
----------------------------------------------------------------------
diff --git a/metastore/scripts/upgrade/oracle/044-HIVE-16997.oracle.sql b/metastore/scripts/upgrade/oracle/044-HIVE-16997.oracle.sql
new file mode 100644
index 0000000..44e5fa3
--- /dev/null
+++ b/metastore/scripts/upgrade/oracle/044-HIVE-16997.oracle.sql
@@ -0,0 +1 @@
+ALTER TABLE PART_COL_STATS ADD BIT_VECTOR BLOB NULL;

http://git-wip-us.apache.org/repos/asf/hive/blob/f8b79fe6/metastore/scripts/upgrade/oracle/hive-schema-3.0.0.oracle.sql
----------------------------------------------------------------------
diff --git a/metastore/scripts/upgrade/oracle/hive-schema-3.0.0.oracle.sql b/metastore/scripts/upgrade/oracle/hive-schema-3.0.0.oracle.sql
index 8fdb552..81e4208 100644
--- a/metastore/scripts/upgrade/oracle/hive-schema-3.0.0.oracle.sql
+++ b/metastore/scripts/upgrade/oracle/hive-schema-3.0.0.oracle.sql
@@ -515,6 +515,7 @@ CREATE TABLE PART_COL_STATS (
  BIG_DECIMAL_HIGH_VALUE VARCHAR2(4000),
  NUM_NULLS NUMBER NOT NULL,
  NUM_DISTINCTS NUMBER,
+ BIT_VECTOR BLOB,
  AVG_COL_LEN NUMBER,
  MAX_COL_LEN NUMBER,
  NUM_TRUES NUMBER,

http://git-wip-us.apache.org/repos/asf/hive/blob/f8b79fe6/metastore/scripts/upgrade/oracle/upgrade-2.3.0-to-3.0.0.oracle.sql
----------------------------------------------------------------------
diff --git a/metastore/scripts/upgrade/oracle/upgrade-2.3.0-to-3.0.0.oracle.sql b/metastore/scripts/upgrade/oracle/upgrade-2.3.0-to-3.0.0.oracle.sql
index 0a70d47..6a26649 100644
--- a/metastore/scripts/upgrade/oracle/upgrade-2.3.0-to-3.0.0.oracle.sql
+++ b/metastore/scripts/upgrade/oracle/upgrade-2.3.0-to-3.0.0.oracle.sql
@@ -3,6 +3,7 @@ SELECT 'Upgrading MetaStore schema from 2.3.0 to 3.0.0' AS Status from dual;
 @041-HIVE-16556.oracle.sql;
 @042-HIVE-16575.oracle.sql;
 @043-HIVE-16922.oracle.sql;
+@044-HIVE-16997.oracle.sql;
 
 UPDATE VERSION SET SCHEMA_VERSION='3.0.0', VERSION_COMMENT='Hive release version 3.0.0' where VER_ID=1;
 SELECT 'Finished upgrading MetaStore schema from 2.3.0 to 3.0.0' AS Status 
from dual;

http://git-wip-us.apache.org/repos/asf/hive/blob/f8b79fe6/metastore/scripts/upgrade/postgres/043-HIVE-16997.postgres.sql
----------------------------------------------------------------------
diff --git a/metastore/scripts/upgrade/postgres/043-HIVE-16997.postgres.sql b/metastore/scripts/upgrade/postgres/043-HIVE-16997.postgres.sql
new file mode 100644
index 0000000..bee8c44
--- /dev/null
+++ b/metastore/scripts/upgrade/postgres/043-HIVE-16997.postgres.sql
@@ -0,0 +1 @@
+ALTER TABLE "PART_COL_STATS" ADD COLUMN "BIT_VECTOR" BYTEA;

http://git-wip-us.apache.org/repos/asf/hive/blob/f8b79fe6/metastore/scripts/upgrade/postgres/hive-schema-3.0.0.postgres.sql
----------------------------------------------------------------------
diff --git a/metastore/scripts/upgrade/postgres/hive-schema-3.0.0.postgres.sql b/metastore/scripts/upgrade/postgres/hive-schema-3.0.0.postgres.sql
index 1cdeb6b..5cb5cb0 100644
--- a/metastore/scripts/upgrade/postgres/hive-schema-3.0.0.postgres.sql
+++ b/metastore/scripts/upgrade/postgres/hive-schema-3.0.0.postgres.sql
@@ -534,6 +534,7 @@ CREATE TABLE "PART_COL_STATS" (
  "BIG_DECIMAL_HIGH_VALUE" character varying(4000) DEFAULT NULL::character 
varying,
  "NUM_NULLS" bigint NOT NULL,
  "NUM_DISTINCTS" bigint,
+ "BIT_VECTOR" bytea,
  "AVG_COL_LEN" double precision,
  "MAX_COL_LEN" bigint,
  "NUM_TRUES" bigint,

http://git-wip-us.apache.org/repos/asf/hive/blob/f8b79fe6/metastore/scripts/upgrade/postgres/upgrade-2.3.0-to-3.0.0.postgres.sql
----------------------------------------------------------------------
diff --git a/metastore/scripts/upgrade/postgres/upgrade-2.3.0-to-3.0.0.postgres.sql b/metastore/scripts/upgrade/postgres/upgrade-2.3.0-to-3.0.0.postgres.sql
index c44dd06..ee5a673 100644
--- a/metastore/scripts/upgrade/postgres/upgrade-2.3.0-to-3.0.0.postgres.sql
+++ b/metastore/scripts/upgrade/postgres/upgrade-2.3.0-to-3.0.0.postgres.sql
@@ -3,6 +3,7 @@ SELECT 'Upgrading MetaStore schema from 2.3.0 to 3.0.0';
 \i 040-HIVE-16556.postgres.sql;
 \i 041-HIVE-16575.postgres.sql;
 \i 042-HIVE-16922.postgres.sql;
+\i 043-HIVE-16997.postgres.sql;
 
 UPDATE "VERSION" SET "SCHEMA_VERSION"='3.0.0', "VERSION_COMMENT"='Hive release version 3.0.0' where "VER_ID"=1;
 SELECT 'Finished upgrading MetaStore schema from 2.3.0 to 3.0.0';

http://git-wip-us.apache.org/repos/asf/hive/blob/f8b79fe6/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreDirectSql.java
----------------------------------------------------------------------
diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreDirectSql.java b/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreDirectSql.java
index a960b2d..73754ff 100644
--- a/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreDirectSql.java
+++ b/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreDirectSql.java
@@ -21,6 +21,7 @@ package org.apache.hadoop.hive.metastore;
 import static org.apache.commons.lang.StringUtils.join;
 import static org.apache.commons.lang.StringUtils.repeat;
 
+import java.sql.Blob;
 import java.sql.Clob;
 import java.sql.Connection;
 import java.sql.Statement;
@@ -33,6 +34,7 @@ import java.util.HashMap;
 import java.util.Iterator;
 import java.util.List;
 import java.util.Map;
+import java.util.Map.Entry;
 import java.util.TreeMap;
 
 import javax.jdo.PersistenceManager;
@@ -64,6 +66,8 @@ import org.apache.hadoop.hive.metastore.api.SerDeInfo;
 import org.apache.hadoop.hive.metastore.api.SkewedInfo;
 import org.apache.hadoop.hive.metastore.api.StorageDescriptor;
 import org.apache.hadoop.hive.metastore.api.Table;
+import org.apache.hadoop.hive.metastore.columnstats.aggr.ColumnStatsAggregator;
+import org.apache.hadoop.hive.metastore.columnstats.aggr.ColumnStatsAggregatorFactory;
 import org.apache.hadoop.hive.metastore.model.MConstraint;
 import org.apache.hadoop.hive.metastore.model.MDatabase;
 import org.apache.hadoop.hive.metastore.model.MPartitionColumnStatistics;
@@ -941,6 +945,24 @@ class MetaStoreDirectSql {
     }
   }
 
+  static String extractSqlBlob(Object value) throws MetaException {
+    if (value == null)
+      return null;
+    if (value instanceof Blob) {
+      try {
+        // getBytes javadoc: pos is the ordinal position of the first byte in
+        // the BLOB value to be extracted; the first byte is at position 1
+        return new String(((Blob) value).getBytes(1, (int) ((Blob) value).length()));
+      } catch (SQLException e) {
+        throw new MetaException("Encountered an error while processing blob.");
+      }
+    } else {
+      // this may happen when enableBitVector is false
+      LOG.debug("Expected blob type but got " + value.getClass().getName());
+      return null;
+    }
+  }
+
   private static String trimCommaList(StringBuilder sb) {
     if (sb.length() > 0) {
       sb.setLength(sb.length() - 1);
@@ -1221,12 +1243,12 @@ class MetaStoreDirectSql {
    * @throws MetaException
    */
   public ColumnStatistics getTableStats(final String dbName, final String tableName,
-      List<String> colNames) throws MetaException {
+      List<String> colNames, boolean enableBitVector) throws MetaException {
     if (colNames == null || colNames.isEmpty()) {
       return null;
     }
     final boolean doTrace = LOG.isDebugEnabled();
-    final String queryText0 = "select " + STATS_COLLIST + " from " + TAB_COL_STATS + " "
+    final String queryText0 = "select " + getStatsList(enableBitVector) + " from " + TAB_COL_STATS + " "
           + " where \"DB_NAME\" = ? and \"TABLE_NAME\" = ? and \"COLUMN_NAME\" in (";
     Batchable<String, Object[]> b = new Batchable<String, Object[]>() {
       public List<Object[]> run(List<String> input) throws MetaException {
@@ -1260,8 +1282,8 @@ class MetaStoreDirectSql {
   }
 
   public AggrStats aggrColStatsForPartitions(String dbName, String tableName,
-      List<String> partNames, List<String> colNames, boolean useDensityFunctionForNDVEstimation, double  ndvTuner)
-      throws MetaException {
+      List<String> partNames, List<String> colNames, boolean useDensityFunctionForNDVEstimation,
+      double ndvTuner, boolean enableBitVector) throws MetaException {
     if (colNames.isEmpty() || partNames.isEmpty()) {
       LOG.debug("Columns is empty or partNames is empty : Short-circuiting stats eval");
       return new AggrStats(Collections.<ColumnStatisticsObj>emptyList(), 0); // Nothing to aggregate
@@ -1295,7 +1317,7 @@ class MetaStoreDirectSql {
           // Read aggregated stats for one column
           colStatsAggrFromDB =
               columnStatisticsObjForPartitions(dbName, tableName, partNames, colNamesForDB,
-                  partsFound, useDensityFunctionForNDVEstimation, ndvTuner);
+                  partsFound, useDensityFunctionForNDVEstimation, ndvTuner, enableBitVector);
           if (!colStatsAggrFromDB.isEmpty()) {
             ColumnStatisticsObj colStatsAggr = colStatsAggrFromDB.get(0);
             colStatsList.add(colStatsAggr);
@@ -1308,7 +1330,7 @@ class MetaStoreDirectSql {
       partsFound = partsFoundForPartitions(dbName, tableName, partNames, colNames);
       colStatsList =
           columnStatisticsObjForPartitions(dbName, tableName, partNames, colNames, partsFound,
-              useDensityFunctionForNDVEstimation, ndvTuner);
+              useDensityFunctionForNDVEstimation, ndvTuner, enableBitVector);
     }
     LOG.info("useDensityFunctionForNDVEstimation = " + useDensityFunctionForNDVEstimation
         + "\npartsFound = " + partsFound + "\nColumnStatisticsObj = "
@@ -1371,14 +1393,14 @@ class MetaStoreDirectSql {
 
   private List<ColumnStatisticsObj> columnStatisticsObjForPartitions(final String dbName,
     final String tableName, final List<String> partNames, List<String> colNames, long partsFound,
-    final boolean useDensityFunctionForNDVEstimation, final double  ndvTuner) throws MetaException {
+    final boolean useDensityFunctionForNDVEstimation, final double ndvTuner, final boolean enableBitVector) throws MetaException {
     final boolean areAllPartsFound = (partsFound == partNames.size());
     return runBatched(colNames, new Batchable<String, ColumnStatisticsObj>() {
       public List<ColumnStatisticsObj> run(final List<String> inputColNames) throws MetaException {
         return runBatched(partNames, new Batchable<String, ColumnStatisticsObj>() {
           public List<ColumnStatisticsObj> run(List<String> inputPartNames) throws MetaException {
             return columnStatisticsObjForPartitionsBatch(dbName, tableName, inputPartNames,
-                inputColNames, areAllPartsFound, useDensityFunctionForNDVEstimation, ndvTuner);
+                inputColNames, areAllPartsFound, useDensityFunctionForNDVEstimation, ndvTuner, enableBitVector);
           }
         });
       }
           }
         });
       }
@@ -1388,14 +1410,10 @@ class MetaStoreDirectSql {
   // Get aggregated column stats for a table per partition for all columns in the partition
   // This is primarily used to populate stats object when using CachedStore (Check CachedStore#prewarm)
   public Map<String, List<ColumnStatisticsObj>> getColStatsForTablePartitions(String dbName,
-      String tblName) throws MetaException {
-    String queryText =
-        "select \"PARTITION_NAME\", \"COLUMN_NAME\", \"COLUMN_TYPE\", \"LONG_LOW_VALUE\", "
-            + "\"LONG_HIGH_VALUE\", \"DOUBLE_LOW_VALUE\", \"DOUBLE_HIGH_VALUE\",  "
-            + "\"BIG_DECIMAL_LOW_VALUE\", \"BIG_DECIMAL_HIGH_VALUE\", \"NUM_NULLS\", "
-            + "\"NUM_DISTINCTS\", \"AVG_COL_LEN\", \"MAX_COL_LEN\", \"NUM_TRUES\", \"NUM_FALSES\""
-            + " from " + PART_COL_STATS + " where \"DB_NAME\" = ? and \"TABLE_NAME\" = ?"
-            + " order by \"PARTITION_NAME\"";
+      String tblName, boolean enableBitVector) throws MetaException {
+    String queryText = "select \"PARTITION_NAME\", " + getStatsList(enableBitVector) + " from "
+        + " " + PART_COL_STATS + " where \"DB_NAME\" = ? and \"TABLE_NAME\" = ?"
+        + " order by \"PARTITION_NAME\"";
     long start = 0;
     long end = 0;
     Query query = null;
@@ -1446,6 +1464,28 @@ class MetaStoreDirectSql {
   /** Should be called with the list short enough to not trip up Oracle/etc. */
   private List<ColumnStatisticsObj> columnStatisticsObjForPartitionsBatch(String dbName,
       String tableName, List<String> partNames, List<String> colNames, boolean areAllPartsFound,
+      boolean useDensityFunctionForNDVEstimation, double ndvTuner, boolean enableBitVector) throws MetaException {
+    if (enableBitVector) {
+      return aggrStatsUseJava(dbName, tableName, partNames, colNames, areAllPartsFound, useDensityFunctionForNDVEstimation, ndvTuner);
+    } else {
+      return aggrStatsUseDB(dbName, tableName, partNames, colNames, areAllPartsFound, useDensityFunctionForNDVEstimation, ndvTuner);
+    }
+  }
+
+  private List<ColumnStatisticsObj> aggrStatsUseJava(String dbName, String tableName,
+      List<String> partNames, List<String> colNames, boolean areAllPartsFound,
+      boolean useDensityFunctionForNDVEstimation, double ndvTuner) throws MetaException {
+    // 1. get all the stats for colNames in partNames;
+    List<ColumnStatistics> partStats = getPartitionStats(dbName, tableName, partNames, colNames,
+        true);
+    // 2. use util function to aggr stats
+    return MetaStoreUtils.aggrPartitionStats(partStats, dbName, tableName, partNames, colNames,
+        areAllPartsFound, useDensityFunctionForNDVEstimation, ndvTuner);
+  }
+
+  private List<ColumnStatisticsObj> aggrStatsUseDB(String dbName,
+      String tableName, List<String> partNames, List<String> colNames, boolean areAllPartsFound,
       boolean useDensityFunctionForNDVEstimation, double ndvTuner) throws MetaException {
     // TODO: all the extrapolation logic should be moved out of this class,
     // only mechanical data retrieval should remain here.
@@ -1717,10 +1757,10 @@ class MetaStoreDirectSql {
     ColumnStatisticsData data = new ColumnStatisticsData();
     ColumnStatisticsObj cso = new ColumnStatisticsObj((String)row[i++], (String)row[i++], data);
     Object llow = row[i++], lhigh = row[i++], dlow = row[i++], dhigh = row[i++],
-        declow = row[i++], dechigh = row[i++], nulls = row[i++], dist = row[i++],
+        declow = row[i++], dechigh = row[i++], nulls = row[i++], dist = row[i++], bitVector = row[i++],
         avglen = row[i++], maxlen = row[i++], trues = row[i++], falses = row[i++];
     StatObjectConverter.fillColumnStatisticsData(cso.getColType(), data,
-        llow, lhigh, dlow, dhigh, declow, dechigh, nulls, dist, avglen, maxlen, trues, falses);
+        llow, lhigh, dlow, dhigh, declow, dechigh, nulls, dist, bitVector, avglen, maxlen, trues, falses);
     return cso;
   }
 
@@ -1753,14 +1793,14 @@ class MetaStoreDirectSql {
   }
 
   public List<ColumnStatistics> getPartitionStats(final String dbName, final String tableName,
-      final List<String> partNames, List<String> colNames) throws MetaException {
+      final List<String> partNames, List<String> colNames, boolean enableBitVector) throws MetaException {
     if (colNames.isEmpty() || partNames.isEmpty()) {
       return Collections.emptyList();
     }
     final boolean doTrace = LOG.isDebugEnabled();
-    final String queryText0 = "select \"PARTITION_NAME\", " + STATS_COLLIST + " from "
-      + " " + PART_COL_STATS + " where \"DB_NAME\" = ? and \"TABLE_NAME\" = ? and \"COLUMN_NAME\""
-      + "  in (%1$s) AND \"PARTITION_NAME\" in (%2$s) order by \"PARTITION_NAME\"";
+    final String queryText0 = "select \"PARTITION_NAME\", " + getStatsList(enableBitVector) + " from "
+        + " " + PART_COL_STATS + " where \"DB_NAME\" = ? and \"TABLE_NAME\" = ? and \"COLUMN_NAME\""
+        + "  in (%1$s) AND \"PARTITION_NAME\" in (%2$s) order by \"PARTITION_NAME\"";
     Batchable<String, Object[]> b = new Batchable<String, Object[]>() {
       public List<Object[]> run(final List<String> inputColNames) throws MetaException {
         Batchable<String, Object[]> b2 = new Batchable<String, Object[]>() {
@@ -1812,11 +1852,13 @@ class MetaStoreDirectSql {
   }
 
   /** The common query part for table and partition stats */
-  private static final String STATS_COLLIST =
-      "\"COLUMN_NAME\", \"COLUMN_TYPE\", \"LONG_LOW_VALUE\", \"LONG_HIGH_VALUE\", "
-    + "\"DOUBLE_LOW_VALUE\", \"DOUBLE_HIGH_VALUE\", \"BIG_DECIMAL_LOW_VALUE\", "
-    + "\"BIG_DECIMAL_HIGH_VALUE\", \"NUM_NULLS\", \"NUM_DISTINCTS\", \"AVG_COL_LEN\", "
-    + "\"MAX_COL_LEN\", \"NUM_TRUES\", \"NUM_FALSES\", \"LAST_ANALYZED\" ";
+  private final String getStatsList(boolean enableBitVector) {
+    return "\"COLUMN_NAME\", \"COLUMN_TYPE\", \"LONG_LOW_VALUE\", \"LONG_HIGH_VALUE\", "
+        + "\"DOUBLE_LOW_VALUE\", \"DOUBLE_HIGH_VALUE\", \"BIG_DECIMAL_LOW_VALUE\", "
+        + "\"BIG_DECIMAL_HIGH_VALUE\", \"NUM_NULLS\", \"NUM_DISTINCTS\", "
+        + (enableBitVector ? "\"BIT_VECTOR\", " : "\'\', ") + "\"AVG_COL_LEN\", "
+        + "\"MAX_COL_LEN\", \"NUM_TRUES\", \"NUM_FALSES\", \"LAST_ANALYZED\" ";
+  }
 
   private ColumnStatistics makeColumnStats(
       List<Object[]> list, ColumnStatisticsDesc csd, int offset) throws MetaException {
@@ -1826,7 +1868,7 @@ class MetaStoreDirectSql {
     for (Object[] row : list) {
       // LastAnalyzed is stored per column but thrift has it per several;
       // get the lowest for now as nobody actually uses this field.
-      Object laObj = row[offset + 14];
+      Object laObj = row[offset + 15];
       if (laObj != null && (!csd.isSetLastAnalyzed() || csd.getLastAnalyzed() > extractSqlLong(laObj))) {
         csd.setLastAnalyzed(extractSqlLong(laObj));
       }
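
Note the invariant in getStatsList above: when the bitvector fetch is disabled, a '' literal stands in for "BIT_VECTOR", so the projected row always has the same number of columns; that is why makeColumnStats reads LAST_ANALYZED at row[offset + 15] unconditionally. A standalone sketch of that invariant (the method and class names are mine):

    public class StatsColumnList {
      // mirrors getStatsList(...): a '' placeholder keeps positional reads stable
      static String statsColumnList(boolean enableBitVector) {
        return "\"COLUMN_NAME\", \"COLUMN_TYPE\", \"LONG_LOW_VALUE\", \"LONG_HIGH_VALUE\", "
            + "\"DOUBLE_LOW_VALUE\", \"DOUBLE_HIGH_VALUE\", \"BIG_DECIMAL_LOW_VALUE\", "
            + "\"BIG_DECIMAL_HIGH_VALUE\", \"NUM_NULLS\", \"NUM_DISTINCTS\", "
            + (enableBitVector ? "\"BIT_VECTOR\", " : "'', ")
            + "\"AVG_COL_LEN\", \"MAX_COL_LEN\", \"NUM_TRUES\", \"NUM_FALSES\", \"LAST_ANALYZED\"";
      }

      public static void main(String[] args) {
        // both variants project 16 expressions, so LAST_ANALYZED is always the 16th
        System.out.println(statsColumnList(true).split(", ").length);   // 16
        System.out.println(statsColumnList(false).split(", ").length);  // 16
      }
    }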

http://git-wip-us.apache.org/repos/asf/hive/blob/f8b79fe6/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreUtils.java
----------------------------------------------------------------------
diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreUtils.java b/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreUtils.java
index b52c94c..edfbf3a 100644
--- a/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreUtils.java
+++ b/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreUtils.java
@@ -71,8 +71,10 @@ import org.apache.hadoop.hive.metastore.api.SerDeInfo;
 import org.apache.hadoop.hive.metastore.api.StorageDescriptor;
 import org.apache.hadoop.hive.metastore.api.Table;
 import org.apache.hadoop.hive.metastore.api.hive_metastoreConstants;
-import org.apache.hadoop.hive.metastore.hbase.stats.merge.ColumnStatsMerger;
-import org.apache.hadoop.hive.metastore.hbase.stats.merge.ColumnStatsMergerFactory;
+import org.apache.hadoop.hive.metastore.columnstats.aggr.ColumnStatsAggregator;
+import org.apache.hadoop.hive.metastore.columnstats.aggr.ColumnStatsAggregatorFactory;
+import org.apache.hadoop.hive.metastore.columnstats.merge.ColumnStatsMerger;
+import org.apache.hadoop.hive.metastore.columnstats.merge.ColumnStatsMergerFactory;
 import org.apache.hadoop.hive.metastore.partition.spec.PartitionSpecProxy;
 import org.apache.hadoop.hive.serde.serdeConstants;
 import org.apache.hadoop.hive.serde2.Deserializer;
@@ -1936,7 +1938,7 @@ public class MetaStoreUtils {
     }
     return metaException;
   }
-  
+
   public static List<String> getColumnNames(List<FieldSchema> schema) {
     List<String> cols = new ArrayList<>(schema.size());
     for (FieldSchema fs : schema) {
@@ -1945,4 +1947,45 @@ public class MetaStoreUtils {
     return cols;
   }
 
+  // Given a list of per-partition stats, compute the aggregate stats per column.
+  public static List<ColumnStatisticsObj> aggrPartitionStats(List<ColumnStatistics> partStats,
+      String dbName, String tableName, List<String> partNames, List<String> colNames,
+      boolean areAllPartsFound, boolean useDensityFunctionForNDVEstimation, double ndvTuner)
+      throws MetaException {
+    // 1. group the stats by column name:
+    // map each colName to its List<ColumnStatistics>
+    Map<String, List<ColumnStatistics>> map = new HashMap<>();
+    for (ColumnStatistics css : partStats) {
+      List<ColumnStatisticsObj> objs = css.getStatsObj();
+      for (ColumnStatisticsObj obj : objs) {
+        List<ColumnStatisticsObj> singleObj = new ArrayList<>();
+        singleObj.add(obj);
+        ColumnStatistics singleCS = new ColumnStatistics(css.getStatsDesc(), singleObj);
+        if (!map.containsKey(obj.getColName())) {
+          map.put(obj.getColName(), new ArrayList<ColumnStatistics>());
+        }
+        map.get(obj.getColName()).add(singleCS);
+      }
+    }
+    return aggrPartitionStats(map, dbName, tableName, partNames, colNames, areAllPartsFound,
+        useDensityFunctionForNDVEstimation, ndvTuner);
+  }
+
+  public static List<ColumnStatisticsObj> aggrPartitionStats(
+      Map<String, List<ColumnStatistics>> map, String dbName, String tableName,
+      List<String> partNames, List<String> colNames, boolean areAllPartsFound,
+      boolean useDensityFunctionForNDVEstimation, double ndvTuner) throws MetaException {
+    List<ColumnStatisticsObj> colStats = new ArrayList<>();
+    // 2. aggregate the stats for each colName
+    // TODO: a thread pool could be used to speed up the process
+    for (Entry<String, List<ColumnStatistics>> entry : map.entrySet()) {
+      List<ColumnStatistics> css = entry.getValue();
+      ColumnStatsAggregator aggregator = ColumnStatsAggregatorFactory.getColumnStatsAggregator(css
+          .iterator().next().getStatsObj().iterator().next().getStatsData().getSetField(),
+          useDensityFunctionForNDVEstimation, ndvTuner);
+      ColumnStatisticsObj statsObj = aggregator.aggregate(entry.getKey(), partNames, css);
+      colStats.add(statsObj);
+    }
+    return colStats;
+  }
+
 }
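
The two aggrPartitionStats() overloads added above implement a group-then-aggregate scheme: each partition's stats are split into single-column ColumnStatistics objects, bucketed by column name, and one aggregator runs per bucket. A hedged usage sketch, assuming the thrift metastore API plus this patch on the classpath; the database, table, and partition values are invented:

    import java.util.Arrays;
    import java.util.List;

    import org.apache.hadoop.hive.metastore.MetaStoreUtils;
    import org.apache.hadoop.hive.metastore.api.ColumnStatistics;
    import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj;
    import org.apache.hadoop.hive.metastore.api.MetaException;

    class AggrPartitionStatsSketch {
      // One ColumnStatistics per partition goes in; one aggregated
      // ColumnStatisticsObj per requested column comes out.
      static List<ColumnStatisticsObj> aggregate(List<ColumnStatistics> partStats)
          throws MetaException {
        return MetaStoreUtils.aggrPartitionStats(partStats, "default", "sales",
            Arrays.asList("ds=2017-07-01", "ds=2017-07-02"), // partNames
            Arrays.asList("amount"),                         // colNames
            true,  // areAllPartsFound: stats were present for every partition
            false, // useDensityFunctionForNDVEstimation
            0.5);  // ndvTuner, normally read from HIVE_METASTORE_STATS_NDV_TUNER
      }
    }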

http://git-wip-us.apache.org/repos/asf/hive/blob/f8b79fe6/metastore/src/java/org/apache/hadoop/hive/metastore/ObjectStore.java
----------------------------------------------------------------------
diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/ObjectStore.java b/metastore/src/java/org/apache/hadoop/hive/metastore/ObjectStore.java
index db4ec91..eea12291 100644
--- a/metastore/src/java/org/apache/hadoop/hive/metastore/ObjectStore.java
+++ b/metastore/src/java/org/apache/hadoop/hive/metastore/ObjectStore.java
@@ -7193,11 +7193,13 @@ public class ObjectStore implements RawStore, Configurable {
   protected ColumnStatistics getTableColumnStatisticsInternal(
       String dbName, String tableName, final List<String> colNames, boolean allowSql,
       boolean allowJdo) throws MetaException, NoSuchObjectException {
+    final boolean enableBitVector = HiveConf.getBoolVar(getConf(),
+        HiveConf.ConfVars.HIVE_STATS_FETCH_BITVECTOR);
     return new GetStatHelper(HiveStringUtils.normalizeIdentifier(dbName),
         HiveStringUtils.normalizeIdentifier(tableName), allowSql, allowJdo) {
       @Override
       protected ColumnStatistics getSqlResult(GetHelper<ColumnStatistics> ctx) throws MetaException {
-        return directSql.getTableStats(dbName, tblName, colNames);
+        return directSql.getTableStats(dbName, tblName, colNames, enableBitVector);
       }
       @Override
       protected ColumnStatistics getJdoResult(
@@ -7215,7 +7217,7 @@ public class ObjectStore implements RawStore, Configurable {
           if (desc.getLastAnalyzed() > mStat.getLastAnalyzed()) {
             desc.setLastAnalyzed(mStat.getLastAnalyzed());
           }
-          statObjs.add(StatObjectConverter.getTableColumnStatisticsObj(mStat));
+          statObjs.add(StatObjectConverter.getTableColumnStatisticsObj(mStat, enableBitVector));
           Deadline.checkTimeout();
         }
         return new ColumnStatistics(desc, statObjs);
@@ -7236,11 +7238,13 @@ public class ObjectStore implements RawStore, Configurable {
   protected List<ColumnStatistics> getPartitionColumnStatisticsInternal(
       String dbName, String tableName, final List<String> partNames, final List<String> colNames,
       boolean allowSql, boolean allowJdo) throws MetaException, NoSuchObjectException {
+    final boolean enableBitVector = HiveConf.getBoolVar(getConf(),
+        HiveConf.ConfVars.HIVE_STATS_FETCH_BITVECTOR);
     return new GetListHelper<ColumnStatistics>(dbName, tableName, allowSql, allowJdo) {
       @Override
       protected List<ColumnStatistics> getSqlResult(
           GetHelper<List<ColumnStatistics>> ctx) throws MetaException {
-        return directSql.getPartitionStats(dbName, tblName, partNames, colNames);
+        return directSql.getPartitionStats(dbName, tblName, partNames, colNames, enableBitVector);
       }
       @Override
       protected List<ColumnStatistics> getJdoResult(
@@ -7268,7 +7272,7 @@ public class ObjectStore implements RawStore, Configurable {
               csd = StatObjectConverter.getPartitionColumnStatisticsDesc(mStatsObj);
               curList = new ArrayList<ColumnStatisticsObj>(colNames.size());
             }
-            curList.add(StatObjectConverter.getPartitionColumnStatisticsObj(mStatsObj));
+            curList.add(StatObjectConverter.getPartitionColumnStatisticsObj(mStatsObj, enableBitVector));
             lastPartName = partName;
             Deadline.checkTimeout();
           }
@@ -7288,12 +7292,14 @@ public class ObjectStore implements RawStore, Configurable {
         HiveConf.ConfVars.HIVE_METASTORE_STATS_NDV_DENSITY_FUNCTION);
     final double ndvTuner = HiveConf.getFloatVar(getConf(),
         HiveConf.ConfVars.HIVE_METASTORE_STATS_NDV_TUNER);
+    final boolean enableBitVector = HiveConf.getBoolVar(getConf(),
+        HiveConf.ConfVars.HIVE_STATS_FETCH_BITVECTOR);
     return new GetHelper<AggrStats>(dbName, tblName, true, false) {
       @Override
       protected AggrStats getSqlResult(GetHelper<AggrStats> ctx)
           throws MetaException {
         return directSql.aggrColStatsForPartitions(dbName, tblName, partNames,
-            colNames, useDensityFunctionForNDVEstimation, ndvTuner);
+            colNames, useDensityFunctionForNDVEstimation, ndvTuner, enableBitVector);
       }
       @Override
       protected AggrStats getJdoResult(GetHelper<AggrStats> ctx)
@@ -7313,11 +7319,13 @@ public class ObjectStore implements RawStore, Configurable {
   @Override
   public Map<String, List<ColumnStatisticsObj>> getColStatsForTablePartitions(String dbName,
       String tableName) throws MetaException, NoSuchObjectException {
+    final boolean enableBitVector = HiveConf.getBoolVar(getConf(),
+        HiveConf.ConfVars.HIVE_STATS_FETCH_BITVECTOR);
     return new GetHelper<Map<String, List<ColumnStatisticsObj>>>(dbName, tableName, true, false) {
       @Override
       protected Map<String, List<ColumnStatisticsObj>> getSqlResult(
           GetHelper<Map<String, List<ColumnStatisticsObj>>> ctx) throws MetaException {
-        return directSql.getColStatsForTablePartitions(dbName, tblName);
+        return directSql.getColStatsForTablePartitions(dbName, tblName, enableBitVector);
       }
 
       @Override
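
Each ObjectStore method above follows the same shape: read the HIVE_STATS_FETCH_BITVECTOR setting once into a final local, then thread it through both the direct-SQL and the JDO result paths. A minimal sketch of that read, with a hypothetical wrapper method (the ConfVars entry itself is added by this patch's HiveConf.java change):

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.hive.conf.HiveConf;

    class FetchBitVectorFlagSketch {
      // Hypothetical wrapper: read the flag once and pass it down, as the
      // methods above do, so one request sees a single consistent setting
      // across its SQL and JDO branches.
      static boolean fetchBitVectors(Configuration conf) {
        return HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_STATS_FETCH_BITVECTOR);
      }
    }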

http://git-wip-us.apache.org/repos/asf/hive/blob/f8b79fe6/metastore/src/java/org/apache/hadoop/hive/metastore/StatObjectConverter.java
----------------------------------------------------------------------
diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/StatObjectConverter.java b/metastore/src/java/org/apache/hadoop/hive/metastore/StatObjectConverter.java
index 2dc2804..d53ea4c 100644
--- a/metastore/src/java/org/apache/hadoop/hive/metastore/StatObjectConverter.java
+++ b/metastore/src/java/org/apache/hadoop/hive/metastore/StatObjectConverter.java
@@ -76,6 +76,7 @@ public class StatObjectConverter {
        mColStats.setLongStats(
            longStats.isSetNumNulls() ? longStats.getNumNulls() : null,
            longStats.isSetNumDVs() ? longStats.getNumDVs() : null,
+           longStats.isSetBitVectors() ? longStats.getBitVectors().getBytes() : null,
            longStats.isSetLowValue() ? longStats.getLowValue() : null,
            longStats.isSetHighValue() ? longStats.getHighValue() : null);
      } else if (statsObj.getStatsData().isSetDoubleStats()) {
@@ -83,6 +84,7 @@ public class StatObjectConverter {
        mColStats.setDoubleStats(
            doubleStats.isSetNumNulls() ? doubleStats.getNumNulls() : null,
            doubleStats.isSetNumDVs() ? doubleStats.getNumDVs() : null,
+           doubleStats.isSetBitVectors() ? doubleStats.getBitVectors().getBytes() : null,
            doubleStats.isSetLowValue() ? doubleStats.getLowValue() : null,
            doubleStats.isSetHighValue() ? doubleStats.getHighValue() : null);
      } else if (statsObj.getStatsData().isSetDecimalStats()) {
@@ -92,12 +94,14 @@ public class StatObjectConverter {
        mColStats.setDecimalStats(
            decimalStats.isSetNumNulls() ? decimalStats.getNumNulls() : null,
            decimalStats.isSetNumDVs() ? decimalStats.getNumDVs() : null,
+           decimalStats.isSetBitVectors() ? decimalStats.getBitVectors().getBytes() : null,
                low, high);
      } else if (statsObj.getStatsData().isSetStringStats()) {
       StringColumnStatsData stringStats = statsObj.getStatsData().getStringStats();
        mColStats.setStringStats(
            stringStats.isSetNumNulls() ? stringStats.getNumNulls() : null,
            stringStats.isSetNumDVs() ? stringStats.getNumDVs() : null,
+           stringStats.isSetBitVectors() ? stringStats.getBitVectors().getBytes() : null,
            stringStats.isSetMaxColLen() ? stringStats.getMaxColLen() : null,
            stringStats.isSetAvgColLen() ? stringStats.getAvgColLen() : null);
      } else if (statsObj.getStatsData().isSetBinaryStats()) {
@@ -111,6 +115,7 @@ public class StatObjectConverter {
        mColStats.setDateStats(
            dateStats.isSetNumNulls() ? dateStats.getNumNulls() : null,
            dateStats.isSetNumDVs() ? dateStats.getNumDVs() : null,
+           dateStats.isSetBitVectors() ? dateStats.getBitVectors().getBytes() : null,
           dateStats.isSetLowValue() ? dateStats.getLowValue().getDaysSinceEpoch() : null,
           dateStats.isSetHighValue() ? dateStats.getHighValue().getDaysSinceEpoch() : null);
      }
@@ -146,6 +151,9 @@ public class StatObjectConverter {
     if (mStatsObj.getNumDVs() != null) {
       oldStatsObj.setNumDVs(mStatsObj.getNumDVs());
     }
+    if (mStatsObj.getBitVector() != null) {
+      oldStatsObj.setBitVector(mStatsObj.getBitVector());
+    }
     if (mStatsObj.getNumFalses() != null) {
       oldStatsObj.setNumFalses(mStatsObj.getNumFalses());
     }
@@ -188,6 +196,9 @@ public class StatObjectConverter {
     if (mStatsObj.getNumDVs() != null) {
       oldStatsObj.setNumDVs(mStatsObj.getNumDVs());
     }
+    if (mStatsObj.getBitVector() != null) {
+      oldStatsObj.setBitVector(mStatsObj.getBitVector());
+    }
     if (mStatsObj.getNumFalses() != null) {
       oldStatsObj.setNumFalses(mStatsObj.getNumFalses());
     }
@@ -200,7 +211,7 @@ public class StatObjectConverter {
   }
 
   public static ColumnStatisticsObj getTableColumnStatisticsObj(
-      MTableColumnStatistics mStatsObj) {
+      MTableColumnStatistics mStatsObj, boolean enableBitVector) {
     ColumnStatisticsObj statsObj = new ColumnStatisticsObj();
     statsObj.setColType(mStatsObj.getColType());
     statsObj.setColName(mStatsObj.getColName());
@@ -220,6 +231,7 @@ public class StatObjectConverter {
       stringStats.setAvgColLen(mStatsObj.getAvgColLen());
       stringStats.setMaxColLen(mStatsObj.getMaxColLen());
       stringStats.setNumDVs(mStatsObj.getNumDVs());
+      stringStats.setBitVectors((mStatsObj.getBitVector()==null||!enableBitVector)? null : new String(mStatsObj.getBitVector()));
       colStatsData.setStringStats(stringStats);
     } else if (colType.equals("binary")) {
       BinaryColumnStatsData binaryStats = new BinaryColumnStatsData();
@@ -241,6 +253,7 @@ public class StatObjectConverter {
         longStats.setLowValue(longLowValue);
       }
       longStats.setNumDVs(mStatsObj.getNumDVs());
+      longStats.setBitVectors((mStatsObj.getBitVector()==null||!enableBitVector)? null : new String(mStatsObj.getBitVector()));
       colStatsData.setLongStats(longStats);
     } else if (colType.equals("double") || colType.equals("float")) {
       DoubleColumnStatsData doubleStats = new DoubleColumnStatsData();
@@ -254,6 +267,7 @@ public class StatObjectConverter {
         doubleStats.setLowValue(doubleLowValue);
       }
       doubleStats.setNumDVs(mStatsObj.getNumDVs());
+      doubleStats.setBitVectors((mStatsObj.getBitVector()==null||!enableBitVector)? null : new String(mStatsObj.getBitVector()));
       colStatsData.setDoubleStats(doubleStats);
     } else if (colType.startsWith("decimal")) {
       DecimalColumnStatsData decimalStats = new DecimalColumnStatsData();
@@ -267,6 +281,7 @@ public class StatObjectConverter {
         decimalStats.setLowValue(createThriftDecimal(decimalLowValue));
       }
       decimalStats.setNumDVs(mStatsObj.getNumDVs());
+      decimalStats.setBitVectors((mStatsObj.getBitVector()==null||!enableBitVector)? null : new String(mStatsObj.getBitVector()));
       colStatsData.setDecimalStats(decimalStats);
     } else if (colType.equals("date")) {
       DateColumnStatsData dateStats = new DateColumnStatsData();
@@ -280,6 +295,7 @@ public class StatObjectConverter {
         dateStats.setLowValue(new Date(lowValue));
       }
       dateStats.setNumDVs(mStatsObj.getNumDVs());
+      dateStats.setBitVectors((mStatsObj.getBitVector()==null||!enableBitVector)? null : new String(mStatsObj.getBitVector()));
       colStatsData.setDateStats(dateStats);
     }
     statsObj.setStatsData(colStatsData);
@@ -323,6 +339,7 @@ public class StatObjectConverter {
       mColStats.setLongStats(
           longStats.isSetNumNulls() ? longStats.getNumNulls() : null,
           longStats.isSetNumDVs() ? longStats.getNumDVs() : null,
+          longStats.isSetBitVectors() ? longStats.getBitVectors().getBytes() : null,
           longStats.isSetLowValue() ? longStats.getLowValue() : null,
           longStats.isSetHighValue() ? longStats.getHighValue() : null);
     } else if (statsObj.getStatsData().isSetDoubleStats()) {
@@ -330,6 +347,7 @@ public class StatObjectConverter {
       mColStats.setDoubleStats(
           doubleStats.isSetNumNulls() ? doubleStats.getNumNulls() : null,
           doubleStats.isSetNumDVs() ? doubleStats.getNumDVs() : null,
+          doubleStats.isSetBitVectors() ? doubleStats.getBitVectors().getBytes() : null,
           doubleStats.isSetLowValue() ? doubleStats.getLowValue() : null,
           doubleStats.isSetHighValue() ? doubleStats.getHighValue() : null);
     } else if (statsObj.getStatsData().isSetDecimalStats()) {
@@ -339,12 +357,14 @@ public class StatObjectConverter {
       mColStats.setDecimalStats(
           decimalStats.isSetNumNulls() ? decimalStats.getNumNulls() : null,
           decimalStats.isSetNumDVs() ? decimalStats.getNumDVs() : null,
+          decimalStats.isSetBitVectors() ? decimalStats.getBitVectors().getBytes() : null,
               low, high);
     } else if (statsObj.getStatsData().isSetStringStats()) {
      StringColumnStatsData stringStats = statsObj.getStatsData().getStringStats();
       mColStats.setStringStats(
           stringStats.isSetNumNulls() ? stringStats.getNumNulls() : null,
           stringStats.isSetNumDVs() ? stringStats.getNumDVs() : null,
+          stringStats.isSetBitVectors() ? stringStats.getBitVectors().getBytes() : null,
           stringStats.isSetMaxColLen() ? stringStats.getMaxColLen() : null,
           stringStats.isSetAvgColLen() ? stringStats.getAvgColLen() : null);
     } else if (statsObj.getStatsData().isSetBinaryStats()) {
@@ -358,6 +378,7 @@ public class StatObjectConverter {
       mColStats.setDateStats(
           dateStats.isSetNumNulls() ? dateStats.getNumNulls() : null,
           dateStats.isSetNumDVs() ? dateStats.getNumDVs() : null,
+          dateStats.isSetBitVectors() ? dateStats.getBitVectors().getBytes() : null,
          dateStats.isSetLowValue() ? dateStats.getLowValue().getDaysSinceEpoch() : null,
          dateStats.isSetHighValue() ? dateStats.getHighValue().getDaysSinceEpoch() : null);
     }
@@ -365,7 +386,7 @@ public class StatObjectConverter {
   }
 
   public static ColumnStatisticsObj getPartitionColumnStatisticsObj(
-      MPartitionColumnStatistics mStatsObj) {
+      MPartitionColumnStatistics mStatsObj, boolean enableBitVector) {
     ColumnStatisticsObj statsObj = new ColumnStatisticsObj();
     statsObj.setColType(mStatsObj.getColType());
     statsObj.setColName(mStatsObj.getColName());
@@ -385,6 +406,7 @@ public class StatObjectConverter {
       stringStats.setAvgColLen(mStatsObj.getAvgColLen());
       stringStats.setMaxColLen(mStatsObj.getMaxColLen());
       stringStats.setNumDVs(mStatsObj.getNumDVs());
+      stringStats.setBitVectors((mStatsObj.getBitVector()==null||!enableBitVector)? null : new String(mStatsObj.getBitVector()));
       colStatsData.setStringStats(stringStats);
     } else if (colType.equals("binary")) {
       BinaryColumnStatsData binaryStats = new BinaryColumnStatsData();
@@ -404,6 +426,7 @@ public class StatObjectConverter {
         longStats.setLowValue(mStatsObj.getLongLowValue());
       }
       longStats.setNumDVs(mStatsObj.getNumDVs());
+      longStats.setBitVectors((mStatsObj.getBitVector()==null||!enableBitVector)? null : new String(mStatsObj.getBitVector()));
       colStatsData.setLongStats(longStats);
     } else if (colType.equals("double") || colType.equals("float")) {
       DoubleColumnStatsData doubleStats = new DoubleColumnStatsData();
@@ -415,6 +438,7 @@ public class StatObjectConverter {
         doubleStats.setLowValue(mStatsObj.getDoubleLowValue());
       }
       doubleStats.setNumDVs(mStatsObj.getNumDVs());
+      doubleStats.setBitVectors((mStatsObj.getBitVector()==null||!enableBitVector)? null : new String(mStatsObj.getBitVector()));
       colStatsData.setDoubleStats(doubleStats);
     } else if (colType.startsWith("decimal")) {
       DecimalColumnStatsData decimalStats = new DecimalColumnStatsData();
@@ -426,6 +450,7 @@ public class StatObjectConverter {
          decimalStats.setLowValue(createThriftDecimal(mStatsObj.getDecimalLowValue()));
       }
       decimalStats.setNumDVs(mStatsObj.getNumDVs());
+      decimalStats.setBitVectors((mStatsObj.getBitVector()==null||!enableBitVector)? null : new String(mStatsObj.getBitVector()));
       colStatsData.setDecimalStats(decimalStats);
     } else if (colType.equals("date")) {
       DateColumnStatsData dateStats = new DateColumnStatsData();
@@ -433,6 +458,7 @@ public class StatObjectConverter {
       dateStats.setHighValue(new Date(mStatsObj.getLongHighValue()));
       dateStats.setLowValue(new Date(mStatsObj.getLongLowValue()));
       dateStats.setNumDVs(mStatsObj.getNumDVs());
+      dateStats.setBitVectors((mStatsObj.getBitVector()==null||!enableBitVector)? null : new String(mStatsObj.getBitVector()));
       colStatsData.setDateStats(dateStats);
     }
     statsObj.setStatsData(colStatsData);
@@ -450,10 +476,10 @@ public class StatObjectConverter {
     return statsDesc;
   }
 
-  // SQL
+  // JAVA
   public static void fillColumnStatisticsData(String colType, ColumnStatisticsData data,
       Object llow, Object lhigh, Object dlow, Object dhigh, Object declow, Object dechigh,
-      Object nulls, Object dist, Object avglen, Object maxlen, Object trues, Object falses) throws MetaException {
+      Object nulls, Object dist, Object bitVector, Object avglen, Object maxlen, Object trues, Object falses) throws MetaException {
     colType = colType.toLowerCase();
     if (colType.equals("boolean")) {
       BooleanColumnStatsData boolStats = new BooleanColumnStatsData();
@@ -468,6 +494,7 @@ public class StatObjectConverter {
       stringStats.setAvgColLen(MetaStoreDirectSql.extractSqlDouble(avglen));
       stringStats.setMaxColLen(MetaStoreDirectSql.extractSqlLong(maxlen));
       stringStats.setNumDVs(MetaStoreDirectSql.extractSqlLong(dist));
+      stringStats.setBitVectors(MetaStoreDirectSql.extractSqlBlob(bitVector));
       data.setStringStats(stringStats);
     } else if (colType.equals("binary")) {
       BinaryColumnStatsData binaryStats = new BinaryColumnStatsData();
@@ -487,6 +514,7 @@ public class StatObjectConverter {
         longStats.setLowValue(MetaStoreDirectSql.extractSqlLong(llow));
       }
       longStats.setNumDVs(MetaStoreDirectSql.extractSqlLong(dist));
+      longStats.setBitVectors(MetaStoreDirectSql.extractSqlBlob(bitVector));
       data.setLongStats(longStats);
     } else if (colType.equals("double") || colType.equals("float")) {
       DoubleColumnStatsData doubleStats = new DoubleColumnStatsData();
@@ -498,6 +526,7 @@ public class StatObjectConverter {
         doubleStats.setLowValue(MetaStoreDirectSql.extractSqlDouble(dlow));
       }
       doubleStats.setNumDVs(MetaStoreDirectSql.extractSqlLong(dist));
+      doubleStats.setBitVectors(MetaStoreDirectSql.extractSqlBlob(bitVector));
       data.setDoubleStats(doubleStats);
     } else if (colType.startsWith("decimal")) {
       DecimalColumnStatsData decimalStats = new DecimalColumnStatsData();
@@ -509,6 +538,7 @@ public class StatObjectConverter {
         decimalStats.setLowValue(createThriftDecimal((String)declow));
       }
       decimalStats.setNumDVs(MetaStoreDirectSql.extractSqlLong(dist));
+      decimalStats.setBitVectors(MetaStoreDirectSql.extractSqlBlob(bitVector));
       data.setDecimalStats(decimalStats);
     } else if (colType.equals("date")) {
       DateColumnStatsData dateStats = new DateColumnStatsData();
@@ -520,10 +550,12 @@ public class StatObjectConverter {
         dateStats.setLowValue(new Date(MetaStoreDirectSql.extractSqlLong(llow)));
       }
       dateStats.setNumDVs(MetaStoreDirectSql.extractSqlLong(dist));
+      dateStats.setBitVectors(MetaStoreDirectSql.extractSqlBlob(bitVector));
       data.setDateStats(dateStats);
     }
   }
 
+  // DB
   public static void fillColumnStatisticsData(String colType, ColumnStatisticsData data,
       Object llow, Object lhigh, Object dlow, Object dhigh, Object declow, Object dechigh,
       Object nulls, Object dist, Object avglen, Object maxlen, Object trues, Object falses,
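
StatObjectConverter now guards bit vectors in both directions: when writing into the model objects it checks the thrift isSetBitVectors() flag, and when reading back to thrift it returns null if the stored vector is absent or fetching is disabled. A sketch of that round-trip pattern with hypothetical helper methods; note the committed code converts byte[] to String via new String(...), which assumes the default charset round-trips the serialized sketch bytes:

    class BitVectorGuardSketch {
      // thrift -> model: persist the bytes only when the optional field is set.
      static byte[] toModel(boolean isSetBitVectors, String bitVectors) {
        return isSetBitVectors ? bitVectors.getBytes() : null;
      }

      // model -> thrift: surface the vector only when it is stored AND enabled;
      // otherwise the stats object simply carries no bit vector.
      static String toThrift(byte[] bitVector, boolean enableBitVector) {
        return (bitVector == null || !enableBitVector) ? null : new String(bitVector);
      }
    }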

http://git-wip-us.apache.org/repos/asf/hive/blob/f8b79fe6/metastore/src/java/org/apache/hadoop/hive/metastore/cache/CachedStore.java
----------------------------------------------------------------------
diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/cache/CachedStore.java b/metastore/src/java/org/apache/hadoop/hive/metastore/cache/CachedStore.java
index cea94a0..fb98ccf 100644
--- a/metastore/src/java/org/apache/hadoop/hive/metastore/cache/CachedStore.java
+++ b/metastore/src/java/org/apache/hadoop/hive/metastore/cache/CachedStore.java
@@ -19,6 +19,7 @@ package org.apache.hadoop.hive.metastore.cache;
 
 import java.nio.ByteBuffer;
 import java.util.ArrayList;
+import java.util.HashMap;
 import java.util.LinkedList;
 import java.util.List;
 import java.util.Map;
@@ -80,8 +81,8 @@ import org.apache.hadoop.hive.metastore.api.Type;
 import org.apache.hadoop.hive.metastore.api.UnknownDBException;
 import org.apache.hadoop.hive.metastore.api.UnknownPartitionException;
 import org.apache.hadoop.hive.metastore.api.UnknownTableException;
-import org.apache.hadoop.hive.metastore.hbase.stats.merge.ColumnStatsMerger;
-import org.apache.hadoop.hive.metastore.hbase.stats.merge.ColumnStatsMergerFactory;
+import org.apache.hadoop.hive.metastore.columnstats.merge.ColumnStatsMerger;
+import org.apache.hadoop.hive.metastore.columnstats.merge.ColumnStatsMergerFactory;
 import org.apache.hadoop.hive.metastore.partition.spec.PartitionSpecProxy;
 import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
@@ -92,6 +93,7 @@ import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
 import com.google.common.annotations.VisibleForTesting;
+import com.google.common.collect.Lists;
 
 // TODO filter->expr
 // TODO functionCache
@@ -1562,27 +1564,37 @@ public class CachedStore implements RawStore, Configurable {
 
   private ColumnStatisticsObj mergeColStatsForPartitions(String dbName, String tblName,
       List<String> partNames, String colName) throws MetaException {
-    ColumnStatisticsObj colStats = null;
+    final boolean useDensityFunctionForNDVEstimation = HiveConf.getBoolVar(getConf(),
+        HiveConf.ConfVars.HIVE_METASTORE_STATS_NDV_DENSITY_FUNCTION);
+    final double ndvTuner = HiveConf.getFloatVar(getConf(),
+        HiveConf.ConfVars.HIVE_METASTORE_STATS_NDV_TUNER);
+    Map<String, List<ColumnStatistics>> map = new HashMap<>();
+    List<ColumnStatistics> list = new ArrayList<>();
+    boolean areAllPartsFound = true;
     for (String partName : partNames) {
-      String colStatsCacheKey =
-          CacheUtils.buildKey(dbName, tblName, partNameToVals(partName), colName);
-      ColumnStatisticsObj colStatsForPart =
-          SharedCache.getCachedPartitionColStats(colStatsCacheKey);
-      if (colStatsForPart == null) {
-        // we don't have stats for all the partitions
-        // logic for extrapolation hasn't been added to CacheStore
-        // So stop now, and lets fallback to underlying RawStore
-        return null;
-      }
-      if (colStats == null) {
-        colStats = colStatsForPart;
+      String colStatsCacheKey = CacheUtils.buildKey(dbName, tblName, partNameToVals(partName),
+          colName);
+      List<ColumnStatisticsObj> singleObj = new ArrayList<>();
+      ColumnStatisticsObj colStatsForPart = SharedCache
+          .getCachedPartitionColStats(colStatsCacheKey);
+      if (colStatsForPart != null) {
+        singleObj.add(colStatsForPart);
+        ColumnStatisticsDesc css = new ColumnStatisticsDesc(false, dbName, tblName);
+        css.setPartName(partName);
+        list.add(new ColumnStatistics(css, singleObj));
       } else {
-        ColumnStatsMerger merger =
-            ColumnStatsMergerFactory.getColumnStatsMerger(colStats, colStatsForPart);
-        merger.merge(colStats, colStatsForPart);
+        areAllPartsFound = false;
       }
     }
-    return colStats;
+    map.put(colName, list);
+    List<String> colNames = new ArrayList<>();
+    colNames.add(colName);
+    // Note that enableBitVector does not apply here: each ColumnStatisticsObj
+    // already indicates whether its bit vector is null, so the aggregation
+    // logic adapts automatically.
+    return MetaStoreUtils
+        .aggrPartitionStats(map, dbName, tblName, partNames, colNames, areAllPartsFound,
+            useDensityFunctionForNDVEstimation, ndvTuner).iterator().next();
   }
 
   @Override
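
With this change CachedStore no longer merges partition stats pairwise and bails out on a cache miss; instead it wraps each cached per-partition ColumnStatisticsObj into a one-element ColumnStatistics, records misses in areAllPartsFound, and delegates to the shared aggregation path. A sketch of just the wrapping step, assuming misses arrive as nulls:

    import java.util.ArrayList;
    import java.util.Collections;
    import java.util.List;

    import org.apache.hadoop.hive.metastore.api.ColumnStatistics;
    import org.apache.hadoop.hive.metastore.api.ColumnStatisticsDesc;
    import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj;

    class WrapPartStatsSketch {
      // Hypothetical helper: one cached entry per partition, null on a miss.
      static List<ColumnStatistics> wrap(String db, String tbl,
          List<String> partNames, List<ColumnStatisticsObj> cached) {
        List<ColumnStatistics> wrapped = new ArrayList<>();
        for (int i = 0; i < cached.size(); i++) {
          ColumnStatisticsObj obj = cached.get(i);
          if (obj == null) {
            continue; // the real code also flips areAllPartsFound here
          }
          // false marks partition-level (not table-level) statistics.
          ColumnStatisticsDesc desc = new ColumnStatisticsDesc(false, db, tbl);
          desc.setPartName(partNames.get(i));
          wrapped.add(new ColumnStatistics(desc, Collections.singletonList(obj)));
        }
        return wrapped;
      }
    }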
