Repository: hive
Updated Branches:
  refs/heads/hbase-metastore 1a64664ae -> 15012469a
HIVE-10954 AggregateStatsCache duplicated in HBaseMetastore (gates)

Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/15012469
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/15012469
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/15012469

Branch: refs/heads/hbase-metastore
Commit: 15012469afb8e0c0354b82c972f058ed3cd0d221
Parents: 1a64664
Author: Alan Gates <ga...@hortonworks.com>
Authored: Wed Jun 17 17:20:19 2015 -0700
Committer: Alan Gates <ga...@hortonworks.com>
Committed: Wed Jun 17 17:20:19 2015 -0700

----------------------------------------------------------------------
 .../metastore/hbase/AggregateStatsCache.java    | 559 -------------------
 .../hive/metastore/hbase/HBaseReadWrite.java    |   9 +-
 .../hadoop/hive/metastore/hbase/HBaseStore.java |   2 -
 .../hive/metastore/hbase/utils/BitVector.java   | 122 ----
 .../hive/metastore/hbase/utils/BloomFilter.java | 144 -----
 .../hbase/TestAggregateStatsCache.java          | 266 ---------
 .../metastore/hbase/utils/TestBitVector.java    |  92 ---
 .../metastore/hbase/utils/TestBloomFilter.java  |  82 ---
 8 files changed, 4 insertions(+), 1272 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/15012469/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/AggregateStatsCache.java
----------------------------------------------------------------------
diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/AggregateStatsCache.java b/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/AggregateStatsCache.java deleted file mode 100644 index 4dfcd81..0000000 --- a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/AggregateStatsCache.java +++ /dev/null @@ -1,559 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License.
- */ - -package org.apache.hadoop.hive.metastore.hbase; - -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.hive.conf.HiveConf; -import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj; -import org.apache.hadoop.hive.metastore.hbase.utils.BloomFilter; - -import java.util.ArrayList; -import java.util.HashMap; -import java.util.Iterator; -import java.util.List; -import java.util.Map; -import java.util.concurrent.ConcurrentHashMap; -import java.util.concurrent.TimeUnit; -import java.util.concurrent.atomic.AtomicInteger; -import java.util.concurrent.locks.Lock; -import java.util.concurrent.locks.ReadWriteLock; -import java.util.concurrent.locks.ReentrantReadWriteLock; - -class AggregateStatsCache { - - private static final Log LOG = LogFactory.getLog(AggregateStatsCache.class.getName()); - private static AggregateStatsCache self = null; - - // Backing store for this cache - private final ConcurrentHashMap<Key, AggrColStatsList> cacheStore; - // Cache size - private final int maxCacheNodes; - // Current nodes in the cache - private AtomicInteger currentNodes = new AtomicInteger(0); - // Run the cleaner thread when the cache is maxFull% full - private final float maxFull; - // Run the cleaner thread until cache is cleanUntil% occupied - private final float cleanUntil; - // Nodes go stale after this - private final long timeToLive; - // Max time when waiting for write locks on node list - private final long maxWriterWaitTime; - // Max time when waiting for read locks on node list - private final long maxReaderWaitTime; - // Maximum number of partitions aggregated per cache node - private final int maxPartsPerCacheNode; - // Bloom filter false positive probability - private final float falsePositiveProbability; - // Max tolerable variance for matches - private final float maxVariance; - // Used to determine if cleaner thread is already running - private boolean isCleaning = false; - - private AggregateStatsCache(int maxCacheNodes, int maxPartsPerCacheNode, long timeToLive, - float falsePositiveProbability, float maxVariance, long maxWriterWaitTime, - long maxReaderWaitTime, float maxFull, float cleanUntil) { - this.maxCacheNodes = maxCacheNodes; - this.maxPartsPerCacheNode = maxPartsPerCacheNode; - this.timeToLive = timeToLive; - this.falsePositiveProbability = falsePositiveProbability; - this.maxVariance = maxVariance; - this.maxWriterWaitTime = maxWriterWaitTime; - this.maxReaderWaitTime = maxReaderWaitTime; - this.maxFull = maxFull; - this.cleanUntil = cleanUntil; - this.cacheStore = new ConcurrentHashMap<Key, AggrColStatsList>(); - } - - static synchronized AggregateStatsCache getInstance(Configuration conf) { - if (self == null) { - int maxCacheNodes = - HiveConf.getIntVar(conf, HiveConf.ConfVars.METASTORE_HBASE_AGGREGATE_STATS_CACHE_SIZE); - // The number of partitions aggregated per cache node - // If the number of partitions requested is > this value, we'll fetch directly from Metastore - int maxPartitionsPerCacheNode = - HiveConf.getIntVar(conf, - HiveConf.ConfVars.METASTORE_HBASE_AGGREGATE_STATS_CACHE_MAX_PARTITIONS); - long timeToLive = - HiveConf.getTimeVar(conf, HiveConf.ConfVars.METASTORE_HBASE_CACHE_TIME_TO_LIVE, - TimeUnit.SECONDS); - // False positive probability we are ready to tolerate for the underlying bloom filter - float falsePositiveProbability = - HiveConf.getFloatVar(conf, -
HiveConf.ConfVars.METASTORE_HBASE_AGGREGATE_STATS_CACHE_FALSE_POSITIVE_PROBABILITY); - // Maximum tolerable variance in number of partitions between cached node and our request - float maxVariance = - HiveConf.getFloatVar(conf, - HiveConf.ConfVars.METASTORE_HBASE_AGGREGATE_STATS_CACHE_MAX_VARIANCE); - long maxWriterWaitTime = - HiveConf.getTimeVar(conf, HiveConf.ConfVars.METASTORE_HBASE_CACHE_MAX_WRITER_WAIT, - TimeUnit.MILLISECONDS); - long maxReaderWaitTime = - HiveConf.getTimeVar(conf, HiveConf.ConfVars.METASTORE_HBASE_CACHE_MAX_READER_WAIT, - TimeUnit.MILLISECONDS); - float maxFull = HiveConf.getFloatVar(conf, HiveConf.ConfVars.METASTORE_HBASE_CACHE_MAX_FULL); - float cleanUntil = - HiveConf.getFloatVar(conf, HiveConf.ConfVars.METASTORE_HBASE_CACHE_CLEAN_UNTIL); - self = - new AggregateStatsCache(maxCacheNodes, maxPartitionsPerCacheNode, timeToLive, - falsePositiveProbability, maxVariance, maxWriterWaitTime, maxReaderWaitTime, maxFull, - cleanUntil); - } - return self; - } - - int getMaxCacheNodes() { - return maxCacheNodes; - } - - // Don't want to lock on this, so this may return approximate value - int getCurrentNodes() { - return currentNodes.intValue(); - } - - int getMaxPartsPerCacheNode() { - return maxPartsPerCacheNode; - } - - float getFalsePositiveProbability() { - return falsePositiveProbability; - } - - /** - * Return aggregate stats for a column from the cache or null. - * While reading from the nodelist for a key, we wait maxReaderWaitTime to acquire the lock, - * failing which we return a cache miss (i.e. null) - * - * @param dbName - * @param tblName - * @param colName - * @param partNames - * @return - */ - AggrColStatsCached get(String dbName, String tblName, String colName, List<String> partNames) { - // Cache key - Key key = new Key(dbName, tblName, colName); - AggrColStatsList candidateList = cacheStore.get(key); - // No key, or no nodes in candidate list - if ((candidateList == null) || (candidateList.nodes.size() == 0)) { - LOG.info("No aggregate stats cached for " + key.toString()); - return null; - } - // Find the value object - // Update the timestamp of the key,value if value matches the criteria - // Return the value - AggrColStatsCached match = null; - boolean isLocked = false; - try { - // Try to readlock the candidateList; timeout after maxReaderWaitTime - isLocked = candidateList.readLock.tryLock(maxReaderWaitTime, TimeUnit.MILLISECONDS); - if (isLocked) { - match = findBestMatch(partNames, candidateList.nodes); - } - if (match != null) { - // Ok to not lock the list for this and use a volatile lastAccessTime instead - candidateList.updateLastAccessTime(); - } - } catch (InterruptedException e) { - LOG.debug(e); - match = null; - } finally { - if (isLocked) { - candidateList.readLock.unlock(); - } - } - return match; - } - - /** - * Find the best match using the configurable error tolerance and time to live value - * - * @param partNames - * @param candidates - * @return best matched node or null - */ - private AggrColStatsCached findBestMatch(List<String> partNames, List<AggrColStatsCached> candidates) { - // Hits, misses, shouldSkip for a node - MatchStats matchStats; - // MatchStats for each candidate - Map<AggrColStatsCached, MatchStats> candidateMatchStats = new HashMap<AggrColStatsCached, MatchStats>(); - // The final match we intend to return - AggrColStatsCached bestMatch = null; - // To compare among potentially multiple matches - int bestMatchHits = 0; - int numPartsRequested = partNames.size(); - // 1st pass at marking invalid candidates - // 
Checks based on variance and TTL - // Note: we're not creating a copy of the list for saving memory - for (AggrColStatsCached candidate : candidates) { - // Variance check - if ((float) Math.abs((candidate.getNumPartsCached() - numPartsRequested) - / numPartsRequested) > maxVariance) { - candidateMatchStats.put(candidate, new MatchStats(0, 0, true)); - continue; - } - // TTL check - if (isExpired(candidate)) { - candidateMatchStats.put(candidate, new MatchStats(0, 0, true)); - } - else { - candidateMatchStats.put(candidate, new MatchStats(0, 0, false)); - } - } - // We'll count misses as we iterate - int maxMisses = (int) maxVariance * numPartsRequested; - for (String partName : partNames) { - for (AggrColStatsCached candidate : candidates) { - matchStats = candidateMatchStats.get(candidate); - if (matchStats.shouldSkip) { - continue; - } - if (candidate.getBloomFilter().contains(partName.getBytes())) { - ++matchStats.hits; - } else { - ++matchStats.misses; - } - // 2nd pass at marking invalid candidates - // If misses so far exceed max tolerable misses - if (matchStats.misses > maxMisses) { - matchStats.shouldSkip = true; - continue; - } - // Check if this is the best match so far - if (matchStats.hits > bestMatchHits) { - bestMatch = candidate; - } - } - } - if (bestMatch != null) { - // Update the last access time for this node - bestMatch.updateLastAccessTime(); - } - return bestMatch; - } - - /** - * Add a new node to the cache; may trigger the cleaner thread if the cache is near full capacity. - * We'll however add the node even if we temporarily exceed maxCacheNodes, because the cleaner - * will eventually create space from expired nodes or by removing LRU nodes. - * - * @param dbName - * @param tblName - * @param colName - * @param numPartsCached - * @param colStats - * @param bloomFilter - */ - // TODO: make add asynchronous: add shouldn't block the higher level calls - void add(String dbName, String tblName, String colName, int numPartsCached, - ColumnStatisticsObj colStats, BloomFilter bloomFilter) { - // If we have no space in the cache, run cleaner thread - if (getCurrentNodes() / maxCacheNodes > maxFull) { - clean(); - } - // Cache key - Key key = new Key(dbName, tblName, colName); - // Add new node to the cache - AggrColStatsCached node = new AggrColStatsCached(numPartsCached, bloomFilter, colStats); - AggrColStatsList nodeList; - AggrColStatsList newNodeList = new AggrColStatsList(); - newNodeList.nodes = new ArrayList<AggrColStatsCached>(); - nodeList = cacheStore.putIfAbsent(key, newNodeList); - if (nodeList == null) { - nodeList = newNodeList; - } - boolean isLocked = false; - try { - isLocked = nodeList.writeLock.tryLock(maxWriterWaitTime, TimeUnit.MILLISECONDS); - if (isLocked) { - nodeList.nodes.add(node); - node.updateLastAccessTime(); - nodeList.updateLastAccessTime(); - currentNodes.getAndIncrement(); - } - } catch (InterruptedException e) { - LOG.debug(e); - } finally { - if (isLocked) { - nodeList.writeLock.unlock(); - } - } - } - - /** - * Cleans the expired nodes or removes LRU nodes of the cache, - * until the cache size reduces to cleanUntil% full. - */ - private void clean() { - // This spawns a separate thread to walk through the cache and removes expired nodes. - // Only one cleaner thread should be running at any point.
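A minimal, self-contained sketch of the single-cleaner guard that clean() implements below; the class and method names here are illustrative, not from the patch. One difference worth noting: the sketch resets the flag in a finally block once the cleaner exits, whereas the deleted code, as quoted here, never appears to set isCleaning back to false, so its cleaner can only ever be spawned once.

class CleanerGuard {
  // Whether a cleaner thread is currently running; guarded by "this"
  private boolean isCleaning = false;

  void maybeStartCleaner(Runnable cleanup) {
    synchronized (this) {
      if (isCleaning) {
        return;          // a cleaner is already running, nothing to do
      }
      isCleaning = true; // claim the cleaner role before releasing the lock
    }
    Thread cleaner = new Thread(() -> {
      try {
        cleanup.run();   // e.g. drop expired nodes, then evict LRU nodes
      } finally {
        synchronized (this) {
          isCleaning = false; // allow a future call to spawn a new cleaner
        }
      }
    });
    cleaner.setPriority(Thread.MIN_PRIORITY); // keep it in the background
    cleaner.setDaemon(true);                  // don't block JVM shutdown
    cleaner.start();
  }
}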
- synchronized (this) { - if (isCleaning) { - return; - } - isCleaning = true; - } - Thread cleaner = new Thread() { - @Override - public void run() { - Iterator<Map.Entry<Key, AggrColStatsList>> mapIterator = cacheStore.entrySet().iterator(); - while (mapIterator.hasNext()) { - Map.Entry<Key, AggrColStatsList> pair = - (Map.Entry<Key, AggrColStatsList>) mapIterator.next(); - AggrColStatsCached node; - AggrColStatsList candidateList = (AggrColStatsList) pair.getValue(); - List<AggrColStatsCached> nodes = candidateList.nodes; - if (nodes.size() == 0) { - mapIterator.remove(); - continue; - } - boolean isLocked = false; - try { - isLocked = candidateList.writeLock.tryLock(maxWriterWaitTime, TimeUnit.MILLISECONDS); - if (isLocked) { - for (Iterator<AggrColStatsCached> listIterator = nodes.iterator(); listIterator.hasNext();) { - node = listIterator.next(); - // Remove the node if it has expired - if (isExpired(node)) { - listIterator.remove(); - currentNodes.getAndDecrement(); - } - } - } - } catch (InterruptedException e) { - LOG.debug(e); - } finally { - if (isLocked) { - candidateList.writeLock.unlock(); - } - } - // We want to make sure this runs at a low priority in the background - Thread.yield(); - } - // If the expired nodes did not result in cache being cleanUntil% in size, - // start removing LRU nodes - while (getCurrentNodes() / maxCacheNodes > cleanUntil) { - evictOneNode(); - } - } - }; - cleaner.setPriority(Thread.MIN_PRIORITY); - cleaner.setDaemon(true); - cleaner.start(); - } - - /** - * Evict an LRU node or expired node whichever we find first - */ - private void evictOneNode() { - // Get the LRU key, value - Key lruKey = null; - AggrColStatsList lruValue = null; - for (Map.Entry<Key, AggrColStatsList> entry : cacheStore.entrySet()) { - Key key = entry.getKey(); - AggrColStatsList value = entry.getValue(); - if (lruKey == null) { - lruKey = key; - lruValue = value; - continue; - } - if ((value.lastAccessTime < lruValue.lastAccessTime) && !(value.nodes.isEmpty())) { - lruKey = key; - lruValue = value; - } - } - // Now delete a node for this key's list - AggrColStatsList candidateList = cacheStore.get(lruKey); - boolean isLocked = false; - try { - isLocked = candidateList.writeLock.tryLock(maxWriterWaitTime, TimeUnit.MILLISECONDS); - if (isLocked) { - AggrColStatsCached candidate; - AggrColStatsCached lruNode = null; - int currentIndex = 0; - int deleteIndex = 0; - for (Iterator<AggrColStatsCached> iterator = candidateList.nodes.iterator(); iterator.hasNext();) { - candidate = iterator.next(); - // Since we have to create space for 1, if we find an expired node we will remove it & - // return - if (isExpired(candidate)) { - iterator.remove(); - currentNodes.getAndDecrement(); - return; - } - // Sorry, too many ifs but this form looks optimal - // Update the LRU node from what we've seen so far - if (lruNode == null) { - lruNode = candidate; - ++currentIndex; - continue; - } - if (lruNode != null) { - if (candidate.lastAccessTime < lruNode.lastAccessTime) { - lruNode = candidate; - deleteIndex = currentIndex; - } - } - } - candidateList.nodes.remove(deleteIndex); - currentNodes.getAndDecrement(); - } - } catch (InterruptedException e) { - LOG.debug(e); - } finally { - if (isLocked) { - candidateList.writeLock.unlock(); - } - } - } - - // TODO: store and print metrics - void printMetrics() { - - } - - private boolean isExpired(AggrColStatsCached aggrColStats) { - return System.currentTimeMillis() - aggrColStats.lastAccessTime > timeToLive; - } - - /** - * Key object for the 
stats cache hashtable */ - static class Key { - private final String dbName; - private final String tblName; - private final String colName; - - Key(String db, String table, String col) { - // Don't construct an illegal cache key - if ((db == null) || (table == null) || (col == null)) { - throw new IllegalArgumentException("dbName, tblName, colName can't be null"); - } - dbName = db; - tblName = table; - colName = col; - } - - @Override - public boolean equals(Object other) { - if ((other == null) || !(other instanceof Key)) { - return false; - } - Key that = (Key) other; - return dbName.equals(that.dbName) && tblName.equals(that.tblName) - && colName.equals(that.colName); - } - - @Override - public int hashCode() { - return dbName.hashCode() * 31 + tblName.hashCode() * 31 + colName.hashCode(); - } - - @Override - public String toString() { - return "Database: " + dbName + ", Table: " + tblName + ", Column: " + colName; - } - - } - - static class AggrColStatsList { - // TODO: figure out a better data structure for node list(?) - private List<AggrColStatsCached> nodes = new ArrayList<AggrColStatsCached>(); - private ReadWriteLock lock = new ReentrantReadWriteLock(); - // Read lock for get operation - private Lock readLock = lock.readLock(); - // Write lock for add, evict and clean operation - private Lock writeLock = lock.writeLock(); - // Using volatile instead of locking updates to this variable, - // since we can rely on approx lastAccessTime but don't want a performance hit - private volatile long lastAccessTime = 0; - - List<AggrColStatsCached> getNodes() { - return nodes; - } - - void updateLastAccessTime() { - this.lastAccessTime = System.currentTimeMillis(); - } - } - - static class AggrColStatsCached { - private final int numPartsCached; - private final BloomFilter bloomFilter; - private final ColumnStatisticsObj colStats; - private volatile long lastAccessTime; - - AggrColStatsCached(int numPartsCached, BloomFilter bloomFilter, - ColumnStatisticsObj colStats) { - this.numPartsCached = numPartsCached; - this.bloomFilter = bloomFilter; - this.colStats = colStats; - this.lastAccessTime = System.currentTimeMillis(); - } - - int getNumPartsCached() { - return numPartsCached; - } - - ColumnStatisticsObj getColStats() { - updateLastAccessTime(); - return colStats; - } - - BloomFilter getBloomFilter() { - return bloomFilter; - } - - void updateLastAccessTime() { - this.lastAccessTime = System.currentTimeMillis(); - } - } - - /** - * Intermediate object, used to collect hits & misses for each cache node that is evaluated for an - * incoming request - */ - private static class MatchStats { - private int hits = 0; - private int misses = 0; - private boolean shouldSkip = false; - - MatchStats(int hits, int misses, boolean shouldSkip) { - this.hits = hits; - this.misses = misses; - this.shouldSkip = shouldSkip; - } - } - - /** - * TODO: capture some metrics for the cache - */ - class Metrics { - - } - - /** - * TODO: implement memory management for the cache - */ - static class MemoryManager { - - } -} http://git-wip-us.apache.org/repos/asf/hive/blob/15012469/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/HBaseReadWrite.java ---------------------------------------------------------------------- diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/HBaseReadWrite.java b/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/HBaseReadWrite.java index b0dc707..b54afb9 100644 --- a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/HBaseReadWrite.java +++
b/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/HBaseReadWrite.java @@ -19,7 +19,6 @@ package org.apache.hadoop.hive.metastore.hbase; import com.google.common.annotations.VisibleForTesting; - import org.apache.commons.codec.binary.Base64; import org.apache.commons.lang.ArrayUtils; import org.apache.commons.logging.Log; @@ -38,7 +37,9 @@ import org.apache.hadoop.hbase.filter.Filter; import org.apache.hadoop.hbase.filter.RegexStringComparator; import org.apache.hadoop.hbase.filter.RowFilter; import org.apache.hadoop.hive.common.ObjectPair; +import org.apache.hive.common.util.BloomFilter; import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.metastore.AggregateStatsCache; import org.apache.hadoop.hive.metastore.api.AggrStats; import org.apache.hadoop.hive.metastore.api.ColumnStatistics; import org.apache.hadoop.hive.metastore.api.ColumnStatisticsDesc; @@ -52,10 +53,8 @@ import org.apache.hadoop.hive.metastore.api.PrincipalType; import org.apache.hadoop.hive.metastore.api.Role; import org.apache.hadoop.hive.metastore.api.StorageDescriptor; import org.apache.hadoop.hive.metastore.api.Table; -import org.apache.hadoop.hive.metastore.hbase.AggregateStatsCache.AggrColStatsCached; import org.apache.hadoop.hive.metastore.hbase.stats.ColumnStatsAggregator; import org.apache.hadoop.hive.metastore.hbase.stats.ColumnStatsAggregatorFactory; -import org.apache.hadoop.hive.metastore.hbase.utils.BloomFilter; import java.io.IOException; import java.security.MessageDigest; @@ -1581,7 +1580,7 @@ class HBaseReadWrite { List<List<String>> partVals, List<String> colNames) throws IOException { // One ColumnStatisticsObj per column List<ColumnStatisticsObj> colStatsList = new ArrayList<ColumnStatisticsObj>(); - AggrColStatsCached colStatsAggrCached; + AggregateStatsCache.AggrColStats colStatsAggrCached; ColumnStatisticsObj colStatsAggr; int maxPartitionsPerCacheNode = aggrStatsCache.getMaxPartsPerCacheNode(); float falsePositiveProbability = aggrStatsCache.getFalsePositiveProbability(); @@ -1672,7 +1671,7 @@ class HBaseReadWrite { } // Add partition to the bloom filter if it's requested if (bloomFilter != null) { - bloomFilter.addToFilter(partNames.get(pOff).getBytes()); + bloomFilter.add(partNames.get(pOff).getBytes()); } } return colStatsAggr; http://git-wip-us.apache.org/repos/asf/hive/blob/15012469/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/HBaseStore.java ---------------------------------------------------------------------- diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/HBaseStore.java b/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/HBaseStore.java index 79a61d4..0dbdba2 100644 --- a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/HBaseStore.java +++ b/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/HBaseStore.java @@ -23,7 +23,6 @@ import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hive.common.FileUtils; -import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.metastore.HiveMetaStore; import org.apache.hadoop.hive.metastore.PartFilterExprUtil; import org.apache.hadoop.hive.metastore.PartitionExpressionProxy; @@ -62,7 +61,6 @@ import org.apache.hadoop.hive.metastore.api.UnknownTableException; import org.apache.hadoop.hive.metastore.hbase.HBaseFilterPlanUtil.PlanResult; import org.apache.hadoop.hive.metastore.hbase.HBaseFilterPlanUtil.ScanPlan; import 
org.apache.hadoop.hive.metastore.parser.ExpressionTree; -import org.apache.hadoop.hive.metastore.hbase.AggregateStatsCache.AggrColStatsCached; import org.apache.hadoop.hive.metastore.partition.spec.PartitionSpecProxy; import org.apache.thrift.TException; http://git-wip-us.apache.org/repos/asf/hive/blob/15012469/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/utils/BitVector.java ---------------------------------------------------------------------- diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/utils/BitVector.java b/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/utils/BitVector.java deleted file mode 100644 index 20af350..0000000 --- a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/utils/BitVector.java +++ /dev/null @@ -1,122 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.hadoop.hive.metastore.hbase.utils; - -import java.util.Arrays; - -/** - * Barebones fixed length bit vector using a byte array - */ -public class BitVector { - // We'll use this as the bit vector container - private byte data[]; - public static int ELEMENT_SIZE = Byte.SIZE; - - public BitVector(int size) { - data = new byte[size/ELEMENT_SIZE]; - } - - /** - * Total bits -> num elements * size of each element - * - */ - public long getSize() { - return data.length * ELEMENT_SIZE; - } - - /** - * Set the bit at the given index to 1 - * - * @param bitIndex - */ - public void setBit(int bitIndex) { - validateBitIndex(bitIndex); - int dataIndex = bitIndex / ELEMENT_SIZE; - int elementIndex = ELEMENT_SIZE - bitIndex % ELEMENT_SIZE - 1; - // Set the elementIndex'th bit of data[dataIndex]'th element - data[dataIndex] = (byte) (data[dataIndex] | (1 << elementIndex)); - } - - /** - * Set the bit at the given index to 0 - * - * @param bitIndex - */ - public void unSetBit(int bitIndex) { - validateBitIndex(bitIndex); - int dataIndex = bitIndex / ELEMENT_SIZE; - int elementIndex = ELEMENT_SIZE - bitIndex % ELEMENT_SIZE - 1; - // Unset the elementIndex'th bit of data[dataIndex]'th element - data[dataIndex] = (byte) (data[dataIndex] & ~(1 << elementIndex)); - } - - /** - * Check if a bit at the given index is 1 - * @param bitIndex - */ - public boolean isBitSet(int bitIndex) { - validateBitIndex(bitIndex); - int dataIndex = bitIndex / ELEMENT_SIZE; - int elementIndex = ELEMENT_SIZE - bitIndex % ELEMENT_SIZE - 1; - if ((data[dataIndex] & (1 << elementIndex)) > 0) { - return true; - } - return false; - } - - /** - * Set all bits to 0 - * - */ - public void clearAll() { - Arrays.fill(data, (byte) 0x00); - } - - /** - * Set all bits to 1 - * - */ - public void setAll() { - Arrays.fill(data, (byte) 0xFF); - } - - /** - * Prints the bit vector as a string of bit values (e.g. 
01010111) - */ - @Override - public String toString() { - StringBuilder str = new StringBuilder(); - for(byte b : data) { - str.append(Integer.toBinaryString((b & 0xFF) + 0x100).substring(1)); - } - return str.toString(); - } - - /** - * Check if queried bitIndex is in valid range - * @param bitIndex - */ - private void validateBitIndex(int bitIndex) { - if ((bitIndex >= getSize()) || (bitIndex < 0)) { - throw new IllegalArgumentException("Bit index out of range: " + bitIndex); - } - } - -} http://git-wip-us.apache.org/repos/asf/hive/blob/15012469/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/utils/BloomFilter.java ---------------------------------------------------------------------- diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/utils/BloomFilter.java b/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/utils/BloomFilter.java deleted file mode 100644 index 5606596..0000000 --- a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/utils/BloomFilter.java +++ /dev/null @@ -1,144 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.hadoop.hive.metastore.hbase.utils; - -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; -import org.apache.hadoop.hbase.util.Hash; - -/** - * Barebones bloom filter implementation - */ -public class BloomFilter { - private static final Log LOG = LogFactory.getLog(BloomFilter.class.getName()); - // The cardinality of the set for which we are generating the bloom filter - // Default size is 10000 - private int setSize = 10000; - // The probability of false positives we are ready to tolerate - // Default is 1% - private double falsePositiveProbability = 0.01; - // Number of bits used for the filter - // Formula: -n*ln(p) / (ln(2)^2) - private int numBits; - // Number of hashing functions - // Formula: m/n * ln(2) - private int numHashFunctions; - private final Hash hash; - private BitVector bitVector; - - public BloomFilter(int setSize, double falsePositiveProbability) { - this.setSize = setSize; - this.falsePositiveProbability = falsePositiveProbability; - this.numBits = calculateFilterSize(this.setSize, this.falsePositiveProbability); - this.numHashFunctions = calculateHashFunctions(this.setSize, this.numBits); - // Create a bit vector of size numBits - this.bitVector = new BitVector(numBits); - // Use MurmurHash3 - hash = Hash.getInstance(Hash.MURMUR_HASH3); - } - - /** - * Calculate the number of bits in the filter - * Also align size to BitVector.ELEMENT_SIZE - * @param setSize - * @param falsePositiveProbability - * @return numBits - */ - private int calculateFilterSize(int setSize, double falsePositiveProbability) { - int numBits = (int) (-setSize * Math.log(falsePositiveProbability) / (Math.log(2) * Math.log(2))); - numBits = numBits + (BitVector.ELEMENT_SIZE - (numBits % BitVector.ELEMENT_SIZE)); - LOG.info("Bloom Filter size: " + numBits); - return numBits; - } - - /** - * Calculate the number of hash functions needed by the BloomFilter - * @param setSize - * @param numBits - * @return numHashFunctions - */ - private int calculateHashFunctions(int setSize, int numBits) { - int numHashFunctions = Math.max(1, (int) Math.round((double) numBits / setSize * Math.log(2))); - LOG.info("Number of hashing functions: " + numHashFunctions); - return numHashFunctions; - } - - /** - * @return the underlying BitVector object - */ - public BitVector getBitVector() { - return bitVector; - } - - public int getFilterSize() { - return numBits; - } - - - public int getNumHashFunctions() { - return numHashFunctions; - } - - /** - * Add an item to the filter - * - * @param item to add - */ - public void addToFilter(byte[] item) { - int bitIndex; - // Hash the item numHashFunctions times - for (int i = 0; i < numHashFunctions; i++) { - bitIndex = getBitIndex(item, i); - // Set the bit at this index - bitVector.setBit(bitIndex); - } - } - - /** - * Check whether the item is present in the filter - * @param item - * @return hasItem (true if the bloom filter contains the item) - */ - public boolean contains(byte[] item) { - int bitIndex; - boolean hasItem = true; - // Hash the item numHashFunctions times - for (int i = 0; i < numHashFunctions; i++) { - bitIndex = getBitIndex(item, i); - hasItem = hasItem && bitVector.isBitSet(bitIndex); - if (!hasItem) { - return hasItem; - } - } - return hasItem; - } - - /** - * Hash the item using i as the seed and return its potential position in the bit vector - * Also we negate a negative hash value - * - * @param item - * @param i (for the i-th hash function) - * @return position of item in underlying bit vector - */ - private int getBitIndex(byte[] item, int i) { - return Math.abs(hash.hash(item, i) % (numBits)); - } -} http://git-wip-us.apache.org/repos/asf/hive/blob/15012469/metastore/src/test/org/apache/hadoop/hive/metastore/hbase/TestAggregateStatsCache.java ---------------------------------------------------------------------- diff --git a/metastore/src/test/org/apache/hadoop/hive/metastore/hbase/TestAggregateStatsCache.java b/metastore/src/test/org/apache/hadoop/hive/metastore/hbase/TestAggregateStatsCache.java deleted file mode 100644 index aa2f4a2..0000000 --- a/metastore/src/test/org/apache/hadoop/hive/metastore/hbase/TestAggregateStatsCache.java +++ /dev/null @@ -1,266 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.hadoop.hive.metastore.hbase; - -import java.util.ArrayList; -import java.util.List; - -import org.apache.hadoop.hive.conf.HiveConf; -import org.apache.hadoop.hive.metastore.api.ColumnStatisticsData; -import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj; -import org.apache.hadoop.hive.metastore.api.LongColumnStatsData; -import org.apache.hadoop.hive.metastore.hbase.AggregateStatsCache.AggrColStatsCached; -import org.apache.hadoop.hive.metastore.hbase.AggregateStatsCache.Key; -import org.apache.hadoop.hive.metastore.hbase.utils.BloomFilter; -import org.junit.After; -import org.junit.AfterClass; -import org.junit.Assert; -import org.junit.Before; -import org.junit.BeforeClass; -import org.junit.Test; - -public class TestAggregateStatsCache { - static String DB_NAME = "db"; - static String TAB_PREFIX = "tab"; - static String PART_PREFIX = "part"; - static String COL_PREFIX = "col"; - static int NUM_TABS = 2; - static int NUM_PARTS = 20; - static int NUM_COLS = 5; - static int MAX_CACHE_NODES = 10; - static int MAX_PARTITIONS_PER_CACHE_NODE = 10; - static String TIME_TO_LIVE = "20s"; - static String MAX_WRITER_WAIT = "1s"; - static String MAX_READER_WAIT = "1s"; - static float FALSE_POSITIVE_PROBABILITY = (float) 0.01; - static float MAX_VARIANCE = (float) 0.5; - static AggregateStatsCache cache; - static List<String> tables = new ArrayList<String>(); - static List<String> tabParts = new ArrayList<String>(); - static List<String> tabCols = new ArrayList<String>(); - - @BeforeClass - public static void beforeTest() { - // All data initializations - initializeTables(); - initializePartitions(); - initializeColumns(); - } - - // tab1, tab2 - private static void initializeTables() { - for (int i = 1; i <= NUM_TABS; i++) { - tables.add(TAB_PREFIX + i); - } - } - - // part1 ... part20 - private static void initializePartitions() { - for (int i = 1; i <= NUM_PARTS; i++) { - tabParts.add(PART_PREFIX + i); - } - } - - // col1 ...
col5 - private static void initializeColumns() { - for (int i = 1; i <= NUM_COLS; i++) { - tabCols.add(COL_PREFIX + i); - } - } - - @AfterClass - public static void afterTest() { - } - - @Before - public void setUp() { - HiveConf hiveConf = new HiveConf(); - hiveConf.setIntVar(HiveConf.ConfVars.METASTORE_HBASE_AGGREGATE_STATS_CACHE_SIZE, - MAX_CACHE_NODES); - hiveConf.setIntVar(HiveConf.ConfVars.METASTORE_HBASE_AGGREGATE_STATS_CACHE_MAX_PARTITIONS, - MAX_PARTITIONS_PER_CACHE_NODE); - hiveConf.setFloatVar( - HiveConf.ConfVars.METASTORE_HBASE_AGGREGATE_STATS_CACHE_FALSE_POSITIVE_PROBABILITY, - FALSE_POSITIVE_PROBABILITY); - hiveConf.setFloatVar(HiveConf.ConfVars.METASTORE_HBASE_AGGREGATE_STATS_CACHE_MAX_VARIANCE, - MAX_VARIANCE); - hiveConf.setVar(HiveConf.ConfVars.METASTORE_HBASE_CACHE_TIME_TO_LIVE, TIME_TO_LIVE); - hiveConf.setVar(HiveConf.ConfVars.METASTORE_HBASE_CACHE_MAX_WRITER_WAIT, MAX_WRITER_WAIT); - hiveConf.setVar(HiveConf.ConfVars.METASTORE_HBASE_CACHE_MAX_READER_WAIT, MAX_READER_WAIT); - cache = AggregateStatsCache.getInstance(hiveConf); - } - - @After - public void tearDown() { - } - - @Test - public void testCacheKey() { - Key k1 = new Key("db", "tbl1", "col"); - Key k2 = new Key("db", "tbl1", "col"); - // k1 equals k2 - Assert.assertEquals(k1, k2); - Key k3 = new Key("db", "tbl2", "col"); - // k1 not equals k3 - Assert.assertNotEquals(k1, k3); - } - - @Test - public void testBasicAddAndGet() throws Exception { - // Partnames: [tab1part1...tab1part9] - List<String> partNames = preparePartNames(tables.get(0), 1, 9); - // Prepare the bloom filter - BloomFilter bloomFilter = prepareBloomFilter(partNames); - // Add a dummy aggregate stats object for the above parts (part1...part9) of tab1 for col1 - String tblName = tables.get(0); - String colName = tabCols.get(0); - int highVal = 100, lowVal = 10, numDVs = 50, numNulls = 5; - // We'll treat this as the aggregate col stats for part1...part9 of tab1, col1 - ColumnStatisticsObj aggrColStats = - getDummyLongColStat(colName, highVal, lowVal, numDVs, numNulls); - // Now add the dummy colstats for these partitions to the cache (recorded as 10 parts cached) - cache.add(DB_NAME, tblName, colName, 10, aggrColStats, bloomFilter); - // Now get from cache - AggrColStatsCached aggrStatsCached = cache.get(DB_NAME, tblName, colName, partNames); - Assert.assertNotNull(aggrStatsCached); - - ColumnStatisticsObj aggrColStatsCached = aggrStatsCached.getColStats(); - Assert.assertEquals(aggrColStats, aggrColStatsCached); - - // Now get a non-existent entry - aggrStatsCached = cache.get("dbNotThere", tblName, colName, partNames); - Assert.assertNull(aggrStatsCached); - } - - @Test - public void testAddGetWithVariance() throws Exception { - // Partnames: [tab1part1...tab1part9] - List<String> partNames = preparePartNames(tables.get(0), 1, 9); - // Prepare the bloom filter - BloomFilter bloomFilter = prepareBloomFilter(partNames); - // Add a dummy aggregate stats object for the above parts (part1...part9) of tab1 for col1 - String tblName = tables.get(0); - String colName = tabCols.get(0); - int highVal = 100, lowVal = 10, numDVs = 50, numNulls = 5; - // We'll treat this as the aggregate col stats for part1...part9 of tab1, col1 - ColumnStatisticsObj aggrColStats = - getDummyLongColStat(colName, highVal, lowVal, numDVs, numNulls); - // Now add to cache - cache.add(DB_NAME, tblName, colName, 10, aggrColStats, bloomFilter); - - // Now prepare partnames with only 5 partitions: [tab1part1...tab1part5] - partNames = preparePartNames(tables.get(0), 1, 5); - // This get should fail because
its variance ((10-5)/5) is way past MAX_VARIANCE (0.5) - AggrColStatsCached aggrStatsCached = cache.get(DB_NAME, tblName, colName, partNames); - Assert.assertNull(aggrStatsCached); - - // Now prepare partnames with 10 partitions: [tab1part11...tab1part20], but with no overlap - partNames = preparePartNames(tables.get(0), 11, 20); - // This get should fail because there is no overlap, so the bloom filter misses exceed the - // tolerable maximum (MAX_VARIANCE * number of partitions requested) - aggrStatsCached = cache.get(DB_NAME, tblName, colName, partNames); - Assert.assertNull(aggrStatsCached); - - // Now prepare partnames with 8 partitions: [tab1part1...tab1part8], which are contained in the - // object that we added to the cache - partNames = preparePartNames(tables.get(0), 1, 8); - // This get should succeed because its variance ((10-8)/8) is within MAX_VARIANCE (0.5) - aggrStatsCached = cache.get(DB_NAME, tblName, colName, partNames); - Assert.assertNotNull(aggrStatsCached); - ColumnStatisticsObj aggrColStatsCached = aggrStatsCached.getColStats(); - Assert.assertEquals(aggrColStats, aggrColStatsCached); - } - - @Test - public void testTimeToLive() throws Exception { - // Add a dummy node to cache - // Partnames: [tab1part1...tab1part9] - List<String> partNames = preparePartNames(tables.get(0), 1, 9); - // Prepare the bloom filter - BloomFilter bloomFilter = prepareBloomFilter(partNames); - // Add a dummy aggregate stats object for the above parts (part1...part9) of tab1 for col1 - String tblName = tables.get(0); - String colName = tabCols.get(0); - int highVal = 100, lowVal = 10, numDVs = 50, numNulls = 5; - // We'll treat this as the aggregate col stats for part1...part9 of tab1, col1 - ColumnStatisticsObj aggrColStats = - getDummyLongColStat(colName, highVal, lowVal, numDVs, numNulls); - // Now add to cache - cache.add(DB_NAME, tblName, colName, 10, aggrColStats, bloomFilter); - - // Sleep for 30 seconds - Thread.sleep(30000); - - // Get should fail now, since the TTL is 20s and we've snoozed for 30 seconds - AggrColStatsCached aggrStatsCached = cache.get(DB_NAME, tblName, colName, partNames); - Assert.assertNull(aggrStatsCached); - } - - /** - * Prepares a list of partition names by getting partitions from minPart ... maxPart and - * prepending the table name - * Example: [tab1part1, tab1part2 ...]
- * - * @param tabName - * @param minPart - * @param maxPart - * @return - * @throws Exception - */ - private List<String> preparePartNames(String tabName, int minPart, int maxPart) throws Exception { - if ((minPart < 1) || (maxPart > NUM_PARTS)) { - throw new Exception("tabParts does not have these partition numbers"); - } - List<String> partNames = new ArrayList<String>(); - for (int i = minPart; i <= maxPart; i++) { - String partName = tabParts.get(i-1); - partNames.add(tabName + partName); - } - return partNames; - } - - /** - * Prepares a bloom filter from the list of partition names - * @param partNames - * @return - */ - private BloomFilter prepareBloomFilter(List <String> partNames) { - BloomFilter bloomFilter = - new BloomFilter(MAX_PARTITIONS_PER_CACHE_NODE, FALSE_POSITIVE_PROBABILITY); - for (String partName: partNames) { - bloomFilter.addToFilter(partName.getBytes()); - } - return bloomFilter; - } - - private ColumnStatisticsObj getDummyLongColStat(String colName, int highVal, int lowVal, int numDVs, int numNulls) { - ColumnStatisticsObj aggrColStats = new ColumnStatisticsObj(); - aggrColStats.setColName(colName); - aggrColStats.setColType("long"); - LongColumnStatsData longStatsData = new LongColumnStatsData(); - longStatsData.setHighValue(highVal); - longStatsData.setLowValue(lowVal); - longStatsData.setNumDVs(numDVs); - longStatsData.setNumNulls(numNulls); - ColumnStatisticsData aggrColStatsData = new ColumnStatisticsData(); - aggrColStatsData.setLongStats(longStatsData); - aggrColStats.setStatsData(aggrColStatsData); - return aggrColStats; - } -} http://git-wip-us.apache.org/repos/asf/hive/blob/15012469/metastore/src/test/org/apache/hadoop/hive/metastore/hbase/utils/TestBitVector.java ---------------------------------------------------------------------- diff --git a/metastore/src/test/org/apache/hadoop/hive/metastore/hbase/utils/TestBitVector.java b/metastore/src/test/org/apache/hadoop/hive/metastore/hbase/utils/TestBitVector.java deleted file mode 100644 index 559fbad..0000000 --- a/metastore/src/test/org/apache/hadoop/hive/metastore/hbase/utils/TestBitVector.java +++ /dev/null @@ -1,92 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.hadoop.hive.metastore.hbase.utils; - -import org.apache.hadoop.hive.metastore.hbase.utils.BitVector; -import org.junit.Assert; -import org.junit.After; -import org.junit.AfterClass; -import org.junit.Before; -import org.junit.BeforeClass; -import org.junit.Test; - -public class TestBitVector { - - static int BIT_VECTOR_SIZE = 32; - BitVector bitVector; - - @BeforeClass - public static void beforeTest() { - } - - @AfterClass - public static void afterTest() { - } - - @Before - public void setUp() { - // Create a new BitVector - bitVector = new BitVector(BIT_VECTOR_SIZE); - } - - - @After - public void tearDown() { - } - - @Test - public void testSetAll() { - // Set bits - bitVector.setAll(); - Assert.assertEquals("11111111111111111111111111111111", bitVector.toString()); - } - - @Test - public void testClearAll() { - // Clear all bits - bitVector.clearAll(); - Assert.assertEquals("00000000000000000000000000000000", bitVector.toString()); - } - - @Test - public void testSetUnsetBit() { - // Set 3rd bit - bitVector.setBit(2); - Assert.assertEquals("00100000000000000000000000000000", bitVector.toString()); - // Now check if 3rd bit is set - Assert.assertTrue(bitVector.isBitSet(2)); - // Now set 30th bit - bitVector.setBit(29); - Assert.assertEquals("00100000000000000000000000000100", bitVector.toString()); - // Now check if 30th bit is set - Assert.assertTrue(bitVector.isBitSet(29)); - - // Now unset 3rd bit - bitVector.unSetBit(2); - Assert.assertEquals("00000000000000000000000000000100", bitVector.toString()); - // Now check if 3rd bit is unset - Assert.assertFalse(bitVector.isBitSet(2)); - // Now unset 30th bit - bitVector.unSetBit(29); - Assert.assertEquals("00000000000000000000000000000000", bitVector.toString()); - // Now check if 30th bit is unset - Assert.assertFalse(bitVector.isBitSet(29)); - } -} http://git-wip-us.apache.org/repos/asf/hive/blob/15012469/metastore/src/test/org/apache/hadoop/hive/metastore/hbase/utils/TestBloomFilter.java ---------------------------------------------------------------------- diff --git a/metastore/src/test/org/apache/hadoop/hive/metastore/hbase/utils/TestBloomFilter.java b/metastore/src/test/org/apache/hadoop/hive/metastore/hbase/utils/TestBloomFilter.java deleted file mode 100644 index 5175efd..0000000 --- a/metastore/src/test/org/apache/hadoop/hive/metastore/hbase/utils/TestBloomFilter.java +++ /dev/null @@ -1,82 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.hadoop.hive.metastore.hbase.utils; - -import org.apache.hadoop.hive.metastore.hbase.utils.BloomFilter; -import org.junit.After; -import org.junit.AfterClass; -import org.junit.Assert; -import org.junit.Before; -import org.junit.BeforeClass; -import org.junit.Test; - -public class TestBloomFilter { - static final int SET_SIZE = 50; - static final double FALSE_POSITIVE_PROBABILITY = 0.01; - // Pre-calculated for the above set size and fpp - static final int FILTER_SIZE = 480; - static final int NUM_HASH_FUNCTIONS = 7; - BloomFilter bloomFilter; - // Items that we'll add to the filter - String[] items = {"Part1=Val1", "Part2=Val2", "Part3=Val3", "Part4=Val4", "Part5=Val5"}; - - @BeforeClass - public static void beforeTest() { - } - - @AfterClass - public static void afterTest() { - } - - @Before - public void setUp() { - bloomFilter = new BloomFilter(SET_SIZE, FALSE_POSITIVE_PROBABILITY); - } - - @After - public void tearDown() { - } - - @Test - public void testFilterAndHashSize() { - Assert.assertEquals(bloomFilter.getFilterSize(), FILTER_SIZE); - Assert.assertEquals(bloomFilter.getNumHashFunctions(), NUM_HASH_FUNCTIONS); - } - - @Test - public void testFilterFunctions() { - // Add all items to the bloom filter - // (since bloom filter returns false positives, no point testing for negative cases) - for (String item: items) { - bloomFilter.addToFilter(item.getBytes()); - } - // Test for presence - for (String item: items) { - Assert.assertTrue(bloomFilter.contains(item.getBytes())); - } - // Clear all bits - bloomFilter.getBitVector().clearAll(); - // Test for presence now - should fail - for (String item: items) { - Assert.assertFalse(bloomFilter.contains(item.getBytes())); - } - } - -}
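That is the whole patch: the hand-rolled AggregateStatsCache, BitVector, and BloomFilter (and their tests) are removed from the hbase-metastore branch, and HBaseReadWrite now leans on the copies that already exist elsewhere in Hive, org.apache.hadoop.hive.metastore.AggregateStatsCache and org.apache.hive.common.util.BloomFilter, as the +import lines and the bloomFilter.add(...) call in the HBaseReadWrite hunk show. A minimal usage sketch of the shared filter follows; the two-argument constructor (expected entries, false-positive probability) and the test() probe are assumptions about the shared utility's API, since only add() is visible in this diff:

import org.apache.hive.common.util.BloomFilter;

public class SharedBloomFilterSketch {
  public static void main(String[] args) {
    // Assumed constructor: expected entry count and tolerated
    // false-positive probability, mirroring the deleted implementation
    BloomFilter filter = new BloomFilter(10, 0.01);
    // add(byte[]) is the call visible in the HBaseReadWrite hunk above;
    // it replaces the deleted addToFilter(byte[])
    filter.add("part1=val1".getBytes());
    // test(byte[]) stands in for the deleted contains(byte[]): true for
    // added items, false for others except ~1% false positives
    System.out.println(filter.test("part1=val1".getBytes())); // true
    System.out.println(filter.test("part9=val9".getBytes())); // almost certainly false
  }
}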