Repository: hive
Updated Branches:
  refs/heads/hbase-metastore 1a64664ae -> 15012469a
HIVE-10954 AggregateStatsCache duplicated in HBaseMetastore (gates)

Project: http://git-wip-us.apache.org/repos/asf/hive/repo
Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/15012469
Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/15012469
Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/15012469

Branch: refs/heads/hbase-metastore
Commit: 15012469afb8e0c0354b82c972f058ed3cd0d221
Parents: 1a64664
Author: Alan Gates <ga...@hortonworks.com>
Authored: Wed Jun 17 17:20:19 2015 -0700
Committer: Alan Gates <ga...@hortonworks.com>
Committed: Wed Jun 17 17:20:19 2015 -0700

----------------------------------------------------------------------
 .../metastore/hbase/AggregateStatsCache.java    | 559 -------------------
 .../hive/metastore/hbase/HBaseReadWrite.java    |   9 +-
 .../hadoop/hive/metastore/hbase/HBaseStore.java |   2 -
 .../hive/metastore/hbase/utils/BitVector.java   | 122 ----
 .../hive/metastore/hbase/utils/BloomFilter.java | 144 -----
 .../hbase/TestAggregateStatsCache.java          | 266 ---------
 .../metastore/hbase/utils/TestBitVector.java    |  92 ---
 .../metastore/hbase/utils/TestBloomFilter.java  |  82 ---
 8 files changed, 4 insertions(+), 1272 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hive/blob/15012469/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/AggregateStatsCache.java
----------------------------------------------------------------------
diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/AggregateStatsCache.java b/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/AggregateStatsCache.java deleted file mode 100644 index 4dfcd81..0000000 --- a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/AggregateStatsCache.java +++ /dev/null @@ -1,559 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License.
- */ - -package org.apache.hadoop.hive.metastore.hbase; - -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.hive.conf.HiveConf; -import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj; -import org.apache.hadoop.hive.metastore.hbase.utils.BloomFilter; - -import java.util.ArrayList; -import java.util.HashMap; -import java.util.Iterator; -import java.util.List; -import java.util.Map; -import java.util.concurrent.ConcurrentHashMap; -import java.util.concurrent.TimeUnit; -import java.util.concurrent.atomic.AtomicInteger; -import java.util.concurrent.locks.Lock; -import java.util.concurrent.locks.ReadWriteLock; -import java.util.concurrent.locks.ReentrantReadWriteLock; - -class AggregateStatsCache { - - private static final Log LOG = LogFactory.getLog(AggregateStatsCache.class.getName()); - private static AggregateStatsCache self = null; - - // Backing store for this cache - private final ConcurrentHashMap<Key, AggrColStatsList> cacheStore; - // Cache size - private final int maxCacheNodes; - // Current nodes in the cache - private AtomicInteger currentNodes = new AtomicInteger(0); - // Run the cleaner thread when the cache is maxFull% full - private final float maxFull; - // Run the cleaner thread until cache is cleanUntil% occupied - private final float cleanUntil; - // Nodes go stale after this - private final long timeToLive; - // Max time when waiting for write locks on node list - private final long maxWriterWaitTime; - // Max time when waiting for read locks on node list - private final long maxReaderWaitTime; - // Maximum number of partitions aggregated per cache node - private final int maxPartsPerCacheNode; - // Bloom filter false positive probability - private final float falsePositiveProbability; - // Max tolerable variance for matches - private final float maxVariance; - // Used to determine if cleaner thread is already running - private boolean isCleaning = false; - - private AggregateStatsCache(int maxCacheNodes, int maxPartsPerCacheNode, long timeToLive, - float falsePositiveProbability, float maxVariance, long maxWriterWaitTime, - long maxReaderWaitTime, float maxFull, float cleanUntil) { - this.maxCacheNodes = maxCacheNodes; - this.maxPartsPerCacheNode = maxPartsPerCacheNode; - this.timeToLive = timeToLive; - this.falsePositiveProbability = falsePositiveProbability; - this.maxVariance = maxVariance; - this.maxWriterWaitTime = maxWriterWaitTime; - this.maxReaderWaitTime = maxReaderWaitTime; - this.maxFull = maxFull; - this.cleanUntil = cleanUntil; - this.cacheStore = new ConcurrentHashMap<Key, AggrColStatsList>(); - } - - static synchronized AggregateStatsCache getInstance(Configuration conf) { - if (self == null) { - int maxCacheNodes = - HiveConf.getIntVar(conf, HiveConf.ConfVars.METASTORE_HBASE_AGGREGATE_STATS_CACHE_SIZE); - // The number of partitions aggregated per cache node - // If the number of partitions requested is > this value, we'll fetch directly from Metastore - int maxPartitionsPerCacheNode = - HiveConf.getIntVar(conf, - HiveConf.ConfVars.METASTORE_HBASE_AGGREGATE_STATS_CACHE_MAX_PARTITIONS); - long timeToLive = - HiveConf.getTimeVar(conf, HiveConf.ConfVars.METASTORE_HBASE_CACHE_TIME_TO_LIVE, - TimeUnit.SECONDS); - // False positive probability we are ready to tolerate for the underlying bloom filter - float falsePositiveProbability = - HiveConf.getFloatVar(conf, -
HiveConf.ConfVars.METASTORE_HBASE_AGGREGATE_STATS_CACHE_FALSE_POSITIVE_PROBABILITY); - // Maximum tolerable variance in number of partitions between cached node and our request - float maxVariance = - HiveConf.getFloatVar(conf, - HiveConf.ConfVars.METASTORE_HBASE_AGGREGATE_STATS_CACHE_MAX_VARIANCE); - long maxWriterWaitTime = - HiveConf.getTimeVar(conf, HiveConf.ConfVars.METASTORE_HBASE_CACHE_MAX_WRITER_WAIT, - TimeUnit.MILLISECONDS); - long maxReaderWaitTime = - HiveConf.getTimeVar(conf, HiveConf.ConfVars.METASTORE_HBASE_CACHE_MAX_READER_WAIT, - TimeUnit.MILLISECONDS); - float maxFull = HiveConf.getFloatVar(conf, HiveConf.ConfVars.METASTORE_HBASE_CACHE_MAX_FULL); - float cleanUntil = - HiveConf.getFloatVar(conf, HiveConf.ConfVars.METASTORE_HBASE_CACHE_CLEAN_UNTIL); - self = - new AggregateStatsCache(maxCacheNodes, maxPartitionsPerCacheNode, timeToLive, - falsePositiveProbability, maxVariance, maxWriterWaitTime, maxReaderWaitTime, maxFull, - cleanUntil); - } - return self; - } - - int getMaxCacheNodes() { - return maxCacheNodes; - } - - // Don't want to lock on this, so this may return approximate value - int getCurrentNodes() { - return currentNodes.intValue(); - } - - int getMaxPartsPerCacheNode() { - return maxPartsPerCacheNode; - } - - float getFalsePositiveProbability() { - return falsePositiveProbability; - } - - /** - * Return aggregate stats for a column from the cache or null. - * While reading from the nodelist for a key, we wait maxReaderWaitTime to acquire the lock, - * failing which we return a cache miss (i.e. null) - * - * @param dbName - * @param tblName - * @param colName - * @param partNames - * @return - */ - AggrColStatsCached get(String dbName, String tblName, String colName, List<String> partNames) { - // Cache key - Key key = new Key(dbName, tblName, colName); - AggrColStatsList candidateList = cacheStore.get(key); - // No key, or no nodes in candidate list - if ((candidateList == null) || (candidateList.nodes.size() == 0)) { - LOG.info("No aggregate stats cached for " + key.toString()); - return null; - } - // Find the value object - // Update the timestamp of the key,value if value matches the criteria - // Return the value - AggrColStatsCached match = null; - boolean isLocked = false; - try { - // Try to readlock the candidateList; timeout after maxReaderWaitTime - isLocked = candidateList.readLock.tryLock(maxReaderWaitTime, TimeUnit.MILLISECONDS); - if (isLocked) { - match = findBestMatch(partNames, candidateList.nodes); - } - if (match != null) { - // Ok to not lock the list for this and use a volatile lastAccessTime instead - candidateList.updateLastAccessTime(); - } - } catch (InterruptedException e) { - LOG.debug(e); - match = null; - } finally { - if (isLocked) { - candidateList.readLock.unlock(); - } - } - return match; - } - - /** - * Find the best match using the configurable error tolerance and time to live value - * - * @param partNames - * @param candidates - * @return best matched node or null - */ - private AggrColStatsCached findBestMatch(List<String> partNames, List<AggrColStatsCached> candidates) { - // Hits, misses, shouldSkip for a node - MatchStats matchStats; - // MatchStats for each candidate - Map<AggrColStatsCached, MatchStats> candidateMatchStats = new HashMap<AggrColStatsCached, MatchStats>(); - // The final match we intend to return - AggrColStatsCached bestMatch = null; - // To compare among potentially multiple matches - int bestMatchHits = 0; - int numPartsRequested = partNames.size(); - // 1st pass at marking invalid candidates - // 
Checks based on variance and TTL - // Note: we're not creating a copy of the list for saving memory - for (AggrColStatsCached candidate : candidates) { - // Variance check - if ((float) Math.abs((candidate.getNumPartsCached() - numPartsRequested) - / numPartsRequested) > maxVariance) { - candidateMatchStats.put(candidate, new MatchStats(0, 0, true)); - continue; - } - // TTL check - if (isExpired(candidate)) { - candidateMatchStats.put(candidate, new MatchStats(0, 0, true)); - } - else { - candidateMatchStats.put(candidate, new MatchStats(0, 0, false)); - } - } - // We'll count misses as we iterate - int maxMisses = (int) maxVariance * numPartsRequested; - for (String partName : partNames) { - for (AggrColStatsCached candidate : candidates) { - matchStats = candidateMatchStats.get(candidate); - if (matchStats.shouldSkip) { - continue; - } - if (candidate.getBloomFilter().contains(partName.getBytes())) { - ++matchStats.hits; - } else { - ++matchStats.misses; - } - // 2nd pass at marking invalid candidates - // If misses so far exceed max tolerable misses - if (matchStats.misses > maxMisses) { - matchStats.shouldSkip = true; - continue; - } - // Check if this is the best match so far - if (matchStats.hits > bestMatchHits) { - bestMatch = candidate; - } - } - } - if (bestMatch != null) { - // Update the last access time for this node - bestMatch.updateLastAccessTime(); - } - return bestMatch; - } - - /** - * Add a new node to the cache; may trigger the cleaner thread if the cache is near full capacity. - * We'll however add the node even if we temporarily exceed maxCacheNodes, because the cleaner - * will eventually create space from expired nodes or by removing LRU nodes. - * - * @param dbName - * @param tblName - * @param colName - * @param numPartsCached - * @param colStats - * @param bloomFilter - */ - // TODO: make add asynchronous: add shouldn't block the higher level calls - void add(String dbName, String tblName, String colName, int numPartsCached, - ColumnStatisticsObj colStats, BloomFilter bloomFilter) { - // If we have no space in the cache, run cleaner thread - if (getCurrentNodes() / maxCacheNodes > maxFull) { - clean(); - } - // Cache key - Key key = new Key(dbName, tblName, colName); - // Add new node to the cache - AggrColStatsCached node = new AggrColStatsCached(numPartsCached, bloomFilter, colStats); - AggrColStatsList nodeList; - AggrColStatsList newNodeList = new AggrColStatsList(); - newNodeList.nodes = new ArrayList<AggrColStatsCached>(); - nodeList = cacheStore.putIfAbsent(key, newNodeList); - if (nodeList == null) { - nodeList = newNodeList; - } - boolean isLocked = false; - try { - isLocked = nodeList.writeLock.tryLock(maxWriterWaitTime, TimeUnit.MILLISECONDS); - if (isLocked) { - nodeList.nodes.add(node); - node.updateLastAccessTime(); - nodeList.updateLastAccessTime(); - currentNodes.getAndIncrement(); - } - } catch (InterruptedException e) { - LOG.debug(e); - } finally { - if (isLocked) { - nodeList.writeLock.unlock(); - } - } - } - - /** - * Cleans the expired nodes or removes LRU nodes of the cache, - * until the cache size reduces to cleanUntil% full. - */ - private void clean() { - // This spawns a separate thread to walk through the cache and removes expired nodes. - // Only one cleaner thread should be running at any point.
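A minimal, self-contained sketch of the single-cleaner guard that clean() implements below; the class and method names here are illustrative, not from the patch. One difference worth noting: the sketch resets the flag in a finally block once the cleaner exits, whereas the deleted code, as quoted here, never appears to set isCleaning back to false, so its cleaner can only ever be spawned once.

class CleanerGuard {
  // Whether a cleaner thread is currently running; guarded by "this"
  private boolean isCleaning = false;

  void maybeStartCleaner(Runnable cleanup) {
    synchronized (this) {
      if (isCleaning) {
        return;          // a cleaner is already running, nothing to do
      }
      isCleaning = true; // claim the cleaner role before releasing the lock
    }
    Thread cleaner = new Thread(() -> {
      try {
        cleanup.run();   // e.g. drop expired nodes, then evict LRU nodes
      } finally {
        synchronized (this) {
          isCleaning = false; // allow a future call to spawn a new cleaner
        }
      }
    });
    cleaner.setPriority(Thread.MIN_PRIORITY); // keep it in the background
    cleaner.setDaemon(true);                  // don't block JVM shutdown
    cleaner.start();
  }
}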
- synchronized (this) { - if (isCleaning) { - return; - } - isCleaning = true; - } - Thread cleaner = new Thread() { - @Override - public void run() { - Iterator<Map.Entry<Key, AggrColStatsList>> mapIterator = cacheStore.entrySet().iterator(); - while (mapIterator.hasNext()) { - Map.Entry<Key, AggrColStatsList> pair = - (Map.Entry<Key, AggrColStatsList>) mapIterator.next(); - AggrColStatsCached node; - AggrColStatsList candidateList = (AggrColStatsList) pair.getValue(); - List<AggrColStatsCached> nodes = candidateList.nodes; - if (nodes.size() == 0) { - mapIterator.remove(); - continue; - } - boolean isLocked = false; - try { - isLocked = candidateList.writeLock.tryLock(maxWriterWaitTime, TimeUnit.MILLISECONDS); - if (isLocked) { - for (Iterator<AggrColStatsCached> listIterator = nodes.iterator(); listIterator.hasNext();) { - node = listIterator.next(); - // Remove the node if it has expired - if (isExpired(node)) { - listIterator.remove(); - currentNodes.getAndDecrement(); - } - } - } - } catch (InterruptedException e) { - LOG.debug(e); - } finally { - if (isLocked) { - candidateList.writeLock.unlock(); - } - } - // We want to make sure this runs at a low priority in the background - Thread.yield(); - } - // If the expired nodes did not result in cache being cleanUntil% in size, - // start removing LRU nodes - while (getCurrentNodes() / maxCacheNodes > cleanUntil) { - evictOneNode(); - } - } - }; - cleaner.setPriority(Thread.MIN_PRIORITY); - cleaner.setDaemon(true); - cleaner.start(); - } - - /** - * Evict an LRU node or expired node whichever we find first - */ - private void evictOneNode() { - // Get the LRU key, value - Key lruKey = null; - AggrColStatsList lruValue = null; - for (Map.Entry<Key, AggrColStatsList> entry : cacheStore.entrySet()) { - Key key = entry.getKey(); - AggrColStatsList value = entry.getValue(); - if (lruKey == null) { - lruKey = key; - lruValue = value; - continue; - } - if ((value.lastAccessTime < lruValue.lastAccessTime) && !(value.nodes.isEmpty())) { - lruKey = key; - lruValue = value; - } - } - // Now delete a node for this key's list - AggrColStatsList candidateList = cacheStore.get(lruKey); - boolean isLocked = false; - try { - isLocked = candidateList.writeLock.tryLock(maxWriterWaitTime, TimeUnit.MILLISECONDS); - if (isLocked) { - AggrColStatsCached candidate; - AggrColStatsCached lruNode = null; - int currentIndex = 0; - int deleteIndex = 0; - for (Iterator<AggrColStatsCached> iterator = candidateList.nodes.iterator(); iterator.hasNext();) { - candidate = iterator.next(); - // Since we have to create space for 1, if we find an expired node we will remove it & - // return - if (isExpired(candidate)) { - iterator.remove(); - currentNodes.getAndDecrement(); - return; - } - // Sorry, too many ifs but this form looks optimal - // Update the LRU node from what we've seen so far - if (lruNode == null) { - lruNode = candidate; - ++currentIndex; - continue; - } - if (lruNode != null) { - if (candidate.lastAccessTime < lruNode.lastAccessTime) { - lruNode = candidate; - deleteIndex = currentIndex; - } - } - } - candidateList.nodes.remove(deleteIndex); - currentNodes.getAndDecrement(); - } - } catch (InterruptedException e) { - LOG.debug(e); - } finally { - if (isLocked) { - candidateList.writeLock.unlock(); - } - } - } - - // TODO: store and print metrics - void printMetrics() { - - } - - private boolean isExpired(AggrColStatsCached aggrColStats) { - return System.currentTimeMillis() - aggrColStats.lastAccessTime > timeToLive; - } - - /** - * Key object for the 
stats cache hashtable */ - static class Key { - private final String dbName; - private final String tblName; - private final String colName; - - Key(String db, String table, String col) { - // Don't construct an illegal cache key - if ((db == null) || (table == null) || (col == null)) { - throw new IllegalArgumentException("dbName, tblName, colName can't be null"); - } - dbName = db; - tblName = table; - colName = col; - } - - @Override - public boolean equals(Object other) { - if ((other == null) || !(other instanceof Key)) { - return false; - } - Key that = (Key) other; - return dbName.equals(that.dbName) && tblName.equals(that.tblName) - && colName.equals(that.colName); - } - - @Override - public int hashCode() { - return dbName.hashCode() * 31 + tblName.hashCode() * 31 + colName.hashCode(); - } - - @Override - public String toString() { - return "Database: " + dbName + ", Table: " + tblName + ", Column: " + colName; - } - - } - - static class AggrColStatsList { - // TODO: figure out a better data structure for node list(?) - private List<AggrColStatsCached> nodes = new ArrayList<AggrColStatsCached>(); - private ReadWriteLock lock = new ReentrantReadWriteLock(); - // Read lock for get operation - private Lock readLock = lock.readLock(); - // Write lock for add, evict and clean operation - private Lock writeLock = lock.writeLock(); - // Using volatile instead of locking updates to this variable, - // since we can rely on approx lastAccessTime but don't want a performance hit - private volatile long lastAccessTime = 0; - - List<AggrColStatsCached> getNodes() { - return nodes; - } - - void updateLastAccessTime() { - this.lastAccessTime = System.currentTimeMillis(); - } - } - - static class AggrColStatsCached { - private final int numPartsCached; - private final BloomFilter bloomFilter; - private final ColumnStatisticsObj colStats; - private volatile long lastAccessTime; - - AggrColStatsCached(int numPartsCached, BloomFilter bloomFilter, - ColumnStatisticsObj colStats) { - this.numPartsCached = numPartsCached; - this.bloomFilter = bloomFilter; - this.colStats = colStats; - this.lastAccessTime = System.currentTimeMillis(); - } - - int getNumPartsCached() { - return numPartsCached; - } - - ColumnStatisticsObj getColStats() { - updateLastAccessTime(); - return colStats; - } - - BloomFilter getBloomFilter() { - return bloomFilter; - } - - void updateLastAccessTime() { - this.lastAccessTime = System.currentTimeMillis(); - } - } - - /** - * Intermediate object, used to collect hits & misses for each cache node that is evaluated for an - * incoming request - */ - private static class MatchStats { - private int hits = 0; - private int misses = 0; - private boolean shouldSkip = false; - - MatchStats(int hits, int misses, boolean shouldSkip) { - this.hits = hits; - this.misses = misses; - this.shouldSkip = shouldSkip; - } - } - - /** - * TODO: capture some metrics for the cache - */ - class Metrics { - - } - - /** - * TODO: implement memory management for the cache - */ - static class MemoryManager { - - } -} http://git-wip-us.apache.org/repos/asf/hive/blob/15012469/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/HBaseReadWrite.java ---------------------------------------------------------------------- diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/HBaseReadWrite.java b/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/HBaseReadWrite.java index b0dc707..b54afb9 100644 --- a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/HBaseReadWrite.java +++
b/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/HBaseReadWrite.java @@ -19,7 +19,6 @@ package org.apache.hadoop.hive.metastore.hbase; import com.google.common.annotations.VisibleForTesting; - import org.apache.commons.codec.binary.Base64; import org.apache.commons.lang.ArrayUtils; import org.apache.commons.logging.Log; @@ -38,7 +37,9 @@ import org.apache.hadoop.hbase.filter.Filter; import org.apache.hadoop.hbase.filter.RegexStringComparator; import org.apache.hadoop.hbase.filter.RowFilter; import org.apache.hadoop.hive.common.ObjectPair; +import org.apache.hive.common.util.BloomFilter; import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.metastore.AggregateStatsCache; import org.apache.hadoop.hive.metastore.api.AggrStats; import org.apache.hadoop.hive.metastore.api.ColumnStatistics; import org.apache.hadoop.hive.metastore.api.ColumnStatisticsDesc; @@ -52,10 +53,8 @@ import org.apache.hadoop.hive.metastore.api.PrincipalType; import org.apache.hadoop.hive.metastore.api.Role; import org.apache.hadoop.hive.metastore.api.StorageDescriptor; import org.apache.hadoop.hive.metastore.api.Table; -import org.apache.hadoop.hive.metastore.hbase.AggregateStatsCache.AggrColStatsCached; import org.apache.hadoop.hive.metastore.hbase.stats.ColumnStatsAggregator; import org.apache.hadoop.hive.metastore.hbase.stats.ColumnStatsAggregatorFactory; -import org.apache.hadoop.hive.metastore.hbase.utils.BloomFilter; import java.io.IOException; import java.security.MessageDigest; @@ -1581,7 +1580,7 @@ class HBaseReadWrite { List<List<String>> partVals, List<String> colNames) throws IOException { // One ColumnStatisticsObj per column List<ColumnStatisticsObj> colStatsList = new ArrayList<ColumnStatisticsObj>(); - AggrColStatsCached colStatsAggrCached; + AggregateStatsCache.AggrColStats colStatsAggrCached; ColumnStatisticsObj colStatsAggr; int maxPartitionsPerCacheNode = aggrStatsCache.getMaxPartsPerCacheNode(); float falsePositiveProbability = aggrStatsCache.getFalsePositiveProbability(); @@ -1672,7 +1671,7 @@ class HBaseReadWrite { } // Add partition to the bloom filter if it's requested if (bloomFilter != null) { - bloomFilter.addToFilter(partNames.get(pOff).getBytes()); + bloomFilter.add(partNames.get(pOff).getBytes()); } } return colStatsAggr; http://git-wip-us.apache.org/repos/asf/hive/blob/15012469/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/HBaseStore.java ---------------------------------------------------------------------- diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/HBaseStore.java b/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/HBaseStore.java index 79a61d4..0dbdba2 100644 --- a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/HBaseStore.java +++ b/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/HBaseStore.java @@ -23,7 +23,6 @@ import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hive.common.FileUtils; -import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.metastore.HiveMetaStore; import org.apache.hadoop.hive.metastore.PartFilterExprUtil; import org.apache.hadoop.hive.metastore.PartitionExpressionProxy; @@ -62,7 +61,6 @@ import org.apache.hadoop.hive.metastore.api.UnknownTableException; import org.apache.hadoop.hive.metastore.hbase.HBaseFilterPlanUtil.PlanResult; import org.apache.hadoop.hive.metastore.hbase.HBaseFilterPlanUtil.ScanPlan; import 
org.apache.hadoop.hive.metastore.parser.ExpressionTree; -import org.apache.hadoop.hive.metastore.hbase.AggregateStatsCache.AggrColStatsCached; import org.apache.hadoop.hive.metastore.partition.spec.PartitionSpecProxy; import org.apache.thrift.TException; http://git-wip-us.apache.org/repos/asf/hive/blob/15012469/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/utils/BitVector.java ---------------------------------------------------------------------- diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/utils/BitVector.java b/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/utils/BitVector.java deleted file mode 100644 index 20af350..0000000 --- a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/utils/BitVector.java +++ /dev/null @@ -1,122 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.hadoop.hive.metastore.hbase.utils; - -import java.util.Arrays; - -/** - * Barebones fixed length bit vector using a byte array - */ -public class BitVector { - // We'll use this as the bit vector container - private byte data[]; - public static int ELEMENT_SIZE = Byte.SIZE; - - public BitVector(int size) { - data = new byte[size/ELEMENT_SIZE]; - } - - /** - * Total bits -> num elements * size of each element - * - */ - public long getSize() { - return data.length * ELEMENT_SIZE; - } - - /** - * Set the bit at the given index to 1 - * - * @param bitIndex - */ - public void setBit(int bitIndex) { - validateBitIndex(bitIndex); - int dataIndex = bitIndex / ELEMENT_SIZE; - int elementIndex = ELEMENT_SIZE - bitIndex % ELEMENT_SIZE - 1; - // Set the elementIndex'th bit of data[dataIndex]'th element - data[dataIndex] = (byte) (data[dataIndex] | (1 << elementIndex)); - } - - /** - * Set the bit at the given index to 0 - * - * @param bitIndex - */ - public void unSetBit(int bitIndex) { - validateBitIndex(bitIndex); - int dataIndex = bitIndex / ELEMENT_SIZE; - int elementIndex = ELEMENT_SIZE - bitIndex % ELEMENT_SIZE - 1; - // Unset the elementIndex'th bit of data[dataIndex]'th element - data[dataIndex] = (byte) (data[dataIndex] & ~(1 << elementIndex)); - } - - /** - * Check if a bit at the given index is 1 - * @param bitIndex - */ - public boolean isBitSet(int bitIndex) { - validateBitIndex(bitIndex); - int dataIndex = bitIndex / ELEMENT_SIZE; - int elementIndex = ELEMENT_SIZE - bitIndex % ELEMENT_SIZE - 1; - if ((data[dataIndex] & (1 << elementIndex)) > 0) { - return true; - } - return false; - } - - /** - * Set all bits to 0 - * - */ - public void clearAll() { - Arrays.fill(data, (byte) 0x00); - } - - /** - * Set all bits to 1 - * - */ - public void setAll() { - Arrays.fill(data, (byte) 0xFF); - } - - /** - * Prints the bit vector as a string of bit values (e.g. 
01010111) - */ - @Override - public String toString() { - StringBuilder str = new StringBuilder(); - for(byte b : data) { - str.append(Integer.toBinaryString((b & 0xFF) + 0x100).substring(1)); - } - return str.toString(); - } - - /** - * Check if queried bitIndex is in valid range - * @param bitIndex - */ - private void validateBitIndex(int bitIndex) { - if ((bitIndex >= getSize()) || (bitIndex < 0)) { - throw new IllegalArgumentException("Bit index out of range: " + bitIndex); - } - } - -} http://git-wip-us.apache.org/repos/asf/hive/blob/15012469/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/utils/BloomFilter.java ---------------------------------------------------------------------- diff --git a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/utils/BloomFilter.java b/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/utils/BloomFilter.java deleted file mode 100644 index 5606596..0000000 --- a/metastore/src/java/org/apache/hadoop/hive/metastore/hbase/utils/BloomFilter.java +++ /dev/null @@ -1,144 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.hadoop.hive.metastore.hbase.utils; - -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; -import org.apache.hadoop.hbase.util.Hash; - -/** - * Barebones bloom filter implementation - */ -public class BloomFilter { - private static final Log LOG = LogFactory.getLog(BloomFilter.class.getName()); - // The cardinality of the set for which we are generating the bloom filter - // Default size is 10000 - private int setSize = 10000; - // The probability of false positives we are ready to tolerate - // Default is 1% - private double falsePositiveProbability = 0.01; - // Number of bits used for the filter - // Formula: -n*ln(p) / (ln(2)^2) - private int numBits; - // Number of hashing functions - // Formula: m/n * ln(2) - private int numHashFunctions; - private final Hash hash; - private BitVector bitVector; - - public BloomFilter(int setSize, double falsePositiveProbability) { - this.setSize = setSize; - this.falsePositiveProbability = falsePositiveProbability; - this.numBits = calculateFilterSize(this.setSize, this.falsePositiveProbability); - this.numHashFunctions = calculateHashFunctions(this.setSize, this.numBits); - // Create a bit vector of size numBits - this.bitVector = new BitVector(numBits); - // Use MurmurHash3 - hash = Hash.getInstance(Hash.MURMUR_HASH3); - } - - /** - * Calculate the number of bits in the filter - * Also align size to BitVector.ELEMENT_SIZE - * @param setSize - * @param falsePositiveProbability - * @return numBits - */ - private int calculateFilterSize(int setSize, double falsePositiveProbability) { - int numBits = (int) (-setSize * Math.log(falsePositiveProbability) / (Math.log(2) * Math.log(2))); - numBits = numBits + (BitVector.ELEMENT_SIZE - (numBits % BitVector.ELEMENT_SIZE)); - LOG.info("Bloom Filter size: " + numBits); - return numBits; - } - - /** - * Calculate the number of hash functions needed by the BloomFilter - * @param setSize - * @param numBits - * @return numHashFunctions - */ - private int calculateHashFunctions(int setSize, int numBits) { - int numHashFunctions = Math.max(1, (int) Math.round((double) numBits / setSize * Math.log(2))); - LOG.info("Number of hashing functions: " + numHashFunctions); - return numHashFunctions; - } - - /** - * @return the underlying BitVector object - */ - public BitVector getBitVector() { - return bitVector; - } - - public int getFilterSize() { - return numBits; - } - - - public int getNumHashFunctions() { - return numHashFunctions; - } - - /** - * Add an item to the filter - * - * @param item to add - */ - public void addToFilter(byte[] item) { - int bitIndex; - // Hash the item numHashFunctions times - for (int i = 0; i < numHashFunctions; i++) { - bitIndex = getBitIndex(item, i); - // Set the bit at this index - bitVector.setBit(bitIndex); - } - } - - /** - * Check whether the item is present in the filter - * @param item - * @return hasItem (true if the bloom filter contains the item) - */ - public boolean contains(byte[] item) { - int bitIndex; - boolean hasItem = true; - // Hash the item numHashFunctions times - for (int i = 0; i < numHashFunctions; i++) { - bitIndex = getBitIndex(item, i); - hasItem = hasItem && bitVector.isBitSet(bitIndex); - if (!hasItem) { - return hasItem; - } - } - return hasItem; - } - - /** - * Hash the item using i as the seed and return its potential position in the bit vector - * Also we negate a negative hash value - * - * @param item - * @param i (for the i-th hash function) - * @return position of item in underlying bit vector - */ - private int getBitIndex(byte[] item, int i) { - return Math.abs(hash.hash(item, i) % (numBits)); - } -} http://git-wip-us.apache.org/repos/asf/hive/blob/15012469/metastore/src/test/org/apache/hadoop/hive/metastore/hbase/TestAggregateStatsCache.java ---------------------------------------------------------------------- diff --git a/metastore/src/test/org/apache/hadoop/hive/metastore/hbase/TestAggregateStatsCache.java b/metastore/src/test/org/apache/hadoop/hive/metastore/hbase/TestAggregateStatsCache.java deleted file mode 100644 index aa2f4a2..0000000 --- a/metastore/src/test/org/apache/hadoop/hive/metastore/hbase/TestAggregateStatsCache.java +++ /dev/null @@ -1,266 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.hadoop.hive.metastore.hbase; - -import java.util.ArrayList; -import java.util.List; - -import org.apache.hadoop.hive.conf.HiveConf; -import org.apache.hadoop.hive.metastore.api.ColumnStatisticsData; -import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj; -import org.apache.hadoop.hive.metastore.api.LongColumnStatsData; -import org.apache.hadoop.hive.metastore.hbase.AggregateStatsCache.AggrColStatsCached; -import org.apache.hadoop.hive.metastore.hbase.AggregateStatsCache.Key; -import org.apache.hadoop.hive.metastore.hbase.utils.BloomFilter; -import org.junit.After; -import org.junit.AfterClass; -import org.junit.Assert; -import org.junit.Before; -import org.junit.BeforeClass; -import org.junit.Test; - -public class TestAggregateStatsCache { - static String DB_NAME = "db"; - static String TAB_PREFIX = "tab"; - static String PART_PREFIX = "part"; - static String COL_PREFIX = "col"; - static int NUM_TABS = 2; - static int NUM_PARTS = 20; - static int NUM_COLS = 5; - static int MAX_CACHE_NODES = 10; - static int MAX_PARTITIONS_PER_CACHE_NODE = 10; - static String TIME_TO_LIVE = "20s"; - static String MAX_WRITER_WAIT = "1s"; - static String MAX_READER_WAIT = "1s"; - static float FALSE_POSITIVE_PROBABILITY = (float) 0.01; - static float MAX_VARIANCE = (float) 0.5; - static AggregateStatsCache cache; - static List<String> tables = new ArrayList<String>(); - static List<String> tabParts = new ArrayList<String>(); - static List<String> tabCols = new ArrayList<String>(); - - @BeforeClass - public static void beforeTest() { - // All data initializations - initializeTables(); - initializePartitions(); - initializeColumns(); - } - - // tab1, tab2 - private static void initializeTables() { - for (int i = 1; i <= NUM_TABS; i++) { - tables.add(TAB_PREFIX + i); - } - } - - // part1 ... part20 - private static void initializePartitions() { - for (int i = 1; i <= NUM_PARTS; i++) { - tabParts.add(PART_PREFIX + i); - } - } - - // col1 ...
col5 - private static void initializeColumns() { - for (int i = 1; i <= NUM_COLS; i++) { - tabCols.add(COL_PREFIX + i); - } - } - - @AfterClass - public static void afterTest() { - } - - @Before - public void setUp() { - HiveConf hiveConf = new HiveConf(); - hiveConf.setIntVar(HiveConf.ConfVars.METASTORE_HBASE_AGGREGATE_STATS_CACHE_SIZE, - MAX_CACHE_NODES); - hiveConf.setIntVar(HiveConf.ConfVars.METASTORE_HBASE_AGGREGATE_STATS_CACHE_MAX_PARTITIONS, - MAX_PARTITIONS_PER_CACHE_NODE); - hiveConf.setFloatVar( - HiveConf.ConfVars.METASTORE_HBASE_AGGREGATE_STATS_CACHE_FALSE_POSITIVE_PROBABILITY, - FALSE_POSITIVE_PROBABILITY); - hiveConf.setFloatVar(HiveConf.ConfVars.METASTORE_HBASE_AGGREGATE_STATS_CACHE_MAX_VARIANCE, - MAX_VARIANCE); - hiveConf.setVar(HiveConf.ConfVars.METASTORE_HBASE_CACHE_TIME_TO_LIVE, TIME_TO_LIVE); - hiveConf.setVar(HiveConf.ConfVars.METASTORE_HBASE_CACHE_MAX_WRITER_WAIT, MAX_WRITER_WAIT); - hiveConf.setVar(HiveConf.ConfVars.METASTORE_HBASE_CACHE_MAX_READER_WAIT, MAX_READER_WAIT); - cache = AggregateStatsCache.getInstance(hiveConf); - } - - @After - public void tearDown() { - } - - @Test - public void testCacheKey() { - Key k1 = new Key("db", "tbl1", "col"); - Key k2 = new Key("db", "tbl1", "col"); - // k1 equals k2 - Assert.assertEquals(k1, k2); - Key k3 = new Key("db", "tbl2", "col"); - // k1 not equals k3 - Assert.assertNotEquals(k1, k3); - } - - @Test - public void testBasicAddAndGet() throws Exception { - // Partnames: [tab1part1...tab1part9] - List<String> partNames = preparePartNames(tables.get(0), 1, 9); - // Prepare the bloom filter - BloomFilter bloomFilter = prepareBloomFilter(partNames); - // Add a dummy aggregate stats object for the above parts (part1...part9) of tab1 for col1 - String tblName = tables.get(0); - String colName = tabCols.get(0); - int highVal = 100, lowVal = 10, numDVs = 50, numNulls = 5; - // We'll treat this as the aggregate col stats for part1...part9 of tab1, col1 - ColumnStatisticsObj aggrColStats = - getDummyLongColStat(colName, highVal, lowVal, numDVs, numNulls); - // Now add the dummy colstats for these partitions to the cache (recorded as 10 parts cached) - cache.add(DB_NAME, tblName, colName, 10, aggrColStats, bloomFilter); - // Now get from cache - AggrColStatsCached aggrStatsCached = cache.get(DB_NAME, tblName, colName, partNames); - Assert.assertNotNull(aggrStatsCached); - - ColumnStatisticsObj aggrColStatsCached = aggrStatsCached.getColStats(); - Assert.assertEquals(aggrColStats, aggrColStatsCached); - - // Now get a non-existent entry - aggrStatsCached = cache.get("dbNotThere", tblName, colName, partNames); - Assert.assertNull(aggrStatsCached); - } - - @Test - public void testAddGetWithVariance() throws Exception { - // Partnames: [tab1part1...tab1part9] - List<String> partNames = preparePartNames(tables.get(0), 1, 9); - // Prepare the bloom filter - BloomFilter bloomFilter = prepareBloomFilter(partNames); - // Add a dummy aggregate stats object for the above parts (part1...part9) of tab1 for col1 - String tblName = tables.get(0); - String colName = tabCols.get(0); - int highVal = 100, lowVal = 10, numDVs = 50, numNulls = 5; - // We'll treat this as the aggregate col stats for part1...part9 of tab1, col1 - ColumnStatisticsObj aggrColStats = - getDummyLongColStat(colName, highVal, lowVal, numDVs, numNulls); - // Now add to cache - cache.add(DB_NAME, tblName, colName, 10, aggrColStats, bloomFilter); - - // Now prepare partnames with only 5 partitions: [tab1part1...tab1part5] - partNames = preparePartNames(tables.get(0), 1, 5); - // This get should fail because
its variance ((10-5)/5) is way past MAX_VARIANCE (0.5) - AggrColStatsCached aggrStatsCached = cache.get(DB_NAME, tblName, colName, partNames); - Assert.assertNull(aggrStatsCached); - - // Now prepare partnames with 10 partitions: [tab1part11...tab1part20], but with no overlap - partNames = preparePartNames(tables.get(0), 11, 20); - // This get should fail because there is no overlap, so the bloom filter misses exceed the - // tolerable maximum (MAX_VARIANCE * number of partitions requested) - aggrStatsCached = cache.get(DB_NAME, tblName, colName, partNames); - Assert.assertNull(aggrStatsCached); - - // Now prepare partnames with 8 partitions: [tab1part1...tab1part8], which are contained in the - // object that we added to the cache - partNames = preparePartNames(tables.get(0), 1, 8); - // This get should succeed because its variance ((10-8)/8) is within MAX_VARIANCE (0.5) - aggrStatsCached = cache.get(DB_NAME, tblName, colName, partNames); - Assert.assertNotNull(aggrStatsCached); - ColumnStatisticsObj aggrColStatsCached = aggrStatsCached.getColStats(); - Assert.assertEquals(aggrColStats, aggrColStatsCached); - } - - @Test - public void testTimeToLive() throws Exception { - // Add a dummy node to cache - // Partnames: [tab1part1...tab1part9] - List<String> partNames = preparePartNames(tables.get(0), 1, 9); - // Prepare the bloom filter - BloomFilter bloomFilter = prepareBloomFilter(partNames); - // Add a dummy aggregate stats object for the above parts (part1...part9) of tab1 for col1 - String tblName = tables.get(0); - String colName = tabCols.get(0); - int highVal = 100, lowVal = 10, numDVs = 50, numNulls = 5; - // We'll treat this as the aggregate col stats for part1...part9 of tab1, col1 - ColumnStatisticsObj aggrColStats = - getDummyLongColStat(colName, highVal, lowVal, numDVs, numNulls); - // Now add to cache - cache.add(DB_NAME, tblName, colName, 10, aggrColStats, bloomFilter); - - // Sleep for 30 seconds - Thread.sleep(30000); - - // Get should fail now, since the TTL is 20s and we've snoozed for 30 seconds - AggrColStatsCached aggrStatsCached = cache.get(DB_NAME, tblName, colName, partNames); - Assert.assertNull(aggrStatsCached); - } - - /** - * Prepares a list of partition names by getting partitions from minPart ... maxPart and - * prepending the table name - * Example: [tab1part1, tab1part2 ...]
- * - * @param tabName - * @param minPart - * @param maxPart - * @return - * @throws Exception - */ - private List<String> preparePartNames(String tabName, int minPart, int maxPart) throws Exception { - if ((minPart < 1) || (maxPart > NUM_PARTS)) { - throw new Exception("tabParts does not have these partition numbers"); - } - List<String> partNames = new ArrayList<String>(); - for (int i = minPart; i <= maxPart; i++) { - String partName = tabParts.get(i-1); - partNames.add(tabName + partName); - } - return partNames; - } - - /** - * Prepares a bloom filter from the list of partition names - * @param partNames - * @return - */ - private BloomFilter prepareBloomFilter(List <String> partNames) { - BloomFilter bloomFilter = - new BloomFilter(MAX_PARTITIONS_PER_CACHE_NODE, FALSE_POSITIVE_PROBABILITY); - for (String partName: partNames) { - bloomFilter.addToFilter(partName.getBytes()); - } - return bloomFilter; - } - - private ColumnStatisticsObj getDummyLongColStat(String colName, int highVal, int lowVal, int numDVs, int numNulls) { - ColumnStatisticsObj aggrColStats = new ColumnStatisticsObj(); - aggrColStats.setColName(colName); - aggrColStats.setColType("long"); - LongColumnStatsData longStatsData = new LongColumnStatsData(); - longStatsData.setHighValue(highVal); - longStatsData.setLowValue(lowVal); - longStatsData.setNumDVs(numDVs); - longStatsData.setNumNulls(numNulls); - ColumnStatisticsData aggrColStatsData = new ColumnStatisticsData(); - aggrColStatsData.setLongStats(longStatsData); - aggrColStats.setStatsData(aggrColStatsData); - return aggrColStats; - } -} http://git-wip-us.apache.org/repos/asf/hive/blob/15012469/metastore/src/test/org/apache/hadoop/hive/metastore/hbase/utils/TestBitVector.java ---------------------------------------------------------------------- diff --git a/metastore/src/test/org/apache/hadoop/hive/metastore/hbase/utils/TestBitVector.java b/metastore/src/test/org/apache/hadoop/hive/metastore/hbase/utils/TestBitVector.java deleted file mode 100644 index 559fbad..0000000 --- a/metastore/src/test/org/apache/hadoop/hive/metastore/hbase/utils/TestBitVector.java +++ /dev/null @@ -1,92 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.hadoop.hive.metastore.hbase.utils; - -import org.apache.hadoop.hive.metastore.hbase.utils.BitVector; -import org.junit.Assert; -import org.junit.After; -import org.junit.AfterClass; -import org.junit.Before; -import org.junit.BeforeClass; -import org.junit.Test; - -public class TestBitVector { - - static int BIT_VECTOR_SIZE = 32; - BitVector bitVector; - - @BeforeClass - public static void beforeTest() { - } - - @AfterClass - public static void afterTest() { - } - - @Before - public void setUp() { - // Create a new BitVector - bitVector = new BitVector(BIT_VECTOR_SIZE); - } - - - @After - public void tearDown() { - } - - @Test - public void testSetAll() { - // Set bits - bitVector.setAll(); - Assert.assertEquals("11111111111111111111111111111111", bitVector.toString()); - } - - @Test - public void testClearAll() { - // Clear all bits - bitVector.clearAll(); - Assert.assertEquals("00000000000000000000000000000000", bitVector.toString()); - } - - @Test - public void testSetUnsetBit() { - // Set 3rd bit - bitVector.setBit(2); - Assert.assertEquals("00100000000000000000000000000000", bitVector.toString()); - // Now check if 3rd bit is set - Assert.assertTrue(bitVector.isBitSet(2)); - // Now set 30th bit - bitVector.setBit(29); - Assert.assertEquals("00100000000000000000000000000100", bitVector.toString()); - // Now check if 30th bit is set - Assert.assertTrue(bitVector.isBitSet(29)); - - // Now unset 3rd bit - bitVector.unSetBit(2); - Assert.assertEquals("00000000000000000000000000000100", bitVector.toString()); - // Now check if 3rd bit is unset - Assert.assertFalse(bitVector.isBitSet(2)); - // Now unset 30th bit - bitVector.unSetBit(29); - Assert.assertEquals("00000000000000000000000000000000", bitVector.toString()); - // Now check if 30th bit is unset - Assert.assertFalse(bitVector.isBitSet(29)); - } -} http://git-wip-us.apache.org/repos/asf/hive/blob/15012469/metastore/src/test/org/apache/hadoop/hive/metastore/hbase/utils/TestBloomFilter.java ---------------------------------------------------------------------- diff --git a/metastore/src/test/org/apache/hadoop/hive/metastore/hbase/utils/TestBloomFilter.java b/metastore/src/test/org/apache/hadoop/hive/metastore/hbase/utils/TestBloomFilter.java deleted file mode 100644 index 5175efd..0000000 --- a/metastore/src/test/org/apache/hadoop/hive/metastore/hbase/utils/TestBloomFilter.java +++ /dev/null @@ -1,82 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.hadoop.hive.metastore.hbase.utils; - -import org.apache.hadoop.hive.metastore.hbase.utils.BloomFilter; -import org.junit.After; -import org.junit.AfterClass; -import org.junit.Assert; -import org.junit.Before; -import org.junit.BeforeClass; -import org.junit.Test; - -public class TestBloomFilter { - static final int SET_SIZE = 50; - static final double FALSE_POSITIVE_PROBABILITY = 0.01; - // Pre-calculated for the above set size and fpp - static final int FILTER_SIZE = 480; - static final int NUM_HASH_FUNCTIONS = 7; - BloomFilter bloomFilter; - // Items that we'll add to the filter - String[] items = {"Part1=Val1", "Part2=Val2", "Part3=Val3", "Part4=Val4", "Part5=Val5"}; - - @BeforeClass - public static void beforeTest() { - } - - @AfterClass - public static void afterTest() { - } - - @Before - public void setUp() { - bloomFilter = new BloomFilter(SET_SIZE, FALSE_POSITIVE_PROBABILITY); - } - - @After - public void tearDown() { - } - - @Test - public void testFilterAndHashSize() { - Assert.assertEquals(bloomFilter.getFilterSize(), FILTER_SIZE); - Assert.assertEquals(bloomFilter.getNumHashFunctions(), NUM_HASH_FUNCTIONS); - } - - @Test - public void testFilterFunctions() { - // Add all items to the bloom filter - // (since bloom filter returns false positives, no point testing for negative cases) - for (String item: items) { - bloomFilter.addToFilter(item.getBytes()); - } - // Test for presence - for (String item: items) { - Assert.assertTrue(bloomFilter.contains(item.getBytes())); - } - // Clear all bits - bloomFilter.getBitVector().clearAll(); - // Test for presence now - should fail - for (String item: items) { - Assert.assertFalse(bloomFilter.contains(item.getBytes())); - } - } - -}
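That is the whole patch: the hand-rolled AggregateStatsCache, BitVector, and BloomFilter (and their tests) are removed from the hbase-metastore branch, and HBaseReadWrite now leans on the copies that already exist elsewhere in Hive, org.apache.hadoop.hive.metastore.AggregateStatsCache and org.apache.hive.common.util.BloomFilter, as the +import lines and the bloomFilter.add(...) call in the HBaseReadWrite hunk show. A minimal usage sketch of the shared filter follows; the two-argument constructor (expected entries, false-positive probability) and the test() probe are assumptions about the shared utility's API, since only add() is visible in this diff:

import org.apache.hive.common.util.BloomFilter;

public class SharedBloomFilterSketch {
  public static void main(String[] args) {
    // Assumed constructor: expected entry count and tolerated
    // false-positive probability, mirroring the deleted implementation
    BloomFilter filter = new BloomFilter(10, 0.01);
    // add(byte[]) is the call visible in the HBaseReadWrite hunk above;
    // it replaces the deleted addToFilter(byte[])
    filter.add("part1=val1".getBytes());
    // test(byte[]) stands in for the deleted contains(byte[]): true for
    // added items, false for others except ~1% false positives
    System.out.println(filter.test("part1=val1".getBytes())); // true
    System.out.println(filter.test("part9=val9".getBytes())); // almost certainly false
  }
}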