Github user hbdeshmukh commented on a diff in the pull request: https://github.com/apache/incubator-quickstep/pull/121#discussion_r84796051 --- Diff: storage/PartitionedHashTablePool.hpp --- @@ -0,0 +1,224 @@ +/** + * Copyright 2016, Quickstep Research Group, Computer Sciences Department, + * University of Wisconsin—Madison. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + **/ + +#ifndef QUICKSTEP_STORAGE_PARTITIONED_HASH_TABLE_POOL_HPP_ +#define QUICKSTEP_STORAGE_PARTITIONED_HASH_TABLE_POOL_HPP_ + +#include <algorithm> +#include <chrono> +#include <memory> +#include <utility> +#include <vector> + +#include "expressions/aggregation/AggregationHandle.hpp" +#include "storage/HashTableBase.hpp" +#include "storage/FastHashTable.hpp" +#include "storage/FastHashTableFactory.hpp" +#include "utility/Macros.hpp" +#include "utility/StringUtil.hpp" + +#include "glog/logging.h" + +namespace quickstep { + +class StorageManager; +class Type; + +/** \addtogroup Storage + * @{ + */ + +/** + * @brief A pool of HashTables used for a single aggregation handle. Each + * HashTable represents values from a given partition, which is + * determined by the keys in the group by clause. + **/ +class PartitionedHashTablePool { + public: + /** + * @brief Constructor. + * + * @param estimated_num_entries The maximum number of entries in a hash table. + * @param num_partitions The number of partitions (i.e. 
number of HashTables) + * @param hash_table_impl_type The type of hash table implementation. + * @param group_by_types A vector of pointer of types which form the group by + * key. + * @param agg_handle The aggregation handle. + * @param storage_manager A pointer to the storage manager. + * + * @note The estimate of number of entries is quite inaccurate at this time. + * If we go by the current estimate, each hash table demands much + * larger space than it actually needs, which causes the system to + * either trigger evictions or worse - run out of memory. To fix this + * issue, we divide the estimate by 100. The division will not affect + * correctness, however it may allocate some hash tables smaller space + * than their requirement, causing them to be resized during build + * phase, which has a performance penalty. + **/ + PartitionedHashTablePool(const std::size_t estimated_num_entries, + const std::size_t num_partitions, + const HashTableImplType hash_table_impl_type, + const std::vector<const Type *> &group_by_types, + AggregationHandle *agg_handle, + StorageManager *storage_manager) + : estimated_num_entries_( + setHashTableSize(estimated_num_entries, num_partitions)), + num_partitions_(num_partitions), + hash_table_impl_type_(hash_table_impl_type), + group_by_types_(group_by_types), + agg_handle_(DCHECK_NOTNULL(agg_handle)), + storage_manager_(DCHECK_NOTNULL(storage_manager)) { + initializeAllHashTables(); + } + + /** + * @brief Constructor. + * + * @note This constructor is relevant for the HashTable specialized for + * aggregation. + * + * @param estimated_num_entries The maximum number of entries in a hash table. + * @param num_partitions The number of partitions (i.e. number of HashTables) + * @param hash_table_impl_type The type of hash table implementation. + * @param group_by_types A vector of pointer of types which form the group by + * key. + * @param payload_sizes The sizes of the payload elements (i.e. + * AggregationStates). 
+ * @param handles The aggregation handles. + * @param storage_manager A pointer to the storage manager. + **/ + PartitionedHashTablePool(const std::size_t estimated_num_entries, + const std::size_t num_partitions, + const HashTableImplType hash_table_impl_type, + const std::vector<const Type *> &group_by_types, + const std::vector<std::size_t> &payload_sizes, + const std::vector<AggregationHandle *> &handles, + StorageManager *storage_manager) + : estimated_num_entries_( + setHashTableSize(estimated_num_entries, num_partitions)), + num_partitions_(num_partitions), + hash_table_impl_type_(hash_table_impl_type), + group_by_types_(group_by_types), + payload_sizes_(payload_sizes), + handles_(handles), + storage_manager_(DCHECK_NOTNULL(storage_manager)) { + initializeAllHashTables(); + } + + /** + * @brief Check out a hash table for insertion. + * + * @param partition_id The ID of the partitioned HashTable. + * + * @return A hash table pointer for the given HashTable. + **/ + AggregationStateHashTableBase* getHashTable(const std::size_t partition_id) { + DCHECK_LT(partition_id, num_partitions_); + DCHECK_LT(partition_id, hash_tables_.size()); + return hash_tables_[partition_id].get(); + } + + /** + * @brief Check out a hash table for insertion. + * + * @param partition_id The ID of the partitioned HashTable. + * + * @return A hash table pointer for the given HashTable. + **/ + AggregationStateHashTableBase* getHashTableFast(const std::size_t partition_id) { --- End diff -- This function may not be needed.
--- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and would like it, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. ---