Github user hbdeshmukh commented on a diff in the pull request:

    https://github.com/apache/incubator-quickstep/pull/121#discussion_r84796051
  
    --- Diff: storage/PartitionedHashTablePool.hpp ---
    @@ -0,0 +1,224 @@
    +/**
    + *   Copyright 2016, Quickstep Research Group, Computer Sciences Department,
    + *     University of Wisconsin—Madison.
    + *
    + *   Licensed under the Apache License, Version 2.0 (the "License");
    + *   you may not use this file except in compliance with the License.
    + *   You may obtain a copy of the License at
    + *
    + *       http://www.apache.org/licenses/LICENSE-2.0
    + *
    + *   Unless required by applicable law or agreed to in writing, software
    + *   distributed under the License is distributed on an "AS IS" BASIS,
    + *   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    + *   See the License for the specific language governing permissions and
    + *   limitations under the License.
    + **/
    +
    +#ifndef QUICKSTEP_STORAGE_PARTITIONED_HASH_TABLE_POOL_HPP_
    +#define QUICKSTEP_STORAGE_PARTITIONED_HASH_TABLE_POOL_HPP_
    +
    +#include <algorithm>
    +#include <chrono>
    +#include <memory>
    +#include <utility>
    +#include <vector>
    +
    +#include "expressions/aggregation/AggregationHandle.hpp"
    +#include "storage/HashTableBase.hpp"
    +#include "storage/FastHashTable.hpp"
    +#include "storage/FastHashTableFactory.hpp"
    +#include "utility/Macros.hpp"
    +#include "utility/StringUtil.hpp"
    +
    +#include "glog/logging.h"
    +
    +namespace quickstep {
    +
    +class StorageManager;
    +class Type;
    +
    +/** \addtogroup Storage
    + *  @{
    + */
    +
    +/**
    + * @brief A pool of HashTables used for a single aggregation handle. Each
    + *        HashTable represents values from a given partition, which is
    + *        determined by the keys in the group by clause.
    + **/
    +class PartitionedHashTablePool {
    + public:
    +  /**
    +   * @brief Constructor.
    +   *
    +   * @param estimated_num_entries The maximum number of entries in a hash table.
    +   * @param num_partitions The number of partitions (i.e. number of HashTables)
    +   * @param hash_table_impl_type The type of hash table implementation.
    +   * @param group_by_types A vector of pointer of types which form the group by
    +   *        key.
    +   * @param agg_handle The aggregation handle.
    +   * @param storage_manager A pointer to the storage manager.
    +   *
    +   * @note The estimate of number of entries is quite inaccurate at this time.
    +   *       If we go by the current estimate, each hash table demands much
    +   *       larger space than it actually needs, which causes the system to
    +   *       either trigger evictions or worse - run out of memory. To fix this
    +   *       issue, we divide the estimate by 100. The division will not affect
    +   *       correctness, however it may allocate some hash tables smaller space
    +   *       than their requirement, causing them to be resized during build
    +   *       phase, which has a performance penalty.
    +   **/
    +  PartitionedHashTablePool(const std::size_t estimated_num_entries,
    +                           const std::size_t num_partitions,
    +                           const HashTableImplType hash_table_impl_type,
    +                           const std::vector<const Type *> &group_by_types,
    +                           AggregationHandle *agg_handle,
    +                           StorageManager *storage_manager)
    +      : estimated_num_entries_(
    +            setHashTableSize(estimated_num_entries, num_partitions)),
    +        num_partitions_(num_partitions),
    +        hash_table_impl_type_(hash_table_impl_type),
    +        group_by_types_(group_by_types),
    +        agg_handle_(DCHECK_NOTNULL(agg_handle)),
    +        storage_manager_(DCHECK_NOTNULL(storage_manager)) {
    +    initializeAllHashTables();
    +  }
    +
    +  /**
    +   * @brief Constructor.
    +   *
    +   * @note This constructor is relevant for the HashTable specialized for
    +   *       aggregation.
    +   *
    +   * @param estimated_num_entries The maximum number of entries in a hash table.
    +   * @param num_partitions The number of partitions (i.e. number of HashTables)
    +   * @param hash_table_impl_type The type of hash table implementation.
    +   * @param group_by_types A vector of pointer of types which form the group by
    +   *        key.
    +   * @param payload_sizes The sizes of the payload elements (i.e.
    +   *        AggregationStates).
    +   * @param handles The aggregation handles.
    +   * @param storage_manager A pointer to the storage manager.
    +   **/
    +  PartitionedHashTablePool(const std::size_t estimated_num_entries,
    +                           const std::size_t num_partitions,
    +                           const HashTableImplType hash_table_impl_type,
    +                           const std::vector<const Type *> &group_by_types,
    +                           const std::vector<std::size_t> &payload_sizes,
    +                           const std::vector<AggregationHandle *> &handles,
    +                           StorageManager *storage_manager)
    +      : estimated_num_entries_(
    +            setHashTableSize(estimated_num_entries, num_partitions)),
    +        num_partitions_(num_partitions),
    +        hash_table_impl_type_(hash_table_impl_type),
    +        group_by_types_(group_by_types),
    +        payload_sizes_(payload_sizes),
    +        handles_(handles),
    +        storage_manager_(DCHECK_NOTNULL(storage_manager)) {
    +    initializeAllHashTables();
    +  }
    +
    +  /**
    +   * @brief Check out a hash table for insertion.
    +   *
    +   * @param partition_id The ID of the partitioned HashTable.
    +   *
    +   * @return A hash table pointer for the given HashTable.
    +   **/
    +  AggregationStateHashTableBase* getHashTable(const std::size_t partition_id) {
    +    DCHECK_LT(partition_id, num_partitions_);
    +    DCHECK_LT(partition_id, hash_tables_.size());
    +    return hash_tables_[partition_id].get();
    +  }
    +
    +  /**
    +   * @brief Check out a hash table for insertion.
    +   *
    +   * @param partition_id The ID of the partitioned HashTable.
    +   *
    +   * @return A hash table pointer for the given HashTable.
    +   **/
    +  AggregationStateHashTableBase* getHashTableFast(const std::size_t partition_id) {
    --- End diff --
    
    This function may not be needed. 


---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and wishes so, or if the feature is enabled but not working, please
contact infrastructure at infrastruct...@apache.org or file a JIRA ticket
with INFRA.
---

Reply via email to