Repository: incubator-quickstep
Updated Branches:
  refs/heads/adaptive-bloom-filters 319d557bf -> 7d868b0dd


Updates


Project: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/repo
Commit: 
http://git-wip-us.apache.org/repos/asf/incubator-quickstep/commit/9cc47e56
Tree: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/tree/9cc47e56
Diff: http://git-wip-us.apache.org/repos/asf/incubator-quickstep/diff/9cc47e56

Branch: refs/heads/adaptive-bloom-filters
Commit: 9cc47e561d893a108fc2ad686413e6b251de64cb
Parents: 319d557
Author: Jianqiao Zhu <jianq...@cs.wisc.edu>
Authored: Thu Jul 28 17:29:43 2016 -0400
Committer: Jianqiao Zhu <jianq...@cs.wisc.edu>
Committed: Thu Jul 28 17:29:43 2016 -0400

----------------------------------------------------------------------
 cli/QuickstepCli.cpp                            |  18 +++
 query_optimizer/ExecutionGenerator.cpp          |  76 +++-------
 query_optimizer/ExecutionHeuristics.cpp         | 128 ++++++++---------
 query_optimizer/ExecutionHeuristics.hpp         |  28 ++--
 query_optimizer/PhysicalGenerator.cpp           |   1 -
 query_optimizer/physical/HashJoin.cpp           |  18 +++
 query_optimizer/physical/HashJoin.hpp           |  70 ++++++++-
 query_optimizer/rules/AttachBloomFilters.cpp    | 143 +++++++++++++++----
 query_optimizer/rules/AttachBloomFilters.hpp    |  16 ++-
 .../StarSchemaHashJoinOrderOptimization.cpp     |  32 ++++-
 .../StarSchemaHashJoinOrderOptimization.hpp     |  14 +-
 storage/HashTable.hpp                           |  35 ++---
 storage/HashTable.proto                         |  10 +-
 storage/HashTableFactory.hpp                    |  16 +--
 utility/BloomFilterAdapter.hpp                  |  34 ++---
 15 files changed, 403 insertions(+), 236 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/9cc47e56/cli/QuickstepCli.cpp
----------------------------------------------------------------------
diff --git a/cli/QuickstepCli.cpp b/cli/QuickstepCli.cpp
index ddabf31..8031dd3 100644
--- a/cli/QuickstepCli.cpp
+++ b/cli/QuickstepCli.cpp
@@ -219,6 +219,23 @@ void 
addPrimaryKeyInfoForTPCHTables(quickstep::CatalogDatabase *database) {
   }
 }
 
+void addPrimaryKeyInfoForSSBTables(quickstep::CatalogDatabase *database) {
+  const std::vector<std::pair<std::string, std::vector<std::string>>> 
rel_pkeys = {
+      { "supplier", { "s_suppkey" } },
+      { "customer", { "c_custkey" } },
+      { "part", { "p_partkey" } },
+      { "ddate", { "d_datekey" } }
+  };
+  for (const auto &rel_pair : rel_pkeys) {
+    CatalogRelation *rel = database->getRelationByNameMutable(rel_pair.first);
+    std::vector<quickstep::attribute_id> attrs;
+    for (const auto &pkey : rel_pair.second) {
+      attrs.emplace_back(rel->getAttributeByName(pkey)->getID());
+    }
+    rel->getConstraintsMutable()->setPrimaryKey(attrs);
+  }
+}
+
 int main(int argc, char* argv[]) {
   google::InitGoogleLogging(argv[0]);
   gflags::ParseCommandLineFlags(&argc, &argv, true);
@@ -327,6 +344,7 @@ int main(int argc, char* argv[]) {
   }
 
 //  addPrimaryKeyInfoForTPCHTables(query_processor->getDefaultDatabase());
+//  addPrimaryKeyInfoForSSBTables(query_processor->getDefaultDatabase());
 //  std::string proto_str;
 //  google::protobuf::TextFormat::PrintToString(
 //      query_processor->getDefaultDatabase()->getProto(), &proto_str);

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/9cc47e56/query_optimizer/ExecutionGenerator.cpp
----------------------------------------------------------------------
diff --git a/query_optimizer/ExecutionGenerator.cpp 
b/query_optimizer/ExecutionGenerator.cpp
index f84c371..fe6b6e7 100644
--- a/query_optimizer/ExecutionGenerator.cpp
+++ b/query_optimizer/ExecutionGenerator.cpp
@@ -599,8 +599,10 @@ void ExecutionGenerator::convertHashJoin(const 
P::HashJoinPtr &physical_plan) {
   std::vector<attribute_id> probe_attribute_ids;
   std::vector<attribute_id> build_attribute_ids;
 
-  std::vector<attribute_id> probe_original_attribute_ids;
-  std::vector<attribute_id> build_original_attribute_ids;
+  const P::BloomFilterConfig &bloom_filter_config =
+      physical_plan->bloom_filter_config();
+  std::vector<attribute_id> probe_side_bloom_filter_attribute_ids;
+  std::vector<attribute_id> build_side_bloom_filter_attribute_ids;
 
   const CatalogRelation *referenced_stored_probe_relation = nullptr;
   const CatalogRelation *referenced_stored_build_relation = nullptr;
@@ -613,18 +615,6 @@ void ExecutionGenerator::convertHashJoin(const 
P::HashJoinPtr &physical_plan) {
   const std::vector<E::AttributeReferencePtr> &left_join_attributes =
       physical_plan->left_join_attributes();
   for (const E::AttributeReferencePtr &left_join_attribute : 
left_join_attributes) {
-    // Try to determine the original stored relation referenced in the Hash 
Join.
-    referenced_stored_probe_relation =
-        
optimizer_context_->catalog_database()->getRelationByName(left_join_attribute->relation_name());
-    if (referenced_stored_probe_relation == nullptr) {
-      // Hash Join optimizations are not possible, if the referenced relation 
cannot be determined.
-      skip_hash_join_optimization = true;
-    } else {
-      const attribute_id probe_operator_attribute_id =
-          
referenced_stored_probe_relation->getAttributeByName(left_join_attribute->attribute_name())->getID();
-      probe_original_attribute_ids.emplace_back(probe_operator_attribute_id);
-    }
-
     const CatalogAttribute *probe_catalog_attribute
         = attribute_substitution_map_[left_join_attribute->id()];
     probe_attribute_ids.emplace_back(probe_catalog_attribute->getID());
@@ -637,18 +627,6 @@ void ExecutionGenerator::convertHashJoin(const 
P::HashJoinPtr &physical_plan) {
   const std::vector<E::AttributeReferencePtr> &right_join_attributes =
       physical_plan->right_join_attributes();
   for (const E::AttributeReferencePtr &right_join_attribute : 
right_join_attributes) {
-    // Try to determine the original stored relation referenced in the Hash 
Join.
-    referenced_stored_build_relation =
-        
optimizer_context_->catalog_database()->getRelationByName(right_join_attribute->relation_name());
-    if (referenced_stored_build_relation == nullptr) {
-      // Hash Join optimizations are not possible, if the referenced relation 
cannot be determined.
-      skip_hash_join_optimization = true;
-    } else {
-      const attribute_id build_operator_attribute_id =
-          
referenced_stored_build_relation->getAttributeByName(right_join_attribute->attribute_name())->getID();
-      build_original_attribute_ids.emplace_back(build_operator_attribute_id);
-    }
-
     const CatalogAttribute *build_catalog_attribute
         = attribute_substitution_map_[right_join_attribute->id()];
     build_attribute_ids.emplace_back(build_catalog_attribute->getID());
@@ -658,6 +636,20 @@ void ExecutionGenerator::convertHashJoin(const 
P::HashJoinPtr &physical_plan) {
     }
   }
 
+  for (const auto &bf : bloom_filter_config.probe_side_bloom_filters) {
+    const CatalogAttribute *probe_bf_catalog_attribute
+        = attribute_substitution_map_[bf.attribute->id()];
+    probe_side_bloom_filter_attribute_ids.emplace_back(
+        probe_bf_catalog_attribute->getID());
+  }
+
+  for (const auto &bf : bloom_filter_config.build_side_bloom_filters) {
+    const CatalogAttribute *build_bf_catalog_attribute
+        = attribute_substitution_map_[bf.attribute->id()];
+    build_side_bloom_filter_attribute_ids.emplace_back(
+        build_bf_catalog_attribute->getID());
+  }
+
   // Remember key types for call to SimplifyHashTableImplTypeProto() below.
   std::vector<const Type*> key_types;
   for (std::vector<E::AttributeReferencePtr>::size_type attr_idx = 0;
@@ -672,34 +664,7 @@ void ExecutionGenerator::convertHashJoin(const 
P::HashJoinPtr &physical_plan) {
     key_types.push_back(&left_attribute_type);
   }
 
-  std::size_t probe_cardinality = 
cost_model_->estimateCardinality(probe_physical);
   std::size_t build_cardinality = 
cost_model_->estimateCardinality(build_physical);
-  // For inner join, we may swap the probe table and the build table.
-  if (physical_plan->join_type() == P::HashJoin::JoinType::kInnerJoin)  {
-    const bool left_unique_join_attrs =
-        probe_physical->impliesUniqueAttributes(left_join_attributes);
-    const bool right_unique_join_attrs =
-        build_physical->impliesUniqueAttributes(right_join_attributes);
-
-    bool swap_probe_build;
-    if (left_unique_join_attrs != right_unique_join_attrs) {
-      swap_probe_build = left_unique_join_attrs;
-    } else {
-      // Choose the smaller table as the inner build table,
-      // and the other one as the outer probe table.
-      swap_probe_build = (probe_cardinality < build_cardinality);
-    }
-
-    if (swap_probe_build) {
-      // Switch the probe and build physical nodes.
-      std::swap(probe_physical, build_physical);
-      std::swap(probe_cardinality, build_cardinality);
-      std::swap(probe_attribute_ids, build_attribute_ids);
-      std::swap(any_probe_attributes_nullable, any_build_attributes_nullable);
-      std::swap(probe_original_attribute_ids, build_original_attribute_ids);
-      std::swap(referenced_stored_probe_relation, 
referenced_stored_build_relation);
-    }
-  }
 
   // Convert the residual predicate proto.
   QueryContext::predicate_id residual_predicate_index = 
QueryContext::kInvalidPredicateId;
@@ -861,8 +826,9 @@ void ExecutionGenerator::convertHashJoin(const 
P::HashJoinPtr &physical_plan) {
                                            join_operator_index,
                                            referenced_stored_build_relation,
                                            referenced_stored_probe_relation,
-                                           
std::move(build_original_attribute_ids),
-                                           
std::move(probe_original_attribute_ids),
+                                           bloom_filter_config,
+                                           
std::move(build_side_bloom_filter_attribute_ids),
+                                           
std::move(probe_side_bloom_filter_attribute_ids),
                                            join_hash_table_index,
                                            
star_schema_cost_model_->estimateCardinality(build_physical));
   }

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/9cc47e56/query_optimizer/ExecutionHeuristics.cpp
----------------------------------------------------------------------
diff --git a/query_optimizer/ExecutionHeuristics.cpp 
b/query_optimizer/ExecutionHeuristics.cpp
index dec078c..b407453 100644
--- a/query_optimizer/ExecutionHeuristics.cpp
+++ b/query_optimizer/ExecutionHeuristics.cpp
@@ -25,6 +25,8 @@
 #include "catalog/CatalogTypedefs.hpp"
 #include "query_execution/QueryContext.pb.h"
 #include "query_optimizer/QueryPlan.hpp"
+#include "query_optimizer/physical/Physical.hpp"
+#include "query_optimizer/physical/HashJoin.hpp"
 #include "utility/Macros.hpp"
 
 #include "glog/logging.h"
@@ -32,91 +34,79 @@
 namespace quickstep {
 namespace optimizer {
 
+namespace E = ::quickstep::optimizer::expressions;
+namespace P = ::quickstep::optimizer::physical;
+
 static const std::size_t kNumBitsPerByte = 8;
 DEFINE_double(bloom_num_bits_per_tuple, kNumBitsPerByte,
     "Number of bits per tuple used to size the Bloom filter.");
 
-DEFINE_int32(bloom_num_hash_fns, 3,
+DEFINE_int32(bloom_num_hash_fns, 1,
     "Number of hash functions used in the Bloom filter.");
 
 void ExecutionHeuristics::optimizeExecutionPlan(QueryPlan *query_plan,
                                                 serialization::QueryContext 
*query_context_proto) {
-  // Currently this only optimizes left deep joins using bloom filters.
-  // It uses a simple algorithm to discover the left deep joins.
-  // It starts with the first hash join in the plan and keeps on iterating
-  // over the next hash joins, till a probe on a different relation id is 
found.
-  // The set of hash joins found in this way forms a chain and can be 
recognized
-  // as a left deep join. It becomes a candidate for optimization.
-
-  // The optimization is done by modifying each of the build operators in the 
chain
-  // to generate a bloom filter on the build key during their hash table 
creation.
-  // The leaf-level probe operator is then modified to query all the bloom
-  // filters generated from all the build operators in the chain. These
-  // bloom filters are queried to test the membership of the probe key
-  // just prior to probing the hash table.
-
-  QueryPlan::DAGNodeIndex origin_node = 0;
-  while (origin_node < hash_joins_.size() - 1) {
-    std::vector<std::size_t> chained_nodes;
-    chained_nodes.push_back(origin_node);
-    for (std::size_t i = origin_node + 1; i < hash_joins_.size(); ++i) {
-      const relation_id checked_relation_id = 
hash_joins_[origin_node].referenced_stored_probe_relation_->getID();
-      const relation_id expected_relation_id = 
hash_joins_[i].referenced_stored_probe_relation_->getID();
-      if (checked_relation_id == expected_relation_id) {
-        chained_nodes.push_back(i);
-      } else {
-        break;
-      }
+  std::map<std::pair<E::ExprId, P::PhysicalPtr>,
+           std::pair<QueryContext::bloom_filter_id, QueryPlan::DAGNodeIndex>> 
bloom_filter_map;
+  for (const auto &info : hash_joins_) {
+    auto *hash_table_proto =
+        
query_context_proto->mutable_join_hash_tables(info.join_hash_table_id_);
+    const auto &bloom_filter_config = info.bloom_filter_config_;
+
+    for (std::size_t i = 0; i < info.build_side_bloom_filter_ids_.size(); ++i) 
{
+      const QueryContext::bloom_filter_id bloom_filter_id = 
query_context_proto->bloom_filters_size();
+      serialization::BloomFilter *bloom_filter_proto = 
query_context_proto->add_bloom_filters();
+      setBloomFilterProperties(bloom_filter_proto, 
info.estimated_build_relation_cardinality_);
+
+      const auto &build_side_bf =
+          bloom_filter_config.build_side_bloom_filters[i];
+      bloom_filter_map.emplace(
+          std::make_pair(build_side_bf.attribute->id(),
+                         bloom_filter_config.builder),
+          std::make_pair(bloom_filter_id, info.build_operator_index_));
+
+      auto *build_side_bloom_filter = 
hash_table_proto->add_build_side_bloom_filters();
+      build_side_bloom_filter->set_bloom_filter_id(bloom_filter_id);
+      
build_side_bloom_filter->set_attr_id(info.build_side_bloom_filter_ids_[i]);
+
+      std::cout << "Build " << build_side_bf.attribute->toString()
+                << " @" << bloom_filter_config.builder << "\n";
     }
+  }
 
-    // Only chains of length greater than one are suitable candidates for 
semi-join optimization.
-    if (chained_nodes.size() > 1) {
-      std::unordered_map<QueryContext::bloom_filter_id, 
std::vector<attribute_id>> probe_bloom_filter_info;
-      for (const std::size_t node : chained_nodes) {
-        // Provision for a new bloom filter to be used by the build operator.
-        const QueryContext::bloom_filter_id bloom_filter_id =  
query_context_proto->bloom_filters_size();
-        serialization::BloomFilter *bloom_filter_proto = 
query_context_proto->add_bloom_filters();
-
-        // Modify the bloom filter properties based on the statistics of the 
relation.
-        setBloomFilterProperties(bloom_filter_proto, 
hash_joins_[node].referenced_stored_build_relation_);
-
-        // Add build-side bloom filter information to the corresponding hash 
table proto.
-        
query_context_proto->mutable_join_hash_tables(hash_joins_[node].join_hash_table_id_)
-            ->add_build_side_bloom_filter_id(bloom_filter_id);
-
-        probe_bloom_filter_info.insert(std::make_pair(bloom_filter_id, 
hash_joins_[node].probe_attributes_));
-      }
-
-      // Add probe-side bloom filter information to the corresponding hash 
table proto for each build-side bloom filter.
-      for (const std::pair<QueryContext::bloom_filter_id, 
std::vector<attribute_id>>
-               &bloom_filter_info : probe_bloom_filter_info) {
-        auto *probe_side_bloom_filter =
-            
query_context_proto->mutable_join_hash_tables(hash_joins_[origin_node].join_hash_table_id_)
-                                  ->add_probe_side_bloom_filters();
-        
probe_side_bloom_filter->set_probe_side_bloom_filter_id(bloom_filter_info.first);
-        for (const attribute_id &probe_attribute_id : 
bloom_filter_info.second) {
-          probe_side_bloom_filter->add_probe_side_attr_ids(probe_attribute_id);
-        }
-      }
-
-      // Add node dependencies from chained build nodes to origin node probe.
-      for (std::size_t i = 1; i < chained_nodes.size(); ++i) {  // Note: It 
starts from index 1.
-        
query_plan->addDirectDependency(hash_joins_[origin_node].join_operator_index_,
-                                        hash_joins_[origin_node + 
i].build_operator_index_,
-                                        true /* is_pipeline_breaker */);
-      }
+  for (const auto &info : hash_joins_) {
+    auto *hash_table_proto =
+        
query_context_proto->mutable_join_hash_tables(info.join_hash_table_id_);
+    const auto &bloom_filter_config = info.bloom_filter_config_;
+
+    for (std::size_t i = 0; i < info.probe_side_bloom_filter_ids_.size(); ++i) 
{
+      auto *probe_side_bloom_filter = 
hash_table_proto->add_probe_side_bloom_filters();
+      const auto &probe_side_bf =
+          bloom_filter_config.probe_side_bloom_filters[i];
+      std::cout << "Probe " << probe_side_bf.attribute->toString()
+                << " @" << probe_side_bf.builder << "\n";
+
+      const auto &build_side_info =
+           bloom_filter_map.at(
+               std::make_pair(probe_side_bf.source_attribute->id(),
+                              probe_side_bf.builder));
+      probe_side_bloom_filter->set_bloom_filter_id(build_side_info.first);
+      
probe_side_bloom_filter->set_attr_id(info.probe_side_bloom_filter_ids_[i]);
+      std::cout << "Probe attr_id = " << info.probe_side_bloom_filter_ids_[i] 
<< "\n";
+
+      query_plan->addDirectDependency(info.join_operator_index_,
+                                      build_side_info.second,
+                                      true /* is_pipeline_breaker */);
     }
-
-    // Update the origin node.
-    origin_node = chained_nodes.back() + 1;
   }
 }
 
 void ExecutionHeuristics::setBloomFilterProperties(serialization::BloomFilter 
*bloom_filter_proto,
-                                                   const CatalogRelation 
*relation) {
-  const std::size_t cardinality = relation->getStatistics().getNumTuples();
+                                                   const std::size_t 
cardinality) {
   bloom_filter_proto->set_bloom_filter_size(
-      static_cast<std::size_t>(FLAGS_bloom_num_bits_per_tuple * cardinality) / 
kNumBitsPerByte);
+      BloomFilter::getNearestAllowedSize(
+          (FLAGS_bloom_num_bits_per_tuple * cardinality) / kNumBitsPerByte));
+  std::cout << "bf size = " << bloom_filter_proto->bloom_filter_size() << "\n";
   bloom_filter_proto->set_number_of_hashes(FLAGS_bloom_num_hash_fns);
 }
 

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/9cc47e56/query_optimizer/ExecutionHeuristics.hpp
----------------------------------------------------------------------
diff --git a/query_optimizer/ExecutionHeuristics.hpp 
b/query_optimizer/ExecutionHeuristics.hpp
index 2106a1b..8af1b4a 100644
--- a/query_optimizer/ExecutionHeuristics.hpp
+++ b/query_optimizer/ExecutionHeuristics.hpp
@@ -25,6 +25,7 @@
 #include "query_execution/QueryContext.hpp"
 #include "query_execution/QueryContext.pb.h"
 #include "query_optimizer/QueryPlan.hpp"
+#include "query_optimizer/physical/HashJoin.hpp"
 #include "utility/Macros.hpp"
 
 #include "glog/logging.h"
@@ -65,16 +66,18 @@ class ExecutionHeuristics {
                  const QueryPlan::DAGNodeIndex join_operator_index,
                  const CatalogRelation *referenced_stored_build_relation,
                  const CatalogRelation *referenced_stored_probe_relation,
-                 std::vector<attribute_id> &&build_attributes,
-                 std::vector<attribute_id> &&probe_attributes,
+                 const physical::BloomFilterConfig &bloom_filter_config,
+                 std::vector<attribute_id> &&build_side_bloom_filter_ids,
+                 std::vector<attribute_id> &&probe_side_bloom_filter_ids,
                  const QueryContext::join_hash_table_id join_hash_table_id,
                  const std::size_t estimated_build_relation_cardinality)
         : build_operator_index_(build_operator_index),
           join_operator_index_(join_operator_index),
           referenced_stored_build_relation_(referenced_stored_build_relation),
           referenced_stored_probe_relation_(referenced_stored_probe_relation),
-          build_attributes_(std::move(build_attributes)),
-          probe_attributes_(std::move(probe_attributes)),
+          bloom_filter_config_(bloom_filter_config),
+          build_side_bloom_filter_ids_(std::move(build_side_bloom_filter_ids)),
+          probe_side_bloom_filter_ids_(std::move(probe_side_bloom_filter_ids)),
           join_hash_table_id_(join_hash_table_id),
           
estimated_build_relation_cardinality_(estimated_build_relation_cardinality) {
     }
@@ -83,8 +86,9 @@ class ExecutionHeuristics {
     const QueryPlan::DAGNodeIndex join_operator_index_;
     const CatalogRelation *referenced_stored_build_relation_;
     const CatalogRelation *referenced_stored_probe_relation_;
-    const std::vector<attribute_id> build_attributes_;
-    const std::vector<attribute_id> probe_attributes_;
+    const physical::BloomFilterConfig &bloom_filter_config_;
+    const std::vector<attribute_id> build_side_bloom_filter_ids_;
+    const std::vector<attribute_id> probe_side_bloom_filter_ids_;
     const QueryContext::join_hash_table_id join_hash_table_id_;
     const std::size_t estimated_build_relation_cardinality_;
   };
@@ -112,16 +116,18 @@ class ExecutionHeuristics {
                               const QueryPlan::DAGNodeIndex 
join_operator_index,
                               const CatalogRelation 
*referenced_stored_build_relation,
                               const CatalogRelation 
*referenced_stored_probe_relation,
-                              std::vector<attribute_id> &&build_attributes,
-                              std::vector<attribute_id> &&probe_attributes,
+                              const physical::BloomFilterConfig 
&bloom_filter_config,
+                              std::vector<attribute_id> 
&&build_side_bloom_filter_ids,
+                              std::vector<attribute_id> 
&&probe_side_bloom_filter_ids,
                               const QueryContext::join_hash_table_id 
join_hash_table_id,
                               const std::size_t 
estimated_build_relation_cardinality) {
     hash_joins_.push_back(HashJoinInfo(build_operator_index,
                                        join_operator_index,
                                        referenced_stored_build_relation,
                                        referenced_stored_probe_relation,
-                                       std::move(build_attributes),
-                                       std::move(probe_attributes),
+                                       bloom_filter_config,
+                                       std::move(build_side_bloom_filter_ids),
+                                       std::move(probe_side_bloom_filter_ids),
                                        join_hash_table_id,
                                        estimated_build_relation_cardinality));
   }
@@ -144,7 +150,7 @@ class ExecutionHeuristics {
    * @param relation The catalog relation on which bloom filter is being built.
    **/
   void setBloomFilterProperties(serialization::BloomFilter *bloom_filter_proto,
-                                const CatalogRelation *relation);
+                                const std::size_t cardinality);
 
   std::size_t estimated_build_relation_cardinality() const {
     return estimated_build_relation_cardinality_;

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/9cc47e56/query_optimizer/PhysicalGenerator.cpp
----------------------------------------------------------------------
diff --git a/query_optimizer/PhysicalGenerator.cpp 
b/query_optimizer/PhysicalGenerator.cpp
index 50e1d86..f73a546 100644
--- a/query_optimizer/PhysicalGenerator.cpp
+++ b/query_optimizer/PhysicalGenerator.cpp
@@ -114,7 +114,6 @@ P::PhysicalPtr PhysicalGenerator::optimizePlan() {
     quickstep::PlanVisualizer plan_visualizer;
     std::cerr << "\n" << plan_visualizer.visualize(physical_plan_) << "\n";
   }
-  exit(0);
 
 #ifdef QUICKSTEP_DEBUG
   Validate(physical_plan_);

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/9cc47e56/query_optimizer/physical/HashJoin.cpp
----------------------------------------------------------------------
diff --git a/query_optimizer/physical/HashJoin.cpp 
b/query_optimizer/physical/HashJoin.cpp
index 7bea86a..f0e72e8 100644
--- a/query_optimizer/physical/HashJoin.cpp
+++ b/query_optimizer/physical/HashJoin.cpp
@@ -115,6 +115,24 @@ void HashJoin::getFieldStringItems(
   
container_child_fields->push_back(CastSharedPtrVector<OptimizerTreeBase>(left_join_attributes_));
   container_child_field_names->push_back("right_join_attributes");
   
container_child_fields->push_back(CastSharedPtrVector<OptimizerTreeBase>(right_join_attributes_));
+
+  if (!bloom_filter_config_.build_side_bloom_filters.empty()) {
+    container_child_field_names->push_back("build_side_bloom_filters");
+    container_child_fields->emplace_back();
+    auto &container = container_child_fields->back();
+    for (const auto& bf : bloom_filter_config_.build_side_bloom_filters) {
+      container.emplace_back(bf.attribute);
+    }
+  }
+
+  if (!bloom_filter_config_.probe_side_bloom_filters.empty()) {
+    container_child_field_names->push_back("probe_side_bloom_filters");
+    container_child_fields->emplace_back();
+    auto &container = container_child_fields->back();
+    for (const auto& bf : bloom_filter_config_.probe_side_bloom_filters) {
+      container.emplace_back(bf.attribute);
+    }
+  }
 }
 
 }  // namespace physical

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/9cc47e56/query_optimizer/physical/HashJoin.hpp
----------------------------------------------------------------------
diff --git a/query_optimizer/physical/HashJoin.hpp 
b/query_optimizer/physical/HashJoin.hpp
index 55c9588..cacb08b 100644
--- a/query_optimizer/physical/HashJoin.hpp
+++ b/query_optimizer/physical/HashJoin.hpp
@@ -48,6 +48,56 @@ namespace physical {
 class HashJoin;
 typedef std::shared_ptr<const HashJoin> HashJoinPtr;
 
+struct BloomFilterConfig {
+  struct BuildSide {
+    BuildSide(const expressions::AttributeReferencePtr &attribute_in)
+        : attribute(attribute_in) {
+    }
+    expressions::AttributeReferencePtr attribute;
+  };
+  struct ProbeSide {
+    ProbeSide(const expressions::AttributeReferencePtr &attribute_in,
+              const expressions::AttributeReferencePtr &source_attribute_in,
+              const physical::PhysicalPtr &builder_in)
+        : attribute(attribute_in),
+          source_attribute(source_attribute_in),
+          builder(builder_in) {
+    }
+    expressions::AttributeReferencePtr attribute;
+    expressions::AttributeReferencePtr source_attribute;
+    PhysicalPtr builder;
+  };
+  BloomFilterConfig() {}
+  BloomFilterConfig(const PhysicalPtr &builder_in)
+      : builder(builder_in) {
+  }
+  BloomFilterConfig(const PhysicalPtr &builder_in,
+                    const std::vector<BuildSide> &build_side_bloom_filters_in,
+                    const std::vector<ProbeSide> &probe_side_bloom_filters_in)
+      : builder(builder_in),
+        build_side_bloom_filters(build_side_bloom_filters_in),
+        probe_side_bloom_filters(probe_side_bloom_filters_in) {
+  }
+  void addBuildSideBloomFilter(const expressions::AttributeReferencePtr 
&attribute_in) {
+    for (const auto &build_bf : build_side_bloom_filters) {
+      if (attribute_in == build_bf.attribute) {
+        return;
+      }
+    }
+    build_side_bloom_filters.emplace_back(attribute_in);
+  }
+  void addProbeSideBloomFilter(const expressions::AttributeReferencePtr 
&attribute_in,
+                               const expressions::AttributeReferencePtr 
&source_attribute_in,
+                               const physical::PhysicalPtr &builder_in) {
+    probe_side_bloom_filters.emplace_back(attribute_in,
+                                          source_attribute_in,
+                                          builder_in);
+  }
+  PhysicalPtr builder;
+  std::vector<BuildSide> build_side_bloom_filters;
+  std::vector<ProbeSide> probe_side_bloom_filters;
+};
+
 /**
  * @brief Physical hash join node.
  */
@@ -115,7 +165,8 @@ class HashJoin : public BinaryJoin {
                   right_join_attributes_,
                   residual_predicate_,
                   project_expressions(),
-                  join_type_);
+                  join_type_,
+                  bloom_filter_config_);
   }
 
   std::vector<expressions::AttributeReferencePtr> getReferencedAttributes() 
const override;
@@ -127,6 +178,10 @@ class HashJoin : public BinaryJoin {
   bool impliesUniqueAttributes(
       const std::vector<expressions::AttributeReferencePtr> &attributes) const 
override;
 
+  const BloomFilterConfig &bloom_filter_config() const {
+    return bloom_filter_config_;
+  }
+
   /**
    * @brief Creates a physical HashJoin. The left/right operand does not 
correspond to
    *        probe/build operand.
@@ -147,7 +202,8 @@ class HashJoin : public BinaryJoin {
       const std::vector<expressions::AttributeReferencePtr> 
&right_join_attributes,
       const expressions::PredicatePtr &residual_predicate,
       const std::vector<expressions::NamedExpressionPtr> &project_expressions,
-      const JoinType join_type) {
+      const JoinType join_type,
+      const BloomFilterConfig bloom_filter_config = BloomFilterConfig()) {
     return HashJoinPtr(
         new HashJoin(left,
                      right,
@@ -155,7 +211,8 @@ class HashJoin : public BinaryJoin {
                      right_join_attributes,
                      residual_predicate,
                      project_expressions,
-                     join_type));
+                     join_type,
+                     bloom_filter_config));
   }
 
  protected:
@@ -175,18 +232,21 @@ class HashJoin : public BinaryJoin {
       const std::vector<expressions::AttributeReferencePtr> 
&right_join_attributes,
       const expressions::PredicatePtr &residual_predicate,
       const std::vector<expressions::NamedExpressionPtr> &project_expressions,
-      const JoinType join_type)
+      const JoinType join_type,
+      const BloomFilterConfig &bloom_filter_config)
       : BinaryJoin(left, right, project_expressions),
         left_join_attributes_(left_join_attributes),
         right_join_attributes_(right_join_attributes),
         residual_predicate_(residual_predicate),
-        join_type_(join_type) {
+        join_type_(join_type),
+        bloom_filter_config_(bloom_filter_config) {
   }
 
   std::vector<expressions::AttributeReferencePtr> left_join_attributes_;
   std::vector<expressions::AttributeReferencePtr> right_join_attributes_;
   expressions::PredicatePtr residual_predicate_;
   JoinType join_type_;
+  BloomFilterConfig bloom_filter_config_;
 
   DISALLOW_COPY_AND_ASSIGN(HashJoin);
 };

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/9cc47e56/query_optimizer/rules/AttachBloomFilters.cpp
----------------------------------------------------------------------
diff --git a/query_optimizer/rules/AttachBloomFilters.cpp 
b/query_optimizer/rules/AttachBloomFilters.cpp
index b3cf0da..e3bdc36 100644
--- a/query_optimizer/rules/AttachBloomFilters.cpp
+++ b/query_optimizer/rules/AttachBloomFilters.cpp
@@ -70,16 +70,29 @@ P::PhysicalPtr AttachBloomFilters::apply(const 
P::PhysicalPtr &input) {
     std::cerr << "********\n";
   }
 
-  return input;
+  return visitAndAttach(input);
 }
 
-void AttachBloomFilters::visitProducer(const P::PhysicalPtr &node, int depth) {
+void AttachBloomFilters::visitProducer(const P::PhysicalPtr &node, const int 
depth) {
   for (const P::PhysicalPtr &child : node->children()) {
     visitProducer(child, depth+1);
   }
 
   std::vector<BloomFilterInfo> bloom_filters;
 
+  if (node->getPhysicalType() == P::PhysicalType::kHashJoin) {
+    const P::HashJoinPtr &hash_join =
+        std::static_pointer_cast<const P::HashJoin>(node);
+    const P::PhysicalPtr &build_node = hash_join->right();
+    double selectivity = cost_model_->estimateSelectivity(build_node);
+    if (selectivity < 1.0) {
+      auto &build_node_info = producers_[build_node];
+      for (const auto &attr : hash_join->right_join_attributes()) {
+        build_node_info.emplace_back(node, attr, depth, selectivity, false);
+      }
+    }
+  }
+
   const std::vector<E::AttributeReferencePtr> output_attributes(
       node->getOutputAttributes());
   std::unordered_set<E::ExprId> output_attribute_ids;
@@ -111,12 +124,12 @@ void AttachBloomFilters::visitProducer(const 
P::PhysicalPtr &node, int depth) {
   }
 
   // Self-produced bloom filters
-  double selectivity = cost_model_->estimateSelectivity(node);
-  if (selectivity < 1.0) {
-    for (const auto &attr : output_attributes) {
-      bloom_filters.emplace_back(node, attr, depth, selectivity, false);
-    }
-  }
+//  double selectivity = cost_model_->estimateSelectivity(node);
+//  if (selectivity < 1.0) {
+//    for (const auto &attr : output_attributes) {
+//      bloom_filters.emplace_back(node, attr, depth, selectivity, false);
+//    }
+//  }
 
   producers_.emplace(node, std::move(bloom_filters));
 }
@@ -149,41 +162,62 @@ void AttachBloomFilters::visitConsumer(const 
P::PhysicalPtr &node) {
     }
   }
 
-  // Bloom filters from siblings via HashJoin
+  // Bloom filters from build side to probe side via HashJoin
   if (node->getPhysicalType() == P::PhysicalType::kHashJoin) {
     const P::HashJoinPtr hash_join =
         std::static_pointer_cast<const P::HashJoin>(node);
-    std::vector<P::PhysicalPtr> children =
-        { hash_join->left(), hash_join->right() };
+    const P::PhysicalPtr &producer_child = hash_join->right();
+    const P::PhysicalPtr &consumer_child = hash_join->left();
 
     std::unordered_map<E::ExprId, E::AttributeReferencePtr> 
join_attribute_pairs;
     for (std::size_t i = 0; i < hash_join->left_join_attributes().size(); ++i) 
{
-      const E::AttributeReferencePtr left_join_attribute =
+      const E::AttributeReferencePtr probe_join_attribute =
           hash_join->left_join_attributes()[i];
-      const E::AttributeReferencePtr right_join_attribute =
+      const E::AttributeReferencePtr build_join_attribute =
           hash_join->right_join_attributes()[i];
-      join_attribute_pairs.emplace(left_join_attribute->id(),
-                                   right_join_attribute);
-      join_attribute_pairs.emplace(right_join_attribute->id(),
-                                   left_join_attribute);
+      join_attribute_pairs.emplace(build_join_attribute->id(),
+                                   probe_join_attribute);
     }
 
-    for (int side = 0; side < 2; ++side) {
-      const P::PhysicalPtr &producer_child = children[side];
-      const P::PhysicalPtr &consumer_child = children[1-side];
-
-      auto &consumer_bloom_filters = consumers_[consumer_child];
-      for (const auto &info : producers_[producer_child]) {
-        const auto pair_it = join_attribute_pairs.find(info.attribute->id());
-        if (pair_it != join_attribute_pairs.end()) {
-          consumer_bloom_filters.emplace_back(info.source,
-                                              pair_it->second,
-                                              info.depth,
-                                              info.selectivity,
-                                              true,
-                                              info.attribute);
+
+    auto &consumer_bloom_filters = consumers_[consumer_child];
+    for (const auto &info : producers_[producer_child]) {
+      const auto pair_it = join_attribute_pairs.find(info.attribute->id());
+      if (pair_it != join_attribute_pairs.end()) {
+        consumer_bloom_filters.emplace_back(info.source,
+                                            pair_it->second,
+                                            info.depth,
+                                            info.selectivity,
+                                            true,
+                                            info.attribute);
+      }
+    }
+
+    // Decide attaches
+    if (cost_model_->estimateCardinality(consumer_child) > 200000000 &&
+        !consumer_bloom_filters.empty()) {
+      std::map<E::AttributeReferencePtr, const BloomFilterInfo*> filters;
+      for (const auto &info : consumer_bloom_filters) {
+        auto it = filters.find(info.attribute);
+        if (it == filters.end()) {
+          filters.emplace(info.attribute, &info);
+        } else {
+          if (BloomFilterInfo::isBetterThan(&info, it->second)) {
+            it->second = &info;
+          }
         }
       }
+
+      auto &probe_attaches = getBloomFilterConfig(node);
+      for (const auto &pair : filters) {
+        auto &build_attaches = getBloomFilterConfig(pair.second->source);
+        build_attaches.addBuildSideBloomFilter(
+            pair.second->source_attribute);
+        probe_attaches.addProbeSideBloomFilter(
+            pair.first,
+            pair.second->source_attribute,
+            pair.second->source);
+      }
     }
   }
 
@@ -192,5 +226,52 @@ void AttachBloomFilters::visitConsumer(const 
P::PhysicalPtr &node) {
   }
 }
 
+P::PhysicalPtr AttachBloomFilters::visitAndAttach(const physical::PhysicalPtr 
&node) {
+  std::vector<P::PhysicalPtr> new_children;
+  bool has_changed = false;
+  for (const auto &child : node->children()) {
+    P::PhysicalPtr new_child = visitAndAttach(child);
+    if (new_child != child) {
+      has_changed = true;
+    }
+    new_children.emplace_back(new_child);
+  }
+
+  if (node->getPhysicalType() == P::PhysicalType::kHashJoin) {
+    const auto attach_it = attaches_.find(node);
+    if (attach_it != attaches_.end()) {
+      for (const auto& item : attach_it->second.probe_side_bloom_filters) {
+        std::cout << "Attach probe from " << item.builder
+                  << " to " << node << "\n";
+      }
+
+      const P::HashJoinPtr hash_join =
+          std::static_pointer_cast<const P::HashJoin>(node);
+      return P::HashJoin::Create(
+          new_children[0],
+          new_children[1],
+          hash_join->left_join_attributes(),
+          hash_join->right_join_attributes(),
+          hash_join->residual_predicate(),
+          hash_join->project_expressions(),
+          hash_join->join_type(),
+          attach_it->second);
+    }
+  }
+
+  if (has_changed) {
+    return node->copyWithNewChildren(new_children);
+  }
+
+  return node;
+}
+
+P::BloomFilterConfig& AttachBloomFilters::getBloomFilterConfig(const 
physical::PhysicalPtr &node) {
+  if (attaches_.find(node) == attaches_.end()) {
+    attaches_.emplace(node, node);
+  }
+  return attaches_[node];
+}
+
 }  // namespace optimizer
 }  // namespace quickstep

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/9cc47e56/query_optimizer/rules/AttachBloomFilters.hpp
----------------------------------------------------------------------
diff --git a/query_optimizer/rules/AttachBloomFilters.hpp 
b/query_optimizer/rules/AttachBloomFilters.hpp
index 69b50e6..e4437f7 100644
--- a/query_optimizer/rules/AttachBloomFilters.hpp
+++ b/query_optimizer/rules/AttachBloomFilters.hpp
@@ -32,6 +32,7 @@
 #include "query_optimizer/expressions/NamedExpression.hpp"
 #include "query_optimizer/expressions/Predicate.hpp"
 #include "query_optimizer/physical/Physical.hpp"
+#include "query_optimizer/physical/HashJoin.hpp"
 #include "query_optimizer/rules/Rule.hpp"
 #include "utility/Macros.hpp"
 
@@ -76,6 +77,14 @@ class AttachBloomFilters : public Rule<physical::Physical> {
                   : source_attribute_in) {
 
     }
+    static bool isBetterThan(const BloomFilterInfo *a,
+                             const BloomFilterInfo *b) {
+      if (a->selectivity == b->selectivity) {
+        return a->depth > b->depth;
+      } else {
+        return a->selectivity < b->selectivity;
+      }
+    }
     physical::PhysicalPtr source;
     expressions::AttributeReferencePtr attribute;
     int depth;
@@ -84,14 +93,19 @@ class AttachBloomFilters : public Rule<physical::Physical> {
     expressions::AttributeReferencePtr source_attribute;
   };
 
-  void visitProducer(const physical::PhysicalPtr &node, int depth);
+  void visitProducer(const physical::PhysicalPtr &node, const int depth);
 
   void visitConsumer(const physical::PhysicalPtr &node);
 
+  physical::PhysicalPtr visitAndAttach(const physical::PhysicalPtr &node);
+
+  physical::BloomFilterConfig &getBloomFilterConfig(const 
physical::PhysicalPtr &node);
+
   std::unique_ptr<cost::StarSchemaSimpleCostModel> cost_model_;
 
   std::map<physical::PhysicalPtr, std::vector<BloomFilterInfo>> producers_;
   std::map<physical::PhysicalPtr, std::vector<BloomFilterInfo>> consumers_;
+  std::map<physical::PhysicalPtr, physical::BloomFilterConfig> attaches_;
 
   DISALLOW_COPY_AND_ASSIGN(AttachBloomFilters);
 };

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/9cc47e56/query_optimizer/rules/StarSchemaHashJoinOrderOptimization.cpp
----------------------------------------------------------------------
diff --git a/query_optimizer/rules/StarSchemaHashJoinOrderOptimization.cpp 
b/query_optimizer/rules/StarSchemaHashJoinOrderOptimization.cpp
index e198149..42a7402 100644
--- a/query_optimizer/rules/StarSchemaHashJoinOrderOptimization.cpp
+++ b/query_optimizer/rules/StarSchemaHashJoinOrderOptimization.cpp
@@ -223,6 +223,21 @@ physical::PhysicalPtr 
StarSchemaHashJoinOrderOptimization::generatePlan(
             std::unique_ptr<JoinPair> new_join(
                 new JoinPair(probe_table_info, build_table_info));
             if (best_join == nullptr || new_join->isBetterThan(*best_join)) {
+//              if (best_join != nullptr) {
+//                std::cerr << "(" << best_join->probe->estimated_selectivity
+//                          << ", " << best_join->probe->estimated_cardinality 
<< ")"
+//                          << " -- "
+//                          << "(" << best_join->build->estimated_selectivity
+//                          << ", " << best_join->build->estimated_cardinality 
<< ")"
+//                          << "\n";
+//                std::cerr << "REPLACED WITH\n";
+//              }
+//              std::cerr << "(" << new_join->probe->estimated_selectivity
+//                        << ", " << new_join->probe->estimated_cardinality << 
")"
+//                        << " -- "
+//                        << "(" << new_join->build->estimated_selectivity
+//                        << ", " << new_join->build->estimated_cardinality << 
")"
+//                        << "\n****\n";
               best_join.reset(new_join.release());
             }
           }
@@ -242,6 +257,11 @@ physical::PhysicalPtr 
StarSchemaHashJoinOrderOptimization::generatePlan(
     CHECK(selected_probe_table_info != nullptr);
     CHECK(selected_build_table_info != nullptr);
 
+//    std::cerr << selected_probe_table_info->estimated_selectivity
+//              << " -- "
+//              << selected_build_table_info->estimated_selectivity
+//              << "\n";
+
 //    std::cerr << selected_probe_table_info->estimated_num_output_attributes
 //              << " -- "
 //              << selected_build_table_info->estimated_num_output_attributes
@@ -278,10 +298,10 @@ physical::PhysicalPtr 
StarSchemaHashJoinOrderOptimization::generatePlan(
 
     if (remaining_tables.size() > 0) {
       P::PhysicalPtr output =
-          P::HashJoin::Create(build_child,
-                              probe_child,
-                              build_attributes,
+          P::HashJoin::Create(probe_child,
+                              build_child,
                               probe_attributes,
+                              build_attributes,
                               nullptr,
                               output_attributes,
                               P::HashJoin::JoinType::kInnerJoin);
@@ -310,10 +330,10 @@ physical::PhysicalPtr 
StarSchemaHashJoinOrderOptimization::generatePlan(
         }
       }
     } else {
-      return P::HashJoin::Create(build_child,
-                                 probe_child,
-                                 build_attributes,
+      return P::HashJoin::Create(probe_child,
+                                 build_child,
                                  probe_attributes,
+                                 build_attributes,
                                  residual_predicate,
                                  project_expressions,
                                  P::HashJoin::JoinType::kInnerJoin);

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/9cc47e56/query_optimizer/rules/StarSchemaHashJoinOrderOptimization.hpp
----------------------------------------------------------------------
diff --git a/query_optimizer/rules/StarSchemaHashJoinOrderOptimization.hpp 
b/query_optimizer/rules/StarSchemaHashJoinOrderOptimization.hpp
index 6ad300c..a0e34ce 100644
--- a/query_optimizer/rules/StarSchemaHashJoinOrderOptimization.hpp
+++ b/query_optimizer/rules/StarSchemaHashJoinOrderOptimization.hpp
@@ -107,13 +107,13 @@ class StarSchemaHashJoinOrderOptimization : public 
Rule<physical::Physical> {
         return rhs_has_large_output;
       }
 
-      const bool lhs_has_small_build =
-          !lhs_has_large_output && lhs.build->estimated_cardinality < 0x1000;
-      const bool rhs_has_small_build =
-          !rhs_has_large_output && rhs.build->estimated_cardinality < 0x1000;
-      if (lhs_has_small_build != rhs_has_small_build) {
-        return lhs_has_small_build;
-      }
+//      const bool lhs_has_small_build =
+//          !lhs_has_large_output && lhs.build->estimated_cardinality < 0x1000;
+//      const bool rhs_has_small_build =
+//          !rhs_has_large_output && rhs.build->estimated_cardinality < 0x1000;
+//      if (lhs_has_small_build != rhs_has_small_build) {
+//        return lhs_has_small_build;
+//      }
 
       if (lhs.probe->estimated_cardinality != 
rhs.probe->estimated_cardinality) {
         return lhs.probe->estimated_cardinality < 
rhs.probe->estimated_cardinality;

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/9cc47e56/storage/HashTable.hpp
----------------------------------------------------------------------
diff --git a/storage/HashTable.hpp b/storage/HashTable.hpp
index 13cd83c..2c526c2 100644
--- a/storage/HashTable.hpp
+++ b/storage/HashTable.hpp
@@ -41,6 +41,7 @@
 #include "types/TypedValue.hpp"
 #include "utility/BloomFilter.hpp"
 #include "utility/BloomFilterAdapter.hpp"
+#include "utility/EventProfiler.hpp"
 #include "utility/HashPair.hpp"
 #include "utility/Macros.hpp"
 
@@ -1047,8 +1048,8 @@ class HashTable : public HashTableBase<resizable,
    * @param probe_attribute_ids The vector of attribute ids to use for probing
    *        the bloom filter.
    **/
-  inline void addProbeSideAttributeIds(std::vector<attribute_id> 
&&probe_attribute_ids) {
-    probe_attribute_ids_.push_back(probe_attribute_ids);
+  inline void addProbeSideAttributeIds(const attribute_id &probe_attribute_id) 
{
+    probe_attribute_ids_.push_back(probe_attribute_id);
   }
 
  protected:
@@ -1336,7 +1337,7 @@ class HashTable : public HashTableBase<resizable,
   bool has_probe_side_bloom_filter_ = false;
   BloomFilter *build_bloom_filter_;
   std::vector<const BloomFilter*> probe_bloom_filters_;
-  std::vector<std::vector<attribute_id>> probe_attribute_ids_;
+  std::vector<attribute_id> probe_attribute_ids_;
 
   DISALLOW_COPY_AND_ASSIGN(HashTable);
 };
@@ -2257,23 +2258,16 @@ void HashTable<ValueT, resizable, serializable, 
force_key_copy, allow_duplicate_
       // Find (and cache) the size of each attribute in the probe lists.
       // NOTE(nav): This code uses the accessor to get the size,
       // and hence only works if there's at least one tuple.
-      std::vector<std::vector<std::size_t>> attr_size_vectors;
-      attr_size_vectors.reserve(probe_attribute_ids_.size());
-      for (const auto &probe_attr_vector : probe_attribute_ids_) {
-        std::vector<std::size_t> attr_sizes;
-        attr_sizes.reserve(probe_attr_vector.size());
-        for (const auto &probe_attr : probe_attr_vector) {
-          // Using the function below is the easiest way to get attribute size
-          // here. The template parameter check_null is set to false to ensure
-          // that we get the size even if the first attr value is null.
-          auto val_and_size = accessor->template 
getUntypedValueAndByteLengthAtAbsolutePosition<false>(0, probe_attr);
-          attr_sizes.push_back(val_and_size.second);
-        }
-        attr_size_vectors.push_back(attr_sizes);
+      std::vector<std::size_t> attr_size_vector;
+      attr_size_vector.reserve(probe_attribute_ids_.size());
+      for (const auto &probe_attr : probe_attribute_ids_) {
+        auto val_and_size =
+            accessor->template 
getUntypedValueAndByteLengthAtAbsolutePosition<false>(0, probe_attr);
+        attr_size_vector.push_back(val_and_size.second);
       }
 
       bloom_filter_adapter.reset(new BloomFilterAdapter(
-              probe_bloom_filters_, probe_attribute_ids_, attr_size_vectors));
+              probe_bloom_filters_, probe_attribute_ids_, attr_size_vector));
 
       // We want to have large batch sizes for cache efficiency while probeing,
       // but small batch sizes to ensure that the adaptation logic kicks in
@@ -2286,6 +2280,9 @@ void HashTable<ValueT, resizable, serializable, 
force_key_copy, allow_duplicate_
       std::uint32_t num_tuples_left = accessor->getNumTuples();
       std::vector<tuple_id> batch(num_tuples_left);
 
+      auto *container = simple_profiler.getContainer();
+      auto *line = container->getEventLine(0);
+
       do {
         const std::uint32_t batch_size =
             batch_size_try < num_tuples_left ? batch_size_try : 
num_tuples_left;
@@ -2294,12 +2291,16 @@ void HashTable<ValueT, resizable, serializable, 
force_key_copy, allow_duplicate_
           batch.push_back(accessor->getCurrentPosition());
         }
 
+        line->emplace_back();
         std::size_t num_hits;
         if (FLAGS_adapt_bloom_filters) {
           num_hits = bloom_filter_adapter->bulkProbe<true>(accessor, batch);
         } else {
           num_hits = bloom_filter_adapter->bulkProbe<false>(accessor, batch);
         }
+        line->back().setPayload(num_hits+0);
+        line->back().endEvent();
+//        std::size_t num_hits = batch_size;
 
         for (std::size_t i = 0; i < num_hits; ++i){
           const tuple_id probe_tid = batch[i];

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/9cc47e56/storage/HashTable.proto
----------------------------------------------------------------------
diff --git a/storage/HashTable.proto b/storage/HashTable.proto
index 7f00f29..0cf9f5e 100644
--- a/storage/HashTable.proto
+++ b/storage/HashTable.proto
@@ -34,10 +34,10 @@ message HashTable {
   required HashTableImplType hash_table_impl_type = 1;
   repeated Type key_types = 2;
   required uint64 estimated_num_entries = 3;
-  repeated uint32 build_side_bloom_filter_id = 4;
-  message ProbeSideBloomFilter {
-    required uint32 probe_side_bloom_filter_id = 1;
-    repeated uint32 probe_side_attr_ids = 2;
+  message BloomFilterReference {
+    required uint32 bloom_filter_id = 1;
+    required uint32 attr_id = 2;
   }
-  repeated ProbeSideBloomFilter probe_side_bloom_filters = 6;
+  repeated BloomFilterReference build_side_bloom_filters = 4;
+  repeated BloomFilterReference probe_side_bloom_filters = 5;
 }

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/9cc47e56/storage/HashTableFactory.hpp
----------------------------------------------------------------------
diff --git a/storage/HashTableFactory.hpp b/storage/HashTableFactory.hpp
index 34baaeb..00a09c1 100644
--- a/storage/HashTableFactory.hpp
+++ b/storage/HashTableFactory.hpp
@@ -318,9 +318,11 @@ class HashTableFactory {
     //                 individual implementations of the hash table 
constructors.
 
     // Check if there are any build side bloom filter defined on the hash 
table.
-    if (proto.build_side_bloom_filter_id_size() > 0) {
+    if (proto.build_side_bloom_filters_size() > 0) {
+      CHECK_EQ(1u, proto.build_side_bloom_filters_size());
       hash_table->enableBuildSideBloomFilter();
-      
hash_table->setBuildSideBloomFilter(bloom_filters[proto.build_side_bloom_filter_id(0)].get());
+      hash_table->setBuildSideBloomFilter(
+          
bloom_filters[proto.build_side_bloom_filters(0).bloom_filter_id()].get());
     }
 
     // Check if there are any probe side bloom filters defined on the hash 
table.
@@ -330,15 +332,11 @@ class HashTableFactory {
       for (int j = 0; j < proto.probe_side_bloom_filters_size(); ++j) {
         // Add the pointer to the probe bloom filter within the list of probe 
bloom filters to use.
         const auto probe_side_bloom_filter = proto.probe_side_bloom_filters(j);
-        
hash_table->addProbeSideBloomFilter(bloom_filters[probe_side_bloom_filter.probe_side_bloom_filter_id()].get());
+        hash_table->addProbeSideBloomFilter(
+            bloom_filters[probe_side_bloom_filter.bloom_filter_id()].get());
 
         // Add the attribute ids corresponding to this probe bloom filter.
-        std::vector<attribute_id> probe_attribute_ids;
-        for (int k = 0; k < 
probe_side_bloom_filter.probe_side_attr_ids_size(); ++k) {
-          const attribute_id probe_attribute_id = 
probe_side_bloom_filter.probe_side_attr_ids(k);
-          probe_attribute_ids.push_back(probe_attribute_id);
-        }
-        hash_table->addProbeSideAttributeIds(std::move(probe_attribute_ids));
+        
hash_table->addProbeSideAttributeIds(probe_side_bloom_filter.attr_id());
       }
     }
 

http://git-wip-us.apache.org/repos/asf/incubator-quickstep/blob/9cc47e56/utility/BloomFilterAdapter.hpp
----------------------------------------------------------------------
diff --git a/utility/BloomFilterAdapter.hpp b/utility/BloomFilterAdapter.hpp
index fbd6957..a160353 100644
--- a/utility/BloomFilterAdapter.hpp
+++ b/utility/BloomFilterAdapter.hpp
@@ -40,8 +40,8 @@ namespace quickstep {
 class BloomFilterAdapter {
  public:
   BloomFilterAdapter(const std::vector<const BloomFilter*> &bloom_filters,
-                     const std::vector<std::vector<attribute_id>> 
attribute_ids,
-                     const std::vector<std::vector<std::size_t>> attr_sizes) {
+                     const std::vector<attribute_id> &attribute_ids,
+                     const std::vector<std::size_t> &attr_sizes) {
     DCHECK_EQ(bloom_filters.size(), attribute_ids.size());
     DCHECK_EQ(bloom_filters.size(), attr_sizes.size());
 
@@ -64,7 +64,7 @@ class BloomFilterAdapter {
                                std::vector<tuple_id> &batch) {
     std::size_t out_size = batch.size();
     for (auto &entry : bloom_filter_entries_) {
-      out_size = bulkProbeBloomFilterEntry(*entry, accessor, batch, out_size);
+      out_size = bulkProbeBloomFilterEntry<adapt_filters>(*entry, accessor, 
batch, out_size);
     }
     adaptEntryOrder();
     return out_size;
@@ -73,11 +73,11 @@ class BloomFilterAdapter {
  private:
   struct BloomFilterEntry {
     BloomFilterEntry(const BloomFilter *in_bloom_filter,
-                     const std::vector<attribute_id> &in_attribute_ids,
-                     const std::vector<std::size_t> &in_attribute_sizes)
+                     const attribute_id &in_attribute_id,
+                     const std::size_t &in_attribute_size)
         : bloom_filter(in_bloom_filter),
-          attribute_ids(in_attribute_ids),
-          attribute_sizes(in_attribute_sizes),
+          attribute_id(in_attribute_id),
+          attribute_size(in_attribute_size),
           miss(0),
           cnt(0) {
     }
@@ -88,8 +88,8 @@ class BloomFilterAdapter {
     }
 
     const BloomFilter *bloom_filter;
-    const std::vector<attribute_id> attribute_ids;
-    const std::vector<std::size_t> attribute_sizes;
+    const attribute_id attribute_id;
+    const std::size_t attribute_size;
     std::uint32_t miss;
     std::uint32_t cnt;
     float miss_rate;
@@ -105,16 +105,12 @@ class BloomFilterAdapter {
     const BloomFilter *bloom_filter = entry.bloom_filter;
 
     for (std::size_t t = 0; t < in_size; ++t) {
-      tuple_id tid = batch[t];
-      for (std::size_t a = 0; a < entry.attribute_ids.size(); ++a) {
-        const attribute_id &attr_id = entry.attribute_ids[a];
-        const std::size_t size = entry.attribute_sizes[a];
-        const auto value = static_cast<const std::uint8_t*>(
-            accessor->getUntypedValueAtAbsolutePosition(attr_id, tid));
-        if (bloom_filter->contains(value, size)) {
-          batch[out_size] = tid;
-          ++out_size;
-        }
+      const tuple_id tid = batch[t];
+      const auto value = static_cast<const std::uint8_t*>(
+          accessor->getUntypedValueAtAbsolutePosition(entry.attribute_id, 
tid));
+      if (bloom_filter->contains(value, entry.attribute_size)) {
+        batch[out_size] = tid;
+        ++out_size;
       }
     }
     if (adapt_filters) {

Reply via email to