This is an automated email from the ASF dual-hosted git repository. boroknagyz pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/impala.git
The following commit(s) were added to refs/heads/master by this push: new e0aabdd IMPALA-8064: Improve observability of wait times for runtime filters e0aabdd is described below commit e0aabddd573c204a780d3f5ff0af442cdb26b7c6 Author: poojanilangekar <pooja.nilange...@cloudera.com> AuthorDate: Thu Feb 7 17:00:34 2019 -0800 IMPALA-8064: Improve observability of wait times for runtime filters This change is a diagnostic fix to improve the wait times logged for runtime filters. The filter wait time counts against the elapsed time since the filter's registration in ScanNode::Init() while the duration logged in ScanNode::WaitForRuntimeFilters() is the time spent in the function waiting for all the filters to arrive. This could be misleading as it doesn't account for the elapsed time spent between ScanNode::Init() and ScanNode::WaitForRuntimeFilters(). This change logs the maximum arrival delay for any runtime filter to arrive. From my analysis of the logs of the failed tests, I believe the filters are actually waiting for the specified time but logging the duration incorrectly. The solution would be to increase the wait time further. This change would help validate this hypothesis. Change-Id: I28fd45e75c773bc01d424f5a179ae186ee9b7469 Reviewed-on: http://gerrit.cloudera.org:8080/12401 Reviewed-by: Impala Public Jenkins <impala-public-jenk...@cloudera.com> Tested-by: Impala Public Jenkins <impala-public-jenk...@cloudera.com> --- be/src/exec/scan-node.cc | 13 +++++++++---- be/src/runtime/runtime-filter-bank.cc | 7 +++---- be/src/runtime/runtime-filter.h | 9 +++++---- 3 files changed, 17 insertions(+), 12 deletions(-) diff --git a/be/src/exec/scan-node.cc b/be/src/exec/scan-node.cc index 906af66..039836c 100644 --- a/be/src/exec/scan-node.cc +++ b/be/src/exec/scan-node.cc @@ -168,6 +168,7 @@ bool ScanNode::WaitForRuntimeFilters() { } vector<string> arrived_filter_ids; vector<string> missing_filter_ids; + int32_t max_arrival_delay = 0; int64_t start = MonotonicMillis(); for (auto& ctx: filter_ctxs_) { string filter_id = Substitute("$0", ctx.filter->id()); @@ -176,20 +177,24 @@ bool ScanNode::WaitForRuntimeFilters() { } else { missing_filter_ids.push_back(filter_id); } + max_arrival_delay = max(max_arrival_delay, ctx.filter->arrival_delay_ms()); } int64_t end = MonotonicMillis(); const string& wait_time = PrettyPrinter::Print(end - start, TUnit::TIME_MS); + const string& arrival_delay = PrettyPrinter::Print(max_arrival_delay, TUnit::TIME_MS); if (arrived_filter_ids.size() == filter_ctxs_.size()) { runtime_profile()->AddInfoString("Runtime filters", - Substitute("All filters arrived. Waited $0", wait_time)); + Substitute("All filters arrived. Waited $0. Maximum arrival delay: $1.", + wait_time, arrival_delay)); VLOG(2) << "Filters arrived. Waited " << wait_time; return true; } - const string& filter_str = Substitute( - "Not all filters arrived (arrived: [$0], missing [$1]), waited for $2", - join(arrived_filter_ids, ", "), join(missing_filter_ids, ", "), wait_time); + const string& filter_str = Substitute("Not all filters arrived (arrived: [$0], missing " + "[$1]), waited for $2. Arrival delay: $3.", + join(arrived_filter_ids, ", "), join(missing_filter_ids, ", "), wait_time, + arrival_delay); runtime_profile()->AddInfoString("Runtime filters", filter_str); VLOG(2) << filter_str; return false; diff --git a/be/src/runtime/runtime-filter-bank.cc b/be/src/runtime/runtime-filter-bank.cc index 85c9625..f8667bc 100644 --- a/be/src/runtime/runtime-filter-bank.cc +++ b/be/src/runtime/runtime-filter-bank.cc @@ -146,9 +146,8 @@ void RuntimeFilterBank::UpdateFilterFromLocal( filter = it->second; } filter->SetFilter(bloom_filter, min_max_filter); - state_->runtime_profile()->AddInfoString( - Substitute("Filter $0 arrival", filter_id), - PrettyPrinter::Print(filter->arrival_delay(), TUnit::TIME_MS)); + state_->runtime_profile()->AddInfoString(Substitute("Filter $0 arrival", filter_id), + PrettyPrinter::Print(filter->arrival_delay_ms(), TUnit::TIME_MS)); } if (has_remote_target @@ -211,7 +210,7 @@ void RuntimeFilterBank::PublishGlobalFilter(const TPublishFilterParams& params) it->second->SetFilter(bloom_filter, min_max_filter); state_->runtime_profile()->AddInfoString( Substitute("Filter $0 arrival", params.filter_id), - PrettyPrinter::Print(it->second->arrival_delay(), TUnit::TIME_MS)); + PrettyPrinter::Print(it->second->arrival_delay_ms(), TUnit::TIME_MS)); } BloomFilter* RuntimeFilterBank::AllocateScratchBloomFilter(int32_t filter_id) { diff --git a/be/src/runtime/runtime-filter.h b/be/src/runtime/runtime-filter.h index 7ab73d7..97a2842 100644 --- a/be/src/runtime/runtime-filter.h +++ b/be/src/runtime/runtime-filter.h @@ -76,10 +76,11 @@ class RuntimeFilter { /// Inlined in IR so that the constant 'col_type' can be propagated. bool IR_ALWAYS_INLINE Eval(void* val, const ColumnType& col_type) const noexcept; - /// Returns the amount of time waited since registration for the filter to - /// arrive. Returns 0 if filter has not yet arrived. - int32_t arrival_delay() const { - if (arrival_time_.Load() == 0L) return 0L; + /// Returns the amount of time in milliseconds elapsed between the registration of the + /// filter and its arrival. If the filter has not yet arrived, it returns the time + /// elapsed since registration. + int32_t arrival_delay_ms() const { + if (arrival_time_.Load() == 0L) return MonotonicMillis() - registration_time_; return arrival_time_.Load() - registration_time_; }